summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2021-05-02 22:31:07 +0200
committerChristian Grothoff <christian@grothoff.org>2021-05-02 22:31:07 +0200
commitd40016f1e8b4578b294cfa09a59f43000c427643 (patch)
tree2d10444f14dd82ade72191f8b343aaa179852797
parent1cc2d75852b35e308d88352c23883a57a6f17c6a (diff)
resolve #2518
-rw-r--r--.gitignore1
-rw-r--r--ChangeLog3
-rw-r--r--src/plugins/Makefile.am28
-rw-r--r--src/plugins/old/real_extractor.c439
-rw-r--r--src/plugins/real_extractor.c579
-rw-r--r--src/plugins/test_real.c104
-rw-r--r--src/plugins/testdata/audiosig.rmbin0 -> 9616 bytes
-rw-r--r--src/plugins/testdata/ra3.rabin0 -> 1066 bytes
-rw-r--r--src/plugins/vlc_extractor.c334
9 files changed, 1049 insertions, 439 deletions
diff --git a/.gitignore b/.gitignore
index 073b4ef..d0c5a1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -112,6 +112,7 @@ src/plugins/test_ogg
src/plugins/test_ole2
src/plugins/test_png
src/plugins/test_ps
+src/plugins/test_real
src/plugins/test_riff
src/plugins/test_rpm
src/plugins/test_s3m
diff --git a/ChangeLog b/ChangeLog
index 1e68017..6cd0e63 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,6 @@
+Sun 02 May 2021 10:30:33 PM CEST
+ Revive REAL plugin (fixes #2518). -CG
+
Sat 01 May 2021 10:57:55 PM CEST
Revive ELF plugin (fixes #2516). -CG
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index 8cbe21a..58b0590 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -23,7 +23,9 @@ SUBDIRS = .
EXTRA_DIST = \
fuzz_default.sh \
template_extractor.c \
+ testdata/audiosig.rm \
testdata/archive_test.tar \
+ testdata/chello-elf \
testdata/deb_bzip2.deb \
testdata/dvi_ora.dvi \
testdata/exiv2_iptc.jpg \
@@ -51,6 +53,7 @@ EXTRA_DIST = \
testdata/png_image.png \
testdata/ps_bloomfilter.ps \
testdata/ps_wallace.ps \
+ testdata/ra3.ra \
testdata/riff_flame.avi \
testdata/rpm_test.rpm \
testdata/s3m_2nd_pm.s3m \
@@ -171,6 +174,7 @@ plugin_LTLIBRARIES = \
libextractor_nsf.la \
libextractor_nsfe.la \
libextractor_ps.la \
+ libextractor_real.la \
libextractor_riff.la \
libextractor_s3m.la \
libextractor_sid.la \
@@ -209,6 +213,7 @@ check_PROGRAMS = \
test_odf \
test_ps \
test_png \
+ test_real \
test_riff \
test_s3m \
test_sid \
@@ -562,6 +567,20 @@ test_ps_LDADD = \
$(top_builddir)/src/plugins/libtest.la
+libextractor_real_la_SOURCES = \
+ real_extractor.c
+libextractor_real_la_LDFLAGS = \
+ $(PLUGINFLAGS)
+libextractor_real_la_LIBADD = \
+ -lm \
+ $(XLIB) \
+ $(LE_LIBINTL)
+
+test_real_SOURCES = \
+ test_real.c
+test_real_LDADD = \
+ $(top_builddir)/src/plugins/libtest.la
+
libextractor_riff_la_SOURCES = \
riff_extractor.c
libextractor_riff_la_LDFLAGS = \
@@ -643,6 +662,15 @@ test_tiff_LDADD = \
$(top_builddir)/src/plugins/libtest.la
+libextractor_vlc_la_SOURCES = \
+ vlc_extractor.c
+libextractor_vlc_la_LDFLAGS = \
+ $(PLUGINFLAGS)
+libextractor_vlc_la_LIBADD = \
+ -lvlc \
+ $(XLIB)
+
+
libextractor_wav_la_SOURCES = \
wav_extractor.c
libextractor_wav_la_LDFLAGS = \
diff --git a/src/plugins/old/real_extractor.c b/src/plugins/old/real_extractor.c
deleted file mode 100644
index cfac031..0000000
--- a/src/plugins/old/real_extractor.c
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- This file is part of libextractor.
- Copyright (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
-
- libextractor is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 2, or (at your
- option) any later version.
-
- libextractor is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with libextractor; see the file COPYING. If not, write to the
- Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
- */
-
-#include "platform.h"
-#include "extractor.h"
-#include <stdint.h>
-
-#define UINT32 uint32_t
-#define UINT16 uint16_t
-#define UINT8 uint8_t
-
-typedef struct
-{
- UINT32 object_id;
- UINT32 size;
- UINT16 object_version; /* must be 0 */
- UINT16 stream_number;
- UINT32 max_bit_rate;
- UINT32 avg_bit_rate;
- UINT32 max_packet_size;
- UINT32 avg_packet_size;
- UINT32 start_time;
- UINT32 preroll;
- UINT32 duration;
- UINT8 stream_name_size;
- UINT8 data[0]; /* variable length section */
- /*
- UINT8[stream_name_size] stream_name;
- UINT8 mime_type_size;
- UINT8[mime_type_size] mime_type;
- UINT32 type_specific_len;
- UINT8[type_specific_len] type_specific_data;
- */
-} Media_Properties;
-
-typedef struct
-{
- UINT32 object_id;
- UINT32 size;
- UINT16 object_version; /* must be 0 */
- UINT16 title_len;
- UINT8 data[0]; /* variable length section */
- /*
- UINT8[title_len] title;
- UINT16 author_len;
- UINT8[author_len] author;
- UINT16 copyright_len;
- UINT8[copyright_len] copyright;
- UINT16 comment_len;
- UINT8[comment_len] comment;
- */
-} Content_Description;
-/* author, copyright and comment are supposed to be ASCII */
-
-#define REAL_HEADER 0x2E524d46
-#define MDPR_HEADER 0x4D445052
-#define CONT_HEADER 0x434F4e54
-
-#define RAFF4_HEADER 0x2E7261FD
-
-
-static int
-processMediaProperties (const Media_Properties *prop,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls)
-{
-
- UINT8 mime_type_size;
- UINT32 prop_size;
-
- prop_size = ntohl (prop->size);
- if (prop_size <= sizeof (Media_Properties))
- return 0;
- if (0 != prop->object_version)
- return 0;
- if (prop_size <= prop->stream_name_size + sizeof (UINT8)
- + sizeof (Media_Properties))
- return 0;
-
- mime_type_size = prop->data[prop->stream_name_size];
- if (prop_size > prop->stream_name_size + sizeof (UINT8)
- + +mime_type_size + sizeof (Media_Properties))
- {
- char data[mime_type_size + 1];
- memcpy (data, &prop->data[prop->stream_name_size + 1], mime_type_size);
- data[mime_type_size] = '\0';
-
- return proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- data,
- strlen (data));
- }
- return 0;
-}
-
-
-static int
-processContentDescription (const Content_Description *prop,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls)
-{
- UINT16 author_len;
- UINT16 copyright_len;
- UINT16 comment_len;
- UINT16 title_len;
- char *title;
- char *author;
- char *copyright;
- char *comment;
- UINT32 prop_size;
- int ret;
-
- prop_size = ntohl (prop->size);
- if (prop_size <= sizeof (Content_Description))
- return 0;
- if (0 != prop->object_version)
- return 0;
- title_len = ntohs (prop->title_len);
- if (prop_size <= title_len + sizeof (UINT16) + sizeof (Content_Description))
- return 0;
- author_len = ntohs (*(UINT16 *) &prop->data[title_len]);
- if (prop_size <= title_len + sizeof (UINT16)
- + author_len + sizeof (Content_Description))
- return 0;
-
- copyright_len = ntohs (*(UINT16 *) &prop->data[title_len
- + author_len
- + sizeof (UINT16)]);
-
- if (prop_size <= title_len + 2 * sizeof (UINT16)
- + author_len + copyright_len + sizeof (Content_Description))
- return 0;
-
- comment_len = ntohs (*(UINT16 *) &prop->data[title_len
- + author_len
- + copyright_len
- + 2 * sizeof (UINT16)]);
-
- if (prop_size < title_len + 3 * sizeof (UINT16)
- + author_len + copyright_len + comment_len
- + sizeof (Content_Description))
- return 0;
-
- ret = 0;
- title = malloc (title_len + 1);
- if (title != NULL)
- {
- memcpy (title, &prop->data[0], title_len);
- title[title_len] = '\0';
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_TITLE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- title,
- strlen (title) + 1);
- free (title);
- }
- if (ret != 0)
- return ret;
-
- author = malloc (author_len + 1);
- if (author != NULL)
- {
- memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len);
- author[author_len] = '\0';
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_AUTHOR_NAME,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- author,
- strlen (author) + 1);
- free (author);
- }
- if (ret != 0)
- return ret;
-
- copyright = malloc (copyright_len + 1);
- if (copyright != NULL)
- {
- memcpy (copyright,
- &prop->data[title_len + sizeof (UINT16) * 2 + author_len],
- copyright_len);
- copyright[copyright_len] = '\0';
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_COPYRIGHT,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- copyright,
- strlen (copyright) + 1);
- free (copyright);
- }
- if (ret != 0)
- return ret;
-
- comment = malloc (comment_len + 1);
- if (comment != NULL)
- {
- memcpy (comment,
- &prop->data[title_len + sizeof (UINT16) * 3 + author_len
- + copyright_len], comment_len);
- comment[comment_len] = '\0';
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_COMMENT,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- comment,
- strlen (comment) + 1);
- free (comment);
- }
- if (ret != 0)
- return ret;
- return 0;
-}
-
-
-typedef struct RAFF4_header
-{
- unsigned short version;
- unsigned short revision;
- unsigned short header_length;
- unsigned short compression_type;
- unsigned int granularity;
- unsigned int total_bytes;
- unsigned int bytes_per_minute;
- unsigned int bytes_per_minute2;
- unsigned short interleave_factor;
- unsigned short interleave_block_size;
- unsigned int user_data;
- float sample_rate;
- unsigned short sample_size;
- unsigned short channels;
- unsigned char interleave_code[5];
- unsigned char compression_code[5];
- unsigned char is_interleaved;
- unsigned char copy_byte;
- unsigned char stream_type;
- /*
- unsigned char tlen;
- unsigned char title[tlen];
- unsigned char alen;
- unsigned char author[alen];
- unsigned char clen;
- unsigned char copyright[clen];
- unsigned char aplen;
- unsigned char app[aplen]; */
-} RAFF4_header;
-
-#define RAFF4_HDR_SIZE 53
-
-static char *
-stndup (const char *str, size_t n)
-{
- char *tmp;
- tmp = malloc (n + 1);
- if (tmp == NULL)
- return NULL;
- tmp[n] = '\0';
- memcpy (tmp, str, n);
- return tmp;
-}
-
-
-/* audio/vnd.rn-realaudio */
-int
-EXTRACTOR_real_extract (const unsigned char *data,
- size_t size,
- EXTRACTOR_MetaDataProcessor proc,
- void *proc_cls,
- const char *options)
-{
- const unsigned char *pos;
- const unsigned char *end;
- unsigned int length;
- const RAFF4_header *hdr;
- unsigned char tlen;
- unsigned char alen;
- unsigned char clen;
- unsigned char aplen;
- char *x;
- int ret;
-
- if (size <= 2 * sizeof (int))
- return 0;
- if (RAFF4_HEADER == ntohl (*(int *) data))
- {
- /* HELIX */
- if (size <= RAFF4_HDR_SIZE + 16 + 4)
- return 0;
- if (0 != proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- "audio/vnd.rn-realaudio",
- strlen ("audio/vnd.rn-realaudio") + 1))
- return 1;
- hdr = (const RAFF4_header *) &data[16];
- if (ntohs (hdr->header_length) + 16 > size)
- return 0;
- tlen = data[16 + RAFF4_HDR_SIZE];
- if (tlen + RAFF4_HDR_SIZE + 20 > size)
- return 0;
- alen = data[17 + tlen + RAFF4_HDR_SIZE];
- if (tlen + alen + RAFF4_HDR_SIZE + 20 > size)
- return 0;
- clen = data[18 + tlen + alen + RAFF4_HDR_SIZE];
- if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size)
- return 0;
- aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE];
- if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size)
- return 0;
- ret = 0;
- if ( (tlen > 0) && (ret == 0) )
- {
- x = stndup ((const char *) &data[17 + RAFF4_HDR_SIZE], tlen);
- if (x != NULL)
- {
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- x,
- strlen (x) + 1);
- free (x);
- }
- }
- if ( (alen > 0) && (ret == 0) )
- {
- x = stndup ((const char *) &data[18 + RAFF4_HDR_SIZE + tlen], alen);
- if (x != NULL)
- {
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- x,
- strlen (x) + 1);
- free (x);
- }
- }
- if ( (clen > 0) && (ret == 0) )
- {
- x = stndup ((const char *) &data[19 + RAFF4_HDR_SIZE + tlen + alen],
- clen);
- if (x != NULL)
- {
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- x,
- strlen (x) + 1);
- free (x);
- }
- }
- if ( (aplen > 0) && (ret == 0) )
- {
- x = stndup ((const char *) &data[20 + RAFF4_HDR_SIZE + tlen + alen
- + clen], aplen);
- if (x != NULL)
- {
- ret = proc (proc_cls,
- "real",
- EXTRACTOR_METATYPE_MIMETYPE,
- EXTRACTOR_METAFORMAT_UTF8,
- "text/plain",
- x,
- strlen (x) + 1);
- free (x);
- }
- }
- return ret;
- }
- if (REAL_HEADER == ntohl (*(int *) data))
- {
- /* old real */
- end = &data[size];
- pos = &data[0];
- ret = 0;
- while (0 == ret)
- {
- if ((pos + 8 >= end) || (pos + 8 < pos))
- break;
- length = ntohl (*(((unsigned int *) pos) + 1));
- if (length <= 0)
- break;
- if ((pos + length >= end) || (pos + length < pos))
- break;
- switch (ntohl (*((unsigned int *) pos)))
- {
- case MDPR_HEADER:
- ret = processMediaProperties ((Media_Properties *) pos,
- proc,
- proc_cls);
- pos += length;
- break;
- case CONT_HEADER:
- ret = processContentDescription ((Content_Description *) pos,
- proc,
- proc_cls);
- pos += length;
- break;
- case REAL_HEADER: /* treat like default */
- default:
- pos += length;
- break;
- }
- }
- return ret;
- }
- return 0;
-}
diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c
new file mode 100644
index 0000000..9d77b28
--- /dev/null
+++ b/src/plugins/real_extractor.c
@@ -0,0 +1,579 @@
+/*
+ * This file is part of libextractor.
+ * Copyright (C) 2021 Christian Grothoff
+ *
+ * libextractor is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your
+ * option) any later version.
+ *
+ * libextractor is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with libextractor; see the file COPYING. If not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+/**
+ * @file plugins/real_extractor.c
+ * @brief plugin to support REAL files
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "extractor.h"
+
+struct MediaProperties /* overlay for the start of an "MDPR" object */
+{
+ uint32_t object_id; /* extract_real() dispatches on MDPR_HEADER here */
+ uint32_t size; /* network byte order; length of the whole object, counted from object_id */
+ uint16_t object_version; /* must be 0 */
+ uint16_t stream_number;
+ uint32_t max_bit_rate;
+ uint32_t avg_bit_rate;
+ uint32_t max_packet_size;
+ uint32_t avg_packet_size;
+ uint32_t start_time;
+ uint32_t preroll;
+ uint32_t duration;
+ uint8_t stream_name_size; /* number of stream name bytes at the start of data[] */
+ uint8_t data[0]; /* variable length section */
+ /*
+ Layout of the variable length section:
+ uint8_t[stream_name_size] stream_name;
+ uint8_t mime_type_size;
+ uint8_t[mime_type_size] mime_type;
+ uint32_t type_specific_len;
+ uint8_t[type_specific_len] type_specific_data;
+
+ Only mime_type is reported by processMediaProperties().
+ NOTE(review): the struct is not packed, so sizeof() may include padding
+ after stream_name_size and be larger than the on-disk fixed part; the
+ size checks in processMediaProperties() use sizeof() -- confirm this is
+ intended.
+ */
+};
+
+struct ContentDescription /* overlay for the start of a "CONT" object */
+{
+ uint32_t object_id; /* extract_real() dispatches on CONT_HEADER here */
+ uint32_t size; /* network byte order; length of the whole object, counted from object_id */
+ uint16_t object_version; /* must be 0 */
+ uint16_t title_len; /* network byte order; number of title bytes at the start of data[] */
+ uint8_t data[0]; /* variable length section */
+ /*
+ Layout of the variable length section; every *_len is a 16-bit value
+ in network byte order and is directly followed by that many bytes:
+ uint8_t[title_len] title;
+ uint16_t author_len;
+ uint8_t[author_len] author;
+ uint16_t copyright_len;
+ uint8_t[copyright_len] copyright;
+ uint16_t comment_len;
+ uint8_t[comment_len] comment;
+ */
+};
+/* author, copyright and comment are supposed to be ASCII; the strings are
+ not 0-terminated in the file, processContentDescription() adds the
+ terminator on its own copy */
+
+
+#define REAL_HEADER 0x2E524d46 /* ".RMF" read as a big-endian 32-bit value */
+#define MDPR_HEADER 0x4D445052 /* "MDPR": object handled by processMediaProperties() */
+#define CONT_HEADER 0x434F4e54 /* "CONT": object handled by processContentDescription() */
+#define RAFF4_HEADER 0x2E7261FD /* ".ra" followed by 0xFD; despite the name, extract_raff() accepts versions 3 and 4 behind it */
+
+
+/**
+ * Give meta data to LE.
+ *
+ * The macro expects a variable `ec` (the extraction context) to be in
+ * scope and may only be used in functions returning `void`: if the
+ * meta data processor returns non-zero, the macro executes `return`
+ * in the function that used it.
+ *
+ * @param s 0-terminated string with the meta data value; the argument
+ *        is evaluated twice and thus must not have side effects
+ * @param t type of the meta data
+ */
+#define ADD(s,t) do {                                         \
+    if (0 != ec->proc (ec->cls, "real", (t),                  \
+                       EXTRACTOR_METAFORMAT_C_STRING,         \
+                       "text/plain", (s), strlen ((s)) + 1))  \
+    { return; }                                               \
+} while (0)
+
+
+/**
+ * Report the MIME type stored in a media properties ("MDPR") object.
+ *
+ * Nothing is reported if the object is too small, has a non-zero
+ * version or if the MIME type would not fit into the object.
+ *
+ * @param prop start of the object; the size field of the object
+ *        bounds all accesses made here
+ * @param ec extraction context used to report the MIME type
+ */
+static void
+processMediaProperties (const struct MediaProperties *prop,
+                        struct EXTRACTOR_ExtractContext *ec)
+{
+  const uint32_t total = ntohl (prop->size);
+  uint8_t name_len;
+  uint8_t mime_len;
+
+  if ( (total <= sizeof (struct MediaProperties)) ||
+       (0 != prop->object_version) )
+    return;
+  /* the byte with the MIME type length directly follows the stream name */
+  name_len = prop->stream_name_size;
+  if (total <= name_len + sizeof (uint8_t)
+      + sizeof (struct MediaProperties))
+    return;
+  mime_len = prop->data[name_len];
+  if (total <= name_len + sizeof (uint8_t)
+      + mime_len + sizeof (struct MediaProperties))
+    return;
+  {
+    /* the MIME type is not 0-terminated in the file: make a terminated copy */
+    char mime[mime_len + 1];
+
+    memcpy (mime,
+            &prop->data[name_len + 1],
+            mime_len);
+    mime[mime_len] = '\0';
+    ADD (mime,
+         EXTRACTOR_METATYPE_MIMETYPE);
+  }
+}
+
+
+/**
+ * Report title, author, copyright and comment stored in a content
+ * description ("CONT") object.
+ *
+ * The four strings follow each other in the variable length section.
+ * The length of the title is part of the fixed header, each of the
+ * other strings is preceded by its own 16-bit length in network byte
+ * order.  A string is reported as soon as it has been bounds-checked
+ * against the size of the object, so an object that is cut short still
+ * yields its leading strings.  Empty strings are skipped.
+ *
+ * @param prop start of the object; the size field of the object
+ *        bounds all accesses made here
+ * @param ec extraction context used to report the strings
+ */
+static void
+processContentDescription (const struct ContentDescription *prop,
+                           struct EXTRACTOR_ExtractContext *ec)
+{
+  static const enum EXTRACTOR_MetaType types[] = {
+    EXTRACTOR_METATYPE_TITLE,
+    EXTRACTOR_METATYPE_AUTHOR_NAME,
+    EXTRACTOR_METATYPE_COPYRIGHT,
+    EXTRACTOR_METATYPE_COMMENT
+  };
+  const size_t num_fields = sizeof (types) / sizeof (types[0]);
+  const size_t hdr_size = sizeof (struct ContentDescription);
+  uint32_t prop_size;
+  uint16_t len;
+  size_t off; /* offset in prop->data of the string being processed */
+  size_t i;
+
+  prop_size = ntohl (prop->size);
+  if (prop_size <= hdr_size)
+    return;
+  if (0 != prop->object_version)
+    return;
+  len = ntohs (prop->title_len);
+  off = 0;
+  for (i = 0; i < num_fields; i++)
+  {
+    size_t end;
+
+    if (i > 0)
+    {
+      uint16_t nlen;
+
+      /* the length prefix itself must fit, with at least one byte to spare */
+      if (prop_size <= hdr_size + off + sizeof (nlen))
+        return;
+      /* strings have arbitrary lengths, so the prefix may sit at an odd
+         address: copy it out instead of dereferencing a uint16_t pointer */
+      memcpy (&nlen,
+              &prop->data[off],
+              sizeof (nlen));
+      len = ntohs (nlen);
+      off += sizeof (nlen);
+    }
+    end = hdr_size + off + len;
+    /* only the last string may end exactly at the end of the object */
+    if ( (prop_size < end) ||
+         ( (prop_size == end) &&
+           (i + 1 < num_fields) ) )
+      return;
+    if (len > 0)
+    {
+      /* not 0-terminated in the file: report a terminated copy */
+      char value[len + 1];
+
+      memcpy (value,
+              &prop->data[off],
+              len);
+      value[len] = '\0';
+      ADD (value,
+           types[i]);
+    }
+    off += len;
+  }
+}
+
+
+struct RAFF_Header /* overlay used by extract_raff() at byte 4 of the file, right after the magic */
+{
+ uint16_t version; /* network byte order; extract_raff() handles 3 and 4 */
+};
+
+struct RAFF3_Header /* documents the version 3 layout; extract_raff3() works with byte offsets instead */
+{
+ uint8_t unknown[10];
+ uint32_t data_size;
+ /*
+ Followed by four strings, each stored as a one-byte length and that
+ many bytes (no 0-terminator):
+ uint8_t tlen;
+ uint8_t title[tlen];
+ uint8_t alen;
+ uint8_t author[alen];
+ uint8_t clen;
+ uint8_t copyright[clen];
+ uint8_t aplen;
+ uint8_t app[aplen]; */
+};
+
+
+#define RAFF3_HDR_SIZE 14 /* 10 + 4 bytes of the fields above; NOTE(review): sizeof (struct RAFF3_Header) is probably larger due to alignment padding -- use this constant for offsets */
+
+
+struct RAFF4_Header /* documents the version 4 layout; only its sizeof() is used by the code */
+{
+ uint16_t version;
+ uint16_t revision;
+ uint16_t header_length;
+ uint16_t compression_type;
+ uint32_t granularity;
+ uint32_t total_bytes;
+ uint32_t bytes_per_minute;
+ uint32_t bytes_per_minute2;
+ uint16_t interleave_factor;
+ uint16_t interleave_block_size;
+ uint32_t user_data;
+ float sample_rate;
+ uint16_t sample_size;
+ uint16_t channels;
+ uint8_t interleave_code[5];
+ uint8_t compression_code[5];
+ uint8_t is_interleaved;
+ uint8_t copy_byte;
+ uint8_t stream_type;
+ /*
+ Followed by four strings, each stored as a one-byte length and that
+ many bytes (no 0-terminator):
+ uint8_t tlen;
+ uint8_t title[tlen];
+ uint8_t alen;
+ uint8_t author[alen];
+ uint8_t clen;
+ uint8_t copyright[clen];
+ uint8_t aplen;
+ uint8_t app[aplen]; */
+};
+
+#define RAFF4_HDR_SIZE 53 /* sum of the field sizes above; extract_raff4() expects tlen at byte 16 + RAFF4_HDR_SIZE of the file */
+
+
+/**
+ * Extract title, author, copyright and application name from the
+ * header of a version 3 RealAudio file.
+ *
+ * The strings follow each other, each stored as a one-byte length and
+ * that many bytes; the length of the title is at byte
+ * 8 + RAFF3_HDR_SIZE of the file.  A string is reported as soon as it
+ * has been bounds-checked, so a header that is cut short still yields
+ * its leading strings.  Empty strings are skipped.
+ *
+ * @param ec extraction context used to report the strings
+ * @param ptr start of the file
+ * @param size number of bytes available at @a ptr
+ */
+static void
+extract_raff3 (struct EXTRACTOR_ExtractContext *ec,
+               const void *ptr,
+               size_t size)
+{
+  const uint8_t *data = ptr;
+  uint8_t tlen;
+  uint8_t alen;
+  uint8_t clen;
+  uint8_t aplen;
+
+  if (size <= RAFF3_HDR_SIZE + 8)
+    return;
+  tlen = data[8 + RAFF3_HDR_SIZE];
+  if (tlen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (tlen > 0)
+  {
+    char x[tlen + 1];
+
+    memcpy (x,
+            &data[9 + RAFF3_HDR_SIZE],
+            tlen);
+    x[tlen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_TITLE);
+  }
+  alen = data[9 + tlen + RAFF3_HDR_SIZE];
+  if (tlen + alen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (alen > 0)
+  {
+    char x[alen + 1];
+
+    memcpy (x,
+            &data[10 + RAFF3_HDR_SIZE + tlen],
+            alen);
+    x[alen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_AUTHOR_NAME);
+  }
+  clen = data[10 + tlen + alen + RAFF3_HDR_SIZE];
+  if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (clen > 0)
+  {
+    char x[clen + 1];
+
+    /* the string starts right after its length byte; the offset must be
+       based on the version 3 header size like the bounds check above */
+    memcpy (x,
+            &data[11 + RAFF3_HDR_SIZE + tlen + alen],
+            clen);
+    x[clen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_COPYRIGHT);
+  }
+  aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE];
+  if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (aplen > 0)
+  {
+    char x[aplen + 1];
+
+    memcpy (x,
+            &data[12 + RAFF3_HDR_SIZE + tlen + alen + clen],
+            aplen);
+    x[aplen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_UNKNOWN);
+  }
+}
+
+
+/**
+ * Extract title, author, copyright and application name from the
+ * header of a version 4 RealAudio file.
+ *
+ * The strings follow each other, each stored as a one-byte length and
+ * that many bytes; the length of the title is at byte
+ * 16 + RAFF4_HDR_SIZE of the file.  All four lengths are validated
+ * against @a size before the first string is reported.  Empty strings
+ * are skipped.
+ *
+ * @param ec extraction context used to report the strings
+ * @param ptr start of the file
+ * @param size number of bytes available at @a ptr
+ */
+static void
+extract_raff4 (struct EXTRACTOR_ExtractContext *ec,
+               const void *ptr,
+               size_t size)
+{
+  static const enum EXTRACTOR_MetaType kinds[4] = {
+    EXTRACTOR_METATYPE_TITLE,
+    EXTRACTOR_METATYPE_AUTHOR_NAME,
+    EXTRACTOR_METATYPE_COPYRIGHT,
+    EXTRACTOR_METATYPE_UNKNOWN
+  };
+  const uint8_t *bytes = ptr;
+  uint8_t lens[4];
+  size_t total; /* sum of the string lengths seen so far */
+  size_t start; /* offset of the first byte of the current string */
+  unsigned int i;
+
+  if (size <= RAFF4_HDR_SIZE + 16 + 4)
+    return;
+  /* first pass: collect and validate the four length bytes */
+  total = 0;
+  for (i = 0; i < 4; i++)
+  {
+    lens[i] = bytes[16 + i + total + RAFF4_HDR_SIZE];
+    total += lens[i];
+    if (total + RAFF4_HDR_SIZE + 20 > size)
+      return;
+  }
+  /* second pass: report the non-empty strings */
+  start = 17 + RAFF4_HDR_SIZE;
+  for (i = 0; i < 4; i++)
+  {
+    if (lens[i] > 0)
+    {
+      char x[lens[i] + 1];
+
+      memcpy (x,
+              &bytes[start],
+              lens[i]);
+      x[lens[i]] = '\0';
+      ADD (x,
+           kinds[i]);
+    }
+    /* skip the string and the length byte of the next one */
+    start += lens[i] + 1;
+  }
+}
+
+
+/**
+ * Handle a file starting with the RAFF4_HEADER magic: report the
+ * RealAudio MIME type and hand the data to the extractor matching the
+ * version found behind the magic.  Versions other than 3 and 4 only
+ * yield the MIME type.
+ *
+ * @param ec extraction context used to report the meta data
+ * @param ptr start of the file
+ * @param size number of bytes available at @a ptr
+ */
+static void
+extract_raff (struct EXTRACTOR_ExtractContext *ec,
+              const void *ptr,
+              size_t size)
+{
+  const struct RAFF_Header *hdr;
+  uint16_t version;
+
+  /* HELIX */
+  if (size <= sizeof (struct RAFF_Header) + 4)
+    return;
+  ADD ("audio/vnd.rn-realaudio",
+       EXTRACTOR_METATYPE_MIMETYPE);
+  /* the version directly follows the 4 byte magic */
+  hdr = (const struct RAFF_Header *) ((const uint8_t *) ptr + 4);
+  version = ntohs (hdr->version);
+  if (3 == version)
+    extract_raff3 (ec,
+                   ptr,
+                   size);
+  else if (4 == version)
+    extract_raff4 (ec,
+                   ptr,
+                   size);
+}
+
+
+/**
+ * Extract meta data from a file in the old real format (".RMF").
+ *
+ * The file is a sequence of objects, each starting with a 32-bit
+ * object ID and a 32-bit size (both in network byte order) where the
+ * size covers the whole object.  "MDPR" and "CONT" objects are handed
+ * to their handlers, all other objects are skipped.  Whenever the
+ * current buffer does not hold a complete object, up to 32k are read
+ * again starting at the beginning of that object; an object that does
+ * not fit into such a buffer ends the extraction.
+ *
+ * Note that the handlers do not report back whether the meta data
+ * processor asked to stop, so the walk continues in that case.
+ *
+ * @param ec extraction context used for IO and to report the meta data
+ * @param data start of the file
+ * @param size number of bytes available at @a data
+ */
+static void
+extract_real (struct EXTRACTOR_ExtractContext *ec,
+              const void *data,
+              size_t size)
+{
+  const uint8_t *buf = data;
+  uint64_t off = 0; /* file offset of buf[0] */
+  size_t pos = 0;   /* offset of the current object in buf; always <= size */
+
+  while (1)
+  {
+    uint32_t hdr[2]; /* object ID and object size as stored in the file */
+    uint32_t length = 0;
+    int complete = 0;
+
+    if (size - pos >= sizeof (hdr))
+    {
+      /* objects are not necessarily aligned: copy the header out */
+      memcpy (hdr,
+              &buf[pos],
+              sizeof (hdr));
+      length = ntohl (hdr[1]);
+      complete = (length <= size - pos);
+    }
+    if (! complete)
+    {
+      int64_t noff;
+      void *in;
+      ssize_t isize;
+
+      noff = ec->seek (ec->cls,
+                       (int64_t) (off + pos),
+                       SEEK_SET);
+      if (-1 == noff)
+        return;
+      isize = ec->read (ec->cls,
+                        &in,
+                        32 * 1024);
+      if (isize < (ssize_t) sizeof (hdr))
+        return;
+      buf = in;
+      size = (size_t) isize;
+      off = (uint64_t) noff;
+      pos = 0;
+      /* the header may not have been available before: (re)load it */
+      memcpy (hdr,
+              buf,
+              sizeof (hdr));
+      length = ntohl (hdr[1]);
+    }
+    if (length <= sizeof (hdr))
+      return; /* no payload, or a size that would not advance */
+    if (length > size - pos)
+      return; /* object does not fit into a freshly read buffer */
+    switch (ntohl (hdr[0]))
+    {
+    case MDPR_HEADER:
+      processMediaProperties ((const struct MediaProperties *) &buf[pos],
+                              ec);
+      break;
+    case CONT_HEADER:
+      processContentDescription ((const struct ContentDescription *) &buf[pos],
+                                 ec);
+      break;
+    case REAL_HEADER:   /* treat like default */
+    default:
+      break;
+    }
+    pos += length;
+  }
+}
+
+
+/**
+ * "extract" metadata from a REAL file
+ *
+ * Reads the beginning of the file and dispatches on the first four
+ * bytes: RAFF4_HEADER selects the RealAudio header parser, REAL_HEADER
+ * the parser for the old real format.  Other files are ignored.
+ *
+ * @param ec extraction context
+ */
+void
+EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec)
+{
+  void *data;
+  ssize_t n;
+  uint32_t magic;
+
+  /* Largest header extract_raff4() can address: four length bytes
+     starting at 16 + RAFF4_HDR_SIZE plus four strings of up to 255
+     bytes each. */
+  n = ec->read (ec->cls,
+                &data,
+                16 + RAFF4_HDR_SIZE + 4 * 256);
+  /* signed comparison: a negative result (read error) must not pass */
+  if (n < (ssize_t) sizeof (magic))
+    return;
+  memcpy (&magic,
+          data,
+          sizeof (magic));
+  switch (ntohl (magic))
+  {
+  case RAFF4_HEADER:
+    extract_raff (ec,
+                  data,
+                  (size_t) n);
+    break;
+  case REAL_HEADER:
+    extract_real (ec,
+                  data,
+                  (size_t) n);
+    break;
+  default:
+    break;
+  }
+}
+
+
+/* end of real_extractor.c */
diff --git a/src/plugins/test_real.c b/src/plugins/test_real.c
new file mode 100644
index 0000000..98e2af4
--- /dev/null
+++ b/src/plugins/test_real.c
@@ -0,0 +1,104 @@
+/*
+ This file is part of libextractor.
+ Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+/**
+ * @file plugins/test_real.c
+ * @brief testcase for real plugin
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "test_lib.h"
+
+
+/**
+ * Main function for the REAL testcase.
+ *
+ * Declares the meta data expected for the two sample files and hands
+ * the resulting problem set to ET_main() for the "real" plugin.
+ *
+ * @param argc number of arguments (ignored)
+ * @param argv arguments (ignored)
+ * @return value returned by ET_main(); 0 on success
+ */
+int
+main (int argc, char *argv[])
+{
+ struct SolutionData real_audiosig_sol[] = { /* expected for testdata/audiosig.rm */
+ {
+ EXTRACTOR_METATYPE_MIMETYPE,
+ EXTRACTOR_METAFORMAT_C_STRING,
+ "text/plain",
+ "audio/x-pn-realaudio", /* not a literal of the plugin; presumably read from the file -- confirm */
+ strlen ("audio/x-pn-realaudio") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TITLE,
+ EXTRACTOR_METAFORMAT_C_STRING,
+ "text/plain",
+ "Welcome!",
+ strlen ("Welcome!") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_COPYRIGHT,
+ EXTRACTOR_METAFORMAT_C_STRING,
+ "text/plain",
+ "1998, RealNetworks, Inc.",
+ strlen ("1998, RealNetworks, Inc.") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 } /* end of list; NOTE(review): meaning of the last field is defined in test_lib.h */
+ };
+ struct SolutionData real_ra3_sol[] = { /* expected for testdata/ra3.ra */
+ {
+ EXTRACTOR_METATYPE_MIMETYPE,
+ EXTRACTOR_METAFORMAT_C_STRING,
+ "text/plain",
+ "audio/vnd.rn-realaudio",
+ strlen ("audio/vnd.rn-realaudio") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_TITLE,
+ EXTRACTOR_METAFORMAT_C_STRING,
+ "text/plain",
+ "Song of Welcome",
+ strlen ("Song of Welcome") + 1,
+ 0
+ },
+ {
+ EXTRACTOR_METATYPE_AUTHOR_NAME,
+ EXTRACTOR_METAFORMAT_C_STRING,
+ "text/plain",
+ "Investiture Service",
+ strlen ("Investiture Service") + 1,
+ 0
+ },
+ { 0, 0, NULL, NULL, 0, -1 } /* end of list */
+ };
+ struct ProblemSet ps[] = { /* pairs each sample file with its expected meta data */
+ { "testdata/audiosig.rm",
+ real_audiosig_sol },
+ { "testdata/ra3.ra",
+ real_ra3_sol },
+ { NULL, NULL } /* end of list */
+ };
+ return ET_main ("real", ps);
+}
+
+
+/* end of test_real.c */
diff --git a/src/plugins/testdata/audiosig.rm b/src/plugins/testdata/audiosig.rm
new file mode 100644
index 0000000..6307d30
--- /dev/null
+++ b/src/plugins/testdata/audiosig.rm
Binary files differ
diff --git a/src/plugins/testdata/ra3.ra b/src/plugins/testdata/ra3.ra
new file mode 100644
index 0000000..d36569f
--- /dev/null
+++ b/src/plugins/testdata/ra3.ra
Binary files differ
diff --git a/src/plugins/vlc_extractor.c b/src/plugins/vlc_extractor.c
new file mode 100644
index 0000000..e90b3ea
--- /dev/null
+++ b/src/plugins/vlc_extractor.c
@@ -0,0 +1,334 @@
+/*
+ This file is part of libextractor.
+ Copyright (C) 2021 Christian Grothoff
+
+ libextractor is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ libextractor is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with libextractor; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+
+NOTE: This plugin is not yet working. Somehow libvlc never calls any of the IO callbacks.
+
+*/
+/**
+ * @file plugins/vlc_extractor.c
+ * @brief plugin to extract metadata using libvlc
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "extractor.h"
+#include <vlc/vlc.h>
+#include <signal.h>
+
+/**
+ * libvlc "open" callback for our custom bitstream input.
+ *
+ * libvlc may open the same media item several times and invokes this
+ * callback each time.  We keep no per-instance state: the extraction
+ * context itself is handed back as the private data pointer.
+ *
+ * @param opaque our `struct EXTRACTOR_ExtractContext`
+ * @param[out] datap set to the extraction context
+ * @param[out] sizep set to the size reported by the context's
+ *             `get_size` callback
+ * @return 0 on success, 1 if the reported size is UINT64_MAX
+ *         (in which case libvlc will not invoke the other callbacks)
+ */
+static int
+open_cb (void *opaque,
+         void **datap,
+         uint64_t *sizep)
+{
+  struct EXTRACTOR_ExtractContext *ctx = opaque;
+  uint64_t total;
+
+  *datap = ctx;
+  total = ctx->get_size (ctx->cls);
+  *sizep = total;
+  if (UINT64_MAX != total)
+  {
+    fprintf (stderr,
+             "Open returns %llu file size!\n",
+             (unsigned long long) total);
+    return 0;
+  }
+  fprintf (stderr,
+           "Open failed!\n");
+  return 1;
+}
+
+
+/**
+ * libvlc "read" callback for our custom bitstream input.
+ *
+ * Asks the extraction context for up to @a len bytes and copies
+ * whatever it returns into the buffer supplied by libvlc.
+ *
+ * @param opaque our `struct EXTRACTOR_ExtractContext`
+ * @param buf start address of the buffer to read data into
+ * @param len length of @a buf in bytes
+ * @return strictly positive number of bytes read, 0 on end-of-stream,
+ *         or -1 on non-recoverable error
+ */
+static ssize_t
+read_cb (void *opaque,
+         unsigned char *buf,
+         size_t len)
+{
+  struct EXTRACTOR_ExtractContext *ec = opaque;
+  void *data = NULL;
+  ssize_t ret;
+
+  ret = ec->read (ec->cls,
+                  &data,
+                  len);
+  /* Any negative value is an error; a result larger than what we asked
+     for, or data without a buffer, would overflow or crash the copy. */
+  if ( (ret < 0) ||
+       ((size_t) ret > len) ||
+       ( (ret > 0) &&
+         (NULL == data) ) )
+  {
+    fprintf (stderr,
+             "Read failed!\n");
+    return -1;
+  }
+  /* Skip the copy at end-of-stream: 'data' need not point anywhere
+     valid then, and memcpy() requires valid pointers even for size 0. */
+  if (ret > 0)
+    memcpy (buf,
+            data,
+            (size_t) ret);
+  fprintf (stderr,
+           "Read %u bytes!\n",
+           (unsigned int) ret);
+  return ret;
+}
+
+
+/**
+ * libvlc "seek" callback for our custom bitstream input.
+ *
+ * Forwards an absolute seek to the extraction context.  Offsets above
+ * INT64_MAX are rejected before the context is consulted.
+ *
+ * @param opaque our `struct EXTRACTOR_ExtractContext`
+ * @param offset absolute byte offset to seek to
+ * @return 0 on success, -1 on error
+ */
+static int
+seek_cb (void *opaque,
+         uint64_t offset)
+{
+  struct EXTRACTOR_ExtractContext *ctx = opaque;
+
+  fprintf (stderr,
+           "Seek to %llu!\n",
+           (unsigned long long) offset);
+  if (offset > INT64_MAX)
+  {
+    fprintf (stderr,
+             "Excessive seek, impossible with LE!\n");
+    return -1;
+  }
+  if (-1 != ctx->seek (ctx->cls,
+                       offset,
+                       SEEK_SET))
+    return 0;
+  fprintf (stderr,
+           "Seek failed!\n");
+  return -1;
+}
+
+
+/**
+ * libvlc "close" callback for our custom bitstream input.
+ *
+ * open_cb() allocates nothing, so there is nothing to release here;
+ * the callback only logs that it was invoked.
+ *
+ * @param opaque our `struct EXTRACTOR_ExtractContext` (unused)
+ */
+static void
+close_cb (void *opaque)
+{
+  (void) opaque;
+  fputs ("Close called\n",
+         stderr);
+}
+
+
+/**
+ * Query @a media for every libvlc meta field we know how to map and
+ * hand each value found to the metadata processor of @a ec.
+ *
+ * Stops early once the processor returns non-zero, which is how
+ * libextractor asks a plugin to abort the extraction.
+ *
+ * @param ec extraction context providing the `proc` callback
+ * @param media libvlc media object to read the meta data from
+ */
+static void
+extract (struct EXTRACTOR_ExtractContext *ec,
+         libvlc_media_t *media)
+{
+  /* Mapping from libvlc meta fields to libextractor meta types. */
+  static const struct
+  {
+    enum libvlc_meta_t vt;
+    enum EXTRACTOR_MetaType mt;
+  } map[] = {
+    { libvlc_meta_Title,
+      EXTRACTOR_METATYPE_TITLE },
+    { libvlc_meta_Artist,
+      EXTRACTOR_METATYPE_ARTIST },
+    { libvlc_meta_Genre,
+      EXTRACTOR_METATYPE_GENRE },
+    { libvlc_meta_Copyright,
+      EXTRACTOR_METATYPE_COPYRIGHT },
+    { libvlc_meta_Album,
+      EXTRACTOR_METATYPE_ALBUM },
+    { libvlc_meta_TrackNumber,
+      EXTRACTOR_METATYPE_TRACK_NUMBER },
+    { libvlc_meta_Description,
+      EXTRACTOR_METATYPE_DESCRIPTION },
+    { libvlc_meta_Rating,
+      EXTRACTOR_METATYPE_RATING },
+    { libvlc_meta_Date,
+      EXTRACTOR_METATYPE_CREATION_TIME },
+    { libvlc_meta_Setting,
+      EXTRACTOR_METATYPE_UNKNOWN },
+    { libvlc_meta_URL,
+      EXTRACTOR_METATYPE_URL },
+    { libvlc_meta_Language,
+      EXTRACTOR_METATYPE_LANGUAGE },
+    { libvlc_meta_NowPlaying,
+      EXTRACTOR_METATYPE_UNKNOWN },
+    { libvlc_meta_Publisher,
+      EXTRACTOR_METATYPE_PUBLISHER },
+    { libvlc_meta_EncodedBy,
+      EXTRACTOR_METATYPE_ENCODED_BY },
+    { libvlc_meta_ArtworkURL,
+      EXTRACTOR_METATYPE_URL },
+    { libvlc_meta_TrackID,
+      EXTRACTOR_METATYPE_TRACK_NUMBER },
+    { libvlc_meta_TrackTotal,
+      EXTRACTOR_METATYPE_UNKNOWN },
+    { libvlc_meta_Director,
+      EXTRACTOR_METATYPE_MOVIE_DIRECTOR },
+    { libvlc_meta_Season,
+      EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER },
+    { libvlc_meta_Episode,
+      EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER },
+    { libvlc_meta_ShowName,
+      EXTRACTOR_METATYPE_SHOW_NAME },
+    { libvlc_meta_Actors,
+      EXTRACTOR_METATYPE_PERFORMER },
+    { libvlc_meta_AlbumArtist,
+      EXTRACTOR_METATYPE_ARTIST },
+    { libvlc_meta_DiscNumber,
+      EXTRACTOR_METATYPE_DISC_NUMBER },
+    { libvlc_meta_DiscTotal,
+      EXTRACTOR_METATYPE_UNKNOWN }
+  };
+  const unsigned int map_len = sizeof (map) / sizeof (map[0]);
+
+  for (unsigned int i = 0; i < map_len; i++)
+  {
+    char *meta;
+    int stop;
+
+    fprintf (stderr,
+             ".");
+    meta = libvlc_media_get_meta (media,
+                                  map[i].vt);
+    if (NULL == meta)
+      continue;
+    stop = ec->proc (ec->cls,
+                     "vlc",
+                     map[i].mt,
+                     /* presumably libvlc hands out UTF-8 -- TODO confirm */
+                     EXTRACTOR_METAFORMAT_UTF8,
+                     "text/plain",
+                     meta,
+                     strlen (meta) + 1);
+    /* Strings returned by libvlc are released through libvlc's own
+       deallocator, which is correct even if its C runtime differs. */
+    libvlc_free (meta);
+    if (0 != stop)
+      return;   /* the client asked us to abort the extraction */
+  }
+}
+
+
+/**
+ * Event handler attached for `libvlc_MediaParsedChanged`.  Only logs
+ * the event and, once parsing is reported as done, that the media is
+ * ready; it does not trigger any extraction itself.
+ *
+ * @param p_event the libvlc event being delivered
+ * @param p_data closure registered with the handler (unused)
+ */
+static void
+media_ready (const struct libvlc_event_t *p_event,
+             void *p_data)
+{
+  const int is_parsed_event = (p_event->type == libvlc_MediaParsedChanged);
+  const int parse_status = p_event->u.media_parsed_changed.new_status;
+
+  (void) p_data;
+  fprintf (stderr,
+           "media status: %d, %d\n",
+           is_parsed_event,
+           parse_status);
+  if (libvlc_media_parsed_status_done != parse_status)
+    return;
+  fprintf (stderr,
+           "media ready\n");
+}
+
+
+/**
+ * Extract information using libvlc.
+ *
+ * Creates a libvlc instance and a media object that reads through the
+ * IO callbacks of @a ec, asks libvlc to parse the media and then hands
+ * all meta data libvlc reports to the metadata processor of @a ec.
+ * If the parse request cannot be started, no meta data is queried.
+ *
+ * @param ec extraction context
+ */
+void
+EXTRACTOR_vlc_extract_method (struct EXTRACTOR_ExtractContext *ec)
+{
+  libvlc_instance_t *vlc;
+  libvlc_media_t *media;
+  libvlc_event_manager_t *em;
+  int status;
+
+  {
+    sigset_t set;
+
+    /* Restore default SIGCHLD handling and block SIGPIPE for this
+       thread before libvlc is initialised. */
+    signal (SIGCHLD, SIG_DFL);
+    sigemptyset (&set);
+    sigaddset (&set, SIGPIPE);
+    pthread_sigmask (SIG_BLOCK, &set, NULL);
+  }
+
+  vlc = libvlc_new (0, NULL);
+  if (NULL == vlc)
+    return;
+  media = libvlc_media_new_callbacks (vlc,
+                                      &open_cb,
+                                      &read_cb,
+                                      &seek_cb,
+                                      &close_cb,
+                                      ec);
+  if (NULL == media)
+  {
+    libvlc_release (vlc);
+    return;
+  }
+
+  em = libvlc_media_event_manager (media);
+  libvlc_event_attach (em,
+                       libvlc_MediaParsedChanged,
+                       &media_ready,
+                       ec);
+  fprintf (stderr,
+           "Triggering parser\n");
+  status = libvlc_media_parse_with_options (media,
+                                            libvlc_media_fetch_local
+                                            | libvlc_media_parse_network
+                                            | libvlc_media_fetch_network,
+                                            30000); /* 30s timeout */
+  fprintf (stderr,
+           "Status: %d\n",
+           status);
+  if (0 == status)
+  {
+    /* NOTE(review): the parse request is asynchronous; this fixed delay
+       does not actually wait for the MediaParsedChanged event, so the
+       meta data may not be available yet. */
+    fprintf (stderr,
+             "Sleeping\n");
+    sleep (1);
+    extract (ec,
+             media);
+  }
+  /* On failure the parser was never started, so there is nothing to
+     query; just release what we created. */
+  libvlc_media_release (media);
+  libvlc_release (vlc);
+}
+
+
+/* end of vlc_extractor.c */