From d40016f1e8b4578b294cfa09a59f43000c427643 Mon Sep 17 00:00:00 2001 From: Christian Grothoff Date: Sun, 2 May 2021 22:31:07 +0200 Subject: resolve #2518 --- .gitignore | 1 + ChangeLog | 3 + src/plugins/Makefile.am | 28 ++ src/plugins/old/real_extractor.c | 439 ----------------------------- src/plugins/real_extractor.c | 579 +++++++++++++++++++++++++++++++++++++++ src/plugins/test_real.c | 104 +++++++ src/plugins/testdata/audiosig.rm | Bin 0 -> 9616 bytes src/plugins/testdata/ra3.ra | Bin 0 -> 1066 bytes src/plugins/vlc_extractor.c | 334 ++++++++++++++++++++++ 9 files changed, 1049 insertions(+), 439 deletions(-) delete mode 100644 src/plugins/old/real_extractor.c create mode 100644 src/plugins/real_extractor.c create mode 100644 src/plugins/test_real.c create mode 100644 src/plugins/testdata/audiosig.rm create mode 100644 src/plugins/testdata/ra3.ra create mode 100644 src/plugins/vlc_extractor.c diff --git a/.gitignore b/.gitignore index 073b4ef..d0c5a1b 100644 --- a/.gitignore +++ b/.gitignore @@ -112,6 +112,7 @@ src/plugins/test_ogg src/plugins/test_ole2 src/plugins/test_png src/plugins/test_ps +src/plugins/test_real src/plugins/test_riff src/plugins/test_rpm src/plugins/test_s3m diff --git a/ChangeLog b/ChangeLog index 1e68017..6cd0e63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +Sun 02 May 2021 10:30:33 PM CEST + Revive REAL plugin (fixes #2518). -CG + Sat 01 May 2021 10:57:55 PM CEST Revive ELF plugin (fixes #2516). -CG diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 8cbe21a..58b0590 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -23,7 +23,9 @@ SUBDIRS = . 
EXTRA_DIST = \ fuzz_default.sh \ template_extractor.c \ + testdata/audiosig.rm \ testdata/archive_test.tar \ + testdata/chello-elf \ testdata/deb_bzip2.deb \ testdata/dvi_ora.dvi \ testdata/exiv2_iptc.jpg \ @@ -51,6 +53,7 @@ EXTRA_DIST = \ testdata/png_image.png \ testdata/ps_bloomfilter.ps \ testdata/ps_wallace.ps \ + testdata/ra3.ra \ testdata/riff_flame.avi \ testdata/rpm_test.rpm \ testdata/s3m_2nd_pm.s3m \ @@ -171,6 +174,7 @@ plugin_LTLIBRARIES = \ libextractor_nsf.la \ libextractor_nsfe.la \ libextractor_ps.la \ + libextractor_real.la \ libextractor_riff.la \ libextractor_s3m.la \ libextractor_sid.la \ @@ -209,6 +213,7 @@ check_PROGRAMS = \ test_odf \ test_ps \ test_png \ + test_real \ test_riff \ test_s3m \ test_sid \ @@ -562,6 +567,20 @@ test_ps_LDADD = \ $(top_builddir)/src/plugins/libtest.la +libextractor_real_la_SOURCES = \ + real_extractor.c +libextractor_real_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_real_la_LIBADD = \ + -lm \ + $(XLIB) \ + $(LE_LIBINTL) + +test_real_SOURCES = \ + test_real.c +test_real_LDADD = \ + $(top_builddir)/src/plugins/libtest.la + libextractor_riff_la_SOURCES = \ riff_extractor.c libextractor_riff_la_LDFLAGS = \ @@ -643,6 +662,15 @@ test_tiff_LDADD = \ $(top_builddir)/src/plugins/libtest.la +libextractor_vlc_la_SOURCES = \ + vlc_extractor.c +libextractor_vlc_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_vlc_la_LIBADD = \ + -lvlc \ + $(XLIB) + + libextractor_wav_la_SOURCES = \ wav_extractor.c libextractor_wav_la_LDFLAGS = \ diff --git a/src/plugins/old/real_extractor.c b/src/plugins/old/real_extractor.c deleted file mode 100644 index cfac031..0000000 --- a/src/plugins/old/real_extractor.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - This file is part of libextractor. 
- Copyright (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. - */ - -#include "platform.h" -#include "extractor.h" -#include - -#define UINT32 uint32_t -#define UINT16 uint16_t -#define UINT8 uint8_t - -typedef struct -{ - UINT32 object_id; - UINT32 size; - UINT16 object_version; /* must be 0 */ - UINT16 stream_number; - UINT32 max_bit_rate; - UINT32 avg_bit_rate; - UINT32 max_packet_size; - UINT32 avg_packet_size; - UINT32 start_time; - UINT32 preroll; - UINT32 duration; - UINT8 stream_name_size; - UINT8 data[0]; /* variable length section */ - /* - UINT8[stream_name_size] stream_name; - UINT8 mime_type_size; - UINT8[mime_type_size] mime_type; - UINT32 type_specific_len; - UINT8[type_specific_len] type_specific_data; - */ -} Media_Properties; - -typedef struct -{ - UINT32 object_id; - UINT32 size; - UINT16 object_version; /* must be 0 */ - UINT16 title_len; - UINT8 data[0]; /* variable length section */ - /* - UINT8[title_len] title; - UINT16 author_len; - UINT8[author_len] author; - UINT16 copyright_len; - UINT8[copyright_len] copyright; - UINT16 comment_len; - UINT8[comment_len] comment; - */ -} Content_Description; -/* author, copyright and comment are supposed to be ASCII */ - -#define REAL_HEADER 0x2E524d46 -#define MDPR_HEADER 0x4D445052 -#define 
CONT_HEADER 0x434F4e54 - -#define RAFF4_HEADER 0x2E7261FD - - -static int -processMediaProperties (const Media_Properties *prop, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) -{ - - UINT8 mime_type_size; - UINT32 prop_size; - - prop_size = ntohl (prop->size); - if (prop_size <= sizeof (Media_Properties)) - return 0; - if (0 != prop->object_version) - return 0; - if (prop_size <= prop->stream_name_size + sizeof (UINT8) - + sizeof (Media_Properties)) - return 0; - - mime_type_size = prop->data[prop->stream_name_size]; - if (prop_size > prop->stream_name_size + sizeof (UINT8) - + +mime_type_size + sizeof (Media_Properties)) - { - char data[mime_type_size + 1]; - memcpy (data, &prop->data[prop->stream_name_size + 1], mime_type_size); - data[mime_type_size] = '\0'; - - return proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - data, - strlen (data)); - } - return 0; -} - - -static int -processContentDescription (const Content_Description *prop, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) -{ - UINT16 author_len; - UINT16 copyright_len; - UINT16 comment_len; - UINT16 title_len; - char *title; - char *author; - char *copyright; - char *comment; - UINT32 prop_size; - int ret; - - prop_size = ntohl (prop->size); - if (prop_size <= sizeof (Content_Description)) - return 0; - if (0 != prop->object_version) - return 0; - title_len = ntohs (prop->title_len); - if (prop_size <= title_len + sizeof (UINT16) + sizeof (Content_Description)) - return 0; - author_len = ntohs (*(UINT16 *) &prop->data[title_len]); - if (prop_size <= title_len + sizeof (UINT16) - + author_len + sizeof (Content_Description)) - return 0; - - copyright_len = ntohs (*(UINT16 *) &prop->data[title_len - + author_len - + sizeof (UINT16)]); - - if (prop_size <= title_len + 2 * sizeof (UINT16) - + author_len + copyright_len + sizeof (Content_Description)) - return 0; - - comment_len = ntohs (*(UINT16 *) &prop->data[title_len - + author_len - + 
copyright_len - + 2 * sizeof (UINT16)]); - - if (prop_size < title_len + 3 * sizeof (UINT16) - + author_len + copyright_len + comment_len - + sizeof (Content_Description)) - return 0; - - ret = 0; - title = malloc (title_len + 1); - if (title != NULL) - { - memcpy (title, &prop->data[0], title_len); - title[title_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_TITLE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - title, - strlen (title) + 1); - free (title); - } - if (ret != 0) - return ret; - - author = malloc (author_len + 1); - if (author != NULL) - { - memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len); - author[author_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_AUTHOR_NAME, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - author, - strlen (author) + 1); - free (author); - } - if (ret != 0) - return ret; - - copyright = malloc (copyright_len + 1); - if (copyright != NULL) - { - memcpy (copyright, - &prop->data[title_len + sizeof (UINT16) * 2 + author_len], - copyright_len); - copyright[copyright_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_COPYRIGHT, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - copyright, - strlen (copyright) + 1); - free (copyright); - } - if (ret != 0) - return ret; - - comment = malloc (comment_len + 1); - if (comment != NULL) - { - memcpy (comment, - &prop->data[title_len + sizeof (UINT16) * 3 + author_len - + copyright_len], comment_len); - comment[comment_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_COMMENT, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - comment, - strlen (comment) + 1); - free (comment); - } - if (ret != 0) - return ret; - return 0; -} - - -typedef struct RAFF4_header -{ - unsigned short version; - unsigned short revision; - unsigned short header_length; - unsigned short compression_type; - unsigned int granularity; - unsigned int total_bytes; - unsigned int bytes_per_minute; - unsigned int bytes_per_minute2; - 
unsigned short interleave_factor; - unsigned short interleave_block_size; - unsigned int user_data; - float sample_rate; - unsigned short sample_size; - unsigned short channels; - unsigned char interleave_code[5]; - unsigned char compression_code[5]; - unsigned char is_interleaved; - unsigned char copy_byte; - unsigned char stream_type; - /* - unsigned char tlen; - unsigned char title[tlen]; - unsigned char alen; - unsigned char author[alen]; - unsigned char clen; - unsigned char copyright[clen]; - unsigned char aplen; - unsigned char app[aplen]; */ -} RAFF4_header; - -#define RAFF4_HDR_SIZE 53 - -static char * -stndup (const char *str, size_t n) -{ - char *tmp; - tmp = malloc (n + 1); - if (tmp == NULL) - return NULL; - tmp[n] = '\0'; - memcpy (tmp, str, n); - return tmp; -} - - -/* audio/vnd.rn-realaudio */ -int -EXTRACTOR_real_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) -{ - const unsigned char *pos; - const unsigned char *end; - unsigned int length; - const RAFF4_header *hdr; - unsigned char tlen; - unsigned char alen; - unsigned char clen; - unsigned char aplen; - char *x; - int ret; - - if (size <= 2 * sizeof (int)) - return 0; - if (RAFF4_HEADER == ntohl (*(int *) data)) - { - /* HELIX */ - if (size <= RAFF4_HDR_SIZE + 16 + 4) - return 0; - if (0 != proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - "audio/vnd.rn-realaudio", - strlen ("audio/vnd.rn-realaudio") + 1)) - return 1; - hdr = (const RAFF4_header *) &data[16]; - if (ntohs (hdr->header_length) + 16 > size) - return 0; - tlen = data[16 + RAFF4_HDR_SIZE]; - if (tlen + RAFF4_HDR_SIZE + 20 > size) - return 0; - alen = data[17 + tlen + RAFF4_HDR_SIZE]; - if (tlen + alen + RAFF4_HDR_SIZE + 20 > size) - return 0; - clen = data[18 + tlen + alen + RAFF4_HDR_SIZE]; - if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size) - return 0; - aplen = data[19 + tlen + clen + alen + 
RAFF4_HDR_SIZE]; - if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size) - return 0; - ret = 0; - if ( (tlen > 0) && (ret == 0) ) - { - x = stndup ((const char *) &data[17 + RAFF4_HDR_SIZE], tlen); - if (x != NULL) - { - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x) + 1); - free (x); - } - } - if ( (alen > 0) && (ret == 0) ) - { - x = stndup ((const char *) &data[18 + RAFF4_HDR_SIZE + tlen], alen); - if (x != NULL) - { - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x) + 1); - free (x); - } - } - if ( (clen > 0) && (ret == 0) ) - { - x = stndup ((const char *) &data[19 + RAFF4_HDR_SIZE + tlen + alen], - clen); - if (x != NULL) - { - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x) + 1); - free (x); - } - } - if ( (aplen > 0) && (ret == 0) ) - { - x = stndup ((const char *) &data[20 + RAFF4_HDR_SIZE + tlen + alen - + clen], aplen); - if (x != NULL) - { - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x) + 1); - free (x); - } - } - return ret; - } - if (REAL_HEADER == ntohl (*(int *) data)) - { - /* old real */ - end = &data[size]; - pos = &data[0]; - ret = 0; - while (0 == ret) - { - if ((pos + 8 >= end) || (pos + 8 < pos)) - break; - length = ntohl (*(((unsigned int *) pos) + 1)); - if (length <= 0) - break; - if ((pos + length >= end) || (pos + length < pos)) - break; - switch (ntohl (*((unsigned int *) pos))) - { - case MDPR_HEADER: - ret = processMediaProperties ((Media_Properties *) pos, - proc, - proc_cls); - pos += length; - break; - case CONT_HEADER: - ret = processContentDescription ((Content_Description *) pos, - proc, - proc_cls); - pos += length; - break; - case REAL_HEADER: /* treat like default */ - default: - pos += length; - break; - } - } 
- return ret; - } - return 0; -} diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c new file mode 100644 index 0000000..9d77b28 --- /dev/null +++ b/src/plugins/real_extractor.c @@ -0,0 +1,579 @@ +/* + * This file is part of libextractor. + * Copyright (C) 2021 Christian Grothoff + * + * libextractor is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 3, or (at your + * option) any later version. + * + * libextractor is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libextractor; see the file COPYING. If not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+/**
+ * @file plugins/real_extractor.c
+ * @brief plugin to support REAL files
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "extractor.h"
+
+/**
+ * Fixed-size prefix of a media properties chunk (tag MDPR_HEADER).
+ * The parser in this file converts @e size with ntohl() and rejects
+ * chunks whose @e object_version is not 0.
+ */
+struct MediaProperties
+{
+  uint32_t object_id;
+  uint32_t size;
+  uint16_t object_version; /* must be 0 */
+  uint16_t stream_number;
+  uint32_t max_bit_rate;
+  uint32_t avg_bit_rate;
+  uint32_t max_packet_size;
+  uint32_t avg_packet_size;
+  uint32_t start_time;
+  uint32_t preroll;
+  uint32_t duration;
+  uint8_t stream_name_size;
+  uint8_t data[0]; /* variable length section */
+  /*
+     uint8_t[stream_name_size] stream_name;
+     uint8_t mime_type_size;
+     uint8_t[mime_type_size] mime_type;
+     uint32_t type_specific_len;
+     uint8_t[type_specific_len] type_specific_data;
+   */
+};
+
+/**
+ * Fixed-size prefix of a content description chunk (tag CONT_HEADER).
+ * The parser in this file converts @e size with ntohl() and
+ * @e title_len with ntohs(); the remaining strings are each preceded
+ * by their own 16-bit length inside @e data.
+ */
+struct ContentDescription
+{
+  uint32_t object_id;
+  uint32_t size;
+  uint16_t object_version; /* must be 0 */
+  uint16_t title_len;
+  uint8_t data[0]; /* variable length section */
+  /*
+     uint8_t[title_len] title;
+     uint16_t author_len;
+     uint8_t[author_len] author;
+     uint16_t copyright_len;
+     uint8_t[copyright_len] copyright;
+     uint16_t comment_len;
+     uint8_t[comment_len] comment;
+   */
+};
+/* author, copyright and comment are supposed to be ASCII */
+
+
+/* 32-bit tags; the code compares them against ntohl() of the first
+   word of the file (REAL_HEADER, RAFF4_HEADER) or of a chunk
+   (MDPR_HEADER, CONT_HEADER). */
+#define REAL_HEADER 0x2E524d46  /* bytes ".RMF" */
+#define MDPR_HEADER 0x4D445052  /* bytes "MDPR" */
+#define CONT_HEADER 0x434F4e54  /* bytes "CONT" */
+#define RAFF4_HEADER 0x2E7261FD /* bytes ".ra" followed by 0xFD; the
+                                   version switch in extract_raff()
+                                   handles both version 3 and 4 */
+
+
+/**
+ * Give meta data to LE.
+ *
+ * @param s utf-8 string meta data value
+ * @param t type of the meta data
+ *
+ * Uses the variable `ec` of the enclosing function and makes that
+ * function return as soon as the processor callback returns non-zero.
+ * Note that @a s is evaluated twice.
+ */
+#define ADD(s,t) do { \
+    if (0 != ec->proc (ec->cls, "real", t, \
+                       EXTRACTOR_METAFORMAT_C_STRING, \
+                       "text/plain", s, strlen (s) + 1)) \
+    { return; } \
+} while (0)
+
+
+/**
+ * Report the MIME type stored in a media properties chunk.
+ *
+ * Nothing is reported when the declared chunk size is too small to
+ * hold the stream name, the MIME type length byte and the MIME type,
+ * or when the object version is not 0.
+ *
+ * @param prop start of the chunk; the caller must have made sure that
+ *        as many bytes as the chunk's size field declares are readable
+ * @param ec extraction context used to report the result
+ */
+static void
+processMediaProperties (const struct MediaProperties *prop,
+                        struct EXTRACTOR_ExtractContext *ec)
+{
+  uint8_t mime_type_size;
+  uint32_t prop_size;
+
+  prop_size = ntohl (prop->size);
+  if (prop_size <= sizeof (struct MediaProperties))
+    return;
+  if (0 != prop->object_version)
+    return;
+  if (prop_size <= prop->stream_name_size + sizeof (uint8_t)
+      + sizeof (struct MediaProperties))
+    return;
+  /* the MIME type length byte directly follows the stream name */
+  mime_type_size = prop->data[prop->stream_name_size];
+  if (prop_size > prop->stream_name_size + sizeof (uint8_t)
+      + mime_type_size + sizeof (struct MediaProperties))
+  {
+    char data[mime_type_size + 1];
+
+    memcpy (data,
+            &prop->data[prop->stream_name_size + 1],
+            mime_type_size);
+    data[mime_type_size] = '\0';
+    ADD (data,
+         EXTRACTOR_METATYPE_MIMETYPE);
+  }
+}
+
+
+/**
+ * Read a 16-bit big-endian integer from an arbitrarily aligned address.
+ *
+ * The length fields inside a content description chunk follow strings
+ * of arbitrary length, so they can sit at odd addresses; reading them
+ * through a `uint16_t *` cast would be a misaligned access.
+ *
+ * @param p address of the first (most significant) byte
+ * @return the decoded value in host byte order
+ */
+static uint16_t
+read_be16 (const uint8_t *p)
+{
+  return (uint16_t) ((p[0] << 8) | p[1]);
+}
+
+
+/**
+ * Report title, author, copyright and comment from a content
+ * description chunk.
+ *
+ * The four strings are stored back to back, each preceded by a 16-bit
+ * big-endian length.  Parsing stops silently at the first field that
+ * does not fit into the declared chunk size; empty strings are skipped.
+ *
+ * @param prop start of the chunk; the caller must have made sure that
+ *        as many bytes as the chunk's size field declares are readable
+ * @param ec extraction context used to report the results
+ */
+static void
+processContentDescription (const struct ContentDescription *prop,
+                           struct EXTRACTOR_ExtractContext *ec)
+{
+  uint16_t author_len;
+  uint16_t copyright_len;
+  uint16_t comment_len;
+  uint16_t title_len;
+  uint32_t prop_size;
+
+  prop_size = ntohl (prop->size);
+  if (prop_size <= sizeof (struct ContentDescription))
+    return;
+  if (0 != prop->object_version)
+    return;
+  title_len = ntohs (prop->title_len);
+  if (prop_size <=
+      title_len
+      + sizeof (struct ContentDescription))
+    return;
+  if (title_len > 0)
+  {
+    char title[title_len + 1];
+
+    memcpy (title,
+            &prop->data[0],
+            title_len);
+    title[title_len] = '\0';
+    ADD (title,
+         EXTRACTOR_METATYPE_TITLE);
+  }
+  if (prop_size <=
+      title_len
+      + sizeof (uint16_t)
+      + sizeof (struct ContentDescription))
+    return;
+  author_len = read_be16 (&prop->data[title_len]);
+  if (prop_size <=
+      title_len
+      + sizeof (uint16_t)
+      + author_len
+      + sizeof (struct ContentDescription))
+    return;
+  if (author_len > 0)
+  {
+    char author[author_len + 1];
+
+    memcpy (author,
+            &prop->data[title_len
+                        + sizeof (uint16_t)],
+            author_len);
+    author[author_len] = '\0';
+    ADD (author,
+         EXTRACTOR_METATYPE_AUTHOR_NAME);
+  }
+  if (prop_size <=
+      title_len
+      + sizeof (uint16_t)
+      + author_len
+      + sizeof (uint16_t)
+      + sizeof (struct ContentDescription))
+    return;
+  copyright_len = read_be16 (&prop->data[title_len
+                                         + author_len
+                                         + sizeof (uint16_t)]);
+  if (prop_size <=
+      title_len
+      + sizeof (uint16_t)
+      + author_len
+      + sizeof (uint16_t)
+      + copyright_len
+      + sizeof (struct ContentDescription))
+    return;
+  if (copyright_len > 0)
+  {
+    char copyright[copyright_len + 1];
+
+    memcpy (copyright,
+            &prop->data[title_len
+                        + sizeof (uint16_t) * 2
+                        + author_len],
+            copyright_len);
+    copyright[copyright_len] = '\0';
+    ADD (copyright,
+         EXTRACTOR_METATYPE_COPYRIGHT);
+  }
+
+  if (prop_size <=
+      title_len
+      + sizeof (uint16_t)
+      + author_len
+      + sizeof (uint16_t)
+      + copyright_len
+      + sizeof (uint16_t)
+      + sizeof (struct ContentDescription))
+    return;
+  comment_len = read_be16 (&prop->data[title_len
+                                       + author_len
+                                       + copyright_len
+                                       + 2 * sizeof (uint16_t)]);
+  /* the comment is the last field, so it may end exactly at the
+     end of the chunk (hence '<' instead of '<=') */
+  if (prop_size <
+      title_len
+      + sizeof (uint16_t)
+      + author_len
+      + sizeof (uint16_t)
+      + copyright_len
+      + sizeof (uint16_t)
+      + comment_len
+      + sizeof (struct ContentDescription))
+    return;
+
+  if (comment_len > 0)
+  {
+    char comment[comment_len + 1];
+
+    memcpy (comment,
+            &prop->data[title_len
+                        + sizeof (uint16_t) * 3
+                        + author_len
+                        + copyright_len],
+            comment_len);
+    comment[comment_len] = '\0';
+    ADD (comment,
+         EXTRACTOR_METATYPE_COMMENT);
+  }
+}
+
+
+struct RAFF_Header
+{
+  uint16_t version;
+};
+
+struct RAFF3_Header
+{
+  uint8_t unknown[10];
+  uint32_t data_size;
+  /*
+     uint8_t tlen;
+     uint8_t title[tlen];
+     uint8_t alen;
+     uint8_t author[alen];
+     uint8_t clen;
+     uint8_t copyright[clen];
+     uint8_t aplen;
+     uint8_t app[aplen]; */
+};
+
+
+#define RAFF3_HDR_SIZE 14
+
+
+struct RAFF4_Header
+{
+  uint16_t version;
+  uint16_t revision;
+  uint16_t header_length;
+  uint16_t compression_type;
+  uint32_t granularity;
+  uint32_t total_bytes;
+  uint32_t bytes_per_minute;
+  uint32_t bytes_per_minute2;
+  uint16_t interleave_factor;
+  uint16_t interleave_block_size;
+  uint32_t user_data;
+  float sample_rate;
+  uint16_t sample_size;
+  uint16_t channels;
+  uint8_t interleave_code[5];
+  uint8_t compression_code[5];
+  uint8_t is_interleaved;
+  uint8_t copy_byte;
+  uint8_t stream_type;
+  /*
+     uint8_t tlen;
+     uint8_t title[tlen];
+     uint8_t alen;
+     uint8_t author[alen];
+     uint8_t clen;
+     uint8_t copyright[clen];
+     uint8_t aplen;
+     uint8_t app[aplen]; */
+};
+
+#define RAFF4_HDR_SIZE 53
+
+
+/**
+ * Extract title, author, copyright and application strings from a
+ * version 3 RealAudio header.
+ *
+ * The four strings are stored back to back, each preceded by a one
+ * byte length; the first length byte is at offset 8 + RAFF3_HDR_SIZE.
+ * Parsing stops silently at the first string that does not fit into
+ * @a size; empty strings are skipped.
+ *
+ * @param ec extraction context used to report the results
+ * @param ptr start of the file
+ * @param size number of bytes available at @a ptr
+ */
+static void
+extract_raff3 (struct EXTRACTOR_ExtractContext *ec,
+               const void *ptr,
+               size_t size)
+{
+  const uint8_t *data = ptr;
+  uint8_t tlen;
+  uint8_t alen;
+  uint8_t clen;
+  uint8_t aplen;
+
+  if (size <= RAFF3_HDR_SIZE + 8)
+    return;
+  tlen = data[8 + RAFF3_HDR_SIZE];
+  if (tlen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (tlen > 0)
+  {
+    char x[tlen + 1];
+
+    memcpy (x,
+            &data[9 + RAFF3_HDR_SIZE],
+            tlen);
+    x[tlen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_TITLE);
+  }
+  alen = data[9 + tlen + RAFF3_HDR_SIZE];
+  if (tlen + alen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (alen > 0)
+  {
+    char x[alen + 1];
+
+    memcpy (x,
+            &data[10 + RAFF3_HDR_SIZE + tlen],
+            alen);
+    x[alen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_AUTHOR_NAME);
+  }
+  clen = data[10 + tlen + alen + RAFF3_HDR_SIZE];
+  if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (clen > 0)
+  {
+    char x[clen + 1];
+
+    /* must use the version 3 header size here: the bounds check above
+       only covers offsets computed from RAFF3_HDR_SIZE */
+    memcpy (x,
+            &data[11 + RAFF3_HDR_SIZE + tlen + alen],
+            clen);
+    x[clen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_COPYRIGHT);
+  }
+  aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE];
+  if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size)
+    return;
+  if (aplen > 0)
+  {
+    char x[aplen + 1];
+
+    /* same as for the copyright: version 3 offsets, not version 4 */
+    memcpy (x,
+            &data[12 + RAFF3_HDR_SIZE + tlen + alen + clen],
+            aplen);
+    x[aplen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_UNKNOWN);
+  }
+}
+
+
+/**
+ * Extract title, author, copyright and application strings from a
+ * version 4 RealAudio header.
+ *
+ * Same string layout as in version 3, but the first length byte is at
+ * offset 16 + RAFF4_HDR_SIZE, and all four lengths are validated
+ * against @a size before anything is reported.
+ *
+ * @param ec extraction context used to report the results
+ * @param ptr start of the file
+ * @param size number of bytes available at @a ptr
+ */
+static void
+extract_raff4 (struct EXTRACTOR_ExtractContext *ec,
+               const void *ptr,
+               size_t size)
+{
+  const uint8_t *data = ptr;
+  uint8_t tlen;
+  uint8_t alen;
+  uint8_t clen;
+  uint8_t aplen;
+
+  if (size <= RAFF4_HDR_SIZE + 16 + 4)
+    return;
+  tlen = data[16 + RAFF4_HDR_SIZE];
+  if (tlen + RAFF4_HDR_SIZE + 20 > size)
+    return;
+  alen = data[17 + tlen + RAFF4_HDR_SIZE];
+  if (tlen + alen + RAFF4_HDR_SIZE + 20 > size)
+    return;
+  clen = data[18 + tlen + alen + RAFF4_HDR_SIZE];
+  if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size)
+    return;
+  aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE];
+  if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size)
+    return;
+  if (tlen > 0)
+  {
+    char x[tlen + 1];
+
+    memcpy (x,
+            &data[17 + RAFF4_HDR_SIZE],
+            tlen);
+    x[tlen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_TITLE);
+  }
+  if (alen > 0)
+  {
+    char x[alen + 1];
+
+    memcpy (x,
+            &data[18 + RAFF4_HDR_SIZE + tlen],
+            alen);
+    x[alen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_AUTHOR_NAME);
+  }
+  if (clen > 0)
+  {
+    char x[clen + 1];
+
+    memcpy (x,
+            &data[19 + RAFF4_HDR_SIZE + tlen + alen],
+            clen);
+    x[clen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_COPYRIGHT);
+  }
+  if (aplen > 0)
+  {
+    char x[aplen + 1];
+
+    memcpy (x,
+            &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen],
+            aplen);
+    x[aplen] = '\0';
+    ADD (x,
+         EXTRACTOR_METATYPE_UNKNOWN);
+  }
+}
+
+
+/**
+ * Handle a file that starts with the RAFF4_HEADER tag: report the
+ * RealAudio MIME type, then dispatch on the 16-bit version stored
+ * at offset 4.
+ *
+ * @param ec extraction context used to report the results
+ * @param ptr start of the file
+ * @param size number of bytes available at @a ptr
+ */
+static void
+extract_raff (struct EXTRACTOR_ExtractContext *ec,
+              const void *ptr,
+              size_t size)
+{
+  const uint8_t *data = ptr;
+  const struct RAFF_Header *hdr;
+
+  /* HELIX */
+  if (size <= sizeof (*hdr) + 4)
+    return;
+  ADD ("audio/vnd.rn-realaudio",
+       EXTRACTOR_METATYPE_MIMETYPE);
+  hdr = (const struct RAFF_Header *) &data[4];
+  switch (ntohs (hdr->version))
+  {
+  case 3:
+    extract_raff3 (ec,
+                   ptr,
+                   size);
+    break;
+  case 4:
+    extract_raff4 (ec,
+                   ptr,
+                   size);
+    break;
+  default:
+    /* other versions: only the MIME type is reported */
+    break;
+  }
+}
+
+
+/* old real format */
+static void
+extract_real (struct
EXTRACTOR_ExtractContext *ec,
+              const void *data,
+              size_t size)
+{
+  /* absolute file offset of data[0], and position of the current
+     chunk relative to data */
+  uint64_t off = 0;
+  size_t pos = 0;
+
+  while (1)
+  {
+    uint32_t length;
+
+    /* refill the buffer whenever the chunk header, or the chunk it
+       announces, is not completely inside the current buffer */
+    if ( (pos + 8 > size) ||
+         (pos + 8 < pos) ||
+         (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) )
+    {
+      uint64_t noff;
+      void *in;
+      ssize_t isize;
+
+      noff = ec->seek (ec->cls,
+                       off + pos,
+                       SEEK_SET);
+      if (-1 == noff)
+        return;
+      isize = ec->read (ec->cls,
+                        &in,
+                        32 * 1024);
+      if (isize < 8)
+        return;
+      data = in;
+      size = isize;
+      off = noff;
+      pos = 0;
+      /* 'length' was never assigned if we got here because the chunk
+         header itself was cut off (the condition above short-circuits),
+         so always take it from the freshly read header */
+      length = ntohl (((const uint32_t *) data)[1]);
+    }
+    if (length <= 8)
+      return;
+    /* still does not fit after a refill: chunk is larger than what we
+       read in one go (or the file is truncated), give up */
+    if ( (pos + length > size) ||
+         (pos + length < pos) )
+      return;
+    switch (ntohl (((uint32_t *) (data + pos))[0]))
+    {
+    case MDPR_HEADER:
+      processMediaProperties (data + pos,
+                              ec);
+      pos += length;
+      break;
+    case CONT_HEADER:
+      processContentDescription (data + pos,
+                                 ec);
+      pos += length;
+      break;
+    case REAL_HEADER: /* treat like default */
+    default:
+      pos += length;
+      break;
+    }
+  }
+}
+
+
+/**
+ * "extract" metadata from a REAL file
+ *
+ * Reads the start of the file and dispatches on its first 32-bit
+ * word: RAFF4_HEADER goes to extract_raff(), REAL_HEADER to
+ * extract_real(); anything else is ignored.
+ *
+ * @param ec extraction context
+ */
+void
+EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec)
+{
+  void *data;
+  ssize_t n;
+
+  n = ec->read (ec->cls,
+                &data,
+                sizeof (struct RAFF4_Header) + 4 * 256);
+  /* keep the result signed: a read error (-1) must not turn into a
+     huge unsigned size and let us dereference an unset 'data' */
+  if (n < (ssize_t) sizeof (uint32_t))
+    return;
+  switch (ntohl (*(uint32_t *) data))
+  {
+  case RAFF4_HEADER:
+    extract_raff (ec,
+                  data,
+                  (size_t) n);
+    break;
+  case REAL_HEADER:
+    extract_real (ec,
+                  data,
+                  (size_t) n);
+    break;
+  default:
+    /* not a format handled by this plugin */
+    break;
+  }
+}
+
+
+/* end of real_extractor.c */
diff --git a/src/plugins/test_real.c b/src/plugins/test_real.c
new file mode 100644
index 0000000..98e2af4
--- /dev/null
+++ b/src/plugins/test_real.c
@@ -0,0 +1,104 @@
+/*
+     This file is part of libextractor.
+     Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 3, or (at your
+     option) any later version.
+ + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ +/** + * @file plugins/test_real.c + * @brief testcase for real plugin + * @author Christian Grothoff + */ +#include "platform.h" +#include "test_lib.h" + + +/** + * Main function for the REAL testcase. + * + * @param argc number of arguments (ignored) + * @param argv arguments (ignored) + * @return 0 on success + */ +int +main (int argc, char *argv[]) +{ + struct SolutionData real_audiosig_sol[] = { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "audio/x-pn-realaudio", + strlen ("audio/x-pn-realaudio") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_TITLE, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "Welcome!", + strlen ("Welcome!") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_COPYRIGHT, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "1998, RealNetworks, Inc.", + strlen ("1998, RealNetworks, Inc.") + 1, + 0 + }, + { 0, 0, NULL, NULL, 0, -1 } + }; + struct SolutionData real_ra3_sol[] = { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "audio/vnd.rn-realaudio", + strlen ("audio/vnd.rn-realaudio") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_TITLE, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "Song of Welcome", + strlen ("Song of Welcome") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_AUTHOR_NAME, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "Investiture Service", + strlen ("Investiture Service") + 1, + 0 + }, + { 0, 0, NULL, NULL, 0, -1 } + }; + struct ProblemSet ps[] = { + { "testdata/audiosig.rm", + real_audiosig_sol 
}, + { "testdata/ra3.ra", + real_ra3_sol }, + { NULL, NULL } + }; + return ET_main ("real", ps); +} + + +/* end of test_real.c */ diff --git a/src/plugins/testdata/audiosig.rm b/src/plugins/testdata/audiosig.rm new file mode 100644 index 0000000..6307d30 Binary files /dev/null and b/src/plugins/testdata/audiosig.rm differ diff --git a/src/plugins/testdata/ra3.ra b/src/plugins/testdata/ra3.ra new file mode 100644 index 0000000..d36569f Binary files /dev/null and b/src/plugins/testdata/ra3.ra differ diff --git a/src/plugins/vlc_extractor.c b/src/plugins/vlc_extractor.c new file mode 100644 index 0000000..e90b3ea --- /dev/null +++ b/src/plugins/vlc_extractor.c @@ -0,0 +1,334 @@ +/* + This file is part of libextractor. + Copyright (C) 2021 Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. + +NOTE: This plugin is not yet working. Somehow libvlc never calls any of the IO callbacks. + +*/ +/** + * @file plugins/vlc_extractor.c + * @brief plugin to extract metadata using libvlc + * @author Christian Grothoff + */ +#include "platform.h" +#include "extractor.h" +#include +#include + +/** + * Function to help VLC open a custom bitstream input media. + * + * The same media item can be opened multiple times. Each time, this callback + * is invoked. 
It should allocate and initialize any instance-specific + * resources, then store them in *datap. The instance resources can be freed + * in the @ref libvlc_media_close_cb callback. + * + * @param opaque our `struct EXTRACTOR_ExtractContext` + * @param[out] datap storage space for a private data pointer + * @param[out] sizep byte length of the bitstream or UINT64_MAX if unknown + * + * @note For convenience, *datap is initially NULL and *sizep is initially 0. + * + * @return 0 on success, non-zero on error. In case of failure, the other + * callbacks will not be invoked and any value stored in *datap and *sizep is + * discarded. + */ +static int +open_cb (void *opaque, + void **datap, + uint64_t *sizep) +{ + struct EXTRACTOR_ExtractContext *ec = opaque; + + *datap = ec; + *sizep = ec->get_size (ec->cls); + if (UINT64_MAX == *sizep) + { + fprintf (stderr, + "Open failed!\n"); + return 1; + } + fprintf (stderr, + "Open returns %llu file size!\n", + (unsigned long long) *sizep); + return 0; +} + + +/** + * Function to help VLC read data from a custom bitstream input media. + * + * @param opaque our `struct EXTRACTOR_ExtractContext` + * @param buf start address of the buffer to read data into + * @param len bytes length of the buffer + * @return strictly positive number of bytes read, 0 on end-of-stream, + * or -1 on non-recoverable error + * + * @note If no data is immediately available, then the callback should sleep. + * @warning The application is responsible for avoiding deadlock situations. + * In particular, the callback should return an error if playback is stopped; + * if it does not return, then libvlc_media_player_stop() will never return. 
+ */ +static ssize_t +read_cb (void *opaque, + unsigned char *buf, + size_t len) +{ + struct EXTRACTOR_ExtractContext *ec = opaque; + void *data; + ssize_t ret; + + ret = ec->read (ec->cls, + &data, + len); + if (-1 == ret) + { + fprintf (stderr, + "Read failed!\n"); + return -1; + } + memcpy (buf, + data, + ret); + fprintf (stderr, + "Read %u bytes!\n", + (unsigned int) ret); + return ret; +} + + +/** + * Allow VLC to seek a custom bitstream input media. + * + * @param opaque our `struct EXTRACTOR_ExtractContext` + * @param offset absolute byte offset to seek to + * @return 0 on success, -1 on error. + */ +static int +seek_cb (void *opaque, + uint64_t offset) +{ + struct EXTRACTOR_ExtractContext *ec = opaque; + + fprintf (stderr, + "Seek to %llu!\n", + (unsigned long long) offset); + if (offset > INT64_MAX) + { + fprintf (stderr, + "Excessive seek, impossible with LE!\n"); + return -1; + } + if (-1 == + ec->seek (ec->cls, + offset, + SEEK_SET)) + { + fprintf (stderr, + "Seek failed!\n"); + return -1; + } + return 0; +} + + +/** + * Callback prototype to close a custom bitstream input media. 
+ * + * @param opaque our `struct EXTRACTOR_ExtractContext` + */ +static void +close_cb (void *opaque) +{ + /* intentionally empty */ + fprintf (stderr, + "Close called\n"); +} + + +static void +extract (struct EXTRACTOR_ExtractContext *ec, + libvlc_media_t *media) +{ + struct + { + enum libvlc_meta_t vt; + enum EXTRACTOR_MetaType mt; + } map[] = { + { libvlc_meta_Title, + EXTRACTOR_METATYPE_TITLE }, + { libvlc_meta_Artist, + EXTRACTOR_METATYPE_ARTIST }, + { libvlc_meta_Genre, + EXTRACTOR_METATYPE_GENRE }, + { libvlc_meta_Copyright, + EXTRACTOR_METATYPE_COPYRIGHT }, + { libvlc_meta_Album, + EXTRACTOR_METATYPE_ALBUM }, + { libvlc_meta_TrackNumber, + EXTRACTOR_METATYPE_TRACK_NUMBER }, + { libvlc_meta_Description, + EXTRACTOR_METATYPE_DESCRIPTION }, + { libvlc_meta_Rating, + EXTRACTOR_METATYPE_RATING }, + { libvlc_meta_Date, + EXTRACTOR_METATYPE_CREATION_TIME }, + { libvlc_meta_Setting, + EXTRACTOR_METATYPE_UNKNOWN }, + { libvlc_meta_URL, + EXTRACTOR_METATYPE_URL }, + { libvlc_meta_Language, + EXTRACTOR_METATYPE_LANGUAGE }, + { libvlc_meta_NowPlaying, + EXTRACTOR_METATYPE_UNKNOWN }, + { libvlc_meta_Publisher, + EXTRACTOR_METATYPE_PUBLISHER }, + { libvlc_meta_EncodedBy, + EXTRACTOR_METATYPE_ENCODED_BY }, + { libvlc_meta_ArtworkURL, + EXTRACTOR_METATYPE_URL }, + { libvlc_meta_TrackID, + EXTRACTOR_METATYPE_TRACK_NUMBER }, + { libvlc_meta_TrackTotal, + EXTRACTOR_METATYPE_UNKNOWN }, + { libvlc_meta_Director, + EXTRACTOR_METATYPE_MOVIE_DIRECTOR }, + { libvlc_meta_Season, + EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER }, + { libvlc_meta_Episode, + EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER }, + { libvlc_meta_ShowName, + EXTRACTOR_METATYPE_SHOW_NAME }, + { libvlc_meta_Actors, + EXTRACTOR_METATYPE_PERFORMER }, + { libvlc_meta_AlbumArtist, + EXTRACTOR_METATYPE_ARTIST }, + { libvlc_meta_DiscNumber, + EXTRACTOR_METATYPE_DISC_NUMBER }, + { libvlc_meta_DiscTotal, + EXTRACTOR_METATYPE_UNKNOWN }, + { 0, 0 } + }; + + for (unsigned int i = 0; + EXTRACTOR_METATYPE_RESERVED != map[i].mt; + i++) + { 
+ char *meta; + + fprintf (stderr, + "."); + meta = libvlc_media_get_meta (media, + map[i].vt); + if (NULL == meta) + continue; + ec->proc (ec->cls, + "vlc", + map[i].mt, + EXTRACTOR_METAFORMAT_UTF8, /* ??? */ + "text/plain", + meta, + strlen (meta) + 1); + free (meta); + } +} + + +static void +media_ready (const struct libvlc_event_t *p_event, + void *p_data) +{ + fprintf (stderr, + "media status: %d, %d\n", + p_event->type == libvlc_MediaParsedChanged, + p_event->u.media_parsed_changed.new_status); + if (p_event->u.media_parsed_changed.new_status == + libvlc_media_parsed_status_done) + { + fprintf (stderr, + "media ready\n"); + } +} + + +/** + * Extract information using libvlc + * + * @param ec extraction context + */ +void +EXTRACTOR_vlc_extract_method (struct EXTRACTOR_ExtractContext *ec) +{ + libvlc_instance_t *vlc; + libvlc_media_t *media; + libvlc_event_manager_t *em; + + { + sigset_t set; + + signal (SIGCHLD, SIG_DFL); + sigemptyset (&set); + sigaddset (&set, SIGPIPE); + pthread_sigmask (SIG_BLOCK, &set, NULL); + } + + vlc = libvlc_new (0, NULL); + if (NULL == vlc) + return; + media = libvlc_media_new_callbacks (vlc, + &open_cb, + &read_cb, + &seek_cb, + &close_cb, + ec); + if (NULL == media) + { + libvlc_release (vlc); + return; + } + + em = libvlc_media_event_manager (media); + libvlc_event_attach (em, + libvlc_MediaParsedChanged, + &media_ready, + ec); + fprintf (stderr, + "Triggering parser\n"); + { + int status; + + status = libvlc_media_parse_with_options (media, + libvlc_media_fetch_local + | libvlc_media_parse_network + | libvlc_media_fetch_network, + 30000); /* 30s timeout */ + fprintf (stderr, + "Status: %d\n", + status); + } + fprintf (stderr, + "Sleeping\n"); + sleep (1); + extract (ec, + media); + libvlc_media_release (media); + libvlc_release (vlc); +} + + +/* end of vlc_extractor.c */ -- cgit v1.2.3