summaryrefslogtreecommitdiff
path: root/src/plugins/real_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/real_extractor.c')
-rw-r--r--src/plugins/real_extractor.c579
1 files changed, 579 insertions, 0 deletions
diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c
new file mode 100644
index 0000000..9d77b28
--- /dev/null
+++ b/src/plugins/real_extractor.c
@@ -0,0 +1,579 @@
+/*
+ * This file is part of libextractor.
+ * Copyright (C) 2021 Christian Grothoff
+ *
+ * libextractor is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your
+ * option) any later version.
+ *
+ * libextractor is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with libextractor; see the file COPYING. If not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+/**
+ * @file plugins/real_extractor.c
+ * @brief plugin to support REAL files
+ * @author Christian Grothoff
+ */
+#include "platform.h"
+#include "extractor.h"
+
+struct MediaProperties
+{
+ uint32_t object_id;
+ uint32_t size;
+ uint16_t object_version; /* must be 0 */
+ uint16_t stream_number;
+ uint32_t max_bit_rate;
+ uint32_t avg_bit_rate;
+ uint32_t max_packet_size;
+ uint32_t avg_packet_size;
+ uint32_t start_time;
+ uint32_t preroll;
+ uint32_t duration;
+ uint8_t stream_name_size;
+ uint8_t data[0]; /* variable length section */
+ /*
+ uint8_t[stream_name_size] stream_name;
+ uint8_t mime_type_size;
+ uint8_t[mime_type_size] mime_type;
+ uint32_t type_specific_len;
+ uint8_t[type_specific_len] type_specific_data;
+ */
+};
+
+struct ContentDescription
+{
+ uint32_t object_id;
+ uint32_t size;
+ uint16_t object_version; /* must be 0 */
+ uint16_t title_len;
+ uint8_t data[0]; /* variable length section */
+ /*
+ uint8_t[title_len] title;
+ uint16_t author_len;
+ uint8_t[author_len] author;
+ uint16_t copyright_len;
+ uint8_t[copyright_len] copyright;
+ uint16_t comment_len;
+ uint8_t[comment_len] comment;
+ */
+};
+/* author, copyright and comment are supposed to be ASCII */
+
+
+#define REAL_HEADER 0x2E524d46
+#define MDPR_HEADER 0x4D445052
+#define CONT_HEADER 0x434F4e54
+#define RAFF4_HEADER 0x2E7261FD
+
+
+/**
+ * Give meta data to LE.
+ *
+ * @param s utf-8 string meta data value
+ * @param t type of the meta data
+ */
+#define ADD(s,t) do { \
+ if (0 != ec->proc (ec->cls, "real", t, \
+ EXTRACTOR_METAFORMAT_C_STRING, \
+ "text/plain", s, strlen (s) + 1)) \
+ { return; } \
+} while (0)
+
+
+static void
+processMediaProperties (const struct MediaProperties *prop,
+ struct EXTRACTOR_ExtractContext *ec)
+{
+ uint8_t mime_type_size;
+ uint32_t prop_size;
+
+ prop_size = ntohl (prop->size);
+ if (prop_size <= sizeof (struct MediaProperties))
+ return;
+ if (0 != prop->object_version)
+ return;
+ if (prop_size <= prop->stream_name_size + sizeof (uint8_t)
+ + sizeof (struct MediaProperties))
+ return;
+ mime_type_size = prop->data[prop->stream_name_size];
+ if (prop_size > prop->stream_name_size + sizeof (uint8_t)
+ + mime_type_size + sizeof (struct MediaProperties))
+ {
+ char data[mime_type_size + 1];
+
+ memcpy (data,
+ &prop->data[prop->stream_name_size + 1],
+ mime_type_size);
+ data[mime_type_size] = '\0';
+ ADD (data,
+ EXTRACTOR_METATYPE_MIMETYPE);
+ }
+}
+
+
+static void
+processContentDescription (const struct ContentDescription *prop,
+ struct EXTRACTOR_ExtractContext *ec)
+{
+ uint16_t author_len;
+ uint16_t copyright_len;
+ uint16_t comment_len;
+ uint16_t title_len;
+ uint32_t prop_size;
+
+ prop_size = ntohl (prop->size);
+ if (prop_size <= sizeof (struct ContentDescription))
+ return;
+ if (0 != prop->object_version)
+ return;
+ title_len = ntohs (prop->title_len);
+ if (prop_size <=
+ title_len
+ + sizeof (struct ContentDescription))
+ return;
+ if (title_len > 0)
+ {
+ char title[title_len + 1];
+
+ memcpy (title,
+ &prop->data[0],
+ title_len);
+ title[title_len] = '\0';
+ ADD (title,
+ EXTRACTOR_METATYPE_TITLE);
+ }
+ if (prop_size <=
+ title_len
+ + sizeof (uint16_t)
+ + sizeof (struct ContentDescription))
+ return;
+ author_len = ntohs (*(uint16_t *) &prop->data[title_len]);
+ if (prop_size <=
+ title_len
+ + sizeof (uint16_t)
+ + author_len
+ + sizeof (struct ContentDescription))
+ return;
+ if (author_len > 0)
+ {
+ char author[author_len + 1];
+
+ memcpy (author,
+ &prop->data[title_len
+ + sizeof (uint16_t)],
+ author_len);
+ author[author_len] = '\0';
+ ADD (author,
+ EXTRACTOR_METATYPE_AUTHOR_NAME);
+ }
+ if (prop_size <=
+ title_len
+ + sizeof (uint16_t)
+ + author_len
+ + sizeof (uint16_t)
+ + sizeof (struct ContentDescription))
+ return;
+ copyright_len = ntohs (*(uint16_t *) &prop->data[title_len
+ + author_len
+ + sizeof (uint16_t)]);
+ if (prop_size <=
+ title_len
+ + sizeof (uint16_t)
+ + author_len
+ + sizeof (uint16_t)
+ + copyright_len
+ + sizeof (struct ContentDescription))
+ return;
+ if (copyright_len > 0)
+ {
+ char copyright[copyright_len + 1];
+
+ memcpy (copyright,
+ &prop->data[title_len
+ + sizeof (uint16_t) * 2
+ + author_len],
+ copyright_len);
+ copyright[copyright_len] = '\0';
+ ADD (copyright,
+ EXTRACTOR_METATYPE_COPYRIGHT);
+ }
+
+ if (prop_size <=
+ title_len
+ + sizeof (uint16_t)
+ + author_len
+ + sizeof (uint16_t)
+ + copyright_len
+ + sizeof (uint16_t)
+ + sizeof (struct ContentDescription))
+ return;
+ comment_len = ntohs (*(uint16_t *) &prop->data[title_len
+ + author_len
+ + copyright_len
+ + 2 * sizeof (uint16_t)]);
+ if (prop_size <
+ title_len
+ + sizeof (uint16_t)
+ + author_len
+ + sizeof (uint16_t)
+ + copyright_len
+ + sizeof (uint16_t)
+ + comment_len
+ + sizeof (struct ContentDescription))
+ return;
+
+ if (comment_len > 0)
+ {
+ char comment[comment_len + 1];
+
+ memcpy (comment,
+ &prop->data[title_len
+ + sizeof (uint16_t) * 3
+ + author_len
+ + copyright_len],
+ comment_len);
+ comment[comment_len] = '\0';
+ ADD (comment,
+ EXTRACTOR_METATYPE_COMMENT);
+ }
+}
+
+
+struct RAFF_Header
+{
+ uint16_t version;
+};
+
+struct RAFF3_Header
+{
+ uint8_t unknown[10];
+ uint32_t data_size;
+ /*
+ uint8_t tlen;
+ uint8_t title[tlen];
+ uint8_t alen;
+ uint8_t author[alen];
+ uint8_t clen;
+ uint8_t copyright[clen];
+ uint8_t aplen;
+ uint8_t app[aplen]; */
+};
+
+
+#define RAFF3_HDR_SIZE 14
+
+
+struct RAFF4_Header
+{
+ uint16_t version;
+ uint16_t revision;
+ uint16_t header_length;
+ uint16_t compression_type;
+ uint32_t granularity;
+ uint32_t total_bytes;
+ uint32_t bytes_per_minute;
+ uint32_t bytes_per_minute2;
+ uint16_t interleave_factor;
+ uint16_t interleave_block_size;
+ uint32_t user_data;
+ float sample_rate;
+ uint16_t sample_size;
+ uint16_t channels;
+ uint8_t interleave_code[5];
+ uint8_t compression_code[5];
+ uint8_t is_interleaved;
+ uint8_t copy_byte;
+ uint8_t stream_type;
+ /*
+ uint8_t tlen;
+ uint8_t title[tlen];
+ uint8_t alen;
+ uint8_t author[alen];
+ uint8_t clen;
+ uint8_t copyright[clen];
+ uint8_t aplen;
+ uint8_t app[aplen]; */
+};
+
+#define RAFF4_HDR_SIZE 53
+
+
+static void
+extract_raff3 (struct EXTRACTOR_ExtractContext *ec,
+ const void *ptr,
+ size_t size)
+{
+ const uint8_t *data = ptr;
+ uint8_t tlen;
+ uint8_t alen;
+ uint8_t clen;
+ uint8_t aplen;
+
+ if (size <= RAFF3_HDR_SIZE + 8)
+ return;
+ tlen = data[8 + RAFF3_HDR_SIZE];
+ if (tlen + RAFF3_HDR_SIZE + 12 > size)
+ return;
+ if (tlen > 0)
+ {
+ char x[tlen + 1];
+
+ memcpy (x,
+ &data[9 + RAFF3_HDR_SIZE],
+ tlen);
+ x[tlen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_TITLE);
+ }
+ alen = data[9 + tlen + RAFF3_HDR_SIZE];
+ if (tlen + alen + RAFF3_HDR_SIZE + 12 > size)
+ return;
+ if (alen > 0)
+ {
+ char x[alen + 1];
+
+ memcpy (x,
+ &data[10 + RAFF3_HDR_SIZE + tlen],
+ alen);
+ x[alen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_AUTHOR_NAME);
+ }
+ clen = data[10 + tlen + alen + RAFF3_HDR_SIZE];
+ if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size)
+ return;
+ if (clen > 0)
+ {
+ char x[clen + 1];
+
+ memcpy (x,
+ &data[11 + RAFF4_HDR_SIZE + tlen + alen],
+ clen);
+ x[clen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_COPYRIGHT);
+ }
+ aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE];
+ if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size)
+ return;
+ if (aplen > 0)
+ {
+ char x[aplen + 1];
+
+ memcpy (x,
+ &data[12 + RAFF4_HDR_SIZE + tlen + alen + clen],
+ aplen);
+ x[aplen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_UNKNOWN);
+ }
+}
+
+
+static void
+extract_raff4 (struct EXTRACTOR_ExtractContext *ec,
+ const void *ptr,
+ size_t size)
+{
+ const uint8_t *data = ptr;
+ uint8_t tlen;
+ uint8_t alen;
+ uint8_t clen;
+ uint8_t aplen;
+
+ if (size <= RAFF4_HDR_SIZE + 16 + 4)
+ return;
+ tlen = data[16 + RAFF4_HDR_SIZE];
+ if (tlen + RAFF4_HDR_SIZE + 20 > size)
+ return;
+ alen = data[17 + tlen + RAFF4_HDR_SIZE];
+ if (tlen + alen + RAFF4_HDR_SIZE + 20 > size)
+ return;
+ clen = data[18 + tlen + alen + RAFF4_HDR_SIZE];
+ if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size)
+ return;
+ aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE];
+ if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size)
+ return;
+ if (tlen > 0)
+ {
+ char x[tlen + 1];
+
+ memcpy (x,
+ &data[17 + RAFF4_HDR_SIZE],
+ tlen);
+ x[tlen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_TITLE);
+ }
+ if (alen > 0)
+ {
+ char x[alen + 1];
+
+ memcpy (x,
+ &data[18 + RAFF4_HDR_SIZE + tlen],
+ alen);
+ x[alen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_AUTHOR_NAME);
+ }
+ if (clen > 0)
+ {
+ char x[clen + 1];
+
+ memcpy (x,
+ &data[19 + RAFF4_HDR_SIZE + tlen + alen],
+ clen);
+ x[clen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_COPYRIGHT);
+ }
+ if (aplen > 0)
+ {
+ char x[aplen + 1];
+
+ memcpy (x,
+ &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen],
+ aplen);
+ x[aplen] = '\0';
+ ADD (x,
+ EXTRACTOR_METATYPE_UNKNOWN);
+ }
+}
+
+
+static void
+extract_raff (struct EXTRACTOR_ExtractContext *ec,
+ const void *ptr,
+ size_t size)
+{
+ const uint8_t *data = ptr;
+ const struct RAFF_Header *hdr;
+
+ /* HELIX */
+ if (size <= sizeof (*hdr) + 4)
+ return;
+ ADD ("audio/vnd.rn-realaudio",
+ EXTRACTOR_METATYPE_MIMETYPE);
+ hdr = (const struct RAFF_Header *) &data[4];
+ switch (ntohs (hdr->version))
+ {
+ case 3:
+ extract_raff3 (ec,
+ ptr,
+ size);
+ break;
+ case 4:
+ extract_raff4 (ec,
+ ptr,
+ size);
+ break;
+ }
+}
+
+
+/* old real format */
+static void
+extract_real (struct EXTRACTOR_ExtractContext *ec,
+ const void *data,
+ size_t size)
+{
+ uint64_t off = 0;
+ size_t pos = 0;
+
+ while (1)
+ {
+ uint32_t length;
+
+ if ( (pos + 8 > size) ||
+ (pos + 8 < pos) ||
+ (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) )
+ {
+ uint64_t noff;
+ void *in;
+ ssize_t isize;
+
+ noff = ec->seek (ec->cls,
+ off + pos,
+ SEEK_SET);
+ if (-1 == noff)
+ return;
+ isize = ec->read (ec->cls,
+ &in,
+ 32 * 1024);
+ if (isize < 8)
+ return;
+ data = in;
+ size = isize;
+ off = noff;
+ pos = 0;
+ }
+ if (length <= 8)
+ return;
+ if ( (pos + length > size) ||
+ (pos + length < pos) )
+ return;
+ switch (ntohl (((uint32_t *) (data + pos))[0]))
+ {
+ case MDPR_HEADER:
+ processMediaProperties (data + pos,
+ ec);
+ pos += length;
+ break;
+ case CONT_HEADER:
+ processContentDescription (data + pos,
+ ec);
+ pos += length;
+ break;
+ case REAL_HEADER: /* treat like default */
+ default:
+ pos += length;
+ break;
+ }
+ }
+}
+
+
+/**
+ * "extract" metadata from a REAL file
+ *
+ * @param ec extraction context
+ */
+void
+EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec)
+{
+ void *data;
+ size_t n;
+
+ n = ec->read (ec->cls,
+ &data,
+ sizeof (struct RAFF4_Header) + 4 * 256);
+ if (n < sizeof (uint32_t))
+ return;
+ switch (ntohl (*(uint32_t *) data))
+ {
+ case RAFF4_HEADER:
+ extract_raff (ec,
+ data,
+ n);
+ break;
+ case REAL_HEADER:
+ extract_real (ec,
+ data,
+ n);
+ break;
+ }
+}
+
+
+/* end of real_extractor.c */