aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/real_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/real_extractor.c')
-rw-r--r--src/plugins/real_extractor.c579
1 files changed, 579 insertions, 0 deletions
diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c
new file mode 100644
index 0000000..9d77b28
--- /dev/null
+++ b/src/plugins/real_extractor.c
@@ -0,0 +1,579 @@
1/*
2 * This file is part of libextractor.
3 * Copyright (C) 2021 Christian Grothoff
4 *
5 * libextractor is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published
7 * by the Free Software Foundation; either version 3, or (at your
8 * option) any later version.
9 *
10 * libextractor is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with libextractor; see the file COPYING. If not, write to the
17 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21/**
22 * @file plugins/real_extractor.c
23 * @brief plugin to support REAL files
24 * @author Christian Grothoff
25 */
26#include "platform.h"
27#include "extractor.h"
28
/**
 * Layout of the fixed-size head of an "MDPR" chunk (see MDPR_HEADER),
 * followed by its variable-length tail.
 *
 * Multi-byte fields are stored in network byte order; the code in this
 * file converts `size` with ntohl() before using it.
 *
 * NOTE(review): the struct is not packed, so sizeof() includes tail
 * padding after `stream_name_size` on common ABIs and is therefore
 * larger than the offset of `data`.
 */
struct MediaProperties
{
  uint32_t object_id;
  uint32_t size;
  uint16_t object_version;    /* must be 0 */
  uint16_t stream_number;
  uint32_t max_bit_rate;
  uint32_t avg_bit_rate;
  uint32_t max_packet_size;
  uint32_t avg_packet_size;
  uint32_t start_time;
  uint32_t preroll;
  uint32_t duration;
  uint8_t stream_name_size;

  /**
   * Variable length section (C99 flexible array member), laid out as:
   *
   *   uint8_t[stream_name_size]  stream_name;
   *   uint8_t                    mime_type_size;
   *   uint8_t[mime_type_size]    mime_type;
   *   uint32_t                   type_specific_len;
   *   uint8_t[type_specific_len] type_specific_data;
   */
  uint8_t data[];
};
52
/**
 * Layout of the fixed-size head of a "CONT" chunk (see CONT_HEADER),
 * followed by its variable-length tail.
 *
 * Multi-byte fields are stored in network byte order; the code in this
 * file converts `size` with ntohl() and the string lengths with ntohs().
 * The author, copyright and comment strings are supposed to be ASCII.
 */
struct ContentDescription
{
  uint32_t object_id;
  uint32_t size;
  uint16_t object_version;    /* must be 0 */
  uint16_t title_len;

  /**
   * Variable length section (C99 flexible array member), laid out as:
   *
   *   uint8_t[title_len]     title;
   *   uint16_t               author_len;
   *   uint8_t[author_len]    author;
   *   uint16_t               copyright_len;
   *   uint8_t[copyright_len] copyright;
   *   uint16_t               comment_len;
   *   uint8_t[comment_len]   comment;
   */
  uint8_t data[];
};
71
72
/*
 * Four-byte tags compared against the first 32-bit word (after ntohl())
 * of the file or of a chunk.  Each value is the big-endian encoding of
 * the characters shown on the right.
 */
#define REAL_HEADER  0x2E524D46  /* '.' 'R' 'M' 'F' */
#define MDPR_HEADER  0x4D445052  /* 'M' 'D' 'P' 'R' */
#define CONT_HEADER  0x434F4E54  /* 'C' 'O' 'N' 'T' */
#define RAFF4_HEADER 0x2E7261FD  /* '.' 'r' 'a' 0xFD */
77
78
/**
 * Give meta data to LE.
 *
 * Hands the string to `ec->proc` as a "text/plain" C string attributed
 * to the "real" plugin.  If the callback returns non-zero, the macro
 * executes `return;` in the ENCLOSING function, so it may only be used
 * inside functions returning void that have a variable named `ec` in
 * scope.
 *
 * Note that @a s is evaluated twice (once as the data pointer, once
 * for strlen()); do not pass an expression with side effects.
 *
 * @param s utf-8 string meta data value (NUL-terminated)
 * @param t type of the meta data
 */
#define ADD(s,t) do { \
    if (0 != ec->proc (ec->cls, "real", (t), \
                       EXTRACTOR_METAFORMAT_C_STRING, \
                       "text/plain", (s), strlen (s) + 1)) \
    { return; } \
} while (0)
91
92
93static void
94processMediaProperties (const struct MediaProperties *prop,
95 struct EXTRACTOR_ExtractContext *ec)
96{
97 uint8_t mime_type_size;
98 uint32_t prop_size;
99
100 prop_size = ntohl (prop->size);
101 if (prop_size <= sizeof (struct MediaProperties))
102 return;
103 if (0 != prop->object_version)
104 return;
105 if (prop_size <= prop->stream_name_size + sizeof (uint8_t)
106 + sizeof (struct MediaProperties))
107 return;
108 mime_type_size = prop->data[prop->stream_name_size];
109 if (prop_size > prop->stream_name_size + sizeof (uint8_t)
110 + mime_type_size + sizeof (struct MediaProperties))
111 {
112 char data[mime_type_size + 1];
113
114 memcpy (data,
115 &prop->data[prop->stream_name_size + 1],
116 mime_type_size);
117 data[mime_type_size] = '\0';
118 ADD (data,
119 EXTRACTOR_METATYPE_MIMETYPE);
120 }
121}
122
123
124static void
125processContentDescription (const struct ContentDescription *prop,
126 struct EXTRACTOR_ExtractContext *ec)
127{
128 uint16_t author_len;
129 uint16_t copyright_len;
130 uint16_t comment_len;
131 uint16_t title_len;
132 uint32_t prop_size;
133
134 prop_size = ntohl (prop->size);
135 if (prop_size <= sizeof (struct ContentDescription))
136 return;
137 if (0 != prop->object_version)
138 return;
139 title_len = ntohs (prop->title_len);
140 if (prop_size <=
141 title_len
142 + sizeof (struct ContentDescription))
143 return;
144 if (title_len > 0)
145 {
146 char title[title_len + 1];
147
148 memcpy (title,
149 &prop->data[0],
150 title_len);
151 title[title_len] = '\0';
152 ADD (title,
153 EXTRACTOR_METATYPE_TITLE);
154 }
155 if (prop_size <=
156 title_len
157 + sizeof (uint16_t)
158 + sizeof (struct ContentDescription))
159 return;
160 author_len = ntohs (*(uint16_t *) &prop->data[title_len]);
161 if (prop_size <=
162 title_len
163 + sizeof (uint16_t)
164 + author_len
165 + sizeof (struct ContentDescription))
166 return;
167 if (author_len > 0)
168 {
169 char author[author_len + 1];
170
171 memcpy (author,
172 &prop->data[title_len
173 + sizeof (uint16_t)],
174 author_len);
175 author[author_len] = '\0';
176 ADD (author,
177 EXTRACTOR_METATYPE_AUTHOR_NAME);
178 }
179 if (prop_size <=
180 title_len
181 + sizeof (uint16_t)
182 + author_len
183 + sizeof (uint16_t)
184 + sizeof (struct ContentDescription))
185 return;
186 copyright_len = ntohs (*(uint16_t *) &prop->data[title_len
187 + author_len
188 + sizeof (uint16_t)]);
189 if (prop_size <=
190 title_len
191 + sizeof (uint16_t)
192 + author_len
193 + sizeof (uint16_t)
194 + copyright_len
195 + sizeof (struct ContentDescription))
196 return;
197 if (copyright_len > 0)
198 {
199 char copyright[copyright_len + 1];
200
201 memcpy (copyright,
202 &prop->data[title_len
203 + sizeof (uint16_t) * 2
204 + author_len],
205 copyright_len);
206 copyright[copyright_len] = '\0';
207 ADD (copyright,
208 EXTRACTOR_METATYPE_COPYRIGHT);
209 }
210
211 if (prop_size <=
212 title_len
213 + sizeof (uint16_t)
214 + author_len
215 + sizeof (uint16_t)
216 + copyright_len
217 + sizeof (uint16_t)
218 + sizeof (struct ContentDescription))
219 return;
220 comment_len = ntohs (*(uint16_t *) &prop->data[title_len
221 + author_len
222 + copyright_len
223 + 2 * sizeof (uint16_t)]);
224 if (prop_size <
225 title_len
226 + sizeof (uint16_t)
227 + author_len
228 + sizeof (uint16_t)
229 + copyright_len
230 + sizeof (uint16_t)
231 + comment_len
232 + sizeof (struct ContentDescription))
233 return;
234
235 if (comment_len > 0)
236 {
237 char comment[comment_len + 1];
238
239 memcpy (comment,
240 &prop->data[title_len
241 + sizeof (uint16_t) * 3
242 + author_len
243 + copyright_len],
244 comment_len);
245 comment[comment_len] = '\0';
246 ADD (comment,
247 EXTRACTOR_METATYPE_COMMENT);
248 }
249}
250
251
/**
 * Minimal view of a RealAudio header: just the version number.
 * extract_raff() overlays this on the input starting at byte 4 and
 * converts the field with ntohs().
 */
struct RAFF_Header
{
  /* format version, network byte order */
  uint16_t version;
};
256
/**
 * Fixed part of a version 3 RealAudio header.
 *
 * The fixed part is followed by four length-prefixed strings:
 *
 *   uint8_t tlen;   uint8_t title[tlen];
 *   uint8_t alen;   uint8_t author[alen];
 *   uint8_t clen;   uint8_t copyright[clen];
 *   uint8_t aplen;  uint8_t app[aplen];
 *
 * NOTE(review): the struct is not packed, so compilers usually insert
 * padding before `data_size`; do not use sizeof() or the member offset
 * to locate on-disk fields.  Use RAFF3_HDR_SIZE instead.
 */
struct RAFF3_Header
{
  uint8_t unknown[10];
  uint32_t data_size;
};


/* Size of the fixed part without padding: 10 + 4 bytes. */
#define RAFF3_HDR_SIZE 14
274
275
/**
 * Fixed part of a version 4 RealAudio header.
 *
 * The fixed part is followed by four length-prefixed strings:
 *
 *   uint8_t tlen;   uint8_t title[tlen];
 *   uint8_t alen;   uint8_t author[alen];
 *   uint8_t clen;   uint8_t copyright[clen];
 *   uint8_t aplen;  uint8_t app[aplen];
 *
 * NOTE(review): the members add up to 53 bytes (RAFF4_HDR_SIZE), but
 * the struct is not packed, so sizeof() is usually rounded up by
 * trailing padding.
 */
struct RAFF4_Header
{
  uint16_t version;
  uint16_t revision;
  uint16_t header_length;
  uint16_t compression_type;

  uint32_t granularity;
  uint32_t total_bytes;
  uint32_t bytes_per_minute;
  uint32_t bytes_per_minute2;

  uint16_t interleave_factor;
  uint16_t interleave_block_size;

  uint32_t user_data;
  float sample_rate;

  uint16_t sample_size;
  uint16_t channels;

  uint8_t interleave_code[5];
  uint8_t compression_code[5];
  uint8_t is_interleaved;
  uint8_t copy_byte;
  uint8_t stream_type;
};

/* Size of the fixed part without padding. */
#define RAFF4_HDR_SIZE 53
309
310
311static void
312extract_raff3 (struct EXTRACTOR_ExtractContext *ec,
313 const void *ptr,
314 size_t size)
315{
316 const uint8_t *data = ptr;
317 uint8_t tlen;
318 uint8_t alen;
319 uint8_t clen;
320 uint8_t aplen;
321
322 if (size <= RAFF3_HDR_SIZE + 8)
323 return;
324 tlen = data[8 + RAFF3_HDR_SIZE];
325 if (tlen + RAFF3_HDR_SIZE + 12 > size)
326 return;
327 if (tlen > 0)
328 {
329 char x[tlen + 1];
330
331 memcpy (x,
332 &data[9 + RAFF3_HDR_SIZE],
333 tlen);
334 x[tlen] = '\0';
335 ADD (x,
336 EXTRACTOR_METATYPE_TITLE);
337 }
338 alen = data[9 + tlen + RAFF3_HDR_SIZE];
339 if (tlen + alen + RAFF3_HDR_SIZE + 12 > size)
340 return;
341 if (alen > 0)
342 {
343 char x[alen + 1];
344
345 memcpy (x,
346 &data[10 + RAFF3_HDR_SIZE + tlen],
347 alen);
348 x[alen] = '\0';
349 ADD (x,
350 EXTRACTOR_METATYPE_AUTHOR_NAME);
351 }
352 clen = data[10 + tlen + alen + RAFF3_HDR_SIZE];
353 if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size)
354 return;
355 if (clen > 0)
356 {
357 char x[clen + 1];
358
359 memcpy (x,
360 &data[11 + RAFF4_HDR_SIZE + tlen + alen],
361 clen);
362 x[clen] = '\0';
363 ADD (x,
364 EXTRACTOR_METATYPE_COPYRIGHT);
365 }
366 aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE];
367 if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size)
368 return;
369 if (aplen > 0)
370 {
371 char x[aplen + 1];
372
373 memcpy (x,
374 &data[12 + RAFF4_HDR_SIZE + tlen + alen + clen],
375 aplen);
376 x[aplen] = '\0';
377 ADD (x,
378 EXTRACTOR_METATYPE_UNKNOWN);
379 }
380}
381
382
383static void
384extract_raff4 (struct EXTRACTOR_ExtractContext *ec,
385 const void *ptr,
386 size_t size)
387{
388 const uint8_t *data = ptr;
389 uint8_t tlen;
390 uint8_t alen;
391 uint8_t clen;
392 uint8_t aplen;
393
394 if (size <= RAFF4_HDR_SIZE + 16 + 4)
395 return;
396 tlen = data[16 + RAFF4_HDR_SIZE];
397 if (tlen + RAFF4_HDR_SIZE + 20 > size)
398 return;
399 alen = data[17 + tlen + RAFF4_HDR_SIZE];
400 if (tlen + alen + RAFF4_HDR_SIZE + 20 > size)
401 return;
402 clen = data[18 + tlen + alen + RAFF4_HDR_SIZE];
403 if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size)
404 return;
405 aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE];
406 if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size)
407 return;
408 if (tlen > 0)
409 {
410 char x[tlen + 1];
411
412 memcpy (x,
413 &data[17 + RAFF4_HDR_SIZE],
414 tlen);
415 x[tlen] = '\0';
416 ADD (x,
417 EXTRACTOR_METATYPE_TITLE);
418 }
419 if (alen > 0)
420 {
421 char x[alen + 1];
422
423 memcpy (x,
424 &data[18 + RAFF4_HDR_SIZE + tlen],
425 alen);
426 x[alen] = '\0';
427 ADD (x,
428 EXTRACTOR_METATYPE_AUTHOR_NAME);
429 }
430 if (clen > 0)
431 {
432 char x[clen + 1];
433
434 memcpy (x,
435 &data[19 + RAFF4_HDR_SIZE + tlen + alen],
436 clen);
437 x[clen] = '\0';
438 ADD (x,
439 EXTRACTOR_METATYPE_COPYRIGHT);
440 }
441 if (aplen > 0)
442 {
443 char x[aplen + 1];
444
445 memcpy (x,
446 &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen],
447 aplen);
448 x[aplen] = '\0';
449 ADD (x,
450 EXTRACTOR_METATYPE_UNKNOWN);
451 }
452}
453
454
455static void
456extract_raff (struct EXTRACTOR_ExtractContext *ec,
457 const void *ptr,
458 size_t size)
459{
460 const uint8_t *data = ptr;
461 const struct RAFF_Header *hdr;
462
463 /* HELIX */
464 if (size <= sizeof (*hdr) + 4)
465 return;
466 ADD ("audio/vnd.rn-realaudio",
467 EXTRACTOR_METATYPE_MIMETYPE);
468 hdr = (const struct RAFF_Header *) &data[4];
469 switch (ntohs (hdr->version))
470 {
471 case 3:
472 extract_raff3 (ec,
473 ptr,
474 size);
475 break;
476 case 4:
477 extract_raff4 (ec,
478 ptr,
479 size);
480 break;
481 }
482}
483
484
485/* old real format */
486static void
487extract_real (struct EXTRACTOR_ExtractContext *ec,
488 const void *data,
489 size_t size)
490{
491 uint64_t off = 0;
492 size_t pos = 0;
493
494 while (1)
495 {
496 uint32_t length;
497
498 if ( (pos + 8 > size) ||
499 (pos + 8 < pos) ||
500 (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) )
501 {
502 uint64_t noff;
503 void *in;
504 ssize_t isize;
505
506 noff = ec->seek (ec->cls,
507 off + pos,
508 SEEK_SET);
509 if (-1 == noff)
510 return;
511 isize = ec->read (ec->cls,
512 &in,
513 32 * 1024);
514 if (isize < 8)
515 return;
516 data = in;
517 size = isize;
518 off = noff;
519 pos = 0;
520 }
521 if (length <= 8)
522 return;
523 if ( (pos + length > size) ||
524 (pos + length < pos) )
525 return;
526 switch (ntohl (((uint32_t *) (data + pos))[0]))
527 {
528 case MDPR_HEADER:
529 processMediaProperties (data + pos,
530 ec);
531 pos += length;
532 break;
533 case CONT_HEADER:
534 processContentDescription (data + pos,
535 ec);
536 pos += length;
537 break;
538 case REAL_HEADER: /* treat like default */
539 default:
540 pos += length;
541 break;
542 }
543 }
544}
545
546
547/**
548 * "extract" metadata from a REAL file
549 *
550 * @param ec extraction context
551 */
552void
553EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec)
554{
555 void *data;
556 size_t n;
557
558 n = ec->read (ec->cls,
559 &data,
560 sizeof (struct RAFF4_Header) + 4 * 256);
561 if (n < sizeof (uint32_t))
562 return;
563 switch (ntohl (*(uint32_t *) data))
564 {
565 case RAFF4_HEADER:
566 extract_raff (ec,
567 data,
568 n);
569 break;
570 case REAL_HEADER:
571 extract_real (ec,
572 data,
573 n);
574 break;
575 }
576}
577
578
579/* end of real_extractor.c */