diff options
Diffstat (limited to 'src/plugins/real_extractor.c')
-rw-r--r-- | src/plugins/real_extractor.c | 579 |
1 files changed, 579 insertions, 0 deletions
diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c new file mode 100644 index 0000000..9d77b28 --- /dev/null +++ b/src/plugins/real_extractor.c | |||
@@ -0,0 +1,579 @@ | |||
1 | /* | ||
2 | * This file is part of libextractor. | ||
3 | * Copyright (C) 2021 Christian Grothoff | ||
4 | * | ||
5 | * libextractor is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published | ||
7 | * by the Free Software Foundation; either version 3, or (at your | ||
8 | * option) any later version. | ||
9 | * | ||
10 | * libextractor is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with libextractor; see the file COPYING. If not, write to the | ||
17 | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | * Boston, MA 02110-1301, USA. | ||
19 | * | ||
20 | */ | ||
21 | /** | ||
22 | * @file plugins/real_extractor.c | ||
23 | * @brief plugin to support REAL files | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "extractor.h" | ||
28 | |||
/**
 * Fixed-size prefix of a media properties chunk (the chunk that
 * extract_real() dispatches on MDPR_HEADER), followed by a variable
 * length section.  The parser in this file converts 'size' with
 * ntohl(), i.e. it is stored in network byte order.
 */
struct MediaProperties
{
  uint32_t object_id;
  uint32_t size;
  uint16_t object_version; /* must be 0 */
  uint16_t stream_number;
  uint32_t max_bit_rate;
  uint32_t avg_bit_rate;
  uint32_t max_packet_size;
  uint32_t avg_packet_size;
  uint32_t start_time;
  uint32_t preroll;
  uint32_t duration;
  uint8_t stream_name_size;
  /* C99 flexible array member (instead of the GNU-only 'data[0]');
     sizeof (struct MediaProperties) is not affected by this member */
  uint8_t data[]; /* variable length section */
  /*
     uint8_t[stream_name_size] stream_name;
     uint8_t mime_type_size;
     uint8_t[mime_type_size] mime_type;
     uint32_t type_specific_len;
     uint8_t[type_specific_len] type_specific_data;
   */
};
52 | |||
/**
 * Fixed-size prefix of a content description chunk (the chunk that
 * extract_real() dispatches on CONT_HEADER), followed by a variable
 * length section.  The parser in this file converts 'size' with
 * ntohl() and the 16-bit length fields with ntohs(), i.e. they are
 * stored in network byte order.
 */
struct ContentDescription
{
  uint32_t object_id;
  uint32_t size;
  uint16_t object_version; /* must be 0 */
  uint16_t title_len;
  /* C99 flexible array member (instead of the GNU-only 'data[0]');
     sizeof (struct ContentDescription) is not affected by this member */
  uint8_t data[]; /* variable length section */
  /*
     uint8_t[title_len] title;
     uint16_t author_len;
     uint8_t[author_len] author;
     uint16_t copyright_len;
     uint8_t[copyright_len] copyright;
     uint16_t comment_len;
     uint8_t[comment_len] comment;
   */
};
70 | /* author, copyright and comment are supposed to be ASCII */ | ||
71 | |||
72 | |||
/* 32-bit tags, compared against values that were passed through ntohl().
   The trailing comment spells out the bytes each constant consists of. */
#define REAL_HEADER  0x2E524D46 /* '.' 'R' 'M' 'F'  */
#define MDPR_HEADER  0x4D445052 /* 'M' 'D' 'P' 'R'  */
#define CONT_HEADER  0x434F4E54 /* 'C' 'O' 'N' 'T'  */
#define RAFF4_HEADER 0x2E7261FD /* '.' 'r' 'a' 0xFD */
77 | |||
78 | |||
/**
 * Pass one meta data item to the 'proc' callback of the extraction
 * context.  The macro relies on a variable named 'ec' being in scope
 * at the point of use, and it executes 'return;' in the enclosing
 * function when the callback returns a non-zero value; it can thus
 * only be used inside functions returning void.  Note that @a s is
 * evaluated twice.
 *
 * @param s 0-terminated string with the meta data value
 * @param t type of the meta data
 */
#define ADD(s,t) do { \
    if (0 != ec->proc (ec->cls, \
                       "real", \
                       t, \
                       EXTRACTOR_METAFORMAT_C_STRING, \
                       "text/plain", \
                       s, \
                       strlen (s) + 1)) \
      return; \
} while (0)
91 | |||
92 | |||
93 | static void | ||
94 | processMediaProperties (const struct MediaProperties *prop, | ||
95 | struct EXTRACTOR_ExtractContext *ec) | ||
96 | { | ||
97 | uint8_t mime_type_size; | ||
98 | uint32_t prop_size; | ||
99 | |||
100 | prop_size = ntohl (prop->size); | ||
101 | if (prop_size <= sizeof (struct MediaProperties)) | ||
102 | return; | ||
103 | if (0 != prop->object_version) | ||
104 | return; | ||
105 | if (prop_size <= prop->stream_name_size + sizeof (uint8_t) | ||
106 | + sizeof (struct MediaProperties)) | ||
107 | return; | ||
108 | mime_type_size = prop->data[prop->stream_name_size]; | ||
109 | if (prop_size > prop->stream_name_size + sizeof (uint8_t) | ||
110 | + mime_type_size + sizeof (struct MediaProperties)) | ||
111 | { | ||
112 | char data[mime_type_size + 1]; | ||
113 | |||
114 | memcpy (data, | ||
115 | &prop->data[prop->stream_name_size + 1], | ||
116 | mime_type_size); | ||
117 | data[mime_type_size] = '\0'; | ||
118 | ADD (data, | ||
119 | EXTRACTOR_METATYPE_MIMETYPE); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | |||
124 | static void | ||
125 | processContentDescription (const struct ContentDescription *prop, | ||
126 | struct EXTRACTOR_ExtractContext *ec) | ||
127 | { | ||
128 | uint16_t author_len; | ||
129 | uint16_t copyright_len; | ||
130 | uint16_t comment_len; | ||
131 | uint16_t title_len; | ||
132 | uint32_t prop_size; | ||
133 | |||
134 | prop_size = ntohl (prop->size); | ||
135 | if (prop_size <= sizeof (struct ContentDescription)) | ||
136 | return; | ||
137 | if (0 != prop->object_version) | ||
138 | return; | ||
139 | title_len = ntohs (prop->title_len); | ||
140 | if (prop_size <= | ||
141 | title_len | ||
142 | + sizeof (struct ContentDescription)) | ||
143 | return; | ||
144 | if (title_len > 0) | ||
145 | { | ||
146 | char title[title_len + 1]; | ||
147 | |||
148 | memcpy (title, | ||
149 | &prop->data[0], | ||
150 | title_len); | ||
151 | title[title_len] = '\0'; | ||
152 | ADD (title, | ||
153 | EXTRACTOR_METATYPE_TITLE); | ||
154 | } | ||
155 | if (prop_size <= | ||
156 | title_len | ||
157 | + sizeof (uint16_t) | ||
158 | + sizeof (struct ContentDescription)) | ||
159 | return; | ||
160 | author_len = ntohs (*(uint16_t *) &prop->data[title_len]); | ||
161 | if (prop_size <= | ||
162 | title_len | ||
163 | + sizeof (uint16_t) | ||
164 | + author_len | ||
165 | + sizeof (struct ContentDescription)) | ||
166 | return; | ||
167 | if (author_len > 0) | ||
168 | { | ||
169 | char author[author_len + 1]; | ||
170 | |||
171 | memcpy (author, | ||
172 | &prop->data[title_len | ||
173 | + sizeof (uint16_t)], | ||
174 | author_len); | ||
175 | author[author_len] = '\0'; | ||
176 | ADD (author, | ||
177 | EXTRACTOR_METATYPE_AUTHOR_NAME); | ||
178 | } | ||
179 | if (prop_size <= | ||
180 | title_len | ||
181 | + sizeof (uint16_t) | ||
182 | + author_len | ||
183 | + sizeof (uint16_t) | ||
184 | + sizeof (struct ContentDescription)) | ||
185 | return; | ||
186 | copyright_len = ntohs (*(uint16_t *) &prop->data[title_len | ||
187 | + author_len | ||
188 | + sizeof (uint16_t)]); | ||
189 | if (prop_size <= | ||
190 | title_len | ||
191 | + sizeof (uint16_t) | ||
192 | + author_len | ||
193 | + sizeof (uint16_t) | ||
194 | + copyright_len | ||
195 | + sizeof (struct ContentDescription)) | ||
196 | return; | ||
197 | if (copyright_len > 0) | ||
198 | { | ||
199 | char copyright[copyright_len + 1]; | ||
200 | |||
201 | memcpy (copyright, | ||
202 | &prop->data[title_len | ||
203 | + sizeof (uint16_t) * 2 | ||
204 | + author_len], | ||
205 | copyright_len); | ||
206 | copyright[copyright_len] = '\0'; | ||
207 | ADD (copyright, | ||
208 | EXTRACTOR_METATYPE_COPYRIGHT); | ||
209 | } | ||
210 | |||
211 | if (prop_size <= | ||
212 | title_len | ||
213 | + sizeof (uint16_t) | ||
214 | + author_len | ||
215 | + sizeof (uint16_t) | ||
216 | + copyright_len | ||
217 | + sizeof (uint16_t) | ||
218 | + sizeof (struct ContentDescription)) | ||
219 | return; | ||
220 | comment_len = ntohs (*(uint16_t *) &prop->data[title_len | ||
221 | + author_len | ||
222 | + copyright_len | ||
223 | + 2 * sizeof (uint16_t)]); | ||
224 | if (prop_size < | ||
225 | title_len | ||
226 | + sizeof (uint16_t) | ||
227 | + author_len | ||
228 | + sizeof (uint16_t) | ||
229 | + copyright_len | ||
230 | + sizeof (uint16_t) | ||
231 | + comment_len | ||
232 | + sizeof (struct ContentDescription)) | ||
233 | return; | ||
234 | |||
235 | if (comment_len > 0) | ||
236 | { | ||
237 | char comment[comment_len + 1]; | ||
238 | |||
239 | memcpy (comment, | ||
240 | &prop->data[title_len | ||
241 | + sizeof (uint16_t) * 3 | ||
242 | + author_len | ||
243 | + copyright_len], | ||
244 | comment_len); | ||
245 | comment[comment_len] = '\0'; | ||
246 | ADD (comment, | ||
247 | EXTRACTOR_METATYPE_COMMENT); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | |||
/**
 * The 16-bit version field that extract_raff() reads at byte offset 4
 * of the file (converted with ntohs()) to choose between the
 * version 3 and version 4 parsers.
 */
struct RAFF_Header
{
  uint16_t version;
};
256 | |||
/**
 * Fixed fields of a version 3 header.  The fields are followed by
 * four strings, each preceded by a one-byte length (see the layout
 * sketch inside the struct).
 */
struct RAFF3_Header
{
  uint8_t unknown[10];
  uint32_t data_size;
  /*
     uint8_t tlen;
     uint8_t title[tlen];
     uint8_t alen;
     uint8_t author[alen];
     uint8_t clen;
     uint8_t copyright[clen];
     uint8_t aplen;
     uint8_t app[aplen]; */
};


/* Sum of the sizes of the fixed fields above (10 + 4); the parser
   works with this constant and raw byte offsets rather than with
   sizeof (struct RAFF3_Header). */
#define RAFF3_HDR_SIZE 14
274 | |||
275 | |||
/**
 * Fixed fields of a version 4 header.  The fields are followed by
 * four strings, each preceded by a one-byte length (see the layout
 * sketch inside the struct).
 */
struct RAFF4_Header
{
  uint16_t version;
  uint16_t revision;
  uint16_t header_length;
  uint16_t compression_type;
  uint32_t granularity;
  uint32_t total_bytes;
  uint32_t bytes_per_minute;
  uint32_t bytes_per_minute2;
  uint16_t interleave_factor;
  uint16_t interleave_block_size;
  uint32_t user_data;
  float sample_rate;
  uint16_t sample_size;
  uint16_t channels;
  uint8_t interleave_code[5];
  uint8_t compression_code[5];
  uint8_t is_interleaved;
  uint8_t copy_byte;
  uint8_t stream_type;
  /*
     uint8_t tlen;
     uint8_t title[tlen];
     uint8_t alen;
     uint8_t author[alen];
     uint8_t clen;
     uint8_t copyright[clen];
     uint8_t aplen;
     uint8_t app[aplen]; */
};

/* Sum of the sizes of the fixed fields above
   (8 * 2 + 6 * 4 + 2 * 5 + 3 * 1 bytes); the parser works with this
   constant and raw byte offsets. */
#define RAFF4_HDR_SIZE 53
309 | |||
310 | |||
311 | static void | ||
312 | extract_raff3 (struct EXTRACTOR_ExtractContext *ec, | ||
313 | const void *ptr, | ||
314 | size_t size) | ||
315 | { | ||
316 | const uint8_t *data = ptr; | ||
317 | uint8_t tlen; | ||
318 | uint8_t alen; | ||
319 | uint8_t clen; | ||
320 | uint8_t aplen; | ||
321 | |||
322 | if (size <= RAFF3_HDR_SIZE + 8) | ||
323 | return; | ||
324 | tlen = data[8 + RAFF3_HDR_SIZE]; | ||
325 | if (tlen + RAFF3_HDR_SIZE + 12 > size) | ||
326 | return; | ||
327 | if (tlen > 0) | ||
328 | { | ||
329 | char x[tlen + 1]; | ||
330 | |||
331 | memcpy (x, | ||
332 | &data[9 + RAFF3_HDR_SIZE], | ||
333 | tlen); | ||
334 | x[tlen] = '\0'; | ||
335 | ADD (x, | ||
336 | EXTRACTOR_METATYPE_TITLE); | ||
337 | } | ||
338 | alen = data[9 + tlen + RAFF3_HDR_SIZE]; | ||
339 | if (tlen + alen + RAFF3_HDR_SIZE + 12 > size) | ||
340 | return; | ||
341 | if (alen > 0) | ||
342 | { | ||
343 | char x[alen + 1]; | ||
344 | |||
345 | memcpy (x, | ||
346 | &data[10 + RAFF3_HDR_SIZE + tlen], | ||
347 | alen); | ||
348 | x[alen] = '\0'; | ||
349 | ADD (x, | ||
350 | EXTRACTOR_METATYPE_AUTHOR_NAME); | ||
351 | } | ||
352 | clen = data[10 + tlen + alen + RAFF3_HDR_SIZE]; | ||
353 | if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size) | ||
354 | return; | ||
355 | if (clen > 0) | ||
356 | { | ||
357 | char x[clen + 1]; | ||
358 | |||
359 | memcpy (x, | ||
360 | &data[11 + RAFF4_HDR_SIZE + tlen + alen], | ||
361 | clen); | ||
362 | x[clen] = '\0'; | ||
363 | ADD (x, | ||
364 | EXTRACTOR_METATYPE_COPYRIGHT); | ||
365 | } | ||
366 | aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE]; | ||
367 | if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size) | ||
368 | return; | ||
369 | if (aplen > 0) | ||
370 | { | ||
371 | char x[aplen + 1]; | ||
372 | |||
373 | memcpy (x, | ||
374 | &data[12 + RAFF4_HDR_SIZE + tlen + alen + clen], | ||
375 | aplen); | ||
376 | x[aplen] = '\0'; | ||
377 | ADD (x, | ||
378 | EXTRACTOR_METATYPE_UNKNOWN); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | |||
383 | static void | ||
384 | extract_raff4 (struct EXTRACTOR_ExtractContext *ec, | ||
385 | const void *ptr, | ||
386 | size_t size) | ||
387 | { | ||
388 | const uint8_t *data = ptr; | ||
389 | uint8_t tlen; | ||
390 | uint8_t alen; | ||
391 | uint8_t clen; | ||
392 | uint8_t aplen; | ||
393 | |||
394 | if (size <= RAFF4_HDR_SIZE + 16 + 4) | ||
395 | return; | ||
396 | tlen = data[16 + RAFF4_HDR_SIZE]; | ||
397 | if (tlen + RAFF4_HDR_SIZE + 20 > size) | ||
398 | return; | ||
399 | alen = data[17 + tlen + RAFF4_HDR_SIZE]; | ||
400 | if (tlen + alen + RAFF4_HDR_SIZE + 20 > size) | ||
401 | return; | ||
402 | clen = data[18 + tlen + alen + RAFF4_HDR_SIZE]; | ||
403 | if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size) | ||
404 | return; | ||
405 | aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE]; | ||
406 | if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size) | ||
407 | return; | ||
408 | if (tlen > 0) | ||
409 | { | ||
410 | char x[tlen + 1]; | ||
411 | |||
412 | memcpy (x, | ||
413 | &data[17 + RAFF4_HDR_SIZE], | ||
414 | tlen); | ||
415 | x[tlen] = '\0'; | ||
416 | ADD (x, | ||
417 | EXTRACTOR_METATYPE_TITLE); | ||
418 | } | ||
419 | if (alen > 0) | ||
420 | { | ||
421 | char x[alen + 1]; | ||
422 | |||
423 | memcpy (x, | ||
424 | &data[18 + RAFF4_HDR_SIZE + tlen], | ||
425 | alen); | ||
426 | x[alen] = '\0'; | ||
427 | ADD (x, | ||
428 | EXTRACTOR_METATYPE_AUTHOR_NAME); | ||
429 | } | ||
430 | if (clen > 0) | ||
431 | { | ||
432 | char x[clen + 1]; | ||
433 | |||
434 | memcpy (x, | ||
435 | &data[19 + RAFF4_HDR_SIZE + tlen + alen], | ||
436 | clen); | ||
437 | x[clen] = '\0'; | ||
438 | ADD (x, | ||
439 | EXTRACTOR_METATYPE_COPYRIGHT); | ||
440 | } | ||
441 | if (aplen > 0) | ||
442 | { | ||
443 | char x[aplen + 1]; | ||
444 | |||
445 | memcpy (x, | ||
446 | &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen], | ||
447 | aplen); | ||
448 | x[aplen] = '\0'; | ||
449 | ADD (x, | ||
450 | EXTRACTOR_METATYPE_UNKNOWN); | ||
451 | } | ||
452 | } | ||
453 | |||
454 | |||
455 | static void | ||
456 | extract_raff (struct EXTRACTOR_ExtractContext *ec, | ||
457 | const void *ptr, | ||
458 | size_t size) | ||
459 | { | ||
460 | const uint8_t *data = ptr; | ||
461 | const struct RAFF_Header *hdr; | ||
462 | |||
463 | /* HELIX */ | ||
464 | if (size <= sizeof (*hdr) + 4) | ||
465 | return; | ||
466 | ADD ("audio/vnd.rn-realaudio", | ||
467 | EXTRACTOR_METATYPE_MIMETYPE); | ||
468 | hdr = (const struct RAFF_Header *) &data[4]; | ||
469 | switch (ntohs (hdr->version)) | ||
470 | { | ||
471 | case 3: | ||
472 | extract_raff3 (ec, | ||
473 | ptr, | ||
474 | size); | ||
475 | break; | ||
476 | case 4: | ||
477 | extract_raff4 (ec, | ||
478 | ptr, | ||
479 | size); | ||
480 | break; | ||
481 | } | ||
482 | } | ||
483 | |||
484 | |||
485 | /* old real format */ | ||
486 | static void | ||
487 | extract_real (struct EXTRACTOR_ExtractContext *ec, | ||
488 | const void *data, | ||
489 | size_t size) | ||
490 | { | ||
491 | uint64_t off = 0; | ||
492 | size_t pos = 0; | ||
493 | |||
494 | while (1) | ||
495 | { | ||
496 | uint32_t length; | ||
497 | |||
498 | if ( (pos + 8 > size) || | ||
499 | (pos + 8 < pos) || | ||
500 | (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) ) | ||
501 | { | ||
502 | uint64_t noff; | ||
503 | void *in; | ||
504 | ssize_t isize; | ||
505 | |||
506 | noff = ec->seek (ec->cls, | ||
507 | off + pos, | ||
508 | SEEK_SET); | ||
509 | if (-1 == noff) | ||
510 | return; | ||
511 | isize = ec->read (ec->cls, | ||
512 | &in, | ||
513 | 32 * 1024); | ||
514 | if (isize < 8) | ||
515 | return; | ||
516 | data = in; | ||
517 | size = isize; | ||
518 | off = noff; | ||
519 | pos = 0; | ||
520 | } | ||
521 | if (length <= 8) | ||
522 | return; | ||
523 | if ( (pos + length > size) || | ||
524 | (pos + length < pos) ) | ||
525 | return; | ||
526 | switch (ntohl (((uint32_t *) (data + pos))[0])) | ||
527 | { | ||
528 | case MDPR_HEADER: | ||
529 | processMediaProperties (data + pos, | ||
530 | ec); | ||
531 | pos += length; | ||
532 | break; | ||
533 | case CONT_HEADER: | ||
534 | processContentDescription (data + pos, | ||
535 | ec); | ||
536 | pos += length; | ||
537 | break; | ||
538 | case REAL_HEADER: /* treat like default */ | ||
539 | default: | ||
540 | pos += length; | ||
541 | break; | ||
542 | } | ||
543 | } | ||
544 | } | ||
545 | |||
546 | |||
547 | /** | ||
548 | * "extract" metadata from a REAL file | ||
549 | * | ||
550 | * @param ec extraction context | ||
551 | */ | ||
552 | void | ||
553 | EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec) | ||
554 | { | ||
555 | void *data; | ||
556 | size_t n; | ||
557 | |||
558 | n = ec->read (ec->cls, | ||
559 | &data, | ||
560 | sizeof (struct RAFF4_Header) + 4 * 256); | ||
561 | if (n < sizeof (uint32_t)) | ||
562 | return; | ||
563 | switch (ntohl (*(uint32_t *) data)) | ||
564 | { | ||
565 | case RAFF4_HEADER: | ||
566 | extract_raff (ec, | ||
567 | data, | ||
568 | n); | ||
569 | break; | ||
570 | case REAL_HEADER: | ||
571 | extract_real (ec, | ||
572 | data, | ||
573 | n); | ||
574 | break; | ||
575 | } | ||
576 | } | ||
577 | |||
578 | |||
579 | /* end of real_extractor.c */ | ||