diff options
author | Christian Grothoff <christian@grothoff.org> | 2021-05-02 22:31:07 +0200 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2021-05-02 22:31:07 +0200 |
commit | d40016f1e8b4578b294cfa09a59f43000c427643 (patch) | |
tree | 2d10444f14dd82ade72191f8b343aaa179852797 | |
parent | 1cc2d75852b35e308d88352c23883a57a6f17c6a (diff) | |
download | libextractor-d40016f1e8b4578b294cfa09a59f43000c427643.tar.gz libextractor-d40016f1e8b4578b294cfa09a59f43000c427643.zip |
resolve #2518
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | src/plugins/Makefile.am | 28 | ||||
-rw-r--r-- | src/plugins/old/real_extractor.c | 439 | ||||
-rw-r--r-- | src/plugins/real_extractor.c | 579 | ||||
-rw-r--r-- | src/plugins/test_real.c | 104 | ||||
-rw-r--r-- | src/plugins/testdata/audiosig.rm | bin | 0 -> 9616 bytes | |||
-rw-r--r-- | src/plugins/testdata/ra3.ra | bin | 0 -> 1066 bytes | |||
-rw-r--r-- | src/plugins/vlc_extractor.c | 334 |
9 files changed, 1049 insertions, 439 deletions
@@ -112,6 +112,7 @@ src/plugins/test_ogg | |||
112 | src/plugins/test_ole2 | 112 | src/plugins/test_ole2 |
113 | src/plugins/test_png | 113 | src/plugins/test_png |
114 | src/plugins/test_ps | 114 | src/plugins/test_ps |
115 | src/plugins/test_real | ||
115 | src/plugins/test_riff | 116 | src/plugins/test_riff |
116 | src/plugins/test_rpm | 117 | src/plugins/test_rpm |
117 | src/plugins/test_s3m | 118 | src/plugins/test_s3m |
@@ -1,3 +1,6 @@ | |||
1 | Sun 02 May 2021 10:30:33 PM CEST | ||
2 | Revive REAL plugin (fixes #2518). -CG | ||
3 | |||
1 | Sat 01 May 2021 10:57:55 PM CEST | 4 | Sat 01 May 2021 10:57:55 PM CEST |
2 | Revive ELF plugin (fixes #2516). -CG | 5 | Revive ELF plugin (fixes #2516). -CG |
3 | 6 | ||
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 8cbe21a..58b0590 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am | |||
@@ -23,7 +23,9 @@ SUBDIRS = . | |||
23 | EXTRA_DIST = \ | 23 | EXTRA_DIST = \ |
24 | fuzz_default.sh \ | 24 | fuzz_default.sh \ |
25 | template_extractor.c \ | 25 | template_extractor.c \ |
26 | testdata/audiosig.rm \ | ||
26 | testdata/archive_test.tar \ | 27 | testdata/archive_test.tar \ |
28 | testdata/chello-elf \ | ||
27 | testdata/deb_bzip2.deb \ | 29 | testdata/deb_bzip2.deb \ |
28 | testdata/dvi_ora.dvi \ | 30 | testdata/dvi_ora.dvi \ |
29 | testdata/exiv2_iptc.jpg \ | 31 | testdata/exiv2_iptc.jpg \ |
@@ -51,6 +53,7 @@ EXTRA_DIST = \ | |||
51 | testdata/png_image.png \ | 53 | testdata/png_image.png \ |
52 | testdata/ps_bloomfilter.ps \ | 54 | testdata/ps_bloomfilter.ps \ |
53 | testdata/ps_wallace.ps \ | 55 | testdata/ps_wallace.ps \ |
56 | testdata/ra3.ra \ | ||
54 | testdata/riff_flame.avi \ | 57 | testdata/riff_flame.avi \ |
55 | testdata/rpm_test.rpm \ | 58 | testdata/rpm_test.rpm \ |
56 | testdata/s3m_2nd_pm.s3m \ | 59 | testdata/s3m_2nd_pm.s3m \ |
@@ -171,6 +174,7 @@ plugin_LTLIBRARIES = \ | |||
171 | libextractor_nsf.la \ | 174 | libextractor_nsf.la \ |
172 | libextractor_nsfe.la \ | 175 | libextractor_nsfe.la \ |
173 | libextractor_ps.la \ | 176 | libextractor_ps.la \ |
177 | libextractor_real.la \ | ||
174 | libextractor_riff.la \ | 178 | libextractor_riff.la \ |
175 | libextractor_s3m.la \ | 179 | libextractor_s3m.la \ |
176 | libextractor_sid.la \ | 180 | libextractor_sid.la \ |
@@ -209,6 +213,7 @@ check_PROGRAMS = \ | |||
209 | test_odf \ | 213 | test_odf \ |
210 | test_ps \ | 214 | test_ps \ |
211 | test_png \ | 215 | test_png \ |
216 | test_real \ | ||
212 | test_riff \ | 217 | test_riff \ |
213 | test_s3m \ | 218 | test_s3m \ |
214 | test_sid \ | 219 | test_sid \ |
@@ -562,6 +567,20 @@ test_ps_LDADD = \ | |||
562 | $(top_builddir)/src/plugins/libtest.la | 567 | $(top_builddir)/src/plugins/libtest.la |
563 | 568 | ||
564 | 569 | ||
570 | libextractor_real_la_SOURCES = \ | ||
571 | real_extractor.c | ||
572 | libextractor_real_la_LDFLAGS = \ | ||
573 | $(PLUGINFLAGS) | ||
574 | libextractor_real_la_LIBADD = \ | ||
575 | -lm \ | ||
576 | $(XLIB) \ | ||
577 | $(LE_LIBINTL) | ||
578 | |||
579 | test_real_SOURCES = \ | ||
580 | test_real.c | ||
581 | test_real_LDADD = \ | ||
582 | $(top_builddir)/src/plugins/libtest.la | ||
583 | |||
565 | libextractor_riff_la_SOURCES = \ | 584 | libextractor_riff_la_SOURCES = \ |
566 | riff_extractor.c | 585 | riff_extractor.c |
567 | libextractor_riff_la_LDFLAGS = \ | 586 | libextractor_riff_la_LDFLAGS = \ |
@@ -643,6 +662,15 @@ test_tiff_LDADD = \ | |||
643 | $(top_builddir)/src/plugins/libtest.la | 662 | $(top_builddir)/src/plugins/libtest.la |
644 | 663 | ||
645 | 664 | ||
665 | libextractor_vlc_la_SOURCES = \ | ||
666 | vlc_extractor.c | ||
667 | libextractor_vlc_la_LDFLAGS = \ | ||
668 | $(PLUGINFLAGS) | ||
669 | libextractor_vlc_la_LIBADD = \ | ||
670 | -lvlc \ | ||
671 | $(XLIB) | ||
672 | |||
673 | |||
646 | libextractor_wav_la_SOURCES = \ | 674 | libextractor_wav_la_SOURCES = \ |
647 | wav_extractor.c | 675 | wav_extractor.c |
648 | libextractor_wav_la_LDFLAGS = \ | 676 | libextractor_wav_la_LDFLAGS = \ |
diff --git a/src/plugins/old/real_extractor.c b/src/plugins/old/real_extractor.c deleted file mode 100644 index cfac031..0000000 --- a/src/plugins/old/real_extractor.c +++ /dev/null | |||
@@ -1,439 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | Copyright (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | |||
21 | #include "platform.h" | ||
22 | #include "extractor.h" | ||
23 | #include <stdint.h> | ||
24 | |||
25 | #define UINT32 uint32_t | ||
26 | #define UINT16 uint16_t | ||
27 | #define UINT8 uint8_t | ||
28 | |||
29 | typedef struct | ||
30 | { | ||
31 | UINT32 object_id; | ||
32 | UINT32 size; | ||
33 | UINT16 object_version; /* must be 0 */ | ||
34 | UINT16 stream_number; | ||
35 | UINT32 max_bit_rate; | ||
36 | UINT32 avg_bit_rate; | ||
37 | UINT32 max_packet_size; | ||
38 | UINT32 avg_packet_size; | ||
39 | UINT32 start_time; | ||
40 | UINT32 preroll; | ||
41 | UINT32 duration; | ||
42 | UINT8 stream_name_size; | ||
43 | UINT8 data[0]; /* variable length section */ | ||
44 | /* | ||
45 | UINT8[stream_name_size] stream_name; | ||
46 | UINT8 mime_type_size; | ||
47 | UINT8[mime_type_size] mime_type; | ||
48 | UINT32 type_specific_len; | ||
49 | UINT8[type_specific_len] type_specific_data; | ||
50 | */ | ||
51 | } Media_Properties; | ||
52 | |||
53 | typedef struct | ||
54 | { | ||
55 | UINT32 object_id; | ||
56 | UINT32 size; | ||
57 | UINT16 object_version; /* must be 0 */ | ||
58 | UINT16 title_len; | ||
59 | UINT8 data[0]; /* variable length section */ | ||
60 | /* | ||
61 | UINT8[title_len] title; | ||
62 | UINT16 author_len; | ||
63 | UINT8[author_len] author; | ||
64 | UINT16 copyright_len; | ||
65 | UINT8[copyright_len] copyright; | ||
66 | UINT16 comment_len; | ||
67 | UINT8[comment_len] comment; | ||
68 | */ | ||
69 | } Content_Description; | ||
70 | /* author, copyright and comment are supposed to be ASCII */ | ||
71 | |||
72 | #define REAL_HEADER 0x2E524d46 | ||
73 | #define MDPR_HEADER 0x4D445052 | ||
74 | #define CONT_HEADER 0x434F4e54 | ||
75 | |||
76 | #define RAFF4_HEADER 0x2E7261FD | ||
77 | |||
78 | |||
79 | static int | ||
80 | processMediaProperties (const Media_Properties *prop, | ||
81 | EXTRACTOR_MetaDataProcessor proc, | ||
82 | void *proc_cls) | ||
83 | { | ||
84 | |||
85 | UINT8 mime_type_size; | ||
86 | UINT32 prop_size; | ||
87 | |||
88 | prop_size = ntohl (prop->size); | ||
89 | if (prop_size <= sizeof (Media_Properties)) | ||
90 | return 0; | ||
91 | if (0 != prop->object_version) | ||
92 | return 0; | ||
93 | if (prop_size <= prop->stream_name_size + sizeof (UINT8) | ||
94 | + sizeof (Media_Properties)) | ||
95 | return 0; | ||
96 | |||
97 | mime_type_size = prop->data[prop->stream_name_size]; | ||
98 | if (prop_size > prop->stream_name_size + sizeof (UINT8) | ||
99 | + +mime_type_size + sizeof (Media_Properties)) | ||
100 | { | ||
101 | char data[mime_type_size + 1]; | ||
102 | memcpy (data, &prop->data[prop->stream_name_size + 1], mime_type_size); | ||
103 | data[mime_type_size] = '\0'; | ||
104 | |||
105 | return proc (proc_cls, | ||
106 | "real", | ||
107 | EXTRACTOR_METATYPE_MIMETYPE, | ||
108 | EXTRACTOR_METAFORMAT_UTF8, | ||
109 | "text/plain", | ||
110 | data, | ||
111 | strlen (data)); | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | |||
117 | static int | ||
118 | processContentDescription (const Content_Description *prop, | ||
119 | EXTRACTOR_MetaDataProcessor proc, | ||
120 | void *proc_cls) | ||
121 | { | ||
122 | UINT16 author_len; | ||
123 | UINT16 copyright_len; | ||
124 | UINT16 comment_len; | ||
125 | UINT16 title_len; | ||
126 | char *title; | ||
127 | char *author; | ||
128 | char *copyright; | ||
129 | char *comment; | ||
130 | UINT32 prop_size; | ||
131 | int ret; | ||
132 | |||
133 | prop_size = ntohl (prop->size); | ||
134 | if (prop_size <= sizeof (Content_Description)) | ||
135 | return 0; | ||
136 | if (0 != prop->object_version) | ||
137 | return 0; | ||
138 | title_len = ntohs (prop->title_len); | ||
139 | if (prop_size <= title_len + sizeof (UINT16) + sizeof (Content_Description)) | ||
140 | return 0; | ||
141 | author_len = ntohs (*(UINT16 *) &prop->data[title_len]); | ||
142 | if (prop_size <= title_len + sizeof (UINT16) | ||
143 | + author_len + sizeof (Content_Description)) | ||
144 | return 0; | ||
145 | |||
146 | copyright_len = ntohs (*(UINT16 *) &prop->data[title_len | ||
147 | + author_len | ||
148 | + sizeof (UINT16)]); | ||
149 | |||
150 | if (prop_size <= title_len + 2 * sizeof (UINT16) | ||
151 | + author_len + copyright_len + sizeof (Content_Description)) | ||
152 | return 0; | ||
153 | |||
154 | comment_len = ntohs (*(UINT16 *) &prop->data[title_len | ||
155 | + author_len | ||
156 | + copyright_len | ||
157 | + 2 * sizeof (UINT16)]); | ||
158 | |||
159 | if (prop_size < title_len + 3 * sizeof (UINT16) | ||
160 | + author_len + copyright_len + comment_len | ||
161 | + sizeof (Content_Description)) | ||
162 | return 0; | ||
163 | |||
164 | ret = 0; | ||
165 | title = malloc (title_len + 1); | ||
166 | if (title != NULL) | ||
167 | { | ||
168 | memcpy (title, &prop->data[0], title_len); | ||
169 | title[title_len] = '\0'; | ||
170 | ret = proc (proc_cls, | ||
171 | "real", | ||
172 | EXTRACTOR_METATYPE_TITLE, | ||
173 | EXTRACTOR_METAFORMAT_UTF8, | ||
174 | "text/plain", | ||
175 | title, | ||
176 | strlen (title) + 1); | ||
177 | free (title); | ||
178 | } | ||
179 | if (ret != 0) | ||
180 | return ret; | ||
181 | |||
182 | author = malloc (author_len + 1); | ||
183 | if (author != NULL) | ||
184 | { | ||
185 | memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len); | ||
186 | author[author_len] = '\0'; | ||
187 | ret = proc (proc_cls, | ||
188 | "real", | ||
189 | EXTRACTOR_METATYPE_AUTHOR_NAME, | ||
190 | EXTRACTOR_METAFORMAT_UTF8, | ||
191 | "text/plain", | ||
192 | author, | ||
193 | strlen (author) + 1); | ||
194 | free (author); | ||
195 | } | ||
196 | if (ret != 0) | ||
197 | return ret; | ||
198 | |||
199 | copyright = malloc (copyright_len + 1); | ||
200 | if (copyright != NULL) | ||
201 | { | ||
202 | memcpy (copyright, | ||
203 | &prop->data[title_len + sizeof (UINT16) * 2 + author_len], | ||
204 | copyright_len); | ||
205 | copyright[copyright_len] = '\0'; | ||
206 | ret = proc (proc_cls, | ||
207 | "real", | ||
208 | EXTRACTOR_METATYPE_COPYRIGHT, | ||
209 | EXTRACTOR_METAFORMAT_UTF8, | ||
210 | "text/plain", | ||
211 | copyright, | ||
212 | strlen (copyright) + 1); | ||
213 | free (copyright); | ||
214 | } | ||
215 | if (ret != 0) | ||
216 | return ret; | ||
217 | |||
218 | comment = malloc (comment_len + 1); | ||
219 | if (comment != NULL) | ||
220 | { | ||
221 | memcpy (comment, | ||
222 | &prop->data[title_len + sizeof (UINT16) * 3 + author_len | ||
223 | + copyright_len], comment_len); | ||
224 | comment[comment_len] = '\0'; | ||
225 | ret = proc (proc_cls, | ||
226 | "real", | ||
227 | EXTRACTOR_METATYPE_COMMENT, | ||
228 | EXTRACTOR_METAFORMAT_UTF8, | ||
229 | "text/plain", | ||
230 | comment, | ||
231 | strlen (comment) + 1); | ||
232 | free (comment); | ||
233 | } | ||
234 | if (ret != 0) | ||
235 | return ret; | ||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | |||
240 | typedef struct RAFF4_header | ||
241 | { | ||
242 | unsigned short version; | ||
243 | unsigned short revision; | ||
244 | unsigned short header_length; | ||
245 | unsigned short compression_type; | ||
246 | unsigned int granularity; | ||
247 | unsigned int total_bytes; | ||
248 | unsigned int bytes_per_minute; | ||
249 | unsigned int bytes_per_minute2; | ||
250 | unsigned short interleave_factor; | ||
251 | unsigned short interleave_block_size; | ||
252 | unsigned int user_data; | ||
253 | float sample_rate; | ||
254 | unsigned short sample_size; | ||
255 | unsigned short channels; | ||
256 | unsigned char interleave_code[5]; | ||
257 | unsigned char compression_code[5]; | ||
258 | unsigned char is_interleaved; | ||
259 | unsigned char copy_byte; | ||
260 | unsigned char stream_type; | ||
261 | /* | ||
262 | unsigned char tlen; | ||
263 | unsigned char title[tlen]; | ||
264 | unsigned char alen; | ||
265 | unsigned char author[alen]; | ||
266 | unsigned char clen; | ||
267 | unsigned char copyright[clen]; | ||
268 | unsigned char aplen; | ||
269 | unsigned char app[aplen]; */ | ||
270 | } RAFF4_header; | ||
271 | |||
272 | #define RAFF4_HDR_SIZE 53 | ||
273 | |||
274 | static char * | ||
275 | stndup (const char *str, size_t n) | ||
276 | { | ||
277 | char *tmp; | ||
278 | tmp = malloc (n + 1); | ||
279 | if (tmp == NULL) | ||
280 | return NULL; | ||
281 | tmp[n] = '\0'; | ||
282 | memcpy (tmp, str, n); | ||
283 | return tmp; | ||
284 | } | ||
285 | |||
286 | |||
287 | /* audio/vnd.rn-realaudio */ | ||
288 | int | ||
289 | EXTRACTOR_real_extract (const unsigned char *data, | ||
290 | size_t size, | ||
291 | EXTRACTOR_MetaDataProcessor proc, | ||
292 | void *proc_cls, | ||
293 | const char *options) | ||
294 | { | ||
295 | const unsigned char *pos; | ||
296 | const unsigned char *end; | ||
297 | unsigned int length; | ||
298 | const RAFF4_header *hdr; | ||
299 | unsigned char tlen; | ||
300 | unsigned char alen; | ||
301 | unsigned char clen; | ||
302 | unsigned char aplen; | ||
303 | char *x; | ||
304 | int ret; | ||
305 | |||
306 | if (size <= 2 * sizeof (int)) | ||
307 | return 0; | ||
308 | if (RAFF4_HEADER == ntohl (*(int *) data)) | ||
309 | { | ||
310 | /* HELIX */ | ||
311 | if (size <= RAFF4_HDR_SIZE + 16 + 4) | ||
312 | return 0; | ||
313 | if (0 != proc (proc_cls, | ||
314 | "real", | ||
315 | EXTRACTOR_METATYPE_MIMETYPE, | ||
316 | EXTRACTOR_METAFORMAT_UTF8, | ||
317 | "text/plain", | ||
318 | "audio/vnd.rn-realaudio", | ||
319 | strlen ("audio/vnd.rn-realaudio") + 1)) | ||
320 | return 1; | ||
321 | hdr = (const RAFF4_header *) &data[16]; | ||
322 | if (ntohs (hdr->header_length) + 16 > size) | ||
323 | return 0; | ||
324 | tlen = data[16 + RAFF4_HDR_SIZE]; | ||
325 | if (tlen + RAFF4_HDR_SIZE + 20 > size) | ||
326 | return 0; | ||
327 | alen = data[17 + tlen + RAFF4_HDR_SIZE]; | ||
328 | if (tlen + alen + RAFF4_HDR_SIZE + 20 > size) | ||
329 | return 0; | ||
330 | clen = data[18 + tlen + alen + RAFF4_HDR_SIZE]; | ||
331 | if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size) | ||
332 | return 0; | ||
333 | aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE]; | ||
334 | if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size) | ||
335 | return 0; | ||
336 | ret = 0; | ||
337 | if ( (tlen > 0) && (ret == 0) ) | ||
338 | { | ||
339 | x = stndup ((const char *) &data[17 + RAFF4_HDR_SIZE], tlen); | ||
340 | if (x != NULL) | ||
341 | { | ||
342 | ret = proc (proc_cls, | ||
343 | "real", | ||
344 | EXTRACTOR_METATYPE_MIMETYPE, | ||
345 | EXTRACTOR_METAFORMAT_UTF8, | ||
346 | "text/plain", | ||
347 | x, | ||
348 | strlen (x) + 1); | ||
349 | free (x); | ||
350 | } | ||
351 | } | ||
352 | if ( (alen > 0) && (ret == 0) ) | ||
353 | { | ||
354 | x = stndup ((const char *) &data[18 + RAFF4_HDR_SIZE + tlen], alen); | ||
355 | if (x != NULL) | ||
356 | { | ||
357 | ret = proc (proc_cls, | ||
358 | "real", | ||
359 | EXTRACTOR_METATYPE_MIMETYPE, | ||
360 | EXTRACTOR_METAFORMAT_UTF8, | ||
361 | "text/plain", | ||
362 | x, | ||
363 | strlen (x) + 1); | ||
364 | free (x); | ||
365 | } | ||
366 | } | ||
367 | if ( (clen > 0) && (ret == 0) ) | ||
368 | { | ||
369 | x = stndup ((const char *) &data[19 + RAFF4_HDR_SIZE + tlen + alen], | ||
370 | clen); | ||
371 | if (x != NULL) | ||
372 | { | ||
373 | ret = proc (proc_cls, | ||
374 | "real", | ||
375 | EXTRACTOR_METATYPE_MIMETYPE, | ||
376 | EXTRACTOR_METAFORMAT_UTF8, | ||
377 | "text/plain", | ||
378 | x, | ||
379 | strlen (x) + 1); | ||
380 | free (x); | ||
381 | } | ||
382 | } | ||
383 | if ( (aplen > 0) && (ret == 0) ) | ||
384 | { | ||
385 | x = stndup ((const char *) &data[20 + RAFF4_HDR_SIZE + tlen + alen | ||
386 | + clen], aplen); | ||
387 | if (x != NULL) | ||
388 | { | ||
389 | ret = proc (proc_cls, | ||
390 | "real", | ||
391 | EXTRACTOR_METATYPE_MIMETYPE, | ||
392 | EXTRACTOR_METAFORMAT_UTF8, | ||
393 | "text/plain", | ||
394 | x, | ||
395 | strlen (x) + 1); | ||
396 | free (x); | ||
397 | } | ||
398 | } | ||
399 | return ret; | ||
400 | } | ||
401 | if (REAL_HEADER == ntohl (*(int *) data)) | ||
402 | { | ||
403 | /* old real */ | ||
404 | end = &data[size]; | ||
405 | pos = &data[0]; | ||
406 | ret = 0; | ||
407 | while (0 == ret) | ||
408 | { | ||
409 | if ((pos + 8 >= end) || (pos + 8 < pos)) | ||
410 | break; | ||
411 | length = ntohl (*(((unsigned int *) pos) + 1)); | ||
412 | if (length <= 0) | ||
413 | break; | ||
414 | if ((pos + length >= end) || (pos + length < pos)) | ||
415 | break; | ||
416 | switch (ntohl (*((unsigned int *) pos))) | ||
417 | { | ||
418 | case MDPR_HEADER: | ||
419 | ret = processMediaProperties ((Media_Properties *) pos, | ||
420 | proc, | ||
421 | proc_cls); | ||
422 | pos += length; | ||
423 | break; | ||
424 | case CONT_HEADER: | ||
425 | ret = processContentDescription ((Content_Description *) pos, | ||
426 | proc, | ||
427 | proc_cls); | ||
428 | pos += length; | ||
429 | break; | ||
430 | case REAL_HEADER: /* treat like default */ | ||
431 | default: | ||
432 | pos += length; | ||
433 | break; | ||
434 | } | ||
435 | } | ||
436 | return ret; | ||
437 | } | ||
438 | return 0; | ||
439 | } | ||
diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c new file mode 100644 index 0000000..9d77b28 --- /dev/null +++ b/src/plugins/real_extractor.c | |||
@@ -0,0 +1,579 @@ | |||
1 | /* | ||
2 | * This file is part of libextractor. | ||
3 | * Copyright (C) 2021 Christian Grothoff | ||
4 | * | ||
5 | * libextractor is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published | ||
7 | * by the Free Software Foundation; either version 3, or (at your | ||
8 | * option) any later version. | ||
9 | * | ||
10 | * libextractor is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with libextractor; see the file COPYING. If not, write to the | ||
17 | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | * Boston, MA 02110-1301, USA. | ||
19 | * | ||
20 | */ | ||
21 | /** | ||
22 | * @file plugins/real_extractor.c | ||
23 | * @brief plugin to support REAL files | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "extractor.h" | ||
28 | |||
/**
 * Fixed-size prefix of a media properties chunk (presumably the chunk
 * tagged MDPR_HEADER), followed by a variable-length tail.
 *
 * The @e size field is passed through ntohl() before use, i.e. it is
 * kept in network byte order.  The struct is not packed: on ABIs where
 * uint32_t is 4-byte aligned, @e data starts at offset 41 while
 * sizeof (struct MediaProperties) is 44.
 */
struct MediaProperties
{
  uint32_t object_id;
  uint32_t size;
  uint16_t object_version; /* must be 0 */
  uint16_t stream_number;
  uint32_t max_bit_rate;
  uint32_t avg_bit_rate;
  uint32_t max_packet_size;
  uint32_t avg_packet_size;
  uint32_t start_time;
  uint32_t preroll;
  uint32_t duration;
  uint8_t stream_name_size;
  /* variable length section (C99 flexible array member) */
  uint8_t data[];
  /*
     Layout of the variable length section:

     uint8_t[stream_name_size] stream_name;
     uint8_t mime_type_size;
     uint8_t[mime_type_size] mime_type;
     uint32_t type_specific_len;
     uint8_t[type_specific_len] type_specific_data;
   */
};
52 | |||
/**
 * Fixed-size prefix of a content description chunk (presumably the
 * chunk tagged CONT_HEADER), followed by four length-prefixed strings.
 *
 * @e size and @e title_len are passed through ntohl() / ntohs() before
 * use, i.e. they are kept in network byte order.
 */
struct ContentDescription
{
  uint32_t object_id;
  uint32_t size;
  uint16_t object_version; /* must be 0 */
  uint16_t title_len;
  /* variable length section (C99 flexible array member) */
  uint8_t data[];
  /*
     Layout of the variable length section:

     uint8_t[title_len] title;
     uint16_t author_len;
     uint8_t[author_len] author;
     uint16_t copyright_len;
     uint8_t[copyright_len] copyright;
     uint16_t comment_len;
     uint8_t[comment_len] comment;
   */
};
/* author, copyright and comment are supposed to be ASCII */
71 | |||
72 | |||
/* Four-byte tags, written as big-endian 32-bit values. */
#define REAL_HEADER  0x2E524D46 /* bytes '.', 'R', 'M', 'F' */
#define MDPR_HEADER  0x4D445052 /* bytes 'M', 'D', 'P', 'R' */
#define CONT_HEADER  0x434F4E54 /* bytes 'C', 'O', 'N', 'T' */
#define RAFF4_HEADER 0x2E7261FD /* bytes '.', 'r', 'a', 0xFD */
77 | |||
78 | |||
/**
 * Report one NUL-terminated string as meta data.
 *
 * The macro relies on a variable named `ec` (providing `proc` and
 * `cls`) being in scope where it is used.  If the processor returns a
 * non-zero value the macro executes `return;`, so it can only be used
 * inside functions returning void.  Note that @a s is evaluated twice.
 *
 * @param s string meta data value
 * @param t type of the meta data
 */
#define ADD(s,t) do {                                   \
    if (0 != ec->proc (ec->cls,                         \
                       "real",                          \
                       t,                               \
                       EXTRACTOR_METAFORMAT_C_STRING,   \
                       "text/plain",                    \
                       s,                               \
                       strlen (s) + 1))                 \
    {                                                   \
      return;                                           \
    }                                                   \
} while (0)
91 | |||
92 | |||
93 | static void | ||
94 | processMediaProperties (const struct MediaProperties *prop, | ||
95 | struct EXTRACTOR_ExtractContext *ec) | ||
96 | { | ||
97 | uint8_t mime_type_size; | ||
98 | uint32_t prop_size; | ||
99 | |||
100 | prop_size = ntohl (prop->size); | ||
101 | if (prop_size <= sizeof (struct MediaProperties)) | ||
102 | return; | ||
103 | if (0 != prop->object_version) | ||
104 | return; | ||
105 | if (prop_size <= prop->stream_name_size + sizeof (uint8_t) | ||
106 | + sizeof (struct MediaProperties)) | ||
107 | return; | ||
108 | mime_type_size = prop->data[prop->stream_name_size]; | ||
109 | if (prop_size > prop->stream_name_size + sizeof (uint8_t) | ||
110 | + mime_type_size + sizeof (struct MediaProperties)) | ||
111 | { | ||
112 | char data[mime_type_size + 1]; | ||
113 | |||
114 | memcpy (data, | ||
115 | &prop->data[prop->stream_name_size + 1], | ||
116 | mime_type_size); | ||
117 | data[mime_type_size] = '\0'; | ||
118 | ADD (data, | ||
119 | EXTRACTOR_METATYPE_MIMETYPE); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | |||
124 | static void | ||
125 | processContentDescription (const struct ContentDescription *prop, | ||
126 | struct EXTRACTOR_ExtractContext *ec) | ||
127 | { | ||
128 | uint16_t author_len; | ||
129 | uint16_t copyright_len; | ||
130 | uint16_t comment_len; | ||
131 | uint16_t title_len; | ||
132 | uint32_t prop_size; | ||
133 | |||
134 | prop_size = ntohl (prop->size); | ||
135 | if (prop_size <= sizeof (struct ContentDescription)) | ||
136 | return; | ||
137 | if (0 != prop->object_version) | ||
138 | return; | ||
139 | title_len = ntohs (prop->title_len); | ||
140 | if (prop_size <= | ||
141 | title_len | ||
142 | + sizeof (struct ContentDescription)) | ||
143 | return; | ||
144 | if (title_len > 0) | ||
145 | { | ||
146 | char title[title_len + 1]; | ||
147 | |||
148 | memcpy (title, | ||
149 | &prop->data[0], | ||
150 | title_len); | ||
151 | title[title_len] = '\0'; | ||
152 | ADD (title, | ||
153 | EXTRACTOR_METATYPE_TITLE); | ||
154 | } | ||
155 | if (prop_size <= | ||
156 | title_len | ||
157 | + sizeof (uint16_t) | ||
158 | + sizeof (struct ContentDescription)) | ||
159 | return; | ||
160 | author_len = ntohs (*(uint16_t *) &prop->data[title_len]); | ||
161 | if (prop_size <= | ||
162 | title_len | ||
163 | + sizeof (uint16_t) | ||
164 | + author_len | ||
165 | + sizeof (struct ContentDescription)) | ||
166 | return; | ||
167 | if (author_len > 0) | ||
168 | { | ||
169 | char author[author_len + 1]; | ||
170 | |||
171 | memcpy (author, | ||
172 | &prop->data[title_len | ||
173 | + sizeof (uint16_t)], | ||
174 | author_len); | ||
175 | author[author_len] = '\0'; | ||
176 | ADD (author, | ||
177 | EXTRACTOR_METATYPE_AUTHOR_NAME); | ||
178 | } | ||
179 | if (prop_size <= | ||
180 | title_len | ||
181 | + sizeof (uint16_t) | ||
182 | + author_len | ||
183 | + sizeof (uint16_t) | ||
184 | + sizeof (struct ContentDescription)) | ||
185 | return; | ||
186 | copyright_len = ntohs (*(uint16_t *) &prop->data[title_len | ||
187 | + author_len | ||
188 | + sizeof (uint16_t)]); | ||
189 | if (prop_size <= | ||
190 | title_len | ||
191 | + sizeof (uint16_t) | ||
192 | + author_len | ||
193 | + sizeof (uint16_t) | ||
194 | + copyright_len | ||
195 | + sizeof (struct ContentDescription)) | ||
196 | return; | ||
197 | if (copyright_len > 0) | ||
198 | { | ||
199 | char copyright[copyright_len + 1]; | ||
200 | |||
201 | memcpy (copyright, | ||
202 | &prop->data[title_len | ||
203 | + sizeof (uint16_t) * 2 | ||
204 | + author_len], | ||
205 | copyright_len); | ||
206 | copyright[copyright_len] = '\0'; | ||
207 | ADD (copyright, | ||
208 | EXTRACTOR_METATYPE_COPYRIGHT); | ||
209 | } | ||
210 | |||
211 | if (prop_size <= | ||
212 | title_len | ||
213 | + sizeof (uint16_t) | ||
214 | + author_len | ||
215 | + sizeof (uint16_t) | ||
216 | + copyright_len | ||
217 | + sizeof (uint16_t) | ||
218 | + sizeof (struct ContentDescription)) | ||
219 | return; | ||
220 | comment_len = ntohs (*(uint16_t *) &prop->data[title_len | ||
221 | + author_len | ||
222 | + copyright_len | ||
223 | + 2 * sizeof (uint16_t)]); | ||
224 | if (prop_size < | ||
225 | title_len | ||
226 | + sizeof (uint16_t) | ||
227 | + author_len | ||
228 | + sizeof (uint16_t) | ||
229 | + copyright_len | ||
230 | + sizeof (uint16_t) | ||
231 | + comment_len | ||
232 | + sizeof (struct ContentDescription)) | ||
233 | return; | ||
234 | |||
235 | if (comment_len > 0) | ||
236 | { | ||
237 | char comment[comment_len + 1]; | ||
238 | |||
239 | memcpy (comment, | ||
240 | &prop->data[title_len | ||
241 | + sizeof (uint16_t) * 3 | ||
242 | + author_len | ||
243 | + copyright_len], | ||
244 | comment_len); | ||
245 | comment[comment_len] = '\0'; | ||
246 | ADD (comment, | ||
247 | EXTRACTOR_METATYPE_COMMENT); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | |||
/**
 * Version field of a RealAudio header.  extract_raff() overlays this
 * struct at byte offset 4 of the buffer and converts the value with
 * ntohs() before dispatching on it.
 */
struct RAFF_Header
{
  uint16_t version;
};
256 | |||
/**
 * Fixed part of a version 3 RealAudio header, followed by four
 * length-prefixed strings.
 */
struct RAFF3_Header
{
  uint8_t unknown[10];
  uint32_t data_size;
  /*
     Variable length section that follows:

     uint8_t tlen;
     uint8_t title[tlen];
     uint8_t alen;
     uint8_t author[alen];
     uint8_t clen;
     uint8_t copyright[clen];
     uint8_t aplen;
     uint8_t app[aplen];
   */
};


/* Sum of the field sizes above (10 + 4); sizeof (struct RAFF3_Header)
   may be larger because of alignment padding. */
#define RAFF3_HDR_SIZE 14
274 | |||
275 | |||
/**
 * Fixed part of a version 4 RealAudio header, followed by four
 * length-prefixed strings.
 */
struct RAFF4_Header
{
  uint16_t version;
  uint16_t revision;
  uint16_t header_length;
  uint16_t compression_type;
  uint32_t granularity;
  uint32_t total_bytes;
  uint32_t bytes_per_minute;
  uint32_t bytes_per_minute2;
  uint16_t interleave_factor;
  uint16_t interleave_block_size;
  uint32_t user_data;
  float sample_rate;
  uint16_t sample_size;
  uint16_t channels;
  uint8_t interleave_code[5];
  uint8_t compression_code[5];
  uint8_t is_interleaved;
  uint8_t copy_byte;
  uint8_t stream_type;
  /*
     Variable length section that follows:

     uint8_t tlen;
     uint8_t title[tlen];
     uint8_t alen;
     uint8_t author[alen];
     uint8_t clen;
     uint8_t copyright[clen];
     uint8_t aplen;
     uint8_t app[aplen];
   */
};

/* Sum of the field sizes above; sizeof (struct RAFF4_Header) may be
   larger because of trailing alignment padding. */
#define RAFF4_HDR_SIZE 53
309 | |||
310 | |||
311 | static void | ||
312 | extract_raff3 (struct EXTRACTOR_ExtractContext *ec, | ||
313 | const void *ptr, | ||
314 | size_t size) | ||
315 | { | ||
316 | const uint8_t *data = ptr; | ||
317 | uint8_t tlen; | ||
318 | uint8_t alen; | ||
319 | uint8_t clen; | ||
320 | uint8_t aplen; | ||
321 | |||
322 | if (size <= RAFF3_HDR_SIZE + 8) | ||
323 | return; | ||
324 | tlen = data[8 + RAFF3_HDR_SIZE]; | ||
325 | if (tlen + RAFF3_HDR_SIZE + 12 > size) | ||
326 | return; | ||
327 | if (tlen > 0) | ||
328 | { | ||
329 | char x[tlen + 1]; | ||
330 | |||
331 | memcpy (x, | ||
332 | &data[9 + RAFF3_HDR_SIZE], | ||
333 | tlen); | ||
334 | x[tlen] = '\0'; | ||
335 | ADD (x, | ||
336 | EXTRACTOR_METATYPE_TITLE); | ||
337 | } | ||
338 | alen = data[9 + tlen + RAFF3_HDR_SIZE]; | ||
339 | if (tlen + alen + RAFF3_HDR_SIZE + 12 > size) | ||
340 | return; | ||
341 | if (alen > 0) | ||
342 | { | ||
343 | char x[alen + 1]; | ||
344 | |||
345 | memcpy (x, | ||
346 | &data[10 + RAFF3_HDR_SIZE + tlen], | ||
347 | alen); | ||
348 | x[alen] = '\0'; | ||
349 | ADD (x, | ||
350 | EXTRACTOR_METATYPE_AUTHOR_NAME); | ||
351 | } | ||
352 | clen = data[10 + tlen + alen + RAFF3_HDR_SIZE]; | ||
353 | if (tlen + alen + clen + RAFF3_HDR_SIZE + 12 > size) | ||
354 | return; | ||
355 | if (clen > 0) | ||
356 | { | ||
357 | char x[clen + 1]; | ||
358 | |||
359 | memcpy (x, | ||
360 | &data[11 + RAFF4_HDR_SIZE + tlen + alen], | ||
361 | clen); | ||
362 | x[clen] = '\0'; | ||
363 | ADD (x, | ||
364 | EXTRACTOR_METATYPE_COPYRIGHT); | ||
365 | } | ||
366 | aplen = data[11 + tlen + clen + alen + RAFF3_HDR_SIZE]; | ||
367 | if (tlen + alen + clen + aplen + RAFF3_HDR_SIZE + 12 > size) | ||
368 | return; | ||
369 | if (aplen > 0) | ||
370 | { | ||
371 | char x[aplen + 1]; | ||
372 | |||
373 | memcpy (x, | ||
374 | &data[12 + RAFF4_HDR_SIZE + tlen + alen + clen], | ||
375 | aplen); | ||
376 | x[aplen] = '\0'; | ||
377 | ADD (x, | ||
378 | EXTRACTOR_METATYPE_UNKNOWN); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | |||
383 | static void | ||
384 | extract_raff4 (struct EXTRACTOR_ExtractContext *ec, | ||
385 | const void *ptr, | ||
386 | size_t size) | ||
387 | { | ||
388 | const uint8_t *data = ptr; | ||
389 | uint8_t tlen; | ||
390 | uint8_t alen; | ||
391 | uint8_t clen; | ||
392 | uint8_t aplen; | ||
393 | |||
394 | if (size <= RAFF4_HDR_SIZE + 16 + 4) | ||
395 | return; | ||
396 | tlen = data[16 + RAFF4_HDR_SIZE]; | ||
397 | if (tlen + RAFF4_HDR_SIZE + 20 > size) | ||
398 | return; | ||
399 | alen = data[17 + tlen + RAFF4_HDR_SIZE]; | ||
400 | if (tlen + alen + RAFF4_HDR_SIZE + 20 > size) | ||
401 | return; | ||
402 | clen = data[18 + tlen + alen + RAFF4_HDR_SIZE]; | ||
403 | if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size) | ||
404 | return; | ||
405 | aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE]; | ||
406 | if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size) | ||
407 | return; | ||
408 | if (tlen > 0) | ||
409 | { | ||
410 | char x[tlen + 1]; | ||
411 | |||
412 | memcpy (x, | ||
413 | &data[17 + RAFF4_HDR_SIZE], | ||
414 | tlen); | ||
415 | x[tlen] = '\0'; | ||
416 | ADD (x, | ||
417 | EXTRACTOR_METATYPE_TITLE); | ||
418 | } | ||
419 | if (alen > 0) | ||
420 | { | ||
421 | char x[alen + 1]; | ||
422 | |||
423 | memcpy (x, | ||
424 | &data[18 + RAFF4_HDR_SIZE + tlen], | ||
425 | alen); | ||
426 | x[alen] = '\0'; | ||
427 | ADD (x, | ||
428 | EXTRACTOR_METATYPE_AUTHOR_NAME); | ||
429 | } | ||
430 | if (clen > 0) | ||
431 | { | ||
432 | char x[clen + 1]; | ||
433 | |||
434 | memcpy (x, | ||
435 | &data[19 + RAFF4_HDR_SIZE + tlen + alen], | ||
436 | clen); | ||
437 | x[clen] = '\0'; | ||
438 | ADD (x, | ||
439 | EXTRACTOR_METATYPE_COPYRIGHT); | ||
440 | } | ||
441 | if (aplen > 0) | ||
442 | { | ||
443 | char x[aplen + 1]; | ||
444 | |||
445 | memcpy (x, | ||
446 | &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen], | ||
447 | aplen); | ||
448 | x[aplen] = '\0'; | ||
449 | ADD (x, | ||
450 | EXTRACTOR_METATYPE_UNKNOWN); | ||
451 | } | ||
452 | } | ||
453 | |||
454 | |||
455 | static void | ||
456 | extract_raff (struct EXTRACTOR_ExtractContext *ec, | ||
457 | const void *ptr, | ||
458 | size_t size) | ||
459 | { | ||
460 | const uint8_t *data = ptr; | ||
461 | const struct RAFF_Header *hdr; | ||
462 | |||
463 | /* HELIX */ | ||
464 | if (size <= sizeof (*hdr) + 4) | ||
465 | return; | ||
466 | ADD ("audio/vnd.rn-realaudio", | ||
467 | EXTRACTOR_METATYPE_MIMETYPE); | ||
468 | hdr = (const struct RAFF_Header *) &data[4]; | ||
469 | switch (ntohs (hdr->version)) | ||
470 | { | ||
471 | case 3: | ||
472 | extract_raff3 (ec, | ||
473 | ptr, | ||
474 | size); | ||
475 | break; | ||
476 | case 4: | ||
477 | extract_raff4 (ec, | ||
478 | ptr, | ||
479 | size); | ||
480 | break; | ||
481 | } | ||
482 | } | ||
483 | |||
484 | |||
485 | /* old real format */ | ||
486 | static void | ||
487 | extract_real (struct EXTRACTOR_ExtractContext *ec, | ||
488 | const void *data, | ||
489 | size_t size) | ||
490 | { | ||
491 | uint64_t off = 0; | ||
492 | size_t pos = 0; | ||
493 | |||
494 | while (1) | ||
495 | { | ||
496 | uint32_t length; | ||
497 | |||
498 | if ( (pos + 8 > size) || | ||
499 | (pos + 8 < pos) || | ||
500 | (pos + (length = ntohl (((uint32_t *) (data + pos))[1])) > size) ) | ||
501 | { | ||
502 | uint64_t noff; | ||
503 | void *in; | ||
504 | ssize_t isize; | ||
505 | |||
506 | noff = ec->seek (ec->cls, | ||
507 | off + pos, | ||
508 | SEEK_SET); | ||
509 | if (-1 == noff) | ||
510 | return; | ||
511 | isize = ec->read (ec->cls, | ||
512 | &in, | ||
513 | 32 * 1024); | ||
514 | if (isize < 8) | ||
515 | return; | ||
516 | data = in; | ||
517 | size = isize; | ||
518 | off = noff; | ||
519 | pos = 0; | ||
520 | } | ||
521 | if (length <= 8) | ||
522 | return; | ||
523 | if ( (pos + length > size) || | ||
524 | (pos + length < pos) ) | ||
525 | return; | ||
526 | switch (ntohl (((uint32_t *) (data + pos))[0])) | ||
527 | { | ||
528 | case MDPR_HEADER: | ||
529 | processMediaProperties (data + pos, | ||
530 | ec); | ||
531 | pos += length; | ||
532 | break; | ||
533 | case CONT_HEADER: | ||
534 | processContentDescription (data + pos, | ||
535 | ec); | ||
536 | pos += length; | ||
537 | break; | ||
538 | case REAL_HEADER: /* treat like default */ | ||
539 | default: | ||
540 | pos += length; | ||
541 | break; | ||
542 | } | ||
543 | } | ||
544 | } | ||
545 | |||
546 | |||
547 | /** | ||
548 | * "extract" metadata from a REAL file | ||
549 | * | ||
550 | * @param ec extraction context | ||
551 | */ | ||
552 | void | ||
553 | EXTRACTOR_real_extract_method (struct EXTRACTOR_ExtractContext *ec) | ||
554 | { | ||
555 | void *data; | ||
556 | size_t n; | ||
557 | |||
558 | n = ec->read (ec->cls, | ||
559 | &data, | ||
560 | sizeof (struct RAFF4_Header) + 4 * 256); | ||
561 | if (n < sizeof (uint32_t)) | ||
562 | return; | ||
563 | switch (ntohl (*(uint32_t *) data)) | ||
564 | { | ||
565 | case RAFF4_HEADER: | ||
566 | extract_raff (ec, | ||
567 | data, | ||
568 | n); | ||
569 | break; | ||
570 | case REAL_HEADER: | ||
571 | extract_real (ec, | ||
572 | data, | ||
573 | n); | ||
574 | break; | ||
575 | } | ||
576 | } | ||
577 | |||
578 | |||
579 | /* end of real_extractor.c */ | ||
diff --git a/src/plugins/test_real.c b/src/plugins/test_real.c new file mode 100644 index 0000000..98e2af4 --- /dev/null +++ b/src/plugins/test_real.c | |||
@@ -0,0 +1,104 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | Copyright (C) 2012 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file plugins/test_real.c | ||
22 | * @brief testcase for real plugin | ||
23 | * @author Christian Grothoff | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "test_lib.h" | ||
27 | |||
28 | |||
29 | /** | ||
30 | * Main function for the REAL testcase. | ||
31 | * | ||
32 | * @param argc number of arguments (ignored) | ||
33 | * @param argv arguments (ignored) | ||
34 | * @return 0 on success | ||
35 | */ | ||
36 | int | ||
37 | main (int argc, char *argv[]) | ||
38 | { | ||
39 | struct SolutionData real_audiosig_sol[] = { | ||
40 | { | ||
41 | EXTRACTOR_METATYPE_MIMETYPE, | ||
42 | EXTRACTOR_METAFORMAT_C_STRING, | ||
43 | "text/plain", | ||
44 | "audio/x-pn-realaudio", | ||
45 | strlen ("audio/x-pn-realaudio") + 1, | ||
46 | 0 | ||
47 | }, | ||
48 | { | ||
49 | EXTRACTOR_METATYPE_TITLE, | ||
50 | EXTRACTOR_METAFORMAT_C_STRING, | ||
51 | "text/plain", | ||
52 | "Welcome!", | ||
53 | strlen ("Welcome!") + 1, | ||
54 | 0 | ||
55 | }, | ||
56 | { | ||
57 | EXTRACTOR_METATYPE_COPYRIGHT, | ||
58 | EXTRACTOR_METAFORMAT_C_STRING, | ||
59 | "text/plain", | ||
60 | "1998, RealNetworks, Inc.", | ||
61 | strlen ("1998, RealNetworks, Inc.") + 1, | ||
62 | 0 | ||
63 | }, | ||
64 | { 0, 0, NULL, NULL, 0, -1 } | ||
65 | }; | ||
66 | struct SolutionData real_ra3_sol[] = { | ||
67 | { | ||
68 | EXTRACTOR_METATYPE_MIMETYPE, | ||
69 | EXTRACTOR_METAFORMAT_C_STRING, | ||
70 | "text/plain", | ||
71 | "audio/vnd.rn-realaudio", | ||
72 | strlen ("audio/vnd.rn-realaudio") + 1, | ||
73 | 0 | ||
74 | }, | ||
75 | { | ||
76 | EXTRACTOR_METATYPE_TITLE, | ||
77 | EXTRACTOR_METAFORMAT_C_STRING, | ||
78 | "text/plain", | ||
79 | "Song of Welcome", | ||
80 | strlen ("Song of Welcome") + 1, | ||
81 | 0 | ||
82 | }, | ||
83 | { | ||
84 | EXTRACTOR_METATYPE_AUTHOR_NAME, | ||
85 | EXTRACTOR_METAFORMAT_C_STRING, | ||
86 | "text/plain", | ||
87 | "Investiture Service", | ||
88 | strlen ("Investiture Service") + 1, | ||
89 | 0 | ||
90 | }, | ||
91 | { 0, 0, NULL, NULL, 0, -1 } | ||
92 | }; | ||
93 | struct ProblemSet ps[] = { | ||
94 | { "testdata/audiosig.rm", | ||
95 | real_audiosig_sol }, | ||
96 | { "testdata/ra3.ra", | ||
97 | real_ra3_sol }, | ||
98 | { NULL, NULL } | ||
99 | }; | ||
100 | return ET_main ("real", ps); | ||
101 | } | ||
102 | |||
103 | |||
104 | /* end of test_real.c */ | ||
diff --git a/src/plugins/testdata/audiosig.rm b/src/plugins/testdata/audiosig.rm new file mode 100644 index 0000000..6307d30 --- /dev/null +++ b/src/plugins/testdata/audiosig.rm | |||
Binary files differ | |||
diff --git a/src/plugins/testdata/ra3.ra b/src/plugins/testdata/ra3.ra new file mode 100644 index 0000000..d36569f --- /dev/null +++ b/src/plugins/testdata/ra3.ra | |||
Binary files differ | |||
diff --git a/src/plugins/vlc_extractor.c b/src/plugins/vlc_extractor.c new file mode 100644 index 0000000..e90b3ea --- /dev/null +++ b/src/plugins/vlc_extractor.c | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | Copyright (C) 2021 Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | Boston, MA 02110-1301, USA. | ||
19 | |||
20 | NOTE: This plugin is not yet working. Somehow libvlc never calls any of the IO callbacks. | ||
21 | |||
22 | */ | ||
23 | /** | ||
24 | * @file plugins/vlc_extractor.c | ||
25 | * @brief plugin to extract metadata using libvlc | ||
26 | * @author Christian Grothoff | ||
27 | */ | ||
28 | #include "platform.h" | ||
29 | #include "extractor.h" | ||
30 | #include <vlc/vlc.h> | ||
31 | #include <signal.h> | ||
32 | |||
33 | /** | ||
34 | * Function to help VLC open a custom bitstream input media. | ||
35 | * | ||
36 | * The same media item can be opened multiple times. Each time, this callback | ||
37 | * is invoked. It should allocate and initialize any instance-specific | ||
38 | * resources, then store them in *datap. The instance resources can be freed | ||
39 | * in the @ref libvlc_media_close_cb callback. | ||
40 | * | ||
41 | * @param opaque our `struct EXTRACTOR_ExtractContext` | ||
42 | * @param[out] datap storage space for a private data pointer | ||
43 | * @param[out] sizep byte length of the bitstream or UINT64_MAX if unknown | ||
44 | * | ||
45 | * @note For convenience, *datap is initially NULL and *sizep is initially 0. | ||
46 | * | ||
47 | * @return 0 on success, non-zero on error. In case of failure, the other | ||
48 | * callbacks will not be invoked and any value stored in *datap and *sizep is | ||
49 | * discarded. | ||
50 | */ | ||
51 | static int | ||
52 | open_cb (void *opaque, | ||
53 | void **datap, | ||
54 | uint64_t *sizep) | ||
55 | { | ||
56 | struct EXTRACTOR_ExtractContext *ec = opaque; | ||
57 | |||
58 | *datap = ec; | ||
59 | *sizep = ec->get_size (ec->cls); | ||
60 | if (UINT64_MAX == *sizep) | ||
61 | { | ||
62 | fprintf (stderr, | ||
63 | "Open failed!\n"); | ||
64 | return 1; | ||
65 | } | ||
66 | fprintf (stderr, | ||
67 | "Open returns %llu file size!\n", | ||
68 | (unsigned long long) *sizep); | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | |||
73 | /** | ||
74 | * Function to help VLC read data from a custom bitstream input media. | ||
75 | * | ||
76 | * @param opaque our `struct EXTRACTOR_ExtractContext` | ||
77 | * @param buf start address of the buffer to read data into | ||
78 | * @param len bytes length of the buffer | ||
79 | * @return strictly positive number of bytes read, 0 on end-of-stream, | ||
80 | * or -1 on non-recoverable error | ||
81 | * | ||
82 | * @note If no data is immediately available, then the callback should sleep. | ||
83 | * @warning The application is responsible for avoiding deadlock situations. | ||
84 | * In particular, the callback should return an error if playback is stopped; | ||
85 | * if it does not return, then libvlc_media_player_stop() will never return. | ||
86 | */ | ||
87 | static ssize_t | ||
88 | read_cb (void *opaque, | ||
89 | unsigned char *buf, | ||
90 | size_t len) | ||
91 | { | ||
92 | struct EXTRACTOR_ExtractContext *ec = opaque; | ||
93 | void *data; | ||
94 | ssize_t ret; | ||
95 | |||
96 | ret = ec->read (ec->cls, | ||
97 | &data, | ||
98 | len); | ||
99 | if (-1 == ret) | ||
100 | { | ||
101 | fprintf (stderr, | ||
102 | "Read failed!\n"); | ||
103 | return -1; | ||
104 | } | ||
105 | memcpy (buf, | ||
106 | data, | ||
107 | ret); | ||
108 | fprintf (stderr, | ||
109 | "Read %u bytes!\n", | ||
110 | (unsigned int) ret); | ||
111 | return ret; | ||
112 | } | ||
113 | |||
114 | |||
115 | /** | ||
116 | * Allow VLC to seek a custom bitstream input media. | ||
117 | * | ||
118 | * @param opaque our `struct EXTRACTOR_ExtractContext` | ||
119 | * @param offset absolute byte offset to seek to | ||
120 | * @return 0 on success, -1 on error. | ||
121 | */ | ||
122 | static int | ||
123 | seek_cb (void *opaque, | ||
124 | uint64_t offset) | ||
125 | { | ||
126 | struct EXTRACTOR_ExtractContext *ec = opaque; | ||
127 | |||
128 | fprintf (stderr, | ||
129 | "Seek to %llu!\n", | ||
130 | (unsigned long long) offset); | ||
131 | if (offset > INT64_MAX) | ||
132 | { | ||
133 | fprintf (stderr, | ||
134 | "Excessive seek, impossible with LE!\n"); | ||
135 | return -1; | ||
136 | } | ||
137 | if (-1 == | ||
138 | ec->seek (ec->cls, | ||
139 | offset, | ||
140 | SEEK_SET)) | ||
141 | { | ||
142 | fprintf (stderr, | ||
143 | "Seek failed!\n"); | ||
144 | return -1; | ||
145 | } | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | |||
/**
 * "close" callback handed to libvlc for our custom input media.
 * There is nothing to release; it only logs that it was called.
 *
 * @param opaque our `struct EXTRACTOR_ExtractContext` (unused)
 */
static void
close_cb (void *opaque)
{
  /* no per-instance resources to free */
  fputs ("Close called\n",
         stderr);
}
162 | |||
163 | |||
164 | static void | ||
165 | extract (struct EXTRACTOR_ExtractContext *ec, | ||
166 | libvlc_media_t *media) | ||
167 | { | ||
168 | struct | ||
169 | { | ||
170 | enum libvlc_meta_t vt; | ||
171 | enum EXTRACTOR_MetaType mt; | ||
172 | } map[] = { | ||
173 | { libvlc_meta_Title, | ||
174 | EXTRACTOR_METATYPE_TITLE }, | ||
175 | { libvlc_meta_Artist, | ||
176 | EXTRACTOR_METATYPE_ARTIST }, | ||
177 | { libvlc_meta_Genre, | ||
178 | EXTRACTOR_METATYPE_GENRE }, | ||
179 | { libvlc_meta_Copyright, | ||
180 | EXTRACTOR_METATYPE_COPYRIGHT }, | ||
181 | { libvlc_meta_Album, | ||
182 | EXTRACTOR_METATYPE_ALBUM }, | ||
183 | { libvlc_meta_TrackNumber, | ||
184 | EXTRACTOR_METATYPE_TRACK_NUMBER }, | ||
185 | { libvlc_meta_Description, | ||
186 | EXTRACTOR_METATYPE_DESCRIPTION }, | ||
187 | { libvlc_meta_Rating, | ||
188 | EXTRACTOR_METATYPE_RATING }, | ||
189 | { libvlc_meta_Date, | ||
190 | EXTRACTOR_METATYPE_CREATION_TIME }, | ||
191 | { libvlc_meta_Setting, | ||
192 | EXTRACTOR_METATYPE_UNKNOWN }, | ||
193 | { libvlc_meta_URL, | ||
194 | EXTRACTOR_METATYPE_URL }, | ||
195 | { libvlc_meta_Language, | ||
196 | EXTRACTOR_METATYPE_LANGUAGE }, | ||
197 | { libvlc_meta_NowPlaying, | ||
198 | EXTRACTOR_METATYPE_UNKNOWN }, | ||
199 | { libvlc_meta_Publisher, | ||
200 | EXTRACTOR_METATYPE_PUBLISHER }, | ||
201 | { libvlc_meta_EncodedBy, | ||
202 | EXTRACTOR_METATYPE_ENCODED_BY }, | ||
203 | { libvlc_meta_ArtworkURL, | ||
204 | EXTRACTOR_METATYPE_URL }, | ||
205 | { libvlc_meta_TrackID, | ||
206 | EXTRACTOR_METATYPE_TRACK_NUMBER }, | ||
207 | { libvlc_meta_TrackTotal, | ||
208 | EXTRACTOR_METATYPE_UNKNOWN }, | ||
209 | { libvlc_meta_Director, | ||
210 | EXTRACTOR_METATYPE_MOVIE_DIRECTOR }, | ||
211 | { libvlc_meta_Season, | ||
212 | EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER }, | ||
213 | { libvlc_meta_Episode, | ||
214 | EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER }, | ||
215 | { libvlc_meta_ShowName, | ||
216 | EXTRACTOR_METATYPE_SHOW_NAME }, | ||
217 | { libvlc_meta_Actors, | ||
218 | EXTRACTOR_METATYPE_PERFORMER }, | ||
219 | { libvlc_meta_AlbumArtist, | ||
220 | EXTRACTOR_METATYPE_ARTIST }, | ||
221 | { libvlc_meta_DiscNumber, | ||
222 | EXTRACTOR_METATYPE_DISC_NUMBER }, | ||
223 | { libvlc_meta_DiscTotal, | ||
224 | EXTRACTOR_METATYPE_UNKNOWN }, | ||
225 | { 0, 0 } | ||
226 | }; | ||
227 | |||
228 | for (unsigned int i = 0; | ||
229 | EXTRACTOR_METATYPE_RESERVED != map[i].mt; | ||
230 | i++) | ||
231 | { | ||
232 | char *meta; | ||
233 | |||
234 | fprintf (stderr, | ||
235 | "."); | ||
236 | meta = libvlc_media_get_meta (media, | ||
237 | map[i].vt); | ||
238 | if (NULL == meta) | ||
239 | continue; | ||
240 | ec->proc (ec->cls, | ||
241 | "vlc", | ||
242 | map[i].mt, | ||
243 | EXTRACTOR_METAFORMAT_UTF8, /* ??? */ | ||
244 | "text/plain", | ||
245 | meta, | ||
246 | strlen (meta) + 1); | ||
247 | free (meta); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | |||
252 | static void | ||
253 | media_ready (const struct libvlc_event_t *p_event, | ||
254 | void *p_data) | ||
255 | { | ||
256 | fprintf (stderr, | ||
257 | "media status: %d, %d\n", | ||
258 | p_event->type == libvlc_MediaParsedChanged, | ||
259 | p_event->u.media_parsed_changed.new_status); | ||
260 | if (p_event->u.media_parsed_changed.new_status == | ||
261 | libvlc_media_parsed_status_done) | ||
262 | { | ||
263 | fprintf (stderr, | ||
264 | "media ready\n"); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | |||
269 | /** | ||
270 | * Extract information using libvlc | ||
271 | * | ||
272 | * @param ec extraction context | ||
273 | */ | ||
274 | void | ||
275 | EXTRACTOR_vlc_extract_method (struct EXTRACTOR_ExtractContext *ec) | ||
276 | { | ||
277 | libvlc_instance_t *vlc; | ||
278 | libvlc_media_t *media; | ||
279 | libvlc_event_manager_t *em; | ||
280 | |||
281 | { | ||
282 | sigset_t set; | ||
283 | |||
284 | signal (SIGCHLD, SIG_DFL); | ||
285 | sigemptyset (&set); | ||
286 | sigaddset (&set, SIGPIPE); | ||
287 | pthread_sigmask (SIG_BLOCK, &set, NULL); | ||
288 | } | ||
289 | |||
290 | vlc = libvlc_new (0, NULL); | ||
291 | if (NULL == vlc) | ||
292 | return; | ||
293 | media = libvlc_media_new_callbacks (vlc, | ||
294 | &open_cb, | ||
295 | &read_cb, | ||
296 | &seek_cb, | ||
297 | &close_cb, | ||
298 | ec); | ||
299 | if (NULL == media) | ||
300 | { | ||
301 | libvlc_release (vlc); | ||
302 | return; | ||
303 | } | ||
304 | |||
305 | em = libvlc_media_event_manager (media); | ||
306 | libvlc_event_attach (em, | ||
307 | libvlc_MediaParsedChanged, | ||
308 | &media_ready, | ||
309 | ec); | ||
310 | fprintf (stderr, | ||
311 | "Triggering parser\n"); | ||
312 | { | ||
313 | int status; | ||
314 | |||
315 | status = libvlc_media_parse_with_options (media, | ||
316 | libvlc_media_fetch_local | ||
317 | | libvlc_media_parse_network | ||
318 | | libvlc_media_fetch_network, | ||
319 | 30000); /* 30s timeout */ | ||
320 | fprintf (stderr, | ||
321 | "Status: %d\n", | ||
322 | status); | ||
323 | } | ||
324 | fprintf (stderr, | ||
325 | "Sleeping\n"); | ||
326 | sleep (1); | ||
327 | extract (ec, | ||
328 | media); | ||
329 | libvlc_media_release (media); | ||
330 | libvlc_release (vlc); | ||
331 | } | ||
332 | |||
333 | |||
334 | /* end of vlc_extractor.c */ | ||