aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/qt_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/qt_extractor.c')
-rw-r--r--src/plugins/qt_extractor.c1144
1 files changed, 1144 insertions, 0 deletions
diff --git a/src/plugins/qt_extractor.c b/src/plugins/qt_extractor.c
new file mode 100644
index 0000000..3abd543
--- /dev/null
+++ b/src/plugins/qt_extractor.c
@@ -0,0 +1,1144 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23#include <zlib.h>
24#include <math.h>
25
26#define DEBUG 0
27
28/* verbatim from mp3extractor */
29static const char *const genre_names[] = {
30 gettext_noop ("Blues"),
31 gettext_noop ("Classic Rock"),
32 gettext_noop ("Country"),
33 gettext_noop ("Dance"),
34 gettext_noop ("Disco"),
35 gettext_noop ("Funk"),
36 gettext_noop ("Grunge"),
37 gettext_noop ("Hip-Hop"),
38 gettext_noop ("Jazz"),
39 gettext_noop ("Metal"),
40 gettext_noop ("New Age"),
41 gettext_noop ("Oldies"),
42 gettext_noop ("Other"),
43 gettext_noop ("Pop"),
44 gettext_noop ("R&B"),
45 gettext_noop ("Rap"),
46 gettext_noop ("Reggae"),
47 gettext_noop ("Rock"),
48 gettext_noop ("Techno"),
49 gettext_noop ("Industrial"),
50 gettext_noop ("Alternative"),
51 gettext_noop ("Ska"),
52 gettext_noop ("Death Metal"),
53 gettext_noop ("Pranks"),
54 gettext_noop ("Soundtrack"),
55 gettext_noop ("Euro-Techno"),
56 gettext_noop ("Ambient"),
57 gettext_noop ("Trip-Hop"),
58 gettext_noop ("Vocal"),
59 gettext_noop ("Jazz+Funk"),
60 gettext_noop ("Fusion"),
61 gettext_noop ("Trance"),
62 gettext_noop ("Classical"),
63 gettext_noop ("Instrumental"),
64 gettext_noop ("Acid"),
65 gettext_noop ("House"),
66 gettext_noop ("Game"),
67 gettext_noop ("Sound Clip"),
68 gettext_noop ("Gospel"),
69 gettext_noop ("Noise"),
70 gettext_noop ("Alt. Rock"),
71 gettext_noop ("Bass"),
72 gettext_noop ("Soul"),
73 gettext_noop ("Punk"),
74 gettext_noop ("Space"),
75 gettext_noop ("Meditative"),
76 gettext_noop ("Instrumental Pop"),
77 gettext_noop ("Instrumental Rock"),
78 gettext_noop ("Ethnic"),
79 gettext_noop ("Gothic"),
80 gettext_noop ("Darkwave"),
81 gettext_noop ("Techno-Industrial"),
82 gettext_noop ("Electronic"),
83 gettext_noop ("Pop-Folk"),
84 gettext_noop ("Eurodance"),
85 gettext_noop ("Dream"),
86 gettext_noop ("Southern Rock"),
87 gettext_noop ("Comedy"),
88 gettext_noop ("Cult"),
89 gettext_noop ("Gangsta Rap"),
90 gettext_noop ("Top 40"),
91 gettext_noop ("Christian Rap"),
92 gettext_noop ("Pop/Funk"),
93 gettext_noop ("Jungle"),
94 gettext_noop ("Native American"),
95 gettext_noop ("Cabaret"),
96 gettext_noop ("New Wave"),
97 gettext_noop ("Psychedelic"),
98 gettext_noop ("Rave"),
99 gettext_noop ("Showtunes"),
100 gettext_noop ("Trailer"),
101 gettext_noop ("Lo-Fi"),
102 gettext_noop ("Tribal"),
103 gettext_noop ("Acid Punk"),
104 gettext_noop ("Acid Jazz"),
105 gettext_noop ("Polka"),
106 gettext_noop ("Retro"),
107 gettext_noop ("Musical"),
108 gettext_noop ("Rock & Roll"),
109 gettext_noop ("Hard Rock"),
110 gettext_noop ("Folk"),
111 gettext_noop ("Folk/Rock"),
112 gettext_noop ("National Folk"),
113 gettext_noop ("Swing"),
114 gettext_noop ("Fast-Fusion"),
115 gettext_noop ("Bebob"),
116 gettext_noop ("Latin"),
117 gettext_noop ("Revival"),
118 gettext_noop ("Celtic"),
119 gettext_noop ("Bluegrass"),
120 gettext_noop ("Avantgarde"),
121 gettext_noop ("Gothic Rock"),
122 gettext_noop ("Progressive Rock"),
123 gettext_noop ("Psychedelic Rock"),
124 gettext_noop ("Symphonic Rock"),
125 gettext_noop ("Slow Rock"),
126 gettext_noop ("Big Band"),
127 gettext_noop ("Chorus"),
128 gettext_noop ("Easy Listening"),
129 gettext_noop ("Acoustic"),
130 gettext_noop ("Humour"),
131 gettext_noop ("Speech"),
132 gettext_noop ("Chanson"),
133 gettext_noop ("Opera"),
134 gettext_noop ("Chamber Music"),
135 gettext_noop ("Sonata"),
136 gettext_noop ("Symphony"),
137 gettext_noop ("Booty Bass"),
138 gettext_noop ("Primus"),
139 gettext_noop ("Porn Groove"),
140 gettext_noop ("Satire"),
141 gettext_noop ("Slow Jam"),
142 gettext_noop ("Club"),
143 gettext_noop ("Tango"),
144 gettext_noop ("Samba"),
145 gettext_noop ("Folklore"),
146 gettext_noop ("Ballad"),
147 gettext_noop ("Power Ballad"),
148 gettext_noop ("Rhythmic Soul"),
149 gettext_noop ("Freestyle"),
150 gettext_noop ("Duet"),
151 gettext_noop ("Punk Rock"),
152 gettext_noop ("Drum Solo"),
153 gettext_noop ("A Cappella"),
154 gettext_noop ("Euro-House"),
155 gettext_noop ("Dance Hall"),
156 gettext_noop ("Goa"),
157 gettext_noop ("Drum & Bass"),
158 gettext_noop ("Club-House"),
159 gettext_noop ("Hardcore"),
160 gettext_noop ("Terror"),
161 gettext_noop ("Indie"),
162 gettext_noop ("BritPop"),
163 gettext_noop ("Negerpunk"),
164 gettext_noop ("Polsk Punk"),
165 gettext_noop ("Beat"),
166 gettext_noop ("Christian Gangsta Rap"),
167 gettext_noop ("Heavy Metal"),
168 gettext_noop ("Black Metal"),
169 gettext_noop ("Crossover"),
170 gettext_noop ("Contemporary Christian"),
171 gettext_noop ("Christian Rock"),
172 gettext_noop ("Merengue"),
173 gettext_noop ("Salsa"),
174 gettext_noop ("Thrash Metal"),
175 gettext_noop ("Anime"),
176 gettext_noop ("JPop"),
177 gettext_noop ("Synthpop"),
178};
179
/* Number of entries in genre_names, as an unsigned int. */
#define GENRE_NAME_COUNT \
  ((unsigned int) (sizeof genre_names / sizeof genre_names[0]))
182
183
/*
 * Language names indexed by the 16-bit language code stored in QuickTime
 * "international text" user data (the lookup is a direct array index).
 *
 * Apple's language code table assigns ONE code to each of the alias pairs
 * Saamisk/Lappish (29) and Galla/Oromo (87).  Listing both aliases as
 * separate entries shifts every later code by one or two positions, so
 * each pair is a single entry here and codes 0..94 line up with the table.
 *
 * NOTE(review): the last eleven names (Welsh .. JavaneseRom) carry codes
 * 128..138 in Apple's table.  They are kept at the end of the array so no
 * name is lost, but a direct index cannot reach them at their real codes;
 * the lookup site would have to translate codes >= 128 first.
 */
static const char *const languages[] = {
  /*   0 */
  "English",
  "French",
  "German",
  "Italian",
  "Dutch",
  "Swedish",
  "Spanish",
  "Danish",
  "Portuguese",
  "Norwegian",
  /*  10 */
  "Hebrew",
  "Japanese",
  "Arabic",
  "Finnish",
  "Greek",
  "Icelandic",
  "Maltese",
  "Turkish",
  "Croatian",
  "Traditional Chinese",
  /*  20 */
  "Urdu",
  "Hindi",
  "Thai",
  "Korean",
  "Lithuanian",
  "Polish",
  "Hungarian",
  "Estonian",
  "Lettish",
  "Saamisk",                    /* 29, also known as Lappish */
  /*  30 */
  "Faeroese",
  "Farsi",
  "Russian",
  "Simplified Chinese",
  "Flemish",
  "Irish",
  "Albanian",
  "Romanian",
  "Czech",
  "Slovak",
  /*  40 */
  "Slovenian",
  "Yiddish",
  "Serbian",
  "Macedonian",
  "Bulgarian",
  "Ukrainian",
  "Byelorussian",
  "Uzbek",
  "Kazakh",
  "Azerbaijani",
  /*  50 */
  "AzerbaijanAr",
  "Armenian",
  "Georgian",
  "Moldavian",
  "Kirghiz",
  "Tajiki",
  "Turkmen",
  "Mongolian",
  "MongolianCyr",
  "Pashto",
  /*  60 */
  "Kurdish",
  "Kashmiri",
  "Sindhi",
  "Tibetan",
  "Nepali",
  "Sanskrit",
  "Marathi",
  "Bengali",
  "Assamese",
  "Gujarati",
  /*  70 */
  "Punjabi",
  "Oriya",
  "Malayalam",
  "Kannada",
  "Tamil",
  "Telugu",
  "Sinhalese",
  "Burmese",
  "Khmer",
  "Lao",
  /*  80 */
  "Vietnamese",
  "Indonesian",
  "Tagalog",
  "MalayRoman",
  "MalayArabic",
  "Amharic",
  "Tigrinya",
  "Oromo",                      /* 87, also known as Galla */
  "Somali",
  "Swahili",
  /*  90 */
  "Ruanda",
  "Rundi",
  "Chewa",
  "Malagasy",
  "Esperanto",
  /* Apple codes 128..138, see NOTE(review) above */
  "Welsh",
  "Basque",
  "Catalan",
  "Latin",
  "Quechua",
  "Guarani",
  "Aymara",
  "Tatar",
  "Uighur",
  "Dzongkha",
  "JavaneseRom",
};
294
295
/* Pairs a four-character file type brand with the mime type to report. */
typedef struct
{
  /* brand as it appears in the file */
  const char *ext;
  /* mime type reported for that brand */
  const char *mime;
} C2M;
301
302/* see http://www.mp4ra.org/filetype.html
303 * http://www.ftyps.com/ */
304static C2M ftMap[] = {
305 {"qt ", "video/quicktime"},
306 {"isom", "video/mp4"}, /* ISO Base Media files */
307 {"iso2", "video/mp4"},
308 {"mp41", "video/mp4"}, /* MPEG-4 (ISO/IEC 14491-1) version 1 */
309 {"mp42", "video/mp4"}, /* MPEG-4 (ISO/IEC 14491-1) version 2 */
310 {"3gp1", "video/3gpp"},
311 {"3gp2", "video/3gpp"},
312 {"3gp3", "video/3gpp"},
313 {"3gp4", "video/3gpp"},
314 {"3gp5", "video/3gpp"},
315 {"3g2a", "video/3gpp2"},
316 {"mmp4", "video/mp4"}, /* Mobile MPEG-4 */
317 {"M4A ", "audio/mp4"},
318 {"M4B ", "audio/mp4"},
319 {"M4P ", "audio/mp4"},
320 {"M4V ", "video/mp4"},
321 {"mj2s", "video/mj2"}, /* Motion JPEG 2000 */
322 {"mjp2", "video/mj2"},
323 {NULL, NULL},
324};
325
326typedef struct CHE
327{
328 const char *pfx;
329 enum EXTRACTOR_MetaType type;
330} CHE;
331
332static CHE cHm[] = {
333 {"aut", EXTRACTOR_METATYPE_AUTHOR_NAME},
334 {"cpy", EXTRACTOR_METATYPE_COPYRIGHT},
335 {"day", EXTRACTOR_METATYPE_CREATION_DATE},
336 {"ed1", EXTRACTOR_METATYPE_MODIFICATION_DATE},
337 {"ed2", EXTRACTOR_METATYPE_MODIFICATION_DATE},
338 {"ed3", EXTRACTOR_METATYPE_MODIFICATION_DATE},
339 {"ed4", EXTRACTOR_METATYPE_MODIFICATION_DATE},
340 {"ed5", EXTRACTOR_METATYPE_MODIFICATION_DATE},
341 {"ed6", EXTRACTOR_METATYPE_MODIFICATION_DATE},
342 {"ed7", EXTRACTOR_METATYPE_MODIFICATION_DATE},
343 {"ed8", EXTRACTOR_METATYPE_MODIFICATION_DATE},
344 {"ed9", EXTRACTOR_METATYPE_MODIFICATION_DATE},
345 {"cmt", EXTRACTOR_METATYPE_COMMENT},
346 {"url", EXTRACTOR_METATYPE_URL},
347 {"enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
348 {"hst", EXTRACTOR_METATYPE_BUILDHOST},
349 {"nam", EXTRACTOR_METATYPE_TITLE},
350 {"gen", EXTRACTOR_METATYPE_GENRE},
351 {"mak", EXTRACTOR_METATYPE_CAMERA_MAKE},
352 {"mod", EXTRACTOR_METATYPE_CAMERA_MODEL},
353 {"des", EXTRACTOR_METATYPE_DESCRIPTION},
354 {"dis", EXTRACTOR_METATYPE_DISCLAIMER},
355 {"dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR},
356 {"src", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME},
357 {"prf", EXTRACTOR_METATYPE_PERFORMER },
358 {"prd", EXTRACTOR_METATYPE_PRODUCER},
359 {"PRD", EXTRACTOR_METATYPE_PRODUCT_VERSION},
360 {"swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
361 {"isr", EXTRACTOR_METATYPE_ISRC},
362 {"wrt", EXTRACTOR_METATYPE_WRITER},
363 {"wrn", EXTRACTOR_METATYPE_WARNING},
364 {"chp", EXTRACTOR_METATYPE_CHAPTER_NAME},
365 {"inf", EXTRACTOR_METATYPE_DESCRIPTION},
366 {"req", EXTRACTOR_METATYPE_TARGET_PLATFORM}, /* hardware requirements */
367 {"fmt", EXTRACTOR_METATYPE_FORMAT},
368 {NULL, EXTRACTOR_METATYPE_RESERVED },
369};
370
371
372typedef struct
373{
374 const char *atom_type;
375 enum EXTRACTOR_MetaType type;
376} ITTagConversionEntry;
377
378/* iTunes Tags:
379 * see http://atomicparsley.sourceforge.net/mpeg-4files.html */
380static ITTagConversionEntry it_to_extr_table[] = {
381 {"\xa9" "alb", EXTRACTOR_METATYPE_ALBUM},
382 {"\xa9" "ART", EXTRACTOR_METATYPE_ARTIST},
383 {"aART", EXTRACTOR_METATYPE_ARTIST},
384 {"\xa9" "cmt", EXTRACTOR_METATYPE_COMMENT},
385 {"\xa9" "day", EXTRACTOR_METATYPE_UNKNOWN_DATE},
386 {"\xa9" "nam", EXTRACTOR_METATYPE_TITLE},
387 {"trkn", EXTRACTOR_METATYPE_TRACK_NUMBER},
388 {"disk", EXTRACTOR_METATYPE_DISC_NUMBER},
389 {"\xa9" "gen", EXTRACTOR_METATYPE_GENRE},
390 {"gnre", EXTRACTOR_METATYPE_GENRE},
391 {"\xa9" "wrt", EXTRACTOR_METATYPE_WRITER},
392 {"\xa9" "too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
393 {"cprt", EXTRACTOR_METATYPE_COPYRIGHT},
394 {"\xa9" "grp", EXTRACTOR_METATYPE_GROUP},
395 {"catg", EXTRACTOR_METATYPE_SECTION},
396 {"keyw", EXTRACTOR_METATYPE_KEYWORDS},
397 {"desc", EXTRACTOR_METATYPE_DESCRIPTION},
398 {"tvnn", EXTRACTOR_METATYPE_TV_NETWORK_NAME},
399 {"tvsh", EXTRACTOR_METATYPE_TV_SHOW_NAME},
400 {"tven", EXTRACTOR_METATYPE_TV_NETWORK_NAME},
401 {NULL, EXTRACTOR_METATYPE_RESERVED}
402};
403
404
/*
 * Header found at the start of every atom.  Both fields are stored in
 * network byte order; a size of 1 announces the extended LongAtom header.
 */
typedef struct
{
  /* size of the atom, header included */
  unsigned int size;
  /* four-character atom type */
  unsigned int type;
} Atom;
410
/*
 * Extended atom header, used when the 32-bit size field holds 1.  The
 * real size then follows the type as a 64-bit value.
 */
typedef struct
{
  /* always 1 for an extended header */
  unsigned int one;
  /* four-character atom type */
  unsigned int type;
  /* size of the atom, header included */
  unsigned long long size;
} LongAtom;
417
/**
 * Convert a 64-bit value from network (big-endian) byte order to host
 * byte order.
 *
 * The value is decoded byte by byte from its in-memory representation, so
 * the result is correct on any host and does not depend on byte order
 * macros.  (An "#if __BYTE_ORDER == __BIG_ENDIAN" test silently evaluates
 * to "0 == 0" when neither macro is defined, which would return the value
 * unswapped on little-endian machines.)
 *
 * @param n value as read from the file, in network byte order
 * @return the same value in host byte order
 */
static unsigned long long
ntohll (unsigned long long n)
{
  unsigned char bytes[sizeof n];
  unsigned long long host;
  size_t i;

  memcpy (bytes, &n, sizeof n);
  host = 0;
  /* most significant byte comes first in network order */
  for (i = 0; i < sizeof n; i++)
    host = (host << 8) | bytes[i];
  return host;
}
427
428
/**
 * Check if at position pos there is a valid atom.
 *
 * An atom is valid if its header (8 bytes, or 16 bytes when the 32-bit
 * size field is 1 and a 64-bit size follows the type) lies inside the
 * buffer, its size is at least the size of that header, and the whole
 * atom fits between pos and the end of the buffer.
 *
 * The size fields are decoded byte by byte: atoms can start at any offset
 * of the buffer, so reading them through a struct pointer would be a
 * misaligned access.
 *
 * @param buffer start of the data
 * @param size number of bytes in buffer
 * @param pos offset of the atom in buffer
 * @return 0 if the atom is invalid, 1 if it is valid
 */
static int
checkAtomValid (const char *buffer, size_t size, size_t pos)
{
  enum { SHORT_HEADER = 8, LONG_HEADER = 16 };
  const unsigned char *p;
  unsigned long long atomSize;
  unsigned int headerSize;
  unsigned int i;

  if ((pos >= size) || (size - pos < SHORT_HEADER))
    return 0;
  p = (const unsigned char *) &buffer[pos];
  atomSize = 0;
  for (i = 0; i < 4; i++)
    atomSize = (atomSize << 8) | p[i];
  headerSize = SHORT_HEADER;
  if (1 == atomSize)
    {
      /* extended header: the real size is the 64-bit field after the type */
      if (size - pos < LONG_HEADER)
        return 0;
      atomSize = 0;
      for (i = 8; i < 16; i++)
        atomSize = (atomSize << 8) | p[i];
      headerSize = LONG_HEADER;
    }
  /* comparing against the remaining bytes cannot overflow */
  if ((atomSize < headerSize) || (atomSize > size - pos))
    return 0;
  return 1;
}
462
/**
 * Return the total size (header included) of the atom starting at buf.
 *
 * Assumes that checkAtomValid has already been called, i.e. that the
 * 8 byte header (16 bytes if the 32-bit size field is 1) is readable.
 * The big-endian size is decoded byte by byte so that buf may have any
 * alignment.
 *
 * @param buf start of the atom
 * @return the 32-bit size, or the 64-bit size that follows the type if
 *         the 32-bit size field is 1
 */
static unsigned long long
getAtomSize (const char *buf)
{
  const unsigned char *p = (const unsigned char *) buf;
  unsigned long long atomSize;
  unsigned int i;

  atomSize = 0;
  for (i = 0; i < 4; i++)
    atomSize = (atomSize << 8) | p[i];
  if (1 != atomSize)
    return atomSize;
  /* extended header: 64-bit size stored after the type */
  atomSize = 0;
  for (i = 8; i < 16; i++)
    atomSize = (atomSize << 8) | p[i];
  return atomSize;
}
479
/**
 * Return the size of the header of the atom starting at buf.
 *
 * Assumes that checkAtomValid has already been called.  Only the 32-bit
 * size field is inspected, byte by byte, so buf may have any alignment.
 *
 * @param buf start of the atom
 * @return 16 if the atom uses the extended header (32-bit size field is
 *         1), 8 otherwise
 */
static unsigned int
getAtomHeaderSize (const char *buf)
{
  const unsigned char *p = (const unsigned char *) buf;

  if ((0 == p[0]) && (0 == p[1]) && (0 == p[2]) && (1 == p[3]))
    return 16;                  /* size marker, type, 64-bit size */
  return 8;                     /* 32-bit size, type */
}
493
494struct ExtractContext
495{
496 EXTRACTOR_MetaDataProcessor proc;
497 void *proc_cls;
498 int ret;
499};
500
501static void
502addKeyword (enum EXTRACTOR_MetaType type,
503 const char *str,
504 struct ExtractContext *ec)
505{
506 if (ec->ret != 0)
507 return;
508 ec->ret = ec->proc (ec->proc_cls,
509 "qt",
510 type,
511 EXTRACTOR_METAFORMAT_UTF8,
512 "text/plain",
513 str,
514 strlen(str)+1);
515}
516
517
518
/**
 * Signature of the functions that process one atom.  A handler assumes
 * that checkAtomValid has already been called for the atom.
 *
 * @param input start of the data
 * @param size number of bytes in input
 * @param pos offset of the atom in input
 * @param ec extraction context
 */
typedef int (*AtomHandler) (const char *input,
                            size_t size,
                            size_t pos,
                            struct ExtractContext *ec);
525
526typedef struct
527{
528 char *name;
529 AtomHandler handler;
530} HandlerEntry;
531
532/**
533 * Call the handler for the atom at the given position.
534 * Will check validity of the given atom.
535 *
536 * @return 0 on error, 1 for success, -1 for unknown atom type
537 */
538static int handleAtom (HandlerEntry *handlers,
539 const char *input,
540 size_t size,
541 size_t pos,
542 struct ExtractContext *ec);
543
544static HandlerEntry all_handlers[];
545static HandlerEntry ilst_handlers[];
546
547/**
548 * Process atoms.
549 * @return 0 on error, 1 for success, -1 for unknown atom type
550 */
551static int
552processAtoms (HandlerEntry *handlers, const char *input,
553 size_t size, struct ExtractContext *ec)
554{
555 size_t pos;
556
557 if (size < sizeof (Atom))
558 return 1;
559 pos = 0;
560 while (pos < size - sizeof (Atom))
561 {
562 if (0 == handleAtom (handlers, input, size, pos, ec))
563 return 0;
564 pos += getAtomSize (&input[pos]);
565 }
566 return 1;
567}
568
569/**
570 * Process all atoms.
571 * @return 0 on error, 1 for success, -1 for unknown atom type
572 */
573static int
574processAllAtoms (const char *input,
575 size_t size, struct ExtractContext *ec)
576{
577 return processAtoms(all_handlers, input, size, ec);
578}
579
/**
 * Handle the moov atom: a pure container, so the atoms stored after its
 * header are processed with the general dispatch table.
 *
 * @return 0 on error, 1 for success
 */
static int
moovHandler (const char *input,
             size_t size, size_t pos, struct ExtractContext *ec)
{
  const char *atom = &input[pos];
  unsigned int hdr = getAtomHeaderSize (atom);

  return processAllAtoms (&atom[hdr], getAtomSize (atom) - hdr, ec);
}
592
593/* see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap1/chapter_2_section_5.html */
594typedef struct
595{
596 Atom header;
597 /* major brand */
598 char type[4];
599 /* minor version */
600 unsigned int version;
601 /* compatible brands */
602 char compatibility[4];
603} FileType;
604
605static int
606ftypHandler (const char *input,
607 size_t size, size_t pos, struct ExtractContext *ec)
608{
609 const FileType *ft;
610 int i;
611
612 if (getAtomSize (&input[pos]) < sizeof (FileType)) {
613 return 0;
614 }
615 ft = (const FileType *) &input[pos];
616
617 i = 0;
618 while ((ftMap[i].ext != NULL) && (0 != memcmp (ft->type, ftMap[i].ext, 4)))
619 i++;
620 if (ftMap[i].ext != NULL)
621 addKeyword (EXTRACTOR_METATYPE_MIMETYPE, ftMap[i].mime, ec);
622 return 1;
623}
624
625typedef struct
626{
627 Atom hdr;
628 unsigned char version;
629 unsigned char flags[3];
630 /* in seconds since midnight, January 1, 1904 */
631 unsigned int creationTime;
632 /* in seconds since midnight, January 1, 1904 */
633 unsigned int modificationTime;
634 /* number of time units that pass per second in the movies time
635 coordinate system */
636 unsigned int timeScale;
637 /* A time value that indicates the duration of the movie in time
638 scale units. */
639 unsigned int duration;
640 unsigned int preferredRate;
641 /* A 16-bit fixed-point number that specifies how loud to
642 play. 1.0 indicates full volume */
643 unsigned short preferredVolume;
644 unsigned char reserved[10];
645 unsigned char matrix[36];
646 unsigned int previewTime;
647 unsigned int previewDuration;
648 unsigned int posterTime;
649 unsigned int selectionTime;
650 unsigned int selectionDuration;
651 unsigned int currentTime;
652 unsigned int nextTrackId;
653} MovieHeaderAtom;
654
655static int
656mvhdHandler (const char *input,
657 size_t size, size_t pos, struct ExtractContext *ec)
658{
659 const MovieHeaderAtom *m;
660 char duration[16];
661 if (getAtomSize (&input[pos]) != sizeof (MovieHeaderAtom))
662 return 0;
663 m = (const MovieHeaderAtom *) &input[pos];
664 snprintf (duration,
665 sizeof(duration),
666 "%us",
667 ntohl (m->duration) / ntohl (m->timeScale));
668 addKeyword (EXTRACTOR_METATYPE_DURATION, duration, ec);
669 return 1;
670}
671
672typedef struct
673{
674 Atom cmovAtom;
675 Atom dcomAtom;
676 char compressor[4];
677 Atom cmvdAtom;
678 unsigned int decompressedSize;
679} CompressedMovieHeaderAtom;
680
681static int
682cmovHandler (const char *input,
683 size_t size, size_t pos, struct ExtractContext *ec)
684{
685 const CompressedMovieHeaderAtom *c;
686 unsigned int s;
687 char *buf;
688 int ret;
689 z_stream z_state;
690 int z_ret_code;
691
692
693 if (getAtomSize (&input[pos]) < sizeof (CompressedMovieHeaderAtom))
694 return 0;
695 c = (const CompressedMovieHeaderAtom *) &input[pos];
696 if ((ntohl (c->dcomAtom.size) != 12) ||
697 (0 != memcmp (&c->dcomAtom.type, "dcom", 4)) ||
698 (0 != memcmp (c->compressor, "zlib", 4)) ||
699 (0 != memcmp (&c->cmvdAtom.type, "cmvd", 4)) ||
700 (ntohl (c->cmvdAtom.size) !=
701 getAtomSize (&input[pos]) - sizeof (Atom) * 2 - 4))
702 {
703 return 0; /* dcom must be 12 bytes */
704 }
705 s = ntohl (c->decompressedSize);
706 if (s > 16 * 1024 * 1024)
707 return 1; /* ignore, too big! */
708 buf = malloc (s);
709 if (buf == NULL)
710 return 1; /* out of memory, handle gracefully */
711
712 z_state.next_in = (unsigned char *) &c[1];
713 z_state.avail_in = ntohl (c->cmvdAtom.size);
714 z_state.avail_out = s;
715 z_state.next_out = (unsigned char *) buf;
716 z_state.zalloc = (alloc_func) 0;
717 z_state.zfree = (free_func) 0;
718 z_state.opaque = (voidpf) 0;
719 z_ret_code = inflateInit (&z_state);
720 if (Z_OK != z_ret_code)
721 {
722 free (buf);
723 return 0; /* crc error? */
724 }
725 z_ret_code = inflate (&z_state, Z_NO_FLUSH);
726 if ((z_ret_code != Z_OK) && (z_ret_code != Z_STREAM_END))
727 {
728 free (buf);
729 return 0; /* decode error? */
730 }
731 z_ret_code = inflateEnd (&z_state);
732 if (Z_OK != z_ret_code)
733 {
734 free (buf);
735 return 0; /* decode error? */
736 }
737 ret = handleAtom (all_handlers, buf, s, 0, ec);
738 free (buf);
739 return ret;
740}
741
/* Fixed-point number stored as two 16-bit halves. */
typedef struct
{
  /* integer part */
  short integer;
  /* fractional part */
  short fraction;
} QTFixed;
747
748typedef struct
749{
750 Atom hdr;
751 unsigned int flags; /* 1 byte of version, 3 bytes of flags */
752 /* in seconds since midnight, January 1, 1904 */
753 unsigned int creationTime;
754 /* in seconds since midnight, January 1, 1904 */
755 unsigned int modificationTime;
756 unsigned int trackID;
757 unsigned int reserved_0;
758 unsigned int duration;
759 unsigned int reserved_1;
760 unsigned int reserved_2;
761 unsigned short layer;
762 unsigned short alternate_group;
763 unsigned short volume;
764 unsigned short reserved_3;
765 QTFixed matrix[3][3];
766 /* in pixels */
767 QTFixed track_width;
768 /* in pixels */
769 QTFixed track_height;
770} TrackAtom;
771
772static int
773tkhdHandler (const char *input,
774 size_t size, size_t pos, struct ExtractContext *ec)
775{
776 const TrackAtom *m;
777 char dimensions[40];
778
779 if (getAtomSize (&input[pos]) < sizeof (TrackAtom))
780 return 0;
781 m = (const TrackAtom *) &input[pos];
782 if (ntohs (m->track_width.integer) != 0)
783 {
784 /* if actually a/the video track */
785 snprintf (dimensions,
786 sizeof(dimensions),
787 "%dx%d",
788 ntohs (m->track_width.integer),
789 ntohs (m->track_height.integer));
790 addKeyword (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, dimensions, ec);
791 }
792 return 1;
793}
794
/**
 * Handle the trak atom: a pure container, so the atoms stored after its
 * header are processed with the general dispatch table.
 *
 * @return 0 on error, 1 for success
 */
static int
trakHandler (const char *input,
             size_t size, size_t pos, struct ExtractContext *ec)
{
  const char *atom = &input[pos];
  unsigned int hdr = getAtomHeaderSize (atom);

  return processAllAtoms (&atom[hdr], getAtomSize (atom) - hdr, ec);
}
803
/**
 * Handle the meta atom: skip the four bytes that follow its header and
 * process the atoms stored after them with the general dispatch table.
 *
 * @return 0 if the atom is too small to hold those four bytes or a child
 *         reported an error, 1 for success
 */
static int
metaHandler (const char *input,
             size_t size, size_t pos, struct ExtractContext *ec)
{
  const char *atom = &input[pos];
  unsigned int skip = getAtomHeaderSize (atom) + 4;

  if (getAtomSize (atom) < skip)
    return 0;
  return processAllAtoms (&atom[skip], getAtomSize (atom) - skip, ec);
}
814
815typedef struct
816{
817 Atom header;
818 unsigned short length;
819 unsigned short language;
820} InternationalText;
821
822/*
823 * see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap2/chapter
824_3_section_2.html
825 * "User Data Text Strings and Language Codes"
826 * TODO: make conformant
827 */
828static int
829processTextTag (const char *input,
830 size_t size,
831 size_t pos,
832 enum EXTRACTOR_MetaType type, struct ExtractContext *ec)
833{
834 unsigned long long as;
835 unsigned short len;
836 unsigned short lang;
837 const InternationalText *txt;
838 char *meta;
839 int i;
840
841 /* contains "international text":
842 16-bit size + 16 bit language code */
843 as = getAtomSize (&input[pos]);
844 if (as < sizeof (InternationalText))
845 return 0; /* invalid */
846 txt = (const InternationalText *) &input[pos];
847 len = ntohs (txt->length);
848 if (len + sizeof (InternationalText) > as)
849 return 0; /* invalid */
850 lang = ntohs (txt->language);
851 if (lang >= sizeof (languages) / sizeof (char *))
852 return 0; /* invalid */
853 addKeyword (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE, languages[lang], ec);
854
855 meta = malloc (len + 1);
856 memcpy (meta, &txt[1], len);
857 meta[len] = '\0';
858 for (i = 0; i < len; i++)
859 if (meta[i] == '\r')
860 meta[i] = '\n';
861 addKeyword (type, meta, ec);
862 free (meta);
863 return 1;
864}
865
866
867static int
868c_Handler (const char *input,
869 size_t size, size_t pos, struct ExtractContext *ec)
870{
871 int i;
872
873 i = 0;
874 while ((cHm[i].pfx != NULL) && (0 != memcmp (&input[pos+5], cHm[i].pfx, 3)))
875 i++;
876 if (cHm[i].pfx != NULL)
877 return processTextTag (input, size, pos, cHm[i].type, ec);
878 return -1; /* not found */
879}
880
/**
 * Handle the udta atom: a pure container, so the atoms stored after its
 * header are processed with the general dispatch table.
 *
 * @return 0 on error, 1 for success
 */
static int
udtaHandler (const char *input,
             size_t size, size_t pos, struct ExtractContext *ec)
{
  const char *atom = &input[pos];
  unsigned int hdr = getAtomHeaderSize (atom);

  return processAllAtoms (&atom[hdr], getAtomSize (atom) - hdr, ec);
}
889
890static int
891processDataAtom (const char *input,
892 size_t size, /* parent atom size */
893 size_t pos,
894 const char *patom,
895 enum EXTRACTOR_MetaType type,
896 struct ExtractContext *ec)
897{
898 char *meta;
899 unsigned char version;
900 unsigned int flags;
901 unsigned long long asize;
902 unsigned int len;
903 unsigned int hdr;
904 int i;
905
906 hdr = getAtomHeaderSize (&input[pos]);
907 asize = getAtomSize (&input[pos]);
908 if (memcmp(&input[pos+4], "data", 4) != 0)
909 return -1;
910
911 if (asize < hdr + 8 || /* header + u32 flags + u32 reserved */
912 asize > (getAtomSize(&patom[0]) - 8))
913 return 0;
914
915 len = (unsigned int)(asize - (hdr + 8));
916
917 version = input[pos+8];
918 flags = ((unsigned char)input[pos+9]<<16) |
919 ((unsigned char)input[pos+10]<<8) |
920 (unsigned char)input[pos+11];
921#if DEBUG
922 printf("[data] version:%02x flags:%08x txtlen:%d\n", version, flags, len);
923#endif
924
925 if (version != 0)
926 return -1;
927
928 if (flags == 0x0) { /* binary data */
929 if (memcmp(&patom[4], "gnre", 4) == 0) {
930 if (len >= 2) {
931 unsigned short genre = ((unsigned char)input[pos+16] << 8) |
932 (unsigned char)input[pos+17];
933 if (genre > 0 && genre < GENRE_NAME_COUNT)
934 addKeyword(type, genre_names[genre-1], ec);
935 }
936 return 1;
937 }
938 else if ((memcmp(&patom[4], "trkn", 4) == 0) ||
939 (memcmp(&patom[4], "disk", 4) == 0)) {
940 if (len >= 4) {
941 unsigned short n = ((unsigned char)input[pos+18] << 8) |
942 (unsigned char)input[pos+19];
943 char s[8];
944 snprintf(s, 8, "%d", n);
945 addKeyword(type, s, ec);
946 }
947 }
948 else {
949 return -1;
950 }
951 }
952 else if (flags == 0x1) { /* text data */
953 meta = malloc (len + 1);
954 memcpy (meta, &input[pos+16], len);
955 meta[len] = '\0';
956 for (i = 0; i < len; i++)
957 if (meta[i] == '\r')
958 meta[i] = '\n';
959 addKeyword (type, meta, ec);
960 free (meta);
961 return 1;
962 }
963
964 return -1;
965}
966
967/* NOTE: iTunes tag processing should, in theory, be limited to iTunes
968 * file types (from ftyp), but, in reality, it seems that there are other
969 * files, like 3gpp, out in the wild with iTunes tags. */
970static int
971iTunesTagHandler (const char *input,
972 size_t size, size_t pos, struct ExtractContext *ec)
973{
974 unsigned long long asize;
975 unsigned int hdr;
976 int i;
977
978 hdr = getAtomHeaderSize (&input[pos]);
979 asize = getAtomSize (&input[pos]);
980
981 if (asize < hdr + 8) /* header + at least one atom */
982 return 0;
983
984 i = 0;
985 while ((it_to_extr_table[i].atom_type != NULL) &&
986 (0 != memcmp (&input[pos+4], it_to_extr_table[i].atom_type, 4)))
987 i++;
988 if (it_to_extr_table[i].atom_type != NULL)
989 return processDataAtom(input, asize, pos+hdr, &input[pos],
990 it_to_extr_table[i].type, ec);
991
992 return -1;
993}
994
995
996static int
997ilstHandler (const char *input,
998 size_t size, size_t pos, struct ExtractContext *ec)
999{
1000 unsigned int hdr = getAtomHeaderSize (&input[pos]);
1001 return processAtoms(ilst_handlers, &input[pos + hdr],
1002 getAtomSize(&input[pos]) - hdr, ec);
1003}
1004
1005
1006static HandlerEntry all_handlers[] = {
1007 {"moov", &moovHandler},
1008 {"cmov", &cmovHandler},
1009 {"mvhd", &mvhdHandler},
1010 {"trak", &trakHandler},
1011 {"tkhd", &tkhdHandler},
1012 {"ilst", &ilstHandler},
1013 {"meta", &metaHandler},
1014 {"udta", &udtaHandler},
1015 {"ftyp", &ftypHandler},
1016 {"\xa9" "swr", &c_Handler},
1017 {"\xa9" "cpy", &c_Handler},
1018 {"\xa9" "day", &c_Handler},
1019 {"\xa9" "dir", &c_Handler},
1020 {"\xa9" "ed1", &c_Handler},
1021 {"\xa9" "ed2", &c_Handler},
1022 {"\xa9" "ed3", &c_Handler},
1023 {"\xa9" "ed4", &c_Handler},
1024 {"\xa9" "ed5", &c_Handler},
1025 {"\xa9" "ed6", &c_Handler},
1026 {"\xa9" "ed7", &c_Handler},
1027 {"\xa9" "ed8", &c_Handler},
1028 {"\xa9" "ed9", &c_Handler},
1029 {"\xa9" "fmt", &c_Handler},
1030 {"\xa9" "inf", &c_Handler},
1031 {"\xa9" "prd", &c_Handler},
1032 {"\xa9" "prf", &c_Handler},
1033 {"\xa9" "req", &c_Handler},
1034 {"\xa9" "src", &c_Handler},
1035 {"\xa9" "wrt", &c_Handler},
1036 {"\xa9" "aut", &c_Handler},
1037 {"\xa9" "hst", &c_Handler},
1038 {"\xa9" "wrt", &c_Handler},
1039 {"\xa9" "cmt", &c_Handler},
1040 {"\xa9" "mak", &c_Handler},
1041 {"\xa9" "mod", &c_Handler},
1042 {"\xa9" "nam", &c_Handler},
1043 {"\xa9" "des", &c_Handler},
1044 {"\xa9" "PRD", &c_Handler},
1045 {"\xa9" "wrn", &c_Handler},
1046 {"\xa9" "chp", &c_Handler},
1047 /* { "name", &nameHandler }, */
1048 {NULL, NULL},
1049};
1050
1051static HandlerEntry ilst_handlers[] = {
1052 {"\xa9" "alb", &iTunesTagHandler},
1053 {"\xa9" "ART", &iTunesTagHandler},
1054 {"aART", &iTunesTagHandler},
1055 {"\xa9" "cmt", &iTunesTagHandler},
1056 {"\xa9" "day", &iTunesTagHandler},
1057 {"\xa9" "nam", &iTunesTagHandler},
1058 {"\xa9" "gen", &iTunesTagHandler},
1059 {"gnre", &iTunesTagHandler},
1060 {"trkn", &iTunesTagHandler},
1061 {"disk", &iTunesTagHandler},
1062 {"\xa9" "wrt", &iTunesTagHandler},
1063 {"\xa9" "too", &iTunesTagHandler},
1064 {"tmpo", &iTunesTagHandler},
1065 {"cprt", &iTunesTagHandler},
1066 {"cpil", &iTunesTagHandler},
1067 {"covr", &iTunesTagHandler},
1068 {"rtng", &iTunesTagHandler},
1069 {"\xa9" "grp", &iTunesTagHandler},
1070 {"stik", &iTunesTagHandler},
1071 {"pcst", &iTunesTagHandler},
1072 {"catg", &iTunesTagHandler},
1073 {"keyw", &iTunesTagHandler},
1074 {"purl", &iTunesTagHandler},
1075 {"egid", &iTunesTagHandler},
1076 {"desc", &iTunesTagHandler},
1077 {"\xa9" "lyr", &iTunesTagHandler},
1078 {"tvnn", &iTunesTagHandler},
1079 {"tvsh", &iTunesTagHandler},
1080 {"tven", &iTunesTagHandler},
1081 {"tvsn", &iTunesTagHandler},
1082 {"tves", &iTunesTagHandler},
1083 {"purd", &iTunesTagHandler},
1084 {"pgap", &iTunesTagHandler},
1085 {NULL, NULL},
1086};
1087
1088/**
1089 * Call the handler for the atom at the given position.
1090 * @return 0 on error, 1 for success, -1 for unknown atom type
1091 */
1092static int
1093handleAtom (HandlerEntry *handlers, const char *input,
1094 size_t size, size_t pos, struct ExtractContext *ec)
1095{
1096 int i;
1097 if (0 == checkAtomValid (input, size, pos))
1098 {
1099 return 0;
1100 }
1101 i = 0;
1102 while ((handlers[i].name != NULL) &&
1103 (0 != memcmp (&input[pos + 4], handlers[i].name, 4)))
1104 i++;
1105 if (handlers[i].name == NULL)
1106 {
1107#if DEBUG
1108 char b[5];
1109 memcpy (b, &input[pos + 4], 4);
1110 b[4] = '\0';
1111 printf ("No handler for `%s'\n", b);
1112#endif
1113 return -1;
1114 }
1115 i = handlers[i].handler (input, size, pos, ec);
1116#if DEBUG
1117 printf ("Running handler for `%4s' at %u completed with result %d\n",
1118 &input[pos + 4], pos, i);
1119#endif
1120 return i;
1121}
1122
1123/* mimetypes:
1124 video/quicktime: mov,qt: Quicktime animation;
1125 video/x-quicktime: mov,qt: Quicktime animation;
1126 application/x-quicktimeplayer: qtl: Quicktime list;
1127 */
1128
1129int
1130EXTRACTOR_qt_extract (const char *data,
1131 size_t size,
1132 EXTRACTOR_MetaDataProcessor proc,
1133 void *proc_cls,
1134 const char *options)
1135{
1136 struct ExtractContext ec;
1137 ec.proc = proc;
1138 ec.proc_cls = proc_cls;
1139 ec.ret = 0;
1140 processAllAtoms (data, size, &ec);
1141 return ec.ret;
1142}
1143
1144/* end of qt_extractor.c */