qt_extractor.c (30004B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 2, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 21 #include "platform.h" 22 #include "extractor.h" 23 #include <zlib.h> 24 #include <math.h> 25 26 #define DEBUG 0 27 28 /* verbatim from mp3extractor */ 29 static const char *const genre_names[] = { 30 gettext_noop ("Blues"), 31 gettext_noop ("Classic Rock"), 32 gettext_noop ("Country"), 33 gettext_noop ("Dance"), 34 gettext_noop ("Disco"), 35 gettext_noop ("Funk"), 36 gettext_noop ("Grunge"), 37 gettext_noop ("Hip-Hop"), 38 gettext_noop ("Jazz"), 39 gettext_noop ("Metal"), 40 gettext_noop ("New Age"), 41 gettext_noop ("Oldies"), 42 gettext_noop ("Other"), 43 gettext_noop ("Pop"), 44 gettext_noop ("R&B"), 45 gettext_noop ("Rap"), 46 gettext_noop ("Reggae"), 47 gettext_noop ("Rock"), 48 gettext_noop ("Techno"), 49 gettext_noop ("Industrial"), 50 gettext_noop ("Alternative"), 51 gettext_noop ("Ska"), 52 gettext_noop ("Death Metal"), 53 gettext_noop ("Pranks"), 54 gettext_noop ("Soundtrack"), 55 gettext_noop ("Euro-Techno"), 56 gettext_noop ("Ambient"), 57 gettext_noop ("Trip-Hop"), 58 gettext_noop ("Vocal"), 59 gettext_noop ("Jazz+Funk"), 60 gettext_noop ("Fusion"), 61 gettext_noop ("Trance"), 62 gettext_noop ("Classical"), 63 gettext_noop ("Instrumental"), 64 gettext_noop ("Acid"), 65 gettext_noop ("House"), 66 gettext_noop ("Game"), 67 gettext_noop ("Sound Clip"), 68 gettext_noop ("Gospel"), 69 gettext_noop ("Noise"), 70 gettext_noop ("Alt. Rock"), 71 gettext_noop ("Bass"), 72 gettext_noop ("Soul"), 73 gettext_noop ("Punk"), 74 gettext_noop ("Space"), 75 gettext_noop ("Meditative"), 76 gettext_noop ("Instrumental Pop"), 77 gettext_noop ("Instrumental Rock"), 78 gettext_noop ("Ethnic"), 79 gettext_noop ("Gothic"), 80 gettext_noop ("Darkwave"), 81 gettext_noop ("Techno-Industrial"), 82 gettext_noop ("Electronic"), 83 gettext_noop ("Pop-Folk"), 84 gettext_noop ("Eurodance"), 85 gettext_noop ("Dream"), 86 gettext_noop ("Southern Rock"), 87 gettext_noop ("Comedy"), 88 gettext_noop ("Cult"), 89 gettext_noop ("Gangsta Rap"), 90 gettext_noop ("Top 40"), 91 gettext_noop ("Christian Rap"), 92 gettext_noop ("Pop/Funk"), 93 gettext_noop ("Jungle"), 94 gettext_noop ("Native American"), 95 gettext_noop ("Cabaret"), 96 gettext_noop ("New Wave"), 97 gettext_noop ("Psychedelic"), 98 gettext_noop ("Rave"), 99 gettext_noop ("Showtunes"), 100 gettext_noop ("Trailer"), 101 gettext_noop ("Lo-Fi"), 102 gettext_noop ("Tribal"), 103 gettext_noop ("Acid Punk"), 104 gettext_noop ("Acid Jazz"), 105 gettext_noop ("Polka"), 106 gettext_noop ("Retro"), 107 gettext_noop ("Musical"), 108 gettext_noop ("Rock & Roll"), 109 gettext_noop ("Hard Rock"), 110 gettext_noop ("Folk"), 111 gettext_noop ("Folk/Rock"), 112 gettext_noop ("National Folk"), 113 gettext_noop ("Swing"), 114 gettext_noop ("Fast-Fusion"), 115 gettext_noop ("Bebob"), 116 gettext_noop ("Latin"), 117 gettext_noop ("Revival"), 118 gettext_noop ("Celtic"), 119 gettext_noop ("Bluegrass"), 120 gettext_noop ("Avantgarde"), 121 gettext_noop ("Gothic Rock"), 122 gettext_noop ("Progressive Rock"), 123 gettext_noop ("Psychedelic Rock"), 124 gettext_noop ("Symphonic Rock"), 125 gettext_noop ("Slow Rock"), 126 gettext_noop ("Big Band"), 127 gettext_noop ("Chorus"), 128 gettext_noop ("Easy Listening"), 129 gettext_noop ("Acoustic"), 130 gettext_noop ("Humour"), 131 gettext_noop ("Speech"), 132 gettext_noop ("Chanson"), 133 gettext_noop ("Opera"), 134 gettext_noop ("Chamber Music"), 135 gettext_noop ("Sonata"), 136 gettext_noop ("Symphony"), 137 gettext_noop ("Booty Bass"), 138 gettext_noop ("Primus"), 139 gettext_noop ("Porn Groove"), 140 gettext_noop ("Satire"), 141 gettext_noop ("Slow Jam"), 142 gettext_noop ("Club"), 143 gettext_noop ("Tango"), 144 gettext_noop ("Samba"), 145 gettext_noop ("Folklore"), 146 gettext_noop ("Ballad"), 147 gettext_noop ("Power Ballad"), 148 gettext_noop ("Rhythmic Soul"), 149 gettext_noop ("Freestyle"), 150 gettext_noop ("Duet"), 151 gettext_noop ("Punk Rock"), 152 gettext_noop ("Drum Solo"), 153 gettext_noop ("A Cappella"), 154 gettext_noop ("Euro-House"), 155 gettext_noop ("Dance Hall"), 156 gettext_noop ("Goa"), 157 gettext_noop ("Drum & Bass"), 158 gettext_noop ("Club-House"), 159 gettext_noop ("Hardcore"), 160 gettext_noop ("Terror"), 161 gettext_noop ("Indie"), 162 gettext_noop ("BritPop"), 163 gettext_noop ("Negerpunk"), 164 gettext_noop ("Polsk Punk"), 165 gettext_noop ("Beat"), 166 gettext_noop ("Christian Gangsta Rap"), 167 gettext_noop ("Heavy Metal"), 168 gettext_noop ("Black Metal"), 169 gettext_noop ("Crossover"), 170 gettext_noop ("Contemporary Christian"), 171 gettext_noop ("Christian Rock"), 172 gettext_noop ("Merengue"), 173 gettext_noop ("Salsa"), 174 gettext_noop ("Thrash Metal"), 175 gettext_noop ("Anime"), 176 gettext_noop ("JPop"), 177 gettext_noop ("Synthpop"), 178 }; 179 180 #define GENRE_NAME_COUNT \ 181 ((unsigned int) (sizeof genre_names / sizeof (const char *const))) 182 183 184 static const char *languages[] = { 185 "English", 186 "French", 187 "German", 188 "Italian", 189 "Dutch", 190 "Swedish", 191 "Spanish", 192 "Danish", 193 "Portuguese", 194 "Norwegian", 195 "Hebrew", 196 "Japanese", 197 "Arabic", 198 "Finnish", 199 "Greek", 200 "Icelandic", 201 "Maltese", 202 "Turkish", 203 "Croatian", 204 "Traditional Chinese", 205 "Urdu", 206 "Hindi", 207 "Thai", 208 "Korean", 209 "Lithuanian", 210 "Polish", 211 "Hungarian", 212 "Estonian", 213 "Lettish", 214 "Saamisk", 215 "Lappish", 216 "Faeroese", 217 "Farsi", 218 "Russian", 219 "Simplified Chinese", 220 "Flemish", 221 "Irish", 222 "Albanian", 223 "Romanian", 224 "Czech", 225 "Slovak", 226 "Slovenian", 227 "Yiddish", 228 "Serbian", 229 "Macedonian", 230 "Bulgarian", 231 "Ukrainian", 232 "Byelorussian", 233 "Uzbek", 234 "Kazakh", 235 "Azerbaijani", 236 "AzerbaijanAr", 237 "Armenian", 238 "Georgian", 239 "Moldavian", 240 "Kirghiz", 241 "Tajiki", 242 "Turkmen", 243 "Mongolian", 244 "MongolianCyr", 245 "Pashto", 246 "Kurdish", 247 "Kashmiri", 248 "Sindhi", 249 "Tibetan", 250 "Nepali", 251 "Sanskrit", 252 "Marathi", 253 "Bengali", 254 "Assamese", 255 "Gujarati", 256 "Punjabi", 257 "Oriya", 258 "Malayalam", 259 "Kannada", 260 "Tamil", 261 "Telugu", 262 "Sinhalese", 263 "Burmese", 264 "Khmer", 265 "Lao", 266 "Vietnamese", 267 "Indonesian", 268 "Tagalog", 269 "MalayRoman", 270 "MalayArabic", 271 "Amharic", 272 "Tigrinya", 273 "Galla", 274 "Oromo", 275 "Somali", 276 "Swahili", 277 "Ruanda", 278 "Rundi", 279 "Chewa", 280 "Malagasy", 281 "Esperanto", 282 "Welsh", 283 "Basque", 284 "Catalan", 285 "Latin", 286 "Quechua", 287 "Guarani", 288 "Aymara", 289 "Tatar", 290 "Uighur", 291 "Dzongkha", 292 "JavaneseRom", 293 }; 294 295 296 typedef struct 297 { 298 const char *ext; 299 const char *mime; 300 } C2M; 301 302 /* see http://www.mp4ra.org/filetype.html 303 * http://www.ftyps.com/ */ 304 static C2M ftMap[] = { 305 {"qt ", "video/quicktime"}, 306 {"isom", "video/mp4"}, /* ISO Base Media files */ 307 {"iso2", "video/mp4"}, 308 {"mp41", "video/mp4"}, /* MPEG-4 (ISO/IEC 14491-1) version 1 */ 309 {"mp42", "video/mp4"}, /* MPEG-4 (ISO/IEC 14491-1) version 2 */ 310 {"3gp1", "video/3gpp"}, 311 {"3gp2", "video/3gpp"}, 312 {"3gp3", "video/3gpp"}, 313 {"3gp4", "video/3gpp"}, 314 {"3gp5", "video/3gpp"}, 315 {"3g2a", "video/3gpp2"}, 316 {"mmp4", "video/mp4"}, /* Mobile MPEG-4 */ 317 {"M4A ", "audio/mp4"}, 318 {"M4B ", "audio/mp4"}, 319 {"M4P ", "audio/mp4"}, 320 {"M4V ", "video/mp4"}, 321 {"mj2s", "video/mj2"}, /* Motion JPEG 2000 */ 322 {"mjp2", "video/mj2"}, 323 {NULL, NULL}, 324 }; 325 326 typedef struct CHE 327 { 328 const char *pfx; 329 enum EXTRACTOR_MetaType type; 330 } CHE; 331 332 static CHE cHm[] = { 333 {"aut", EXTRACTOR_METATYPE_AUTHOR_NAME}, 334 {"cpy", EXTRACTOR_METATYPE_COPYRIGHT}, 335 {"day", EXTRACTOR_METATYPE_CREATION_DATE}, 336 {"ed1", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 337 {"ed2", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 338 {"ed3", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 339 {"ed4", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 340 {"ed5", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 341 {"ed6", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 342 {"ed7", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 343 {"ed8", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 344 {"ed9", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 345 {"cmt", EXTRACTOR_METATYPE_COMMENT}, 346 {"url", EXTRACTOR_METATYPE_URL}, 347 {"enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE}, 348 {"hst", EXTRACTOR_METATYPE_BUILDHOST}, 349 {"nam", EXTRACTOR_METATYPE_TITLE}, 350 {"gen", EXTRACTOR_METATYPE_GENRE}, 351 {"mak", EXTRACTOR_METATYPE_CAMERA_MAKE}, 352 {"mod", EXTRACTOR_METATYPE_CAMERA_MODEL}, 353 {"des", EXTRACTOR_METATYPE_DESCRIPTION}, 354 {"dis", EXTRACTOR_METATYPE_DISCLAIMER}, 355 {"dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR}, 356 {"src", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME}, 357 {"prf", EXTRACTOR_METATYPE_PERFORMER }, 358 {"prd", EXTRACTOR_METATYPE_PRODUCER}, 359 {"PRD", EXTRACTOR_METATYPE_PRODUCT_VERSION}, 360 {"swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE}, 361 {"isr", EXTRACTOR_METATYPE_ISRC}, 362 {"wrt", EXTRACTOR_METATYPE_WRITER}, 363 {"wrn", EXTRACTOR_METATYPE_WARNING}, 364 {"chp", EXTRACTOR_METATYPE_CHAPTER_NAME}, 365 {"inf", EXTRACTOR_METATYPE_DESCRIPTION}, 366 {"req", EXTRACTOR_METATYPE_TARGET_PLATFORM}, /* hardware requirements */ 367 {"fmt", EXTRACTOR_METATYPE_FORMAT}, 368 {NULL, EXTRACTOR_METATYPE_RESERVED }, 369 }; 370 371 372 typedef struct 373 { 374 const char *atom_type; 375 enum EXTRACTOR_MetaType type; 376 } ITTagConversionEntry; 377 378 /* iTunes Tags: 379 * see http://atomicparsley.sourceforge.net/mpeg-4files.html */ 380 static ITTagConversionEntry it_to_extr_table[] = { 381 {"\xa9" "alb", EXTRACTOR_METATYPE_ALBUM}, 382 {"\xa9" "ART", EXTRACTOR_METATYPE_ARTIST}, 383 {"aART", EXTRACTOR_METATYPE_ARTIST}, 384 {"\xa9" "cmt", EXTRACTOR_METATYPE_COMMENT}, 385 {"\xa9" "day", EXTRACTOR_METATYPE_UNKNOWN_DATE}, 386 {"\xa9" "nam", EXTRACTOR_METATYPE_TITLE}, 387 {"trkn", EXTRACTOR_METATYPE_TRACK_NUMBER}, 388 {"disk", EXTRACTOR_METATYPE_DISC_NUMBER}, 389 {"\xa9" "gen", EXTRACTOR_METATYPE_GENRE}, 390 {"gnre", EXTRACTOR_METATYPE_GENRE}, 391 {"\xa9" "wrt", EXTRACTOR_METATYPE_WRITER}, 392 {"\xa9" "too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE}, 393 {"cprt", EXTRACTOR_METATYPE_COPYRIGHT}, 394 {"\xa9" "grp", EXTRACTOR_METATYPE_GROUP}, 395 {"catg", EXTRACTOR_METATYPE_SECTION}, 396 {"keyw", EXTRACTOR_METATYPE_KEYWORDS}, 397 {"desc", EXTRACTOR_METATYPE_DESCRIPTION}, 398 {"tvnn", EXTRACTOR_METATYPE_NETWORK_NAME}, 399 {"tvsh", EXTRACTOR_METATYPE_SHOW_NAME}, 400 {"tven", EXTRACTOR_METATYPE_NETWORK_NAME}, 401 {NULL, EXTRACTOR_METATYPE_RESERVED} 402 }; 403 404 405 typedef struct 406 { 407 unsigned int size; 408 unsigned int type; 409 } Atom; 410 411 typedef struct 412 { 413 unsigned int one; 414 unsigned int type; 415 unsigned long long size; 416 } LongAtom; 417 418 static unsigned long long 419 ntohll (unsigned long long n) 420 { 421 #if __BYTE_ORDER == __BIG_ENDIAN 422 return n; 423 #else 424 return (((unsigned long long) ntohl (n)) << 32) + ntohl (n >> 32); 425 #endif 426 } 427 428 429 /** 430 * Check if at position pos there is a valid atom. 431 * @return 0 if the atom is invalid, 1 if it is valid 432 */ 433 static int 434 checkAtomValid (const char *buffer, size_t size, size_t pos) 435 { 436 unsigned long long atomSize; 437 const Atom *atom; 438 const LongAtom *latom; 439 if ((pos >= size) || 440 (pos + sizeof (Atom) > size) || (pos + sizeof (Atom) < pos)) 441 return 0; 442 atom = (const Atom *) &buffer[pos]; 443 if (ntohl (atom->size) == 1) 444 { 445 if ((pos + sizeof (LongAtom) > size) || (pos + sizeof (LongAtom) < pos)) 446 return 0; 447 latom = (const LongAtom *) &buffer[pos]; 448 atomSize = ntohll (latom->size); 449 if ((atomSize < sizeof (LongAtom)) || 450 (atomSize + pos > size) || (atomSize + pos < atomSize)) 451 return 0; 452 } 453 else 454 { 455 atomSize = ntohl (atom->size); 456 if ((atomSize < sizeof (Atom)) || 457 (atomSize + pos > size) || (atomSize + pos < atomSize)) 458 return 0; 459 } 460 return 1; 461 } 462 463 464 /** 465 * Assumes that checkAtomValid has already been called. 466 */ 467 static unsigned long long 468 getAtomSize (const char *buf) 469 { 470 const Atom *atom; 471 const LongAtom *latom; 472 atom = (const Atom *) buf; 473 if (ntohl (atom->size) == 1) 474 { 475 latom = (const LongAtom *) buf; 476 return ntohll (latom->size); 477 } 478 return ntohl (atom->size); 479 } 480 481 482 /** 483 * Assumes that checkAtomValid has already been called. 484 */ 485 static unsigned int 486 getAtomHeaderSize (const char *buf) 487 { 488 const Atom *atom; 489 490 atom = (const Atom *) buf; 491 if (ntohl (atom->size) == 1) 492 return sizeof (const LongAtom); 493 return sizeof (Atom); 494 } 495 496 497 struct ExtractContext 498 { 499 EXTRACTOR_MetaDataProcessor proc; 500 void *proc_cls; 501 int ret; 502 }; 503 504 static void 505 addKeyword (enum EXTRACTOR_MetaType type, 506 const char *str, 507 struct ExtractContext *ec) 508 { 509 if (ec->ret != 0) 510 return; 511 ec->ret = ec->proc (ec->proc_cls, 512 "qt", 513 type, 514 EXTRACTOR_METAFORMAT_UTF8, 515 "text/plain", 516 str, 517 strlen (str) + 1); 518 } 519 520 521 /** 522 * Assumes that checkAtomValid has already been called. 523 */ 524 typedef int (*AtomHandler) (const char *input, 525 size_t size, 526 size_t pos, struct ExtractContext *ec); 527 528 typedef struct 529 { 530 char *name; 531 AtomHandler handler; 532 } HandlerEntry; 533 534 /** 535 * Call the handler for the atom at the given position. 536 * Will check validity of the given atom. 537 * 538 * @return 0 on error, 1 for success, -1 for unknown atom type 539 */ 540 static int handleAtom (HandlerEntry *handlers, 541 const char *input, 542 size_t size, 543 size_t pos, 544 struct ExtractContext *ec); 545 546 static HandlerEntry all_handlers[]; 547 static HandlerEntry ilst_handlers[]; 548 549 /** 550 * Process atoms. 551 * @return 0 on error, 1 for success, -1 for unknown atom type 552 */ 553 static int 554 processAtoms (HandlerEntry *handlers, const char *input, 555 size_t size, struct ExtractContext *ec) 556 { 557 size_t pos; 558 559 if (size < sizeof (Atom)) 560 return 1; 561 pos = 0; 562 while (pos < size - sizeof (Atom)) 563 { 564 if (0 == handleAtom (handlers, input, size, pos, ec)) 565 return 0; 566 pos += getAtomSize (&input[pos]); 567 } 568 return 1; 569 } 570 571 572 /** 573 * Process all atoms. 574 * @return 0 on error, 1 for success, -1 for unknown atom type 575 */ 576 static int 577 processAllAtoms (const char *input, 578 size_t size, struct ExtractContext *ec) 579 { 580 return processAtoms (all_handlers, input, size, ec); 581 } 582 583 584 /** 585 * Handle the moov atom. 586 * @return 0 on error, 1 for success, -1 for unknown atom type 587 */ 588 static int 589 moovHandler (const char *input, 590 size_t size, size_t pos, struct ExtractContext *ec) 591 { 592 unsigned int hdr = getAtomHeaderSize (&input[pos]); 593 return processAllAtoms (&input[pos + hdr], 594 getAtomSize (&input[pos]) - hdr, ec); 595 } 596 597 598 /* see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap1/chapter_2_section_5.html */ 599 typedef struct 600 { 601 Atom header; 602 /* major brand */ 603 char type[4]; 604 /* minor version */ 605 unsigned int version; 606 /* compatible brands */ 607 char compatibility[4]; 608 } FileType; 609 610 static int 611 ftypHandler (const char *input, 612 size_t size, size_t pos, struct ExtractContext *ec) 613 { 614 const FileType *ft; 615 int i; 616 617 if (getAtomSize (&input[pos]) < sizeof (FileType)) 618 { 619 return 0; 620 } 621 ft = (const FileType *) &input[pos]; 622 623 i = 0; 624 while ((ftMap[i].ext != NULL) && (0 != memcmp (ft->type, ftMap[i].ext, 4))) 625 i++; 626 if (ftMap[i].ext != NULL) 627 addKeyword (EXTRACTOR_METATYPE_MIMETYPE, ftMap[i].mime, ec); 628 return 1; 629 } 630 631 632 typedef struct 633 { 634 Atom hdr; 635 unsigned char version; 636 unsigned char flags[3]; 637 /* in seconds since midnight, January 1, 1904 */ 638 unsigned int creationTime; 639 /* in seconds since midnight, January 1, 1904 */ 640 unsigned int modificationTime; 641 /* number of time units that pass per second in the movies time 642 coordinate system */ 643 unsigned int timeScale; 644 /* A time value that indicates the duration of the movie in time 645 scale units. */ 646 unsigned int duration; 647 unsigned int preferredRate; 648 /* A 16-bit fixed-point number that specifies how loud to 649 play. 1.0 indicates full volume */ 650 unsigned short preferredVolume; 651 unsigned char reserved[10]; 652 unsigned char matrix[36]; 653 unsigned int previewTime; 654 unsigned int previewDuration; 655 unsigned int posterTime; 656 unsigned int selectionTime; 657 unsigned int selectionDuration; 658 unsigned int currentTime; 659 unsigned int nextTrackId; 660 } MovieHeaderAtom; 661 662 static int 663 mvhdHandler (const char *input, 664 size_t size, size_t pos, struct ExtractContext *ec) 665 { 666 const MovieHeaderAtom *m; 667 char duration[16]; 668 if (getAtomSize (&input[pos]) != sizeof (MovieHeaderAtom)) 669 return 0; 670 m = (const MovieHeaderAtom *) &input[pos]; 671 snprintf (duration, 672 sizeof(duration), 673 "%us", 674 ntohl (m->duration) / ntohl (m->timeScale)); 675 addKeyword (EXTRACTOR_METATYPE_DURATION, duration, ec); 676 return 1; 677 } 678 679 680 typedef struct 681 { 682 Atom cmovAtom; 683 Atom dcomAtom; 684 char compressor[4]; 685 Atom cmvdAtom; 686 unsigned int decompressedSize; 687 } CompressedMovieHeaderAtom; 688 689 static int 690 cmovHandler (const char *input, 691 size_t size, size_t pos, struct ExtractContext *ec) 692 { 693 const CompressedMovieHeaderAtom *c; 694 unsigned int s; 695 char *buf; 696 int ret; 697 z_stream z_state; 698 int z_ret_code; 699 700 701 if (getAtomSize (&input[pos]) < sizeof (CompressedMovieHeaderAtom)) 702 return 0; 703 c = (const CompressedMovieHeaderAtom *) &input[pos]; 704 if ((ntohl (c->dcomAtom.size) != 12) || 705 (0 != memcmp (&c->dcomAtom.type, "dcom", 4)) || 706 (0 != memcmp (c->compressor, "zlib", 4)) || 707 (0 != memcmp (&c->cmvdAtom.type, "cmvd", 4)) || 708 (ntohl (c->cmvdAtom.size) != 709 getAtomSize (&input[pos]) - sizeof (Atom) * 2 - 4)) 710 { 711 return 0; /* dcom must be 12 bytes */ 712 } 713 s = ntohl (c->decompressedSize); 714 if (s > 16 * 1024 * 1024) 715 return 1; /* ignore, too big! */ 716 buf = malloc (s); 717 if (buf == NULL) 718 return 1; /* out of memory, handle gracefully */ 719 720 z_state.next_in = (unsigned char *) &c[1]; 721 z_state.avail_in = ntohl (c->cmvdAtom.size); 722 z_state.avail_out = s; 723 z_state.next_out = (unsigned char *) buf; 724 z_state.zalloc = (alloc_func) 0; 725 z_state.zfree = (free_func) 0; 726 z_state.opaque = (voidpf) 0; 727 z_ret_code = inflateInit (&z_state); 728 if (Z_OK != z_ret_code) 729 { 730 free (buf); 731 return 0; /* crc error? */ 732 } 733 z_ret_code = inflate (&z_state, Z_NO_FLUSH); 734 if ((z_ret_code != Z_OK) && (z_ret_code != Z_STREAM_END)) 735 { 736 free (buf); 737 return 0; /* decode error? */ 738 } 739 z_ret_code = inflateEnd (&z_state); 740 if (Z_OK != z_ret_code) 741 { 742 free (buf); 743 return 0; /* decode error? */ 744 } 745 ret = handleAtom (all_handlers, buf, s, 0, ec); 746 free (buf); 747 return ret; 748 } 749 750 751 typedef struct 752 { 753 short integer; 754 short fraction; 755 } QTFixed; 756 757 typedef struct 758 { 759 Atom hdr; 760 unsigned int flags; /* 1 byte of version, 3 bytes of flags */ 761 /* in seconds since midnight, January 1, 1904 */ 762 unsigned int creationTime; 763 /* in seconds since midnight, January 1, 1904 */ 764 unsigned int modificationTime; 765 unsigned int trackID; 766 unsigned int reserved_0; 767 unsigned int duration; 768 unsigned int reserved_1; 769 unsigned int reserved_2; 770 unsigned short layer; 771 unsigned short alternate_group; 772 unsigned short volume; 773 unsigned short reserved_3; 774 QTFixed matrix[3][3]; 775 /* in pixels */ 776 QTFixed track_width; 777 /* in pixels */ 778 QTFixed track_height; 779 } TrackAtom; 780 781 static int 782 tkhdHandler (const char *input, 783 size_t size, size_t pos, struct ExtractContext *ec) 784 { 785 const TrackAtom *m; 786 char dimensions[40]; 787 788 if (getAtomSize (&input[pos]) < sizeof (TrackAtom)) 789 return 0; 790 m = (const TrackAtom *) &input[pos]; 791 if (ntohs (m->track_width.integer) != 0) 792 { 793 /* if actually a/the video track */ 794 snprintf (dimensions, 795 sizeof(dimensions), 796 "%dx%d", 797 ntohs (m->track_width.integer), 798 ntohs (m->track_height.integer)); 799 addKeyword (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, dimensions, ec); 800 } 801 return 1; 802 } 803 804 805 static int 806 trakHandler (const char *input, 807 size_t size, size_t pos, struct ExtractContext *ec) 808 { 809 unsigned int hdr = getAtomHeaderSize (&input[pos]); 810 return processAllAtoms (&input[pos + hdr], 811 getAtomSize (&input[pos]) - hdr, ec); 812 } 813 814 815 static int 816 metaHandler (const char *input, 817 size_t size, size_t pos, struct ExtractContext *ec) 818 { 819 unsigned int hdr = getAtomHeaderSize (&input[pos]); 820 if (getAtomSize (&input[pos]) < hdr + 4) 821 return 0; 822 return processAllAtoms (&input[pos + hdr + 4], 823 getAtomSize (&input[pos]) - hdr - 4, ec); 824 } 825 826 827 typedef struct 828 { 829 Atom header; 830 unsigned short length; 831 unsigned short language; 832 } InternationalText; 833 834 /* 835 * see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap2/chapter 836 _3_section_2.html 837 * "User Data Text Strings and Language Codes" 838 * TODO: make conformant 839 */ 840 static int 841 processTextTag (const char *input, 842 size_t size, 843 size_t pos, 844 enum EXTRACTOR_MetaType type, struct ExtractContext *ec) 845 { 846 unsigned long long as; 847 unsigned short len; 848 unsigned short lang; 849 const InternationalText *txt; 850 char *meta; 851 int i; 852 853 /* contains "international text": 854 16-bit size + 16 bit language code */ 855 as = getAtomSize (&input[pos]); 856 if (as < sizeof (InternationalText)) 857 return 0; /* invalid */ 858 txt = (const InternationalText *) &input[pos]; 859 len = ntohs (txt->length); 860 if (len + sizeof (InternationalText) > as) 861 return 0; /* invalid */ 862 lang = ntohs (txt->language); 863 if (lang >= sizeof (languages) / sizeof (char *)) 864 return 0; /* invalid */ 865 addKeyword (EXTRACTOR_METATYPE_LANGUAGE, languages[lang], ec); 866 867 meta = malloc (len + 1); 868 if (meta == NULL) 869 return 0; 870 memcpy (meta, &txt[1], len); 871 meta[len] = '\0'; 872 for (i = 0; i < len; i++) 873 if (meta[i] == '\r') 874 meta[i] = '\n'; 875 addKeyword (type, meta, ec); 876 free (meta); 877 return 1; 878 } 879 880 881 static int 882 c_Handler (const char *input, 883 size_t size, size_t pos, struct ExtractContext *ec) 884 { 885 int i; 886 887 i = 0; 888 while ((cHm[i].pfx != NULL) && (0 != memcmp (&input[pos + 5], cHm[i].pfx, 3))) 889 i++; 890 if (cHm[i].pfx != NULL) 891 return processTextTag (input, size, pos, cHm[i].type, ec); 892 return -1; /* not found */ 893 } 894 895 896 static int 897 udtaHandler (const char *input, 898 size_t size, size_t pos, struct ExtractContext *ec) 899 { 900 unsigned int hdr = getAtomHeaderSize (&input[pos]); 901 return processAllAtoms (&input[pos + hdr], 902 getAtomSize (&input[pos]) - hdr, ec); 903 } 904 905 906 static int 907 processDataAtom (const char *input, 908 size_t size, /* parent atom size */ 909 size_t pos, 910 const char *patom, 911 enum EXTRACTOR_MetaType type, 912 struct ExtractContext *ec) 913 { 914 char *meta; 915 unsigned char version; 916 unsigned int flags; 917 unsigned long long asize; 918 unsigned int len; 919 unsigned int hdr; 920 int i; 921 922 hdr = getAtomHeaderSize (&input[pos]); 923 asize = getAtomSize (&input[pos]); 924 if (memcmp (&input[pos + 4], "data", 4) != 0) 925 return -1; 926 927 if ((asize < hdr + 8) || /* header + u32 flags + u32 reserved */ 928 (asize > (getAtomSize (&patom[0]) - 8)) ) 929 return 0; 930 931 len = (unsigned int) (asize - (hdr + 8)); 932 933 version = input[pos + 8]; 934 flags = ((unsigned char) input[pos + 9] << 16) 935 | ((unsigned char) input[pos + 10] << 8) 936 | (unsigned char) input[pos + 11]; 937 #if DEBUG 938 printf ("[data] version:%02x flags:%08x txtlen:%d\n", version, flags, len); 939 #endif 940 941 if (version != 0) 942 return -1; 943 944 if (flags == 0x0) /* binary data */ 945 { 946 if (memcmp (&patom[4], "gnre", 4) == 0) 947 { 948 if (len >= 2) 949 { 950 unsigned short genre = ((unsigned char) input[pos + 16] << 8) 951 | (unsigned char) input[pos + 17]; 952 if ((genre > 0) && (genre < GENRE_NAME_COUNT)) 953 addKeyword (type, genre_names[genre - 1], ec); 954 } 955 return 1; 956 } 957 else if ((memcmp (&patom[4], "trkn", 4) == 0) || 958 (memcmp (&patom[4], "disk", 4) == 0)) 959 { 960 if (len >= 4) 961 { 962 unsigned short n = ((unsigned char) input[pos + 18] << 8) 963 | (unsigned char) input[pos + 19]; 964 char s[8]; 965 snprintf (s, 8, "%d", n); 966 addKeyword (type, s, ec); 967 } 968 } 969 else 970 { 971 return -1; 972 } 973 } 974 else if (flags == 0x1) /* text data */ 975 { 976 meta = malloc (len + 1); 977 if (meta == NULL) 978 return 0; 979 memcpy (meta, &input[pos + 16], len); 980 meta[len] = '\0'; 981 for (i = 0; i < len; i++) 982 if (meta[i] == '\r') 983 meta[i] = '\n'; 984 addKeyword (type, meta, ec); 985 free (meta); 986 return 1; 987 } 988 989 return -1; 990 } 991 992 993 /* NOTE: iTunes tag processing should, in theory, be limited to iTunes 994 * file types (from ftyp), but, in reality, it seems that there are other 995 * files, like 3gpp, out in the wild with iTunes tags. */ 996 static int 997 iTunesTagHandler (const char *input, 998 size_t size, size_t pos, struct ExtractContext *ec) 999 { 1000 unsigned long long asize; 1001 unsigned int hdr; 1002 int i; 1003 1004 hdr = getAtomHeaderSize (&input[pos]); 1005 asize = getAtomSize (&input[pos]); 1006 1007 if (asize < hdr + 8) /* header + at least one atom */ 1008 return 0; 1009 1010 i = 0; 1011 while ((it_to_extr_table[i].atom_type != NULL) && 1012 (0 != memcmp (&input[pos + 4], it_to_extr_table[i].atom_type, 4))) 1013 i++; 1014 if (it_to_extr_table[i].atom_type != NULL) 1015 return processDataAtom (input, asize, pos + hdr, &input[pos], 1016 it_to_extr_table[i].type, ec); 1017 1018 return -1; 1019 } 1020 1021 1022 static int 1023 ilstHandler (const char *input, 1024 size_t size, size_t pos, struct ExtractContext *ec) 1025 { 1026 unsigned int hdr = getAtomHeaderSize (&input[pos]); 1027 return processAtoms (ilst_handlers, &input[pos + hdr], 1028 getAtomSize (&input[pos]) - hdr, ec); 1029 } 1030 1031 1032 static HandlerEntry all_handlers[] = { 1033 {"moov", &moovHandler}, 1034 {"cmov", &cmovHandler}, 1035 {"mvhd", &mvhdHandler}, 1036 {"trak", &trakHandler}, 1037 {"tkhd", &tkhdHandler}, 1038 {"ilst", &ilstHandler}, 1039 {"meta", &metaHandler}, 1040 {"udta", &udtaHandler}, 1041 {"ftyp", &ftypHandler}, 1042 {"\xa9" "swr", &c_Handler}, 1043 {"\xa9" "cpy", &c_Handler}, 1044 {"\xa9" "day", &c_Handler}, 1045 {"\xa9" "dir", &c_Handler}, 1046 {"\xa9" "ed1", &c_Handler}, 1047 {"\xa9" "ed2", &c_Handler}, 1048 {"\xa9" "ed3", &c_Handler}, 1049 {"\xa9" "ed4", &c_Handler}, 1050 {"\xa9" "ed5", &c_Handler}, 1051 {"\xa9" "ed6", &c_Handler}, 1052 {"\xa9" "ed7", &c_Handler}, 1053 {"\xa9" "ed8", &c_Handler}, 1054 {"\xa9" "ed9", &c_Handler}, 1055 {"\xa9" "fmt", &c_Handler}, 1056 {"\xa9" "inf", &c_Handler}, 1057 {"\xa9" "prd", &c_Handler}, 1058 {"\xa9" "prf", &c_Handler}, 1059 {"\xa9" "req", &c_Handler}, 1060 {"\xa9" "src", &c_Handler}, 1061 {"\xa9" "wrt", &c_Handler}, 1062 {"\xa9" "aut", &c_Handler}, 1063 {"\xa9" "hst", &c_Handler}, 1064 {"\xa9" "wrt", &c_Handler}, 1065 {"\xa9" "cmt", &c_Handler}, 1066 {"\xa9" "mak", &c_Handler}, 1067 {"\xa9" "mod", &c_Handler}, 1068 {"\xa9" "nam", &c_Handler}, 1069 {"\xa9" "des", &c_Handler}, 1070 {"\xa9" "PRD", &c_Handler}, 1071 {"\xa9" "wrn", &c_Handler}, 1072 {"\xa9" "chp", &c_Handler}, 1073 /* { "name", &nameHandler }, */ 1074 {NULL, NULL}, 1075 }; 1076 1077 static HandlerEntry ilst_handlers[] = { 1078 {"\xa9" "alb", &iTunesTagHandler}, 1079 {"\xa9" "ART", &iTunesTagHandler}, 1080 {"aART", &iTunesTagHandler}, 1081 {"\xa9" "cmt", &iTunesTagHandler}, 1082 {"\xa9" "day", &iTunesTagHandler}, 1083 {"\xa9" "nam", &iTunesTagHandler}, 1084 {"\xa9" "gen", &iTunesTagHandler}, 1085 {"gnre", &iTunesTagHandler}, 1086 {"trkn", &iTunesTagHandler}, 1087 {"disk", &iTunesTagHandler}, 1088 {"\xa9" "wrt", &iTunesTagHandler}, 1089 {"\xa9" "too", &iTunesTagHandler}, 1090 {"tmpo", &iTunesTagHandler}, 1091 {"cprt", &iTunesTagHandler}, 1092 {"cpil", &iTunesTagHandler}, 1093 {"covr", &iTunesTagHandler}, 1094 {"rtng", &iTunesTagHandler}, 1095 {"\xa9" "grp", &iTunesTagHandler}, 1096 {"stik", &iTunesTagHandler}, 1097 {"pcst", &iTunesTagHandler}, 1098 {"catg", &iTunesTagHandler}, 1099 {"keyw", &iTunesTagHandler}, 1100 {"purl", &iTunesTagHandler}, 1101 {"egid", &iTunesTagHandler}, 1102 {"desc", &iTunesTagHandler}, 1103 {"\xa9" "lyr", &iTunesTagHandler}, 1104 {"tvnn", &iTunesTagHandler}, 1105 {"tvsh", &iTunesTagHandler}, 1106 {"tven", &iTunesTagHandler}, 1107 {"tvsn", &iTunesTagHandler}, 1108 {"tves", &iTunesTagHandler}, 1109 {"purd", &iTunesTagHandler}, 1110 {"pgap", &iTunesTagHandler}, 1111 {NULL, NULL}, 1112 }; 1113 1114 /** 1115 * Call the handler for the atom at the given position. 1116 * @return 0 on error, 1 for success, -1 for unknown atom type 1117 */ 1118 static int 1119 handleAtom (HandlerEntry *handlers, const char *input, 1120 size_t size, size_t pos, struct ExtractContext *ec) 1121 { 1122 int i; 1123 if (0 == checkAtomValid (input, size, pos)) 1124 { 1125 return 0; 1126 } 1127 i = 0; 1128 while ((handlers[i].name != NULL) && 1129 (0 != memcmp (&input[pos + 4], handlers[i].name, 4))) 1130 i++; 1131 if (handlers[i].name == NULL) 1132 { 1133 #if DEBUG 1134 char b[5]; 1135 memcpy (b, &input[pos + 4], 4); 1136 b[4] = '\0'; 1137 printf ("No handler for `%s'\n", b); 1138 #endif 1139 return -1; 1140 } 1141 i = handlers[i].handler (input, size, pos, ec); 1142 #if DEBUG 1143 printf ("Running handler for `%4s' at %u completed with result %d\n", 1144 &input[pos + 4], pos, i); 1145 #endif 1146 return i; 1147 } 1148 1149 1150 /* mimetypes: 1151 video/quicktime: mov,qt: Quicktime animation; 1152 video/x-quicktime: mov,qt: Quicktime animation; 1153 application/x-quicktimeplayer: qtl: Quicktime list; 1154 */ 1155 1156 int 1157 EXTRACTOR_qt_extract (const char *data, 1158 size_t size, 1159 EXTRACTOR_MetaDataProcessor proc, 1160 void *proc_cls, 1161 const char *options) 1162 { 1163 struct ExtractContext ec; 1164 ec.proc = proc; 1165 ec.proc_cls = proc_cls; 1166 ec.ret = 0; 1167 processAllAtoms (data, size, &ec); 1168 return ec.ret; 1169 } 1170 1171 1172 /* end of qt_extractor.c */