qt_extractor.c (39859B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2002, 2003, 2006, 2012, 2026 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file plugins/qt_extractor.c 22 * @brief plugin to support QuickTime, MP4, M4A and 3GPP files 23 * @author Vidyut Samanta 24 * @author Christian Grothoff 25 * 26 * This plugin parses the ISO base media / QuickTime "atom" (box) tree. 27 * It does not link against any third-party demuxer: the metadata-bearing 28 * atoms ('ftyp', 'moov' and the boxes nested inside it) are tiny compared 29 * to the media payload ('mdat'), so the plugin streams the top-level 30 * boxes via the extraction context and only ever pulls the small 31 * metadata containers into memory before walking them recursively. 32 */ 33 #include "platform.h" 34 #include "extractor.h" 35 #include <zlib.h> 36 #include <stdint.h> 37 #include <stdbool.h> 38 39 /** 40 * Maximum size (in bytes) of a single top-level atom that we are willing 41 * to pull into memory. 'moov' is always far smaller than this in 42 * practice; the cap merely protects us against hostile or corrupt files. 43 */ 44 #define MAX_ATOM_SIZE (64 * 1024 * 1024) 45 46 /** 47 * Maximum size of a (decompressed) compressed-movie 'cmov' atom. 48 */ 49 #define MAX_CMOV_SIZE (16 * 1024 * 1024) 50 51 /** 52 * Maximum atom nesting depth we are willing to recurse into. Real files 53 * stay well below ten; the limit guards against stack exhaustion from 54 * maliciously deeply nested boxes. 55 */ 56 #define MAX_ATOM_DEPTH 32 57 58 59 /* verbatim from mp3extractor */ 60 static const char *const genre_names[] = { 61 gettext_noop ("Blues"), 62 gettext_noop ("Classic Rock"), 63 gettext_noop ("Country"), 64 gettext_noop ("Dance"), 65 gettext_noop ("Disco"), 66 gettext_noop ("Funk"), 67 gettext_noop ("Grunge"), 68 gettext_noop ("Hip-Hop"), 69 gettext_noop ("Jazz"), 70 gettext_noop ("Metal"), 71 gettext_noop ("New Age"), 72 gettext_noop ("Oldies"), 73 gettext_noop ("Other"), 74 gettext_noop ("Pop"), 75 gettext_noop ("R&B"), 76 gettext_noop ("Rap"), 77 gettext_noop ("Reggae"), 78 gettext_noop ("Rock"), 79 gettext_noop ("Techno"), 80 gettext_noop ("Industrial"), 81 gettext_noop ("Alternative"), 82 gettext_noop ("Ska"), 83 gettext_noop ("Death Metal"), 84 gettext_noop ("Pranks"), 85 gettext_noop ("Soundtrack"), 86 gettext_noop ("Euro-Techno"), 87 gettext_noop ("Ambient"), 88 gettext_noop ("Trip-Hop"), 89 gettext_noop ("Vocal"), 90 gettext_noop ("Jazz+Funk"), 91 gettext_noop ("Fusion"), 92 gettext_noop ("Trance"), 93 gettext_noop ("Classical"), 94 gettext_noop ("Instrumental"), 95 gettext_noop ("Acid"), 96 gettext_noop ("House"), 97 gettext_noop ("Game"), 98 gettext_noop ("Sound Clip"), 99 gettext_noop ("Gospel"), 100 gettext_noop ("Noise"), 101 gettext_noop ("Alt. Rock"), 102 gettext_noop ("Bass"), 103 gettext_noop ("Soul"), 104 gettext_noop ("Punk"), 105 gettext_noop ("Space"), 106 gettext_noop ("Meditative"), 107 gettext_noop ("Instrumental Pop"), 108 gettext_noop ("Instrumental Rock"), 109 gettext_noop ("Ethnic"), 110 gettext_noop ("Gothic"), 111 gettext_noop ("Darkwave"), 112 gettext_noop ("Techno-Industrial"), 113 gettext_noop ("Electronic"), 114 gettext_noop ("Pop-Folk"), 115 gettext_noop ("Eurodance"), 116 gettext_noop ("Dream"), 117 gettext_noop ("Southern Rock"), 118 gettext_noop ("Comedy"), 119 gettext_noop ("Cult"), 120 gettext_noop ("Gangsta Rap"), 121 gettext_noop ("Top 40"), 122 gettext_noop ("Christian Rap"), 123 gettext_noop ("Pop/Funk"), 124 gettext_noop ("Jungle"), 125 gettext_noop ("Native American"), 126 gettext_noop ("Cabaret"), 127 gettext_noop ("New Wave"), 128 gettext_noop ("Psychedelic"), 129 gettext_noop ("Rave"), 130 gettext_noop ("Showtunes"), 131 gettext_noop ("Trailer"), 132 gettext_noop ("Lo-Fi"), 133 gettext_noop ("Tribal"), 134 gettext_noop ("Acid Punk"), 135 gettext_noop ("Acid Jazz"), 136 gettext_noop ("Polka"), 137 gettext_noop ("Retro"), 138 gettext_noop ("Musical"), 139 gettext_noop ("Rock & Roll"), 140 gettext_noop ("Hard Rock"), 141 gettext_noop ("Folk"), 142 gettext_noop ("Folk/Rock"), 143 gettext_noop ("National Folk"), 144 gettext_noop ("Swing"), 145 gettext_noop ("Fast-Fusion"), 146 gettext_noop ("Bebob"), 147 gettext_noop ("Latin"), 148 gettext_noop ("Revival"), 149 gettext_noop ("Celtic"), 150 gettext_noop ("Bluegrass"), 151 gettext_noop ("Avantgarde"), 152 gettext_noop ("Gothic Rock"), 153 gettext_noop ("Progressive Rock"), 154 gettext_noop ("Psychedelic Rock"), 155 gettext_noop ("Symphonic Rock"), 156 gettext_noop ("Slow Rock"), 157 gettext_noop ("Big Band"), 158 gettext_noop ("Chorus"), 159 gettext_noop ("Easy Listening"), 160 gettext_noop ("Acoustic"), 161 gettext_noop ("Humour"), 162 gettext_noop ("Speech"), 163 gettext_noop ("Chanson"), 164 gettext_noop ("Opera"), 165 gettext_noop ("Chamber Music"), 166 gettext_noop ("Sonata"), 167 gettext_noop ("Symphony"), 168 gettext_noop ("Booty Bass"), 169 gettext_noop ("Primus"), 170 gettext_noop ("Porn Groove"), 171 gettext_noop ("Satire"), 172 gettext_noop ("Slow Jam"), 173 gettext_noop ("Club"), 174 gettext_noop ("Tango"), 175 gettext_noop ("Samba"), 176 gettext_noop ("Folklore"), 177 gettext_noop ("Ballad"), 178 gettext_noop ("Power Ballad"), 179 gettext_noop ("Rhythmic Soul"), 180 gettext_noop ("Freestyle"), 181 gettext_noop ("Duet"), 182 gettext_noop ("Punk Rock"), 183 gettext_noop ("Drum Solo"), 184 gettext_noop ("A Cappella"), 185 gettext_noop ("Euro-House"), 186 gettext_noop ("Dance Hall"), 187 gettext_noop ("Goa"), 188 gettext_noop ("Drum & Bass"), 189 gettext_noop ("Club-House"), 190 gettext_noop ("Hardcore"), 191 gettext_noop ("Terror"), 192 gettext_noop ("Indie"), 193 gettext_noop ("BritPop"), 194 gettext_noop ("Negerpunk"), 195 gettext_noop ("Polsk Punk"), 196 gettext_noop ("Beat"), 197 gettext_noop ("Christian Gangsta Rap"), 198 gettext_noop ("Heavy Metal"), 199 gettext_noop ("Black Metal"), 200 gettext_noop ("Crossover"), 201 gettext_noop ("Contemporary Christian"), 202 gettext_noop ("Christian Rock"), 203 gettext_noop ("Merengue"), 204 gettext_noop ("Salsa"), 205 gettext_noop ("Thrash Metal"), 206 gettext_noop ("Anime"), 207 gettext_noop ("JPop"), 208 gettext_noop ("Synthpop"), 209 }; 210 211 #define GENRE_NAME_COUNT \ 212 ((unsigned int) (sizeof genre_names / sizeof (const char *const))) 213 214 215 static const char *languages[] = { 216 "English", 217 "French", 218 "German", 219 "Italian", 220 "Dutch", 221 "Swedish", 222 "Spanish", 223 "Danish", 224 "Portuguese", 225 "Norwegian", 226 "Hebrew", 227 "Japanese", 228 "Arabic", 229 "Finnish", 230 "Greek", 231 "Icelandic", 232 "Maltese", 233 "Turkish", 234 "Croatian", 235 "Traditional Chinese", 236 "Urdu", 237 "Hindi", 238 "Thai", 239 "Korean", 240 "Lithuanian", 241 "Polish", 242 "Hungarian", 243 "Estonian", 244 "Lettish", 245 "Saamisk", 246 "Lappish", 247 "Faeroese", 248 "Farsi", 249 "Russian", 250 "Simplified Chinese", 251 "Flemish", 252 "Irish", 253 "Albanian", 254 "Romanian", 255 "Czech", 256 "Slovak", 257 "Slovenian", 258 "Yiddish", 259 "Serbian", 260 "Macedonian", 261 "Bulgarian", 262 "Ukrainian", 263 "Byelorussian", 264 "Uzbek", 265 "Kazakh", 266 "Azerbaijani", 267 "AzerbaijanAr", 268 "Armenian", 269 "Georgian", 270 "Moldavian", 271 "Kirghiz", 272 "Tajiki", 273 "Turkmen", 274 "Mongolian", 275 "MongolianCyr", 276 "Pashto", 277 "Kurdish", 278 "Kashmiri", 279 "Sindhi", 280 "Tibetan", 281 "Nepali", 282 "Sanskrit", 283 "Marathi", 284 "Bengali", 285 "Assamese", 286 "Gujarati", 287 "Punjabi", 288 "Oriya", 289 "Malayalam", 290 "Kannada", 291 "Tamil", 292 "Telugu", 293 "Sinhalese", 294 "Burmese", 295 "Khmer", 296 "Lao", 297 "Vietnamese", 298 "Indonesian", 299 "Tagalog", 300 "MalayRoman", 301 "MalayArabic", 302 "Amharic", 303 "Tigrinya", 304 "Galla", 305 "Oromo", 306 "Somali", 307 "Swahili", 308 "Ruanda", 309 "Rundi", 310 "Chewa", 311 "Malagasy", 312 "Esperanto", 313 "Welsh", 314 "Basque", 315 "Catalan", 316 "Latin", 317 "Quechua", 318 "Guarani", 319 "Aymara", 320 "Tatar", 321 "Uighur", 322 "Dzongkha", 323 "JavaneseRom", 324 }; 325 326 327 typedef struct 328 { 329 const char *ext; 330 const char *mime; 331 } C2M; 332 333 /* see http://www.mp4ra.org/filetype.html 334 * http://www.ftyps.com/ */ 335 static C2M ftMap[] = { 336 {"qt ", "video/quicktime"}, 337 {"isom", "video/mp4"}, /* ISO Base Media files */ 338 {"iso2", "video/mp4"}, 339 {"iso4", "video/mp4"}, 340 {"iso5", "video/mp4"}, 341 {"iso6", "video/mp4"}, 342 {"avc1", "video/mp4"}, 343 {"mp41", "video/mp4"}, /* MPEG-4 (ISO/IEC 14491-1) version 1 */ 344 {"mp42", "video/mp4"}, /* MPEG-4 (ISO/IEC 14491-1) version 2 */ 345 {"mp71", "video/mp4"}, /* MPEG-4 with MPEG-7 metadata */ 346 {"dash", "video/mp4"}, /* MPEG-DASH */ 347 {"3gp1", "video/3gpp"}, 348 {"3gp2", "video/3gpp"}, 349 {"3gp3", "video/3gpp"}, 350 {"3gp4", "video/3gpp"}, 351 {"3gp5", "video/3gpp"}, 352 {"3gp6", "video/3gpp"}, 353 {"3gp7", "video/3gpp"}, 354 {"3g2a", "video/3gpp2"}, 355 {"3g2b", "video/3gpp2"}, 356 {"3g2c", "video/3gpp2"}, 357 {"mmp4", "video/mp4"}, /* Mobile MPEG-4 */ 358 {"M4A ", "audio/mp4"}, 359 {"M4B ", "audio/mp4"}, /* Apple audio book */ 360 {"M4P ", "audio/mp4"}, 361 {"M4V ", "video/mp4"}, 362 {"M4VH", "video/mp4"}, 363 {"M4VP", "video/mp4"}, 364 {"f4v ", "video/mp4"}, /* Adobe Flash MP4 video */ 365 {"f4a ", "audio/mp4"}, 366 {"f4b ", "audio/mp4"}, 367 {"qt ", "video/quicktime"}, 368 {"mj2s", "video/mj2"}, /* Motion JPEG 2000 */ 369 {"mjp2", "video/mj2"}, 370 {NULL, NULL}, 371 }; 372 373 typedef struct CHE 374 { 375 const char *pfx; 376 enum EXTRACTOR_MetaType type; 377 } CHE; 378 379 static CHE cHm[] = { 380 {"aut", EXTRACTOR_METATYPE_AUTHOR_NAME}, 381 {"cpy", EXTRACTOR_METATYPE_COPYRIGHT}, 382 {"day", EXTRACTOR_METATYPE_CREATION_DATE}, 383 {"ed1", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 384 {"ed2", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 385 {"ed3", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 386 {"ed4", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 387 {"ed5", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 388 {"ed6", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 389 {"ed7", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 390 {"ed8", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 391 {"ed9", EXTRACTOR_METATYPE_MODIFICATION_DATE}, 392 {"cmt", EXTRACTOR_METATYPE_COMMENT}, 393 {"url", EXTRACTOR_METATYPE_URL}, 394 {"enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE}, 395 {"hst", EXTRACTOR_METATYPE_BUILDHOST}, 396 {"nam", EXTRACTOR_METATYPE_TITLE}, 397 {"gen", EXTRACTOR_METATYPE_GENRE}, 398 {"mak", EXTRACTOR_METATYPE_CAMERA_MAKE}, 399 {"mod", EXTRACTOR_METATYPE_CAMERA_MODEL}, 400 {"des", EXTRACTOR_METATYPE_DESCRIPTION}, 401 {"dis", EXTRACTOR_METATYPE_DISCLAIMER}, 402 {"dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR}, 403 {"src", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME}, 404 {"prf", EXTRACTOR_METATYPE_PERFORMER }, 405 {"prd", EXTRACTOR_METATYPE_PRODUCER}, 406 {"PRD", EXTRACTOR_METATYPE_PRODUCT_VERSION}, 407 {"swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE}, 408 {"isr", EXTRACTOR_METATYPE_ISRC}, 409 {"wrt", EXTRACTOR_METATYPE_WRITER}, 410 {"wrn", EXTRACTOR_METATYPE_WARNING}, 411 {"chp", EXTRACTOR_METATYPE_CHAPTER_NAME}, 412 {"inf", EXTRACTOR_METATYPE_DESCRIPTION}, 413 {"req", EXTRACTOR_METATYPE_TARGET_PLATFORM}, /* hardware requirements */ 414 {"fmt", EXTRACTOR_METATYPE_FORMAT}, 415 {"alb", EXTRACTOR_METATYPE_ALBUM}, 416 {"ART", EXTRACTOR_METATYPE_ARTIST}, 417 {"art", EXTRACTOR_METATYPE_ARTIST}, 418 {"too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE}, 419 {"grp", EXTRACTOR_METATYPE_GROUP}, 420 {"lyr", EXTRACTOR_METATYPE_LYRICS}, 421 {"st3", EXTRACTOR_METATYPE_SUBTITLE}, 422 {NULL, EXTRACTOR_METATYPE_RESERVED }, 423 }; 424 425 426 typedef struct 427 { 428 const char *atom_type; 429 enum EXTRACTOR_MetaType type; 430 } ITTagConversionEntry; 431 432 /* iTunes / "ilst" tags: 433 * see http://atomicparsley.sourceforge.net/mpeg-4files.html 434 * 435 * The first byte of the four-character key is the (C) / 0xa9 sign for 436 * the "user" tags; we keep it spelled out here so the table can be 437 * memcmp()ed against the raw atom name. */ 438 static ITTagConversionEntry it_to_extr_table[] = { 439 {"\xa9" "alb", EXTRACTOR_METATYPE_ALBUM}, 440 {"\xa9" "ART", EXTRACTOR_METATYPE_ARTIST}, 441 {"\xa9" "art", EXTRACTOR_METATYPE_ARTIST}, 442 {"aART", EXTRACTOR_METATYPE_ARTIST}, /* album artist */ 443 {"\xa9" "cmt", EXTRACTOR_METATYPE_COMMENT}, 444 {"\xa9" "day", EXTRACTOR_METATYPE_UNKNOWN_DATE}, 445 {"\xa9" "nam", EXTRACTOR_METATYPE_TITLE}, 446 {"\xa9" "trk", EXTRACTOR_METATYPE_TRACK_NUMBER}, 447 {"trkn", EXTRACTOR_METATYPE_TRACK_NUMBER}, 448 {"\xa9" "dis", EXTRACTOR_METATYPE_DISC_NUMBER}, 449 {"disk", EXTRACTOR_METATYPE_DISC_NUMBER}, 450 {"\xa9" "gen", EXTRACTOR_METATYPE_GENRE}, 451 {"gnre", EXTRACTOR_METATYPE_GENRE}, 452 {"\xa9" "wrt", EXTRACTOR_METATYPE_COMPOSER}, 453 {"\xa9" "com", EXTRACTOR_METATYPE_COMPOSER}, 454 {"\xa9" "too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE}, 455 {"\xa9" "enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE}, 456 {"cprt", EXTRACTOR_METATYPE_COPYRIGHT}, 457 {"\xa9" "cpy", EXTRACTOR_METATYPE_COPYRIGHT}, 458 {"\xa9" "grp", EXTRACTOR_METATYPE_GROUP}, 459 {"\xa9" "lyr", EXTRACTOR_METATYPE_LYRICS}, 460 {"\xa9" "st3", EXTRACTOR_METATYPE_SUBTITLE}, 461 {"\xa9" "url", EXTRACTOR_METATYPE_URL}, 462 {"\xa9" "prd", EXTRACTOR_METATYPE_PRODUCER}, 463 {"\xa9" "dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR}, 464 {"\xa9" "prf", EXTRACTOR_METATYPE_PERFORMER}, 465 {"\xa9" "swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE}, 466 {"\xa9" "fmt", EXTRACTOR_METATYPE_FORMAT}, 467 {"\xa9" "inf", EXTRACTOR_METATYPE_DESCRIPTION}, 468 {"tmpo", EXTRACTOR_METATYPE_BEATS_PER_MINUTE}, 469 {"catg", EXTRACTOR_METATYPE_SECTION}, 470 {"keyw", EXTRACTOR_METATYPE_KEYWORDS}, 471 {"desc", EXTRACTOR_METATYPE_DESCRIPTION}, 472 {"ldes", EXTRACTOR_METATYPE_DESCRIPTION}, /* long description */ 473 {"tvnn", EXTRACTOR_METATYPE_NETWORK_NAME}, 474 {"tvsh", EXTRACTOR_METATYPE_SHOW_NAME}, 475 {"tvsn", EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER}, 476 {"tves", EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER}, 477 {"purd", EXTRACTOR_METATYPE_UNKNOWN_DATE}, /* purchase date */ 478 {"covr", EXTRACTOR_METATYPE_COVER_PICTURE}, 479 {NULL, EXTRACTOR_METATYPE_RESERVED} 480 }; 481 482 483 struct Atom 484 { 485 uint32_t size; 486 uint32_t type; 487 }; 488 489 490 struct LongAtom 491 { 492 uint32_t one; 493 uint32_t type; 494 uint64_t size; 495 }; 496 497 498 static uint64_t 499 ntohll (uint64_t n) 500 { 501 #if __BYTE_ORDER == __BIG_ENDIAN 502 return n; 503 #else 504 return (((uint64_t) ntohl (n)) << 32) + ntohl (n >> 32); 505 #endif 506 } 507 508 509 /** 510 * Check if at position pos there is a valid atom. 511 * @return false if the atom is invalid, true if it is valid 512 */ 513 static bool 514 checkAtomValid (const char *buffer, 515 size_t size, 516 size_t pos) 517 { 518 unsigned long long atomSize; 519 const struct Atom *atom; 520 const struct LongAtom *latom; 521 522 if ( (pos >= size) || 523 (pos + sizeof (struct Atom) > size) || 524 (pos + sizeof (struct Atom) < pos) ) 525 return false; 526 atom = (const struct Atom *) &buffer[pos]; 527 if (ntohl (atom->size) == 1) 528 { 529 if ( (pos + sizeof (struct LongAtom) > size) || 530 (pos + sizeof (struct LongAtom) < pos) ) 531 return false; 532 latom = (const struct LongAtom *) &buffer[pos]; 533 atomSize = ntohll (latom->size); 534 if ((atomSize < sizeof (struct LongAtom)) || 535 (atomSize + pos > size) || (atomSize + pos < atomSize)) 536 return false; 537 } 538 else 539 { 540 atomSize = ntohl (atom->size); 541 if ((atomSize < sizeof (struct Atom)) || 542 (atomSize + pos > size) || (atomSize + pos < atomSize)) 543 return false; 544 } 545 return true; 546 } 547 548 549 /** 550 * Assumes that checkAtomValid has already been called. 551 */ 552 static uint64_t 553 getAtomSize (const char *buf) 554 { 555 const struct Atom *atom; 556 const struct LongAtom *latom; 557 558 atom = (const struct Atom *) buf; 559 if (ntohl (atom->size) == 1) 560 { 561 latom = (const struct LongAtom *) buf; 562 return ntohll (latom->size); 563 } 564 return ntohl (atom->size); 565 } 566 567 568 /** 569 * Assumes that checkAtomValid has already been called. 570 */ 571 static size_t 572 getAtomHeaderSize (const char *buf) 573 { 574 const struct Atom *atom; 575 576 atom = (const struct Atom *) buf; 577 if (ntohl (atom->size) == 1) 578 return sizeof (const struct LongAtom); 579 return sizeof (struct Atom); 580 } 581 582 583 /** 584 * State carried through the recursive atom walk. 585 */ 586 struct ExtractContext 587 { 588 /** 589 * The libextractor processing callback. 590 */ 591 EXTRACTOR_MetaDataProcessor proc; 592 593 /** 594 * Closure for @e proc. 595 */ 596 void *proc_cls; 597 598 /** 599 * Set to non-zero once @e proc asked us to stop. 600 */ 601 int ret; 602 603 /** 604 * Current atom nesting depth (for recursion limiting). 605 */ 606 unsigned int depth; 607 }; 608 609 610 static void 611 addKeyword (enum EXTRACTOR_MetaType type, 612 const char *str, 613 struct ExtractContext *ec) 614 { 615 if (ec->ret != 0) 616 return; 617 ec->ret = ec->proc (ec->proc_cls, 618 "qt", 619 type, 620 EXTRACTOR_METAFORMAT_UTF8, 621 "text/plain", 622 str, 623 strlen (str) + 1); 624 } 625 626 627 static void 628 addBinary (enum EXTRACTOR_MetaType type, 629 const char *mime, 630 const void *data, 631 size_t data_len, 632 struct ExtractContext *ec) 633 { 634 if (ec->ret != 0) 635 return; 636 ec->ret = ec->proc (ec->proc_cls, 637 "qt", 638 type, 639 EXTRACTOR_METAFORMAT_BINARY, 640 mime, 641 data, 642 data_len); 643 } 644 645 646 /** 647 * Assumes that checkAtomValid has already been called. 648 * 649 * @return 0 on a fatal error (stop the current level), 650 * 1 for success, -1 for "atom not understood, skip it" 651 */ 652 typedef int 653 (*AtomHandler) (const char *input, 654 size_t size, 655 size_t pos, 656 struct ExtractContext *ec); 657 658 struct HandlerEntry 659 { 660 const char *name; 661 AtomHandler handler; 662 }; 663 664 665 /** 666 * Call the handler for the atom at the given position. 667 * Will check validity of the given atom. 668 * 669 * @return 0 on error, 1 for success, -1 for unknown atom type 670 */ 671 static int 672 handleAtom (struct HandlerEntry *handlers, 673 const char *input, 674 size_t size, 675 size_t pos, 676 struct ExtractContext *ec); 677 678 static struct HandlerEntry all_handlers[]; 679 680 /** 681 * Process atoms. 682 * @return 0 on error, 1 for success, -1 for unknown atom type 683 */ 684 static int 685 processAtoms (struct HandlerEntry *handlers, 686 const char *input, 687 size_t size, 688 struct ExtractContext *ec) 689 { 690 size_t pos; 691 692 if (size < sizeof (struct Atom)) 693 return 1; 694 if (ec->depth >= MAX_ATOM_DEPTH) 695 return 1; 696 ec->depth++; 697 pos = 0; 698 while (pos < size - sizeof (struct Atom)) 699 { 700 if (0 == handleAtom (handlers, 701 input, 702 size, 703 pos, 704 ec)) 705 { 706 ec->depth--; 707 return 0; 708 } 709 if (0 != ec->ret) 710 break; /* processor asked us to stop */ 711 pos += getAtomSize (&input[pos]); 712 } 713 ec->depth--; 714 return 1; 715 } 716 717 718 /** 719 * Process all atoms. 720 * @return 0 on error, 1 for success, -1 for unknown atom type 721 */ 722 static int 723 processAllAtoms (const char *input, 724 size_t size, 725 struct ExtractContext *ec) 726 { 727 return processAtoms (all_handlers, 728 input, 729 size, 730 ec); 731 } 732 733 734 /** 735 * Handle the moov atom. 736 * @return 0 on error, 1 for success, -1 for unknown atom type 737 */ 738 static int 739 moovHandler (const char *input, 740 size_t size, 741 size_t pos, 742 struct ExtractContext *ec) 743 { 744 uint32_t hdr = getAtomHeaderSize (&input[pos]); 745 746 return processAllAtoms (&input[pos + hdr], 747 getAtomSize (&input[pos]) - hdr, 748 ec); 749 } 750 751 752 /* see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap1/chapter_2_section_5.html */ 753 struct FileType 754 { 755 struct Atom header; 756 /* major brand */ 757 char type[4]; 758 /* minor version */ 759 unsigned int version; 760 /* compatible brands */ 761 char compatibility[4]; 762 }; 763 764 765 static int 766 ftypHandler (const char *input, 767 size_t size, 768 size_t pos, 769 struct ExtractContext *ec) 770 { 771 const struct FileType *ft; 772 773 if (getAtomSize (&input[pos]) < sizeof (struct FileType)) 774 return -1; 775 ft = (const struct FileType *) &input[pos]; 776 777 for (unsigned i = 0; 778 NULL != ftMap[i].ext; 779 i++) 780 { 781 if (0 != memcmp (ft->type, 782 ftMap[i].ext, 783 4)) 784 { 785 addKeyword (EXTRACTOR_METATYPE_MIMETYPE, 786 ftMap[i].mime, 787 ec); 788 break; 789 } 790 } 791 return 1; 792 } 793 794 795 /** 796 * Handle the movie header ('mvhd') atom, reporting the movie duration. 797 * Supports both the 32-bit (version 0) and 64-bit (version 1) layouts. 798 * 799 * @return 1 for success, -1 if the atom could not be parsed 800 */ 801 static int 802 mvhdHandler (const char *input, 803 size_t size, 804 size_t pos, 805 struct ExtractContext *ec) 806 { 807 uint64_t asize = getAtomSize (&input[pos]); 808 uint32_t hdr = getAtomHeaderSize (&input[pos]); 809 const unsigned char *body; 810 unsigned char version; 811 uint64_t timeScale; 812 uint64_t duration; 813 char dur[24]; 814 815 if (asize < hdr + 4) 816 return -1; 817 body = (const unsigned char *) &input[pos + hdr]; 818 version = body[0]; 819 if (0 == version) 820 { 821 /* version(1) flags(3) creation(4) modification(4) 822 timeScale(4) duration(4) ... */ 823 if (asize < hdr + 20) 824 return -1; 825 timeScale = ntohl (*(const uint32_t *) &body[12]); 826 duration = ntohl (*(const uint32_t *) &body[16]); 827 } 828 else if (1 == version) 829 { 830 /* version(1) flags(3) creation(8) modification(8) 831 timeScale(4) duration(8) ... */ 832 if (asize < hdr + 32) 833 return -1; 834 timeScale = ntohl (*(const uint32_t *) &body[20]); 835 duration = ntohll (*(const uint64_t *) &body[24]); 836 } 837 else 838 { 839 return -1; 840 } 841 if (0 == timeScale) 842 return -1; 843 snprintf (dur, 844 sizeof (dur), 845 "%llus", 846 (unsigned long long) (duration / timeScale)); 847 addKeyword (EXTRACTOR_METATYPE_DURATION, 848 dur, 849 ec); 850 return 1; 851 } 852 853 854 struct CompressedMovieHeaderAtom 855 { 856 struct Atom cmovAtom; 857 struct Atom dcomAtom; 858 char compressor[4]; 859 struct Atom cmvdAtom; 860 uint32_t decompressedSize; 861 }; 862 863 864 static int 865 cmovHandler (const char *input, 866 size_t size, 867 size_t pos, 868 struct ExtractContext *ec) 869 { 870 const struct CompressedMovieHeaderAtom *c; 871 unsigned int s; 872 char *buf; 873 int ret; 874 z_stream z_state; 875 int z_ret_code; 876 877 if (getAtomSize (&input[pos]) < sizeof (struct CompressedMovieHeaderAtom)) 878 return -1; 879 c = (const struct CompressedMovieHeaderAtom *) &input[pos]; 880 if ((ntohl (c->dcomAtom.size) != 12) || 881 (0 != memcmp (&c->dcomAtom.type, "dcom", 4)) || 882 (0 != memcmp (c->compressor, "zlib", 4)) || 883 (0 != memcmp (&c->cmvdAtom.type, "cmvd", 4)) || 884 (ntohl (c->cmvdAtom.size) != 885 getAtomSize (&input[pos]) - sizeof (struct Atom) * 2 - 4)) 886 { 887 return -1; /* dcom must be 12 bytes */ 888 } 889 s = ntohl (c->decompressedSize); 890 if (s > MAX_CMOV_SIZE) 891 return -1; /* ignore, too big! */ 892 buf = malloc (s); 893 if (buf == NULL) 894 return -1; /* out of memory, handle gracefully */ 895 896 memset (&z_state, 0, sizeof (z_state)); 897 z_state.next_in = (unsigned char *) &c[1]; 898 z_state.avail_in = ntohl (c->cmvdAtom.size); 899 z_state.avail_out = s; 900 z_state.next_out = (unsigned char *) buf; 901 z_state.zalloc = (alloc_func) 0; 902 z_state.zfree = (free_func) 0; 903 z_state.opaque = (voidpf) 0; 904 z_ret_code = inflateInit (&z_state); 905 if (Z_OK != z_ret_code) 906 { 907 free (buf); 908 return -1; /* crc error? */ 909 } 910 z_ret_code = inflate (&z_state, 911 Z_NO_FLUSH); 912 if ( (z_ret_code != Z_OK) && 913 (z_ret_code != Z_STREAM_END) ) 914 { 915 inflateEnd (&z_state); 916 free (buf); 917 return -1; /* decode error? */ 918 } 919 z_ret_code = inflateEnd (&z_state); 920 if (Z_OK != z_ret_code) 921 { 922 free (buf); 923 return -1; /* decode error? */ 924 } 925 ret = handleAtom (all_handlers, 926 buf, 927 s, 928 0, 929 ec); 930 free (buf); 931 return ret; 932 } 933 934 935 /** 936 * Handle the track header ('tkhd') atom. The (fixed-point) track 937 * width and height are the final eight bytes of the atom regardless of 938 * the atom's version, so we read them relative to the end of the box. 939 * 940 * @return 1 for success, -1 if the atom could not be parsed 941 */ 942 static int 943 tkhdHandler (const char *input, 944 size_t size, 945 size_t pos, 946 struct ExtractContext *ec) 947 { 948 uint64_t asize = getAtomSize (&input[pos]); 949 uint32_t hdr = getAtomHeaderSize (&input[pos]); 950 const unsigned char *p; 951 unsigned int width; 952 unsigned int height; 953 char dimensions[40]; 954 955 if (asize < hdr + 8) 956 return -1; 957 p = (const unsigned char *) &input[pos + asize - 8]; 958 /* 16.16 fixed point; the integer part is the high 16 bits */ 959 width = (p[0] << 8) | p[1]; 960 height = (p[4] << 8) | p[5]; 961 if (0 != width) 962 { 963 /* if actually a/the video track */ 964 snprintf (dimensions, 965 sizeof (dimensions), 966 "%ux%u", 967 width, 968 height); 969 addKeyword (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, 970 dimensions, 971 ec); 972 } 973 return 1; 974 } 975 976 977 static int 978 trakHandler (const char *input, 979 size_t size, 980 size_t pos, 981 struct ExtractContext *ec) 982 { 983 uint32_t hdr = getAtomHeaderSize (&input[pos]); 984 985 return processAllAtoms (&input[pos + hdr], 986 getAtomSize (&input[pos]) - hdr, 987 ec); 988 } 989 990 991 static int 992 metaHandler (const char *input, 993 size_t size, 994 size_t pos, 995 struct ExtractContext *ec) 996 { 997 uint32_t hdr = getAtomHeaderSize (&input[pos]); 998 999 if (getAtomSize (&input[pos]) < hdr + 4) 1000 return -1; 1001 return processAllAtoms (&input[pos + hdr + 4], 1002 getAtomSize (&input[pos]) - hdr - 4, 1003 ec); 1004 } 1005 1006 1007 struct InternationalText 1008 { 1009 struct Atom header; 1010 uint16_t length; 1011 uint16_t language; 1012 }; 1013 1014 1015 /* 1016 * see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap2/chapter_3_section_2.html 1017 * "User Data Text Strings and Language Codes" 1018 */ 1019 static int 1020 processTextTag (const char *input, 1021 size_t size, 1022 size_t pos, 1023 enum EXTRACTOR_MetaType type, struct ExtractContext *ec) 1024 { 1025 uint64_t as; 1026 uint16_t len; 1027 uint16_t lang; 1028 const struct InternationalText *txt; 1029 char *meta; 1030 1031 /* contains "international text": 1032 16-bit size + 16 bit language code */ 1033 as = getAtomSize (&input[pos]); 1034 if (as < sizeof (struct InternationalText)) 1035 return -1; /* invalid */ 1036 txt = (const struct InternationalText *) &input[pos]; 1037 len = ntohs (txt->length); 1038 if (len + sizeof (struct InternationalText) > as) 1039 return -1; /* invalid */ 1040 lang = ntohs (txt->language); 1041 if (lang < sizeof (languages) / sizeof (char *)) 1042 addKeyword (EXTRACTOR_METATYPE_LANGUAGE, 1043 languages[lang], 1044 ec); 1045 1046 meta = malloc (len + 1); 1047 if (NULL == meta) 1048 return -1; 1049 memcpy (meta, 1050 &txt[1], 1051 len); 1052 meta[len] = '\0'; 1053 for (unsigned int i = 0; i < len; i++) 1054 if (meta[i] == '\r') 1055 meta[i] = '\n'; 1056 addKeyword (type, 1057 meta, 1058 ec); 1059 free (meta); 1060 return 1; 1061 } 1062 1063 1064 static int 1065 c_Handler (const char *input, 1066 size_t size, 1067 size_t pos, 1068 struct ExtractContext *ec) 1069 { 1070 for (unsigned int i = 0; 1071 NULL != cHm[i].pfx; 1072 i++) 1073 if (0 == memcmp (&input[pos + 5], 1074 cHm[i].pfx, 1075 3)) 1076 return processTextTag (input, 1077 size, 1078 pos, 1079 cHm[i].type, 1080 ec); 1081 return -1; /* not found */ 1082 } 1083 1084 1085 /** 1086 * Process the 'data' atom nested inside an iTunes-style 'ilst' entry. 1087 * 1088 * @param input start of the buffer 1089 * @param size size of the parent (ilst entry) atom 1090 * @param pos offset of the 'data' atom within @a input 1091 * @param patom pointer to the parent (ilst entry) atom 1092 * @param type metadata type to report the value as 1093 * @return 1 for success, -1 if the atom could not be handled 1094 */ 1095 static int 1096 processDataAtom (const char *input, 1097 size_t size, /* parent atom size */ 1098 size_t pos, 1099 const char *patom, 1100 enum EXTRACTOR_MetaType type, 1101 struct ExtractContext *ec) 1102 { 1103 char *meta; 1104 unsigned char version; 1105 unsigned int wellknown; 1106 uint64_t asize; 1107 unsigned int len; 1108 uint32_t hdr; 1109 int i; 1110 1111 hdr = getAtomHeaderSize (&input[pos]); 1112 asize = getAtomSize (&input[pos]); 1113 if (0 != 1114 memcmp (&input[pos + 4], 1115 "data", 1116 4)) 1117 return -1; 1118 1119 if ((asize < hdr + 8) || /* header + u32 type + u32 locale */ 1120 (asize > (getAtomSize (&patom[0]) - 8))) 1121 return -1; 1122 1123 len = (unsigned int) (asize - (hdr + 8)); 1124 1125 version = input[pos + 8]; 1126 /* "well known type" indicator (the low 24 bits of the type field) */ 1127 wellknown = ((unsigned char) input[pos + 9] << 16) 1128 | ((unsigned char) input[pos + 10] << 8) 1129 | (unsigned char) input[pos + 11]; 1130 1131 if (0 != version) 1132 return -1; 1133 1134 /* cover art: well-known type 13 = JPEG, 14 = PNG, 27 = BMP */ 1135 if ( (EXTRACTOR_METATYPE_COVER_PICTURE == type) && 1136 ( (13 == wellknown) || 1137 (14 == wellknown) || 1138 (27 == wellknown) ) ) 1139 { 1140 const char *mime; 1141 1142 if (0 == len) 1143 return -1; 1144 switch (wellknown) 1145 { 1146 case 13: 1147 mime = "image/jpeg"; 1148 break; 1149 case 14: 1150 mime = "image/png"; 1151 break; 1152 default: 1153 mime = "image/bmp"; 1154 break; 1155 } 1156 addBinary (type, 1157 mime, 1158 &input[pos + 16], 1159 len, 1160 ec); 1161 return 1; 1162 } 1163 1164 if (0x0 == wellknown) /* binary data */ 1165 { 1166 if (0 == 1167 memcmp (&patom[4], 1168 "gnre", 1169 4)) 1170 { 1171 if (len >= 2) 1172 { 1173 uint16_t genre = ((uint8_t) input[pos + 16] << 8) 1174 | (uint8_t) input[pos + 17]; 1175 1176 if ((genre > 0) && (genre <= GENRE_NAME_COUNT)) 1177 addKeyword (type, 1178 genre_names[genre - 1], 1179 ec); 1180 } 1181 return 1; 1182 } 1183 else if ( (0 == 1184 memcmp (&patom[4], 1185 "trkn", 1186 4)) || 1187 (0 == 1188 memcmp (&patom[4], 1189 "disk", 1190 4))) 1191 { 1192 if (len >= 4) 1193 { 1194 unsigned short n = ((unsigned char) input[pos + 18] << 8) 1195 | (unsigned char) input[pos + 19]; 1196 char s[8]; 1197 1198 snprintf (s, 1199 sizeof (s), 1200 "%d", 1201 n); 1202 addKeyword (type, 1203 s, 1204 ec); 1205 } 1206 return 1; 1207 } 1208 else if (0 == 1209 memcmp (&patom[4], 1210 "tmpo", 1211 4)) 1212 { 1213 if (len >= 2) 1214 { 1215 unsigned short n = ((unsigned char) input[pos + 16] << 8) 1216 | (unsigned char) input[pos + 17]; 1217 char s[8]; 1218 1219 snprintf (s, 1220 sizeof (s), 1221 "%u", 1222 n); 1223 addKeyword (type, 1224 s, 1225 ec); 1226 } 1227 return 1; 1228 } 1229 else 1230 { 1231 return -1; 1232 } 1233 } 1234 else if (0x15 == wellknown) /* signed/unsigned big-endian integer */ 1235 { 1236 unsigned long long n = 0; 1237 char s[24]; 1238 unsigned int j; 1239 1240 if ((len < 1) || (len > 8)) 1241 return -1; 1242 for (j = 0; j < len; j++) 1243 n = (n << 8) | (unsigned char) input[pos + 16 + j]; 1244 snprintf (s, 1245 sizeof (s), 1246 "%llu", 1247 n); 1248 addKeyword (type, 1249 s, 1250 ec); 1251 return 1; 1252 } 1253 else if (wellknown == 0x1) /* UTF-8 text data */ 1254 { 1255 meta = malloc (len + 1); 1256 if (meta == NULL) 1257 return -1; 1258 memcpy (meta, 1259 &input[pos + 16], 1260 len); 1261 meta[len] = '\0'; 1262 for (i = 0; i < len; i++) 1263 if (meta[i] == '\r') 1264 meta[i] = '\n'; 1265 addKeyword (type, 1266 meta, 1267 ec); 1268 free (meta); 1269 return 1; 1270 } 1271 1272 return -1; 1273 } 1274 1275 1276 /* NOTE: iTunes tag processing should, in theory, be limited to iTunes 1277 * file types (from ftyp), but, in reality, it seems that there are other 1278 * files, like 3gpp, out in the wild with iTunes tags. */ 1279 static int 1280 iTunesTagHandler (const char *input, 1281 size_t size, 1282 size_t pos, 1283 struct ExtractContext *ec) 1284 { 1285 uint64_t asize; 1286 uint32_t hdr; 1287 1288 hdr = getAtomHeaderSize (&input[pos]); 1289 asize = getAtomSize (&input[pos]); 1290 1291 if (asize < hdr + 8) /* header + at least one atom */ 1292 return -1; 1293 1294 for (unsigned int i = 0; 1295 NULL != it_to_extr_table[i].atom_type; 1296 i++) 1297 if (0 == memcmp (&input[pos + 4], 1298 it_to_extr_table[i].atom_type, 1299 4)) 1300 return processDataAtom (input, 1301 asize, 1302 pos + hdr, 1303 &input[pos], 1304 it_to_extr_table[i].type, 1305 ec); 1306 return -1; 1307 } 1308 1309 1310 /** 1311 * Handle the iTunes metadata list ('ilst'). Its children have 1312 * arbitrary four-character keys, so rather than a name table we simply 1313 * iterate them and let #iTunesTagHandler decide what is interesting. 1314 * 1315 * @return 0 on a fatal error, 1 otherwise 1316 */ 1317 static int 1318 ilstHandler (const char *input, 1319 size_t size, 1320 size_t pos, 1321 struct ExtractContext *ec) 1322 { 1323 uint32_t hdr = getAtomHeaderSize (&input[pos]); 1324 size_t end = pos + getAtomSize (&input[pos]); 1325 size_t cpos = pos + hdr; 1326 1327 if (ec->depth >= MAX_ATOM_DEPTH) 1328 return 1; 1329 ec->depth++; 1330 while ((cpos + sizeof (struct Atom) <= end) && 1331 (checkAtomValid (input, end, cpos))) 1332 { 1333 iTunesTagHandler (input, end, cpos, ec); 1334 if (0 != ec->ret) 1335 break; 1336 cpos += getAtomSize (&input[cpos]); 1337 } 1338 ec->depth--; 1339 return 1; 1340 } 1341 1342 1343 /** 1344 * Handle the user-data ('udta') atom. It mixes classic QuickTime 1345 * '(C)xyz' international-text tags with structural sub-atoms such as 1346 * 'meta'/'ilst', so we iterate the children and dispatch accordingly. 1347 * 1348 * @return 0 on a fatal error, 1 otherwise 1349 */ 1350 static int 1351 udtaHandler (const char *input, 1352 size_t size, 1353 size_t pos, 1354 struct ExtractContext *ec) 1355 { 1356 uint32_t hdr = getAtomHeaderSize (&input[pos]); 1357 size_t end = pos + getAtomSize (&input[pos]); 1358 size_t cpos = pos + hdr; 1359 1360 if (ec->depth >= MAX_ATOM_DEPTH) 1361 return 1; 1362 ec->depth++; 1363 while ((cpos + sizeof (struct Atom) <= end) && 1364 (checkAtomValid (input, end, cpos))) 1365 { 1366 if (0xA9 == (unsigned char) input[cpos + 4]) 1367 c_Handler (input, 1368 end, 1369 cpos, 1370 ec); 1371 else 1372 handleAtom (all_handlers, 1373 input, 1374 end, 1375 cpos, 1376 ec); 1377 if (0 != ec->ret) 1378 break; 1379 cpos += getAtomSize (&input[cpos]); 1380 } 1381 ec->depth--; 1382 return 1; 1383 } 1384 1385 1386 static struct HandlerEntry all_handlers[] = { 1387 {"moov", &moovHandler}, 1388 {"cmov", &cmovHandler}, 1389 {"mvhd", &mvhdHandler}, 1390 {"trak", &trakHandler}, 1391 {"tkhd", &tkhdHandler}, 1392 {"ilst", &ilstHandler}, 1393 {"meta", &metaHandler}, 1394 {"udta", &udtaHandler}, 1395 {"ftyp", &ftypHandler}, 1396 {NULL, NULL}, 1397 }; 1398 1399 1400 /** 1401 * Call the handler for the atom at the given position. 1402 * @return 0 on error, 1 for success, -1 for unknown atom type 1403 */ 1404 static int 1405 handleAtom (struct HandlerEntry *handlers, 1406 const char *input, 1407 size_t size, 1408 size_t pos, 1409 struct ExtractContext *ec) 1410 { 1411 if (! checkAtomValid (input, 1412 size, 1413 pos)) 1414 return 0; 1415 for (unsigned i = 0; 1416 handlers[i].name != NULL; 1417 i++) 1418 { 1419 if (0 == 1420 memcmp (&input[pos + 4], 1421 handlers[i].name, 1422 4)) 1423 { 1424 return handlers[i].handler (input, 1425 size, 1426 pos, 1427 ec); 1428 } 1429 } 1430 return -1; 1431 } 1432 1433 1434 /** 1435 * Read exactly @a len bytes from absolute offset @a off into @a dst. 1436 * 1437 * The extraction context exposes the file through a sliding shared 1438 * memory window, so a single read may return fewer bytes than 1439 * requested; we seek once and then loop until the request is satisfied. 1440 * 1441 * @return 0 on success, -1 on error / short file 1442 */ 1443 static int 1444 qt_pread (struct EXTRACTOR_ExtractContext *ec, 1445 uint64_t off, 1446 void *dst, 1447 size_t len) 1448 { 1449 unsigned char *out = dst; 1450 1451 if ((int64_t) off != ec->seek (ec->cls, (int64_t) off, SEEK_SET)) 1452 return -1; 1453 while (len > 0) 1454 { 1455 void *buf; 1456 ssize_t got; 1457 1458 got = ec->read (ec->cls, 1459 &buf, 1460 len); 1461 if (got <= 0) 1462 return -1; 1463 memcpy (out, 1464 buf, 1465 (size_t) got); 1466 out += got; 1467 len -= (size_t) got; 1468 } 1469 return 0; 1470 } 1471 1472 1473 /** 1474 * Top-level atom types worth pulling into memory. Everything else 1475 * (notably the huge 'mdat' payload, plus 'free'/'skip'/'wide') is 1476 * skipped without ever being read. 1477 */ 1478 static bool 1479 is_interesting_top_atom (const unsigned char *type) 1480 { 1481 static const char *const interesting[] = { 1482 "moov", "ftyp", "meta", "udta", "uuid", "pnot", NULL 1483 }; 1484 1485 for (unsigned int i = 0; 1486 NULL != interesting[i]; 1487 i++) 1488 if (0 == memcmp (type, 1489 interesting[i], 1490 4)) 1491 return true; 1492 return false; 1493 } 1494 1495 1496 /** 1497 * Main entry method for the QuickTime/MP4 extraction plugin. 1498 * 1499 * @param ec extraction context provided to the plugin 1500 */ 1501 void 1502 EXTRACTOR_qt_extract_method (struct EXTRACTOR_ExtractContext *ec); 1503 1504 void 1505 EXTRACTOR_qt_extract_method (struct EXTRACTOR_ExtractContext *ec) 1506 { 1507 struct ExtractContext xc; 1508 uint64_t fsize; 1509 uint64_t pos; 1510 1511 fsize = ec->get_size (ec->cls); 1512 if ((UINT64_MAX == fsize) || (fsize < sizeof (struct Atom))) 1513 return; 1514 1515 xc.proc = ec->proc; 1516 xc.proc_cls = ec->cls; 1517 xc.ret = 0; 1518 xc.depth = 0; 1519 1520 pos = 0; 1521 while ( (0 == xc.ret) && 1522 (pos + sizeof (struct Atom) <= fsize) ) 1523 { 1524 unsigned char hdr[16]; 1525 uint64_t asize; 1526 unsigned int hsize; 1527 1528 if (0 != qt_pread (ec, pos, hdr, 8)) 1529 break; 1530 asize = ((uint64_t) hdr[0] << 24) | ((uint64_t) hdr[1] << 16) 1531 | ((uint64_t) hdr[2] << 8) | (uint64_t) hdr[3]; 1532 if (1 == asize) 1533 { 1534 if ((pos + 16 > fsize) || 1535 (0 != qt_pread (ec, pos + 8, &hdr[8], 8))) 1536 break; 1537 asize = ((uint64_t) hdr[8] << 56) | ((uint64_t) hdr[9] << 48) 1538 | ((uint64_t) hdr[10] << 40) | ((uint64_t) hdr[11] << 32) 1539 | ((uint64_t) hdr[12] << 24) | ((uint64_t) hdr[13] << 16) 1540 | ((uint64_t) hdr[14] << 8) | (uint64_t) hdr[15]; 1541 hsize = 16; 1542 } 1543 else if (0 == asize) 1544 { 1545 /* atom extends to end of file */ 1546 asize = fsize - pos; 1547 hsize = 8; 1548 } 1549 else 1550 { 1551 hsize = 8; 1552 } 1553 if ((asize < hsize) || (pos + asize > fsize)) 1554 break; 1555 1556 if (is_interesting_top_atom (&hdr[4]) && 1557 (asize <= MAX_ATOM_SIZE)) 1558 { 1559 char *buf = malloc ((size_t) asize); 1560 1561 if (NULL != buf) 1562 { 1563 if (0 == qt_pread (ec, 1564 pos, 1565 buf, 1566 (size_t) asize)) 1567 handleAtom (all_handlers, 1568 buf, 1569 (size_t) asize, 1570 0, 1571 &xc); 1572 free (buf); 1573 } 1574 } 1575 pos += asize; 1576 } 1577 } 1578 1579 1580 /* end of qt_extractor.c */