libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

qt_extractor.c (39859B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002, 2003, 2006, 2012, 2026 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 /**
     21  * @file plugins/qt_extractor.c
     22  * @brief plugin to support QuickTime, MP4, M4A and 3GPP files
     23  * @author Vidyut Samanta
     24  * @author Christian Grothoff
     25  *
     26  * This plugin parses the ISO base media / QuickTime "atom" (box) tree.
     27  * It does not link against any third-party demuxer: the metadata-bearing
     28  * atoms ('ftyp', 'moov' and the boxes nested inside it) are tiny compared
     29  * to the media payload ('mdat'), so the plugin streams the top-level
     30  * boxes via the extraction context and only ever pulls the small
     31  * metadata containers into memory before walking them recursively.
     32  */
     33 #include "platform.h"
     34 #include "extractor.h"
     35 #include <zlib.h>
     36 #include <stdint.h>
     37 #include <stdbool.h>
     38 
     39 /**
     40  * Maximum size (in bytes) of a single top-level atom that we are willing
     41  * to pull into memory.  'moov' is always far smaller than this in
     42  * practice; the cap merely protects us against hostile or corrupt files.
     43  */
     44 #define MAX_ATOM_SIZE (64 * 1024 * 1024)
     45 
     46 /**
     47  * Maximum size of a (decompressed) compressed-movie 'cmov' atom.
     48  */
     49 #define MAX_CMOV_SIZE (16 * 1024 * 1024)
     50 
     51 /**
     52  * Maximum atom nesting depth we are willing to recurse into.  Real files
     53  * stay well below ten; the limit guards against stack exhaustion from
     54  * maliciously deeply nested boxes.
     55  */
     56 #define MAX_ATOM_DEPTH 32
     57 
     58 
     59 /* verbatim from mp3extractor */
     60 static const char *const genre_names[] = {
     61   gettext_noop ("Blues"),
     62   gettext_noop ("Classic Rock"),
     63   gettext_noop ("Country"),
     64   gettext_noop ("Dance"),
     65   gettext_noop ("Disco"),
     66   gettext_noop ("Funk"),
     67   gettext_noop ("Grunge"),
     68   gettext_noop ("Hip-Hop"),
     69   gettext_noop ("Jazz"),
     70   gettext_noop ("Metal"),
     71   gettext_noop ("New Age"),
     72   gettext_noop ("Oldies"),
     73   gettext_noop ("Other"),
     74   gettext_noop ("Pop"),
     75   gettext_noop ("R&B"),
     76   gettext_noop ("Rap"),
     77   gettext_noop ("Reggae"),
     78   gettext_noop ("Rock"),
     79   gettext_noop ("Techno"),
     80   gettext_noop ("Industrial"),
     81   gettext_noop ("Alternative"),
     82   gettext_noop ("Ska"),
     83   gettext_noop ("Death Metal"),
     84   gettext_noop ("Pranks"),
     85   gettext_noop ("Soundtrack"),
     86   gettext_noop ("Euro-Techno"),
     87   gettext_noop ("Ambient"),
     88   gettext_noop ("Trip-Hop"),
     89   gettext_noop ("Vocal"),
     90   gettext_noop ("Jazz+Funk"),
     91   gettext_noop ("Fusion"),
     92   gettext_noop ("Trance"),
     93   gettext_noop ("Classical"),
     94   gettext_noop ("Instrumental"),
     95   gettext_noop ("Acid"),
     96   gettext_noop ("House"),
     97   gettext_noop ("Game"),
     98   gettext_noop ("Sound Clip"),
     99   gettext_noop ("Gospel"),
    100   gettext_noop ("Noise"),
    101   gettext_noop ("Alt. Rock"),
    102   gettext_noop ("Bass"),
    103   gettext_noop ("Soul"),
    104   gettext_noop ("Punk"),
    105   gettext_noop ("Space"),
    106   gettext_noop ("Meditative"),
    107   gettext_noop ("Instrumental Pop"),
    108   gettext_noop ("Instrumental Rock"),
    109   gettext_noop ("Ethnic"),
    110   gettext_noop ("Gothic"),
    111   gettext_noop ("Darkwave"),
    112   gettext_noop ("Techno-Industrial"),
    113   gettext_noop ("Electronic"),
    114   gettext_noop ("Pop-Folk"),
    115   gettext_noop ("Eurodance"),
    116   gettext_noop ("Dream"),
    117   gettext_noop ("Southern Rock"),
    118   gettext_noop ("Comedy"),
    119   gettext_noop ("Cult"),
    120   gettext_noop ("Gangsta Rap"),
    121   gettext_noop ("Top 40"),
    122   gettext_noop ("Christian Rap"),
    123   gettext_noop ("Pop/Funk"),
    124   gettext_noop ("Jungle"),
    125   gettext_noop ("Native American"),
    126   gettext_noop ("Cabaret"),
    127   gettext_noop ("New Wave"),
    128   gettext_noop ("Psychedelic"),
    129   gettext_noop ("Rave"),
    130   gettext_noop ("Showtunes"),
    131   gettext_noop ("Trailer"),
    132   gettext_noop ("Lo-Fi"),
    133   gettext_noop ("Tribal"),
    134   gettext_noop ("Acid Punk"),
    135   gettext_noop ("Acid Jazz"),
    136   gettext_noop ("Polka"),
    137   gettext_noop ("Retro"),
    138   gettext_noop ("Musical"),
    139   gettext_noop ("Rock & Roll"),
    140   gettext_noop ("Hard Rock"),
    141   gettext_noop ("Folk"),
    142   gettext_noop ("Folk/Rock"),
    143   gettext_noop ("National Folk"),
    144   gettext_noop ("Swing"),
    145   gettext_noop ("Fast-Fusion"),
    146   gettext_noop ("Bebob"),
    147   gettext_noop ("Latin"),
    148   gettext_noop ("Revival"),
    149   gettext_noop ("Celtic"),
    150   gettext_noop ("Bluegrass"),
    151   gettext_noop ("Avantgarde"),
    152   gettext_noop ("Gothic Rock"),
    153   gettext_noop ("Progressive Rock"),
    154   gettext_noop ("Psychedelic Rock"),
    155   gettext_noop ("Symphonic Rock"),
    156   gettext_noop ("Slow Rock"),
    157   gettext_noop ("Big Band"),
    158   gettext_noop ("Chorus"),
    159   gettext_noop ("Easy Listening"),
    160   gettext_noop ("Acoustic"),
    161   gettext_noop ("Humour"),
    162   gettext_noop ("Speech"),
    163   gettext_noop ("Chanson"),
    164   gettext_noop ("Opera"),
    165   gettext_noop ("Chamber Music"),
    166   gettext_noop ("Sonata"),
    167   gettext_noop ("Symphony"),
    168   gettext_noop ("Booty Bass"),
    169   gettext_noop ("Primus"),
    170   gettext_noop ("Porn Groove"),
    171   gettext_noop ("Satire"),
    172   gettext_noop ("Slow Jam"),
    173   gettext_noop ("Club"),
    174   gettext_noop ("Tango"),
    175   gettext_noop ("Samba"),
    176   gettext_noop ("Folklore"),
    177   gettext_noop ("Ballad"),
    178   gettext_noop ("Power Ballad"),
    179   gettext_noop ("Rhythmic Soul"),
    180   gettext_noop ("Freestyle"),
    181   gettext_noop ("Duet"),
    182   gettext_noop ("Punk Rock"),
    183   gettext_noop ("Drum Solo"),
    184   gettext_noop ("A Cappella"),
    185   gettext_noop ("Euro-House"),
    186   gettext_noop ("Dance Hall"),
    187   gettext_noop ("Goa"),
    188   gettext_noop ("Drum & Bass"),
    189   gettext_noop ("Club-House"),
    190   gettext_noop ("Hardcore"),
    191   gettext_noop ("Terror"),
    192   gettext_noop ("Indie"),
    193   gettext_noop ("BritPop"),
    194   gettext_noop ("Negerpunk"),
    195   gettext_noop ("Polsk Punk"),
    196   gettext_noop ("Beat"),
    197   gettext_noop ("Christian Gangsta Rap"),
    198   gettext_noop ("Heavy Metal"),
    199   gettext_noop ("Black Metal"),
    200   gettext_noop ("Crossover"),
    201   gettext_noop ("Contemporary Christian"),
    202   gettext_noop ("Christian Rock"),
    203   gettext_noop ("Merengue"),
    204   gettext_noop ("Salsa"),
    205   gettext_noop ("Thrash Metal"),
    206   gettext_noop ("Anime"),
    207   gettext_noop ("JPop"),
    208   gettext_noop ("Synthpop"),
    209 };
    210 
    211 #define GENRE_NAME_COUNT \
    212         ((unsigned int) (sizeof genre_names / sizeof (const char *const)))
    213 
    214 
    215 static const char *languages[] = {
    216   "English",
    217   "French",
    218   "German",
    219   "Italian",
    220   "Dutch",
    221   "Swedish",
    222   "Spanish",
    223   "Danish",
    224   "Portuguese",
    225   "Norwegian",
    226   "Hebrew",
    227   "Japanese",
    228   "Arabic",
    229   "Finnish",
    230   "Greek",
    231   "Icelandic",
    232   "Maltese",
    233   "Turkish",
    234   "Croatian",
    235   "Traditional Chinese",
    236   "Urdu",
    237   "Hindi",
    238   "Thai",
    239   "Korean",
    240   "Lithuanian",
    241   "Polish",
    242   "Hungarian",
    243   "Estonian",
    244   "Lettish",
    245   "Saamisk",
    246   "Lappish",
    247   "Faeroese",
    248   "Farsi",
    249   "Russian",
    250   "Simplified Chinese",
    251   "Flemish",
    252   "Irish",
    253   "Albanian",
    254   "Romanian",
    255   "Czech",
    256   "Slovak",
    257   "Slovenian",
    258   "Yiddish",
    259   "Serbian",
    260   "Macedonian",
    261   "Bulgarian",
    262   "Ukrainian",
    263   "Byelorussian",
    264   "Uzbek",
    265   "Kazakh",
    266   "Azerbaijani",
    267   "AzerbaijanAr",
    268   "Armenian",
    269   "Georgian",
    270   "Moldavian",
    271   "Kirghiz",
    272   "Tajiki",
    273   "Turkmen",
    274   "Mongolian",
    275   "MongolianCyr",
    276   "Pashto",
    277   "Kurdish",
    278   "Kashmiri",
    279   "Sindhi",
    280   "Tibetan",
    281   "Nepali",
    282   "Sanskrit",
    283   "Marathi",
    284   "Bengali",
    285   "Assamese",
    286   "Gujarati",
    287   "Punjabi",
    288   "Oriya",
    289   "Malayalam",
    290   "Kannada",
    291   "Tamil",
    292   "Telugu",
    293   "Sinhalese",
    294   "Burmese",
    295   "Khmer",
    296   "Lao",
    297   "Vietnamese",
    298   "Indonesian",
    299   "Tagalog",
    300   "MalayRoman",
    301   "MalayArabic",
    302   "Amharic",
    303   "Tigrinya",
    304   "Galla",
    305   "Oromo",
    306   "Somali",
    307   "Swahili",
    308   "Ruanda",
    309   "Rundi",
    310   "Chewa",
    311   "Malagasy",
    312   "Esperanto",
    313   "Welsh",
    314   "Basque",
    315   "Catalan",
    316   "Latin",
    317   "Quechua",
    318   "Guarani",
    319   "Aymara",
    320   "Tatar",
    321   "Uighur",
    322   "Dzongkha",
    323   "JavaneseRom",
    324 };
    325 
    326 
    327 typedef struct
    328 {
    329   const char *ext;
    330   const char *mime;
    331 } C2M;
    332 
    333 /* see http://www.mp4ra.org/filetype.html
    334  *     http://www.ftyps.com/ */
    335 static C2M ftMap[] = {
    336   {"qt  ", "video/quicktime"},
    337   {"isom", "video/mp4"},        /* ISO Base Media files */
    338   {"iso2", "video/mp4"},
    339   {"iso4", "video/mp4"},
    340   {"iso5", "video/mp4"},
    341   {"iso6", "video/mp4"},
    342   {"avc1", "video/mp4"},
    343   {"mp41", "video/mp4"},        /* MPEG-4 (ISO/IEC 14491-1) version 1 */
    344   {"mp42", "video/mp4"},        /* MPEG-4 (ISO/IEC 14491-1) version 2 */
    345   {"mp71", "video/mp4"},        /* MPEG-4 with MPEG-7 metadata */
    346   {"dash", "video/mp4"},        /* MPEG-DASH */
    347   {"3gp1", "video/3gpp"},
    348   {"3gp2", "video/3gpp"},
    349   {"3gp3", "video/3gpp"},
    350   {"3gp4", "video/3gpp"},
    351   {"3gp5", "video/3gpp"},
    352   {"3gp6", "video/3gpp"},
    353   {"3gp7", "video/3gpp"},
    354   {"3g2a", "video/3gpp2"},
    355   {"3g2b", "video/3gpp2"},
    356   {"3g2c", "video/3gpp2"},
    357   {"mmp4", "video/mp4"},        /* Mobile MPEG-4 */
    358   {"M4A ", "audio/mp4"},
    359   {"M4B ", "audio/mp4"},        /* Apple audio book */
    360   {"M4P ", "audio/mp4"},
    361   {"M4V ", "video/mp4"},
    362   {"M4VH", "video/mp4"},
    363   {"M4VP", "video/mp4"},
    364   {"f4v ", "video/mp4"},        /* Adobe Flash MP4 video */
    365   {"f4a ", "audio/mp4"},
    366   {"f4b ", "audio/mp4"},
    367   {"qt  ", "video/quicktime"},
    368   {"mj2s", "video/mj2"},        /* Motion JPEG 2000 */
    369   {"mjp2", "video/mj2"},
    370   {NULL, NULL},
    371 };
    372 
    373 typedef struct CHE
    374 {
    375   const char *pfx;
    376   enum EXTRACTOR_MetaType type;
    377 } CHE;
    378 
    379 static CHE cHm[] = {
    380   {"aut", EXTRACTOR_METATYPE_AUTHOR_NAME},
    381   {"cpy", EXTRACTOR_METATYPE_COPYRIGHT},
    382   {"day", EXTRACTOR_METATYPE_CREATION_DATE},
    383   {"ed1", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    384   {"ed2", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    385   {"ed3", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    386   {"ed4", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    387   {"ed5", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    388   {"ed6", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    389   {"ed7", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    390   {"ed8", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    391   {"ed9", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    392   {"cmt", EXTRACTOR_METATYPE_COMMENT},
    393   {"url", EXTRACTOR_METATYPE_URL},
    394   {"enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
    395   {"hst", EXTRACTOR_METATYPE_BUILDHOST},
    396   {"nam", EXTRACTOR_METATYPE_TITLE},
    397   {"gen", EXTRACTOR_METATYPE_GENRE},
    398   {"mak", EXTRACTOR_METATYPE_CAMERA_MAKE},
    399   {"mod", EXTRACTOR_METATYPE_CAMERA_MODEL},
    400   {"des", EXTRACTOR_METATYPE_DESCRIPTION},
    401   {"dis", EXTRACTOR_METATYPE_DISCLAIMER},
    402   {"dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR},
    403   {"src", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME},
    404   {"prf", EXTRACTOR_METATYPE_PERFORMER },
    405   {"prd", EXTRACTOR_METATYPE_PRODUCER},
    406   {"PRD", EXTRACTOR_METATYPE_PRODUCT_VERSION},
    407   {"swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
    408   {"isr", EXTRACTOR_METATYPE_ISRC},
    409   {"wrt", EXTRACTOR_METATYPE_WRITER},
    410   {"wrn", EXTRACTOR_METATYPE_WARNING},
    411   {"chp", EXTRACTOR_METATYPE_CHAPTER_NAME},
    412   {"inf", EXTRACTOR_METATYPE_DESCRIPTION},
    413   {"req", EXTRACTOR_METATYPE_TARGET_PLATFORM},      /* hardware requirements */
    414   {"fmt", EXTRACTOR_METATYPE_FORMAT},
    415   {"alb", EXTRACTOR_METATYPE_ALBUM},
    416   {"ART", EXTRACTOR_METATYPE_ARTIST},
    417   {"art", EXTRACTOR_METATYPE_ARTIST},
    418   {"too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
    419   {"grp", EXTRACTOR_METATYPE_GROUP},
    420   {"lyr", EXTRACTOR_METATYPE_LYRICS},
    421   {"st3", EXTRACTOR_METATYPE_SUBTITLE},
    422   {NULL, EXTRACTOR_METATYPE_RESERVED },
    423 };
    424 
    425 
    426 typedef struct
    427 {
    428   const char *atom_type;
    429   enum EXTRACTOR_MetaType type;
    430 } ITTagConversionEntry;
    431 
    432 /* iTunes / "ilst" tags:
    433  * see http://atomicparsley.sourceforge.net/mpeg-4files.html
    434  *
    435  * The first byte of the four-character key is the (C) / 0xa9 sign for
    436  * the "user" tags; we keep it spelled out here so the table can be
    437  * memcmp()ed against the raw atom name. */
    438 static ITTagConversionEntry it_to_extr_table[] = {
    439   {"\xa9" "alb", EXTRACTOR_METATYPE_ALBUM},
    440   {"\xa9" "ART", EXTRACTOR_METATYPE_ARTIST},
    441   {"\xa9" "art", EXTRACTOR_METATYPE_ARTIST},
    442   {"aART", EXTRACTOR_METATYPE_ARTIST},               /* album artist */
    443   {"\xa9" "cmt", EXTRACTOR_METATYPE_COMMENT},
    444   {"\xa9" "day", EXTRACTOR_METATYPE_UNKNOWN_DATE},
    445   {"\xa9" "nam", EXTRACTOR_METATYPE_TITLE},
    446   {"\xa9" "trk", EXTRACTOR_METATYPE_TRACK_NUMBER},
    447   {"trkn", EXTRACTOR_METATYPE_TRACK_NUMBER},
    448   {"\xa9" "dis", EXTRACTOR_METATYPE_DISC_NUMBER},
    449   {"disk", EXTRACTOR_METATYPE_DISC_NUMBER},
    450   {"\xa9" "gen", EXTRACTOR_METATYPE_GENRE},
    451   {"gnre", EXTRACTOR_METATYPE_GENRE},
    452   {"\xa9" "wrt", EXTRACTOR_METATYPE_COMPOSER},
    453   {"\xa9" "com", EXTRACTOR_METATYPE_COMPOSER},
    454   {"\xa9" "too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
    455   {"\xa9" "enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
    456   {"cprt", EXTRACTOR_METATYPE_COPYRIGHT},
    457   {"\xa9" "cpy", EXTRACTOR_METATYPE_COPYRIGHT},
    458   {"\xa9" "grp", EXTRACTOR_METATYPE_GROUP},
    459   {"\xa9" "lyr", EXTRACTOR_METATYPE_LYRICS},
    460   {"\xa9" "st3", EXTRACTOR_METATYPE_SUBTITLE},
    461   {"\xa9" "url", EXTRACTOR_METATYPE_URL},
    462   {"\xa9" "prd", EXTRACTOR_METATYPE_PRODUCER},
    463   {"\xa9" "dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR},
    464   {"\xa9" "prf", EXTRACTOR_METATYPE_PERFORMER},
    465   {"\xa9" "swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
    466   {"\xa9" "fmt", EXTRACTOR_METATYPE_FORMAT},
    467   {"\xa9" "inf", EXTRACTOR_METATYPE_DESCRIPTION},
    468   {"tmpo", EXTRACTOR_METATYPE_BEATS_PER_MINUTE},
    469   {"catg", EXTRACTOR_METATYPE_SECTION},
    470   {"keyw", EXTRACTOR_METATYPE_KEYWORDS},
    471   {"desc", EXTRACTOR_METATYPE_DESCRIPTION},
    472   {"ldes", EXTRACTOR_METATYPE_DESCRIPTION},          /* long description */
    473   {"tvnn", EXTRACTOR_METATYPE_NETWORK_NAME},
    474   {"tvsh", EXTRACTOR_METATYPE_SHOW_NAME},
    475   {"tvsn", EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER},
    476   {"tves", EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER},
    477   {"purd", EXTRACTOR_METATYPE_UNKNOWN_DATE},         /* purchase date */
    478   {"covr", EXTRACTOR_METATYPE_COVER_PICTURE},
    479   {NULL, EXTRACTOR_METATYPE_RESERVED}
    480 };
    481 
    482 
    483 struct Atom
    484 {
    485   uint32_t size;
    486   uint32_t type;
    487 };
    488 
    489 
    490 struct LongAtom
    491 {
    492   uint32_t one;
    493   uint32_t type;
    494   uint64_t size;
    495 };
    496 
    497 
    498 static uint64_t
    499 ntohll (uint64_t n)
    500 {
    501 #if __BYTE_ORDER == __BIG_ENDIAN
    502   return n;
    503 #else
    504   return (((uint64_t) ntohl (n)) << 32) + ntohl (n >> 32);
    505 #endif
    506 }
    507 
    508 
    509 /**
    510  * Check if at position pos there is a valid atom.
    511  * @return false if the atom is invalid, true if it is valid
    512  */
    513 static bool
    514 checkAtomValid (const char *buffer,
    515                 size_t size,
    516                 size_t pos)
    517 {
    518   unsigned long long atomSize;
    519   const struct Atom *atom;
    520   const struct LongAtom *latom;
    521 
    522   if ( (pos >= size) ||
    523        (pos + sizeof (struct Atom) > size) ||
    524        (pos + sizeof (struct Atom) < pos) )
    525     return false;
    526   atom = (const struct Atom *) &buffer[pos];
    527   if (ntohl (atom->size) == 1)
    528   {
    529     if ( (pos + sizeof (struct LongAtom) > size) ||
    530          (pos + sizeof (struct LongAtom) < pos) )
    531       return false;
    532     latom = (const struct LongAtom *) &buffer[pos];
    533     atomSize = ntohll (latom->size);
    534     if ((atomSize < sizeof (struct LongAtom)) ||
    535         (atomSize + pos > size) || (atomSize + pos < atomSize))
    536       return false;
    537   }
    538   else
    539   {
    540     atomSize = ntohl (atom->size);
    541     if ((atomSize < sizeof (struct Atom)) ||
    542         (atomSize + pos > size) || (atomSize + pos < atomSize))
    543       return false;
    544   }
    545   return true;
    546 }
    547 
    548 
    549 /**
    550  * Assumes that checkAtomValid has already been called.
    551  */
    552 static uint64_t
    553 getAtomSize (const char *buf)
    554 {
    555   const struct Atom *atom;
    556   const struct LongAtom *latom;
    557 
    558   atom = (const struct Atom *) buf;
    559   if (ntohl (atom->size) == 1)
    560   {
    561     latom = (const struct LongAtom *) buf;
    562     return ntohll (latom->size);
    563   }
    564   return ntohl (atom->size);
    565 }
    566 
    567 
    568 /**
    569  * Assumes that checkAtomValid has already been called.
    570  */
    571 static size_t
    572 getAtomHeaderSize (const char *buf)
    573 {
    574   const struct Atom *atom;
    575 
    576   atom = (const struct Atom *) buf;
    577   if (ntohl (atom->size) == 1)
    578     return sizeof (const struct LongAtom);
    579   return sizeof (struct Atom);
    580 }
    581 
    582 
    583 /**
    584  * State carried through the recursive atom walk.
    585  */
    586 struct ExtractContext
    587 {
    588   /**
    589    * The libextractor processing callback.
    590    */
    591   EXTRACTOR_MetaDataProcessor proc;
    592 
    593   /**
    594    * Closure for @e proc.
    595    */
    596   void *proc_cls;
    597 
    598   /**
    599    * Set to non-zero once @e proc asked us to stop.
    600    */
    601   int ret;
    602 
    603   /**
    604    * Current atom nesting depth (for recursion limiting).
    605    */
    606   unsigned int depth;
    607 };
    608 
    609 
    610 static void
    611 addKeyword (enum EXTRACTOR_MetaType type,
    612             const char *str,
    613             struct ExtractContext *ec)
    614 {
    615   if (ec->ret != 0)
    616     return;
    617   ec->ret = ec->proc (ec->proc_cls,
    618                       "qt",
    619                       type,
    620                       EXTRACTOR_METAFORMAT_UTF8,
    621                       "text/plain",
    622                       str,
    623                       strlen (str) + 1);
    624 }
    625 
    626 
    627 static void
    628 addBinary (enum EXTRACTOR_MetaType type,
    629            const char *mime,
    630            const void *data,
    631            size_t data_len,
    632            struct ExtractContext *ec)
    633 {
    634   if (ec->ret != 0)
    635     return;
    636   ec->ret = ec->proc (ec->proc_cls,
    637                       "qt",
    638                       type,
    639                       EXTRACTOR_METAFORMAT_BINARY,
    640                       mime,
    641                       data,
    642                       data_len);
    643 }
    644 
    645 
    646 /**
    647  * Assumes that checkAtomValid has already been called.
    648  *
    649  * @return 0 on a fatal error (stop the current level),
    650  *         1 for success, -1 for "atom not understood, skip it"
    651  */
    652 typedef int
    653 (*AtomHandler) (const char *input,
    654                 size_t size,
    655                 size_t pos,
    656                 struct ExtractContext *ec);
    657 
    658 struct HandlerEntry
    659 {
    660   const char *name;
    661   AtomHandler handler;
    662 };
    663 
    664 
    665 /**
    666  * Call the handler for the atom at the given position.
    667  * Will check validity of the given atom.
    668  *
    669  * @return 0 on error, 1 for success, -1 for unknown atom type
    670  */
    671 static int
    672 handleAtom (struct HandlerEntry *handlers,
    673             const char *input,
    674             size_t size,
    675             size_t pos,
    676             struct ExtractContext *ec);
    677 
    678 static struct HandlerEntry all_handlers[];
    679 
    680 /**
    681  * Process atoms.
    682  * @return 0 on error, 1 for success, -1 for unknown atom type
    683  */
    684 static int
    685 processAtoms (struct HandlerEntry *handlers,
    686               const char *input,
    687               size_t size,
    688               struct ExtractContext *ec)
    689 {
    690   size_t pos;
    691 
    692   if (size < sizeof (struct Atom))
    693     return 1;
    694   if (ec->depth >= MAX_ATOM_DEPTH)
    695     return 1;
    696   ec->depth++;
    697   pos = 0;
    698   while (pos < size - sizeof (struct Atom))
    699   {
    700     if (0 == handleAtom (handlers,
    701                          input,
    702                          size,
    703                          pos,
    704                          ec))
    705     {
    706       ec->depth--;
    707       return 0;
    708     }
    709     if (0 != ec->ret)
    710       break;                    /* processor asked us to stop */
    711     pos += getAtomSize (&input[pos]);
    712   }
    713   ec->depth--;
    714   return 1;
    715 }
    716 
    717 
    718 /**
    719  * Process all atoms.
    720  * @return 0 on error, 1 for success, -1 for unknown atom type
    721  */
    722 static int
    723 processAllAtoms (const char *input,
    724                  size_t size,
    725                  struct ExtractContext *ec)
    726 {
    727   return processAtoms (all_handlers,
    728                        input,
    729                        size,
    730                        ec);
    731 }
    732 
    733 
    734 /**
    735  * Handle the moov atom.
    736  * @return 0 on error, 1 for success, -1 for unknown atom type
    737  */
    738 static int
    739 moovHandler (const char *input,
    740              size_t size,
    741              size_t pos,
    742              struct ExtractContext *ec)
    743 {
    744   uint32_t hdr = getAtomHeaderSize (&input[pos]);
    745 
    746   return processAllAtoms (&input[pos + hdr],
    747                           getAtomSize (&input[pos]) - hdr,
    748                           ec);
    749 }
    750 
    751 
    752 /* see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap1/chapter_2_section_5.html */
    753 struct FileType
    754 {
    755   struct Atom header;
    756   /* major brand */
    757   char type[4];
    758   /* minor version */
    759   unsigned int version;
    760   /* compatible brands */
    761   char compatibility[4];
    762 };
    763 
    764 
    765 static int
    766 ftypHandler (const char *input,
    767              size_t size,
    768              size_t pos,
    769              struct ExtractContext *ec)
    770 {
    771   const struct FileType *ft;
    772 
    773   if (getAtomSize (&input[pos]) < sizeof (struct FileType))
    774     return -1;
    775   ft = (const struct FileType *) &input[pos];
    776 
    777   for (unsigned i = 0;
    778        NULL != ftMap[i].ext;
    779        i++)
    780   {
    781     if (0 != memcmp (ft->type,
    782                      ftMap[i].ext,
    783                      4))
    784     {
    785       addKeyword (EXTRACTOR_METATYPE_MIMETYPE,
    786                   ftMap[i].mime,
    787                   ec);
    788       break;
    789     }
    790   }
    791   return 1;
    792 }
    793 
    794 
    795 /**
    796  * Handle the movie header ('mvhd') atom, reporting the movie duration.
    797  * Supports both the 32-bit (version 0) and 64-bit (version 1) layouts.
    798  *
    799  * @return 1 for success, -1 if the atom could not be parsed
    800  */
    801 static int
    802 mvhdHandler (const char *input,
    803              size_t size,
    804              size_t pos,
    805              struct ExtractContext *ec)
    806 {
    807   uint64_t asize = getAtomSize (&input[pos]);
    808   uint32_t hdr = getAtomHeaderSize (&input[pos]);
    809   const unsigned char *body;
    810   unsigned char version;
    811   uint64_t timeScale;
    812   uint64_t duration;
    813   char dur[24];
    814 
    815   if (asize < hdr + 4)
    816     return -1;
    817   body = (const unsigned char *) &input[pos + hdr];
    818   version = body[0];
    819   if (0 == version)
    820   {
    821     /* version(1) flags(3) creation(4) modification(4)
    822        timeScale(4) duration(4) ... */
    823     if (asize < hdr + 20)
    824       return -1;
    825     timeScale = ntohl (*(const uint32_t *) &body[12]);
    826     duration = ntohl (*(const uint32_t *) &body[16]);
    827   }
    828   else if (1 == version)
    829   {
    830     /* version(1) flags(3) creation(8) modification(8)
    831        timeScale(4) duration(8) ... */
    832     if (asize < hdr + 32)
    833       return -1;
    834     timeScale = ntohl (*(const uint32_t *) &body[20]);
    835     duration = ntohll (*(const uint64_t *) &body[24]);
    836   }
    837   else
    838   {
    839     return -1;
    840   }
    841   if (0 == timeScale)
    842     return -1;
    843   snprintf (dur,
    844             sizeof (dur),
    845             "%llus",
    846             (unsigned long long) (duration / timeScale));
    847   addKeyword (EXTRACTOR_METATYPE_DURATION,
    848               dur,
    849               ec);
    850   return 1;
    851 }
    852 
    853 
    854 struct CompressedMovieHeaderAtom
    855 {
    856   struct Atom cmovAtom;
    857   struct Atom dcomAtom;
    858   char compressor[4];
    859   struct Atom cmvdAtom;
    860   uint32_t decompressedSize;
    861 };
    862 
    863 
    864 static int
    865 cmovHandler (const char *input,
    866              size_t size,
    867              size_t pos,
    868              struct ExtractContext *ec)
    869 {
    870   const struct CompressedMovieHeaderAtom *c;
    871   unsigned int s;
    872   char *buf;
    873   int ret;
    874   z_stream z_state;
    875   int z_ret_code;
    876 
    877   if (getAtomSize (&input[pos]) < sizeof (struct CompressedMovieHeaderAtom))
    878     return -1;
    879   c = (const struct CompressedMovieHeaderAtom *) &input[pos];
    880   if ((ntohl (c->dcomAtom.size) != 12) ||
    881       (0 != memcmp (&c->dcomAtom.type, "dcom", 4)) ||
    882       (0 != memcmp (c->compressor, "zlib", 4)) ||
    883       (0 != memcmp (&c->cmvdAtom.type, "cmvd", 4)) ||
    884       (ntohl (c->cmvdAtom.size) !=
    885        getAtomSize (&input[pos]) - sizeof (struct Atom) * 2 - 4))
    886   {
    887     return -1;                  /* dcom must be 12 bytes */
    888   }
    889   s = ntohl (c->decompressedSize);
    890   if (s > MAX_CMOV_SIZE)
    891     return -1;                  /* ignore, too big! */
    892   buf = malloc (s);
    893   if (buf == NULL)
    894     return -1;                  /* out of memory, handle gracefully */
    895 
    896   memset (&z_state, 0, sizeof (z_state));
    897   z_state.next_in = (unsigned char *) &c[1];
    898   z_state.avail_in = ntohl (c->cmvdAtom.size);
    899   z_state.avail_out = s;
    900   z_state.next_out = (unsigned char *) buf;
    901   z_state.zalloc = (alloc_func) 0;
    902   z_state.zfree = (free_func) 0;
    903   z_state.opaque = (voidpf) 0;
    904   z_ret_code = inflateInit (&z_state);
    905   if (Z_OK != z_ret_code)
    906   {
    907     free (buf);
    908     return -1;                  /* crc error? */
    909   }
    910   z_ret_code = inflate (&z_state,
    911                         Z_NO_FLUSH);
    912   if ( (z_ret_code != Z_OK) &&
    913        (z_ret_code != Z_STREAM_END) )
    914   {
    915     inflateEnd (&z_state);
    916     free (buf);
    917     return -1;                  /* decode error? */
    918   }
    919   z_ret_code = inflateEnd (&z_state);
    920   if (Z_OK != z_ret_code)
    921   {
    922     free (buf);
    923     return -1;                  /* decode error? */
    924   }
    925   ret = handleAtom (all_handlers,
    926                     buf,
    927                     s,
    928                     0,
    929                     ec);
    930   free (buf);
    931   return ret;
    932 }
    933 
    934 
    935 /**
    936  * Handle the track header ('tkhd') atom.  The (fixed-point) track
    937  * width and height are the final eight bytes of the atom regardless of
    938  * the atom's version, so we read them relative to the end of the box.
    939  *
    940  * @return 1 for success, -1 if the atom could not be parsed
    941  */
    942 static int
    943 tkhdHandler (const char *input,
    944              size_t size,
    945              size_t pos,
    946              struct ExtractContext *ec)
    947 {
    948   uint64_t asize = getAtomSize (&input[pos]);
    949   uint32_t hdr = getAtomHeaderSize (&input[pos]);
    950   const unsigned char *p;
    951   unsigned int width;
    952   unsigned int height;
    953   char dimensions[40];
    954 
    955   if (asize < hdr + 8)
    956     return -1;
    957   p = (const unsigned char *) &input[pos + asize - 8];
    958   /* 16.16 fixed point; the integer part is the high 16 bits */
    959   width = (p[0] << 8) | p[1];
    960   height = (p[4] << 8) | p[5];
    961   if (0 != width)
    962   {
    963     /* if actually a/the video track */
    964     snprintf (dimensions,
    965               sizeof (dimensions),
    966               "%ux%u",
    967               width,
    968               height);
    969     addKeyword (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS,
    970                 dimensions,
    971                 ec);
    972   }
    973   return 1;
    974 }
    975 
    976 
    977 static int
    978 trakHandler (const char *input,
    979              size_t size,
    980              size_t pos,
    981              struct ExtractContext *ec)
    982 {
    983   uint32_t hdr = getAtomHeaderSize (&input[pos]);
    984 
    985   return processAllAtoms (&input[pos + hdr],
    986                           getAtomSize (&input[pos]) - hdr,
    987                           ec);
    988 }
    989 
    990 
    991 static int
    992 metaHandler (const char *input,
    993              size_t size,
    994              size_t pos,
    995              struct ExtractContext *ec)
    996 {
    997   uint32_t hdr = getAtomHeaderSize (&input[pos]);
    998 
    999   if (getAtomSize (&input[pos]) < hdr + 4)
   1000     return -1;
   1001   return processAllAtoms (&input[pos + hdr + 4],
   1002                           getAtomSize (&input[pos]) - hdr - 4,
   1003                           ec);
   1004 }
   1005 
   1006 
/**
 * On-disk layout of the start of a QuickTime user-data text atom
 * ("international text"): the generic atom header followed by a
 * 16-bit text length and a 16-bit language code.  The text bytes
 * follow directly after this structure.
 */
struct InternationalText
{
  /* generic atom header */
  struct Atom header;
  /* number of text bytes following this structure (big-endian) */
  uint16_t length;
  /* language code (big-endian) */
  uint16_t language;
};
   1013 
   1014 
   1015 /*
   1016  * see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap2/chapter_3_section_2.html
   1017  *   "User Data Text Strings and Language Codes"
   1018  */
   1019 static int
   1020 processTextTag (const char *input,
   1021                 size_t size,
   1022                 size_t pos,
   1023                 enum EXTRACTOR_MetaType type, struct ExtractContext *ec)
   1024 {
   1025   uint64_t as;
   1026   uint16_t len;
   1027   uint16_t lang;
   1028   const struct InternationalText *txt;
   1029   char *meta;
   1030 
   1031   /* contains "international text":
   1032      16-bit size + 16 bit language code */
   1033   as = getAtomSize (&input[pos]);
   1034   if (as < sizeof (struct InternationalText))
   1035     return -1;                  /* invalid */
   1036   txt = (const struct InternationalText *) &input[pos];
   1037   len = ntohs (txt->length);
   1038   if (len + sizeof (struct InternationalText) > as)
   1039     return -1;                  /* invalid */
   1040   lang = ntohs (txt->language);
   1041   if (lang < sizeof (languages) / sizeof (char *))
   1042     addKeyword (EXTRACTOR_METATYPE_LANGUAGE,
   1043                 languages[lang],
   1044                 ec);
   1045 
   1046   meta = malloc (len + 1);
   1047   if (NULL == meta)
   1048     return -1;
   1049   memcpy (meta,
   1050           &txt[1],
   1051           len);
   1052   meta[len] = '\0';
   1053   for (unsigned int i = 0; i < len; i++)
   1054     if (meta[i] == '\r')
   1055       meta[i] = '\n';
   1056   addKeyword (type,
   1057               meta,
   1058               ec);
   1059   free (meta);
   1060   return 1;
   1061 }
   1062 
   1063 
   1064 static int
   1065 c_Handler (const char *input,
   1066            size_t size,
   1067            size_t pos,
   1068            struct ExtractContext *ec)
   1069 {
   1070   for (unsigned int i = 0;
   1071        NULL != cHm[i].pfx;
   1072        i++)
   1073     if (0 == memcmp (&input[pos + 5],
   1074                      cHm[i].pfx,
   1075                      3))
   1076       return processTextTag (input,
   1077                              size,
   1078                              pos,
   1079                              cHm[i].type,
   1080                              ec);
   1081   return -1;  /* not found */
   1082 }
   1083 
   1084 
   1085 /**
   1086  * Process the 'data' atom nested inside an iTunes-style 'ilst' entry.
   1087  *
   1088  * @param input start of the buffer
   1089  * @param size size of the parent (ilst entry) atom
   1090  * @param pos offset of the 'data' atom within @a input
   1091  * @param patom pointer to the parent (ilst entry) atom
   1092  * @param type metadata type to report the value as
   1093  * @return 1 for success, -1 if the atom could not be handled
   1094  */
   1095 static int
   1096 processDataAtom (const char *input,
   1097                  size_t size, /* parent atom size */
   1098                  size_t pos,
   1099                  const char *patom,
   1100                  enum EXTRACTOR_MetaType type,
   1101                  struct ExtractContext *ec)
   1102 {
   1103   char *meta;
   1104   unsigned char version;
   1105   unsigned int wellknown;
   1106   uint64_t asize;
   1107   unsigned int len;
   1108   uint32_t hdr;
   1109   int i;
   1110 
   1111   hdr = getAtomHeaderSize (&input[pos]);
   1112   asize = getAtomSize (&input[pos]);
   1113   if (0 !=
   1114       memcmp (&input[pos + 4],
   1115               "data",
   1116               4))
   1117     return -1;
   1118 
   1119   if ((asize < hdr + 8) ||      /* header + u32 type + u32 locale */
   1120       (asize > (getAtomSize (&patom[0]) - 8)))
   1121     return -1;
   1122 
   1123   len = (unsigned int) (asize - (hdr + 8));
   1124 
   1125   version = input[pos + 8];
   1126   /* "well known type" indicator (the low 24 bits of the type field) */
   1127   wellknown = ((unsigned char) input[pos + 9] << 16)
   1128               | ((unsigned char) input[pos + 10] << 8)
   1129               | (unsigned char) input[pos + 11];
   1130 
   1131   if (0 != version)
   1132     return -1;
   1133 
   1134   /* cover art: well-known type 13 = JPEG, 14 = PNG, 27 = BMP */
   1135   if ( (EXTRACTOR_METATYPE_COVER_PICTURE == type) &&
   1136        ( (13 == wellknown) ||
   1137          (14 == wellknown) ||
   1138          (27 == wellknown) ) )
   1139   {
   1140     const char *mime;
   1141 
   1142     if (0 == len)
   1143       return -1;
   1144     switch (wellknown)
   1145     {
   1146     case 13:
   1147       mime = "image/jpeg";
   1148       break;
   1149     case 14:
   1150       mime = "image/png";
   1151       break;
   1152     default:
   1153       mime = "image/bmp";
   1154       break;
   1155     }
   1156     addBinary (type,
   1157                mime,
   1158                &input[pos + 16],
   1159                len,
   1160                ec);
   1161     return 1;
   1162   }
   1163 
   1164   if (0x0 == wellknown)         /* binary data */
   1165   {
   1166     if (0 ==
   1167         memcmp (&patom[4],
   1168                 "gnre",
   1169                 4))
   1170     {
   1171       if (len >= 2)
   1172       {
   1173         uint16_t genre = ((uint8_t) input[pos + 16] << 8)
   1174                          | (uint8_t) input[pos + 17];
   1175 
   1176         if ((genre > 0) && (genre <= GENRE_NAME_COUNT))
   1177           addKeyword (type,
   1178                       genre_names[genre - 1],
   1179                       ec);
   1180       }
   1181       return 1;
   1182     }
   1183     else if ( (0 ==
   1184                memcmp (&patom[4],
   1185                        "trkn",
   1186                        4)) ||
   1187               (0 ==
   1188                memcmp (&patom[4],
   1189                        "disk",
   1190                        4)))
   1191     {
   1192       if (len >= 4)
   1193       {
   1194         unsigned short n = ((unsigned char) input[pos + 18] << 8)
   1195                            | (unsigned char) input[pos + 19];
   1196         char s[8];
   1197 
   1198         snprintf (s,
   1199                   sizeof (s),
   1200                   "%d",
   1201                   n);
   1202         addKeyword (type,
   1203                     s,
   1204                     ec);
   1205       }
   1206       return 1;
   1207     }
   1208     else if (0 ==
   1209              memcmp (&patom[4],
   1210                      "tmpo",
   1211                      4))
   1212     {
   1213       if (len >= 2)
   1214       {
   1215         unsigned short n = ((unsigned char) input[pos + 16] << 8)
   1216                            | (unsigned char) input[pos + 17];
   1217         char s[8];
   1218 
   1219         snprintf (s,
   1220                   sizeof (s),
   1221                   "%u",
   1222                   n);
   1223         addKeyword (type,
   1224                     s,
   1225                     ec);
   1226       }
   1227       return 1;
   1228     }
   1229     else
   1230     {
   1231       return -1;
   1232     }
   1233   }
   1234   else if (0x15 == wellknown)   /* signed/unsigned big-endian integer */
   1235   {
   1236     unsigned long long n = 0;
   1237     char s[24];
   1238     unsigned int j;
   1239 
   1240     if ((len < 1) || (len > 8))
   1241       return -1;
   1242     for (j = 0; j < len; j++)
   1243       n = (n << 8) | (unsigned char) input[pos + 16 + j];
   1244     snprintf (s,
   1245               sizeof (s),
   1246               "%llu",
   1247               n);
   1248     addKeyword (type,
   1249                 s,
   1250                 ec);
   1251     return 1;
   1252   }
   1253   else if (wellknown == 0x1)    /* UTF-8 text data */
   1254   {
   1255     meta = malloc (len + 1);
   1256     if (meta == NULL)
   1257       return -1;
   1258     memcpy (meta,
   1259             &input[pos + 16],
   1260             len);
   1261     meta[len] = '\0';
   1262     for (i = 0; i < len; i++)
   1263       if (meta[i] == '\r')
   1264         meta[i] = '\n';
   1265     addKeyword (type,
   1266                 meta,
   1267                 ec);
   1268     free (meta);
   1269     return 1;
   1270   }
   1271 
   1272   return -1;
   1273 }
   1274 
   1275 
   1276 /* NOTE: iTunes tag processing should, in theory, be limited to iTunes
   1277  * file types (from ftyp), but, in reality, it seems that there are other
   1278  * files, like 3gpp, out in the wild with iTunes tags. */
   1279 static int
   1280 iTunesTagHandler (const char *input,
   1281                   size_t size,
   1282                   size_t pos,
   1283                   struct ExtractContext *ec)
   1284 {
   1285   uint64_t asize;
   1286   uint32_t hdr;
   1287 
   1288   hdr = getAtomHeaderSize (&input[pos]);
   1289   asize = getAtomSize (&input[pos]);
   1290 
   1291   if (asize < hdr + 8)          /* header + at least one atom */
   1292     return -1;
   1293 
   1294   for (unsigned int i = 0;
   1295        NULL != it_to_extr_table[i].atom_type;
   1296        i++)
   1297     if (0 == memcmp (&input[pos + 4],
   1298                      it_to_extr_table[i].atom_type,
   1299                      4))
   1300       return processDataAtom (input,
   1301                               asize,
   1302                               pos + hdr,
   1303                               &input[pos],
   1304                               it_to_extr_table[i].type,
   1305                               ec);
   1306   return -1;
   1307 }
   1308 
   1309 
   1310 /**
   1311  * Handle the iTunes metadata list ('ilst').  Its children have
   1312  * arbitrary four-character keys, so rather than a name table we simply
   1313  * iterate them and let #iTunesTagHandler decide what is interesting.
   1314  *
   1315  * @return 0 on a fatal error, 1 otherwise
   1316  */
   1317 static int
   1318 ilstHandler (const char *input,
   1319              size_t size,
   1320              size_t pos,
   1321              struct ExtractContext *ec)
   1322 {
   1323   uint32_t hdr = getAtomHeaderSize (&input[pos]);
   1324   size_t end = pos + getAtomSize (&input[pos]);
   1325   size_t cpos = pos + hdr;
   1326 
   1327   if (ec->depth >= MAX_ATOM_DEPTH)
   1328     return 1;
   1329   ec->depth++;
   1330   while ((cpos + sizeof (struct Atom) <= end) &&
   1331          (checkAtomValid (input, end, cpos)))
   1332   {
   1333     iTunesTagHandler (input, end, cpos, ec);
   1334     if (0 != ec->ret)
   1335       break;
   1336     cpos += getAtomSize (&input[cpos]);
   1337   }
   1338   ec->depth--;
   1339   return 1;
   1340 }
   1341 
   1342 
   1343 /**
   1344  * Handle the user-data ('udta') atom.  It mixes classic QuickTime
   1345  * '(C)xyz' international-text tags with structural sub-atoms such as
   1346  * 'meta'/'ilst', so we iterate the children and dispatch accordingly.
   1347  *
   1348  * @return 0 on a fatal error, 1 otherwise
   1349  */
   1350 static int
   1351 udtaHandler (const char *input,
   1352              size_t size,
   1353              size_t pos,
   1354              struct ExtractContext *ec)
   1355 {
   1356   uint32_t hdr = getAtomHeaderSize (&input[pos]);
   1357   size_t end = pos + getAtomSize (&input[pos]);
   1358   size_t cpos = pos + hdr;
   1359 
   1360   if (ec->depth >= MAX_ATOM_DEPTH)
   1361     return 1;
   1362   ec->depth++;
   1363   while ((cpos + sizeof (struct Atom) <= end) &&
   1364          (checkAtomValid (input, end, cpos)))
   1365   {
   1366     if (0xA9 == (unsigned char) input[cpos + 4])
   1367       c_Handler (input,
   1368                  end,
   1369                  cpos,
   1370                  ec);
   1371     else
   1372       handleAtom (all_handlers,
   1373                   input,
   1374                   end,
   1375                   cpos,
   1376                   ec);
   1377     if (0 != ec->ret)
   1378       break;
   1379     cpos += getAtomSize (&input[cpos]);
   1380   }
   1381   ec->depth--;
   1382   return 1;
   1383 }
   1384 
   1385 
   1386 static struct HandlerEntry all_handlers[] = {
   1387   {"moov", &moovHandler},
   1388   {"cmov", &cmovHandler},
   1389   {"mvhd", &mvhdHandler},
   1390   {"trak", &trakHandler},
   1391   {"tkhd", &tkhdHandler},
   1392   {"ilst", &ilstHandler},
   1393   {"meta", &metaHandler},
   1394   {"udta", &udtaHandler},
   1395   {"ftyp", &ftypHandler},
   1396   {NULL, NULL},
   1397 };
   1398 
   1399 
   1400 /**
   1401  * Call the handler for the atom at the given position.
   1402  * @return 0 on error, 1 for success, -1 for unknown atom type
   1403  */
   1404 static int
   1405 handleAtom (struct HandlerEntry *handlers,
   1406             const char *input,
   1407             size_t size,
   1408             size_t pos,
   1409             struct ExtractContext *ec)
   1410 {
   1411   if (! checkAtomValid (input,
   1412                         size,
   1413                         pos))
   1414     return 0;
   1415   for (unsigned i = 0;
   1416        handlers[i].name != NULL;
   1417        i++)
   1418   {
   1419     if (0 ==
   1420         memcmp (&input[pos + 4],
   1421                 handlers[i].name,
   1422                 4))
   1423     {
   1424       return handlers[i].handler (input,
   1425                                   size,
   1426                                   pos,
   1427                                   ec);
   1428     }
   1429   }
   1430   return -1;
   1431 }
   1432 
   1433 
   1434 /**
   1435  * Read exactly @a len bytes from absolute offset @a off into @a dst.
   1436  *
   1437  * The extraction context exposes the file through a sliding shared
   1438  * memory window, so a single read may return fewer bytes than
   1439  * requested; we seek once and then loop until the request is satisfied.
   1440  *
   1441  * @return 0 on success, -1 on error / short file
   1442  */
   1443 static int
   1444 qt_pread (struct EXTRACTOR_ExtractContext *ec,
   1445           uint64_t off,
   1446           void *dst,
   1447           size_t len)
   1448 {
   1449   unsigned char *out = dst;
   1450 
   1451   if ((int64_t) off != ec->seek (ec->cls, (int64_t) off, SEEK_SET))
   1452     return -1;
   1453   while (len > 0)
   1454   {
   1455     void *buf;
   1456     ssize_t got;
   1457 
   1458     got = ec->read (ec->cls,
   1459                     &buf,
   1460                     len);
   1461     if (got <= 0)
   1462       return -1;
   1463     memcpy (out,
   1464             buf,
   1465             (size_t) got);
   1466     out += got;
   1467     len -= (size_t) got;
   1468   }
   1469   return 0;
   1470 }
   1471 
   1472 
   1473 /**
   1474  * Top-level atom types worth pulling into memory.  Everything else
   1475  * (notably the huge 'mdat' payload, plus 'free'/'skip'/'wide') is
   1476  * skipped without ever being read.
   1477  */
   1478 static bool
   1479 is_interesting_top_atom (const unsigned char *type)
   1480 {
   1481   static const char *const interesting[] = {
   1482     "moov", "ftyp", "meta", "udta", "uuid", "pnot", NULL
   1483   };
   1484 
   1485   for (unsigned int i = 0;
   1486        NULL != interesting[i];
   1487        i++)
   1488     if (0 == memcmp (type,
   1489                      interesting[i],
   1490                      4))
   1491       return true;
   1492   return false;
   1493 }
   1494 
   1495 
   1496 /**
   1497  * Main entry method for the QuickTime/MP4 extraction plugin.
   1498  *
   1499  * @param ec extraction context provided to the plugin
   1500  */
   1501 void
   1502 EXTRACTOR_qt_extract_method (struct EXTRACTOR_ExtractContext *ec);
   1503 
   1504 void
   1505 EXTRACTOR_qt_extract_method (struct EXTRACTOR_ExtractContext *ec)
   1506 {
   1507   struct ExtractContext xc;
   1508   uint64_t fsize;
   1509   uint64_t pos;
   1510 
   1511   fsize = ec->get_size (ec->cls);
   1512   if ((UINT64_MAX == fsize) || (fsize < sizeof (struct Atom)))
   1513     return;
   1514 
   1515   xc.proc = ec->proc;
   1516   xc.proc_cls = ec->cls;
   1517   xc.ret = 0;
   1518   xc.depth = 0;
   1519 
   1520   pos = 0;
   1521   while ( (0 == xc.ret) &&
   1522           (pos + sizeof (struct Atom) <= fsize) )
   1523   {
   1524     unsigned char hdr[16];
   1525     uint64_t asize;
   1526     unsigned int hsize;
   1527 
   1528     if (0 != qt_pread (ec, pos, hdr, 8))
   1529       break;
   1530     asize = ((uint64_t) hdr[0] << 24) | ((uint64_t) hdr[1] << 16)
   1531             | ((uint64_t) hdr[2] << 8) | (uint64_t) hdr[3];
   1532     if (1 == asize)
   1533     {
   1534       if ((pos + 16 > fsize) ||
   1535           (0 != qt_pread (ec, pos + 8, &hdr[8], 8)))
   1536         break;
   1537       asize = ((uint64_t) hdr[8] << 56) | ((uint64_t) hdr[9] << 48)
   1538               | ((uint64_t) hdr[10] << 40) | ((uint64_t) hdr[11] << 32)
   1539               | ((uint64_t) hdr[12] << 24) | ((uint64_t) hdr[13] << 16)
   1540               | ((uint64_t) hdr[14] << 8) | (uint64_t) hdr[15];
   1541       hsize = 16;
   1542     }
   1543     else if (0 == asize)
   1544     {
   1545       /* atom extends to end of file */
   1546       asize = fsize - pos;
   1547       hsize = 8;
   1548     }
   1549     else
   1550     {
   1551       hsize = 8;
   1552     }
   1553     if ((asize < hsize) || (pos + asize > fsize))
   1554       break;
   1555 
   1556     if (is_interesting_top_atom (&hdr[4]) &&
   1557         (asize <= MAX_ATOM_SIZE))
   1558     {
   1559       char *buf = malloc ((size_t) asize);
   1560 
   1561       if (NULL != buf)
   1562       {
   1563         if (0 == qt_pread (ec,
   1564                            pos,
   1565                            buf,
   1566                            (size_t) asize))
   1567           handleAtom (all_handlers,
   1568                       buf,
   1569                       (size_t) asize,
   1570                       0,
   1571                       &xc);
   1572         free (buf);
   1573       }
   1574     }
   1575     pos += asize;
   1576   }
   1577 }
   1578 
   1579 
   1580 /*  end of qt_extractor.c */