libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

qt_extractor.c (30004B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 2, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 
     21 #include "platform.h"
     22 #include "extractor.h"
     23 #include <zlib.h>
     24 #include <math.h>
     25 
     26 #define DEBUG 0
     27 
     28 /* verbatim from mp3extractor */
     29 static const char *const genre_names[] = {
     30   gettext_noop ("Blues"),
     31   gettext_noop ("Classic Rock"),
     32   gettext_noop ("Country"),
     33   gettext_noop ("Dance"),
     34   gettext_noop ("Disco"),
     35   gettext_noop ("Funk"),
     36   gettext_noop ("Grunge"),
     37   gettext_noop ("Hip-Hop"),
     38   gettext_noop ("Jazz"),
     39   gettext_noop ("Metal"),
     40   gettext_noop ("New Age"),
     41   gettext_noop ("Oldies"),
     42   gettext_noop ("Other"),
     43   gettext_noop ("Pop"),
     44   gettext_noop ("R&B"),
     45   gettext_noop ("Rap"),
     46   gettext_noop ("Reggae"),
     47   gettext_noop ("Rock"),
     48   gettext_noop ("Techno"),
     49   gettext_noop ("Industrial"),
     50   gettext_noop ("Alternative"),
     51   gettext_noop ("Ska"),
     52   gettext_noop ("Death Metal"),
     53   gettext_noop ("Pranks"),
     54   gettext_noop ("Soundtrack"),
     55   gettext_noop ("Euro-Techno"),
     56   gettext_noop ("Ambient"),
     57   gettext_noop ("Trip-Hop"),
     58   gettext_noop ("Vocal"),
     59   gettext_noop ("Jazz+Funk"),
     60   gettext_noop ("Fusion"),
     61   gettext_noop ("Trance"),
     62   gettext_noop ("Classical"),
     63   gettext_noop ("Instrumental"),
     64   gettext_noop ("Acid"),
     65   gettext_noop ("House"),
     66   gettext_noop ("Game"),
     67   gettext_noop ("Sound Clip"),
     68   gettext_noop ("Gospel"),
     69   gettext_noop ("Noise"),
     70   gettext_noop ("Alt. Rock"),
     71   gettext_noop ("Bass"),
     72   gettext_noop ("Soul"),
     73   gettext_noop ("Punk"),
     74   gettext_noop ("Space"),
     75   gettext_noop ("Meditative"),
     76   gettext_noop ("Instrumental Pop"),
     77   gettext_noop ("Instrumental Rock"),
     78   gettext_noop ("Ethnic"),
     79   gettext_noop ("Gothic"),
     80   gettext_noop ("Darkwave"),
     81   gettext_noop ("Techno-Industrial"),
     82   gettext_noop ("Electronic"),
     83   gettext_noop ("Pop-Folk"),
     84   gettext_noop ("Eurodance"),
     85   gettext_noop ("Dream"),
     86   gettext_noop ("Southern Rock"),
     87   gettext_noop ("Comedy"),
     88   gettext_noop ("Cult"),
     89   gettext_noop ("Gangsta Rap"),
     90   gettext_noop ("Top 40"),
     91   gettext_noop ("Christian Rap"),
     92   gettext_noop ("Pop/Funk"),
     93   gettext_noop ("Jungle"),
     94   gettext_noop ("Native American"),
     95   gettext_noop ("Cabaret"),
     96   gettext_noop ("New Wave"),
     97   gettext_noop ("Psychedelic"),
     98   gettext_noop ("Rave"),
     99   gettext_noop ("Showtunes"),
    100   gettext_noop ("Trailer"),
    101   gettext_noop ("Lo-Fi"),
    102   gettext_noop ("Tribal"),
    103   gettext_noop ("Acid Punk"),
    104   gettext_noop ("Acid Jazz"),
    105   gettext_noop ("Polka"),
    106   gettext_noop ("Retro"),
    107   gettext_noop ("Musical"),
    108   gettext_noop ("Rock & Roll"),
    109   gettext_noop ("Hard Rock"),
    110   gettext_noop ("Folk"),
    111   gettext_noop ("Folk/Rock"),
    112   gettext_noop ("National Folk"),
    113   gettext_noop ("Swing"),
    114   gettext_noop ("Fast-Fusion"),
    115   gettext_noop ("Bebob"),
    116   gettext_noop ("Latin"),
    117   gettext_noop ("Revival"),
    118   gettext_noop ("Celtic"),
    119   gettext_noop ("Bluegrass"),
    120   gettext_noop ("Avantgarde"),
    121   gettext_noop ("Gothic Rock"),
    122   gettext_noop ("Progressive Rock"),
    123   gettext_noop ("Psychedelic Rock"),
    124   gettext_noop ("Symphonic Rock"),
    125   gettext_noop ("Slow Rock"),
    126   gettext_noop ("Big Band"),
    127   gettext_noop ("Chorus"),
    128   gettext_noop ("Easy Listening"),
    129   gettext_noop ("Acoustic"),
    130   gettext_noop ("Humour"),
    131   gettext_noop ("Speech"),
    132   gettext_noop ("Chanson"),
    133   gettext_noop ("Opera"),
    134   gettext_noop ("Chamber Music"),
    135   gettext_noop ("Sonata"),
    136   gettext_noop ("Symphony"),
    137   gettext_noop ("Booty Bass"),
    138   gettext_noop ("Primus"),
    139   gettext_noop ("Porn Groove"),
    140   gettext_noop ("Satire"),
    141   gettext_noop ("Slow Jam"),
    142   gettext_noop ("Club"),
    143   gettext_noop ("Tango"),
    144   gettext_noop ("Samba"),
    145   gettext_noop ("Folklore"),
    146   gettext_noop ("Ballad"),
    147   gettext_noop ("Power Ballad"),
    148   gettext_noop ("Rhythmic Soul"),
    149   gettext_noop ("Freestyle"),
    150   gettext_noop ("Duet"),
    151   gettext_noop ("Punk Rock"),
    152   gettext_noop ("Drum Solo"),
    153   gettext_noop ("A Cappella"),
    154   gettext_noop ("Euro-House"),
    155   gettext_noop ("Dance Hall"),
    156   gettext_noop ("Goa"),
    157   gettext_noop ("Drum & Bass"),
    158   gettext_noop ("Club-House"),
    159   gettext_noop ("Hardcore"),
    160   gettext_noop ("Terror"),
    161   gettext_noop ("Indie"),
    162   gettext_noop ("BritPop"),
    163   gettext_noop ("Negerpunk"),
    164   gettext_noop ("Polsk Punk"),
    165   gettext_noop ("Beat"),
    166   gettext_noop ("Christian Gangsta Rap"),
    167   gettext_noop ("Heavy Metal"),
    168   gettext_noop ("Black Metal"),
    169   gettext_noop ("Crossover"),
    170   gettext_noop ("Contemporary Christian"),
    171   gettext_noop ("Christian Rock"),
    172   gettext_noop ("Merengue"),
    173   gettext_noop ("Salsa"),
    174   gettext_noop ("Thrash Metal"),
    175   gettext_noop ("Anime"),
    176   gettext_noop ("JPop"),
    177   gettext_noop ("Synthpop"),
    178 };
    179 
    180 #define GENRE_NAME_COUNT \
    181   ((unsigned int) (sizeof genre_names / sizeof (const char *const)))
    182 
    183 
    /*
     * Language names indexed by the classic Macintosh language code that
     * processTextTag reads from a user-data text atom.
     *
     * "Saamisk"/"Lappish" and "Galla"/"Oromo" are alias pairs that share
     * one code each (29 and 87).  Listing both names of a pair as separate
     * entries shifted every following language by one or two slots (for
     * example, code 32 "Russian" resolved to "Farsi"), so each pair is
     * represented by a single entry here.
     *
     * NOTE(review): in Apple's list Welsh .. JavaneseRom carry the codes
     * 128 .. 138.  They are kept contiguous after Esperanto (94) because
     * processTextTag indexes this array directly and cannot cope with
     * holes; mapping them correctly needs a sparse table and a matching
     * change in processTextTag.
     */
    static const char *const languages[] = {
      /* 0 */
      "English",
      "French",
      "German",
      "Italian",
      "Dutch",
      "Swedish",
      "Spanish",
      "Danish",
      "Portuguese",
      "Norwegian",
      /* 10 */
      "Hebrew",
      "Japanese",
      "Arabic",
      "Finnish",
      "Greek",
      "Icelandic",
      "Maltese",
      "Turkish",
      "Croatian",
      "Traditional Chinese",
      /* 20 */
      "Urdu",
      "Hindi",
      "Thai",
      "Korean",
      "Lithuanian",
      "Polish",
      "Hungarian",
      "Estonian",
      "Lettish",
      "Saamisk",                    /* 29, also known as Lappish */
      /* 30 */
      "Faeroese",
      "Farsi",
      "Russian",
      "Simplified Chinese",
      "Flemish",
      "Irish",
      "Albanian",
      "Romanian",
      "Czech",
      "Slovak",
      /* 40 */
      "Slovenian",
      "Yiddish",
      "Serbian",
      "Macedonian",
      "Bulgarian",
      "Ukrainian",
      "Byelorussian",
      "Uzbek",
      "Kazakh",
      "Azerbaijani",
      /* 50 */
      "AzerbaijanAr",
      "Armenian",
      "Georgian",
      "Moldavian",
      "Kirghiz",
      "Tajiki",
      "Turkmen",
      "Mongolian",
      "MongolianCyr",
      "Pashto",
      /* 60 */
      "Kurdish",
      "Kashmiri",
      "Sindhi",
      "Tibetan",
      "Nepali",
      "Sanskrit",
      "Marathi",
      "Bengali",
      "Assamese",
      "Gujarati",
      /* 70 */
      "Punjabi",
      "Oriya",
      "Malayalam",
      "Kannada",
      "Tamil",
      "Telugu",
      "Sinhalese",
      "Burmese",
      "Khmer",
      "Lao",
      /* 80 */
      "Vietnamese",
      "Indonesian",
      "Tagalog",
      "MalayRoman",
      "MalayArabic",
      "Amharic",
      "Tigrinya",
      "Oromo",                      /* 87, also known as Galla */
      "Somali",
      "Swahili",
      /* 90 */
      "Ruanda",
      "Rundi",
      "Chewa",
      "Malagasy",
      "Esperanto",
      /* see NOTE(review) above for the entries below */
      "Welsh",
      "Basque",
      "Catalan",
      "Latin",
      "Quechua",
      "Guarani",
      "Aymara",
      "Tatar",
      "Uighur",
      "Dzongkha",
      "JavaneseRom",
    };
    294 
    295 
    /**
     * Associates a four-character file-type brand with a MIME type.
     */
    typedef struct
    {
      /* brand as stored in the file, exactly four characters */
      const char *ext;
      /* MIME type reported for that brand */
      const char *mime;
    } C2M;

    /* Brand to MIME type table, terminated by a {NULL, NULL} entry.
     * The table is only ever read, hence const.
     * see http://www.mp4ra.org/filetype.html
     *     http://www.ftyps.com/ */
    static const C2M ftMap[] = {
      {"qt  ", "video/quicktime"},
      {"isom", "video/mp4"},        /* ISO Base Media files */
      {"iso2", "video/mp4"},
      {"mp41", "video/mp4"},        /* MPEG-4 (ISO/IEC 14496-1) version 1 */
      {"mp42", "video/mp4"},        /* MPEG-4 (ISO/IEC 14496-1) version 2 */
      {"3gp1", "video/3gpp"},
      {"3gp2", "video/3gpp"},
      {"3gp3", "video/3gpp"},
      {"3gp4", "video/3gpp"},
      {"3gp5", "video/3gpp"},
      {"3g2a", "video/3gpp2"},
      {"mmp4", "video/mp4"},        /* Mobile MPEG-4 */
      {"M4A ", "audio/mp4"},
      {"M4B ", "audio/mp4"},
      {"M4P ", "audio/mp4"},
      {"M4V ", "video/mp4"},
      {"mj2s", "video/mj2"},        /* Motion JPEG 2000 */
      {"mjp2", "video/mj2"},
      {NULL, NULL},
    };
    325 
    326 typedef struct CHE
    327 {
    328   const char *pfx;
    329   enum EXTRACTOR_MetaType type;
    330 } CHE;
    331 
    332 static CHE cHm[] = {
    333   {"aut", EXTRACTOR_METATYPE_AUTHOR_NAME},
    334   {"cpy", EXTRACTOR_METATYPE_COPYRIGHT},
    335   {"day", EXTRACTOR_METATYPE_CREATION_DATE},
    336   {"ed1", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    337   {"ed2", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    338   {"ed3", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    339   {"ed4", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    340   {"ed5", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    341   {"ed6", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    342   {"ed7", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    343   {"ed8", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    344   {"ed9", EXTRACTOR_METATYPE_MODIFICATION_DATE},
    345   {"cmt", EXTRACTOR_METATYPE_COMMENT},
    346   {"url", EXTRACTOR_METATYPE_URL},
    347   {"enc", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
    348   {"hst", EXTRACTOR_METATYPE_BUILDHOST},
    349   {"nam", EXTRACTOR_METATYPE_TITLE},
    350   {"gen", EXTRACTOR_METATYPE_GENRE},
    351   {"mak", EXTRACTOR_METATYPE_CAMERA_MAKE},
    352   {"mod", EXTRACTOR_METATYPE_CAMERA_MODEL},
    353   {"des", EXTRACTOR_METATYPE_DESCRIPTION},
    354   {"dis", EXTRACTOR_METATYPE_DISCLAIMER},
    355   {"dir", EXTRACTOR_METATYPE_MOVIE_DIRECTOR},
    356   {"src", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME},
    357   {"prf", EXTRACTOR_METATYPE_PERFORMER },
    358   {"prd", EXTRACTOR_METATYPE_PRODUCER},
    359   {"PRD", EXTRACTOR_METATYPE_PRODUCT_VERSION},
    360   {"swr", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
    361   {"isr", EXTRACTOR_METATYPE_ISRC},
    362   {"wrt", EXTRACTOR_METATYPE_WRITER},
    363   {"wrn", EXTRACTOR_METATYPE_WARNING},
    364   {"chp", EXTRACTOR_METATYPE_CHAPTER_NAME},
    365   {"inf", EXTRACTOR_METATYPE_DESCRIPTION},
    366   {"req", EXTRACTOR_METATYPE_TARGET_PLATFORM},      /* hardware requirements */
    367   {"fmt", EXTRACTOR_METATYPE_FORMAT},
    368   {NULL, EXTRACTOR_METATYPE_RESERVED },
    369 };
    370 
    371 
    372 typedef struct
    373 {
    374   const char *atom_type;
    375   enum EXTRACTOR_MetaType type;
    376 } ITTagConversionEntry;
    377 
    378 /* iTunes Tags:
    379  * see http://atomicparsley.sourceforge.net/mpeg-4files.html */
    380 static ITTagConversionEntry it_to_extr_table[] = {
    381   {"\xa9" "alb", EXTRACTOR_METATYPE_ALBUM},
    382   {"\xa9" "ART", EXTRACTOR_METATYPE_ARTIST},
    383   {"aART", EXTRACTOR_METATYPE_ARTIST},
    384   {"\xa9" "cmt", EXTRACTOR_METATYPE_COMMENT},
    385   {"\xa9" "day", EXTRACTOR_METATYPE_UNKNOWN_DATE},
    386   {"\xa9" "nam", EXTRACTOR_METATYPE_TITLE},
    387   {"trkn", EXTRACTOR_METATYPE_TRACK_NUMBER},
    388   {"disk", EXTRACTOR_METATYPE_DISC_NUMBER},
    389   {"\xa9" "gen", EXTRACTOR_METATYPE_GENRE},
    390   {"gnre", EXTRACTOR_METATYPE_GENRE},
    391   {"\xa9" "wrt", EXTRACTOR_METATYPE_WRITER},
    392   {"\xa9" "too", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
    393   {"cprt", EXTRACTOR_METATYPE_COPYRIGHT},
    394   {"\xa9" "grp", EXTRACTOR_METATYPE_GROUP},
    395   {"catg", EXTRACTOR_METATYPE_SECTION},
    396   {"keyw", EXTRACTOR_METATYPE_KEYWORDS},
    397   {"desc", EXTRACTOR_METATYPE_DESCRIPTION},
    398   {"tvnn", EXTRACTOR_METATYPE_NETWORK_NAME},
    399   {"tvsh", EXTRACTOR_METATYPE_SHOW_NAME},
    400   {"tven", EXTRACTOR_METATYPE_NETWORK_NAME},
    401   {NULL, EXTRACTOR_METATYPE_RESERVED}
    402 };
    403 
    404 
    /**
     * Short atom header as it is overlaid on the file data.  Both fields
     * are read through ntohl by the code in this file.
     */
    typedef struct
    {
      /* total atom size; the value 1 selects the LongAtom layout */
      unsigned int size;
      /* atom type, compared bytewise against four-character codes */
      unsigned int type;
    } Atom;

    /**
     * Extended atom header, used when the short size field holds 1.
     */
    typedef struct
    {
      /* occupies the position of Atom.size and holds the marker 1 */
      unsigned int one;
      /* atom type, same position as Atom.type */
      unsigned int type;
      /* total atom size, converted with ntohll before use */
      unsigned long long size;
    } LongAtom;
    417 
    /**
     * Convert a 64-bit value from network (big-endian) byte order to host
     * byte order.
     *
     * The bytes of 'n' are reassembled most-significant first, which is
     * correct on any host and does not depend on the __BYTE_ORDER /
     * __BIG_ENDIAN macros.  (A preprocessor test on those macros evaluates
     * to true when neither is defined, silently disabling the swap.)
     *
     * @param n value whose in-memory bytes are in network order
     * @return the same quantity in host order
     */
    static unsigned long long
    ntohll (unsigned long long n)
    {
      unsigned char wire[sizeof (n)];
      unsigned long long host = 0;
      size_t i;

      memcpy (wire, &n, sizeof (n));
      for (i = 0; i < sizeof (n); i++)
        host = (host << 8) | wire[i];
      return host;
    }
    427 
    428 
    429 /**
    430  * Check if at position pos there is a valid atom.
    431  * @return 0 if the atom is invalid, 1 if it is valid
    432  */
    433 static int
    434 checkAtomValid (const char *buffer, size_t size, size_t pos)
    435 {
    436   unsigned long long atomSize;
    437   const Atom *atom;
    438   const LongAtom *latom;
    439   if ((pos >= size) ||
    440       (pos + sizeof (Atom) > size) || (pos + sizeof (Atom) < pos))
    441     return 0;
    442   atom = (const Atom *) &buffer[pos];
    443   if (ntohl (atom->size) == 1)
    444   {
    445     if ((pos + sizeof (LongAtom) > size) || (pos + sizeof (LongAtom) < pos))
    446       return 0;
    447     latom = (const LongAtom *) &buffer[pos];
    448     atomSize = ntohll (latom->size);
    449     if ((atomSize < sizeof (LongAtom)) ||
    450         (atomSize + pos > size) || (atomSize + pos < atomSize))
    451       return 0;
    452   }
    453   else
    454   {
    455     atomSize = ntohl (atom->size);
    456     if ((atomSize < sizeof (Atom)) ||
    457         (atomSize + pos > size) || (atomSize + pos < atomSize))
    458       return 0;
    459   }
    460   return 1;
    461 }
    462 
    463 
    464 /**
    465  * Assumes that checkAtomValid has already been called.
    466  */
    467 static unsigned long long
    468 getAtomSize (const char *buf)
    469 {
    470   const Atom *atom;
    471   const LongAtom *latom;
    472   atom = (const Atom *) buf;
    473   if (ntohl (atom->size) == 1)
    474   {
    475     latom = (const LongAtom *) buf;
    476     return ntohll (latom->size);
    477   }
    478   return ntohl (atom->size);
    479 }
    480 
    481 
    482 /**
    483  * Assumes that checkAtomValid has already been called.
    484  */
    485 static unsigned int
    486 getAtomHeaderSize (const char *buf)
    487 {
    488   const Atom *atom;
    489 
    490   atom = (const Atom *) buf;
    491   if (ntohl (atom->size) == 1)
    492     return sizeof (const LongAtom);
    493   return sizeof (Atom);
    494 }
    495 
    496 
    497 struct ExtractContext
    498 {
    499   EXTRACTOR_MetaDataProcessor proc;
    500   void *proc_cls;
    501   int ret;
    502 };
    503 
    504 static void
    505 addKeyword (enum EXTRACTOR_MetaType type,
    506             const char *str,
    507             struct ExtractContext *ec)
    508 {
    509   if (ec->ret != 0)
    510     return;
    511   ec->ret = ec->proc (ec->proc_cls,
    512                       "qt",
    513                       type,
    514                       EXTRACTOR_METAFORMAT_UTF8,
    515                       "text/plain",
    516                       str,
    517                       strlen (str) + 1);
    518 }
    519 
    520 
    /**
     * Signature shared by all atom handlers.  A handler may assume that
     * checkAtomValid has already accepted the atom at offset 'pos'.
     */
    typedef int (*AtomHandler) (const char *input,
                                size_t size,
                                size_t pos, struct ExtractContext *ec);

    /**
     * One row of a handler table: a name and the handler registered
     * for it.
     */
    typedef struct
    {
      /* presumably the four-character atom type -- the tables that use
         this are defined later in the file */
      char *name;
      /* function invoked for atoms with that name */
      AtomHandler handler;
    } HandlerEntry;

    /**
     * Dispatch the atom at the given position to its handler.
     * Checks the validity of the atom itself.
     *
     * @return 0 on error, 1 for success, -1 for unknown atom type
     */
    static int handleAtom (HandlerEntry *handlers,
                           const char *input,
                           size_t size,
                           size_t pos,
                           struct ExtractContext *ec);

    /* handler tables; their contents are defined later in the file */
    static HandlerEntry all_handlers[];
    static HandlerEntry ilst_handlers[];
    548 
    549 /**
    550  * Process atoms.
    551  * @return 0 on error, 1 for success, -1 for unknown atom type
    552  */
    553 static int
    554 processAtoms (HandlerEntry *handlers, const char *input,
    555               size_t size, struct ExtractContext *ec)
    556 {
    557   size_t pos;
    558 
    559   if (size < sizeof (Atom))
    560     return 1;
    561   pos = 0;
    562   while (pos < size - sizeof (Atom))
    563   {
    564     if (0 == handleAtom (handlers, input, size, pos, ec))
    565       return 0;
    566     pos += getAtomSize (&input[pos]);
    567   }
    568   return 1;
    569 }
    570 
    571 
    572 /**
    573  * Process all atoms.
    574  * @return 0 on error, 1 for success, -1 for unknown atom type
    575  */
    576 static int
    577 processAllAtoms (const char *input,
    578                  size_t size, struct ExtractContext *ec)
    579 {
    580   return processAtoms (all_handlers, input, size, ec);
    581 }
    582 
    583 
    /**
     * Handle the moov atom: everything behind its header is treated as a
     * sequence of child atoms.
     *
     * @return the result of processAllAtoms on the atom's payload
     */
    static int
    moovHandler (const char *input,
                 size_t size, size_t pos, struct ExtractContext *ec)
    {
      const char *atom = &input[pos];
      const unsigned int hdr = getAtomHeaderSize (atom);
      const unsigned long long total = getAtomSize (atom);

      return processAllAtoms (atom + hdr, total - hdr, ec);
    }
    596 
    597 
    598 /* see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap1/chapter_2_section_5.html */
    599 typedef struct
    600 {
    601   Atom header;
    602   /* major brand */
    603   char type[4];
    604   /* minor version */
    605   unsigned int version;
    606   /* compatible brands */
    607   char compatibility[4];
    608 } FileType;
    609 
    610 static int
    611 ftypHandler (const char *input,
    612              size_t size, size_t pos, struct ExtractContext *ec)
    613 {
    614   const FileType *ft;
    615   int i;
    616 
    617   if (getAtomSize (&input[pos]) < sizeof (FileType))
    618   {
    619     return 0;
    620   }
    621   ft = (const FileType *) &input[pos];
    622 
    623   i = 0;
    624   while ((ftMap[i].ext != NULL) && (0 != memcmp (ft->type, ftMap[i].ext, 4)))
    625     i++;
    626   if (ftMap[i].ext != NULL)
    627     addKeyword (EXTRACTOR_METATYPE_MIMETYPE, ftMap[i].mime, ec);
    628   return 1;
    629 }
    630 
    631 
    632 typedef struct
    633 {
    634   Atom hdr;
    635   unsigned char version;
    636   unsigned char flags[3];
    637   /* in seconds since midnight, January 1, 1904 */
    638   unsigned int creationTime;
    639   /* in seconds since midnight, January 1, 1904 */
    640   unsigned int modificationTime;
    641   /* number of time units that pass per second in the movies time
    642      coordinate system */
    643   unsigned int timeScale;
    644   /* A time value that indicates the duration of the movie in time
    645      scale units. */
    646   unsigned int duration;
    647   unsigned int preferredRate;
    648   /* A 16-bit fixed-point number that specifies how loud to
    649      play. 1.0 indicates full volume */
    650   unsigned short preferredVolume;
    651   unsigned char reserved[10];
    652   unsigned char matrix[36];
    653   unsigned int previewTime;
    654   unsigned int previewDuration;
    655   unsigned int posterTime;
    656   unsigned int selectionTime;
    657   unsigned int selectionDuration;
    658   unsigned int currentTime;
    659   unsigned int nextTrackId;
    660 } MovieHeaderAtom;
    661 
    662 static int
    663 mvhdHandler (const char *input,
    664              size_t size, size_t pos, struct ExtractContext *ec)
    665 {
    666   const MovieHeaderAtom *m;
    667   char duration[16];
    668   if (getAtomSize (&input[pos]) != sizeof (MovieHeaderAtom))
    669     return 0;
    670   m = (const MovieHeaderAtom *) &input[pos];
    671   snprintf (duration,
    672             sizeof(duration),
    673             "%us",
    674             ntohl (m->duration) / ntohl (m->timeScale));
    675   addKeyword (EXTRACTOR_METATYPE_DURATION, duration, ec);
    676   return 1;
    677 }
    678 
    679 
    680 typedef struct
    681 {
    682   Atom cmovAtom;
    683   Atom dcomAtom;
    684   char compressor[4];
    685   Atom cmvdAtom;
    686   unsigned int decompressedSize;
    687 } CompressedMovieHeaderAtom;
    688 
    689 static int
    690 cmovHandler (const char *input,
    691              size_t size, size_t pos, struct ExtractContext *ec)
    692 {
    693   const CompressedMovieHeaderAtom *c;
    694   unsigned int s;
    695   char *buf;
    696   int ret;
    697   z_stream z_state;
    698   int z_ret_code;
    699 
    700 
    701   if (getAtomSize (&input[pos]) < sizeof (CompressedMovieHeaderAtom))
    702     return 0;
    703   c = (const CompressedMovieHeaderAtom *) &input[pos];
    704   if ((ntohl (c->dcomAtom.size) != 12) ||
    705       (0 != memcmp (&c->dcomAtom.type, "dcom", 4)) ||
    706       (0 != memcmp (c->compressor, "zlib", 4)) ||
    707       (0 != memcmp (&c->cmvdAtom.type, "cmvd", 4)) ||
    708       (ntohl (c->cmvdAtom.size) !=
    709        getAtomSize (&input[pos]) - sizeof (Atom) * 2 - 4))
    710   {
    711     return 0;                   /* dcom must be 12 bytes */
    712   }
    713   s = ntohl (c->decompressedSize);
    714   if (s > 16 * 1024 * 1024)
    715     return 1;                   /* ignore, too big! */
    716   buf = malloc (s);
    717   if (buf == NULL)
    718     return 1;                   /* out of memory, handle gracefully */
    719 
    720   z_state.next_in = (unsigned char *) &c[1];
    721   z_state.avail_in = ntohl (c->cmvdAtom.size);
    722   z_state.avail_out = s;
    723   z_state.next_out = (unsigned char *) buf;
    724   z_state.zalloc = (alloc_func) 0;
    725   z_state.zfree = (free_func) 0;
    726   z_state.opaque = (voidpf) 0;
    727   z_ret_code = inflateInit (&z_state);
    728   if (Z_OK != z_ret_code)
    729   {
    730     free (buf);
    731     return 0;                   /* crc error? */
    732   }
    733   z_ret_code = inflate (&z_state, Z_NO_FLUSH);
    734   if ((z_ret_code != Z_OK) && (z_ret_code != Z_STREAM_END))
    735   {
    736     free (buf);
    737     return 0;                   /* decode error? */
    738   }
    739   z_ret_code = inflateEnd (&z_state);
    740   if (Z_OK != z_ret_code)
    741   {
    742     free (buf);
    743     return 0;                   /* decode error? */
    744   }
    745   ret = handleAtom (all_handlers, buf, s, 0, ec);
    746   free (buf);
    747   return ret;
    748 }
    749 
    750 
    751 typedef struct
    752 {
    753   short integer;
    754   short fraction;
    755 } QTFixed;
    756 
    757 typedef struct
    758 {
    759   Atom hdr;
    760   unsigned int flags;           /* 1 byte of version, 3 bytes of flags */
    761   /* in seconds since midnight, January 1, 1904 */
    762   unsigned int creationTime;
    763   /* in seconds since midnight, January 1, 1904 */
    764   unsigned int modificationTime;
    765   unsigned int trackID;
    766   unsigned int reserved_0;
    767   unsigned int duration;
    768   unsigned int reserved_1;
    769   unsigned int reserved_2;
    770   unsigned short layer;
    771   unsigned short alternate_group;
    772   unsigned short volume;
    773   unsigned short reserved_3;
    774   QTFixed matrix[3][3];
    775   /* in pixels */
    776   QTFixed track_width;
    777   /* in pixels */
    778   QTFixed track_height;
    779 } TrackAtom;
    780 
    781 static int
    782 tkhdHandler (const char *input,
    783              size_t size, size_t pos, struct ExtractContext *ec)
    784 {
    785   const TrackAtom *m;
    786   char dimensions[40];
    787 
    788   if (getAtomSize (&input[pos]) < sizeof (TrackAtom))
    789     return 0;
    790   m = (const TrackAtom *) &input[pos];
    791   if (ntohs (m->track_width.integer) != 0)
    792   {
    793     /* if actually a/the video track */
    794     snprintf (dimensions,
    795               sizeof(dimensions),
    796               "%dx%d",
    797               ntohs (m->track_width.integer),
    798               ntohs (m->track_height.integer));
    799     addKeyword (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, dimensions, ec);
    800   }
    801   return 1;
    802 }
    803 
    804 
    /**
     * Handle the trak atom: everything behind its header is treated as a
     * sequence of child atoms.
     *
     * @return the result of processAllAtoms on the atom's payload
     */
    static int
    trakHandler (const char *input,
                 size_t size, size_t pos, struct ExtractContext *ec)
    {
      const char *atom = &input[pos];
      const unsigned int hdr = getAtomHeaderSize (atom);
      const unsigned long long total = getAtomSize (atom);

      return processAllAtoms (atom + hdr, total - hdr, ec);
    }
    813 
    814 
    /**
     * Handle the meta atom: skip four bytes behind the header and treat
     * the rest as a sequence of child atoms.
     *
     * @return 0 if the atom cannot hold those four bytes, otherwise the
     *         result of processAllAtoms on the remaining payload
     */
    static int
    metaHandler (const char *input,
                 size_t size, size_t pos, struct ExtractContext *ec)
    {
      const char *atom = &input[pos];
      const unsigned int skip = getAtomHeaderSize (atom) + 4;
      const unsigned long long total = getAtomSize (atom);

      if (total < skip)
        return 0;
      return processAllAtoms (atom + skip, total - skip, ec);
    }
    825 
    826 
    827 typedef struct
    828 {
    829   Atom header;
    830   unsigned short length;
    831   unsigned short language;
    832 } InternationalText;
    833 
    834 /*
    835  * see http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap2/chapter
    836 _3_section_2.html
    837  *   "User Data Text Strings and Language Codes"
    838  * TODO: make conformant
    839  */
    840 static int
    841 processTextTag (const char *input,
    842                 size_t size,
    843                 size_t pos,
    844                 enum EXTRACTOR_MetaType type, struct ExtractContext *ec)
    845 {
    846   unsigned long long as;
    847   unsigned short len;
    848   unsigned short lang;
    849   const InternationalText *txt;
    850   char *meta;
    851   int i;
    852 
    853   /* contains "international text":
    854      16-bit size + 16 bit language code */
    855   as = getAtomSize (&input[pos]);
    856   if (as < sizeof (InternationalText))
    857     return 0;                   /* invalid */
    858   txt = (const InternationalText *) &input[pos];
    859   len = ntohs (txt->length);
    860   if (len + sizeof (InternationalText) > as)
    861     return 0;                   /* invalid */
    862   lang = ntohs (txt->language);
    863   if (lang >= sizeof (languages) / sizeof (char *))
    864     return 0;                   /* invalid */
    865   addKeyword (EXTRACTOR_METATYPE_LANGUAGE, languages[lang], ec);
    866 
    867   meta = malloc (len + 1);
    868   if (meta == NULL)
    869     return 0;
    870   memcpy (meta, &txt[1], len);
    871   meta[len] = '\0';
    872   for (i = 0; i < len; i++)
    873     if (meta[i] == '\r')
    874       meta[i] = '\n';
    875   addKeyword (type, meta, ec);
    876   free (meta);
    877   return 1;
    878 }
    879 
    880 
    881 static int
    882 c_Handler (const char *input,
    883            size_t size, size_t pos, struct ExtractContext *ec)
    884 {
    885   int i;
    886 
    887   i = 0;
    888   while ((cHm[i].pfx != NULL) && (0 != memcmp (&input[pos + 5], cHm[i].pfx, 3)))
    889     i++;
    890   if (cHm[i].pfx != NULL)
    891     return processTextTag (input, size, pos, cHm[i].type, ec);
    892   return -1;                    /* not found */
    893 }
    894 
    895 
    /**
     * Handle the udta atom: everything behind its header is treated as a
     * sequence of child atoms.
     *
     * @return the result of processAllAtoms on the atom's payload
     */
    static int
    udtaHandler (const char *input,
                 size_t size, size_t pos, struct ExtractContext *ec)
    {
      const char *atom = &input[pos];
      const unsigned int hdr = getAtomHeaderSize (atom);
      const unsigned long long total = getAtomSize (atom);

      return processAllAtoms (atom + hdr, total - hdr, ec);
    }
    904 
    905 
    906 static int
    907 processDataAtom (const char *input,
    908                  size_t size, /* parent atom size */
    909                  size_t pos,
    910                  const char *patom,
    911                  enum EXTRACTOR_MetaType type,
    912                  struct ExtractContext *ec)
    913 {
    914   char *meta;
    915   unsigned char version;
    916   unsigned int flags;
    917   unsigned long long asize;
    918   unsigned int len;
    919   unsigned int hdr;
    920   int i;
    921 
    922   hdr = getAtomHeaderSize (&input[pos]);
    923   asize = getAtomSize (&input[pos]);
    924   if (memcmp (&input[pos + 4], "data", 4) != 0)
    925     return -1;
    926 
    927   if ((asize < hdr + 8) || /* header + u32 flags + u32 reserved */
    928       (asize > (getAtomSize (&patom[0]) - 8)) )
    929     return 0;
    930 
    931   len = (unsigned int) (asize - (hdr + 8));
    932 
    933   version = input[pos + 8];
    934   flags = ((unsigned char) input[pos + 9] << 16)
    935           | ((unsigned char) input[pos + 10] << 8)
    936           | (unsigned char) input[pos + 11];
    937 #if DEBUG
    938   printf ("[data] version:%02x flags:%08x txtlen:%d\n", version, flags, len);
    939 #endif
    940 
    941   if (version != 0)
    942     return -1;
    943 
    944   if (flags == 0x0)   /* binary data */
    945   {
    946     if (memcmp (&patom[4], "gnre", 4) == 0)
    947     {
    948       if (len >= 2)
    949       {
    950         unsigned short genre = ((unsigned char) input[pos + 16] << 8)
    951                                | (unsigned char) input[pos + 17];
    952         if ((genre > 0) && (genre < GENRE_NAME_COUNT))
    953           addKeyword (type, genre_names[genre - 1], ec);
    954       }
    955       return 1;
    956     }
    957     else if ((memcmp (&patom[4], "trkn", 4) == 0) ||
    958              (memcmp (&patom[4], "disk", 4) == 0))
    959     {
    960       if (len >= 4)
    961       {
    962         unsigned short n = ((unsigned char) input[pos + 18] << 8)
    963                            | (unsigned char) input[pos + 19];
    964         char s[8];
    965         snprintf (s, 8, "%d", n);
    966         addKeyword (type, s, ec);
    967       }
    968     }
    969     else
    970     {
    971       return -1;
    972     }
    973   }
    974   else if (flags == 0x1)   /* text data */
    975   {
    976     meta = malloc (len + 1);
    977     if (meta == NULL)
    978       return 0;
    979     memcpy (meta, &input[pos + 16], len);
    980     meta[len] = '\0';
    981     for (i = 0; i < len; i++)
    982       if (meta[i] == '\r')
    983         meta[i] = '\n';
    984     addKeyword (type, meta, ec);
    985     free (meta);
    986     return 1;
    987   }
    988 
    989   return -1;
    990 }
    991 
    992 
    993 /* NOTE: iTunes tag processing should, in theory, be limited to iTunes
    994  * file types (from ftyp), but, in reality, it seems that there are other
    995  * files, like 3gpp, out in the wild with iTunes tags. */
    996 static int
    997 iTunesTagHandler (const char *input,
    998                   size_t size, size_t pos, struct ExtractContext *ec)
    999 {
   1000   unsigned long long asize;
   1001   unsigned int hdr;
   1002   int i;
   1003 
   1004   hdr = getAtomHeaderSize (&input[pos]);
   1005   asize = getAtomSize (&input[pos]);
   1006 
   1007   if (asize < hdr + 8) /* header + at least one atom */
   1008     return 0;
   1009 
   1010   i = 0;
   1011   while ((it_to_extr_table[i].atom_type != NULL) &&
   1012          (0 != memcmp (&input[pos + 4], it_to_extr_table[i].atom_type, 4)))
   1013     i++;
   1014   if (it_to_extr_table[i].atom_type != NULL)
   1015     return processDataAtom (input, asize, pos + hdr, &input[pos],
   1016                             it_to_extr_table[i].type, ec);
   1017 
   1018   return -1;
   1019 }
   1020 
   1021 
   1022 static int
   1023 ilstHandler (const char *input,
   1024              size_t size, size_t pos, struct ExtractContext *ec)
   1025 {
   1026   unsigned int hdr = getAtomHeaderSize (&input[pos]);
   1027   return processAtoms (ilst_handlers, &input[pos + hdr],
   1028                        getAtomSize (&input[pos]) - hdr, ec);
   1029 }
   1030 
   1031 
   1032 static HandlerEntry all_handlers[] = {
   1033   {"moov", &moovHandler},
   1034   {"cmov", &cmovHandler},
   1035   {"mvhd", &mvhdHandler},
   1036   {"trak", &trakHandler},
   1037   {"tkhd", &tkhdHandler},
   1038   {"ilst", &ilstHandler},
   1039   {"meta", &metaHandler},
   1040   {"udta", &udtaHandler},
   1041   {"ftyp", &ftypHandler},
   1042   {"\xa9" "swr", &c_Handler},
   1043   {"\xa9" "cpy", &c_Handler},
   1044   {"\xa9" "day", &c_Handler},
   1045   {"\xa9" "dir", &c_Handler},
   1046   {"\xa9" "ed1", &c_Handler},
   1047   {"\xa9" "ed2", &c_Handler},
   1048   {"\xa9" "ed3", &c_Handler},
   1049   {"\xa9" "ed4", &c_Handler},
   1050   {"\xa9" "ed5", &c_Handler},
   1051   {"\xa9" "ed6", &c_Handler},
   1052   {"\xa9" "ed7", &c_Handler},
   1053   {"\xa9" "ed8", &c_Handler},
   1054   {"\xa9" "ed9", &c_Handler},
   1055   {"\xa9" "fmt", &c_Handler},
   1056   {"\xa9" "inf", &c_Handler},
   1057   {"\xa9" "prd", &c_Handler},
   1058   {"\xa9" "prf", &c_Handler},
   1059   {"\xa9" "req", &c_Handler},
   1060   {"\xa9" "src", &c_Handler},
   1061   {"\xa9" "wrt", &c_Handler},
   1062   {"\xa9" "aut", &c_Handler},
   1063   {"\xa9" "hst", &c_Handler},
   1064   {"\xa9" "wrt", &c_Handler},
   1065   {"\xa9" "cmt", &c_Handler},
   1066   {"\xa9" "mak", &c_Handler},
   1067   {"\xa9" "mod", &c_Handler},
   1068   {"\xa9" "nam", &c_Handler},
   1069   {"\xa9" "des", &c_Handler},
   1070   {"\xa9" "PRD", &c_Handler},
   1071   {"\xa9" "wrn", &c_Handler},
   1072   {"\xa9" "chp", &c_Handler},
   1073   /*  { "name", &nameHandler }, */
   1074   {NULL, NULL},
   1075 };
   1076 
   1077 static HandlerEntry ilst_handlers[] = {
   1078   {"\xa9" "alb", &iTunesTagHandler},
   1079   {"\xa9" "ART", &iTunesTagHandler},
   1080   {"aART", &iTunesTagHandler},
   1081   {"\xa9" "cmt", &iTunesTagHandler},
   1082   {"\xa9" "day", &iTunesTagHandler},
   1083   {"\xa9" "nam", &iTunesTagHandler},
   1084   {"\xa9" "gen", &iTunesTagHandler},
   1085   {"gnre", &iTunesTagHandler},
   1086   {"trkn", &iTunesTagHandler},
   1087   {"disk", &iTunesTagHandler},
   1088   {"\xa9" "wrt", &iTunesTagHandler},
   1089   {"\xa9" "too", &iTunesTagHandler},
   1090   {"tmpo", &iTunesTagHandler},
   1091   {"cprt", &iTunesTagHandler},
   1092   {"cpil", &iTunesTagHandler},
   1093   {"covr", &iTunesTagHandler},
   1094   {"rtng", &iTunesTagHandler},
   1095   {"\xa9" "grp", &iTunesTagHandler},
   1096   {"stik", &iTunesTagHandler},
   1097   {"pcst", &iTunesTagHandler},
   1098   {"catg", &iTunesTagHandler},
   1099   {"keyw", &iTunesTagHandler},
   1100   {"purl", &iTunesTagHandler},
   1101   {"egid", &iTunesTagHandler},
   1102   {"desc", &iTunesTagHandler},
   1103   {"\xa9" "lyr", &iTunesTagHandler},
   1104   {"tvnn", &iTunesTagHandler},
   1105   {"tvsh", &iTunesTagHandler},
   1106   {"tven", &iTunesTagHandler},
   1107   {"tvsn", &iTunesTagHandler},
   1108   {"tves", &iTunesTagHandler},
   1109   {"purd", &iTunesTagHandler},
   1110   {"pgap", &iTunesTagHandler},
   1111   {NULL, NULL},
   1112 };
   1113 
   1114 /**
   1115  * Call the handler for the atom at the given position.
   1116  * @return 0 on error, 1 for success, -1 for unknown atom type
   1117  */
   1118 static int
   1119 handleAtom (HandlerEntry *handlers, const char *input,
   1120             size_t size, size_t pos, struct ExtractContext *ec)
   1121 {
   1122   int i;
   1123   if (0 == checkAtomValid (input, size, pos))
   1124   {
   1125     return 0;
   1126   }
   1127   i = 0;
   1128   while ((handlers[i].name != NULL) &&
   1129          (0 != memcmp (&input[pos + 4], handlers[i].name, 4)))
   1130     i++;
   1131   if (handlers[i].name == NULL)
   1132   {
   1133 #if DEBUG
   1134     char b[5];
   1135     memcpy (b, &input[pos + 4], 4);
   1136     b[4] = '\0';
   1137     printf ("No handler for `%s'\n", b);
   1138 #endif
   1139     return -1;
   1140   }
   1141   i = handlers[i].handler (input, size, pos, ec);
   1142 #if DEBUG
   1143   printf ("Running handler for `%4s' at %u completed with result %d\n",
   1144           &input[pos + 4], pos, i);
   1145 #endif
   1146   return i;
   1147 }
   1148 
   1149 
   1150 /* mimetypes:
   1151    video/quicktime: mov,qt: Quicktime animation;
   1152    video/x-quicktime: mov,qt: Quicktime animation;
   1153    application/x-quicktimeplayer: qtl: Quicktime list;
   1154  */
   1155 
   1156 int
   1157 EXTRACTOR_qt_extract (const char *data,
   1158                       size_t size,
   1159                       EXTRACTOR_MetaDataProcessor proc,
   1160                       void *proc_cls,
   1161                       const char *options)
   1162 {
   1163   struct ExtractContext ec;
   1164   ec.proc = proc;
   1165   ec.proc_cls = proc_cls;
   1166   ec.ret = 0;
   1167   processAllAtoms (data, size, &ec);
   1168   return ec.ret;
   1169 }
   1170 
   1171 
   1172 /*  end of qt_extractor.c */