libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

ebml_extractor.c (93892B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 2, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 
     21 /*
     22  * Made by Gabriel Peixoto
     23  * Using AVInfo 1.x code. Copyright (c) 2004 George Shuklin.
     24  * Nearly complete rewrite by LRN, Copyright (c) 2012
     25  */
     26 
     27 #include "platform.h"
     28 #include "extractor.h"
     29 #include <stdint.h>
     30 
     31 #include "le_architecture.h"
     32 
     33 #ifndef DEBUG_EBML
     34 # define DEBUG_EBML 0
     35 #endif
     36 
     37 #if WINDOWS
     38 /* According to http://old.nabble.com/Porting-localtime_r-and-gmtime_r-td15282276.html
     39  * msvcrt.dll does have thread-safe gmtime implementation,
     40  * even though the documentation says otherwise.
     41  * Should be easy to check - spawn 2 threads, run _gmtime64 in each one
     42  * and see if they return the same pointer.
     43  */
     44 struct tm *
     45 gmtime_undocumented_64_r (const __time64_t *timer, struct tm *result)
     46 {
     47   struct tm *local_result = NULL; // _gmtime64 (timer);
     48 
     49   if ((local_result == NULL) || (result == NULL) )
     50     return NULL;
     51 
     52   memcpy (result, local_result, sizeof (*result));
     53   return result;
     54 }
     55 
     56 
     57 #endif
     58 
     59 #include "extractor_plugins.h"
     60 
/**
 * Hand the NUL-terminated string 's' to the 'proc' callback as UTF-8
 * "text/plain" metadata of type 't', passing "ebml" as the second
 * argument.  'proc' and 'proc_cls' must be in scope where the macro is
 * used.  The reported length includes the terminating NUL.  Note that
 * 's' is evaluated twice and the value returned by 'proc' is discarded.
 */
#define ADD_EBML(s,t) do { proc (proc_cls, "ebml", t, EXTRACTOR_METAFORMAT_UTF8, \
                                 "text/plain", s, strlen (s) + 1); } while (0)
/**
 * Same as ADD_EBML, but passes "matroska" as the second argument.
 */
#define ADD_MATROSKA(s,t) do { proc (proc_cls, "matroska", t, \
                                     EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, \
                                     strlen (s) + 1); } while (0)
     66 
/**
 * String length limit. The spec does not limit the length of strings,
 * but we don't want to allocate 2^56 bytes
 * just because some EBML file says it has a string of that length!
 * This must also be no larger than the number of bytes LE gives us in
 * one go (the code doesn't know how to "read a part of a string, request
 * a seek, then read some more, and repeat until the whole string is read").
 * If it is larger, the code will loop forever, requesting the same
 * seek position (beginning of the string) over and over.
 * FIXME: find a way to fix that condition in LE itself?
 * TODO: rewrite the string reading code to allocate strings on the heap;
 * that will allow us to greatly increase the maximum string size. Right
 * now strings are allocated on the stack, and can't be too long because
 * of that.
 */
#define MAX_STRING_SIZE 1024
     83 
/**
 * Describes one Matroska track type.
 */
struct MatroskaTrackType
{
  /* Numeric track type code (the value of the TrackType element). */
  unsigned char code;
  /* Printable name of the track type; NULL in the table terminator. */
  const char *name;
  /* NOTE(review): the table uses 1 and -1 here; presumably 1 means the
   * Video settings must be valid for this track type -- confirm against
   * the code that consumes track_types. */
  char video_must_be_valid;
  /* NOTE(review): same as above, for the Audio settings -- confirm. */
  char audio_must_be_valid;
};
     91 
/**
 * Table of known track types, terminated by an entry whose name is NULL
 * (the terminator's two flag fields are left zero-initialized).
 */
struct MatroskaTrackType track_types[] = {
  {0x01, "video", 1, -1},
  {0x02, "audio", -1, 1},
  {0x03, "complex", -1, -1},
  {0x10, "logo", -1, -1},
  {0x11, "subtitle", -1, -1},
  {0x12, "buttons", -1, -1},
  {0x20, "control", -1, -1},
  {0x00, NULL}
};
    102 
/**
 * Associates a Matroska tag name with the libextractor metadata type
 * it is reported as.
 */
struct MatroskaTagMap
{
  /* Matroska tag name (e.g. "TITLE"); NULL in the table terminator. */
  const char *name;
  /* libextractor metadata type paired with that tag name. */
  enum EXTRACTOR_MetaType id;
};
    108 
/* TODO: Add TargetLevel parsing, and use it to correctly set:
 * "track number" and "disk number" from PART_NUMBER,
 * "author email" from EMAIL,
 * "publisher address" from ADDRESS.
 */
/**
 * Table pairing Matroska tag names with libextractor metadata types.
 * Terminated by an entry whose name is NULL. Tag names that are listed
 * only inside comments below have no entry in the table.
 */
struct MatroskaTagMap tag_map[] = {
  {"COUNTRY", EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE},
  {"TITLE", EXTRACTOR_METATYPE_TITLE},
  {"SUBTITLE", EXTRACTOR_METATYPE_SUBTITLE},
  {"URL", EXTRACTOR_METATYPE_URL},
  {"ARTIST", EXTRACTOR_METATYPE_ARTIST},
  {"LEAD_PERFORMER", EXTRACTOR_METATYPE_PERFORMER},
  {"ACCOMPANIMENT", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST}, /* not sure if it's correct */
  {"COMPOSER", EXTRACTOR_METATYPE_COMPOSER},
  {"LYRICS", EXTRACTOR_METATYPE_LYRICS},
  /* LYRICIST */
  {"CONDUCTOR", EXTRACTOR_METATYPE_CONDUCTOR},
  /* DIRECTOR UTF-8 This is akin to the IART tag in RIFF.
     ASSISTANT_DIRECTOR UTF-8 The name of the assistant director.
     DIRECTOR_OF_PHOTOGRAPHY UTF-8 The name of the director of photography, also known as cinematographer. This is akin to the ICNM tag in Extended RIFF.
     SOUND_ENGINEER UTF-8 The name of the sound engineer or sound recordist.
     ART_DIRECTOR UTF-8 The person who oversees the artists and craftspeople who build the sets.
     PRODUCTION_DESIGNER UTF-8 Artist responsible for designing the overall visual appearance of a movie.
     CHOREGRAPHER UTF-8 The name of the choreographer
     COSTUME_DESIGNER UTF-8 The name of the costume designer
     ACTOR UTF-8 An actor or actress playing a role in this movie. This is the person's real name, not the character's name the person is playing.
     CHARACTER UTF-8 The name of the character an actor or actress
  */
  {"WRITTEN_BY", EXTRACTOR_METATYPE_WRITER},
  /*
    SCREENPLAY_BY UTF-8 The author of the screenplay or scenario (used for movies and TV shows).
    EDITED_BY UTF-8 This is akin to the IEDT tag in Extended RIFF.
    PRODUCER UTF-8 Produced by. This is akin to the IPRO tag in Extended RIFF. (NOT EXTRACTOR_METATYPE_PRODUCER!)
    COPRODUCER UTF-8 The name of a co-producer.
    EXECUTIVE_PRODUCER UTF-8 The name of an executive producer.
    DISTRIBUTED_BY UTF-8 This is akin to the IDST tag in Extended RIFF.
    MASTERED_BY UTF-8 The engineer who mastered the content for a physical medium or for digital distribution.
  */
  {"ENCODED_BY", EXTRACTOR_METATYPE_ENCODED_BY},
  /*
    MIXED_BY UTF-8 DJ mix by the artist specified
    REMIXED_BY UTF-8 Interpreted, remixed, or otherwise modified by. This is akin to the TPE4 tag in ID3.
    PRODUCTION_STUDIO UTF-8 This is akin to the ISTD tag in Extended RIFF.
    THANKS_TO UTF-8 A very general tag for everyone else that wants to be listed.
  */
  {"PUBLISHER", EXTRACTOR_METATYPE_PUBLISHER},
  /*
    LABEL UTF-8 The record label or imprint on the disc.
  */
  {"GENRE", EXTRACTOR_METATYPE_GENRE},
  {"MOOD", EXTRACTOR_METATYPE_MOOD},
  /*
    ORIGINAL_MEDIA_TYPE UTF-8 Describes the original type of the media, such as, "DVD", "CD", "computer image," "drawing," "lithograph," and so forth. This is akin to the TMED tag in ID3.
    CONTENT_TYPE UTF-8 The type of the item. e.g. Documentary, Feature Film, Cartoon, Music Video, Music, Sound FX, ...
  */
  {"SUBJECT", EXTRACTOR_METATYPE_SUBJECT},
  {"DESCRIPTION", EXTRACTOR_METATYPE_DESCRIPTION},
  {"KEYWORDS", EXTRACTOR_METATYPE_KEYWORDS},
  {"SUMMARY", EXTRACTOR_METATYPE_SUMMARY},
  /*
    SYNOPSIS UTF-8 A description of the story line of the item.
    INITIAL_KEY UTF-8 The initial key that a musical track starts in. The format is identical to ID3.
    PERIOD UTF-8 Describes the period that the piece is from or about. For example, "Renaissance".
    LAW_RATING UTF-8 Depending on the country it's the format of the rating of a movie (P, R, X in the USA, an age in other countries or a URI defining a logo).
    ICRA binary The ICRA content rating for parental control. (Previously RSACi)
  */
  {"DATE_RELEASED", EXTRACTOR_METATYPE_PUBLICATION_DATE},
  {"DATE_RECORDED", EXTRACTOR_METATYPE_CREATION_DATE},
  {"DATE_ENCODED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_TAGGED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_DIGITIZED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_WRITTEN", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_PURCHASED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  /*
    RECORDING_LOCATION UTF-8 The location where the item was recorded. The countries corresponding to the string, same 2 octets as in Internet domains, or possibly ISO-3166. This code is followed by a comma, then more detailed information such as state/province, another comma, and then city. For example, "US, Texas, Austin". This will allow for easy sorting. It is okay to only store the country, or the country and the state/province. More detailed information can be added after the city through the use of additional commas. In cases where the province/state is unknown, but you want to store the city, simply leave a space between the two commas. For example, "US, , Austin".
    COMPOSITION_LOCATION UTF-8 Location that the item was originally designed/written. The countries corresponding to the string, same 2 octets as in Internet domains, or possibly ISO-3166. This code is followed by a comma, then more detailed information such as state/province, another comma, and then city. For example, "US, Texas, Austin". This will allow for easy sorting. It is okay to only store the country, or the country and the state/province. More detailed information can be added after the city through the use of additional commas. In cases where the province/state is unknown, but you want to store the city, simply leave a space between the two commas. For example, "US, , Austin".
    COMPOSER_NATIONALITY UTF-8 Nationality of the main composer of the item, mostly for classical music. The countries corresponding to the string, same 2 octets as in Internet domains, or possibly ISO-3166.
  */
  /* Matroska considers "COMMENT", "PLAY_COUNTER" and "RATING" to be personal. Should we extract them? */
  {"COMMENT", EXTRACTOR_METATYPE_COMMENT},
  {"PLAY_COUNTER", EXTRACTOR_METATYPE_PLAY_COUNTER},
  {"RATING", EXTRACTOR_METATYPE_POPULARITY_METER},
  /*
    ENCODER UTF-8 The software or hardware used to encode this item. ("LAME" or "XviD")
    ENCODER_SETTINGS UTF-8 A list of the settings used for encoding this item. No specific format.
    BPS UTF-8 The average bits per second of the specified item. This is only the data in the Blocks, and excludes headers and any container overhead.
    FPS UTF-8 The average frames per second of the specified item. This is typically the average number of Blocks per second. In the event that lacing is used, each laced chunk is to be counted as a separate frame.
  */
  {"BPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE},
  /*
    MEASURE UTF-8 In music, a measure is a unit of time in Western music like "4/4". It represents a regular grouping of beats, a meter, as indicated in musical notation by the time signature. The majority of the contemporary rock and pop music you hear on the radio these days is written in the 4/4 time signature.
    TUNING UTF-8 It is saved as a frequency in hertz to allow near-perfect tuning of instruments to the same tone as the musical piece (e.g. "441.34" in Hertz). The default value is 440.0 Hz.
    REPLAYGAIN_GAIN binary The gain to apply to reach 89dB SPL on playback. This is based on the Replay Gain standard. Note that ReplayGain information can be found at all TargetType levels (track, album, etc).
    REPLAYGAIN_PEAK binary The maximum absolute peak value of the item. This is based on the Replay Gain standard.
  */
  {"ISRC", EXTRACTOR_METATYPE_ISRC},
  /*
    MCDI binary This is a binary dump of the TOC of the CDROM that this item was taken from. This holds the same information as the MCDI in ID3.
    ISBN UTF-8 International Standard Book Number
    BARCODE UTF-8 EAN-13 (European Article Numbering) or UPC-A (Universal Product Code) bar code identifier
    CATALOG_NUMBER UTF-8 A label-specific string used to identify the release (TIC 01 for example).
    LABEL_CODE UTF-8 A 4-digit or 5-digit number to identify the record label, typically printed as (LC) xxxx or (LC) 0xxxx on CDs medias or covers (only the number is stored).
    LCCN UTF-8 Library of Congress Control Number
  */
  /*
    PURCHASE_ITEM UTF-8 URL to purchase this file. This is akin to the WPAY tag in ID3.
    PURCHASE_INFO UTF-8 Information on where to purchase this album. This is akin to the WCOM tag in ID3.
    PURCHASE_OWNER UTF-8 Information on the person who purchased the file. This is akin to the TOWN tag in ID3.
    PURCHASE_PRICE UTF-8 The amount paid for entity. There should only be a numeric value in here. Only numbers, no letters or symbols other than ".". For instance, you would store "15.59" instead of "$15.59USD".
    PURCHASE_CURRENCY UTF-8 The currency type used to pay for the entity. Use ISO-4217 for the 3 letter currency code.
  */
  {"COPYRIGHT", EXTRACTOR_METATYPE_COPYRIGHT},
  {"PRODUCTION_COPYRIGHT", EXTRACTOR_METATYPE_COPYRIGHT},
  {"LICENSE", EXTRACTOR_METATYPE_LICENSE},
  /* TERMS_OF_USE UTF-8 The terms of use for this item. This is akin to the USER tag in ID3. */
  {NULL, EXTRACTOR_METATYPE_RESERVED}
};
    227 
/**
 * Element IDs recognized by this file: generic EBML header elements
 * (EBMLID_*) and Matroska elements (MatroskaID_*). The values include
 * the variable-length-integer length marker bit, i.e. they are in the
 * form produced by VINTparse() in VINT_READ_ID mode.
 */
enum
{
  EBMLID_FILE_BEGIN = 0x1A, /* First byte of EBMLID_EBML */
  EBMLID_EBML = 0x1A45DFA3,
  EBMLID_VERSION = 0x4286,
  EBMLID_READ_VERSION = 0x42f7,
  EBMLID_MAX_ID_LENGTH = 0x42f2,
  EBMLID_MAX_SIZE_LENGTH = 0x42f3,
  EBMLID_DOCTYPE = 0x4282,
  EBMLID_DOCTYPE_VERSION = 0x4287,
  EBMLID_DOCTYPE_READ_VERSION = 0x4285,

  /*EBMLID_CRC32 = 0xC3, FIXME: support this! Need some magical logic to skip it, unlike MatroskaID_CRC32 = 0xBF. That is, files with 0xC3 are completely unreadable at the moment. */

  MatroskaID_Segment = 0x18538067,

  MatroskaID_SeekHead = 0x114D9B74,

  MatroskaID_Seek = 0x4DBB, /* mandatory, may appear more than once. Contains a single seek entry to an EBML element. */

  MatroskaID_SeekID = 0x53AB, /* mandatory, BINARY. The binary ID corresponding to the element name. */
  MatroskaID_SeekPosition = 0x53AC, /* mandatory, UINT. The position of the element in the segment in octets (0 = first level 1 element). */

  MatroskaID_Info = 0x1549A966,

  MatroskaID_Info_TimecodeScale = 0x2AD7B1, /* defaults to 1000000, UINT. Timecode scale in nanoseconds (1.000.000 means all timecodes in the segment are expressed in milliseconds). */
  MatroskaID_Info_Duration = 0x4489, /* must be >0, FLOAT. Duration of the segment (based on TimecodeScale). */
  MatroskaID_Info_DateUTC = 0x4461, /* DATE. Date of the origin of timecode (value 0), i.e. production date. */
  MatroskaID_Info_Title = 0x7BA9, /* UTF-8-encoded. General name of the segment. */
  MatroskaID_Info_MuxingApp = 0x4D80, /* mandatory, UTF-8-encoded. Muxing application or library ("libmatroska-0.4.3"). */
  MatroskaID_Info_WritingApp = 0x5741, /* mandatory, UTF-8-encoded. Writing application ("mkvmerge-0.3.3"). */

  MatroskaID_Tracks = 0x1654AE6B,

  MatroskaID_Tracks_TrackEntry = 0xAE,

  MatroskaID_Tracks_TrackType = 0x83, /* mandatory, 1-254, UINT. A set of track types coded on 8 bits (1: video, 2: audio, 3: complex, 0x10: logo, 0x11: subtitle, 0x12: buttons, 0x20: control). */
  MatroskaID_Tracks_Name = 0x536E, /* UTF-8-encoded. A human-readable track name. */
  MatroskaID_Tracks_Language = 0x22B59C, /* defaults to 'eng', string. Specifies the language of the track in the Matroska languages form. */
  MatroskaID_Tracks_CodecID = 0x86, /* mandatory, string. An ID corresponding to the codec, see the codec page ( http://matroska.org/technical/specs/codecid/index.html ) for more info. */
  MatroskaID_Tracks_CodecName = 0x258688, /* UTF-8-encoded. A human-readable string specifying the codec. */

  MatroskaID_Tracks_Video = 0xE0, /* Video settings. */
  MatroskaID_Tracks_Video_FlagInterlaced = 0x9A, /* mandatory, 0-1, defaults to 0, UINT. Set if the video is interlaced. (1 bit) */
  MatroskaID_Tracks_Video_StereoMode = 0x53B8, /* defaults to 0, UINT. Stereo-3D video mode (0: mono, 1: side by side (left eye is first), 2: top-bottom (right eye is first), 3: top-bottom (left eye is first), 4: checkboard (right is first), 5: checkboard (left is first), 6: row interleaved (right is first), 7: row interleaved (left is first), 8: column interleaved (right is first), 9: column interleaved (left is first), 10: anaglyph (cyan/red), 11: side by side (right eye is first), 12: anaglyph (green/magenta), 13 both eyes laced in one Block (left eye is first), 14 both eyes laced in one Block (right eye is first)) . There are some more details on 3D support in the Specification Notes ( http://matroska.org/technical/specs/notes.html#3D ). */
  MatroskaID_Tracks_Video_PixelWidth = 0xB0, /* mandatory, not 0, UINT. Width of the encoded video frames in pixels. */
  MatroskaID_Tracks_Video_PixelHeight = 0xBA, /* mandatory, not 0, UINT. Height of the encoded video frames in pixels. */
  MatroskaID_Tracks_Video_DisplayWidth = 0x54B0, /* not 0, defaults to PixelWidth, UINT. Width of the video frames to display. The default value is only valid when DisplayUnit is 0. */
  MatroskaID_Tracks_Video_DisplayHeight = 0x54BA, /* not 0, defaults to PixelHeight, UINT. Height of the video frames to display. The default value is only valid when DisplayUnit is 0. */
  MatroskaID_Tracks_Video_DisplayUnit = 0x54B2, /* defaults to 0, UINT. How DisplayWidth & DisplayHeight should be interpreted (0: pixels, 1: centimeters, 2: inches, 3: Display Aspect Ratio). */

  MatroskaID_Tracks_Audio = 0xE1, /* Audio settings. */
  MatroskaID_Tracks_Audio_SamplingFrequency = 0xB5, /* mandatory, > 0, defaults to 8000.0, FLOAT. Sampling frequency in Hz. */
  MatroskaID_Tracks_Audio_OutputSamplingFrequency = 0x78B5, /* > 0, defaults to SamplingFrequency, FLOAT. Real output sampling frequency in Hz (used for SBR techniques). */
  MatroskaID_Tracks_Audio_Channels = 0x9F, /* mandatory, not 0, defaults to 1, UINT. Numbers of channels in the track. */
  MatroskaID_Tracks_Audio_BitDepth = 0x6264, /* not 0, UINT. Bits per sample, mostly used for PCM. */


  MatroskaID_Tags = 0x1254C367, /* can appear more than once. Element containing elements specific to Tracks/Chapters. A list of valid tags can be found here. */
  MatroskaID_Tags_Tag = 0x7373, /* mandatory, can appear more than once. Element containing elements specific to Tracks/Chapters. */
  MatroskaID_Tags_Tag_SimpleTag = 0x67C8, /* mandatory, can appear more than once, recursive. Contains general information about the target. */
  MatroskaID_Tags_Tag_SimpleTag_TagName = 0x45A3, /* mandatory, UTF8-encoded. The name of the Tag that is going to be stored. */
  MatroskaID_Tags_Tag_SimpleTag_TagLanguage = 0x447A, /* mandatory, defaults to 'und', string. Specifies the language of the tag specified, in the Matroska languages form. */
  MatroskaID_Tags_Tag_SimpleTag_TagDefault = 0x4484, /* mandatory, 0-1, defaults to 1, UINT. Indication to know if this is the default/original language to use for the given tag. (1 bit) */
  MatroskaID_Tags_Tag_SimpleTag_TagString = 0x4487, /* UTF-8-encoded. The value of the Tag. */
  MatroskaID_Tags_Tag_SimpleTag_TagBinary = 0x4485 /* BINARY. The values of the Tag if it is binary. Note that this cannot be used in the same SimpleTag as TagString. */
};
    298 
    299 
/**
 * Selects how VINTparse() interprets the bits of a variable-length
 * integer.
 */
enum VINTParseMode
{
  /* Element ID: the length marker bit is kept as part of the value. */
  VINT_READ_ID = 0,
  /* Element size: the marker bit is cleared; an all-ones value is
   * reported as 0xFFFFFFFFFFFFFFFF ("size is unknown"). */
  VINT_READ_SIZE = 1,
  /* Unsigned integer: the marker bit is cleared. */
  VINT_READ_UINT = 2,
  /* Signed integer: the marker bit is cleared and values above the
   * positive range of the given width are mapped to negative numbers. */
  VINT_READ_SINT = 3
};
    307 
    308 /**
    309  * Reads an EBML integer from the buffer
    310  *
    311  * @param buffer array of bytes to read from
    312  * @param start the position in buffer at which to start reading
    313  * @param end first invalid index in buffer (i.e. buffer size)
    314  * @param result receives the integer.
    315  * @param mode (see VINTParseMode)
    316  * @return number of bytes occupied by the integer (the integer itself
    317  *         is always put into 64-bit long buffer),
    318  *         -1 if there is not enough bytes to read the integer
    319  */
    320 static ssize_t
    321 VINTparse (struct EXTRACTOR_PluginList *plugin,
    322            int64_t *result, enum VINTParseMode mode)
    323 {
    324   /* 10000000 01000000 00100000 00010000 00001000 00000100 00000010 00000001 */
    325   static const unsigned char mask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04,
    326                                          0x02, 0x01 };
    327   /* 01111111 00111111 00011111 00001111 00000111 00000011 00000001 00000000 */
    328   static const unsigned char imask[8] = { 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03,
    329                                           0x01, 0x00 };
    330   static const int64_t int_negative_limits[8] = {
    331     -0x00000000000040LL, /*  7-bit integer */
    332     -0x00000000002000LL, /* 14-bit integer */
    333     -0x00000000100000LL, /* 21-bit integer */
    334     -0x00000008000000LL, /* 28-bit integer */
    335     -0x00000400000000LL, /* 35-bit integer */
    336     -0x00020000000000LL, /* 42-bit integer */
    337     -0x01000000000000LL, /* 49-bit integer */
    338     -0x80000000000000LL, /* 56-bit integer */
    339   };
    340   static const int64_t int_positive_limits[8] = {
    341     0x00000000000040ULL - 1LL, /*  7-bit integer */
    342     0x00000000002000ULL - 1LL, /* 14-bit integer */
    343     0x00000000100000ULL - 1LL, /* 21-bit integer */
    344     0x00000008000000ULL - 1LL, /* 28-bit integer */
    345     0x00000400000000ULL - 1LL, /* 35-bit integer */
    346     0x00020000000000ULL - 1LL, /* 42-bit integer */
    347     0x01000000000000ULL - 1LL, /* 49-bit integer */
    348     0x80000000000000ULL - 1LL, /* 56-bit integer */
    349   };
    350   static const uint64_t uint_positive_limits[8] = {
    351     0x0000000000000080ULL - 1LL, /*  7-bit integer */
    352     0x0000000000004000ULL - 1LL, /* 14-bit integer */
    353     0x0000000000200000ULL - 1LL, /* 21-bit integer */
    354     0x0000000010000000ULL - 1LL, /* 28-bit integer */
    355     0x0000000800000000ULL - 1LL, /* 35-bit integer */
    356     0x0000040000000000ULL - 1LL, /* 42-bit integer */
    357     0x0002000000000000ULL - 1LL, /* 49-bit integer */
    358     0x0100000000000000ULL - 1LL, /* 56-bit integer */
    359   };
    360   int vint_width;
    361   unsigned int c;
    362   uint64_t result_u;
    363   int64_t result_s;
    364   uint64_t temp;
    365   unsigned char *data;
    366   unsigned char first_byte;
    367   unsigned char int_bytes[8];
    368 
    369   /* Minimal integer size is 1 byte */
    370   if (1 != pl_read (plugin, &data, 1))
    371     return -1;
    372   first_byte = data[0];
    373 
    374   /* An integer begins with zero or more 0-bits. Number of 0-bits indicates the
    375    * width of the integer, zero 0-bits means a 1-byte long integer; 8 0-bits
    376    * indicate a 8-byte (64-bit) integer.
    377    * 0-bits are followed by a mandatory 1-bit. Then - by the bits of the integer
    378    * itself. Integers are stored in big-endian order. Because of the width prefix
    379    * and the mandatory 1-bit, integers are relatively short:
    380    * 1-byte integer has 2^7 different values,
    381    * 2-byte integer has 2^14 different values,
    382    * etc
    383    *//*
    384    * Examine the first byte and see how many 0-bytes are at its beginning.
    385    */vint_width = 0;
    386   for (c = 0; c < 8; c++)
    387     if (! (first_byte & mask[c]))
    388       vint_width++;
    389     else
    390       break;
    391   /* vint_width now contains the number of 0-bytes. That is also the number
    392    * of extra bytes occupied by the integer (beyond the one that we've just
    393    * partially read).
    394    */
    395   if (vint_width != pl_read (plugin, &data, vint_width))
    396     return -1;
    397 
    398   if ((vint_width >= 8))
    399     return 0;
    400 
    401   memcpy (&int_bytes[1], data, vint_width);
    402   int_bytes[0] = first_byte;
    403 
    404   /* OK, signedness is a PITA. Here's a small scale example to illustrate
    405    * the point:
    406    * 4-bit unsigned integer:
    407    * 0 1 2 3 4 5 6 7  8  9  10  11  12  13  14   15
    408    * 4-bit signed integer:
    409    * 0 1 2 3 4 5 6 7 -8 -7  -6  -5  -4  -3  -2   -1
    410    *
    411    * 3 here is 0011b, and -3 is 1101b
    412    * However, writing 1101b into int8_t memory location will NOT make
    413    * the machine interpret it as -3, it will be interpreted as 00001101b,
    414    * which is 13. To be -3 in int8_t it has to be 11111101b. That is,
    415    * it must be padded with extra 1s to the left, but only if its first
    416    * bit is set (which means a negative integer)!
    417    * Easier way (without looking closesly at the bits):
    418    * 1) get it as unsigned integer (say, 1010b, which is 10 for a 4-bit unsigned
    419    * integer, and is 10 for any large unsigned integer, so this interpretation is
    420    * always correct).
    421    * 2) see if it's more than what a signed integer would hold (it is - a
    422    * signed integer only holds up to 7). At this point we will need an array of 8
    423    * different maximums for signed integers, indexed by vint_width.
    424    * 3) do the following math: 10 - 8 = 2 ; -8 + 2 = -6
    425    * That is, the minimal signed value (-8) and the number (10) should be summed,
    426    * and the sum (2) should be added to the minimal signed value (-8)
    427    * to get the signed counterpart (-6) of the number (10)
    428    * 13 - 8 = 5; -8 + 5 = -3
    429    * It's better to do that in two separate steps, because combining it into one step
    430    * boils down to -8 + -8 + 13, which might confuse the compiler, because -8 + -8 = -16,
    431    * which is outside of the signed integer range (remember, we're in 4-bit space here).
    432    * on the other hand, 5 and -3 both are within the range.
    433    * 4) if the number does not exceed the signed integer maximum (7), store it as-is
    434    */result_u = 0;
    435   /* Copy the extra bytes into a temporary buffer, in the right order */
    436   for (c = 0; c < vint_width; c++)
    437     result_u += ((uint64_t) int_bytes[vint_width - c]) << (c * 8);
    438 
    439   /* Add the first byte, do mode-dependent adjustment, then copy the result */
    440   switch (mode)
    441   {
    442   case VINT_READ_UINT:
    443     /* Unset the 1-bit marker */
    444     result_u += ((uint64_t) int_bytes[0] & imask[vint_width]) << (vint_width
    445                                                                   * 8);
    446     memcpy (result, &result_u, sizeof (uint64_t));
    447     break;
    448   case VINT_READ_ID:
    449     /* Do not unset the 1-bit marker*/
    450     result_u += ((uint64_t) int_bytes[0]) << (vint_width * 8);
    451     memcpy (result, &result_u, sizeof (uint64_t));
    452     break;
    453   case VINT_READ_SIZE:
    454     /* Unset the 1-bit marker */
    455     result_u += ((uint64_t) int_bytes[0] & imask[vint_width]) << (vint_width
    456                                                                   * 8);
    457     /* Special case: all-1 size means "size is unknown". We indicate this
    458      * in the return value by setting it to UINT64_MAX.
    459      */
    460     if (result_u == uint_positive_limits[vint_width])
    461       result_u = 0xFFFFFFFFFFFFFFFFULL;
    462     memcpy (result, &result_u, sizeof (uint64_t));
    463     break;
    464   case VINT_READ_SINT:
    465     /* Unset the 1-bit marker */
    466     result_u += ((uint64_t) int_bytes[0] & imask[vint_width]) << (vint_width
    467                                                                   * 8);
    468     /* Interpret large values as negative signed values */
    469     if (result_u > int_positive_limits[vint_width])
    470     {
    471       /* Pray that the compiler won't optimize this */
    472       temp = result_u + int_negative_limits[vint_width];
    473       result_s = int_negative_limits[vint_width] + temp;
    474     }
    475     else
    476       result_s = result_u;
    477     memcpy (result, &result_s, sizeof (int64_t));
    478     break;
    479   }
    480   return vint_width + 1;
    481 }
    482 
    483 
    484 /**
    485  * Reads an EBML element header. Only supports 32-bit IDs and 64-bit sizes.
    486  * (EBML might specify that IDs larger than 32 bits are allowed, or that
    487  * sizes larger than 64 bits are allowed).
    488  *
    489  * @param buffer array of bytes to read the header from
    490  * @param start index at which start to read
    491  * @param end first invalid index in the array (i.e. array size)
    492  * @param id receives the element id
    493  * @param size receives the element size
    494  * @return number of bytes occupied by the header,
    495  *         0 if buffer doesn't contain a header at 'start',
    496  *         -1 if buffer doesn't contain a complete header
    497  */
    498 static ssize_t
    499 elementRead (struct EXTRACTOR_PluginList *plugin,
    500              uint32_t *id, int64_t *size)
    501 {
    502   int64_t tempID;
    503   int64_t tempsize;
    504   ssize_t id_offset;
    505   ssize_t size_offset;
    506 
    507   tempID = 0;
    508 
    509   id_offset = VINTparse (plugin, &tempID, VINT_READ_ID);
    510   if (id_offset <= 0)
    511     return id_offset;
    512   if (id_offset > 4)
    513     /* Interpret unsupported long IDs as file corruption */
    514     return 0;
    515   /* VINTparse takes care of returning 0 when size is > 8 bytes */
    516   size_offset = VINTparse (plugin, &tempsize, VINT_READ_SIZE);
    517   if (size_offset <= 0)
    518     return size_offset;
    519   *id = (uint32_t) tempID;
    520   *size = tempsize;
    521 #if DEBUG_EBML
    522   printf ("EL 0x%06X %llu\n", *id, *size);
    523 #endif
    524   return id_offset + size_offset;
    525 }
    526 
    527 
    528 static ssize_t
    529 idRead (struct EXTRACTOR_PluginList *plugin,
    530         uint64_t length, uint32_t *id)
    531 {
    532   int64_t tempID;
    533   ssize_t id_offset;
    534 
    535   tempID = 0;
    536 
    537   id_offset = VINTparse (plugin, &tempID, VINT_READ_ID);
    538   if (id_offset <= 0)
    539     return id_offset;
    540   if (id_offset > 4)
    541     return 0;
    542   *id = (uint32_t) tempID;
    543   return id_offset;
    544 }
    545 
    546 
    547 static ssize_t
    548 uintRead (struct EXTRACTOR_PluginList *plugin, uint64_t length,
    549           uint64_t *result)
    550 {
    551   size_t c;
    552   unsigned char *data;
    553 
    554   if (length != pl_read (plugin, &data, length))
    555     return -1;
    556 
    557   *result = 0;
    558   for (c = 1; c <= length; c++)
    559     *result += ((uint64_t) data[c - 1]) << (8 * (length - c));
    560   return (ssize_t) length;
    561 }
    562 
    563 
    564 static ssize_t
    565 sintRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, int64_t *result)
    566 {
    567   size_t c;
    568   uint64_t tmp;
    569   unsigned char *data;
    570 
    571   if (length != pl_read (plugin, &data, length))
    572     return -1;
    573 
    574   tmp = 0;
    575   for (c = 1; c <= length; c++)
    576     tmp += ((uint64_t) data[c - 1]) << (8 * (length - c));
    577   if (0x80 == (0x80 & data[0]))
    578   {
    579     /* OK, i'm just too tired to think... If sign bit is set, pad the rest of the
    580      * uint64_t with 0xFF. Unlike variable-length integers, these have normal
    581      * multiple-of-8 length, and will fit well. They just need to be padded.
    582      */
    583     int i;
    584     for (i = length; i < 8; i++)
    585       tmp += ((uint64_t) 0xFF) << (8 * i);
    586   }
    587   memcpy (result, &tmp, sizeof (uint64_t));
    588   return (ssize_t) length;
    589 }
    590 
    591 
    592 static ssize_t
    593 stringRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, char *result)
    594 {
    595   uint64_t read_length;
    596   unsigned char *data;
    597 
    598   read_length = length;
    599   if (length > MAX_STRING_SIZE)
    600     read_length = MAX_STRING_SIZE;
    601 
    602   if (read_length != pl_read (plugin, &data, read_length))
    603     return -1;
    604 
    605   memcpy (result, data, read_length);
    606   result[read_length] = '\0';
    607   if (read_length < length)
    608     if ((length - read_length) != pl_read (plugin, &data, length - read_length))
    609       return -1;
    610   /* Can't return uint64_t - need it to be signed */
    611   return 1;
    612 }
    613 
    614 
    615 static ssize_t
    616 floatRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, long
    617            double *result)
    618 {
    619   size_t c;
    620   unsigned char t[8];
    621   unsigned char *data;
    622 
    623   if (length != pl_read (plugin, &data, length))
    624     return -1;
    625 
    626   /* we don't support 10-byte floats, because not all C compilers will guarantee that long double is stored in 10 bytes in a IEEE-conformant format */
    627   if ((length != 4) && (length != 8) /* && length != 10 */)
    628     return 0;
    629 
    630   for (c = 0; c < length; c++)
    631   {
    632 #if __BYTE_ORDER == __BIG_ENDIAN
    633     t[c] = data[c];
    634 #else
    635     t[c] = data[length - 1 - c];
    636 #endif
    637   }
    638   if (length == 4)
    639     *result = *((float *) t);
    640   else if (length == 8)
    641     *result = *((double *) t);
    642   else
    643     *result = *((long double *) t);
    644   return (ssize_t) length;
    645 }
    646 
    647 
/* One letter per stream type; see the trailing comment for which index maps to which type. */
static const char stream_type_letters[] = "?vat";      /*[0]-no, [1]-video,[2]-audio,[3]-text */
    649 
/**
 * Parser states. Values of this type are kept in the 'state' and
 * 'next_state' members of struct ebml_state; struct ebml_element stores
 * such values (as int) to say where to continue once an element is
 * finished or abandoned.
 *
 * Only the first entries carry explicit numeric values; the rest are
 * numbered consecutively by the compiler.
 */
enum EBMLState
{
  EBML_BAD_STATE = -1,
  EBML_LOOKING_FOR_HEADER = 0,
  EBML_READING_HEADER = 1,
  EBML_READING_ELEMENTS = 2,
  EBML_READ_ELEMENT = 3,
  EBML_READING_HEADER_ELEMENTS = 4,
  EBML_FINISHED_READING_HEADER = 5,
  /* states named after the kind of value to be read */
  EBML_READ_UINT,
  EBML_READ_ID,
  EBML_READ_SINT,
  EBML_READ_FLOAT,
  EBML_READ_STRING,
  EBML_READING_HEADER_ELEMENT_VALUE,
  EBML_SKIP_UNTIL_NEXT_HEADER,
  /* Matroska-specific states; named after the container element being read */
  EBML_READING_MATROSKA_SEGMENT,
  EBML_READING_MATROSKA_SEGMENT_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_SEGMENT_CONTENTS,
  EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_SEEK_HEAD_CONTENTS,
  EBML_READING_MATROSKA_SEEK_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_SEEK_CONTENTS,
  EBML_READING_MATROSKA_SEEK_CONTENTS_VALUE,
  EBML_READING_MATROSKA_INFO_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_INFO_CONTENTS,
  EBML_READING_MATROSKA_TRACKS_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_TRACKS_CONTENTS,
  EBML_READING_MATROSKA_TAGS_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_TAGS_CONTENTS,
  EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_CONTENTS,
  EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS_VALUE,
  EBML_READING_MATROSKA_INFO_CONTENTS_VALUE,
  EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS,
  EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS_VALUE,
  EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS,
  EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS_VALUE,
  EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS,
  EBML_READING_MATROSKA_TAG_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_TAG_CONTENTS,
  EBML_READING_MATROSKA_SIMPLETAG_CONTENTS,
  EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS,
  EBML_READING_MATROSKA_SIMPLETAG_CONTENTS_VALUE
};
    696 
/**
 * One entry of the stack of elements currently being parsed
 * (linked through 'parent', top of the stack in ebml_state.stack_top).
 * position + header_size + size is used throughout as the offset of the
 * first byte after the element.
 */
struct ebml_element
{
  /* ebml_stack_push_new() stores its 'position' argument minus header_size here */
  uint64_t position;
  /* size of the element header (id + size fields), in bytes */
  uint64_t header_size;
  /* element id */
  uint32_t id;
  /* size of the element contents, as given in its header */
  uint64_t size;
  /* element below this one on the stack, NULL for the bottom entry */
  struct ebml_element *parent;
  /* state that maybe_rise_up() switches to once reading reaches the end of this element */
  int finish_state;
  /* stored by ebml_stack_push_new(); NOTE(review): not read in this part of the file */
  int prev_state;
  /* state / next_state that check_result() installs when a read inside this element returns 0 */
  int bail_state;
  int bail_next_state;
};
    709 
/**
 * Singly-linked list of seek entries (head in ebml_state.matroska_seeks,
 * tail in ebml_state.matroska_seeks_tail).
 */
struct matroska_seek_list
{
  /* next entry, NULL at the end of the list */
  struct matroska_seek_list *next;
  /* id of the element the entry refers to (compared against MatroskaID_* values) */
  uint32_t id;
  /* position of that element, relative to ebml_state.segment_contents_start */
  uint64_t position;
};
    716 
/**
 * Node of the tree of simple tags (root in ebml_state.tag_tree).
 * 'name' and 'string' are released with free() when the tree is cleaned up.
 */
struct matroska_simpletag
{
  /* following node on the same level, or NULL */
  struct matroska_simpletag *next;
  /* first nested node, or NULL */
  struct matroska_simpletag *child;
  /* enclosing node, or NULL */
  struct matroska_simpletag *parent;
  /* tag name; may be NULL */
  char *name;
  /* tag value; may be NULL */
  char *string;
};
    725 
/**
 * Complete parser state: current/next state, the stack of open elements
 * and all metadata collected so far. The valid_matroska_* members are
 * tri-state: -1 means "not evaluated yet" (report_state() decides), 0 means
 * invalid, 1 means valid.
 */
struct ebml_state
{
  /* current parser state */
  enum EBMLState state;

  /* innermost element being parsed, NULL when the stack is empty */
  struct ebml_element *stack_top;

  /* state to enter after the current one; initialised to EBML_BAD_STATE */
  enum EBMLState next_state;

  /* --- EBML header; defaults are set in clean_ebml_state_ebml() --- */
  int reported_ebml;
  int valid_ebml;
  uint64_t ebml_version;
  uint64_t ebml_READ_version;
  uint64_t ebml_max_id_length;
  uint64_t ebml_max_size_length;
  /* heap-allocated, released with free() */
  char *doctype;
  uint64_t doctype_version;
  uint64_t doctype_read_version;

  /* offset that seek-list positions are relative to */
  int64_t segment_contents_start;

  /* --- seek entries --- */
  /* head and tail of the list of seek entries */
  struct matroska_seek_list *matroska_seeks;
  struct matroska_seek_list *matroska_seeks_tail;
  /* last seek entry at or before the current position, see try_to_find_pos() */
  struct matroska_seek_list *matroska_pos;
  /* id / position of the seek entry currently being read */
  uint32_t matroska_seek_id;
  uint64_t matroska_seek_position;

  /* --- segment info; defaults are set in clean_ebml_state_matroska_info() --- */
  int reported_matroska_info;
  int valid_matroska_info;
  uint64_t matroska_info_timecode_scale;
  /* -1.0 means "not set" */
  double matroska_info_duration;
  int matroska_info_date_utc_is_set;
  /* added to the timestamp of 2001-01-01 (scaled to nanoseconds) by report_state() */
  int64_t matroska_info_date_utc;
  /* heap-allocated strings, released with free() */
  char *matroska_info_title;
  char *matroska_info_muxing_app;
  char *matroska_info_writing_app;

  /* --- current track; defaults are set in clean_ebml_state_matroska_track() --- */
  int reported_matroska_track;
  int valid_matroska_track;
  uint64_t matroska_track_type;
  /* heap-allocated strings, released with free(); language defaults to "eng" */
  char *matroska_track_name;
  char *matroska_track_language;
  char *matroska_track_codec_id;
  char *matroska_track_codec_name;

  /* --- video settings of the current track --- */
  int valid_matroska_track_video;
  uint64_t matroska_track_video_flag_interlaced;
  uint64_t matroska_track_video_stereo_mode;
  uint64_t matroska_track_video_pixel_width;
  uint64_t matroska_track_video_pixel_height;
  uint64_t matroska_track_video_display_width;
  uint64_t matroska_track_video_display_height;
  uint64_t matroska_track_video_display_unit;

  /* --- audio settings of the current track --- */
  int valid_matroska_track_audio;
  double matroska_track_audio_sampling_frequency;
  /* values <= 0 make report_state() fall back to the sampling frequency */
  double matroska_track_audio_output_sampling_frequency;
  uint64_t matroska_track_audio_channels;
  /* 0 means "not reported" */
  uint64_t matroska_track_audio_bit_depth;

  /* --- simple tags --- */
  /* root of the tag tree */
  struct matroska_simpletag *tag_tree;
  /* node most recently added by matroska_add_tag() */
  struct matroska_simpletag *tag_last;
  /* NOTE(review): presumably the tag whose contents are being read; not used in this part of the file */
  struct matroska_simpletag *tag_current;
};
    789 
    790 static void
    791 clean_ebml_state_ebml (struct ebml_state *state)
    792 {
    793   if (state->doctype != NULL)
    794     free (state->doctype);
    795   state->doctype = NULL;
    796   state->reported_ebml = 0;
    797   state->valid_ebml = 0;
    798   state->ebml_version = 1;
    799   state->ebml_READ_version = 1;
    800   state->ebml_max_id_length = 4;
    801   state->ebml_max_size_length = 8;
    802   state->doctype = NULL;
    803   state->doctype_version = 0;
    804   state->doctype_read_version = 0;
    805 }
    806 
    807 
    808 static void
    809 clean_ebml_state_matroska_simpletags (struct ebml_state *state)
    810 {
    811   struct matroska_simpletag *el, *parent, *next;
    812   for (el = state->tag_tree; el;)
    813   {
    814     if (el->child != NULL)
    815     {
    816       el = el->child;
    817       continue;
    818     }
    819     parent = el->parent;
    820     next = el->next;
    821     if (el->name != NULL)
    822       free (el->name);
    823     if (el->string != NULL)
    824       free (el->string);
    825     free (el);
    826     if ((parent != NULL) && (parent->child == el))
    827       parent->child = next;
    828     el = next;
    829     if (next == NULL)
    830       el = parent;
    831   }
    832   state->tag_tree = NULL;
    833   state->tag_last = NULL;
    834   state->tag_current = NULL;
    835 }
    836 
    837 
    838 void
    839 matroska_add_tag (struct ebml_state *state, struct matroska_simpletag *parent,
    840                   char *name, char *string)
    841 {
    842   struct matroska_simpletag *el = malloc (sizeof (struct matroska_simpletag));
    843   el->parent = parent;
    844   el->next = NULL;
    845   el->child = NULL;
    846   el->name = name;
    847   el->string = string;
    848   if (state->tag_last != NULL)
    849   {
    850     if (state->tag_last == parent)
    851       state->tag_last->child = el;
    852     else
    853       state->tag_last->next = el;
    854   }
    855   state->tag_last = el;
    856 }
    857 
    858 
    859 static void
    860 clean_ebml_state_matroska_seeks (struct ebml_state *state)
    861 {
    862   struct matroska_seek_list *seek_head, *next;
    863   for (seek_head = state->matroska_seeks; seek_head != NULL; seek_head = next)
    864   {
    865     next = seek_head->next;
    866     free (seek_head);
    867   }
    868   state->matroska_seeks = NULL;
    869   state->matroska_seeks_tail = NULL;
    870 }
    871 
    872 
    873 static void
    874 clean_ebml_state_matroska_segment (struct ebml_state *state)
    875 {
    876   state->segment_contents_start = 0;
    877   state->matroska_pos = NULL;
    878 
    879   clean_ebml_state_matroska_seeks (state);
    880   clean_ebml_state_matroska_simpletags (state);
    881 }
    882 
    883 
    884 static void
    885 clean_ebml_state_matroska_seek (struct ebml_state *state)
    886 {
    887   state->matroska_seek_id = 0;
    888   state->matroska_seek_position = 0;
    889 }
    890 
    891 
    892 static void
    893 clean_ebml_state_matroska_info (struct ebml_state *state)
    894 {
    895   state->reported_matroska_info = 0;
    896   state->valid_matroska_info = -1;
    897   state->matroska_info_timecode_scale = 1000000;
    898   state->matroska_info_duration = -1.0;
    899   state->matroska_info_date_utc_is_set = 0;
    900   state->matroska_info_date_utc = 0;
    901   if (state->matroska_info_title != NULL)
    902     free (state->matroska_info_title);
    903   state->matroska_info_title = NULL;
    904   if (state->matroska_info_muxing_app != NULL)
    905     free (state->matroska_info_muxing_app);
    906   state->matroska_info_muxing_app = NULL;
    907   if (state->matroska_info_writing_app != NULL)
    908     free (state->matroska_info_writing_app);
    909   state->matroska_info_writing_app = NULL;
    910 }
    911 
    912 
    913 static void
    914 clean_ebml_state_matroska_track_video (struct ebml_state *state)
    915 {
    916   state->valid_matroska_track_video = -1;
    917   state->matroska_track_video_flag_interlaced = 0;
    918   state->matroska_track_video_stereo_mode = 0;
    919   state->matroska_track_video_pixel_width = 0;
    920   state->matroska_track_video_pixel_height = 0;
    921   state->matroska_track_video_display_width = 0;
    922   state->matroska_track_video_display_height = 0;
    923   state->matroska_track_video_display_unit = 0;
    924 }
    925 
    926 
    927 static void
    928 clean_ebml_state_matroska_track_audio (struct ebml_state *state)
    929 {
    930   state->valid_matroska_track_audio = -1;
    931   state->matroska_track_audio_sampling_frequency = 8000.0;
    932   state->matroska_track_audio_output_sampling_frequency = 0;
    933   state->matroska_track_audio_channels = 1;
    934   state->matroska_track_audio_bit_depth = 0;
    935 }
    936 
    937 
    938 static void
    939 clean_ebml_state_matroska_track (struct ebml_state *state)
    940 {
    941   state->reported_matroska_track = 0;
    942   state->valid_matroska_track = -1;
    943   state->matroska_track_type = 0;
    944   if (state->matroska_track_name != NULL)
    945     free (state->matroska_track_name);
    946   state->matroska_track_name = NULL;
    947   if (state->matroska_track_language != NULL)
    948     free (state->matroska_track_language);
    949   state->matroska_track_language = strdup ("eng");
    950   if (state->matroska_track_codec_id != NULL)
    951     free (state->matroska_track_codec_id);
    952   state->matroska_track_codec_id = NULL;
    953   if (state->matroska_track_codec_name != NULL)
    954     free (state->matroska_track_codec_name);
    955   state->matroska_track_codec_name = NULL;
    956 
    957   clean_ebml_state_matroska_track_video (state);
    958   clean_ebml_state_matroska_track_audio (state);
    959 }
    960 
    961 
    962 static struct ebml_state *
    963 EXTRACTOR_ebml_init_state_method ()
    964 {
    965   struct ebml_state *state;
    966   state = malloc (sizeof (struct ebml_state));
    967   if (state == NULL)
    968     return NULL;
    969   memset (state, 0, sizeof (struct ebml_state));
    970 
    971   state->next_state = EBML_BAD_STATE;
    972 
    973   clean_ebml_state_ebml (state);
    974   clean_ebml_state_matroska_info (state);
    975   clean_ebml_state_matroska_track (state);
    976   return state;
    977 }
    978 
    979 
/**
 * Walks the tree of simple tags and reports every tag that has both a
 * non-empty name and a non-empty value through ADD_MATROSKA, then frees
 * the tree.
 *
 * Tags whose name is listed in tag_map are reported with the mapped meta
 * type; all others are reported as "name=value" with
 * EXTRACTOR_METATYPE_UNKNOWN.
 *
 * @param state parser state holding the tag tree
 * @param proc metadata callback; NOTE(review): not referenced directly
 *        here, presumably used by the ADD_MATROSKA macro - confirm
 * @param proc_cls closure for 'proc'; same remark
 */
static void
report_simpletag (struct ebml_state *state, EXTRACTOR_MetaDataProcessor proc,
                  void *proc_cls)
{
  struct matroska_simpletag *el, *next;
  char format[MAX_STRING_SIZE + 1];
  for (el = state->tag_tree; el != NULL; el = next)
  {
    if ((el->name != NULL) && (el->name[0] != '\0') && (el->string != NULL) &&
        (el->string[0] != '\0') )
    {
      /* EXTRACTOR_METATYPE_RESERVED doubles as "name not found in tag_map" */
      enum EXTRACTOR_MetaType metatype = EXTRACTOR_METATYPE_RESERVED;
      struct MatroskaTagMap *map_item;
      for (map_item = &tag_map[0]; map_item->name != NULL; map_item++)
      {
        if (strcmp (map_item->name, el->name) == 0)
        {
          metatype = map_item->id;
          break;
        }
      }
      if (metatype == EXTRACTOR_METATYPE_RESERVED)
      {
        snprintf (format, MAX_STRING_SIZE, "%s=%s", el->name, el->string);
        format[MAX_STRING_SIZE] = '\0';
        ADD_MATROSKA (format, EXTRACTOR_METATYPE_UNKNOWN);
      }
      else
        ADD_MATROSKA (el->string, metatype);
    }
    /* Pick the node to visit next: the first child if there is one,
     * otherwise the next sibling, otherwise the next sibling of the
     * closest ancestor that has one. Running out of ancestors leaves
     * 'next' NULL and ends the outer loop.
     */
    next = el->child;
    while (next == NULL && el != NULL)
    {
      next = el->next;
      if (next == NULL)
        el = el->parent;
    }
  }
  clean_ebml_state_matroska_simpletags (state);
}
   1020 
   1021 
   1022 static void
   1023 report_state (struct ebml_state *state, EXTRACTOR_MetaDataProcessor proc,
   1024               void *proc_cls)
   1025 {
   1026   char format[MAX_STRING_SIZE + 1];
   1027   report_simpletag (state, proc, proc_cls);
   1028   if (state->valid_ebml && ! state->reported_ebml)
   1029   {
   1030     state->reported_ebml = 1;
   1031     snprintf (format, MAX_STRING_SIZE, "%llu", (unsigned long
   1032                                                 long) state->ebml_version);
   1033     format[MAX_STRING_SIZE] = '\0';
   1034     ADD_EBML (format, EXTRACTOR_METATYPE_FORMAT_VERSION);
   1035     snprintf (format, MAX_STRING_SIZE, "%s %llu (EBML %llu)", state->doctype,
   1036               (unsigned long long) state->doctype_version,
   1037               (unsigned long long) state->ebml_version);
   1038     format[MAX_STRING_SIZE] = '\0';
   1039     ADD_EBML (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
   1040   }
   1041   if (state->valid_ebml)
   1042     clean_ebml_state_ebml (state);
   1043   if (state->valid_matroska_info == -1)
   1044   {
   1045     if (((state->matroska_info_duration > 0) ||
   1046          (state->matroska_info_duration == -1.0) ) &&
   1047         (state->matroska_info_muxing_app != NULL) &&
   1048         (state->matroska_info_writing_app != NULL) )
   1049       state->valid_matroska_info = 1;
   1050     else
   1051       state->valid_matroska_info = 0;
   1052   }
   1053   if ((state->valid_matroska_info == 1) && ! state->reported_matroska_info)
   1054   {
   1055     state->reported_matroska_info = 1;
   1056     if (state->matroska_info_duration != -1.0)
   1057     {
   1058       uint64_t seconds = (uint64_t) ((state->matroska_info_duration
   1059                                       * (float) state->
   1060                                       matroska_info_timecode_scale) / 1e+9);
   1061       snprintf (format, MAX_STRING_SIZE, "%llus", (unsigned long long) seconds);
   1062       format[MAX_STRING_SIZE] = '\0';
   1063       ADD_MATROSKA (format, EXTRACTOR_METATYPE_DURATION);
   1064     }
   1065     if (state->matroska_info_date_utc_is_set)
   1066     {
   1067       struct tm millenium_start;
   1068       struct tm matroska_date;
   1069       int64_t millenium_start_stamp;
   1070       int64_t matroska_date_stamp;
   1071 #if WINDOWS
   1072       __time64_t matroska_date_stamp_time_t;
   1073 #else
   1074       time_t matroska_date_stamp_time_t;
   1075 #endif
   1076       millenium_start.tm_sec = 0;
   1077       millenium_start.tm_min = 0;
   1078       millenium_start.tm_hour = 0;
   1079       millenium_start.tm_mday = 1;
   1080       millenium_start.tm_mon = 1;
   1081       millenium_start.tm_year = 2001 - 1900;
   1082       millenium_start.tm_isdst = -1;
   1083       putenv ("TZ=GMT0");
   1084       /* If no matter what is the size of the returned value, it fits 32-bit integer
   1085        * (in fact, i could have just used a constant here, since the start of Matroska
   1086        * millenium is known and never changes), but we want to use 64-bit integer to
   1087        * manipulate time. If it gets trimmed later, when assigning back to a TIME_TYPE
   1088        * that happens to be 32-bit long - well, tough luck.
   1089        */errno = 0;
   1090 #if WINDOWS
   1091       millenium_start_stamp = _mktime64 (&millenium_start);
   1092 #else
   1093       millenium_start_stamp = (time_t) mktime (&millenium_start);
   1094 #endif
   1095       if (millenium_start_stamp == -1)
   1096         printf ("Failed to convert time: %d\n", errno);
   1097       matroska_date_stamp = millenium_start_stamp * 1000000000
   1098                             + state->matroska_info_date_utc;
   1099       /* Now matroska_date_stamp is the number of nanoseconds since UNIX Epoch */
   1100       matroska_date_stamp_time_t = matroska_date_stamp / 1000000000;
   1101       /* Now matroska_date_stamp_time_t is the number of seconds since UNIX Epoch */
   1102 #if WINDOWS
   1103       if (NULL != gmtime_undocumented_64_r (&matroska_date_stamp_time_t,
   1104                                             &matroska_date))
   1105 #else
   1106       /* We want to be thread-safe. If you have no gmtime_r(), think of something! */
   1107       if (NULL != gmtime_r (&matroska_date_stamp_time_t, &matroska_date))
   1108 #endif
   1109       {
   1110         if (0 != strftime (format, MAX_STRING_SIZE, "%Y.%m.%d %H:%M:%S UTC",
   1111                            &matroska_date))
   1112           ADD_MATROSKA (format, EXTRACTOR_METATYPE_CREATION_DATE);
   1113       }
   1114     }
   1115     if (state->matroska_info_title != NULL)
   1116       ADD_MATROSKA (state->matroska_info_title, EXTRACTOR_METATYPE_TITLE);
   1117     if (strcmp (state->matroska_info_writing_app,
   1118                 state->matroska_info_muxing_app) == 0)
   1119       snprintf (format, MAX_STRING_SIZE, "Written and muxed with %s",
   1120                 state->matroska_info_writing_app);
   1121     else
   1122       snprintf (format, MAX_STRING_SIZE, "Written with %s, muxed with %s",
   1123                 state->matroska_info_writing_app,
   1124                 state->matroska_info_muxing_app);
   1125     format[MAX_STRING_SIZE] = '\0';
   1126     ADD_MATROSKA (format, EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE);
   1127   }
   1128   if (state->valid_matroska_info == 1)
   1129     clean_ebml_state_matroska_info (state);
   1130   if (state->valid_matroska_track == -1)
   1131   {
   1132     if (((state->matroska_track_type > 0) && (state->matroska_track_type <
   1133                                               255) ) &&
   1134         (state->matroska_track_codec_id != NULL) )
   1135       state->valid_matroska_track = 1;
   1136     else
   1137       state->valid_matroska_track = 0;
   1138   }
   1139   if (state->valid_matroska_track_video == -1)
   1140   {
   1141     if (((state->matroska_track_video_flag_interlaced == 0) ||
   1142          (state->matroska_track_video_flag_interlaced == 1) ) &&
   1143         ((state->matroska_track_video_stereo_mode >= 0) &&
   1144          (state->matroska_track_video_stereo_mode <= 14) ) &&
   1145         (state->matroska_track_video_pixel_width > 0) &&
   1146         (state->matroska_track_video_pixel_height > 0) )
   1147       state->valid_matroska_track_video = 1;
   1148     else
   1149       state->valid_matroska_track_video = 0;
   1150   }
   1151   if (state->valid_matroska_track_audio == -1)
   1152   {
   1153     if ((state->matroska_track_audio_sampling_frequency > 0) &&
   1154         (state->matroska_track_audio_channels > 0) )
   1155       state->valid_matroska_track_audio = 1;
   1156     else
   1157       state->valid_matroska_track_audio = 0;
   1158   }
   1159   if ((state->valid_matroska_track == 1) && ! state->reported_matroska_track)
   1160   {
   1161     char name_part[MAX_STRING_SIZE + 1];
   1162     char codec_part[MAX_STRING_SIZE + 1];
   1163     char bit_part[MAX_STRING_SIZE + 1];
   1164     char hz_part[MAX_STRING_SIZE + 1];
   1165     struct MatroskaTrackType *tt;
   1166     const char *track_type_string = NULL;
   1167     char use_video = 0;
   1168     char use_audio = 0;
   1169 
   1170     state->reported_matroska_track = 1;
   1171     for (tt = track_types; tt->code > 0; tt++)
   1172     {
   1173       if (tt->code == state->matroska_track_type)
   1174       {
   1175         track_type_string = tt->name;
   1176         if (tt->video_must_be_valid == 1)
   1177           use_video = 1;
   1178         else if (tt->audio_must_be_valid == 1)
   1179           use_audio = 1;
   1180         break;
   1181       }
   1182     }
   1183     if (track_type_string == NULL)
   1184       track_type_string = "unknown";
   1185 
   1186     if (state->matroska_track_name == NULL)
   1187       snprintf (name_part, MAX_STRING_SIZE, "%s", "");
   1188     else
   1189       snprintf (name_part, MAX_STRING_SIZE, "`%s' ",
   1190                 state->matroska_track_name);
   1191     name_part[MAX_STRING_SIZE] = '\0';
   1192 
   1193     if (state->matroska_track_codec_name == NULL)
   1194       snprintf (codec_part, MAX_STRING_SIZE, "%s",
   1195                 state->matroska_track_codec_id);
   1196     else
   1197       snprintf (codec_part, MAX_STRING_SIZE, "%s [%s]",
   1198                 state->matroska_track_codec_id,
   1199                 state->matroska_track_codec_name);
   1200     codec_part[MAX_STRING_SIZE] = '\0';
   1201 
   1202     if (use_video && (state->valid_matroska_track_video == 1))
   1203     {
   1204       /* Ignore Display* for now. Aspect ratio correction could be
   1205        * done either way (stretching horizontally or squishing vertically),
   1206        * so let's stick to hard cold pixel counts.
   1207        */
   1208       snprintf (format, MAX_STRING_SIZE, "%llux%llu",
   1209                 (unsigned long long) state->matroska_track_video_pixel_width,
   1210                 (unsigned long long) state->matroska_track_video_pixel_height);
   1211       format[MAX_STRING_SIZE] = '\0';
   1212       ADD_MATROSKA (format, EXTRACTOR_METATYPE_IMAGE_DIMENSIONS);
   1213     }
   1214     if (use_audio && (state->valid_matroska_track_audio == 1))
   1215     {
   1216       double freq = state->matroska_track_audio_sampling_frequency;
   1217       double rfreq = freq;
   1218       if (state->matroska_track_audio_output_sampling_frequency > 0)
   1219         rfreq = state->matroska_track_audio_output_sampling_frequency;
   1220       if (freq == rfreq)
   1221         snprintf (hz_part, MAX_STRING_SIZE, "%.0fHz", freq);
   1222       else
   1223         snprintf (hz_part, MAX_STRING_SIZE, "%.0fHz (%.0fHz SBR)", freq, rfreq);
   1224       hz_part[MAX_STRING_SIZE] = '\0';
   1225 
   1226       if (state->matroska_track_audio_bit_depth > 0)
   1227         snprintf (bit_part, MAX_STRING_SIZE, "%llu-bit ", (unsigned long
   1228                                                            long) state->
   1229                   matroska_track_audio_bit_depth);
   1230       else
   1231         bit_part[0] = '\0';
   1232       bit_part[MAX_STRING_SIZE] = '\0';
   1233 
   1234       snprintf (format, MAX_STRING_SIZE,
   1235                 "%s track %s(%s, %llu-channel %sat %s) [%s]",
   1236                 track_type_string, name_part, codec_part,
   1237                 (unsigned long long) state->matroska_track_audio_channels,
   1238                 bit_part, hz_part, state->matroska_track_language);
   1239     }
   1240     else
   1241     {
   1242       snprintf (format, MAX_STRING_SIZE, "%s track %s(%s) [%s]",
   1243                 track_type_string, name_part, codec_part,
   1244                 state->matroska_track_language);
   1245     }
   1246     format[MAX_STRING_SIZE] = '\0';
   1247     ADD_EBML (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
   1248   }
   1249   if (state->valid_matroska_track)
   1250     clean_ebml_state_matroska_track (state);
   1251 }
   1252 
   1253 
   1254 static int
   1255 EXTRACTOR_ebml_discard_state_method (struct ebml_state *state)
   1256 {
   1257   if (state != NULL)
   1258   {
   1259     if (state->doctype != NULL)
   1260       free (state->doctype);
   1261     clean_ebml_state_matroska_segment (state);
   1262     clean_ebml_state_matroska_info (state);
   1263     clean_ebml_state_matroska_track (state);
   1264     free (state);
   1265   }
   1266   return 1;
   1267 }
   1268 
   1269 
   1270 static struct ebml_element *
   1271 ebml_stack_pop (struct ebml_state *state)
   1272 {
   1273   struct ebml_element *result;
   1274   if (state->stack_top == NULL)
   1275     return NULL;
   1276   result = state->stack_top;
   1277   state->stack_top = result->parent;
   1278   return result;
   1279 }
   1280 
   1281 
   1282 static void
   1283 ebml_stack_push_new (struct ebml_state *state, uint64_t position, uint32_t id,
   1284                      uint64_t size, uint64_t header_size, int finish_state, int
   1285                      prev_state, int
   1286                      bail_state, int bail_next_state)
   1287 {
   1288   struct ebml_element *element = malloc (sizeof (struct ebml_element));
   1289   element->parent = state->stack_top;
   1290   state->stack_top = element;
   1291   element->position = position - header_size;
   1292   element->header_size = header_size;
   1293   element->id = id;
   1294   element->size = size;
   1295   element->finish_state = finish_state;
   1296   element->prev_state = prev_state;
   1297   element->bail_state = bail_state;
   1298   element->bail_next_state = bail_next_state;
   1299 }
   1300 
   1301 
   1302 static int
   1303 check_result (struct EXTRACTOR_PluginList *plugin, ssize_t read_result, struct
   1304               ebml_state *state)
   1305 {
   1306   if (read_result == 0)
   1307   {
   1308     int64_t offset;
   1309     struct ebml_element *parent = ebml_stack_pop (state);
   1310     if (parent == NULL)
   1311     {
   1312       /* But this shouldn't really happen */
   1313       state->state = EBML_LOOKING_FOR_HEADER;
   1314       return 0;
   1315     }
   1316     offset = parent->position + parent->header_size + parent->size;
   1317     if ((offset < 0) || (offset != pl_seek (plugin, offset, SEEK_SET)))
   1318     {
   1319       state->state = EBML_BAD_STATE;
   1320       return 0;
   1321     }
   1322     state->state = parent->bail_state;
   1323     state->next_state = parent->bail_next_state;
   1324     free (parent);
   1325     return 0;
   1326   }
   1327   return 1;
   1328 }
   1329 
   1330 
   1331 static int
   1332 maybe_rise_up (struct EXTRACTOR_PluginList *plugin, struct ebml_state *state,
   1333                int *do_break, int64_t read_result)
   1334 {
   1335   int64_t offset;
   1336   offset = pl_get_pos (plugin) - read_result;
   1337   if ((state->stack_top != NULL) && (offset >= state->stack_top->position
   1338                                      + state->stack_top->header_size
   1339                                      + state->stack_top->size) )
   1340   {
   1341     state->state = state->stack_top->finish_state;
   1342     pl_seek (plugin, -read_result, SEEK_CUR);
   1343     *do_break = 1;
   1344     return 1;
   1345   }
   1346   return 0;
   1347 }
   1348 
   1349 
   1350 static void
   1351 rise_up_after_value (struct EXTRACTOR_PluginList *plugin, struct
   1352                      ebml_state *state, int next_state)
   1353 {
   1354   int64_t offset;
   1355   state->state = EBML_READ_ELEMENT;
   1356   offset = state->stack_top->position + state->stack_top->header_size
   1357            + state->stack_top->size;
   1358   free (ebml_stack_pop (state));
   1359   state->next_state = next_state;
   1360   pl_seek (plugin, offset, SEEK_SET);
   1361 }
   1362 
   1363 
   1364 static void
   1365 try_to_find_pos (struct EXTRACTOR_PluginList *plugin, struct ebml_state *state)
   1366 {
   1367   if (state->matroska_seeks != NULL)
   1368   {
   1369     struct matroska_seek_list *el, *pos = NULL;
   1370     int64_t segment_position = pl_get_pos (plugin)
   1371                                - state->segment_contents_start;
   1372     for (el = state->matroska_seeks; el != NULL; el = el->next)
   1373     {
   1374       if (el->position <= segment_position)
   1375         pos = el;
   1376       else
   1377         break;
   1378     }
   1379     if (pos != NULL)
   1380       state->matroska_pos = pos;
   1381   }
   1382 }
   1383 
   1384 
   1385 static void
   1386 maybe_seek_to_something_interesting (struct EXTRACTOR_PluginList *plugin, struct
   1387                                      ebml_state *state)
   1388 {
   1389   int64_t offset;
   1390   struct matroska_seek_list *el;
   1391   try_to_find_pos (plugin, state);
   1392   if (state->matroska_pos == NULL)
   1393     return;
   1394   offset = pl_get_pos (plugin);
   1395   for (el = state->matroska_pos; el != NULL; el = el->next)
   1396   {
   1397     char do_break = 0;
   1398     switch (el->id)
   1399     {
   1400     case MatroskaID_Info:
   1401     case MatroskaID_Tracks:
   1402     case MatroskaID_Tags:
   1403     /* Some files will have more than one seek head */
   1404     case MatroskaID_SeekHead:
   1405       if (el->position + state->segment_contents_start >= offset)
   1406         do_break = 1;
   1407       break;
   1408     default:
   1409       break;
   1410     }
   1411     if (do_break)
   1412       break;
   1413   }
   1414   if (el == NULL)
   1415     el = state->matroska_seeks_tail;
   1416   if (el->position + state->segment_contents_start > offset)
   1417   {
   1418     /* TODO: add a separate stage after seeking that checks the ID of the element against
   1419      * the one we've got from seek table. If it doesn't match - stop parsing the file.
   1420      */
   1421 #if DEBUG_EBML
   1422     printf ("Seeking from %llu to %llu\n", offset, el->position
   1423             + state->segment_contents_start);
   1424 #endif
   1425     pl_seek (plugin, el->position + state->segment_contents_start, SEEK_SET);
   1426   }
   1427 }
   1428 
   1429 
   1430 static void
   1431 sort_seeks (struct ebml_state *state)
   1432 {
   1433   uint32_t id;
   1434   int64_t position;
   1435   struct matroska_seek_list *el;
   1436   char sorted = 0;
   1437   while (! sorted)
   1438   {
   1439     sorted = 1;
   1440     for (el = state->matroska_seeks; el != NULL; el = el->next)
   1441     {
   1442       if (el->next == NULL)
   1443         break;
   1444       id = el->next->id;
   1445       position = el->next->position;
   1446       if (position < el->position)
   1447       {
   1448         el->next->position = el->position;
   1449         el->next->id = el->id;
   1450         el->position = position;
   1451         el->id = id;
   1452         sorted = 0;
   1453       }
   1454     }
   1455   }
   1456 }
   1457 
   1458 
   1459 int
   1460 EXTRACTOR_ebml_extract_method (struct EXTRACTOR_PluginList *plugin,
   1461                                EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
   1462 {
   1463   uint64_t offset = 0;
   1464   ssize_t read_result;
   1465   unsigned char *data;
   1466   struct ebml_state *state;
   1467 
   1468   const unsigned char *start;
   1469   uint32_t eID;
   1470   uint64_t eSize;
   1471   int do_break;
   1472 
   1473   uint64_t uint_value;
   1474   int64_t sint_value;
   1475   char string_value[MAX_STRING_SIZE + 1];
   1476   long double float_value;
   1477   uint32_t id_value;
   1478 
   1479   if (plugin == NULL)
   1480     return 1;
   1481 
   1482   state = EXTRACTOR_ebml_init_state_method ();
   1483   if (state == NULL)
   1484     return 1;
   1485 
   1486   while (1)
   1487   {
   1488     switch (state->state)
   1489     {
   1490     default:
   1491     case EBML_BAD_STATE:
   1492       report_state (state, proc, proc_cls);
   1493       return EXTRACTOR_ebml_discard_state_method (state);
   1494     case EBML_LOOKING_FOR_HEADER:
   1495       offset = pl_get_pos (plugin);
   1496       sint_value = pl_read (plugin, &data, 1024 * 1024);
   1497       if (sint_value < 4)
   1498         return EXTRACTOR_ebml_discard_state_method (state);
   1499       start = NULL;
   1500       while (start == NULL)
   1501       {
   1502         start = memchr (data, EBMLID_FILE_BEGIN, sint_value);
   1503         if (start == NULL)
   1504         {
   1505           offset = pl_get_pos (plugin) - 3;
   1506           if (offset != pl_seek (plugin, offset, SEEK_SET))
   1507             return EXTRACTOR_ebml_discard_state_method (state);
   1508           sint_value = pl_read (plugin, &data, 1024 * 1024);
   1509           if (sint_value < 4)
   1510             return EXTRACTOR_ebml_discard_state_method (state);
   1511         }
   1512       }
   1513       if (offset + start - data != pl_seek (plugin, offset + start - data,
   1514                                             SEEK_SET))
   1515         return EXTRACTOR_ebml_discard_state_method (state);
   1516       state->state = EBML_READING_HEADER;
   1517       break;
   1518     case EBML_READING_HEADER:
   1519       if (0 > (read_result = elementRead (plugin, &eID, (int64_t*) &eSize)))
   1520         return EXTRACTOR_ebml_discard_state_method (state);
   1521       if (EBMLID_EBML != eID)
   1522       {
   1523         /* Not a header (happens easily, 0x1A is not uncommon), look further. */
   1524         offset = pl_get_pos (plugin) - 3;
   1525         if (offset < 0)
   1526           offset = 0;
   1527         if (offset != pl_seek (plugin, offset, SEEK_SET))
   1528           return EXTRACTOR_ebml_discard_state_method (state);
   1529         state->state = EBML_LOOKING_FOR_HEADER;
   1530         break;
   1531       }
   1532       state->state = EBML_READ_ELEMENT;
   1533       state->next_state = EBML_READING_HEADER_ELEMENTS;
   1534       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   1535                            EBML_FINISHED_READING_HEADER, EBML_BAD_STATE,
   1536                            EBML_FINISHED_READING_HEADER, EBML_BAD_STATE);
   1537       break;
   1538     case EBML_READ_ELEMENT:
   1539 #if DEBUG_EBML
   1540       printf ("Reading at %lld\n", pl_get_pos (plugin));
   1541 #endif
   1542       /* The following code generates some odd compiled instructions - instead of being getting the next state,
   1543        * state->state gets 0xfeeefeee.
   1544        */
   1545       /*
   1546       if (0 > (read_result = elementRead (plugin, &eID, &eSize)))
   1547       {
   1548         state->state = -1;
   1549         break;
   1550       }
   1551       state->state = state->next_state;
   1552       break;
   1553       *//* while the following code crashes with SIGILL.
   1554        *//*
   1555       read_result = elementRead (plugin, &eID, &eSize);
   1556       state->state = state->next_state;
   1557       if (read_result < 0)
   1558         state->state = EBML_BAD_STATE;
   1559       break;
   1560       *//* but the following code works as intended *//* All three code snippets were compiled with -O0 */{
   1561         enum EBMLState next_state = state->next_state;
   1562         state->state = EBML_BAD_STATE;
   1563         read_result = elementRead (plugin, &eID, (int64_t*) &eSize);
   1564         if (read_result >= 0)
   1565           state->state = next_state;
   1566       }
   1567       break;
   1568     case EBML_READ_UINT:
   1569       if (state->stack_top->size == 0)
   1570       {
   1571         /* Special case - zero-size uint means zero */
   1572         uint_value = 0;
   1573         read_result = 1; /* 0 means error */
   1574       }
   1575       else if (state->stack_top->size > 8)
   1576         read_result = 0;
   1577       else
   1578       {
   1579         if (0 > (read_result = uintRead (plugin, state->stack_top->size,
   1580                                          &uint_value)))
   1581         {
   1582           state->state = EBML_BAD_STATE;
   1583           break;
   1584         }
   1585       }
   1586       /* REMINDER: read_result might not be == number of read bytes in this case! */
   1587       state->state = state->next_state;
   1588       break;
   1589     case EBML_READ_ID:
   1590       if (0 > (read_result = idRead (plugin, state->stack_top->size,
   1591                                      &id_value)))
   1592       {
   1593         state->state = EBML_BAD_STATE;
   1594         break;
   1595       }
   1596       state->state = state->next_state;
   1597       break;
   1598     case EBML_READ_SINT:
   1599       if (state->stack_top->size == 0)
   1600       {
   1601         /* Special case - zero-size sint means zero */
   1602         sint_value = 0;
   1603         read_result = 1; /* 0 means error */
   1604       }
   1605       else if (state->stack_top->size > 8)
   1606         read_result = 0;
   1607       else
   1608       {
   1609         if (0 > (read_result = sintRead (plugin, state->stack_top->size,
   1610                                          &sint_value)))
   1611         {
   1612           state->state = EBML_BAD_STATE;
   1613           break;
   1614         }
   1615       }
   1616       /* REMINDER: read_result might not be == number of read bytes in this case! */
   1617       state->state = state->next_state;
   1618       break;
   1619     case EBML_READ_FLOAT:
   1620       if (state->stack_top->size == 0)
   1621       {
   1622         /* Special case - zero-size float means zero */
   1623         float_value = 0.0;
   1624         read_result = 1; /* 0 means error */
   1625       }
   1626       else if (state->stack_top->size > 10)
   1627         read_result = 0;
   1628       else
   1629       {
   1630         if (0 > (read_result = floatRead (plugin, state->stack_top->size,
   1631                                           &float_value)))
   1632         {
   1633           state->state = EBML_BAD_STATE;
   1634           break;
   1635         }
   1636       }
   1637       /* REMINDER: read_result might not be == number of read bytes in this case! */
   1638       state->state = state->next_state;
   1639       break;
   1640     case EBML_READ_STRING:
   1641       if (state->stack_top->size == 0)
   1642       {
   1643         string_value[0] = '\0';
   1644         read_result = 1; /* 0 means error */
   1645       }
   1646       else
   1647       {
   1648         if (0 > (read_result = stringRead (plugin, state->stack_top->size,
   1649                                            (char *) &string_value)))
   1650         {
   1651           state->state = EBML_BAD_STATE;
   1652           break;
   1653         }
   1654       }
   1655       /* REMINDER: read_result might not be == number of read bytes in this case! */
   1656       state->state = state->next_state;
   1657       break;
   1658     case EBML_READING_HEADER_ELEMENTS:
   1659       if (! check_result (plugin, read_result, state))
   1660         break;
   1661       do_break = 0;
   1662       switch (eID)
   1663       {
   1664       case EBMLID_VERSION:
   1665       case EBMLID_READ_VERSION:
   1666       case EBMLID_MAX_ID_LENGTH:
   1667       case EBMLID_MAX_SIZE_LENGTH:
   1668       case EBMLID_DOCTYPE_VERSION:
   1669       case EBMLID_DOCTYPE_READ_VERSION:
   1670         state->state = EBML_READ_UINT;
   1671         break;
   1672       case EBMLID_DOCTYPE:
   1673         state->state = EBML_READ_STRING;
   1674         break;
   1675       default:
   1676         if (maybe_rise_up (plugin, state, &do_break, read_result))
   1677           break;
   1678         /* Unknown element in EBML header - skip over it */
   1679         state->state = EBML_READ_ELEMENT;
   1680         state->next_state = EBML_READING_HEADER_ELEMENTS;
   1681         pl_seek (plugin, eSize, SEEK_CUR);
   1682         do_break = 1;
   1683       }
   1684       if (do_break)
   1685         break;
   1686       state->next_state = EBML_READING_HEADER_ELEMENT_VALUE;
   1687       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   1688                            EBML_BAD_STATE, EBML_READING_HEADER_ELEMENTS,
   1689                            EBML_READ_ELEMENT,
   1690                            EBML_READING_HEADER_ELEMENTS);
   1691       break;
   1692     case EBML_READING_HEADER_ELEMENT_VALUE:
   1693       if (! check_result (plugin, read_result, state))
   1694         break;
   1695 
   1696       switch (state->stack_top->id)
   1697       {
   1698       case EBMLID_VERSION:
   1699         state->ebml_version = uint_value;
   1700         break;
   1701       case EBMLID_READ_VERSION:
   1702         state->ebml_READ_version = uint_value;
   1703         if (uint_value > 1)
   1704         {
   1705           /* We don't support EBML versions > 1 */
   1706           state->state = EBML_BAD_STATE;
   1707           /* State remains invalid, and is not reported. That is probably OK,
   1708            * since we barely read anything (we only know that this is
   1709            * _probably_ EBML version X, that's all).
   1710            * We also stop right here and do not assume that somewhere further
   1711            * in the file there's another EBML header that is, maybe, readable
   1712            * by us. If you think this is worth correcting - patches are welcome.
   1713            */continue;
   1714         }
   1715         break;
   1716       case EBMLID_MAX_ID_LENGTH:
   1717         state->ebml_max_id_length = uint_value;
   1718         break;
   1719       case EBMLID_MAX_SIZE_LENGTH:
   1720         state->ebml_max_size_length = uint_value;
   1721         break;
   1722       case EBMLID_DOCTYPE_VERSION:
   1723         state->doctype_version = uint_value;
   1724         break;
   1725       case EBMLID_DOCTYPE_READ_VERSION:
   1726         state->doctype_read_version = uint_value;
   1727         break;
   1728       case EBMLID_DOCTYPE:
   1729         if (state->doctype != NULL)
   1730           free (state->doctype);
   1731         state->doctype = strdup (string_value);
   1732         state->valid_ebml = 1;
   1733         break;
   1734       }
   1735       rise_up_after_value (plugin, state, EBML_READING_HEADER_ELEMENTS);
   1736       break;
   1737     case EBML_FINISHED_READING_HEADER:
   1738       if (! state->valid_ebml)
   1739       {
   1740         /* Header was invalid (lacking doctype). */
   1741         state->next_state = EBML_SKIP_UNTIL_NEXT_HEADER;
   1742         break;
   1743       }
   1744       else
   1745       {
   1746         char *doctype = strdup (state->doctype);
   1747         report_state (state, proc, proc_cls);
   1748         state->state = EBML_READ_ELEMENT;
   1749         if (strcmp (doctype, "matroska") == 0)
   1750         {
   1751           state->next_state = EBML_READING_MATROSKA_SEGMENT;
   1752         }
   1753         else if (strcmp (doctype, "webm") == 0)
   1754         {
   1755           /* Webm is a strict subset of Matroska. However, since strictness
   1756            * means nothing to us (we don't validate the container, we extract
   1757            * metadata from it!), we do not care about these differences
   1758            * (which means that this code will happily read webm files that do
   1759            * not conform to Webm spec, but conform to Matroska spec).
   1760            */state->next_state = EBML_READING_MATROSKA_SEGMENT;
   1761         }
   1762         else
   1763         {
   1764           /* Header was valid, but doctype is unknown. */
   1765           state->next_state = EBML_SKIP_UNTIL_NEXT_HEADER;
   1766         }
   1767         free (doctype);
   1768       }
   1769       break;
   1770     case EBML_SKIP_UNTIL_NEXT_HEADER:
   1771       if (read_result == 0)
   1772       {
   1773         state->state = EBML_LOOKING_FOR_HEADER;
   1774         break;
   1775       }
   1776       if (eID != EBMLID_EBML)
   1777       {
   1778         state->state = EBML_READ_ELEMENT;
   1779         state->next_state = EBML_SKIP_UNTIL_NEXT_HEADER;
   1780         pl_seek (plugin, eSize, SEEK_CUR);
   1781         break;
   1782       }
   1783       state->state = EBML_READING_HEADER;
   1784       break;
   1785     case EBML_READING_MATROSKA_SEGMENT:
   1786       if (read_result == 0)
   1787       {
   1788         state->state = EBML_LOOKING_FOR_HEADER;
   1789         break;
   1790       }
   1791       if (eID == EBMLID_EBML)
   1792       {
   1793         state->state = EBML_READING_HEADER;
   1794         break;
   1795       }
   1796       if (eID != MatroskaID_Segment)
   1797       {
   1798         pl_seek (plugin, eSize, SEEK_CUR);
   1799         state->state = EBML_READ_ELEMENT;
   1800         state->next_state = EBML_READING_MATROSKA_SEGMENT;
   1801         break;
   1802       }
   1803       state->state = EBML_READ_ELEMENT;
   1804       state->next_state = EBML_READING_MATROSKA_SEGMENT_CONTENTS;
   1805       clean_ebml_state_matroska_segment (state);
   1806       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   1807                            EBML_FINISHED_READING_MATROSKA_SEGMENT_CONTENTS,
   1808                            EBML_READING_MATROSKA_SEGMENT, EBML_READ_ELEMENT,
   1809                            EBML_READING_MATROSKA_SEGMENT);
   1810       state->segment_contents_start = pl_get_pos (plugin);
   1811       break;
   1812     case EBML_READING_MATROSKA_SEGMENT_CONTENTS:
   1813       if (! check_result (plugin, read_result, state))
   1814         break;
   1815 
   1816       state->state = EBML_READ_ELEMENT;
   1817       switch (eID)
   1818       {
   1819       case MatroskaID_SeekHead:
   1820         state->next_state = EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS;
   1821         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1822                              read_result,
   1823                              EBML_FINISHED_READING_MATROSKA_SEEK_HEAD_CONTENTS,
   1824                              EBML_READING_MATROSKA_SEGMENT_CONTENTS,
   1825                              EBML_READ_ELEMENT,
   1826                              EBML_READING_MATROSKA_SEGMENT_CONTENTS);
   1827         break;
   1828       case MatroskaID_Info:
   1829         state->next_state = EBML_READING_MATROSKA_INFO_CONTENTS;
   1830         clean_ebml_state_matroska_info (state);
   1831         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1832                              read_result,
   1833                              EBML_FINISHED_READING_MATROSKA_INFO_CONTENTS,
   1834                              EBML_READING_MATROSKA_SEGMENT_CONTENTS,
   1835                              EBML_READ_ELEMENT,
   1836                              EBML_READING_MATROSKA_SEGMENT_CONTENTS);
   1837         break;
   1838       case MatroskaID_Tracks:
   1839         state->next_state = EBML_READING_MATROSKA_TRACKS_CONTENTS;
   1840         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1841                              read_result,
   1842                              EBML_FINISHED_READING_MATROSKA_TRACKS_CONTENTS,
   1843                              EBML_READING_MATROSKA_SEGMENT_CONTENTS,
   1844                              EBML_READ_ELEMENT,
   1845                              EBML_READING_MATROSKA_SEGMENT_CONTENTS);
   1846         break;
   1847       case MatroskaID_Tags:
   1848         state->next_state = EBML_READING_MATROSKA_TAGS_CONTENTS;
   1849         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1850                              read_result,
   1851                              EBML_FINISHED_READING_MATROSKA_TAGS_CONTENTS,
   1852                              EBML_READING_MATROSKA_SEGMENT_CONTENTS,
   1853                              EBML_READ_ELEMENT,
   1854                              EBML_READING_MATROSKA_SEGMENT_CONTENTS);
   1855         break;
   1856       default:
   1857         if (maybe_rise_up (plugin, state, &do_break, read_result))
   1858           break;
   1859         maybe_seek_to_something_interesting (plugin, state);
   1860         state->next_state = EBML_READING_MATROSKA_SEGMENT_CONTENTS;
   1861         pl_seek (plugin, eSize, SEEK_CUR);
   1862       }
   1863       break;
   1864     case EBML_READING_MATROSKA_TAGS_CONTENTS:
   1865       if (! check_result (plugin, read_result, state))
   1866         break;
   1867       state->state = EBML_READ_ELEMENT;
   1868       switch (eID)
   1869       {
   1870       case MatroskaID_Tags_Tag:
   1871         state->next_state = EBML_READING_MATROSKA_TAG_CONTENTS;
   1872         clean_ebml_state_matroska_seek (state);
   1873         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1874                              read_result,
   1875                              EBML_FINISHED_READING_MATROSKA_TAG_CONTENTS,
   1876                              EBML_READING_MATROSKA_TAGS_CONTENTS,
   1877                              EBML_READ_ELEMENT,
   1878                              EBML_READING_MATROSKA_TAGS_CONTENTS);
   1879         break;
   1880       default:
   1881         if (maybe_rise_up (plugin, state, &do_break, read_result))
   1882           break;
   1883         state->next_state = EBML_READING_MATROSKA_TAGS_CONTENTS;
   1884         pl_seek (plugin, eSize, SEEK_CUR);
   1885       }
   1886       break;
   1887     case EBML_READING_MATROSKA_TAG_CONTENTS:
   1888       if (! check_result (plugin, read_result, state))
   1889         break;
   1890 
   1891       state->state = EBML_READ_ELEMENT;
   1892       switch (eID)
   1893       {
   1894       case MatroskaID_Tags_Tag_SimpleTag:
   1895         state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS;
   1896         clean_ebml_state_matroska_simpletags (state);
   1897         matroska_add_tag (state, NULL, NULL, NULL);
   1898         state->tag_current = state->tag_last;
   1899         state->tag_tree = state->tag_current;
   1900         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1901                              read_result,
   1902                              EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS,
   1903                              EBML_READING_MATROSKA_TAG_CONTENTS,
   1904                              EBML_READ_ELEMENT,
   1905                              EBML_READING_MATROSKA_TAG_CONTENTS);
   1906         break;
   1907       default:
   1908         if (maybe_rise_up (plugin, state, &do_break, read_result))
   1909           break;
   1910         state->next_state = EBML_READING_MATROSKA_TAG_CONTENTS;
   1911         pl_seek (plugin, eSize, SEEK_CUR);
   1912       }
   1913       break;
   1914     case EBML_READING_MATROSKA_SIMPLETAG_CONTENTS:
   1915       if (! check_result (plugin, read_result, state))
   1916         break;
   1917 
   1918       do_break = 0;
   1919       switch (eID)
   1920       {
   1921       case MatroskaID_Tags_Tag_SimpleTag_TagName:
   1922         state->state = EBML_READ_STRING;
   1923         break; /* mandatory, UTF8-encoded. The name of the Tag that is going to be stored. */
   1924       case MatroskaID_Tags_Tag_SimpleTag_TagString:
   1925         state->state = EBML_READ_STRING;
   1926         break; /* UTF-8-encoded. The value of the Tag. */
   1927       case MatroskaID_Tags_Tag_SimpleTag:
   1928         if (maybe_rise_up (plugin, state, &do_break, read_result))
   1929           break;
   1930         /* Oh joy, simpletags are recursive! */
   1931         state->state = EBML_READ_ELEMENT;
   1932         state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS;
   1933         matroska_add_tag (state, state->tag_current, NULL, NULL);
   1934         state->tag_current = state->tag_last;
   1935         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1936                              read_result,
   1937                              EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS,
   1938                              EBML_READING_MATROSKA_SIMPLETAG_CONTENTS,
   1939                              EBML_READ_ELEMENT,
   1940                              EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS);
   1941         do_break = 1;
   1942         break;
   1943       default:
   1944         if (maybe_rise_up (plugin, state, &do_break, read_result))
   1945           break;
   1946         state->state = EBML_READ_ELEMENT;
   1947         state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS;
   1948         pl_seek (plugin, eSize, SEEK_CUR);
   1949         do_break = 1;
   1950         break;
   1951       }
   1952       if (do_break)
   1953         break;
   1954       state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS_VALUE;
   1955       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   1956                            EBML_BAD_STATE,
   1957                            EBML_READING_MATROSKA_SIMPLETAG_CONTENTS,
   1958                            EBML_READ_ELEMENT,
   1959                            EBML_READING_MATROSKA_SIMPLETAG_CONTENTS);
   1960       break;
   1961     case EBML_READING_MATROSKA_SIMPLETAG_CONTENTS_VALUE:
   1962       if (! check_result (plugin, read_result, state))
   1963         break;
   1964 
   1965       /* This breaks the specs, as there should be only one instance of each
   1966        * element (at most). We ignore that and remember the latest value,
   1967        * dropping previous ones.
   1968        */
   1969       switch (state->stack_top->id)
   1970       {
   1971       case MatroskaID_Tags_Tag_SimpleTag_TagName:
   1972         if (state->tag_current->name != NULL)
   1973           free (state->tag_current->name);
   1974         state->tag_current->name = strdup (string_value);
   1975         break;
   1976       case MatroskaID_Tags_Tag_SimpleTag_TagString:
   1977         if (state->tag_current->string != NULL)
   1978           free (state->tag_current->string);
   1979         state->tag_current->string = strdup (string_value);
   1980         break;
   1981       }
   1982       rise_up_after_value (plugin, state,
   1983                            EBML_READING_MATROSKA_SIMPLETAG_CONTENTS);
   1984       break;
   1985     case EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS:
   1986       if (! check_result (plugin, read_result, state))
   1987         break;
   1988 
   1989       state->state = EBML_READ_ELEMENT;
   1990       switch (eID)
   1991       {
   1992       case MatroskaID_Seek:
   1993         state->next_state = EBML_READING_MATROSKA_SEEK_CONTENTS;
   1994         clean_ebml_state_matroska_seek (state);
   1995         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   1996                              read_result,
   1997                              EBML_FINISHED_READING_MATROSKA_SEEK_CONTENTS,
   1998                              EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS,
   1999                              EBML_READ_ELEMENT,
   2000                              EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS);
   2001         break;
   2002       default:
   2003         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2004           break;
   2005         state->next_state = EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS;
   2006         pl_seek (plugin, eSize, SEEK_CUR);
   2007       }
   2008       break;
   2009     case EBML_READING_MATROSKA_SEEK_CONTENTS:
   2010       if (! check_result (plugin, read_result, state))
   2011         break;
   2012 
   2013       do_break = 0;
   2014       switch (eID)
   2015       {
   2016       case MatroskaID_SeekID:
   2017         state->state = EBML_READ_ID;
   2018         break;
   2019       case MatroskaID_SeekPosition:
   2020         state->state = EBML_READ_UINT;
   2021         break;
   2022       default:
   2023         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2024           break;
   2025         state->state = EBML_READ_ELEMENT;
   2026         state->next_state = EBML_READING_MATROSKA_SEEK_CONTENTS;
   2027         pl_seek (plugin, eSize, SEEK_CUR);
   2028         do_break = 1;
   2029         break;
   2030       }
   2031       if (do_break)
   2032         break;
   2033       state->next_state = EBML_READING_MATROSKA_SEEK_CONTENTS_VALUE;
   2034       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   2035                            EBML_BAD_STATE, EBML_READING_MATROSKA_SEEK_CONTENTS,
   2036                            EBML_READ_ELEMENT,
   2037                            EBML_READING_MATROSKA_SEEK_CONTENTS);
   2038       break;
   2039     case EBML_READING_MATROSKA_SEEK_CONTENTS_VALUE:
   2040       if (! check_result (plugin, read_result, state))
   2041         break;
   2042 
   2043       /* This breaks the specs, as there should be only one instance of each
   2044        * element (at most). We ignore that and remember the latest value,
   2045        * dropping previous ones.
   2046        */
   2047       switch (state->stack_top->id)
   2048       {
   2049       case MatroskaID_SeekID:
   2050         state->matroska_seek_id = id_value;
   2051         break;
   2052       case MatroskaID_SeekPosition:
   2053         state->matroska_seek_position = uint_value;
   2054         break;
   2055       }
   2056       rise_up_after_value (plugin, state, EBML_READING_MATROSKA_SEEK_CONTENTS);
   2057       break;
   2058     case EBML_READING_MATROSKA_TRACKS_CONTENTS:
   2059       if (! check_result (plugin, read_result, state))
   2060         break;
   2061 
   2062       state->state = EBML_READ_ELEMENT;
   2063       switch (eID)
   2064       {
   2065       case MatroskaID_Tracks_TrackEntry:
   2066         state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS;
   2067         clean_ebml_state_matroska_track (state);
   2068         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   2069                              read_result,
   2070                              EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_CONTENTS,
   2071                              EBML_READING_MATROSKA_TRACKS_CONTENTS,
   2072                              EBML_READ_ELEMENT,
   2073                              EBML_READING_MATROSKA_TRACKS_CONTENTS);
   2074         break;
   2075       default:
   2076         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2077           break;
   2078         state->next_state = EBML_READING_MATROSKA_TRACKS_CONTENTS;
   2079         pl_seek (plugin, eSize, SEEK_CUR);
   2080       }
   2081       break;
   2082     case EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS:
   2083       if (! check_result (plugin, read_result, state))
   2084         break;
   2085 
   2086       do_break = 0;
   2087       switch (eID)
   2088       {
   2089       case MatroskaID_Tracks_TrackType:
   2090         state->state = EBML_READ_UINT;
   2091         break;
   2092       case MatroskaID_Tracks_Name:
   2093       case MatroskaID_Tracks_Language:
   2094       case MatroskaID_Tracks_CodecID:
   2095       case MatroskaID_Tracks_CodecName:
   2096         state->state = EBML_READ_STRING;
   2097         break;
   2098       case MatroskaID_Tracks_Video:
   2099         state->state = EBML_READ_ELEMENT;
   2100         state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS;
   2101         clean_ebml_state_matroska_track_video (state);
   2102         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   2103                              read_result,
   2104                              EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS,
   2105                              EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS,
   2106                              EBML_READ_ELEMENT,
   2107                              EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS);
   2108         do_break = 1;
   2109         break;
   2110       case MatroskaID_Tracks_Audio:
   2111         state->state = EBML_READ_ELEMENT;
   2112         state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS;
   2113         clean_ebml_state_matroska_track_audio (state);
   2114         ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize,
   2115                              read_result,
   2116                              EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS,
   2117                              EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS,
   2118                              EBML_READ_ELEMENT,
   2119                              EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS);
   2120         do_break = 1;
   2121         break;
   2122       default:
   2123         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2124           break;
   2125         state->state = EBML_READ_ELEMENT;
   2126         state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS;
   2127         pl_seek (plugin, eSize, SEEK_CUR);
   2128         do_break = 1;
   2129         break;
   2130       }
   2131       if (do_break)
   2132         break;
   2133       state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS_VALUE;
   2134       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   2135                            EBML_BAD_STATE,
   2136                            EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS,
   2137                            EBML_READ_ELEMENT,
   2138                            EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS);
   2139       break;
   2140     case EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS:
   2141       if (! check_result (plugin, read_result, state))
   2142         break;
   2143 
   2144       do_break = 0;
   2145       switch (eID)
   2146       {
   2147       case MatroskaID_Tracks_Audio_SamplingFrequency:
   2148       case MatroskaID_Tracks_Audio_OutputSamplingFrequency:
   2149         state->state = EBML_READ_FLOAT;
   2150         break;
   2151       case MatroskaID_Tracks_Audio_Channels:
   2152       case MatroskaID_Tracks_Audio_BitDepth:
   2153         state->state = EBML_READ_UINT;
   2154         break;
   2155       default:
   2156         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2157           break;
   2158         state->state = EBML_READ_ELEMENT;
   2159         state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS;
   2160         pl_seek (plugin, eSize, SEEK_CUR);
   2161         do_break = 1;
   2162         break;
   2163       }
   2164       if (do_break)
   2165         break;
   2166       state->next_state =
   2167         EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS_VALUE;
   2168       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   2169                            EBML_BAD_STATE,
   2170                            EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS,
   2171                            EBML_READ_ELEMENT,
   2172                            EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS);
   2173       break;
   2174     case EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS_VALUE:
   2175       if (! check_result (plugin, read_result, state))
   2176         break;
   2177 
   2178       /* This breaks the specs, as there should be only one instance of each
   2179        * element (at most). We ignore that and remember the latest value,
   2180        * dropping previous ones.
   2181        */
   2182       switch (state->stack_top->id)
   2183       {
   2184       case MatroskaID_Tracks_Audio_SamplingFrequency:
   2185         state->matroska_track_audio_sampling_frequency = float_value;
   2186         break;
   2187       case MatroskaID_Tracks_Audio_OutputSamplingFrequency:
   2188         state->matroska_track_audio_output_sampling_frequency = float_value;
   2189         break;
   2190       case MatroskaID_Tracks_Audio_Channels:
   2191         state->matroska_track_audio_channels = uint_value;
   2192         break;
   2193       case MatroskaID_Tracks_Audio_BitDepth:
   2194         state->matroska_track_audio_bit_depth = uint_value;
   2195         break;
   2196       }
   2197       rise_up_after_value (plugin, state,
   2198                            EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS);
   2199       break;
   2200     case EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS:
   2201       if (! check_result (plugin, read_result, state))
   2202         break;
   2203 
   2204       do_break = 0;
   2205       switch (eID)
   2206       {
   2207       case MatroskaID_Tracks_Video_FlagInterlaced:
   2208       case MatroskaID_Tracks_Video_StereoMode:
   2209       case MatroskaID_Tracks_Video_PixelWidth:
   2210       case MatroskaID_Tracks_Video_PixelHeight:
   2211       case MatroskaID_Tracks_Video_DisplayWidth:
   2212       case MatroskaID_Tracks_Video_DisplayHeight:
   2213       case MatroskaID_Tracks_Video_DisplayUnit:
   2214         state->state = EBML_READ_UINT;
   2215         break;
   2216       default:
   2217         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2218           break;
   2219         state->state = EBML_READ_ELEMENT;
   2220         state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS;
   2221         pl_seek (plugin, eSize, SEEK_CUR);
   2222         do_break = 1;
   2223         break;
   2224       }
   2225       if (do_break)
   2226         break;
   2227       state->next_state =
   2228         EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS_VALUE;
   2229       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   2230                            EBML_BAD_STATE,
   2231                            EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS,
   2232                            EBML_READ_ELEMENT,
   2233                            EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS);
   2234       break;
   2235     case EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS_VALUE:
   2236       if (! check_result (plugin, read_result, state))
   2237         break;
   2238 
   2239       /* This breaks the specs, as there should be only one instance of each
   2240        * element (at most). We ignore that and remember the latest value,
   2241        * dropping previous ones.
   2242        */
   2243       switch (state->stack_top->id)
   2244       {
   2245       case MatroskaID_Tracks_Video_FlagInterlaced:
   2246         state->matroska_track_video_flag_interlaced = uint_value;
   2247         break;
   2248       case MatroskaID_Tracks_Video_StereoMode:
   2249         state->matroska_track_video_stereo_mode = uint_value;
   2250         break;
   2251       case MatroskaID_Tracks_Video_PixelWidth:
   2252         state->matroska_track_video_pixel_width = uint_value;
   2253         break;
   2254       case MatroskaID_Tracks_Video_PixelHeight:
   2255         state->matroska_track_video_pixel_height = uint_value;
   2256         break;
   2257       case MatroskaID_Tracks_Video_DisplayWidth:
   2258         state->matroska_track_video_display_width = uint_value;
   2259         break;
   2260       case MatroskaID_Tracks_Video_DisplayHeight:
   2261         state->matroska_track_video_display_height = uint_value;
   2262         break;
   2263       case MatroskaID_Tracks_Video_DisplayUnit:
   2264         state->matroska_track_video_display_unit = uint_value;
   2265         break;
   2266       }
   2267       rise_up_after_value (plugin, state,
   2268                            EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS);
   2269       break;
   2270     case EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS_VALUE:
   2271       if (! check_result (plugin, read_result, state))
   2272         break;
   2273 
   2274       /* This breaks the specs, as there should be only one instance of each
   2275        * element (at most). We ignore that and remember the latest value,
   2276        * dropping previous ones.
   2277        */
   2278       switch (state->stack_top->id)
   2279       {
   2280       case MatroskaID_Tracks_TrackType:
   2281         state->matroska_track_type = uint_value;
   2282         break;
   2283       case MatroskaID_Tracks_Name:
   2284         if (state->matroska_track_name != NULL)
   2285           free (state->matroska_track_name);
   2286         state->matroska_track_name = strdup (string_value);
   2287         break; /* UTF-8-encoded. A human-readable track name. */
   2288       case MatroskaID_Tracks_Language:
   2289         if (state->matroska_track_language != NULL)
   2290           free (state->matroska_track_language);
   2291         state->matroska_track_language = strdup (string_value);
   2292         break; /* defaults to 'eng', string. Specifies the language of the track in the Matroska languages form. */
   2293       case MatroskaID_Tracks_CodecID:
   2294         if (state->matroska_track_codec_id != NULL)
   2295           free (state->matroska_track_codec_id);
   2296         state->matroska_track_codec_id = strdup (string_value);
   2297         break; /* mandatory, string. An ID corresponding to the codec, see the codec page ( http://matroska.org/technical/specs/codecid/index.html ) for more info. */
   2298       case MatroskaID_Tracks_CodecName:
   2299         if (state->matroska_track_codec_name != NULL)
   2300           free (state->matroska_track_codec_name);
   2301         state->matroska_track_codec_name = strdup (string_value);
   2302         break; /* UTF-8-encoded. A human-readable string specifying the codec. */
   2303       }
   2304       rise_up_after_value (plugin, state,
   2305                            EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS);
   2306       break;
   2307     case EBML_READING_MATROSKA_INFO_CONTENTS:
   2308       if (! check_result (plugin, read_result, state))
   2309         break;
   2310 
   2311       do_break = 0;
   2312       switch (eID)
   2313       {
   2314       case MatroskaID_Info_Title:
   2315       case MatroskaID_Info_MuxingApp:
   2316       case MatroskaID_Info_WritingApp:
   2317         state->state = EBML_READ_STRING;
   2318         break;
   2319       case MatroskaID_Info_TimecodeScale:
   2320         state->state = EBML_READ_UINT;
   2321         break;
   2322       case MatroskaID_Info_Duration:
   2323         state->state = EBML_READ_FLOAT;
   2324         break;
   2325       case MatroskaID_Info_DateUTC:
   2326         state->state = EBML_READ_SINT;
   2327         break;
   2328       default:
   2329         if (maybe_rise_up (plugin, state, &do_break, read_result))
   2330           break;
   2331         /* Unknown element in MatroskaInfo - skip over it */
   2332         state->state = EBML_READ_ELEMENT;
   2333         state->next_state = EBML_READING_MATROSKA_INFO_CONTENTS;
   2334         pl_seek (plugin, eSize, SEEK_CUR);
   2335         do_break = 1;
   2336       }
   2337       if (do_break)
   2338         break;
   2339       state->next_state = EBML_READING_MATROSKA_INFO_CONTENTS_VALUE;
   2340       ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result,
   2341                            EBML_BAD_STATE, EBML_READING_MATROSKA_INFO_CONTENTS,
   2342                            EBML_READ_ELEMENT,
   2343                            EBML_READING_MATROSKA_INFO_CONTENTS);
   2344       break;
   2345     case EBML_READING_MATROSKA_INFO_CONTENTS_VALUE:
   2346       if (! check_result (plugin, read_result, state))
   2347         break;
   2348 
   2349       /* This breaks the specs, as there should be only one instance of each
   2350        * element (at most). We ignore that and remember the latest value,
   2351        * dropping previous ones.
   2352        */
   2353       switch (state->stack_top->id)
   2354       {
   2355       case MatroskaID_Info_Title:
   2356         if (state->matroska_info_title != NULL)
   2357           free (state->matroska_info_title);
   2358         state->matroska_info_title = strdup (string_value);
   2359         break;
   2360       case MatroskaID_Info_MuxingApp:
   2361         if (state->matroska_info_muxing_app != NULL)
   2362           free (state->matroska_info_muxing_app);
   2363         state->matroska_info_muxing_app = strdup (string_value);
   2364         break;
   2365       case MatroskaID_Info_WritingApp:
   2366         if (state->matroska_info_writing_app != NULL)
   2367           free (state->matroska_info_writing_app);
   2368         state->matroska_info_writing_app = strdup (string_value);
   2369         break;
   2370       case MatroskaID_Info_TimecodeScale:
   2371         state->matroska_info_timecode_scale = uint_value;
   2372         break;
   2373       case MatroskaID_Info_Duration:
   2374         state->matroska_info_duration = float_value;
   2375         break;
   2376       case MatroskaID_Info_DateUTC:
   2377         state->matroska_info_date_utc_is_set = 1;
   2378         state->matroska_info_date_utc = sint_value;
   2379         break;
   2380       }
   2381       rise_up_after_value (plugin, state, EBML_READING_MATROSKA_INFO_CONTENTS);
   2382       break;
   2383     case EBML_FINISHED_READING_MATROSKA_INFO_CONTENTS:
   2384       if ((state->stack_top != NULL) && (pl_get_pos (plugin) >=
   2385                                          state->stack_top->position
   2386                                          + state->stack_top->header_size
   2387                                          + state->stack_top->size) )
   2388         report_state (state, proc, proc_cls);
   2389       maybe_seek_to_something_interesting (plugin, state);
   2390       state->state = EBML_READ_ELEMENT;
   2391       state->next_state = state->stack_top->prev_state;
   2392       free (ebml_stack_pop (state));
   2393       break;
   2394     case EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_CONTENTS:
   2395       if ((state->stack_top != NULL) && (pl_get_pos (plugin) >=
   2396                                          state->stack_top->position
   2397                                          + state->stack_top->header_size
   2398                                          + state->stack_top->size) )
   2399         report_state (state, proc, proc_cls);
   2400       state->state = EBML_READ_ELEMENT;
   2401       state->next_state = state->stack_top->prev_state;
   2402       free (ebml_stack_pop (state));
   2403       break;
   2404     case EBML_FINISHED_READING_MATROSKA_SEEK_CONTENTS:
   2405       if ((state->matroska_seek_id != 0) &&
   2406           ((state->matroska_seek_position > 0) || (state->matroska_seeks_tail ==
   2407                                                    NULL) ))
   2408       {
   2409         struct matroska_seek_list *el;
   2410         el = malloc (sizeof (struct matroska_seek_list));
   2411         el->next = NULL;
   2412         el->id = state->matroska_seek_id;
   2413         el->position = state->matroska_seek_position;
   2414         if (state->matroska_seeks_tail != NULL)
   2415         {
   2416           state->matroska_seeks_tail->next = el;
   2417           state->matroska_seeks_tail = el;
   2418         }
   2419         else
   2420           state->matroska_seeks_tail = state->matroska_seeks = el;
   2421       }
   2422       state->state = EBML_READ_ELEMENT;
   2423       state->next_state = state->stack_top->prev_state;
   2424       free (ebml_stack_pop (state));
   2425       break;
   2426     case EBML_FINISHED_READING_MATROSKA_TRACKS_CONTENTS:
   2427     case EBML_FINISHED_READING_MATROSKA_TAGS_CONTENTS:
   2428       maybe_seek_to_something_interesting (plugin, state);
   2429     case EBML_FINISHED_READING_MATROSKA_SEGMENT_CONTENTS:
   2430     case EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS:
   2431     case EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS:
   2432       state->state = EBML_READ_ELEMENT;
   2433       state->next_state = state->stack_top->prev_state;
   2434       free (ebml_stack_pop (state));
   2435       break;
   2436     case EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS:
   2437       state->tag_current = state->tag_current->parent;
   2438       if (state->tag_current == NULL)
   2439         report_simpletag (state, proc, proc_cls);
   2440       state->state = EBML_READ_ELEMENT;
   2441       state->next_state = state->stack_top->prev_state;
   2442       free (ebml_stack_pop (state));
   2443       break;
   2444     case EBML_FINISHED_READING_MATROSKA_TAG_CONTENTS:
   2445       state->state = EBML_READ_ELEMENT;
   2446       state->next_state = state->stack_top->prev_state;
   2447       free (ebml_stack_pop (state));
   2448       break;
   2449     case EBML_FINISHED_READING_MATROSKA_SEEK_HEAD_CONTENTS:
   2450       sort_seeks (state);
   2451       try_to_find_pos (plugin, state);
   2452       state->state = EBML_READ_ELEMENT;
   2453       state->next_state = state->stack_top->prev_state;
   2454       free (ebml_stack_pop (state));
   2455       break;
   2456     }
   2457   }
   2458   return EXTRACTOR_ebml_discard_state_method (state);
   2459 }