ebml_extractor.c (93892B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 2, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * Made by Gabriel Peixoto 23 * Using AVInfo 1.x code. Copyright (c) 2004 George Shuklin. 24 * Nearly complete rewrite by LRN, Copyright (c) 2012 25 */ 26 27 #include "platform.h" 28 #include "extractor.h" 29 #include <stdint.h> 30 31 #include "le_architecture.h" 32 33 #ifndef DEBUG_EBML 34 # define DEBUG_EBML 0 35 #endif 36 37 #if WINDOWS 38 /* According to http://old.nabble.com/Porting-localtime_r-and-gmtime_r-td15282276.html 39 * msvcrt.dll does have thread-safe gmtime implementation, 40 * even though the documentation says otherwise. 41 * Should be easy to check - spawn 2 threads, run _gmtime64 in each one 42 * and see if they return the same pointer. 
*/
/**
 * Intended as a reentrant wrapper around _gmtime64 (): copy the
 * broken-down time into a caller-supplied buffer.
 *
 * NOTE(review): the _gmtime64 () call is commented out below, so
 * local_result is always NULL and, as written, this function always
 * returns NULL without touching *result.
 *
 * @param timer time value to convert (not used while the call is disabled)
 * @param result buffer that is meant to receive the broken-down time
 * @return result if a conversion result was copied into it,
 *         NULL if there is no conversion result or result is NULL
 */
struct tm *
gmtime_undocumented_64_r (const __time64_t *timer, struct tm *result)
{
  struct tm *local_result = NULL; // _gmtime64 (timer);

  if ((local_result == NULL) || (result == NULL) )
    return NULL;

  memcpy (result, local_result, sizeof (*result));
  return result;
}


#endif

#include "extractor_plugins.h"

/*
 * Report the 0-terminated string s as UTF-8 "text/plain" metadata of
 * type t on behalf of the "ebml" plugin.  Expands to a call of proc
 * with proc_cls, so both names must be in scope at the point of use.
 * s is evaluated twice.
 */
#define ADD_EBML(s,t) do { proc (proc_cls, "ebml", t, EXTRACTOR_METAFORMAT_UTF8, \
                                 "text/plain", s, strlen (s) + 1); } while (0)
/*
 * Same as ADD_EBML, but the metadata is attributed to "matroska".
 */
#define ADD_MATROSKA(s,t) do { proc (proc_cls, "matroska", t, \
                                     EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, \
                                     strlen (s) + 1); } while (0)

/**
 * String length limit. The spec does not limit the strings,
 * but we don't want to allocate 2^56 bytes
 * just because some EBML file says it has a string of that length!
 * This must also be <= the number of bytes LE gives us in one go
 * (the code doesn't know how to "read a part of string, request a seek,
 * then read some more, and repeat until the whole string is read").
 * If it isn't, the code will loop forever, requesting the same
 * seek position (beginning of the string) over and over.
 * FIXME: find a way to fix that condition in LE itself?
 * TODO: rewrite string reading code to allocate strings on the heap,
 * that will allow us to greatly increase max string size. Right now
 * strings are allocated on the stack, and can't be too long because
 * of that.
*/
#define MAX_STRING_SIZE 1024

/**
 * One known Matroska track type: the numeric code and a
 * human-readable name for it.
 */
struct MatroskaTrackType
{
  /* Track type code (the table below uses the values listed for MatroskaID_Tracks_TrackType) */
  unsigned char code;
  /* Human-readable name; NULL in the terminating table entry */
  const char *name;
  /* The table below only uses 1 and -1 here.
   * NOTE(review): presumably 1 means "video settings must be valid for
   * this track type" and -1 means "don't care" - confirm against the code
   * that consumes track_types. */
  char video_must_be_valid;
  /* Same convention as video_must_be_valid, for audio settings - TODO confirm */
  char audio_must_be_valid;
};

/* Known track types; the list ends with an entry whose name is NULL. */
struct MatroskaTrackType track_types[] = {
  {0x01, "video", 1, -1},
  {0x02, "audio", -1, 1},
  {0x03, "complex", -1, -1},
  {0x10, "logo", -1, -1},
  {0x11, "subtitle", -1, -1},
  {0x12, "buttons", -1, -1},
  {0x20, "control", -1, -1},
  {0x00, NULL}
};

/**
 * Associates a Matroska tag name with a libextractor metadata type.
 */
struct MatroskaTagMap
{
  /* Tag name; NULL in the terminating table entry */
  const char *name;
  /* Metadata type used for that tag */
  enum EXTRACTOR_MetaType id;
};

/* TODO: Add TargetLevel parsing, and use it to correctly set:
 * "track number" and "disk number" from PART_NUMBER,
 * "author email" from EMAIL,
 * "publisher address" from ADDRESS
 *
 * Tag names that appear only inside comments below have no entry in
 * the table.  The list ends with an entry whose name is NULL.
 */
struct MatroskaTagMap tag_map[] = {
  {"COUNTRY", EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE},
  {"TITLE", EXTRACTOR_METATYPE_TITLE},
  {"SUBTITLE", EXTRACTOR_METATYPE_SUBTITLE},
  {"URL", EXTRACTOR_METATYPE_URL},
  {"ARTIST", EXTRACTOR_METATYPE_ARTIST},
  {"LEAD_PERFORMER", EXTRACTOR_METATYPE_PERFORMER},
  {"ACCOMPANIMENT", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST}, /* not sure if it's correct */
  {"COMPOSER", EXTRACTOR_METATYPE_COMPOSER},
  {"LYRICS", EXTRACTOR_METATYPE_LYRICS},
  /* LYRICIST */
  {"CONDUCTOR", EXTRACTOR_METATYPE_CONDUCTOR},
  /* DIRECTOR UTF-8 This is akin to the IART tag in RIFF.
     ASSISTANT_DIRECTOR UTF-8 The name of the assistant director.
     DIRECTOR_OF_PHOTOGRAPHY UTF-8 The name of the director of photography, also known as cinematographer. This is akin to the ICNM tag in Extended RIFF.
     SOUND_ENGINEER UTF-8 The name of the sound engineer or sound recordist.
     ART_DIRECTOR UTF-8 The person who oversees the artists and craftspeople who build the sets.
     PRODUCTION_DESIGNER UTF-8 Artist responsible for designing the overall visual appearance of a movie.
     CHOREGRAPHER UTF-8 The name of the choregrapher
     COSTUME_DESIGNER UTF-8 The name of the costume designer
     ACTOR UTF-8 An actor or actress playing a role in this movie. This is the person's real name, not the character's name the person is playing.
     CHARACTER UTF-8 The name of the character an actor or actress
   */
  {"WRITTEN_BY", EXTRACTOR_METATYPE_WRITER},
  /*
     SCREENPLAY_BY UTF-8 The author of the screenplay or scenario (used for movies and TV shows).
     EDITED_BY UTF-8 This is akin to the IEDT tag in Extended RIFF.
     PRODUCER UTF-8 Produced by. This is akin to the IPRO tag in Extended RIFF. (NOT EXTRACTOR_METATYPE_PRODUCER!)
     COPRODUCER UTF-8 The name of a co-producer.
     EXECUTIVE_PRODUCER UTF-8 The name of an executive producer.
     DISTRIBUTED_BY UTF-8 This is akin to the IDST tag in Extended RIFF.
     MASTERED_BY UTF-8 The engineer who mastered the content for a physical medium or for digital distribution.
   */
  {"ENCODED_BY", EXTRACTOR_METATYPE_ENCODED_BY},
  /*
     MIXED_BY UTF-8 DJ mix by the artist specified
     REMIXED_BY UTF-8 Interpreted, remixed, or otherwise modified by. This is akin to the TPE4 tag in ID3.
     PRODUCTION_STUDIO UTF-8 This is akin to the ISTD tag in Extended RIFF.
     THANKS_TO UTF-8 A very general tag for everyone else that wants to be listed.
   */
  {"PUBLISHER", EXTRACTOR_METATYPE_PUBLISHER},
  /*
     LABEL UTF-8 The record label or imprint on the disc.
   */
  {"GENRE", EXTRACTOR_METATYPE_GENRE},
  {"MOOD", EXTRACTOR_METATYPE_MOOD},
  /*
     ORIGINAL_MEDIA_TYPE UTF-8 Describes the original type of the media, such as, "DVD", "CD", "computer image," "drawing," "lithograph," and so forth. This is akin to the TMED tag in ID3.
     CONTENT_TYPE UTF-8 The type of the item. e.g. Documentary, Feature Film, Cartoon, Music Video, Music, Sound FX, ...
   */
  {"SUBJECT", EXTRACTOR_METATYPE_SUBJECT},
  {"DESCRIPTION", EXTRACTOR_METATYPE_DESCRIPTION},
  {"KEYWORDS", EXTRACTOR_METATYPE_KEYWORDS},
  {"SUMMARY", EXTRACTOR_METATYPE_SUMMARY},
  /*
     SYNOPSIS UTF-8 A description of the story line of the item.
     INITIAL_KEY UTF-8 The initial key that a musical track starts in. The format is identical to ID3.
     PERIOD UTF-8 Describes the period that the piece is from or about. For example, "Renaissance".
     LAW_RATING UTF-8 Depending on the country it's the format of the rating of a movie (P, R, X in the USA, an age in other countries or a URI defining a logo).
     ICRA binary The ICRA content rating for parental control. (Previously RSACi)
   */
  {"DATE_RELEASED", EXTRACTOR_METATYPE_PUBLICATION_DATE},
  {"DATE_RECORDED", EXTRACTOR_METATYPE_CREATION_DATE},
  {"DATE_ENCODED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_TAGGED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_DIGITIZED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_WRITTEN", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  {"DATE_PURCHASED", EXTRACTOR_METATYPE_UNKNOWN_DATE},
  /*
     RECORDING_LOCATION UTF-8 The location where the item was recorded. The countries corresponding to the string, same 2 octets as in Internet domains, or possibly ISO-3166. This code is followed by a comma, then more detailed information such as state/province, another comma, and then city. For example, "US, Texas, Austin". This will allow for easy sorting. It is okay to only store the country, or the country and the state/province. More detailed information can be added after the city through the use of additional commas. In cases where the province/state is unknown, but you want to store the city, simply leave a space between the two commas. For example, "US, , Austin".
     COMPOSITION_LOCATION UTF-8 Location that the item was originally designed/written. The countries corresponding to the string, same 2 octets as in Internet domains, or possibly ISO-3166. This code is followed by a comma, then more detailed information such as state/province, another comma, and then city. For example, "US, Texas, Austin". This will allow for easy sorting. It is okay to only store the country, or the country and the state/province. More detailed information can be added after the city through the use of additional commas. In cases where the province/state is unknown, but you want to store the city, simply leave a space between the two commas. For example, "US, , Austin".
     COMPOSER_NATIONALITY UTF-8 Nationality of the main composer of the item, mostly for classical music. The countries corresponding to the string, same 2 octets as in Internet domains, or possibly ISO-3166.
   */
  /* Matroska considers "COMMENT", "PLAY_COUNTER" and "RATING" to be personal. Should we extract them? */
  {"COMMENT", EXTRACTOR_METATYPE_COMMENT},
  {"PLAY_COUNTER", EXTRACTOR_METATYPE_PLAY_COUNTER},
  {"RATING", EXTRACTOR_METATYPE_POPULARITY_METER},
  /*
     ENCODER UTF-8 The software or hardware used to encode this item. ("LAME" or "XviD")
     ENCODER_SETTINGS UTF-8 A list of the settings used for encoding this item. No specific format.
     BPS UTF-8 The average bits per second of the specified item. This is only the data in the Blocks, and excludes headers and any container overhead.
     FPS UTF-8 The average frames per second of the specified item. This is typically the average number of Blocks per second. In the event that lacing is used, each laced chunk is to be counted as a separate frame.
   */
  {"BPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE},
  /*
     MEASURE UTF-8 In music, a measure is a unit of time in Western music like "4/4". It represents a regular grouping of beats, a meter, as indicated in musical notation by the time signature.. The majority of the contemporary rock and pop music you hear on the radio these days is written in the 4/4 time signature.
     TUNING UTF-8 It is saved as a frequency in hertz to allow near-perfect tuning of instruments to the same tone as the musical piece (e.g. "441.34" in Hertz). The default value is 440.0 Hz.
     REPLAYGAIN_GAIN binary The gain to apply to reach 89dB SPL on playback. This is based on the Replay Gain standard. Note that ReplayGain information can be found at all TargetType levels (track, album, etc).
     REPLAYGAIN_PEAK binary The maximum absolute peak value of the item. This is based on the Replay Gain standard.
   */
  {"ISRC", EXTRACTOR_METATYPE_ISRC},
  /*
     MCDI binary This is a binary dump of the TOC of the CDROM that this item was taken from. This holds the same information as the MCDI in ID3.
     ISBN UTF-8 International Standard Book Number
     BARCODE UTF-8 EAN-13 (European Article Numbering) or UPC-A (Universal Product Code) bar code identifier
     CATALOG_NUMBER UTF-8 A label-specific string used to identify the release (TIC 01 for example).
     LABEL_CODE UTF-8 A 4-digit or 5-digit number to identify the record label, typically printed as (LC) xxxx or (LC) 0xxxx on CDs medias or covers (only the number is stored).
     LCCN UTF-8 Library of Congress Control Number
   */
  /*
     PURCHASE_ITEM UTF-8 URL to purchase this file. This is akin to the WPAY tag in ID3.
     PURCHASE_INFO UTF-8 Information on where to purchase this album. This is akin to the WCOM tag in ID3.
     PURCHASE_OWNER UTF-8 Information on the person who purchased the file. This is akin to the TOWN tag in ID3.
     PURCHASE_PRICE UTF-8 The amount paid for entity. There should only be a numeric value in here. Only numbers, no letters or symbols other than ".". For instance, you would store "15.59" instead of "$15.59USD".
     PURCHASE_CURRENCY UTF-8 The currency type used to pay for the entity. Use ISO-4217 for the 3 letter currency code.
   */
  {"COPYRIGHT", EXTRACTOR_METATYPE_COPYRIGHT},
  {"PRODUCTION_COPYRIGHT", EXTRACTOR_METATYPE_COPYRIGHT},
  {"LICENSE", EXTRACTOR_METATYPE_LICENSE},
  /* TERMS_OF_USE UTF-8 The terms of use for this item. This is akin to the USER tag in ID3. */
  {NULL, EXTRACTOR_METATYPE_RESERVED}
};

/**
 * EBML and Matroska element IDs.  The values include the length-marker
 * bits of the encoded ID (e.g. EBMLID_FILE_BEGIN is the first byte of
 * EBMLID_EBML), i.e. they have the form that VINTparse () produces in
 * VINT_READ_ID mode, where the 1-bit marker is not removed.
 */
enum
{
  EBMLID_FILE_BEGIN = 0x1A, /* First byte of EBMLID_EBML */
  EBMLID_EBML = 0x1A45DFA3,
  EBMLID_VERSION = 0x4286,
  EBMLID_READ_VERSION = 0x42f7,
  EBMLID_MAX_ID_LENGTH = 0x42f2,
  EBMLID_MAX_SIZE_LENGTH = 0x42f3,
  EBMLID_DOCTYPE = 0x4282,
  EBMLID_DOCTYPE_VERSION = 0x4287,
  EBMLID_DOCTYPE_READ_VERSION = 0x4285,

  /*EBMLID_CRC32 = 0xC3, FIXME: support this! Need some magical logic to skip it, unlike MatroskaID_CRC32 = 0xBF. That is, files with 0xC3 are completely unreadable at the moment. */

  MatroskaID_Segment = 0x18538067,

  MatroskaID_SeekHead = 0x114D9B74,

  MatroskaID_Seek = 0x4DBB, /* mandatory, may appear more than once. Contains a single seek entry to an EBML element. */

  MatroskaID_SeekID = 0x53AB, /* mandatory, BINARY. The binary ID corresponding to the element name. */
  MatroskaID_SeekPosition = 0x53AC, /* mandatory, UINT. The position of the element in the segment in octets (0 = first level 1 element). */

  MatroskaID_Info = 0x1549A966,

  MatroskaID_Info_TimecodeScale = 0x2AD7B1, /* defaults to 1000000, UINT. Timecode scale in nanoseconds (1.000.000 means all timecodes in the segment are expressed in milliseconds). */
  MatroskaID_Info_Duration = 0x4489, /* must be >0, FLOAT. Duration of the segment (based on TimecodeScale). */
  MatroskaID_Info_DateUTC = 0x4461, /* DATE. Date of the origin of timecode (value 0), i.e. production date. */
  MatroskaID_Info_Title = 0x7BA9, /* UTF-8-encoded. General name of the segment. */
  MatroskaID_Info_MuxingApp = 0x4D80, /* mandatory, UTF-8-encoded. Muxing application or library ("libmatroska-0.4.3"). */
  MatroskaID_Info_WritingApp = 0x5741, /* mandatory, UTF-8-encoded. Writing application ("mkvmerge-0.3.3"). */

  MatroskaID_Tracks = 0x1654AE6B,

  MatroskaID_Tracks_TrackEntry = 0xAE,

  MatroskaID_Tracks_TrackType = 0x83, /* mandatory, 1-254, UINT. A set of track types coded on 8 bits (1: video, 2: audio, 3: complex, 0x10: logo, 0x11: subtitle, 0x12: buttons, 0x20: control). */
  MatroskaID_Tracks_Name = 0x536E, /* UTF-8-encoded. A human-readable track name. */
  MatroskaID_Tracks_Language = 0x22B59C, /* defaults to 'eng', string. Specifies the language of the track in the Matroska languages form. */
  MatroskaID_Tracks_CodecID = 0x86, /* mandatory, string. An ID corresponding to the codec, see the codec page ( http://matroska.org/technical/specs/codecid/index.html ) for more info. */
  MatroskaID_Tracks_CodecName = 0x258688, /* UTF-8-encoded. A human-readable string specifying the codec. */

  MatroskaID_Tracks_Video = 0xE0, /* Video settings. */
  MatroskaID_Tracks_Video_FlagInterlaced = 0x9A, /* mandatory, 0-1, defaults to 0, UINT. Set if the video is interlaced. (1 bit) */
  MatroskaID_Tracks_Video_StereoMode = 0x53B8, /* defaults to 0, UINT. Stereo-3D video mode (0: mono, 1: side by side (left eye is first), 2: top-bottom (right eye is first), 3: top-bottom (left eye is first), 4: checkboard (right is first), 5: checkboard (left is first), 6: row interleaved (right is first), 7: row interleaved (left is first), 8: column interleaved (right is first), 9: column interleaved (left is first), 10: anaglyph (cyan/red), 11: side by side (right eye is first), 12: anaglyph (green/magenta), 13 both eyes laced in one Block (left eye is first), 14 both eyes laced in one Block (right eye is first)) . There are some more details on 3D support in the Specification Notes ( http://matroska.org/technical/specs/notes.html#3D ). */
  MatroskaID_Tracks_Video_PixelWidth = 0xB0, /* mandatory, not 0, UINT. Width of the encoded video frames in pixels. */
  MatroskaID_Tracks_Video_PixelHeight = 0xBA, /* mandatory, not 0, UINT. Height of the encoded video frames in pixels. */
  MatroskaID_Tracks_Video_DisplayWidth = 0x54B0, /* not 0, defaults to PixelWidth, UINT. Width of the video frames to display. The default value is only valid when DisplayUnit is 0. */
  MatroskaID_Tracks_Video_DisplayHeight = 0x54BA, /* not 0, defaults to PixelHeight, UINT. Height of the video frames to display. The default value is only valid when DisplayUnit is 0. */
  MatroskaID_Tracks_Video_DisplayUnit = 0x54B2, /* defaults to 0, UINT. How DisplayWidth & DisplayHeight should be interpreted (0: pixels, 1: centimeters, 2: inches, 3: Display Aspect Ratio). */

  MatroskaID_Tracks_Audio = 0xE1, /* Audio settings. */
  MatroskaID_Tracks_Audio_SamplingFrequency = 0xB5, /* mandatory, > 0, defaults to 8000.0, FLOAT. Sampling frequency in Hz. */
  MatroskaID_Tracks_Audio_OutputSamplingFrequency = 0x78B5, /* > 0, defaults to SamplingFrequency, FLOAT. Real output sampling frequency in Hz (used for SBR techniques). */
  MatroskaID_Tracks_Audio_Channels = 0x9F, /* mandatory, not 0, defaults to 1, UINT. Numbers of channels in the track. */
  MatroskaID_Tracks_Audio_BitDepth = 0x6264, /* not 0, UINT. Bits per sample, mostly used for PCM. */


  MatroskaID_Tags = 0x1254C367, /* can appear more than once. Element containing elements specific to Tracks/Chapters. A list of valid tags can be found here. */
  MatroskaID_Tags_Tag = 0x7373, /* mandatory, can appear more than once. Element containing elements specific to Tracks/Chapters. */
  MatroskaID_Tags_Tag_SimpleTag = 0x67C8, /* mandatory, can appear more than once, recursive. Contains general information about the target. */
  MatroskaID_Tags_Tag_SimpleTag_TagName = 0x45A3, /* mandatory, UTF8-encoded. The name of the Tag that is going to be stored. */
  MatroskaID_Tags_Tag_SimpleTag_TagLanguage = 0x447A, /* mandatory, defaults to 'und', string. Specifies the language of the tag specified, in the Matroska languages form. */
  MatroskaID_Tags_Tag_SimpleTag_TagDefault = 0x4484, /* mandatory, 0-1, defaults to 1, UINT. Indication to know if this is the default/original language to use for the given tag. (1 bit) */
  MatroskaID_Tags_Tag_SimpleTag_TagString = 0x4487, /* UTF-8-encoded. The value of the Tag. */
  MatroskaID_Tags_Tag_SimpleTag_TagBinary = 0x4485 /* BINARY. The values of the Tag if it is binary. Note that this cannot be used in the same SimpleTag as TagString. */
};


/**
 * Selects how VINTparse () post-processes the variable-length integer
 * it has read.
 */
enum VINTParseMode
{
  /* Element ID: the 1-bit length marker is kept in the value */
  VINT_READ_ID = 0,
  /* Element size: marker removed; an all-ones value is reported as UINT64_MAX ("size is unknown") */
  VINT_READ_SIZE = 1,
  /* Unsigned integer: marker removed */
  VINT_READ_UINT = 2,
  /* Signed integer: marker removed, large values are mapped to negative ones */
  VINT_READ_SINT = 3
};

/**
 * Reads an EBML variable-length integer from the input of a plugin
 * (the bytes are obtained with pl_read ()).
 *
 * @param plugin plugin whose input is read
 * @param result receives the integer.
315 * @param mode (see VINTParseMode) 316 * @return number of bytes occupied by the integer (the integer itself 317 * is always put into 64-bit long buffer), 318 * -1 if there is not enough bytes to read the integer 319 */ 320 static ssize_t 321 VINTparse (struct EXTRACTOR_PluginList *plugin, 322 int64_t *result, enum VINTParseMode mode) 323 { 324 /* 10000000 01000000 00100000 00010000 00001000 00000100 00000010 00000001 */ 325 static const unsigned char mask[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 326 0x02, 0x01 }; 327 /* 01111111 00111111 00011111 00001111 00000111 00000011 00000001 00000000 */ 328 static const unsigned char imask[8] = { 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 329 0x01, 0x00 }; 330 static const int64_t int_negative_limits[8] = { 331 -0x00000000000040LL, /* 7-bit integer */ 332 -0x00000000002000LL, /* 14-bit integer */ 333 -0x00000000100000LL, /* 21-bit integer */ 334 -0x00000008000000LL, /* 28-bit integer */ 335 -0x00000400000000LL, /* 35-bit integer */ 336 -0x00020000000000LL, /* 42-bit integer */ 337 -0x01000000000000LL, /* 49-bit integer */ 338 -0x80000000000000LL, /* 56-bit integer */ 339 }; 340 static const int64_t int_positive_limits[8] = { 341 0x00000000000040ULL - 1LL, /* 7-bit integer */ 342 0x00000000002000ULL - 1LL, /* 14-bit integer */ 343 0x00000000100000ULL - 1LL, /* 21-bit integer */ 344 0x00000008000000ULL - 1LL, /* 28-bit integer */ 345 0x00000400000000ULL - 1LL, /* 35-bit integer */ 346 0x00020000000000ULL - 1LL, /* 42-bit integer */ 347 0x01000000000000ULL - 1LL, /* 49-bit integer */ 348 0x80000000000000ULL - 1LL, /* 56-bit integer */ 349 }; 350 static const uint64_t uint_positive_limits[8] = { 351 0x0000000000000080ULL - 1LL, /* 7-bit integer */ 352 0x0000000000004000ULL - 1LL, /* 14-bit integer */ 353 0x0000000000200000ULL - 1LL, /* 21-bit integer */ 354 0x0000000010000000ULL - 1LL, /* 28-bit integer */ 355 0x0000000800000000ULL - 1LL, /* 35-bit integer */ 356 0x0000040000000000ULL - 1LL, /* 42-bit integer */ 357 
0x0002000000000000ULL - 1LL, /* 49-bit integer */ 358 0x0100000000000000ULL - 1LL, /* 56-bit integer */ 359 }; 360 int vint_width; 361 unsigned int c; 362 uint64_t result_u; 363 int64_t result_s; 364 uint64_t temp; 365 unsigned char *data; 366 unsigned char first_byte; 367 unsigned char int_bytes[8]; 368 369 /* Minimal integer size is 1 byte */ 370 if (1 != pl_read (plugin, &data, 1)) 371 return -1; 372 first_byte = data[0]; 373 374 /* An integer begins with zero or more 0-bits. Number of 0-bits indicates the 375 * width of the integer, zero 0-bits means a 1-byte long integer; 8 0-bits 376 * indicate a 8-byte (64-bit) integer. 377 * 0-bits are followed by a mandatory 1-bit. Then - by the bits of the integer 378 * itself. Integers are stored in big-endian order. Because of the width prefix 379 * and the mandatory 1-bit, integers are relatively short: 380 * 1-byte integer has 2^7 different values, 381 * 2-byte integer has 2^14 different values, 382 * etc 383 *//* 384 * Examine the first byte and see how many 0-bytes are at its beginning. 385 */vint_width = 0; 386 for (c = 0; c < 8; c++) 387 if (! (first_byte & mask[c])) 388 vint_width++; 389 else 390 break; 391 /* vint_width now contains the number of 0-bytes. That is also the number 392 * of extra bytes occupied by the integer (beyond the one that we've just 393 * partially read). 394 */ 395 if (vint_width != pl_read (plugin, &data, vint_width)) 396 return -1; 397 398 if ((vint_width >= 8)) 399 return 0; 400 401 memcpy (&int_bytes[1], data, vint_width); 402 int_bytes[0] = first_byte; 403 404 /* OK, signedness is a PITA. 
Here's a small scale example to illustrate 405 * the point: 406 * 4-bit unsigned integer: 407 * 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 408 * 4-bit signed integer: 409 * 0 1 2 3 4 5 6 7 -8 -7 -6 -5 -4 -3 -2 -1 410 * 411 * 3 here is 0011b, and -3 is 1101b 412 * However, writing 1101b into int8_t memory location will NOT make 413 * the machine interpret it as -3, it will be interpreted as 00001101b, 414 * which is 13. To be -3 in int8_t it has to be 11111101b. That is, 415 * it must be padded with extra 1s to the left, but only if its first 416 * bit is set (which means a negative integer)! 417 * Easier way (without looking closesly at the bits): 418 * 1) get it as unsigned integer (say, 1010b, which is 10 for a 4-bit unsigned 419 * integer, and is 10 for any large unsigned integer, so this interpretation is 420 * always correct). 421 * 2) see if it's more than what a signed integer would hold (it is - a 422 * signed integer only holds up to 7). At this point we will need an array of 8 423 * different maximums for signed integers, indexed by vint_width. 424 * 3) do the following math: 10 - 8 = 2 ; -8 + 2 = -6 425 * That is, the minimal signed value (-8) and the number (10) should be summed, 426 * and the sum (2) should be added to the minimal signed value (-8) 427 * to get the signed counterpart (-6) of the number (10) 428 * 13 - 8 = 5; -8 + 5 = -3 429 * It's better to do that in two separate steps, because combining it into one step 430 * boils down to -8 + -8 + 13, which might confuse the compiler, because -8 + -8 = -16, 431 * which is outside of the signed integer range (remember, we're in 4-bit space here). 432 * on the other hand, 5 and -3 both are within the range. 
433 * 4) if the number does not exceed the signed integer maximum (7), store it as-is 434 */result_u = 0; 435 /* Copy the extra bytes into a temporary buffer, in the right order */ 436 for (c = 0; c < vint_width; c++) 437 result_u += ((uint64_t) int_bytes[vint_width - c]) << (c * 8); 438 439 /* Add the first byte, do mode-dependent adjustment, then copy the result */ 440 switch (mode) 441 { 442 case VINT_READ_UINT: 443 /* Unset the 1-bit marker */ 444 result_u += ((uint64_t) int_bytes[0] & imask[vint_width]) << (vint_width 445 * 8); 446 memcpy (result, &result_u, sizeof (uint64_t)); 447 break; 448 case VINT_READ_ID: 449 /* Do not unset the 1-bit marker*/ 450 result_u += ((uint64_t) int_bytes[0]) << (vint_width * 8); 451 memcpy (result, &result_u, sizeof (uint64_t)); 452 break; 453 case VINT_READ_SIZE: 454 /* Unset the 1-bit marker */ 455 result_u += ((uint64_t) int_bytes[0] & imask[vint_width]) << (vint_width 456 * 8); 457 /* Special case: all-1 size means "size is unknown". We indicate this 458 * in the return value by setting it to UINT64_MAX. 459 */ 460 if (result_u == uint_positive_limits[vint_width]) 461 result_u = 0xFFFFFFFFFFFFFFFFULL; 462 memcpy (result, &result_u, sizeof (uint64_t)); 463 break; 464 case VINT_READ_SINT: 465 /* Unset the 1-bit marker */ 466 result_u += ((uint64_t) int_bytes[0] & imask[vint_width]) << (vint_width 467 * 8); 468 /* Interpret large values as negative signed values */ 469 if (result_u > int_positive_limits[vint_width]) 470 { 471 /* Pray that the compiler won't optimize this */ 472 temp = result_u + int_negative_limits[vint_width]; 473 result_s = int_negative_limits[vint_width] + temp; 474 } 475 else 476 result_s = result_u; 477 memcpy (result, &result_s, sizeof (int64_t)); 478 break; 479 } 480 return vint_width + 1; 481 } 482 483 484 /** 485 * Reads an EBML element header. Only supports 32-bit IDs and 64-bit sizes. 
486 * (EBML might specify that IDs larger than 32 bits are allowed, or that 487 * sizes larger than 64 bits are allowed). 488 * 489 * @param buffer array of bytes to read the header from 490 * @param start index at which start to read 491 * @param end first invalid index in the array (i.e. array size) 492 * @param id receives the element id 493 * @param size receives the element size 494 * @return number of bytes occupied by the header, 495 * 0 if buffer doesn't contain a header at 'start', 496 * -1 if buffer doesn't contain a complete header 497 */ 498 static ssize_t 499 elementRead (struct EXTRACTOR_PluginList *plugin, 500 uint32_t *id, int64_t *size) 501 { 502 int64_t tempID; 503 int64_t tempsize; 504 ssize_t id_offset; 505 ssize_t size_offset; 506 507 tempID = 0; 508 509 id_offset = VINTparse (plugin, &tempID, VINT_READ_ID); 510 if (id_offset <= 0) 511 return id_offset; 512 if (id_offset > 4) 513 /* Interpret unsupported long IDs as file corruption */ 514 return 0; 515 /* VINTparse takes care of returning 0 when size is > 8 bytes */ 516 size_offset = VINTparse (plugin, &tempsize, VINT_READ_SIZE); 517 if (size_offset <= 0) 518 return size_offset; 519 *id = (uint32_t) tempID; 520 *size = tempsize; 521 #if DEBUG_EBML 522 printf ("EL 0x%06X %llu\n", *id, *size); 523 #endif 524 return id_offset + size_offset; 525 } 526 527 528 static ssize_t 529 idRead (struct EXTRACTOR_PluginList *plugin, 530 uint64_t length, uint32_t *id) 531 { 532 int64_t tempID; 533 ssize_t id_offset; 534 535 tempID = 0; 536 537 id_offset = VINTparse (plugin, &tempID, VINT_READ_ID); 538 if (id_offset <= 0) 539 return id_offset; 540 if (id_offset > 4) 541 return 0; 542 *id = (uint32_t) tempID; 543 return id_offset; 544 } 545 546 547 static ssize_t 548 uintRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, 549 uint64_t *result) 550 { 551 size_t c; 552 unsigned char *data; 553 554 if (length != pl_read (plugin, &data, length)) 555 return -1; 556 557 *result = 0; 558 for (c = 1; c <= 
length; c++) 559 *result += ((uint64_t) data[c - 1]) << (8 * (length - c)); 560 return (ssize_t) length; 561 } 562 563 564 static ssize_t 565 sintRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, int64_t *result) 566 { 567 size_t c; 568 uint64_t tmp; 569 unsigned char *data; 570 571 if (length != pl_read (plugin, &data, length)) 572 return -1; 573 574 tmp = 0; 575 for (c = 1; c <= length; c++) 576 tmp += ((uint64_t) data[c - 1]) << (8 * (length - c)); 577 if (0x80 == (0x80 & data[0])) 578 { 579 /* OK, i'm just too tired to think... If sign bit is set, pad the rest of the 580 * uint64_t with 0xFF. Unlike variable-length integers, these have normal 581 * multiple-of-8 length, and will fit well. They just need to be padded. 582 */ 583 int i; 584 for (i = length; i < 8; i++) 585 tmp += ((uint64_t) 0xFF) << (8 * i); 586 } 587 memcpy (result, &tmp, sizeof (uint64_t)); 588 return (ssize_t) length; 589 } 590 591 592 static ssize_t 593 stringRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, char *result) 594 { 595 uint64_t read_length; 596 unsigned char *data; 597 598 read_length = length; 599 if (length > MAX_STRING_SIZE) 600 read_length = MAX_STRING_SIZE; 601 602 if (read_length != pl_read (plugin, &data, read_length)) 603 return -1; 604 605 memcpy (result, data, read_length); 606 result[read_length] = '\0'; 607 if (read_length < length) 608 if ((length - read_length) != pl_read (plugin, &data, length - read_length)) 609 return -1; 610 /* Can't return uint64_t - need it to be signed */ 611 return 1; 612 } 613 614 615 static ssize_t 616 floatRead (struct EXTRACTOR_PluginList *plugin, uint64_t length, long 617 double *result) 618 { 619 size_t c; 620 unsigned char t[8]; 621 unsigned char *data; 622 623 if (length != pl_read (plugin, &data, length)) 624 return -1; 625 626 /* we don't support 10-byte floats, because not all C compilers will guarantee that long double is stored in 10 bytes in a IEEE-conformant format */ 627 if ((length != 4) && (length != 8) 
/* && length != 10 */) 628 return 0; 629 630 for (c = 0; c < length; c++) 631 { 632 #if __BYTE_ORDER == __BIG_ENDIAN 633 t[c] = data[c]; 634 #else 635 t[c] = data[length - 1 - c]; 636 #endif 637 } 638 if (length == 4) 639 *result = *((float *) t); 640 else if (length == 8) 641 *result = *((double *) t); 642 else 643 *result = *((long double *) t); 644 return (ssize_t) length; 645 } 646 647 648 static const char stream_type_letters[] = "?vat"; /*[0]-no, [1]-video,[2]-audio,[3]-text */ 649 650 enum EBMLState 651 { 652 EBML_BAD_STATE = -1, 653 EBML_LOOKING_FOR_HEADER = 0, 654 EBML_READING_HEADER = 1, 655 EBML_READING_ELEMENTS = 2, 656 EBML_READ_ELEMENT = 3, 657 EBML_READING_HEADER_ELEMENTS = 4, 658 EBML_FINISHED_READING_HEADER = 5, 659 EBML_READ_UINT, 660 EBML_READ_ID, 661 EBML_READ_SINT, 662 EBML_READ_FLOAT, 663 EBML_READ_STRING, 664 EBML_READING_HEADER_ELEMENT_VALUE, 665 EBML_SKIP_UNTIL_NEXT_HEADER, 666 EBML_READING_MATROSKA_SEGMENT, 667 EBML_READING_MATROSKA_SEGMENT_CONTENTS, 668 EBML_FINISHED_READING_MATROSKA_SEGMENT_CONTENTS, 669 EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS, 670 EBML_FINISHED_READING_MATROSKA_SEEK_HEAD_CONTENTS, 671 EBML_READING_MATROSKA_SEEK_CONTENTS, 672 EBML_FINISHED_READING_MATROSKA_SEEK_CONTENTS, 673 EBML_READING_MATROSKA_SEEK_CONTENTS_VALUE, 674 EBML_READING_MATROSKA_INFO_CONTENTS, 675 EBML_FINISHED_READING_MATROSKA_INFO_CONTENTS, 676 EBML_READING_MATROSKA_TRACKS_CONTENTS, 677 EBML_FINISHED_READING_MATROSKA_TRACKS_CONTENTS, 678 EBML_READING_MATROSKA_TAGS_CONTENTS, 679 EBML_FINISHED_READING_MATROSKA_TAGS_CONTENTS, 680 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS, 681 EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_CONTENTS, 682 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS_VALUE, 683 EBML_READING_MATROSKA_INFO_CONTENTS_VALUE, 684 EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS, 685 EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS, 686 EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS_VALUE, 687 EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS, 
688 EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS_VALUE, 689 EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS, 690 EBML_READING_MATROSKA_TAG_CONTENTS, 691 EBML_FINISHED_READING_MATROSKA_TAG_CONTENTS, 692 EBML_READING_MATROSKA_SIMPLETAG_CONTENTS, 693 EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS, 694 EBML_READING_MATROSKA_SIMPLETAG_CONTENTS_VALUE 695 }; 696 697 struct ebml_element 698 { 699 uint64_t position; 700 uint64_t header_size; 701 uint32_t id; 702 uint64_t size; 703 struct ebml_element *parent; 704 int finish_state; 705 int prev_state; 706 int bail_state; 707 int bail_next_state; 708 }; 709 710 struct matroska_seek_list 711 { 712 struct matroska_seek_list *next; 713 uint32_t id; 714 uint64_t position; 715 }; 716 717 struct matroska_simpletag 718 { 719 struct matroska_simpletag *next; 720 struct matroska_simpletag *child; 721 struct matroska_simpletag *parent; 722 char *name; 723 char *string; 724 }; 725 726 struct ebml_state 727 { 728 enum EBMLState state; 729 730 struct ebml_element *stack_top; 731 732 enum EBMLState next_state; 733 734 int reported_ebml; 735 int valid_ebml; 736 uint64_t ebml_version; 737 uint64_t ebml_READ_version; 738 uint64_t ebml_max_id_length; 739 uint64_t ebml_max_size_length; 740 char *doctype; 741 uint64_t doctype_version; 742 uint64_t doctype_read_version; 743 744 int64_t segment_contents_start; 745 746 struct matroska_seek_list *matroska_seeks; 747 struct matroska_seek_list *matroska_seeks_tail; 748 struct matroska_seek_list *matroska_pos; 749 uint32_t matroska_seek_id; 750 uint64_t matroska_seek_position; 751 752 int reported_matroska_info; 753 int valid_matroska_info; 754 uint64_t matroska_info_timecode_scale; 755 double matroska_info_duration; 756 int matroska_info_date_utc_is_set; 757 int64_t matroska_info_date_utc; 758 char *matroska_info_title; 759 char *matroska_info_muxing_app; 760 char *matroska_info_writing_app; 761 762 int reported_matroska_track; 763 int valid_matroska_track; 764 uint64_t 
matroska_track_type; 765 char *matroska_track_name; 766 char *matroska_track_language; 767 char *matroska_track_codec_id; 768 char *matroska_track_codec_name; 769 770 int valid_matroska_track_video; 771 uint64_t matroska_track_video_flag_interlaced; 772 uint64_t matroska_track_video_stereo_mode; 773 uint64_t matroska_track_video_pixel_width; 774 uint64_t matroska_track_video_pixel_height; 775 uint64_t matroska_track_video_display_width; 776 uint64_t matroska_track_video_display_height; 777 uint64_t matroska_track_video_display_unit; 778 779 int valid_matroska_track_audio; 780 double matroska_track_audio_sampling_frequency; 781 double matroska_track_audio_output_sampling_frequency; 782 uint64_t matroska_track_audio_channels; 783 uint64_t matroska_track_audio_bit_depth; 784 785 struct matroska_simpletag *tag_tree; 786 struct matroska_simpletag *tag_last; 787 struct matroska_simpletag *tag_current; 788 }; 789 790 static void 791 clean_ebml_state_ebml (struct ebml_state *state) 792 { 793 if (state->doctype != NULL) 794 free (state->doctype); 795 state->doctype = NULL; 796 state->reported_ebml = 0; 797 state->valid_ebml = 0; 798 state->ebml_version = 1; 799 state->ebml_READ_version = 1; 800 state->ebml_max_id_length = 4; 801 state->ebml_max_size_length = 8; 802 state->doctype = NULL; 803 state->doctype_version = 0; 804 state->doctype_read_version = 0; 805 } 806 807 808 static void 809 clean_ebml_state_matroska_simpletags (struct ebml_state *state) 810 { 811 struct matroska_simpletag *el, *parent, *next; 812 for (el = state->tag_tree; el;) 813 { 814 if (el->child != NULL) 815 { 816 el = el->child; 817 continue; 818 } 819 parent = el->parent; 820 next = el->next; 821 if (el->name != NULL) 822 free (el->name); 823 if (el->string != NULL) 824 free (el->string); 825 free (el); 826 if ((parent != NULL) && (parent->child == el)) 827 parent->child = next; 828 el = next; 829 if (next == NULL) 830 el = parent; 831 } 832 state->tag_tree = NULL; 833 state->tag_last = NULL; 834 
state->tag_current = NULL; 835 } 836 837 838 void 839 matroska_add_tag (struct ebml_state *state, struct matroska_simpletag *parent, 840 char *name, char *string) 841 { 842 struct matroska_simpletag *el = malloc (sizeof (struct matroska_simpletag)); 843 el->parent = parent; 844 el->next = NULL; 845 el->child = NULL; 846 el->name = name; 847 el->string = string; 848 if (state->tag_last != NULL) 849 { 850 if (state->tag_last == parent) 851 state->tag_last->child = el; 852 else 853 state->tag_last->next = el; 854 } 855 state->tag_last = el; 856 } 857 858 859 static void 860 clean_ebml_state_matroska_seeks (struct ebml_state *state) 861 { 862 struct matroska_seek_list *seek_head, *next; 863 for (seek_head = state->matroska_seeks; seek_head != NULL; seek_head = next) 864 { 865 next = seek_head->next; 866 free (seek_head); 867 } 868 state->matroska_seeks = NULL; 869 state->matroska_seeks_tail = NULL; 870 } 871 872 873 static void 874 clean_ebml_state_matroska_segment (struct ebml_state *state) 875 { 876 state->segment_contents_start = 0; 877 state->matroska_pos = NULL; 878 879 clean_ebml_state_matroska_seeks (state); 880 clean_ebml_state_matroska_simpletags (state); 881 } 882 883 884 static void 885 clean_ebml_state_matroska_seek (struct ebml_state *state) 886 { 887 state->matroska_seek_id = 0; 888 state->matroska_seek_position = 0; 889 } 890 891 892 static void 893 clean_ebml_state_matroska_info (struct ebml_state *state) 894 { 895 state->reported_matroska_info = 0; 896 state->valid_matroska_info = -1; 897 state->matroska_info_timecode_scale = 1000000; 898 state->matroska_info_duration = -1.0; 899 state->matroska_info_date_utc_is_set = 0; 900 state->matroska_info_date_utc = 0; 901 if (state->matroska_info_title != NULL) 902 free (state->matroska_info_title); 903 state->matroska_info_title = NULL; 904 if (state->matroska_info_muxing_app != NULL) 905 free (state->matroska_info_muxing_app); 906 state->matroska_info_muxing_app = NULL; 907 if 
(state->matroska_info_writing_app != NULL) 908 free (state->matroska_info_writing_app); 909 state->matroska_info_writing_app = NULL; 910 } 911 912 913 static void 914 clean_ebml_state_matroska_track_video (struct ebml_state *state) 915 { 916 state->valid_matroska_track_video = -1; 917 state->matroska_track_video_flag_interlaced = 0; 918 state->matroska_track_video_stereo_mode = 0; 919 state->matroska_track_video_pixel_width = 0; 920 state->matroska_track_video_pixel_height = 0; 921 state->matroska_track_video_display_width = 0; 922 state->matroska_track_video_display_height = 0; 923 state->matroska_track_video_display_unit = 0; 924 } 925 926 927 static void 928 clean_ebml_state_matroska_track_audio (struct ebml_state *state) 929 { 930 state->valid_matroska_track_audio = -1; 931 state->matroska_track_audio_sampling_frequency = 8000.0; 932 state->matroska_track_audio_output_sampling_frequency = 0; 933 state->matroska_track_audio_channels = 1; 934 state->matroska_track_audio_bit_depth = 0; 935 } 936 937 938 static void 939 clean_ebml_state_matroska_track (struct ebml_state *state) 940 { 941 state->reported_matroska_track = 0; 942 state->valid_matroska_track = -1; 943 state->matroska_track_type = 0; 944 if (state->matroska_track_name != NULL) 945 free (state->matroska_track_name); 946 state->matroska_track_name = NULL; 947 if (state->matroska_track_language != NULL) 948 free (state->matroska_track_language); 949 state->matroska_track_language = strdup ("eng"); 950 if (state->matroska_track_codec_id != NULL) 951 free (state->matroska_track_codec_id); 952 state->matroska_track_codec_id = NULL; 953 if (state->matroska_track_codec_name != NULL) 954 free (state->matroska_track_codec_name); 955 state->matroska_track_codec_name = NULL; 956 957 clean_ebml_state_matroska_track_video (state); 958 clean_ebml_state_matroska_track_audio (state); 959 } 960 961 962 static struct ebml_state * 963 EXTRACTOR_ebml_init_state_method () 964 { 965 struct ebml_state *state; 966 state = 
malloc (sizeof (struct ebml_state)); 967 if (state == NULL) 968 return NULL; 969 memset (state, 0, sizeof (struct ebml_state)); 970 971 state->next_state = EBML_BAD_STATE; 972 973 clean_ebml_state_ebml (state); 974 clean_ebml_state_matroska_info (state); 975 clean_ebml_state_matroska_track (state); 976 return state; 977 } 978 979 980 static void 981 report_simpletag (struct ebml_state *state, EXTRACTOR_MetaDataProcessor proc, 982 void *proc_cls) 983 { 984 struct matroska_simpletag *el, *next; 985 char format[MAX_STRING_SIZE + 1]; 986 for (el = state->tag_tree; el != NULL; el = next) 987 { 988 if ((el->name != NULL) && (el->name[0] != '\0') && (el->string != NULL) && 989 (el->string[0] != '\0') ) 990 { 991 enum EXTRACTOR_MetaType metatype = EXTRACTOR_METATYPE_RESERVED; 992 struct MatroskaTagMap *map_item; 993 for (map_item = &tag_map[0]; map_item->name != NULL; map_item++) 994 { 995 if (strcmp (map_item->name, el->name) == 0) 996 { 997 metatype = map_item->id; 998 break; 999 } 1000 } 1001 if (metatype == EXTRACTOR_METATYPE_RESERVED) 1002 { 1003 snprintf (format, MAX_STRING_SIZE, "%s=%s", el->name, el->string); 1004 format[MAX_STRING_SIZE] = '\0'; 1005 ADD_MATROSKA (format, EXTRACTOR_METATYPE_UNKNOWN); 1006 } 1007 else 1008 ADD_MATROSKA (el->string, metatype); 1009 } 1010 next = el->child; 1011 while (next == NULL && el != NULL) 1012 { 1013 next = el->next; 1014 if (next == NULL) 1015 el = el->parent; 1016 } 1017 } 1018 clean_ebml_state_matroska_simpletags (state); 1019 } 1020 1021 1022 static void 1023 report_state (struct ebml_state *state, EXTRACTOR_MetaDataProcessor proc, 1024 void *proc_cls) 1025 { 1026 char format[MAX_STRING_SIZE + 1]; 1027 report_simpletag (state, proc, proc_cls); 1028 if (state->valid_ebml && ! 
state->reported_ebml) 1029 { 1030 state->reported_ebml = 1; 1031 snprintf (format, MAX_STRING_SIZE, "%llu", (unsigned long 1032 long) state->ebml_version); 1033 format[MAX_STRING_SIZE] = '\0'; 1034 ADD_EBML (format, EXTRACTOR_METATYPE_FORMAT_VERSION); 1035 snprintf (format, MAX_STRING_SIZE, "%s %llu (EBML %llu)", state->doctype, 1036 (unsigned long long) state->doctype_version, 1037 (unsigned long long) state->ebml_version); 1038 format[MAX_STRING_SIZE] = '\0'; 1039 ADD_EBML (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); 1040 } 1041 if (state->valid_ebml) 1042 clean_ebml_state_ebml (state); 1043 if (state->valid_matroska_info == -1) 1044 { 1045 if (((state->matroska_info_duration > 0) || 1046 (state->matroska_info_duration == -1.0) ) && 1047 (state->matroska_info_muxing_app != NULL) && 1048 (state->matroska_info_writing_app != NULL) ) 1049 state->valid_matroska_info = 1; 1050 else 1051 state->valid_matroska_info = 0; 1052 } 1053 if ((state->valid_matroska_info == 1) && ! state->reported_matroska_info) 1054 { 1055 state->reported_matroska_info = 1; 1056 if (state->matroska_info_duration != -1.0) 1057 { 1058 uint64_t seconds = (uint64_t) ((state->matroska_info_duration 1059 * (float) state-> 1060 matroska_info_timecode_scale) / 1e+9); 1061 snprintf (format, MAX_STRING_SIZE, "%llus", (unsigned long long) seconds); 1062 format[MAX_STRING_SIZE] = '\0'; 1063 ADD_MATROSKA (format, EXTRACTOR_METATYPE_DURATION); 1064 } 1065 if (state->matroska_info_date_utc_is_set) 1066 { 1067 struct tm millenium_start; 1068 struct tm matroska_date; 1069 int64_t millenium_start_stamp; 1070 int64_t matroska_date_stamp; 1071 #if WINDOWS 1072 __time64_t matroska_date_stamp_time_t; 1073 #else 1074 time_t matroska_date_stamp_time_t; 1075 #endif 1076 millenium_start.tm_sec = 0; 1077 millenium_start.tm_min = 0; 1078 millenium_start.tm_hour = 0; 1079 millenium_start.tm_mday = 1; 1080 millenium_start.tm_mon = 1; 1081 millenium_start.tm_year = 2001 - 1900; 1082 millenium_start.tm_isdst = -1; 1083 putenv 
("TZ=GMT0"); 1084 /* If no matter what is the size of the returned value, it fits 32-bit integer 1085 * (in fact, i could have just used a constant here, since the start of Matroska 1086 * millenium is known and never changes), but we want to use 64-bit integer to 1087 * manipulate time. If it gets trimmed later, when assigning back to a TIME_TYPE 1088 * that happens to be 32-bit long - well, tough luck. 1089 */errno = 0; 1090 #if WINDOWS 1091 millenium_start_stamp = _mktime64 (&millenium_start); 1092 #else 1093 millenium_start_stamp = (time_t) mktime (&millenium_start); 1094 #endif 1095 if (millenium_start_stamp == -1) 1096 printf ("Failed to convert time: %d\n", errno); 1097 matroska_date_stamp = millenium_start_stamp * 1000000000 1098 + state->matroska_info_date_utc; 1099 /* Now matroska_date_stamp is the number of nanoseconds since UNIX Epoch */ 1100 matroska_date_stamp_time_t = matroska_date_stamp / 1000000000; 1101 /* Now matroska_date_stamp_time_t is the number of seconds since UNIX Epoch */ 1102 #if WINDOWS 1103 if (NULL != gmtime_undocumented_64_r (&matroska_date_stamp_time_t, 1104 &matroska_date)) 1105 #else 1106 /* We want to be thread-safe. If you have no gmtime_r(), think of something! 
*/ 1107 if (NULL != gmtime_r (&matroska_date_stamp_time_t, &matroska_date)) 1108 #endif 1109 { 1110 if (0 != strftime (format, MAX_STRING_SIZE, "%Y.%m.%d %H:%M:%S UTC", 1111 &matroska_date)) 1112 ADD_MATROSKA (format, EXTRACTOR_METATYPE_CREATION_DATE); 1113 } 1114 } 1115 if (state->matroska_info_title != NULL) 1116 ADD_MATROSKA (state->matroska_info_title, EXTRACTOR_METATYPE_TITLE); 1117 if (strcmp (state->matroska_info_writing_app, 1118 state->matroska_info_muxing_app) == 0) 1119 snprintf (format, MAX_STRING_SIZE, "Written and muxed with %s", 1120 state->matroska_info_writing_app); 1121 else 1122 snprintf (format, MAX_STRING_SIZE, "Written with %s, muxed with %s", 1123 state->matroska_info_writing_app, 1124 state->matroska_info_muxing_app); 1125 format[MAX_STRING_SIZE] = '\0'; 1126 ADD_MATROSKA (format, EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE); 1127 } 1128 if (state->valid_matroska_info == 1) 1129 clean_ebml_state_matroska_info (state); 1130 if (state->valid_matroska_track == -1) 1131 { 1132 if (((state->matroska_track_type > 0) && (state->matroska_track_type < 1133 255) ) && 1134 (state->matroska_track_codec_id != NULL) ) 1135 state->valid_matroska_track = 1; 1136 else 1137 state->valid_matroska_track = 0; 1138 } 1139 if (state->valid_matroska_track_video == -1) 1140 { 1141 if (((state->matroska_track_video_flag_interlaced == 0) || 1142 (state->matroska_track_video_flag_interlaced == 1) ) && 1143 ((state->matroska_track_video_stereo_mode >= 0) && 1144 (state->matroska_track_video_stereo_mode <= 14) ) && 1145 (state->matroska_track_video_pixel_width > 0) && 1146 (state->matroska_track_video_pixel_height > 0) ) 1147 state->valid_matroska_track_video = 1; 1148 else 1149 state->valid_matroska_track_video = 0; 1150 } 1151 if (state->valid_matroska_track_audio == -1) 1152 { 1153 if ((state->matroska_track_audio_sampling_frequency > 0) && 1154 (state->matroska_track_audio_channels > 0) ) 1155 state->valid_matroska_track_audio = 1; 1156 else 1157 
state->valid_matroska_track_audio = 0; 1158 } 1159 if ((state->valid_matroska_track == 1) && ! state->reported_matroska_track) 1160 { 1161 char name_part[MAX_STRING_SIZE + 1]; 1162 char codec_part[MAX_STRING_SIZE + 1]; 1163 char bit_part[MAX_STRING_SIZE + 1]; 1164 char hz_part[MAX_STRING_SIZE + 1]; 1165 struct MatroskaTrackType *tt; 1166 const char *track_type_string = NULL; 1167 char use_video = 0; 1168 char use_audio = 0; 1169 1170 state->reported_matroska_track = 1; 1171 for (tt = track_types; tt->code > 0; tt++) 1172 { 1173 if (tt->code == state->matroska_track_type) 1174 { 1175 track_type_string = tt->name; 1176 if (tt->video_must_be_valid == 1) 1177 use_video = 1; 1178 else if (tt->audio_must_be_valid == 1) 1179 use_audio = 1; 1180 break; 1181 } 1182 } 1183 if (track_type_string == NULL) 1184 track_type_string = "unknown"; 1185 1186 if (state->matroska_track_name == NULL) 1187 snprintf (name_part, MAX_STRING_SIZE, "%s", ""); 1188 else 1189 snprintf (name_part, MAX_STRING_SIZE, "`%s' ", 1190 state->matroska_track_name); 1191 name_part[MAX_STRING_SIZE] = '\0'; 1192 1193 if (state->matroska_track_codec_name == NULL) 1194 snprintf (codec_part, MAX_STRING_SIZE, "%s", 1195 state->matroska_track_codec_id); 1196 else 1197 snprintf (codec_part, MAX_STRING_SIZE, "%s [%s]", 1198 state->matroska_track_codec_id, 1199 state->matroska_track_codec_name); 1200 codec_part[MAX_STRING_SIZE] = '\0'; 1201 1202 if (use_video && (state->valid_matroska_track_video == 1)) 1203 { 1204 /* Ignore Display* for now. Aspect ratio correction could be 1205 * done either way (stretching horizontally or squishing vertically), 1206 * so let's stick to hard cold pixel counts. 
1207 */ 1208 snprintf (format, MAX_STRING_SIZE, "%llux%llu", 1209 (unsigned long long) state->matroska_track_video_pixel_width, 1210 (unsigned long long) state->matroska_track_video_pixel_height); 1211 format[MAX_STRING_SIZE] = '\0'; 1212 ADD_MATROSKA (format, EXTRACTOR_METATYPE_IMAGE_DIMENSIONS); 1213 } 1214 if (use_audio && (state->valid_matroska_track_audio == 1)) 1215 { 1216 double freq = state->matroska_track_audio_sampling_frequency; 1217 double rfreq = freq; 1218 if (state->matroska_track_audio_output_sampling_frequency > 0) 1219 rfreq = state->matroska_track_audio_output_sampling_frequency; 1220 if (freq == rfreq) 1221 snprintf (hz_part, MAX_STRING_SIZE, "%.0fHz", freq); 1222 else 1223 snprintf (hz_part, MAX_STRING_SIZE, "%.0fHz (%.0fHz SBR)", freq, rfreq); 1224 hz_part[MAX_STRING_SIZE] = '\0'; 1225 1226 if (state->matroska_track_audio_bit_depth > 0) 1227 snprintf (bit_part, MAX_STRING_SIZE, "%llu-bit ", (unsigned long 1228 long) state-> 1229 matroska_track_audio_bit_depth); 1230 else 1231 bit_part[0] = '\0'; 1232 bit_part[MAX_STRING_SIZE] = '\0'; 1233 1234 snprintf (format, MAX_STRING_SIZE, 1235 "%s track %s(%s, %llu-channel %sat %s) [%s]", 1236 track_type_string, name_part, codec_part, 1237 (unsigned long long) state->matroska_track_audio_channels, 1238 bit_part, hz_part, state->matroska_track_language); 1239 } 1240 else 1241 { 1242 snprintf (format, MAX_STRING_SIZE, "%s track %s(%s) [%s]", 1243 track_type_string, name_part, codec_part, 1244 state->matroska_track_language); 1245 } 1246 format[MAX_STRING_SIZE] = '\0'; 1247 ADD_EBML (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); 1248 } 1249 if (state->valid_matroska_track) 1250 clean_ebml_state_matroska_track (state); 1251 } 1252 1253 1254 static int 1255 EXTRACTOR_ebml_discard_state_method (struct ebml_state *state) 1256 { 1257 if (state != NULL) 1258 { 1259 if (state->doctype != NULL) 1260 free (state->doctype); 1261 clean_ebml_state_matroska_segment (state); 1262 clean_ebml_state_matroska_info (state); 1263 
clean_ebml_state_matroska_track (state); 1264 free (state); 1265 } 1266 return 1; 1267 } 1268 1269 1270 static struct ebml_element * 1271 ebml_stack_pop (struct ebml_state *state) 1272 { 1273 struct ebml_element *result; 1274 if (state->stack_top == NULL) 1275 return NULL; 1276 result = state->stack_top; 1277 state->stack_top = result->parent; 1278 return result; 1279 } 1280 1281 1282 static void 1283 ebml_stack_push_new (struct ebml_state *state, uint64_t position, uint32_t id, 1284 uint64_t size, uint64_t header_size, int finish_state, int 1285 prev_state, int 1286 bail_state, int bail_next_state) 1287 { 1288 struct ebml_element *element = malloc (sizeof (struct ebml_element)); 1289 element->parent = state->stack_top; 1290 state->stack_top = element; 1291 element->position = position - header_size; 1292 element->header_size = header_size; 1293 element->id = id; 1294 element->size = size; 1295 element->finish_state = finish_state; 1296 element->prev_state = prev_state; 1297 element->bail_state = bail_state; 1298 element->bail_next_state = bail_next_state; 1299 } 1300 1301 1302 static int 1303 check_result (struct EXTRACTOR_PluginList *plugin, ssize_t read_result, struct 1304 ebml_state *state) 1305 { 1306 if (read_result == 0) 1307 { 1308 int64_t offset; 1309 struct ebml_element *parent = ebml_stack_pop (state); 1310 if (parent == NULL) 1311 { 1312 /* But this shouldn't really happen */ 1313 state->state = EBML_LOOKING_FOR_HEADER; 1314 return 0; 1315 } 1316 offset = parent->position + parent->header_size + parent->size; 1317 if ((offset < 0) || (offset != pl_seek (plugin, offset, SEEK_SET))) 1318 { 1319 state->state = EBML_BAD_STATE; 1320 return 0; 1321 } 1322 state->state = parent->bail_state; 1323 state->next_state = parent->bail_next_state; 1324 free (parent); 1325 return 0; 1326 } 1327 return 1; 1328 } 1329 1330 1331 static int 1332 maybe_rise_up (struct EXTRACTOR_PluginList *plugin, struct ebml_state *state, 1333 int *do_break, int64_t read_result) 1334 { 
1335 int64_t offset; 1336 offset = pl_get_pos (plugin) - read_result; 1337 if ((state->stack_top != NULL) && (offset >= state->stack_top->position 1338 + state->stack_top->header_size 1339 + state->stack_top->size) ) 1340 { 1341 state->state = state->stack_top->finish_state; 1342 pl_seek (plugin, -read_result, SEEK_CUR); 1343 *do_break = 1; 1344 return 1; 1345 } 1346 return 0; 1347 } 1348 1349 1350 static void 1351 rise_up_after_value (struct EXTRACTOR_PluginList *plugin, struct 1352 ebml_state *state, int next_state) 1353 { 1354 int64_t offset; 1355 state->state = EBML_READ_ELEMENT; 1356 offset = state->stack_top->position + state->stack_top->header_size 1357 + state->stack_top->size; 1358 free (ebml_stack_pop (state)); 1359 state->next_state = next_state; 1360 pl_seek (plugin, offset, SEEK_SET); 1361 } 1362 1363 1364 static void 1365 try_to_find_pos (struct EXTRACTOR_PluginList *plugin, struct ebml_state *state) 1366 { 1367 if (state->matroska_seeks != NULL) 1368 { 1369 struct matroska_seek_list *el, *pos = NULL; 1370 int64_t segment_position = pl_get_pos (plugin) 1371 - state->segment_contents_start; 1372 for (el = state->matroska_seeks; el != NULL; el = el->next) 1373 { 1374 if (el->position <= segment_position) 1375 pos = el; 1376 else 1377 break; 1378 } 1379 if (pos != NULL) 1380 state->matroska_pos = pos; 1381 } 1382 } 1383 1384 1385 static void 1386 maybe_seek_to_something_interesting (struct EXTRACTOR_PluginList *plugin, struct 1387 ebml_state *state) 1388 { 1389 int64_t offset; 1390 struct matroska_seek_list *el; 1391 try_to_find_pos (plugin, state); 1392 if (state->matroska_pos == NULL) 1393 return; 1394 offset = pl_get_pos (plugin); 1395 for (el = state->matroska_pos; el != NULL; el = el->next) 1396 { 1397 char do_break = 0; 1398 switch (el->id) 1399 { 1400 case MatroskaID_Info: 1401 case MatroskaID_Tracks: 1402 case MatroskaID_Tags: 1403 /* Some files will have more than one seek head */ 1404 case MatroskaID_SeekHead: 1405 if (el->position + 
state->segment_contents_start >= offset) 1406 do_break = 1; 1407 break; 1408 default: 1409 break; 1410 } 1411 if (do_break) 1412 break; 1413 } 1414 if (el == NULL) 1415 el = state->matroska_seeks_tail; 1416 if (el->position + state->segment_contents_start > offset) 1417 { 1418 /* TODO: add a separate stage after seeking that checks the ID of the element against 1419 * the one we've got from seek table. If it doesn't match - stop parsing the file. 1420 */ 1421 #if DEBUG_EBML 1422 printf ("Seeking from %llu to %llu\n", offset, el->position 1423 + state->segment_contents_start); 1424 #endif 1425 pl_seek (plugin, el->position + state->segment_contents_start, SEEK_SET); 1426 } 1427 } 1428 1429 1430 static void 1431 sort_seeks (struct ebml_state *state) 1432 { 1433 uint32_t id; 1434 int64_t position; 1435 struct matroska_seek_list *el; 1436 char sorted = 0; 1437 while (! sorted) 1438 { 1439 sorted = 1; 1440 for (el = state->matroska_seeks; el != NULL; el = el->next) 1441 { 1442 if (el->next == NULL) 1443 break; 1444 id = el->next->id; 1445 position = el->next->position; 1446 if (position < el->position) 1447 { 1448 el->next->position = el->position; 1449 el->next->id = el->id; 1450 el->position = position; 1451 el->id = id; 1452 sorted = 0; 1453 } 1454 } 1455 } 1456 } 1457 1458 1459 int 1460 EXTRACTOR_ebml_extract_method (struct EXTRACTOR_PluginList *plugin, 1461 EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 1462 { 1463 uint64_t offset = 0; 1464 ssize_t read_result; 1465 unsigned char *data; 1466 struct ebml_state *state; 1467 1468 const unsigned char *start; 1469 uint32_t eID; 1470 uint64_t eSize; 1471 int do_break; 1472 1473 uint64_t uint_value; 1474 int64_t sint_value; 1475 char string_value[MAX_STRING_SIZE + 1]; 1476 long double float_value; 1477 uint32_t id_value; 1478 1479 if (plugin == NULL) 1480 return 1; 1481 1482 state = EXTRACTOR_ebml_init_state_method (); 1483 if (state == NULL) 1484 return 1; 1485 1486 while (1) 1487 { 1488 switch (state->state) 1489 { 
1490 default: 1491 case EBML_BAD_STATE: 1492 report_state (state, proc, proc_cls); 1493 return EXTRACTOR_ebml_discard_state_method (state); 1494 case EBML_LOOKING_FOR_HEADER: 1495 offset = pl_get_pos (plugin); 1496 sint_value = pl_read (plugin, &data, 1024 * 1024); 1497 if (sint_value < 4) 1498 return EXTRACTOR_ebml_discard_state_method (state); 1499 start = NULL; 1500 while (start == NULL) 1501 { 1502 start = memchr (data, EBMLID_FILE_BEGIN, sint_value); 1503 if (start == NULL) 1504 { 1505 offset = pl_get_pos (plugin) - 3; 1506 if (offset != pl_seek (plugin, offset, SEEK_SET)) 1507 return EXTRACTOR_ebml_discard_state_method (state); 1508 sint_value = pl_read (plugin, &data, 1024 * 1024); 1509 if (sint_value < 4) 1510 return EXTRACTOR_ebml_discard_state_method (state); 1511 } 1512 } 1513 if (offset + start - data != pl_seek (plugin, offset + start - data, 1514 SEEK_SET)) 1515 return EXTRACTOR_ebml_discard_state_method (state); 1516 state->state = EBML_READING_HEADER; 1517 break; 1518 case EBML_READING_HEADER: 1519 if (0 > (read_result = elementRead (plugin, &eID, (int64_t*) &eSize))) 1520 return EXTRACTOR_ebml_discard_state_method (state); 1521 if (EBMLID_EBML != eID) 1522 { 1523 /* Not a header (happens easily, 0x1A is not uncommon), look further. 
*/ 1524 offset = pl_get_pos (plugin) - 3; 1525 if (offset < 0) 1526 offset = 0; 1527 if (offset != pl_seek (plugin, offset, SEEK_SET)) 1528 return EXTRACTOR_ebml_discard_state_method (state); 1529 state->state = EBML_LOOKING_FOR_HEADER; 1530 break; 1531 } 1532 state->state = EBML_READ_ELEMENT; 1533 state->next_state = EBML_READING_HEADER_ELEMENTS; 1534 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 1535 EBML_FINISHED_READING_HEADER, EBML_BAD_STATE, 1536 EBML_FINISHED_READING_HEADER, EBML_BAD_STATE); 1537 break; 1538 case EBML_READ_ELEMENT: 1539 #if DEBUG_EBML 1540 printf ("Reading at %lld\n", pl_get_pos (plugin)); 1541 #endif 1542 /* The following code generates some odd compiled instructions - instead of being getting the next state, 1543 * state->state gets 0xfeeefeee. 1544 */ 1545 /* 1546 if (0 > (read_result = elementRead (plugin, &eID, &eSize))) 1547 { 1548 state->state = -1; 1549 break; 1550 } 1551 state->state = state->next_state; 1552 break; 1553 *//* while the following code crashes with SIGILL. 
1554 *//* 1555 read_result = elementRead (plugin, &eID, &eSize); 1556 state->state = state->next_state; 1557 if (read_result < 0) 1558 state->state = EBML_BAD_STATE; 1559 break; 1560 *//* but the following code works as intended *//* All three code snippets were compiled with -O0 */{ 1561 enum EBMLState next_state = state->next_state; 1562 state->state = EBML_BAD_STATE; 1563 read_result = elementRead (plugin, &eID, (int64_t*) &eSize); 1564 if (read_result >= 0) 1565 state->state = next_state; 1566 } 1567 break; 1568 case EBML_READ_UINT: 1569 if (state->stack_top->size == 0) 1570 { 1571 /* Special case - zero-size uint means zero */ 1572 uint_value = 0; 1573 read_result = 1; /* 0 means error */ 1574 } 1575 else if (state->stack_top->size > 8) 1576 read_result = 0; 1577 else 1578 { 1579 if (0 > (read_result = uintRead (plugin, state->stack_top->size, 1580 &uint_value))) 1581 { 1582 state->state = EBML_BAD_STATE; 1583 break; 1584 } 1585 } 1586 /* REMINDER: read_result might not be == number of read bytes in this case! */ 1587 state->state = state->next_state; 1588 break; 1589 case EBML_READ_ID: 1590 if (0 > (read_result = idRead (plugin, state->stack_top->size, 1591 &id_value))) 1592 { 1593 state->state = EBML_BAD_STATE; 1594 break; 1595 } 1596 state->state = state->next_state; 1597 break; 1598 case EBML_READ_SINT: 1599 if (state->stack_top->size == 0) 1600 { 1601 /* Special case - zero-size sint means zero */ 1602 sint_value = 0; 1603 read_result = 1; /* 0 means error */ 1604 } 1605 else if (state->stack_top->size > 8) 1606 read_result = 0; 1607 else 1608 { 1609 if (0 > (read_result = sintRead (plugin, state->stack_top->size, 1610 &sint_value))) 1611 { 1612 state->state = EBML_BAD_STATE; 1613 break; 1614 } 1615 } 1616 /* REMINDER: read_result might not be == number of read bytes in this case! 
*/ 1617 state->state = state->next_state; 1618 break; 1619 case EBML_READ_FLOAT: 1620 if (state->stack_top->size == 0) 1621 { 1622 /* Special case - zero-size float means zero */ 1623 float_value = 0.0; 1624 read_result = 1; /* 0 means error */ 1625 } 1626 else if (state->stack_top->size > 10) 1627 read_result = 0; 1628 else 1629 { 1630 if (0 > (read_result = floatRead (plugin, state->stack_top->size, 1631 &float_value))) 1632 { 1633 state->state = EBML_BAD_STATE; 1634 break; 1635 } 1636 } 1637 /* REMINDER: read_result might not be == number of read bytes in this case! */ 1638 state->state = state->next_state; 1639 break; 1640 case EBML_READ_STRING: 1641 if (state->stack_top->size == 0) 1642 { 1643 string_value[0] = '\0'; 1644 read_result = 1; /* 0 means error */ 1645 } 1646 else 1647 { 1648 if (0 > (read_result = stringRead (plugin, state->stack_top->size, 1649 (char *) &string_value))) 1650 { 1651 state->state = EBML_BAD_STATE; 1652 break; 1653 } 1654 } 1655 /* REMINDER: read_result might not be == number of read bytes in this case! */ 1656 state->state = state->next_state; 1657 break; 1658 case EBML_READING_HEADER_ELEMENTS: 1659 if (! 
check_result (plugin, read_result, state)) 1660 break; 1661 do_break = 0; 1662 switch (eID) 1663 { 1664 case EBMLID_VERSION: 1665 case EBMLID_READ_VERSION: 1666 case EBMLID_MAX_ID_LENGTH: 1667 case EBMLID_MAX_SIZE_LENGTH: 1668 case EBMLID_DOCTYPE_VERSION: 1669 case EBMLID_DOCTYPE_READ_VERSION: 1670 state->state = EBML_READ_UINT; 1671 break; 1672 case EBMLID_DOCTYPE: 1673 state->state = EBML_READ_STRING; 1674 break; 1675 default: 1676 if (maybe_rise_up (plugin, state, &do_break, read_result)) 1677 break; 1678 /* Unknown element in EBML header - skip over it */ 1679 state->state = EBML_READ_ELEMENT; 1680 state->next_state = EBML_READING_HEADER_ELEMENTS; 1681 pl_seek (plugin, eSize, SEEK_CUR); 1682 do_break = 1; 1683 } 1684 if (do_break) 1685 break; 1686 state->next_state = EBML_READING_HEADER_ELEMENT_VALUE; 1687 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 1688 EBML_BAD_STATE, EBML_READING_HEADER_ELEMENTS, 1689 EBML_READ_ELEMENT, 1690 EBML_READING_HEADER_ELEMENTS); 1691 break; 1692 case EBML_READING_HEADER_ELEMENT_VALUE: 1693 if (! check_result (plugin, read_result, state)) 1694 break; 1695 1696 switch (state->stack_top->id) 1697 { 1698 case EBMLID_VERSION: 1699 state->ebml_version = uint_value; 1700 break; 1701 case EBMLID_READ_VERSION: 1702 state->ebml_READ_version = uint_value; 1703 if (uint_value > 1) 1704 { 1705 /* We don't support EBML versions > 1 */ 1706 state->state = EBML_BAD_STATE; 1707 /* State remains invalid, and is not reported. That is probably OK, 1708 * since we barely read anything (we only know that this is 1709 * _probably_ EBML version X, that's all). 1710 * We also stop right here and do not assume that somewhere further 1711 * in the file there's another EBML header that is, maybe, readable 1712 * by us. If you think this is worth correcting - patches are welcome. 
1713 */continue; 1714 } 1715 break; 1716 case EBMLID_MAX_ID_LENGTH: 1717 state->ebml_max_id_length = uint_value; 1718 break; 1719 case EBMLID_MAX_SIZE_LENGTH: 1720 state->ebml_max_size_length = uint_value; 1721 break; 1722 case EBMLID_DOCTYPE_VERSION: 1723 state->doctype_version = uint_value; 1724 break; 1725 case EBMLID_DOCTYPE_READ_VERSION: 1726 state->doctype_read_version = uint_value; 1727 break; 1728 case EBMLID_DOCTYPE: 1729 if (state->doctype != NULL) 1730 free (state->doctype); 1731 state->doctype = strdup (string_value); 1732 state->valid_ebml = 1; 1733 break; 1734 } 1735 rise_up_after_value (plugin, state, EBML_READING_HEADER_ELEMENTS); 1736 break; 1737 case EBML_FINISHED_READING_HEADER: 1738 if (! state->valid_ebml) 1739 { 1740 /* Header was invalid (lacking doctype). */ 1741 state->next_state = EBML_SKIP_UNTIL_NEXT_HEADER; 1742 break; 1743 } 1744 else 1745 { 1746 char *doctype = strdup (state->doctype); 1747 report_state (state, proc, proc_cls); 1748 state->state = EBML_READ_ELEMENT; 1749 if (strcmp (doctype, "matroska") == 0) 1750 { 1751 state->next_state = EBML_READING_MATROSKA_SEGMENT; 1752 } 1753 else if (strcmp (doctype, "webm") == 0) 1754 { 1755 /* Webm is a strict subset of Matroska. However, since strictness 1756 * means nothing to us (we don't validate the container, we extract 1757 * metadata from it!), we do not care about these differences 1758 * (which means that this code will happily read webm files that do 1759 * not conform to Webm spec, but conform to Matroska spec). 1760 */state->next_state = EBML_READING_MATROSKA_SEGMENT; 1761 } 1762 else 1763 { 1764 /* Header was valid, but doctype is unknown. 
*/ 1765 state->next_state = EBML_SKIP_UNTIL_NEXT_HEADER; 1766 } 1767 free (doctype); 1768 } 1769 break; 1770 case EBML_SKIP_UNTIL_NEXT_HEADER: 1771 if (read_result == 0) 1772 { 1773 state->state = EBML_LOOKING_FOR_HEADER; 1774 break; 1775 } 1776 if (eID != EBMLID_EBML) 1777 { 1778 state->state = EBML_READ_ELEMENT; 1779 state->next_state = EBML_SKIP_UNTIL_NEXT_HEADER; 1780 pl_seek (plugin, eSize, SEEK_CUR); 1781 break; 1782 } 1783 state->state = EBML_READING_HEADER; 1784 break; 1785 case EBML_READING_MATROSKA_SEGMENT: 1786 if (read_result == 0) 1787 { 1788 state->state = EBML_LOOKING_FOR_HEADER; 1789 break; 1790 } 1791 if (eID == EBMLID_EBML) 1792 { 1793 state->state = EBML_READING_HEADER; 1794 break; 1795 } 1796 if (eID != MatroskaID_Segment) 1797 { 1798 pl_seek (plugin, eSize, SEEK_CUR); 1799 state->state = EBML_READ_ELEMENT; 1800 state->next_state = EBML_READING_MATROSKA_SEGMENT; 1801 break; 1802 } 1803 state->state = EBML_READ_ELEMENT; 1804 state->next_state = EBML_READING_MATROSKA_SEGMENT_CONTENTS; 1805 clean_ebml_state_matroska_segment (state); 1806 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 1807 EBML_FINISHED_READING_MATROSKA_SEGMENT_CONTENTS, 1808 EBML_READING_MATROSKA_SEGMENT, EBML_READ_ELEMENT, 1809 EBML_READING_MATROSKA_SEGMENT); 1810 state->segment_contents_start = pl_get_pos (plugin); 1811 break; 1812 case EBML_READING_MATROSKA_SEGMENT_CONTENTS: 1813 if (! 
check_result (plugin, read_result, state)) 1814 break; 1815 1816 state->state = EBML_READ_ELEMENT; 1817 switch (eID) 1818 { 1819 case MatroskaID_SeekHead: 1820 state->next_state = EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS; 1821 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1822 read_result, 1823 EBML_FINISHED_READING_MATROSKA_SEEK_HEAD_CONTENTS, 1824 EBML_READING_MATROSKA_SEGMENT_CONTENTS, 1825 EBML_READ_ELEMENT, 1826 EBML_READING_MATROSKA_SEGMENT_CONTENTS); 1827 break; 1828 case MatroskaID_Info: 1829 state->next_state = EBML_READING_MATROSKA_INFO_CONTENTS; 1830 clean_ebml_state_matroska_info (state); 1831 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1832 read_result, 1833 EBML_FINISHED_READING_MATROSKA_INFO_CONTENTS, 1834 EBML_READING_MATROSKA_SEGMENT_CONTENTS, 1835 EBML_READ_ELEMENT, 1836 EBML_READING_MATROSKA_SEGMENT_CONTENTS); 1837 break; 1838 case MatroskaID_Tracks: 1839 state->next_state = EBML_READING_MATROSKA_TRACKS_CONTENTS; 1840 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1841 read_result, 1842 EBML_FINISHED_READING_MATROSKA_TRACKS_CONTENTS, 1843 EBML_READING_MATROSKA_SEGMENT_CONTENTS, 1844 EBML_READ_ELEMENT, 1845 EBML_READING_MATROSKA_SEGMENT_CONTENTS); 1846 break; 1847 case MatroskaID_Tags: 1848 state->next_state = EBML_READING_MATROSKA_TAGS_CONTENTS; 1849 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1850 read_result, 1851 EBML_FINISHED_READING_MATROSKA_TAGS_CONTENTS, 1852 EBML_READING_MATROSKA_SEGMENT_CONTENTS, 1853 EBML_READ_ELEMENT, 1854 EBML_READING_MATROSKA_SEGMENT_CONTENTS); 1855 break; 1856 default: 1857 if (maybe_rise_up (plugin, state, &do_break, read_result)) 1858 break; 1859 maybe_seek_to_something_interesting (plugin, state); 1860 state->next_state = EBML_READING_MATROSKA_SEGMENT_CONTENTS; 1861 pl_seek (plugin, eSize, SEEK_CUR); 1862 } 1863 break; 1864 case EBML_READING_MATROSKA_TAGS_CONTENTS: 1865 if (! 
check_result (plugin, read_result, state)) 1866 break; 1867 state->state = EBML_READ_ELEMENT; 1868 switch (eID) 1869 { 1870 case MatroskaID_Tags_Tag: 1871 state->next_state = EBML_READING_MATROSKA_TAG_CONTENTS; 1872 clean_ebml_state_matroska_seek (state); 1873 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1874 read_result, 1875 EBML_FINISHED_READING_MATROSKA_TAG_CONTENTS, 1876 EBML_READING_MATROSKA_TAGS_CONTENTS, 1877 EBML_READ_ELEMENT, 1878 EBML_READING_MATROSKA_TAGS_CONTENTS); 1879 break; 1880 default: 1881 if (maybe_rise_up (plugin, state, &do_break, read_result)) 1882 break; 1883 state->next_state = EBML_READING_MATROSKA_TAGS_CONTENTS; 1884 pl_seek (plugin, eSize, SEEK_CUR); 1885 } 1886 break; 1887 case EBML_READING_MATROSKA_TAG_CONTENTS: 1888 if (! check_result (plugin, read_result, state)) 1889 break; 1890 1891 state->state = EBML_READ_ELEMENT; 1892 switch (eID) 1893 { 1894 case MatroskaID_Tags_Tag_SimpleTag: 1895 state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS; 1896 clean_ebml_state_matroska_simpletags (state); 1897 matroska_add_tag (state, NULL, NULL, NULL); 1898 state->tag_current = state->tag_last; 1899 state->tag_tree = state->tag_current; 1900 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1901 read_result, 1902 EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS, 1903 EBML_READING_MATROSKA_TAG_CONTENTS, 1904 EBML_READ_ELEMENT, 1905 EBML_READING_MATROSKA_TAG_CONTENTS); 1906 break; 1907 default: 1908 if (maybe_rise_up (plugin, state, &do_break, read_result)) 1909 break; 1910 state->next_state = EBML_READING_MATROSKA_TAG_CONTENTS; 1911 pl_seek (plugin, eSize, SEEK_CUR); 1912 } 1913 break; 1914 case EBML_READING_MATROSKA_SIMPLETAG_CONTENTS: 1915 if (! check_result (plugin, read_result, state)) 1916 break; 1917 1918 do_break = 0; 1919 switch (eID) 1920 { 1921 case MatroskaID_Tags_Tag_SimpleTag_TagName: 1922 state->state = EBML_READ_STRING; 1923 break; /* mandatory, UTF8-encoded. 
The name of the Tag that is going to be stored. */ 1924 case MatroskaID_Tags_Tag_SimpleTag_TagString: 1925 state->state = EBML_READ_STRING; 1926 break; /* UTF-8-encoded. The value of the Tag. */ 1927 case MatroskaID_Tags_Tag_SimpleTag: 1928 if (maybe_rise_up (plugin, state, &do_break, read_result)) 1929 break; 1930 /* Oh joy, simpletags are recursive! */ 1931 state->state = EBML_READ_ELEMENT; 1932 state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS; 1933 matroska_add_tag (state, state->tag_current, NULL, NULL); 1934 state->tag_current = state->tag_last; 1935 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1936 read_result, 1937 EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS, 1938 EBML_READING_MATROSKA_SIMPLETAG_CONTENTS, 1939 EBML_READ_ELEMENT, 1940 EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS); 1941 do_break = 1; 1942 break; 1943 default: 1944 if (maybe_rise_up (plugin, state, &do_break, read_result)) 1945 break; 1946 state->state = EBML_READ_ELEMENT; 1947 state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS; 1948 pl_seek (plugin, eSize, SEEK_CUR); 1949 do_break = 1; 1950 break; 1951 } 1952 if (do_break) 1953 break; 1954 state->next_state = EBML_READING_MATROSKA_SIMPLETAG_CONTENTS_VALUE; 1955 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 1956 EBML_BAD_STATE, 1957 EBML_READING_MATROSKA_SIMPLETAG_CONTENTS, 1958 EBML_READ_ELEMENT, 1959 EBML_READING_MATROSKA_SIMPLETAG_CONTENTS); 1960 break; 1961 case EBML_READING_MATROSKA_SIMPLETAG_CONTENTS_VALUE: 1962 if (! check_result (plugin, read_result, state)) 1963 break; 1964 1965 /* This breaks the specs, as there should be only one instance of each 1966 * element (at most). We ignore that and remember the latest value, 1967 * dropping previous ones. 
1968 */ 1969 switch (state->stack_top->id) 1970 { 1971 case MatroskaID_Tags_Tag_SimpleTag_TagName: 1972 if (state->tag_current->name != NULL) 1973 free (state->tag_current->name); 1974 state->tag_current->name = strdup (string_value); 1975 break; 1976 case MatroskaID_Tags_Tag_SimpleTag_TagString: 1977 if (state->tag_current->string != NULL) 1978 free (state->tag_current->string); 1979 state->tag_current->string = strdup (string_value); 1980 break; 1981 } 1982 rise_up_after_value (plugin, state, 1983 EBML_READING_MATROSKA_SIMPLETAG_CONTENTS); 1984 break; 1985 case EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS: 1986 if (! check_result (plugin, read_result, state)) 1987 break; 1988 1989 state->state = EBML_READ_ELEMENT; 1990 switch (eID) 1991 { 1992 case MatroskaID_Seek: 1993 state->next_state = EBML_READING_MATROSKA_SEEK_CONTENTS; 1994 clean_ebml_state_matroska_seek (state); 1995 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 1996 read_result, 1997 EBML_FINISHED_READING_MATROSKA_SEEK_CONTENTS, 1998 EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS, 1999 EBML_READ_ELEMENT, 2000 EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS); 2001 break; 2002 default: 2003 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2004 break; 2005 state->next_state = EBML_READING_MATROSKA_SEEK_HEAD_CONTENTS; 2006 pl_seek (plugin, eSize, SEEK_CUR); 2007 } 2008 break; 2009 case EBML_READING_MATROSKA_SEEK_CONTENTS: 2010 if (! 
check_result (plugin, read_result, state)) 2011 break; 2012 2013 do_break = 0; 2014 switch (eID) 2015 { 2016 case MatroskaID_SeekID: 2017 state->state = EBML_READ_ID; 2018 break; 2019 case MatroskaID_SeekPosition: 2020 state->state = EBML_READ_UINT; 2021 break; 2022 default: 2023 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2024 break; 2025 state->state = EBML_READ_ELEMENT; 2026 state->next_state = EBML_READING_MATROSKA_SEEK_CONTENTS; 2027 pl_seek (plugin, eSize, SEEK_CUR); 2028 do_break = 1; 2029 break; 2030 } 2031 if (do_break) 2032 break; 2033 state->next_state = EBML_READING_MATROSKA_SEEK_CONTENTS_VALUE; 2034 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 2035 EBML_BAD_STATE, EBML_READING_MATROSKA_SEEK_CONTENTS, 2036 EBML_READ_ELEMENT, 2037 EBML_READING_MATROSKA_SEEK_CONTENTS); 2038 break; 2039 case EBML_READING_MATROSKA_SEEK_CONTENTS_VALUE: 2040 if (! check_result (plugin, read_result, state)) 2041 break; 2042 2043 /* This breaks the specs, as there should be only one instance of each 2044 * element (at most). We ignore that and remember the latest value, 2045 * dropping previous ones. 2046 */ 2047 switch (state->stack_top->id) 2048 { 2049 case MatroskaID_SeekID: 2050 state->matroska_seek_id = id_value; 2051 break; 2052 case MatroskaID_SeekPosition: 2053 state->matroska_seek_position = uint_value; 2054 break; 2055 } 2056 rise_up_after_value (plugin, state, EBML_READING_MATROSKA_SEEK_CONTENTS); 2057 break; 2058 case EBML_READING_MATROSKA_TRACKS_CONTENTS: 2059 if (! 
check_result (plugin, read_result, state)) 2060 break; 2061 2062 state->state = EBML_READ_ELEMENT; 2063 switch (eID) 2064 { 2065 case MatroskaID_Tracks_TrackEntry: 2066 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS; 2067 clean_ebml_state_matroska_track (state); 2068 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 2069 read_result, 2070 EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_CONTENTS, 2071 EBML_READING_MATROSKA_TRACKS_CONTENTS, 2072 EBML_READ_ELEMENT, 2073 EBML_READING_MATROSKA_TRACKS_CONTENTS); 2074 break; 2075 default: 2076 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2077 break; 2078 state->next_state = EBML_READING_MATROSKA_TRACKS_CONTENTS; 2079 pl_seek (plugin, eSize, SEEK_CUR); 2080 } 2081 break; 2082 case EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS: 2083 if (! check_result (plugin, read_result, state)) 2084 break; 2085 2086 do_break = 0; 2087 switch (eID) 2088 { 2089 case MatroskaID_Tracks_TrackType: 2090 state->state = EBML_READ_UINT; 2091 break; 2092 case MatroskaID_Tracks_Name: 2093 case MatroskaID_Tracks_Language: 2094 case MatroskaID_Tracks_CodecID: 2095 case MatroskaID_Tracks_CodecName: 2096 state->state = EBML_READ_STRING; 2097 break; 2098 case MatroskaID_Tracks_Video: 2099 state->state = EBML_READ_ELEMENT; 2100 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS; 2101 clean_ebml_state_matroska_track_video (state); 2102 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 2103 read_result, 2104 EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS, 2105 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS, 2106 EBML_READ_ELEMENT, 2107 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS); 2108 do_break = 1; 2109 break; 2110 case MatroskaID_Tracks_Audio: 2111 state->state = EBML_READ_ELEMENT; 2112 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS; 2113 clean_ebml_state_matroska_track_audio (state); 2114 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, 2115 
read_result, 2116 EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS, 2117 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS, 2118 EBML_READ_ELEMENT, 2119 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS); 2120 do_break = 1; 2121 break; 2122 default: 2123 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2124 break; 2125 state->state = EBML_READ_ELEMENT; 2126 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS; 2127 pl_seek (plugin, eSize, SEEK_CUR); 2128 do_break = 1; 2129 break; 2130 } 2131 if (do_break) 2132 break; 2133 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS_VALUE; 2134 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 2135 EBML_BAD_STATE, 2136 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS, 2137 EBML_READ_ELEMENT, 2138 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS); 2139 break; 2140 case EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS: 2141 if (! check_result (plugin, read_result, state)) 2142 break; 2143 2144 do_break = 0; 2145 switch (eID) 2146 { 2147 case MatroskaID_Tracks_Audio_SamplingFrequency: 2148 case MatroskaID_Tracks_Audio_OutputSamplingFrequency: 2149 state->state = EBML_READ_FLOAT; 2150 break; 2151 case MatroskaID_Tracks_Audio_Channels: 2152 case MatroskaID_Tracks_Audio_BitDepth: 2153 state->state = EBML_READ_UINT; 2154 break; 2155 default: 2156 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2157 break; 2158 state->state = EBML_READ_ELEMENT; 2159 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS; 2160 pl_seek (plugin, eSize, SEEK_CUR); 2161 do_break = 1; 2162 break; 2163 } 2164 if (do_break) 2165 break; 2166 state->next_state = 2167 EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS_VALUE; 2168 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 2169 EBML_BAD_STATE, 2170 EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS, 2171 EBML_READ_ELEMENT, 2172 EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS); 2173 break; 2174 case 
EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS_VALUE: 2175 if (! check_result (plugin, read_result, state)) 2176 break; 2177 2178 /* This breaks the specs, as there should be only one instance of each 2179 * element (at most). We ignore that and remember the latest value, 2180 * dropping previous ones. 2181 */ 2182 switch (state->stack_top->id) 2183 { 2184 case MatroskaID_Tracks_Audio_SamplingFrequency: 2185 state->matroska_track_audio_sampling_frequency = float_value; 2186 break; 2187 case MatroskaID_Tracks_Audio_OutputSamplingFrequency: 2188 state->matroska_track_audio_output_sampling_frequency = float_value; 2189 break; 2190 case MatroskaID_Tracks_Audio_Channels: 2191 state->matroska_track_audio_channels = uint_value; 2192 break; 2193 case MatroskaID_Tracks_Audio_BitDepth: 2194 state->matroska_track_audio_bit_depth = uint_value; 2195 break; 2196 } 2197 rise_up_after_value (plugin, state, 2198 EBML_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS); 2199 break; 2200 case EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS: 2201 if (! 
check_result (plugin, read_result, state)) 2202 break; 2203 2204 do_break = 0; 2205 switch (eID) 2206 { 2207 case MatroskaID_Tracks_Video_FlagInterlaced: 2208 case MatroskaID_Tracks_Video_StereoMode: 2209 case MatroskaID_Tracks_Video_PixelWidth: 2210 case MatroskaID_Tracks_Video_PixelHeight: 2211 case MatroskaID_Tracks_Video_DisplayWidth: 2212 case MatroskaID_Tracks_Video_DisplayHeight: 2213 case MatroskaID_Tracks_Video_DisplayUnit: 2214 state->state = EBML_READ_UINT; 2215 break; 2216 default: 2217 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2218 break; 2219 state->state = EBML_READ_ELEMENT; 2220 state->next_state = EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS; 2221 pl_seek (plugin, eSize, SEEK_CUR); 2222 do_break = 1; 2223 break; 2224 } 2225 if (do_break) 2226 break; 2227 state->next_state = 2228 EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS_VALUE; 2229 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 2230 EBML_BAD_STATE, 2231 EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS, 2232 EBML_READ_ELEMENT, 2233 EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS); 2234 break; 2235 case EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS_VALUE: 2236 if (! check_result (plugin, read_result, state)) 2237 break; 2238 2239 /* This breaks the specs, as there should be only one instance of each 2240 * element (at most). We ignore that and remember the latest value, 2241 * dropping previous ones. 
2242 */ 2243 switch (state->stack_top->id) 2244 { 2245 case MatroskaID_Tracks_Video_FlagInterlaced: 2246 state->matroska_track_video_flag_interlaced = uint_value; 2247 break; 2248 case MatroskaID_Tracks_Video_StereoMode: 2249 state->matroska_track_video_stereo_mode = uint_value; 2250 break; 2251 case MatroskaID_Tracks_Video_PixelWidth: 2252 state->matroska_track_video_pixel_width = uint_value; 2253 break; 2254 case MatroskaID_Tracks_Video_PixelHeight: 2255 state->matroska_track_video_pixel_height = uint_value; 2256 break; 2257 case MatroskaID_Tracks_Video_DisplayWidth: 2258 state->matroska_track_video_display_width = uint_value; 2259 break; 2260 case MatroskaID_Tracks_Video_DisplayHeight: 2261 state->matroska_track_video_display_height = uint_value; 2262 break; 2263 case MatroskaID_Tracks_Video_DisplayUnit: 2264 state->matroska_track_video_display_unit = uint_value; 2265 break; 2266 } 2267 rise_up_after_value (plugin, state, 2268 EBML_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS); 2269 break; 2270 case EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS_VALUE: 2271 if (! check_result (plugin, read_result, state)) 2272 break; 2273 2274 /* This breaks the specs, as there should be only one instance of each 2275 * element (at most). We ignore that and remember the latest value, 2276 * dropping previous ones. 2277 */ 2278 switch (state->stack_top->id) 2279 { 2280 case MatroskaID_Tracks_TrackType: 2281 state->matroska_track_type = uint_value; 2282 break; 2283 case MatroskaID_Tracks_Name: 2284 if (state->matroska_track_name != NULL) 2285 free (state->matroska_track_name); 2286 state->matroska_track_name = strdup (string_value); 2287 break; /* UTF-8-encoded. A human-readable track name. */ 2288 case MatroskaID_Tracks_Language: 2289 if (state->matroska_track_language != NULL) 2290 free (state->matroska_track_language); 2291 state->matroska_track_language = strdup (string_value); 2292 break; /* defaults to 'eng', string. 
Specifies the language of the track in the Matroska languages form. */ 2293 case MatroskaID_Tracks_CodecID: 2294 if (state->matroska_track_codec_id != NULL) 2295 free (state->matroska_track_codec_id); 2296 state->matroska_track_codec_id = strdup (string_value); 2297 break; /* mandatory, string. An ID corresponding to the codec, see the codec page ( http://matroska.org/technical/specs/codecid/index.html ) for more info. */ 2298 case MatroskaID_Tracks_CodecName: 2299 if (state->matroska_track_codec_name != NULL) 2300 free (state->matroska_track_codec_name); 2301 state->matroska_track_codec_name = strdup (string_value); 2302 break; /* UTF-8-encoded. A human-readable string specifying the codec. */ 2303 } 2304 rise_up_after_value (plugin, state, 2305 EBML_READING_MATROSKA_TRACK_ENTRY_CONTENTS); 2306 break; 2307 case EBML_READING_MATROSKA_INFO_CONTENTS: 2308 if (! check_result (plugin, read_result, state)) 2309 break; 2310 2311 do_break = 0; 2312 switch (eID) 2313 { 2314 case MatroskaID_Info_Title: 2315 case MatroskaID_Info_MuxingApp: 2316 case MatroskaID_Info_WritingApp: 2317 state->state = EBML_READ_STRING; 2318 break; 2319 case MatroskaID_Info_TimecodeScale: 2320 state->state = EBML_READ_UINT; 2321 break; 2322 case MatroskaID_Info_Duration: 2323 state->state = EBML_READ_FLOAT; 2324 break; 2325 case MatroskaID_Info_DateUTC: 2326 state->state = EBML_READ_SINT; 2327 break; 2328 default: 2329 if (maybe_rise_up (plugin, state, &do_break, read_result)) 2330 break; 2331 /* Unknown element in MatroskaInfo - skip over it */ 2332 state->state = EBML_READ_ELEMENT; 2333 state->next_state = EBML_READING_MATROSKA_INFO_CONTENTS; 2334 pl_seek (plugin, eSize, SEEK_CUR); 2335 do_break = 1; 2336 } 2337 if (do_break) 2338 break; 2339 state->next_state = EBML_READING_MATROSKA_INFO_CONTENTS_VALUE; 2340 ebml_stack_push_new (state, pl_get_pos (plugin), eID, eSize, read_result, 2341 EBML_BAD_STATE, EBML_READING_MATROSKA_INFO_CONTENTS, 2342 EBML_READ_ELEMENT, 2343 
EBML_READING_MATROSKA_INFO_CONTENTS); 2344 break; 2345 case EBML_READING_MATROSKA_INFO_CONTENTS_VALUE: 2346 if (! check_result (plugin, read_result, state)) 2347 break; 2348 2349 /* This breaks the specs, as there should be only one instance of each 2350 * element (at most). We ignore that and remember the latest value, 2351 * dropping previous ones. 2352 */ 2353 switch (state->stack_top->id) 2354 { 2355 case MatroskaID_Info_Title: 2356 if (state->matroska_info_title != NULL) 2357 free (state->matroska_info_title); 2358 state->matroska_info_title = strdup (string_value); 2359 break; 2360 case MatroskaID_Info_MuxingApp: 2361 if (state->matroska_info_muxing_app != NULL) 2362 free (state->matroska_info_muxing_app); 2363 state->matroska_info_muxing_app = strdup (string_value); 2364 break; 2365 case MatroskaID_Info_WritingApp: 2366 if (state->matroska_info_writing_app != NULL) 2367 free (state->matroska_info_writing_app); 2368 state->matroska_info_writing_app = strdup (string_value); 2369 break; 2370 case MatroskaID_Info_TimecodeScale: 2371 state->matroska_info_timecode_scale = uint_value; 2372 break; 2373 case MatroskaID_Info_Duration: 2374 state->matroska_info_duration = float_value; 2375 break; 2376 case MatroskaID_Info_DateUTC: 2377 state->matroska_info_date_utc_is_set = 1; 2378 state->matroska_info_date_utc = sint_value; 2379 break; 2380 } 2381 rise_up_after_value (plugin, state, EBML_READING_MATROSKA_INFO_CONTENTS); 2382 break; 2383 case EBML_FINISHED_READING_MATROSKA_INFO_CONTENTS: 2384 if ((state->stack_top != NULL) && (pl_get_pos (plugin) >= 2385 state->stack_top->position 2386 + state->stack_top->header_size 2387 + state->stack_top->size) ) 2388 report_state (state, proc, proc_cls); 2389 maybe_seek_to_something_interesting (plugin, state); 2390 state->state = EBML_READ_ELEMENT; 2391 state->next_state = state->stack_top->prev_state; 2392 free (ebml_stack_pop (state)); 2393 break; 2394 case EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_CONTENTS: 2395 if 
((state->stack_top != NULL) && (pl_get_pos (plugin) >= 2396 state->stack_top->position 2397 + state->stack_top->header_size 2398 + state->stack_top->size) ) 2399 report_state (state, proc, proc_cls); 2400 state->state = EBML_READ_ELEMENT; 2401 state->next_state = state->stack_top->prev_state; 2402 free (ebml_stack_pop (state)); 2403 break; 2404 case EBML_FINISHED_READING_MATROSKA_SEEK_CONTENTS: 2405 if ((state->matroska_seek_id != 0) && 2406 ((state->matroska_seek_position > 0) || (state->matroska_seeks_tail == 2407 NULL) )) 2408 { 2409 struct matroska_seek_list *el; 2410 el = malloc (sizeof (struct matroska_seek_list)); 2411 el->next = NULL; 2412 el->id = state->matroska_seek_id; 2413 el->position = state->matroska_seek_position; 2414 if (state->matroska_seeks_tail != NULL) 2415 { 2416 state->matroska_seeks_tail->next = el; 2417 state->matroska_seeks_tail = el; 2418 } 2419 else 2420 state->matroska_seeks_tail = state->matroska_seeks = el; 2421 } 2422 state->state = EBML_READ_ELEMENT; 2423 state->next_state = state->stack_top->prev_state; 2424 free (ebml_stack_pop (state)); 2425 break; 2426 case EBML_FINISHED_READING_MATROSKA_TRACKS_CONTENTS: 2427 case EBML_FINISHED_READING_MATROSKA_TAGS_CONTENTS: 2428 maybe_seek_to_something_interesting (plugin, state); 2429 case EBML_FINISHED_READING_MATROSKA_SEGMENT_CONTENTS: 2430 case EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_VIDEO_CONTENTS: 2431 case EBML_FINISHED_READING_MATROSKA_TRACK_ENTRY_AUDIO_CONTENTS: 2432 state->state = EBML_READ_ELEMENT; 2433 state->next_state = state->stack_top->prev_state; 2434 free (ebml_stack_pop (state)); 2435 break; 2436 case EBML_FINISHED_READING_MATROSKA_SIMPLETAG_CONTENTS: 2437 state->tag_current = state->tag_current->parent; 2438 if (state->tag_current == NULL) 2439 report_simpletag (state, proc, proc_cls); 2440 state->state = EBML_READ_ELEMENT; 2441 state->next_state = state->stack_top->prev_state; 2442 free (ebml_stack_pop (state)); 2443 break; 2444 case 
EBML_FINISHED_READING_MATROSKA_TAG_CONTENTS: 2445 state->state = EBML_READ_ELEMENT; 2446 state->next_state = state->stack_top->prev_state; 2447 free (ebml_stack_pop (state)); 2448 break; 2449 case EBML_FINISHED_READING_MATROSKA_SEEK_HEAD_CONTENTS: 2450 sort_seeks (state); 2451 try_to_find_pos (plugin, state); 2452 state->state = EBML_READ_ELEMENT; 2453 state->next_state = state->stack_top->prev_state; 2454 free (ebml_stack_pop (state)); 2455 break; 2456 } 2457 } 2458 return EXTRACTOR_ebml_discard_state_method (state); 2459 }