aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/id3v2_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/id3v2_extractor.c')
-rw-r--r--src/plugins/id3v2_extractor.c957
1 files changed, 684 insertions, 273 deletions
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c
index 4f50d05..0302dc6 100644
--- a/src/plugins/id3v2_extractor.c
+++ b/src/plugins/id3v2_extractor.c
@@ -26,6 +26,8 @@
26#endif 26#endif
27#include "convert.h" 27#include "convert.h"
28 28
29#include "extractor_plugins.h"
30
29#define DEBUG_EXTRACT_ID3v2 0 31#define DEBUG_EXTRACT_ID3v2 0
30 32
31enum Id3v2Fmt 33enum Id3v2Fmt
@@ -47,314 +49,723 @@ typedef struct
47 49
48static Matches tmap[] = { 50static Matches tmap[] = {
49 /* skipping UFI */ 51 /* skipping UFI */
50 {"TT1", EXTRACTOR_METATYPE_SECTION, T}, 52 {"TT1 ", EXTRACTOR_METATYPE_SECTION, T},
51 {"TT2", EXTRACTOR_METATYPE_TITLE, T}, 53 {"TT2 ", EXTRACTOR_METATYPE_TITLE, T},
52 {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, 54 {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T},
53 {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, 55 {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T},
54 {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, 56 {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T},
55 {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, 57 {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T},
56 {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 58 {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T},
57 {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, 59 {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T},
58 {"TXT", EXTRACTOR_METATYPE_WRITER, T}, 60 {"TXT ", EXTRACTOR_METATYPE_WRITER, T},
59 {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, 61 {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T},
60 {"TCO", EXTRACTOR_METATYPE_GENRE, T}, 62 {"TCO ", EXTRACTOR_METATYPE_GENRE, T},
61 {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, 63 {"TAL ", EXTRACTOR_METATYPE_ALBUM, T},
62 {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, 64 {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T},
63 {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, 65 {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
64 {"TRC", EXTRACTOR_METATYPE_ISRC, T}, 66 {"TRC ", EXTRACTOR_METATYPE_ISRC, T},
65 {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, 67 {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
66 /* 68 /*
67 FIXME: these two and TYE should be combined into 69 FIXME: these two and TYE should be combined into
68 the actual publication date (if TRD is missing) 70 the actual publication date (if TRD is missing)
69 {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, 71 {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
70 {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, 72 {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
71 */ 73 */
72 {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, 74 {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T},
73 {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, 75 {"TMT ", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, 76 {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
75 {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, 77 {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
76 {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, 78 {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T},
77 {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, 79 {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T},
78 {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, 80 {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T},
79 {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, 81 {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
80 {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, 82 {"TOF ", EXTRACTOR_METATYPE_FILENAME, T},
81 {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ 83 {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
82 {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, 84 {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
83 /* skipping TDY, TKE */ 85 /* skipping TDY, TKE */
84 {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, 86 {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
85 {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, 87 {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
86 {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, 88 {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
87 {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, 89 {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
88 /* skipping TXX */ 90 /* skipping TXX */
89 91
90 {"WAF", EXTRACTOR_METATYPE_URL, U}, 92 {"WAF ", EXTRACTOR_METATYPE_URL, U},
91 {"WAR", EXTRACTOR_METATYPE_URL, U}, 93 {"WAR ", EXTRACTOR_METATYPE_URL, U},
92 {"WAS", EXTRACTOR_METATYPE_URL, U}, 94 {"WAS ", EXTRACTOR_METATYPE_URL, U},
93 {"WCM", EXTRACTOR_METATYPE_URL, U}, 95 {"WCM ", EXTRACTOR_METATYPE_URL, U},
94 {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, 96 {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U},
95 {"WCB", EXTRACTOR_METATYPE_URL, U}, 97 {"WCB ", EXTRACTOR_METATYPE_URL, U},
96 /* skipping WXX */ 98 /* skipping WXX */
97 {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, 99 {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
98 /* skipping MCI */ 100 /* skipping MCI */
99 /* skipping ETC */ 101 /* skipping ETC */
100 /* skipping MLL */ 102 /* skipping MLL */
101 /* skipping STC */ 103 /* skipping STC */
102 {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, 104 {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL},
103 {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, 105 {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL},
104 {"COM", EXTRACTOR_METATYPE_COMMENT, L}, 106 {"COM ", EXTRACTOR_METATYPE_COMMENT, L},
105 /* skipping RVA */ 107 /* skipping RVA */
106 /* skipping EQU */ 108 /* skipping EQU */
107 /* skipping REV */ 109 /* skipping REV */
108 {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, 110 {"PIC ", EXTRACTOR_METATYPE_PICTURE, I},
109 /* skipping GEN */ 111 /* skipping GEN */
110 /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ 112 /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
111 /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ 113 /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
112 /* skipping BUF */ 114 /* skipping BUF */
113 /* skipping CRM */ 115 /* skipping CRM */
114 /* skipping CRA */ 116 /* skipping CRA */
115 /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ 117 /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */
118
119
120 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
121 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
122 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
123 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
124 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
125 {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */
126 /* TDLY */
127 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
128 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
129 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
130 /* TIME, idv23 only */
131 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
132 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
133 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
134 /* TKEY */
135 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
136 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
137 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
138 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
139 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
140 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
141 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
142 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */
143 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
144 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
145 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
146 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
147 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
148 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
149 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
150 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
151 /* TRDA, idv23 only */
152 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
153 /* TRSO */
154 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */
155 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
156 /* TSSE */
157 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */
158 {"WCOM", EXTRACTOR_METATYPE_URL, U},
159 {"WCOP", EXTRACTOR_METATYPE_URL, U},
160 {"WOAF", EXTRACTOR_METATYPE_URL, U},
161 {"WOAS", EXTRACTOR_METATYPE_URL, U},
162 {"WORS", EXTRACTOR_METATYPE_URL, U},
163 {"WPAY", EXTRACTOR_METATYPE_URL, U},
164 {"WPUB", EXTRACTOR_METATYPE_URL, U},
165 {"WXXX", EXTRACTOR_METATYPE_URL, T},
166 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */
167 /* ... */
168 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
169 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
170 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
171 /* ... */
172 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
173 /* ... */
174 {"LINK", EXTRACTOR_METATYPE_URL, U},
175 /* ... */
176 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
177 /* ... */
178
179 /* new frames in id3v24 */
180 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
181 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
182 /* TDRC, TDRL, TDTG */
183 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
184 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
185 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
186 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
187 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
188 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
189 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
190 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
191
116 {NULL, 0, T}, 192 {NULL, 0, T},
117}; 193};
118 194
/**
 * Parser state that is kept between successive calls of the
 * extract method (one instance per plugin, stored in plugin->state).
 */
struct id3v2_state
{
  /* current phase of the parser, one of the ID3v2State values */
  int state;

  /* tag size as decoded from the 10-byte ID3v2 header */
  unsigned int tsize;

  /* size of the frame that is currently being processed */
  size_t csize;

  /* identifier of the current frame (3 or 4 bytes, not 0-terminated) */
  char id[4];

  /* index of the current frame in the frame table, -1 if none */
  int32_t ti;

  /* major version byte from the tag header (0x02, 0x03 or 0x04) */
  char ver;

  /* non-zero if the tag header announces an extended header */
  char extended_header;

  /* flag bytes of the current frame (always 0 for version 0x02) */
  uint16_t frame_flags;

  /* heap-allocated MIME type of the picture being processed, or NULL */
  char *mime;
};
207
/**
 * Phases of the incremental ID3v2 parser.
 */
enum ID3v2State
{
  /* parsing is over: no (supported) tag found, or nothing left to read */
  ID3V2_INVALID = -1,

  /* expecting the 10-byte tag header */
  ID3V2_READING_HEADER = 0,

  /* expecting the header of the next frame */
  ID3V2_READING_FRAME_HEADER = 1,

  /* expecting the extended header of a version 0x03 tag */
  ID3V23_READING_EXTENDED_HEADER = 2,

  /* expecting the extended header of a version 0x04 tag */
  ID3V24_READING_EXTENDED_HEADER = 3,

  /* expecting the payload of the frame whose header was just read */
  ID3V2_READING_FRAME = 4
};
217
218void
219EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
220{
221 struct id3v2_state *state;
222 state = plugin->state = malloc (sizeof (struct id3v2_state));
223 if (state == NULL)
224 return;
225 memset (state, 0, sizeof (struct id3v2_state));
226 state->state = ID3V2_READING_HEADER;
227 state->ti = -1;
228 state->mime = NULL;
229}
230
231void
232EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
233{
234 struct id3v2_state *state = plugin->state;
235 if (state != NULL)
236 {
237 if (state->mime != NULL)
238 free (state->mime);
239 free (state);
240 }
241 plugin->state = NULL;
242}
243
244static int
245find_type (const char *id, size_t len)
246{
247 int i;
248 for (i = 0; tmap[i].text != NULL; i++)
249 if (0 == strncmp (tmap[i].text, id, len))
250 return i;
251 return -1;
252}
253
254int
255EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin,
256 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
257{
258 int64_t file_position;
259 int64_t file_size;
260 int64_t offset = 0;
261 int64_t size;
262 struct id3v2_state *state;
263 unsigned char *data;
264 char *word = NULL;
130 unsigned int off; 265 unsigned int off;
131 enum EXTRACTOR_MetaType type; 266 enum EXTRACTOR_MetaType type;
132 const char *mime; 267 unsigned char picture_type;
133 268
134 if ((size < 16) || 269 if (plugin == NULL || plugin->state == NULL)
135 (data[0] != 0x49) || 270 return 1;
136 (data[1] != 0x44) ||
137 (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
138 return 0;
139 /* unsync: (data[5] & 0x80) > 0; */
140 tsize = (((data[6] & 0x7F) << 21) |
141 ((data[7] & 0x7F) << 14) |
142 ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
143 271
144 if (tsize + 10 > size) 272 state = plugin->state;
145 return 0; 273 file_position = plugin->position;
146 pos = 10; 274 file_size = plugin->fsize;
147 while (pos < tsize) 275 size = plugin->map_size;
276 data = plugin->shm_ptr;
277
278 if (plugin->seek_request < 0)
279 return 1;
280 if (file_position - plugin->seek_request > 0)
281 {
282 plugin->seek_request = -1;
283 return 1;
284 }
285 if (plugin->seek_request - file_position < size)
286 offset = plugin->seek_request - file_position;
287
288 while (1)
289 {
290 switch (state->state)
148 { 291 {
149 size_t csize; 292 case ID3V2_INVALID:
150 int i; 293 plugin->seek_request = -1;
294 return 1;
295 case ID3V2_READING_HEADER:
296 /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq:
297 * Q: Where is an ID3v2 tag located in an MP3 file?
298 * A: It is most likely located at the beginning of the file. Look for the
299 * marker "ID3" in the first 3 bytes of the file. If it's not there, it
300 * could be at the end of the file (if the tag is ID3v2.4). Look for the
301 * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the
302 * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags
303 * in the actual MPEG stream, on an MPEG frame boundry. Almost nobody does
304 * this.
305 * Parsing of such tags will not be completely correct, because we can't
306 * seek backwards. We will have to seek to file_size - chunk_size instead
307 * (by the way, chunk size is theoretically unknown, LE is free to use any chunk
308 * size, even though plugins often make assumptions about chunk size being large
309 * enough to make one atomic read without seeking, if offset == 0) and search
310 * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before
311 * it (or 10 bytes before the end of file, if id3v1 is not there; not sure
312 * about APETAGs; we should probably just scan byte-by-byte from the end of file,
313 * until we hit 3DI, or reach the offset == 0), and use it set offset to the
314 * start of ID3v24 header, adjust the following file_position check and data
315 * indices (use offset), and otherwise proceed as normal (maybe file size checks
316 * along the way will have to be adjusted by -1, or made ">" instead of ">=";
317 * these problems do not arise for tags at the beginning of the file, since
318 * audio itself is usually at least 1-byte long; when the tag is at the end of
319 * file, these checks will have to be 100% correct).
320 * If there are two tags (at the beginning and at the end of the file),
321 * a SEEK in the one at the beginning of the file can be used to seek to the
322 * one at the end.
323 */
324 /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that
325 * tells the parser to augument id3v1 values with the values from id3v2 (if this
326 * flag is not set, id3v2 parser must discard id3v1 data).
327 * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored.
328 */
329 if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/)
330 {
331 state->state = ID3V2_INVALID;
332 break;
333 }
334 state->ver = data[3];
335 if (state->ver == 0x02)
336 {
337 state->extended_header = 0;
338 }
339 else if ((state->ver == 0x03) || (state->ver == 0x04))
340 {
341 if ((data[5] & 0x80) > 0)
342 {
343 /* unsync is not supported in id3v23 or id3v24*/
344 state->state = ID3V2_INVALID;
345 break;
346 }
347 state->extended_header = (data[5] & 0x40) > 0;
348 if ((data[5] & 0x20) > 0)
349 {
350 /* experimental is not supported in id3v23 or id3v24*/
351 state->state = ID3V2_INVALID;
352 break;
353 }
354 }
355 state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
356 if (state->tsize + 10 > file_size)
357 {
358 state->state = ID3V2_INVALID;
359 break;
360 }
361 offset = 10;
362 if (state->ver == 0x03 && state->extended_header)
363 state->state = ID3V23_READING_EXTENDED_HEADER;
364 else if (state->ver == 0x04 && state->extended_header)
365 state->state = ID3V24_READING_EXTENDED_HEADER;
366 else
367 state->state = ID3V2_READING_FRAME_HEADER;
368 break;
369 case ID3V23_READING_EXTENDED_HEADER:
370 if (offset + 9 >= size)
371 {
372 if (offset == 0)
373 {
374 state->state = ID3V2_INVALID;
375 break;
376 }
377 plugin->seek_request = file_position + offset;
378 return 0;
379 }
380 if (state->ver == 0x03 && state->extended_header)
381 {
382 uint32_t padding, extended_header_size;
383 extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0));
384 padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0));
385 if (data[offset + 4] == 0 && data[offset + 5] == 0)
386 /* Skip the CRC32 byte after extended header */
387 offset += 1;
388 offset += 4 + extended_header_size;
389 if (padding < state->tsize)
390 state->tsize -= padding;
391 else
392 {
393 state->state = ID3V2_INVALID;
394 break;
395 }
396 }
397 break;
398 case ID3V24_READING_EXTENDED_HEADER:
399 if (offset + 6 >= size)
400 {
401 if (offset == 0)
402 {
403 state->state = ID3V2_INVALID;
404 break;
405 }
406 plugin->seek_request = file_position + offset;
407 return 0;
408 }
409 if ( (state->ver == 0x04) && (state->extended_header))
410 {
411 uint32_t extended_header_size;
151 412
152 if (pos + 7 > tsize) 413 extended_header_size = (((data[offset]) << 24) |
414 ((data[offset + 1]) << 16) |
415 ((data[offset + 2]) << 8) |
416 ((data[offset + 3]) << 0));
417 offset += 4 + extended_header_size;
418 }
419 break;
420 case ID3V2_READING_FRAME_HEADER:
421 if (file_position + offset > state->tsize ||
422 ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) ||
423 (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize))
424 {
425 state->state = ID3V2_INVALID;
426 break;
427 }
428 if (((state->ver == 0x02) && (offset + 6 >= size)) ||
429 (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size)))
430 {
431 plugin->seek_request = file_position + offset;
153 return 0; 432 return 0;
154 csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; 433 }
155 if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) 434 if (state->ver == 0x02)
435 {
436 memcpy (state->id, &data[offset], 3);
437 state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5];
438 if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
439 {
440 state->state = ID3V2_INVALID;
441 break;
442 }
443 offset += 6;
444 state->frame_flags = 0;
445 }
446 else if ((state->ver == 0x03) || (state->ver == 0x04))
447 {
448 memcpy (state->id, &data[offset], 4);
449 if (state->ver == 0x03)
450 state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7];
451 else if (state->ver == 0x04)
452 state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00);
453 if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
454 {
455 state->state = ID3V2_INVALID;
456 break;
457 }
458 state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
459 if (state->ver == 0x03)
460 {
461 if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ ||
462 ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
463 {
464 /* Skip to next frame header */
465 offset += 10 + state->csize;
466 break;
467 }
468 }
469 else if (state->ver == 0x04)
470 {
471 if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ ||
472 ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ ||
473 ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */)
474 {
475 /* Skip to next frame header */
476 offset += 10 + state->csize;
477 break;
478 }
479 if ((state->frame_flags & 0x01) > 0)
480 {
481 /* Skip data length indicator */
482 state->csize -= 4;
483 offset += 4;
484 }
485 }
486 offset += 10;
487 }
488
489 state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
490 if (state->ti == -1)
491 {
492 offset += state->csize;
493 break;
494 }
495 state->state = ID3V2_READING_FRAME;
496 break;
497 case ID3V2_READING_FRAME:
498 if (offset == 0 && state->csize > size)
499 {
500 /* frame size is larger than the size of one data chunk we get at a time */
501 offset += state->csize;
502 state->state = ID3V2_READING_FRAME_HEADER;
503 break;
504 }
505 if (offset + state->csize > size)
506 {
507 plugin->seek_request = file_position + offset;
508 return 0;
509 }
510 word = NULL;
511 if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
512 ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
513 {
514 /* "group" identifier, skip a byte */
515 offset++;
516 state->csize--;
517 }
518 switch (tmap[state->ti].fmt)
519 {
520 case T:
521 if (data[offset] == 0x00)
522 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
523 state->csize - 1, "ISO-8859-1");
524 else if (data[offset] == 0x01)
525 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
526 state->csize - 1, "UCS-2");
527 else if ((state->ver == 0x04) && (data[offset] == 0x02))
528 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
529 state->csize - 1, "UTF-16BE");
530 else if ((state->ver == 0x04) && (data[offset] == 0x03))
531 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
532 state->csize - 1, "UTF-8");
533 else
534 /* bad encoding byte, try to convert from iso-8859-1 */
535 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
536 state->csize - 1, "ISO-8859-1");
537 break;
538 case U:
539 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
540 state->csize, "ISO-8859-1");
541 break;
542 case UL:
543 if (state->csize < 6)
544 {
545 /* malformed */
546 state->state = ID3V2_INVALID;
547 break;
548 }
549 /* find end of description */
550 off = 4;
551 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
552 off++;
553 if ((off >= state->csize) || (data[offset + off] != '\0'))
554 {
555 /* malformed */
556 state->state = ID3V2_INVALID;
557 break;
558 }
559 off++;
560 if (data[offset] == 0x00)
561 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
562 state->csize - off, "ISO-8859-1");
563 else if (data[offset] == 0x01)
564 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
565 state->csize - off, "UCS-2");
566 else if ((state->ver == 0x04) && (data[offset] == 0x02))
567 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
568 state->csize - off, "UTF-16BE");
569 else if ((state->ver == 0x04) && (data[offset] == 0x03))
570 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
571 state->csize - off, "UTF-8");
572 else
573 /* bad encoding byte, try to convert from iso-8859-1 */
574 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
575 state->csize - off, "ISO-8859-1");
576 break;
577 case SL:
578 if (state->csize < 7)
579 {
580 /* malformed */
581 state->state = ID3V2_INVALID;
582 break;
583 }
584 if (data[offset] == 0x00)
585 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
586 state->csize - 6, "ISO-8859-1");
587 else if (data[offset] == 0x01)
588 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
589 state->csize - 6, "UCS-2");
590 else if ((state->ver == 0x04) && (data[offset] == 0x02))
591 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
592 state->csize - 6, "UTF-16BE");
593 else if ((state->ver == 0x04) && (data[offset] == 0x03))
594 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
595 state->csize - 6, "UTF-8");
596 else
597 /* bad encoding byte, try to convert from iso-8859-1 */
598 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
599 state->csize - 6, "ISO-8859-1");
600 break;
601 case L:
602 if (state->csize < 5)
603 {
604 /* malformed */
605 state->state = ID3V2_INVALID;
606 break;
607 }
608 /* find end of description */
609 off = 4;
610 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
611 off++;
612 if ((off >= state->csize) || (data[offset + off] != '\0'))
613 {
614 /* malformed */
615 state->state = ID3V2_INVALID;
616 break;
617 }
618 off++;
619
620 if (data[offset] == 0x00)
621 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
622 state->csize - off, "ISO-8859-1");
623 else if (data[offset] == 0x01)
624 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
625 state->csize - off, "UCS-2");
626 else if ((state->ver == 0x04) && (data[offset] == 0x02))
627 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
628 state->csize - off, "UTF-1offBE");
629 else if ((state->ver == 0x04) && (data[offset] == 0x03))
630 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
631 state->csize - off, "UTF-8");
632 else
633 /* bad encoding byte, try to convert from iso-8859-1 */
634 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
635 state->csize - off, "ISO-8859-1");
636 break;
637 case I:
638 if ( ( (state->ver == 0x02) &&
639 (state->csize < 7) ) ||
640 ( ( (state->ver == 0x03) ||
641 (state->ver == 0x04)) && (state->csize < 5)) )
642 {
643 /* malformed */
644 state->state = ID3V2_INVALID;
645 break;
646 }
647 if (state->mime != NULL)
648 free (state->mime);
649 state->mime = NULL;
650 if (state->ver == 0x02)
651 {
652 off = 5;
653 picture_type = data[offset + 5];
654 }
655 else if ((state->ver == 0x03) || (state->ver == 0x04))
656 {
657 off = 1;
658 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') )
659 off++;
660 if ((off >= state->csize) || (data[offset + off] != '\0'))
661 {
662 /* malformed */
663 state->state = ID3V2_INVALID;
664 break;
665 }
666 state->mime = malloc (off);
667 memcpy (state->mime, &data[offset + 1], off - 1);
668 state->mime[off - 1] = '\0';
669 off += 1;
670 picture_type = data[offset];
671 off += 1;
672 }
673 /* find end of description */
674 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
675 off++;
676 if ((off >= state->csize) || (data[offset + off] != '\0'))
677 {
678 free (state->mime);
679 state->mime = NULL;
680 /* malformed */
681 state->state = ID3V2_INVALID;
682 break;
683 }
684 off++;
685 switch (picture_type)
686 {
687 case 0x03:
688 case 0x04:
689 type = EXTRACTOR_METATYPE_COVER_PICTURE;
690 break;
691 case 0x07:
692 case 0x08:
693 case 0x09:
694 case 0x0A:
695 case 0x0B:
696 case 0x0C:
697 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
698 break;
699 case 0x0D:
700 case 0x0E:
701 case 0x0F:
702 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
703 break;
704 case 0x14:
705 type = EXTRACTOR_METATYPE_LOGO;
706 type = EXTRACTOR_METATYPE_LOGO;
707 break;
708 default:
709 type = EXTRACTOR_METATYPE_PICTURE;
710 break;
711 }
712 if (state->ver == 0x02)
713 {
714 if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
715 state->mime = strdup ("image/png");
716 else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3))
717 state->mime = strdup ("image/jpeg");
718 else
719 state->mime = NULL;
720 }
721 else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL))
722 {
723 size_t mime_len = strlen (state->mime);
724 char *type_mime = malloc (mime_len + 6 + 1);
725 snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime);
726 free (state->mime);
727 state->mime = type_mime;
728 }
729 if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->")))
730 {
731 /* not supported */
732 free (state->mime);
733 state->mime = NULL;
734 }
735 else
736 {
737 if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off))
738 {
739 if (state->mime != NULL)
740 free (state->mime);
741 state->mime = NULL;
742 return 1;
743 }
744 if (state->mime != NULL)
745 free (state->mime);
746 state->mime = NULL;
747 }
748 word = NULL;
156 break; 749 break;
157 i = 0; 750 default:
158 while (tmap[i].text != NULL) 751 return 1;
752 }
753 if ((word != NULL) && (strlen (word) > 0))
754 {
755 if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1))
159 { 756 {
160 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3)) 757 free (word);
161 { 758 return 1;
162 char *word;
163 /* this byte describes the encoding
164 try to convert strings to UTF-8
165 if it fails, then forget it */
166 switch (tmap[i].fmt)
167 {
168 case T:
169 switch (data[pos + 6])
170 {
171 case 0x00:
172 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
173 csize - 1, "ISO-8859-1");
174 break;
175 case 0x01:
176 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
177 csize - 1, "UCS-2");
178 break;
179 default:
180 /* bad encoding byte,
181 try to convert from iso-8859-1 */
182 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
183 csize - 1, "ISO-8859-1");
184 break;
185 }
186 break;
187 case U:
188 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6],
189 csize, "ISO-8859-1");
190 break;
191 case UL:
192 if (csize < 6)
193 return 0; /* malformed */
194 /* find end of description */
195 off = 10;
196 while ( (off < size) &&
197 (off - pos < csize) &&
198 (data[pos + off] == '\0') )
199 off++;
200 if ( (off >= csize) ||
201 (data[pos+off] != '\0') )
202 return 0; /* malformed */
203 off++;
204 switch (data[pos + 6])
205 {
206 case 0x00:
207 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
208 csize - off, "ISO-8859-1");
209 break;
210 case 0x01:
211 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
212 csize - off, "UCS-2");
213 break;
214 default:
215 /* bad encoding byte,
216 try to convert from iso-8859-1 */
217 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
218 csize - off, "ISO-8859-1");
219 break;
220 }
221 break;
222 case SL:
223 if (csize < 7)
224 return 0; /* malformed */
225 /* find end of description */
226 switch (data[pos + 6])
227 {
228 case 0x00:
229 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
230 csize - 6, "ISO-8859-1");
231 break;
232 case 0x01:
233 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
234 csize - 6, "UCS-2");
235 break;
236 default:
237 /* bad encoding byte,
238 try to convert from iso-8859-1 */
239 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
240 csize - 6, "ISO-8859-1");
241 break;
242 }
243 break;
244 case L:
245 if (csize < 5)
246 return 0; /* malformed */
247 /* find end of description */
248 switch (data[pos + 6])
249 {
250 case 0x00:
251 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
252 csize - 4, "ISO-8859-1");
253 break;
254 case 0x01:
255 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
256 csize - 4, "UCS-2");
257 break;
258 default:
259 /* bad encoding byte,
260 try to convert from iso-8859-1 */
261 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
262 csize - 4, "ISO-8859-1");
263 break;
264 }
265 break;
266 case I:
267 if (csize < 6)
268 return 0; /* malformed */
269 /* find end of description */
270 off = 12;
271 while ( (off < size) &&
272 (off - pos < csize) &&
273 (data[pos + off] == '\0') )
274 off++;
275 if ( (off >= csize) ||
276 (data[pos+off] != '\0') )
277 return 0; /* malformed */
278 off++;
279 switch (data[pos+11])
280 {
281 case 0x03:
282 case 0x04:
283 type = EXTRACTOR_METATYPE_COVER_PICTURE;
284 break;
285 case 0x07:
286 case 0x08:
287 case 0x09:
288 case 0x0A:
289 case 0x0B:
290 case 0x0C:
291 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
292 break;
293 case 0x0D:
294 case 0x0E:
295 case 0x0F:
296 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
297 break;
298 case 0x14:
299 type = EXTRACTOR_METATYPE_LOGO;
300 type = EXTRACTOR_METATYPE_LOGO;
301 break;
302 default:
303 type = EXTRACTOR_METATYPE_PICTURE;
304 break;
305 }
306 if (0 == strncasecmp ("PNG",
307 (const char*) &data[pos + 7], 3))
308 mime = "image/png";
309 else if (0 == strncasecmp ("JPG",
310 (const char*) &data[pos + 7], 3))
311 mime = "image/jpeg";
312 else
313 mime = NULL;
314 if (0 == strncasecmp ("-->",
315 (const char*) &data[pos + 7], 3))
316 {
317 /* not supported */
318 }
319 else
320 {
321 if (0 != proc (proc_cls,
322 "id3v2",
323 type,
324 EXTRACTOR_METAFORMAT_BINARY,
325 mime,
326 (const char*) &data[pos + off],
327 csize + 6 - off))
328 return 1;
329 }
330 word = NULL;
331 break;
332 default:
333 return 0;
334 }
335 if ((word != NULL) && (strlen (word) > 0))
336 {
337 if (0 != proc (proc_cls,
338 "id3v2",
339 tmap[i].type,
340 EXTRACTOR_METAFORMAT_UTF8,
341 "text/plain",
342 word,
343 strlen(word)+1))
344 {
345 free (word);
346 return 1;
347 }
348 }
349 if (word != NULL)
350 free (word);
351 break;
352 }
353 i++;
354 } 759 }
355 pos += 6 + csize; 760 }
761 if (word != NULL)
762 free (word);
763 offset = offset + state->csize;
764 state->state = ID3V2_READING_FRAME_HEADER;
765 break;
356 } 766 }
357 return 0; 767 }
768 return 1;
358} 769}
359 770
360/* end of id3v2_extractor.c */ 771/* end of id3v2_extractor.c */