aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/old/id3v2_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/old/id3v2_extractor.c')
-rw-r--r--src/plugins/old/id3v2_extractor.c753
1 files changed, 0 insertions, 753 deletions
diff --git a/src/plugins/old/id3v2_extractor.c b/src/plugins/old/id3v2_extractor.c
deleted file mode 100644
index 0991a42..0000000
--- a/src/plugins/old/id3v2_extractor.c
+++ /dev/null
@@ -1,753 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21
22#include "platform.h"
23#include "extractor.h"
24#ifndef MINGW
25#include <sys/mman.h>
26#endif
27#include "convert.h"
28
29#include "extractor_plugins.h"
30
31#define DEBUG_EXTRACT_ID3v2 0
32
/**
 * Payload layouts of the ID3v2 frames this plugin knows how to decode.
 * Each entry of the frame table selects one of these.
 */
enum Id3v2Fmt
{
  /** simple, 0-terminated string, prefixed by an encoding byte */
  T = 0,

  /** 0-terminated ASCII string, no encoding byte */
  U = 1,

  /** unsynchronised lyrics */
  UL = 2,

  /** synchronised lyrics */
  SL = 3,

  /** string with language prefix */
  L = 4,

  /** image */
  I = 5
};
42
43typedef struct
44{
45 const char *text;
46 enum EXTRACTOR_MetaType type;
47 enum Id3v2Fmt fmt;
48} Matches;
49
50static Matches tmap[] = {
51 /* skipping UFI */
52 {"TT1 ", EXTRACTOR_METATYPE_SECTION, T},
53 {"TT2 ", EXTRACTOR_METATYPE_TITLE, T},
54 {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T},
55 {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T},
56 {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T},
57 {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T},
58 {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T},
59 {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T},
60 {"TXT ", EXTRACTOR_METATYPE_WRITER, T},
61 {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T},
62 {"TCO ", EXTRACTOR_METATYPE_GENRE, T},
63 {"TAL ", EXTRACTOR_METATYPE_ALBUM, T},
64 {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T},
65 {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
66 {"TRC ", EXTRACTOR_METATYPE_ISRC, T},
67 {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
68 /*
69 FIXME: these two and TYE should be combined into
70 the actual publication date (if TRD is missing)
71 {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
72 {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
73 */
74 {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T},
75 {"TMT ", EXTRACTOR_METATYPE_SOURCE, T},
76 {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
77 {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
78 {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T},
79 {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T},
80 {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T},
81 {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
82 {"TOF ", EXTRACTOR_METATYPE_FILENAME, T},
83 {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
84 {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
85 /* skipping TDY, TKE */
86 {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
87 {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
88 {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
89 {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
90 /* skipping TXX */
91
92 {"WAF ", EXTRACTOR_METATYPE_URL, U},
93 {"WAR ", EXTRACTOR_METATYPE_URL, U},
94 {"WAS ", EXTRACTOR_METATYPE_URL, U},
95 {"WCM ", EXTRACTOR_METATYPE_URL, U},
96 {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U},
97 {"WCB ", EXTRACTOR_METATYPE_URL, U},
98 /* skipping WXX */
99 {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
100 /* skipping MCI */
101 /* skipping ETC */
102 /* skipping MLL */
103 /* skipping STC */
104 {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL},
105 {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL},
106 {"COM ", EXTRACTOR_METATYPE_COMMENT, L},
107 /* skipping RVA */
108 /* skipping EQU */
109 /* skipping REV */
110 {"PIC ", EXTRACTOR_METATYPE_PICTURE, I},
111 /* skipping GEN */
112 /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
113 /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
114 /* skipping BUF */
115 /* skipping CRM */
116 /* skipping CRA */
117 /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */
118
119
120 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
121 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
122 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
123 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
124 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
125 {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */
126 /* TDLY */
127 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
128 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
129 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
130 /* TIME, idv23 only */
131 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
132 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
133 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
134 /* TKEY */
135 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
136 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
137 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
138 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
139 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
140 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
141 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
142 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */
143 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
144 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
145 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
146 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
147 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
148 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
149 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
150 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
151 /* TRDA, idv23 only */
152 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
153 /* TRSO */
154 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */
155 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
156 /* TSSE */
157 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */
158 {"WCOM", EXTRACTOR_METATYPE_URL, U},
159 {"WCOP", EXTRACTOR_METATYPE_URL, U},
160 {"WOAF", EXTRACTOR_METATYPE_URL, U},
161 {"WOAS", EXTRACTOR_METATYPE_URL, U},
162 {"WORS", EXTRACTOR_METATYPE_URL, U},
163 {"WPAY", EXTRACTOR_METATYPE_URL, U},
164 {"WPUB", EXTRACTOR_METATYPE_URL, U},
165 {"WXXX", EXTRACTOR_METATYPE_URL, T},
166 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */
167 /* ... */
168 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
169 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
170 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
171 /* ... */
172 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
173 /* ... */
174 {"LINK", EXTRACTOR_METATYPE_URL, U},
175 /* ... */
176 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
177 /* ... */
178
179 /* new frames in id3v24 */
180 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
181 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
182 /* TDRC, TDRL, TDTG */
183 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
184 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
185 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
186 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
187 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
188 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
189 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
190 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
191
192 {NULL, 0, T},
193};
194
/**
 * Working state of the ID3v2 parser.
 */
struct id3v2_state
{
  /** current parser state (one of the ID3v2State values) */
  int state;

  /** tag size as decoded from the tag header */
  unsigned int tsize;

  /** size of the frame currently being processed */
  size_t csize;

  /** identifier of the frame currently being processed */
  char id[4];

  /** index of the current frame in the frame table, -1 if none */
  int32_t ti;

  /** major version byte taken from the tag header */
  char ver;

  /** non-zero if the tag header announces an extended header */
  char extended_header;

  /** flags taken from the current frame header */
  uint16_t frame_flags;

  /** heap-allocated MIME type of the picture being processed, or NULL */
  char *mime;
};
207
/**
 * States of the ID3v2 parser's state machine.
 */
enum ID3v2State
{
  /** parsing cannot continue */
  ID3V2_INVALID = -1,

  /** about to read the tag header (initial state) */
  ID3V2_READING_HEADER = 0,

  /** about to read a frame header */
  ID3V2_READING_FRAME_HEADER = 1,

  /** about to read an id3v23 extended header */
  ID3V23_READING_EXTENDED_HEADER = 2,

  /** about to read an id3v24 extended header */
  ID3V24_READING_EXTENDED_HEADER = 3,

  /** about to read the payload of a frame */
  ID3V2_READING_FRAME = 4
};
217
218struct id3v2_state *
219EXTRACTOR_id3v2_init_state_method ()
220{
221 struct id3v2_state *state;
222 state = malloc (sizeof (struct id3v2_state));
223 if (state == NULL)
224 return NULL;
225 memset (state, 0, sizeof (struct id3v2_state));
226 state->state = ID3V2_READING_HEADER;
227 state->ti = -1;
228 state->mime = NULL;
229 return state;
230}
231
232static int
233EXTRACTOR_id3v2_discard_state_method (struct id3v2_state *state)
234{
235 if (state != NULL)
236 {
237 if (state->mime != NULL)
238 free (state->mime);
239 free (state);
240 }
241 return 1;
242}
243
244static int
245find_type (const char *id, size_t len)
246{
247 int i;
248 for (i = 0; tmap[i].text != NULL; i++)
249 if (0 == strncmp (tmap[i].text, id, len))
250 return i;
251 return -1;
252}
253
254int
255EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin,
256 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
257{
258 int64_t offset = 0;
259 struct id3v2_state *state;
260 unsigned char *data;
261 char *word = NULL;
262 unsigned int off;
263 enum EXTRACTOR_MetaType type;
264 unsigned char picture_type;
265
266 if (plugin == NULL)
267 return 1;
268
269 state = EXTRACTOR_id3v2_init_state_method ();
270 if (state == NULL)
271 return 1;
272
273 while (1)
274 {
275 switch (state->state)
276 {
277 case ID3V2_INVALID:
278 plugin->seek_request = -1;
279 return EXTRACTOR_id3v2_discard_state_method (state);
280 case ID3V2_READING_HEADER:
281 /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq:
282 * Q: Where is an ID3v2 tag located in an MP3 file?
283 * A: It is most likely located at the beginning of the file. Look for the
284 * marker "ID3" in the first 3 bytes of the file. If it's not there, it
285 * could be at the end of the file (if the tag is ID3v2.4). Look for the
286 * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the
287 * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags
288 * in the actual MPEG stream, on an MPEG frame boundry. Almost nobody does
289 * this.
290 * Parsing of such tags will not be completely correct, because we can't
291 * seek backwards. (OK, now we CAN seek backwards, but we still need to mind the
292 * chunk size). We will have to seek to file_size - chunk_size instead
293 * (by the way, chunk size is theoretically unknown, LE is free to use any chunk
294 * size, even though plugins often make assumptions about chunk size being large
295 * enough to make one atomic read without seeking, if offset == 0) and search
296 * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before
297 * it (or 10 bytes before the end of file, if id3v1 is not there; not sure
298 * about APETAGs; we should probably just scan byte-by-byte from the end of file,
299 * until we hit 3DI, or reach the offset == 0), and use it set offset to the
300 * start of ID3v24 header, adjust the following file_position check and data
301 * indices (use offset), and otherwise proceed as normal (maybe file size checks
302 * along the way will have to be adjusted by -1, or made ">" instead of ">=";
303 * these problems do not arise for tags at the beginning of the file, since
304 * audio itself is usually at least 1-byte long; when the tag is at the end of
305 * file, these checks will have to be 100% correct).
306 * If there are two tags (at the beginning and at the end of the file),
307 * a SEEK in the one at the beginning of the file can be used to seek to the
308 * one at the end.
309 */
310 /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that
311 * tells the parser to augument id3v1 values with the values from id3v2 (if this
312 * flag is not set, id3v2 parser must discard id3v1 data).
313 * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored.
314 */
315 if (10 != pl_read (plugin, &data, 10))
316 {
317 state->state = ID3V2_INVALID;
318 break;
319 }
320 if ((data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/)
321 {
322 state->state = ID3V2_INVALID;
323 break;
324 }
325 state->ver = data[3];
326 if (state->ver == 0x02)
327 {
328 state->extended_header = 0;
329 }
330 else if ((state->ver == 0x03) || (state->ver == 0x04))
331 {
332 if ((data[5] & 0x80) > 0)
333 {
334 /* unsync is not supported in id3v23 or id3v24*/
335 state->state = ID3V2_INVALID;
336 break;
337 }
338 state->extended_header = (data[5] & 0x40) > 0;
339 if ((data[5] & 0x20) > 0)
340 {
341 /* experimental is not supported in id3v23 or id3v24*/
342 state->state = ID3V2_INVALID;
343 break;
344 }
345 }
346 state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
347 if (state->ver == 0x03 && state->extended_header)
348 state->state = ID3V23_READING_EXTENDED_HEADER;
349 else if (state->ver == 0x04 && state->extended_header)
350 state->state = ID3V24_READING_EXTENDED_HEADER;
351 else
352 state->state = ID3V2_READING_FRAME_HEADER;
353 break;
354 case ID3V23_READING_EXTENDED_HEADER:
355 if (10 != pl_read (plugin, &data, 10))
356 {
357 state->state = ID3V2_INVALID;
358 break;
359 }
360 if (state->ver == 0x03 && state->extended_header)
361 {
362 uint32_t extended_header_size;
363 extended_header_size = (((data[0]) << 24) | ((data[1]) << 16) | ((data[2]) << 8) | ((data[3]) << 0));
364 // padding = (((data[6]) << 24) | ((data[7]) << 16) | ((data[8]) << 8) | ((data[9]) << 0));
365 if (extended_header_size - 6 != pl_read (plugin, &data, extended_header_size - 6))
366 {
367 state->state = ID3V2_INVALID;
368 break;
369 }
370 }
371 break;
372 case ID3V24_READING_EXTENDED_HEADER:
373 if (4 != pl_read (plugin, &data, 4))
374 {
375 state->state = ID3V2_INVALID;
376 break;
377 }
378 if ((state->ver == 0x04) && (state->extended_header))
379 {
380 uint32_t extended_header_size;
381
382 extended_header_size = (((data[0]) << 24) |
383 ((data[1]) << 16) |
384 ((data[2]) << 8) |
385 ((data[3]) << 0));
386 if (extended_header_size != pl_read (plugin, &data, extended_header_size))
387 {
388 state->state = ID3V2_INVALID;
389 break;
390 }
391 }
392 break;
393 case ID3V2_READING_FRAME_HEADER:
394 if (state->ver == 0x02)
395 {
396 if (6 != pl_read (plugin, &data, 6))
397 {
398 state->state = ID3V2_INVALID;
399 break;
400 }
401 }
402 else if ((state->ver == 0x03) || (state->ver == 0x04))
403 {
404 if (10 != pl_read (plugin, &data, 10))
405 {
406 state->state = ID3V2_INVALID;
407 break;
408 }
409 }
410 if (state->ver == 0x02)
411 {
412 memcpy (state->id, &data[0], 3);
413 state->csize = (data[3] << 16) + (data[4] << 8) + data[5];
414 if (state->csize == 0)
415 {
416 state->state = ID3V2_INVALID;
417 break;
418 }
419 state->frame_flags = 0;
420 }
421 else if ((state->ver == 0x03) || (state->ver == 0x04))
422 {
423 memcpy (state->id, &data[0], 4);
424 if (state->ver == 0x03)
425 state->csize = (data[4] << 24) + (data[5] << 16) + (data[6] << 8) + data[7];
426 else if (state->ver == 0x04)
427 state->csize = ((data[4] & 0x7F) << 21) | ((data[5] & 0x7F) << 14) | ((data[6] & 0x7F) << 07) | ((data[7] & 0x7F) << 00);
428 if (state->csize == 0)
429 {
430 state->state = ID3V2_INVALID;
431 break;
432 }
433 state->frame_flags = (data[8] << 8) + data[9];
434 if (state->ver == 0x03)
435 {
436 if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ ||
437 ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
438 {
439 /* Skip to next frame header */
440 if (state->csize != pl_read (plugin, &data, state->csize))
441 state->state = ID3V2_INVALID;
442 break;
443 }
444 }
445 else if (state->ver == 0x04)
446 {
447 if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ ||
448 ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ ||
449 ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */)
450 {
451 /* Skip to next frame header */
452 if (state->csize != pl_read (plugin, &data, state->csize))
453 state->state = ID3V2_INVALID;
454 break;
455 }
456 if ((state->frame_flags & 0x01) > 0)
457 {
458 /* Skip data length indicator */
459 state->csize -= 4;
460 if (4 != pl_read (plugin, &data, 4))
461 {
462 state->state = ID3V2_INVALID;
463 break;
464 }
465 }
466 }
467 }
468
469 state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
470 if (state->ti == -1)
471 {
472 if (state->csize != pl_read (plugin, &data, state->csize))
473 state->state = ID3V2_INVALID;
474 break;
475 }
476 state->state = ID3V2_READING_FRAME;
477 break;
478 case ID3V2_READING_FRAME:
479 if (0 > (offset = pl_get_pos (plugin)))
480 {
481 state->state = ID3V2_INVALID;
482 break;
483 }
484 word = NULL;
485 if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
486 ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
487 {
488 /* "group" identifier, skip a byte */
489 if (1 != pl_read (plugin, &data, 1))
490 {
491 state->state = ID3V2_INVALID;
492 break;
493 }
494 state->csize--;
495 }
496 if (state->csize != pl_read (plugin, &data, state->csize))
497 {
498 state->state = ID3V2_INVALID;
499 break;
500 }
501 switch (tmap[state->ti].fmt)
502 {
503 case T:
504 if (data[0] == 0x00)
505 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
506 state->csize - 1, "ISO-8859-1");
507 else if (data[0] == 0x01)
508 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
509 state->csize - 1, "UCS-2");
510 else if ((state->ver == 0x04) && (data[0] == 0x02))
511 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
512 state->csize - 1, "UTF-16BE");
513 else if ((state->ver == 0x04) && (data[0] == 0x03))
514 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
515 state->csize - 1, "UTF-8");
516 else
517 /* bad encoding byte, try to convert from iso-8859-1 */
518 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1],
519 state->csize - 1, "ISO-8859-1");
520 break;
521 case U:
522 word = EXTRACTOR_common_convert_to_utf8 ((const char *) data,
523 state->csize, "ISO-8859-1");
524 break;
525 case UL:
526 if (state->csize < 6)
527 {
528 /* malformed */
529 state->state = ID3V2_INVALID;
530 break;
531 }
532 /* find end of description */
533 off = 4;
534 while ((off < state->csize) && (data[off] != '\0'))
535 off++;
536 if ((off >= state->csize) || (data[off] != '\0'))
537 {
538 /* malformed */
539 state->state = ID3V2_INVALID;
540 break;
541 }
542 off++;
543 if (data[0] == 0x00)
544 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
545 state->csize - off, "ISO-8859-1");
546 else if (data[0] == 0x01)
547 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
548 state->csize - off, "UCS-2");
549 else if ((state->ver == 0x04) && (data[0] == 0x02))
550 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
551 state->csize - off, "UTF-16BE");
552 else if ((state->ver == 0x04) && (data[0] == 0x03))
553 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
554 state->csize - off, "UTF-8");
555 else
556 /* bad encoding byte, try to convert from iso-8859-1 */
557 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
558 state->csize - off, "ISO-8859-1");
559 break;
560 case SL:
561 if (state->csize < 7)
562 {
563 /* malformed */
564 state->state = ID3V2_INVALID;
565 break;
566 }
567 if (data[0] == 0x00)
568 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
569 state->csize - 6, "ISO-8859-1");
570 else if (data[0] == 0x01)
571 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
572 state->csize - 6, "UCS-2");
573 else if ((state->ver == 0x04) && (data[0] == 0x02))
574 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
575 state->csize - 6, "UTF-16BE");
576 else if ((state->ver == 0x04) && (data[0] == 0x03))
577 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
578 state->csize - 6, "UTF-8");
579 else
580 /* bad encoding byte, try to convert from iso-8859-1 */
581 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6],
582 state->csize - 6, "ISO-8859-1");
583 break;
584 case L:
585 if (state->csize < 5)
586 {
587 /* malformed */
588 state->state = ID3V2_INVALID;
589 break;
590 }
591 /* find end of description */
592 off = 4;
593 while ((off < state->csize) && (data[off] != '\0'))
594 off++;
595 if ((off >= state->csize) || (data[off] != '\0'))
596 {
597 /* malformed */
598 state->state = ID3V2_INVALID;
599 break;
600 }
601 off++;
602
603 if (data[0] == 0x00)
604 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
605 state->csize - off, "ISO-8859-1");
606 else if (data[0] == 0x01)
607 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
608 state->csize - off, "UCS-2");
609 else if ((state->ver == 0x04) && (data[0] == 0x02))
610 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
611 state->csize - off, "UTF-1offBE");
612 else if ((state->ver == 0x04) && (data[0] == 0x03))
613 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
614 state->csize - off, "UTF-8");
615 else
616 /* bad encoding byte, try to convert from iso-8859-1 */
617 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off],
618 state->csize - off, "ISO-8859-1");
619 break;
620 case I:
621 if ( ( (state->ver == 0x02) &&
622 (state->csize < 7) ) ||
623 ( ( (state->ver == 0x03) ||
624 (state->ver == 0x04)) && (state->csize < 5)) )
625 {
626 /* malformed */
627 state->state = ID3V2_INVALID;
628 break;
629 }
630 if (state->mime != NULL)
631 free (state->mime);
632 state->mime = NULL;
633 if (state->ver == 0x02)
634 {
635 off = 5;
636 picture_type = data[4];
637 }
638 else if ((state->ver == 0x03) || (state->ver == 0x04))
639 {
640 off = 1;
641 while ((off < state->csize) && (data[off] != '\0'))
642 off++;
643 if ((off >= state->csize) || (data[off] != '\0'))
644 {
645 /* malformed */
646 state->state = ID3V2_INVALID;
647 break;
648 }
649 state->mime = malloc (off);
650 memcpy (state->mime, &data[1], off - 1);
651 state->mime[off - 1] = '\0';
652 off += 1;
653 picture_type = data[off];
654 off += 1;
655 /* find end of mime type*/
656 while ((off < state->csize) && (data[off] != '\0'))
657 off++;
658 if ((off >= state->csize) || (data[off] != '\0'))
659 {
660 free (state->mime);
661 state->mime = NULL;
662 /* malformed */
663 state->state = ID3V2_INVALID;
664 break;
665 }
666 off++;
667 }
668 switch (picture_type)
669 {
670 case 0x03:
671 case 0x04:
672 type = EXTRACTOR_METATYPE_COVER_PICTURE;
673 break;
674 case 0x07:
675 case 0x08:
676 case 0x09:
677 case 0x0A:
678 case 0x0B:
679 case 0x0C:
680 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
681 break;
682 case 0x0D:
683 case 0x0E:
684 case 0x0F:
685 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
686 break;
687 case 0x14:
688 type = EXTRACTOR_METATYPE_LOGO;
689 type = EXTRACTOR_METATYPE_LOGO;
690 break;
691 default:
692 type = EXTRACTOR_METATYPE_PICTURE;
693 break;
694 }
695 if (state->ver == 0x02)
696 {
697 if (0 == strncasecmp ("PNG", (const char *) &data[1], 3))
698 state->mime = strdup ("image/png");
699 else if (0 == strncasecmp ("JPG", (const char *) &data[1], 3))
700 state->mime = strdup ("image/jpeg");
701 else
702 state->mime = NULL;
703 }
704 else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL))
705 {
706 size_t mime_len = strlen (state->mime);
707 char *type_mime = malloc (mime_len + 6 + 1);
708 snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime);
709 free (state->mime);
710 state->mime = type_mime;
711 }
712 if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->")))
713 {
714 /* not supported */
715 free (state->mime);
716 state->mime = NULL;
717 }
718 else
719 {
720 if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[off], state->csize - off))
721 {
722 if (state->mime != NULL)
723 free (state->mime);
724 state->mime = NULL;
725 return 1;
726 }
727 if (state->mime != NULL)
728 free (state->mime);
729 state->mime = NULL;
730 }
731 word = NULL;
732 break;
733 default:
734 return 1;
735 }
736 if ((word != NULL) && (strlen (word) > 0))
737 {
738 if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1))
739 {
740 free (word);
741 return 1;
742 }
743 }
744 if (word != NULL)
745 free (word);
746 state->state = ID3V2_READING_FRAME_HEADER;
747 break;
748 }
749 }
750 return 1;
751}
752
753/* end of id3v2_extractor.c */