diff options
Diffstat (limited to 'src/plugins/old/id3v2_extractor.c')
-rw-r--r-- | src/plugins/old/id3v2_extractor.c | 753 |
1 files changed, 0 insertions, 753 deletions
diff --git a/src/plugins/old/id3v2_extractor.c b/src/plugins/old/id3v2_extractor.c deleted file mode 100644 index 0991a42..0000000 --- a/src/plugins/old/id3v2_extractor.c +++ /dev/null | |||
@@ -1,753 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004, 2006, 2009 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | |||
20 | */ | ||
21 | |||
22 | #include "platform.h" | ||
23 | #include "extractor.h" | ||
24 | #ifndef MINGW | ||
25 | #include <sys/mman.h> | ||
26 | #endif | ||
27 | #include "convert.h" | ||
28 | |||
29 | #include "extractor_plugins.h" | ||
30 | |||
31 | #define DEBUG_EXTRACT_ID3v2 0 | ||
32 | |||
/**
 * Payload layouts of the ID3v2 frames this plugin knows how to decode.
 * Each entry of the frame table selects one of these.
 */
enum Id3v2Fmt
{
  /** simple, 0-terminated string, prefixed by an encoding byte */
  T = 0,

  /** 0-terminated ASCII string, no encoding byte */
  U = 1,

  /** unsynchronised lyrics */
  UL = 2,

  /** synchronised lyrics */
  SL = 3,

  /** string with a language prefix */
  L = 4,

  /** image */
  I = 5
};
42 | |||
43 | typedef struct | ||
44 | { | ||
45 | const char *text; | ||
46 | enum EXTRACTOR_MetaType type; | ||
47 | enum Id3v2Fmt fmt; | ||
48 | } Matches; | ||
49 | |||
50 | static Matches tmap[] = { | ||
51 | /* skipping UFI */ | ||
52 | {"TT1 ", EXTRACTOR_METATYPE_SECTION, T}, | ||
53 | {"TT2 ", EXTRACTOR_METATYPE_TITLE, T}, | ||
54 | {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
55 | {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T}, | ||
56 | {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
57 | {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
58 | {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
59 | {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
60 | {"TXT ", EXTRACTOR_METATYPE_WRITER, T}, | ||
61 | {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
62 | {"TCO ", EXTRACTOR_METATYPE_GENRE, T}, | ||
63 | {"TAL ", EXTRACTOR_METATYPE_ALBUM, T}, | ||
64 | {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
65 | {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
66 | {"TRC ", EXTRACTOR_METATYPE_ISRC, T}, | ||
67 | {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, | ||
68 | /* | ||
69 | FIXME: these two and TYE should be combined into | ||
70 | the actual publication date (if TRD is missing) | ||
71 | {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, | ||
72 | {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, | ||
73 | */ | ||
74 | {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T}, | ||
75 | {"TMT ", EXTRACTOR_METATYPE_SOURCE, T}, | ||
76 | {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
77 | {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
78 | {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
79 | {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
80 | {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
81 | {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, | ||
82 | {"TOF ", EXTRACTOR_METATYPE_FILENAME, T}, | ||
83 | {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
84 | {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | ||
85 | /* skipping TDY, TKE */ | ||
86 | {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
87 | {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
88 | {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
89 | {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | ||
90 | /* skipping TXX */ | ||
91 | |||
92 | {"WAF ", EXTRACTOR_METATYPE_URL, U}, | ||
93 | {"WAR ", EXTRACTOR_METATYPE_URL, U}, | ||
94 | {"WAS ", EXTRACTOR_METATYPE_URL, U}, | ||
95 | {"WCM ", EXTRACTOR_METATYPE_URL, U}, | ||
96 | {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U}, | ||
97 | {"WCB ", EXTRACTOR_METATYPE_URL, U}, | ||
98 | /* skipping WXX */ | ||
99 | {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
100 | /* skipping MCI */ | ||
101 | /* skipping ETC */ | ||
102 | /* skipping MLL */ | ||
103 | /* skipping STC */ | ||
104 | {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL}, | ||
105 | {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL}, | ||
106 | {"COM ", EXTRACTOR_METATYPE_COMMENT, L}, | ||
107 | /* skipping RVA */ | ||
108 | /* skipping EQU */ | ||
109 | /* skipping REV */ | ||
110 | {"PIC ", EXTRACTOR_METATYPE_PICTURE, I}, | ||
111 | /* skipping GEN */ | ||
112 | /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ | ||
113 | /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ | ||
114 | /* skipping BUF */ | ||
115 | /* skipping CRM */ | ||
116 | /* skipping CRA */ | ||
117 | /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */ | ||
118 | |||
119 | |||
120 | {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, | ||
121 | {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
122 | {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
123 | {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
124 | {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
125 | {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */ | ||
126 | /* TDLY */ | ||
127 | {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
128 | {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, | ||
129 | {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
130 | /* TIME, idv23 only */ | ||
131 | {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, | ||
132 | {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, | ||
133 | {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
134 | /* TKEY */ | ||
135 | {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
136 | {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
137 | {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, | ||
138 | {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
139 | {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
140 | {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
141 | {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, | ||
142 | {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */ | ||
143 | {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, | ||
144 | {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, | ||
145 | {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
146 | {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
147 | {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
148 | {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
149 | {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
150 | {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
151 | /* TRDA, idv23 only */ | ||
152 | {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, | ||
153 | /* TRSO */ | ||
154 | {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */ | ||
155 | {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, | ||
156 | /* TSSE */ | ||
157 | {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */ | ||
158 | {"WCOM", EXTRACTOR_METATYPE_URL, U}, | ||
159 | {"WCOP", EXTRACTOR_METATYPE_URL, U}, | ||
160 | {"WOAF", EXTRACTOR_METATYPE_URL, U}, | ||
161 | {"WOAS", EXTRACTOR_METATYPE_URL, U}, | ||
162 | {"WORS", EXTRACTOR_METATYPE_URL, U}, | ||
163 | {"WPAY", EXTRACTOR_METATYPE_URL, U}, | ||
164 | {"WPUB", EXTRACTOR_METATYPE_URL, U}, | ||
165 | {"WXXX", EXTRACTOR_METATYPE_URL, T}, | ||
166 | {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */ | ||
167 | /* ... */ | ||
168 | {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, | ||
169 | {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, | ||
170 | {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
171 | /* ... */ | ||
172 | {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
173 | /* ... */ | ||
174 | {"LINK", EXTRACTOR_METATYPE_URL, U}, | ||
175 | /* ... */ | ||
176 | {"USER", EXTRACTOR_METATYPE_LICENSE, T}, | ||
177 | /* ... */ | ||
178 | |||
179 | /* new frames in id3v24 */ | ||
180 | /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */ | ||
181 | {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T}, | ||
182 | /* TDRC, TDRL, TDTG */ | ||
183 | {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
184 | {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T}, | ||
185 | {"TMOO", EXTRACTOR_METATYPE_MOOD, T}, | ||
186 | {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
187 | {"TSOA", EXTRACTOR_METATYPE_ALBUM, T}, | ||
188 | {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
189 | {"TSOT", EXTRACTOR_METATYPE_TITLE, T}, | ||
190 | {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T}, | ||
191 | |||
192 | {NULL, 0, T}, | ||
193 | }; | ||
194 | |||
/**
 * Working state of the ID3v2 parser while it walks through a tag.
 */
struct id3v2_state
{
  /** current parser state, one of the ID3v2State values */
  int state;

  /** tag size as decoded from the tag header */
  unsigned int tsize;

  /** number of payload bytes of the frame being processed */
  size_t csize;

  /** identifier of the frame being processed (3 or 4 bytes, not 0-terminated) */
  char id[4];

  /** index of the frame in the frame table, -1 if there is none */
  int32_t ti;

  /** major version byte of the tag (0x02, 0x03 or 0x04) */
  char ver;

  /** non-zero if the tag header announces an extended header */
  char extended_header;

  /** flag bytes of the current frame header (always 0 for version 0x02) */
  uint16_t frame_flags;

  /** heap-allocated MIME type of the picture being processed, or NULL */
  char *mime;
};
207 | |||
/**
 * States of the ID3v2 parser.
 */
enum ID3v2State
{
  /** parsing failed or is finished; the extract loop terminates */
  ID3V2_INVALID = -1,

  /** about to read the 10-byte tag header */
  ID3V2_READING_HEADER = 0,

  /** about to read the header of the next frame */
  ID3V2_READING_FRAME_HEADER = 1,

  /** about to read the extended header of a version 0x03 tag */
  ID3V23_READING_EXTENDED_HEADER = 2,

  /** about to read the extended header of a version 0x04 tag */
  ID3V24_READING_EXTENDED_HEADER = 3,

  /** about to read and decode the payload of the current frame */
  ID3V2_READING_FRAME = 4
};
217 | |||
218 | struct id3v2_state * | ||
219 | EXTRACTOR_id3v2_init_state_method () | ||
220 | { | ||
221 | struct id3v2_state *state; | ||
222 | state = malloc (sizeof (struct id3v2_state)); | ||
223 | if (state == NULL) | ||
224 | return NULL; | ||
225 | memset (state, 0, sizeof (struct id3v2_state)); | ||
226 | state->state = ID3V2_READING_HEADER; | ||
227 | state->ti = -1; | ||
228 | state->mime = NULL; | ||
229 | return state; | ||
230 | } | ||
231 | |||
232 | static int | ||
233 | EXTRACTOR_id3v2_discard_state_method (struct id3v2_state *state) | ||
234 | { | ||
235 | if (state != NULL) | ||
236 | { | ||
237 | if (state->mime != NULL) | ||
238 | free (state->mime); | ||
239 | free (state); | ||
240 | } | ||
241 | return 1; | ||
242 | } | ||
243 | |||
244 | static int | ||
245 | find_type (const char *id, size_t len) | ||
246 | { | ||
247 | int i; | ||
248 | for (i = 0; tmap[i].text != NULL; i++) | ||
249 | if (0 == strncmp (tmap[i].text, id, len)) | ||
250 | return i; | ||
251 | return -1; | ||
252 | } | ||
253 | |||
254 | int | ||
255 | EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin, | ||
256 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
257 | { | ||
258 | int64_t offset = 0; | ||
259 | struct id3v2_state *state; | ||
260 | unsigned char *data; | ||
261 | char *word = NULL; | ||
262 | unsigned int off; | ||
263 | enum EXTRACTOR_MetaType type; | ||
264 | unsigned char picture_type; | ||
265 | |||
266 | if (plugin == NULL) | ||
267 | return 1; | ||
268 | |||
269 | state = EXTRACTOR_id3v2_init_state_method (); | ||
270 | if (state == NULL) | ||
271 | return 1; | ||
272 | |||
273 | while (1) | ||
274 | { | ||
275 | switch (state->state) | ||
276 | { | ||
277 | case ID3V2_INVALID: | ||
278 | plugin->seek_request = -1; | ||
279 | return EXTRACTOR_id3v2_discard_state_method (state); | ||
280 | case ID3V2_READING_HEADER: | ||
281 | /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq: | ||
282 | * Q: Where is an ID3v2 tag located in an MP3 file? | ||
283 | * A: It is most likely located at the beginning of the file. Look for the | ||
284 | * marker "ID3" in the first 3 bytes of the file. If it's not there, it | ||
285 | * could be at the end of the file (if the tag is ID3v2.4). Look for the | ||
286 | * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the | ||
287 | * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags | ||
288 | * in the actual MPEG stream, on an MPEG frame boundry. Almost nobody does | ||
289 | * this. | ||
290 | * Parsing of such tags will not be completely correct, because we can't | ||
291 | * seek backwards. (OK, now we CAN seek backwards, but we still need to mind the | ||
292 | * chunk size). We will have to seek to file_size - chunk_size instead | ||
293 | * (by the way, chunk size is theoretically unknown, LE is free to use any chunk | ||
294 | * size, even though plugins often make assumptions about chunk size being large | ||
295 | * enough to make one atomic read without seeking, if offset == 0) and search | ||
296 | * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before | ||
297 | * it (or 10 bytes before the end of file, if id3v1 is not there; not sure | ||
298 | * about APETAGs; we should probably just scan byte-by-byte from the end of file, | ||
299 | * until we hit 3DI, or reach the offset == 0), and use it set offset to the | ||
300 | * start of ID3v24 header, adjust the following file_position check and data | ||
301 | * indices (use offset), and otherwise proceed as normal (maybe file size checks | ||
302 | * along the way will have to be adjusted by -1, or made ">" instead of ">="; | ||
303 | * these problems do not arise for tags at the beginning of the file, since | ||
304 | * audio itself is usually at least 1-byte long; when the tag is at the end of | ||
305 | * file, these checks will have to be 100% correct). | ||
306 | * If there are two tags (at the beginning and at the end of the file), | ||
307 | * a SEEK in the one at the beginning of the file can be used to seek to the | ||
308 | * one at the end. | ||
309 | */ | ||
310 | /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that | ||
311 | * tells the parser to augument id3v1 values with the values from id3v2 (if this | ||
312 | * flag is not set, id3v2 parser must discard id3v1 data). | ||
313 | * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored. | ||
314 | */ | ||
315 | if (10 != pl_read (plugin, &data, 10)) | ||
316 | { | ||
317 | state->state = ID3V2_INVALID; | ||
318 | break; | ||
319 | } | ||
320 | if ((data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/) | ||
321 | { | ||
322 | state->state = ID3V2_INVALID; | ||
323 | break; | ||
324 | } | ||
325 | state->ver = data[3]; | ||
326 | if (state->ver == 0x02) | ||
327 | { | ||
328 | state->extended_header = 0; | ||
329 | } | ||
330 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
331 | { | ||
332 | if ((data[5] & 0x80) > 0) | ||
333 | { | ||
334 | /* unsync is not supported in id3v23 or id3v24*/ | ||
335 | state->state = ID3V2_INVALID; | ||
336 | break; | ||
337 | } | ||
338 | state->extended_header = (data[5] & 0x40) > 0; | ||
339 | if ((data[5] & 0x20) > 0) | ||
340 | { | ||
341 | /* experimental is not supported in id3v23 or id3v24*/ | ||
342 | state->state = ID3V2_INVALID; | ||
343 | break; | ||
344 | } | ||
345 | } | ||
346 | state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); | ||
347 | if (state->ver == 0x03 && state->extended_header) | ||
348 | state->state = ID3V23_READING_EXTENDED_HEADER; | ||
349 | else if (state->ver == 0x04 && state->extended_header) | ||
350 | state->state = ID3V24_READING_EXTENDED_HEADER; | ||
351 | else | ||
352 | state->state = ID3V2_READING_FRAME_HEADER; | ||
353 | break; | ||
354 | case ID3V23_READING_EXTENDED_HEADER: | ||
355 | if (10 != pl_read (plugin, &data, 10)) | ||
356 | { | ||
357 | state->state = ID3V2_INVALID; | ||
358 | break; | ||
359 | } | ||
360 | if (state->ver == 0x03 && state->extended_header) | ||
361 | { | ||
362 | uint32_t extended_header_size; | ||
363 | extended_header_size = (((data[0]) << 24) | ((data[1]) << 16) | ((data[2]) << 8) | ((data[3]) << 0)); | ||
364 | // padding = (((data[6]) << 24) | ((data[7]) << 16) | ((data[8]) << 8) | ((data[9]) << 0)); | ||
365 | if (extended_header_size - 6 != pl_read (plugin, &data, extended_header_size - 6)) | ||
366 | { | ||
367 | state->state = ID3V2_INVALID; | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | break; | ||
372 | case ID3V24_READING_EXTENDED_HEADER: | ||
373 | if (4 != pl_read (plugin, &data, 4)) | ||
374 | { | ||
375 | state->state = ID3V2_INVALID; | ||
376 | break; | ||
377 | } | ||
378 | if ((state->ver == 0x04) && (state->extended_header)) | ||
379 | { | ||
380 | uint32_t extended_header_size; | ||
381 | |||
382 | extended_header_size = (((data[0]) << 24) | | ||
383 | ((data[1]) << 16) | | ||
384 | ((data[2]) << 8) | | ||
385 | ((data[3]) << 0)); | ||
386 | if (extended_header_size != pl_read (plugin, &data, extended_header_size)) | ||
387 | { | ||
388 | state->state = ID3V2_INVALID; | ||
389 | break; | ||
390 | } | ||
391 | } | ||
392 | break; | ||
393 | case ID3V2_READING_FRAME_HEADER: | ||
394 | if (state->ver == 0x02) | ||
395 | { | ||
396 | if (6 != pl_read (plugin, &data, 6)) | ||
397 | { | ||
398 | state->state = ID3V2_INVALID; | ||
399 | break; | ||
400 | } | ||
401 | } | ||
402 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
403 | { | ||
404 | if (10 != pl_read (plugin, &data, 10)) | ||
405 | { | ||
406 | state->state = ID3V2_INVALID; | ||
407 | break; | ||
408 | } | ||
409 | } | ||
410 | if (state->ver == 0x02) | ||
411 | { | ||
412 | memcpy (state->id, &data[0], 3); | ||
413 | state->csize = (data[3] << 16) + (data[4] << 8) + data[5]; | ||
414 | if (state->csize == 0) | ||
415 | { | ||
416 | state->state = ID3V2_INVALID; | ||
417 | break; | ||
418 | } | ||
419 | state->frame_flags = 0; | ||
420 | } | ||
421 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
422 | { | ||
423 | memcpy (state->id, &data[0], 4); | ||
424 | if (state->ver == 0x03) | ||
425 | state->csize = (data[4] << 24) + (data[5] << 16) + (data[6] << 8) + data[7]; | ||
426 | else if (state->ver == 0x04) | ||
427 | state->csize = ((data[4] & 0x7F) << 21) | ((data[5] & 0x7F) << 14) | ((data[6] & 0x7F) << 07) | ((data[7] & 0x7F) << 00); | ||
428 | if (state->csize == 0) | ||
429 | { | ||
430 | state->state = ID3V2_INVALID; | ||
431 | break; | ||
432 | } | ||
433 | state->frame_flags = (data[8] << 8) + data[9]; | ||
434 | if (state->ver == 0x03) | ||
435 | { | ||
436 | if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ || | ||
437 | ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */) | ||
438 | { | ||
439 | /* Skip to next frame header */ | ||
440 | if (state->csize != pl_read (plugin, &data, state->csize)) | ||
441 | state->state = ID3V2_INVALID; | ||
442 | break; | ||
443 | } | ||
444 | } | ||
445 | else if (state->ver == 0x04) | ||
446 | { | ||
447 | if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ || | ||
448 | ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ || | ||
449 | ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */) | ||
450 | { | ||
451 | /* Skip to next frame header */ | ||
452 | if (state->csize != pl_read (plugin, &data, state->csize)) | ||
453 | state->state = ID3V2_INVALID; | ||
454 | break; | ||
455 | } | ||
456 | if ((state->frame_flags & 0x01) > 0) | ||
457 | { | ||
458 | /* Skip data length indicator */ | ||
459 | state->csize -= 4; | ||
460 | if (4 != pl_read (plugin, &data, 4)) | ||
461 | { | ||
462 | state->state = ID3V2_INVALID; | ||
463 | break; | ||
464 | } | ||
465 | } | ||
466 | } | ||
467 | } | ||
468 | |||
469 | state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0)); | ||
470 | if (state->ti == -1) | ||
471 | { | ||
472 | if (state->csize != pl_read (plugin, &data, state->csize)) | ||
473 | state->state = ID3V2_INVALID; | ||
474 | break; | ||
475 | } | ||
476 | state->state = ID3V2_READING_FRAME; | ||
477 | break; | ||
478 | case ID3V2_READING_FRAME: | ||
479 | if (0 > (offset = pl_get_pos (plugin))) | ||
480 | { | ||
481 | state->state = ID3V2_INVALID; | ||
482 | break; | ||
483 | } | ||
484 | word = NULL; | ||
485 | if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) || | ||
486 | ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0))) | ||
487 | { | ||
488 | /* "group" identifier, skip a byte */ | ||
489 | if (1 != pl_read (plugin, &data, 1)) | ||
490 | { | ||
491 | state->state = ID3V2_INVALID; | ||
492 | break; | ||
493 | } | ||
494 | state->csize--; | ||
495 | } | ||
496 | if (state->csize != pl_read (plugin, &data, state->csize)) | ||
497 | { | ||
498 | state->state = ID3V2_INVALID; | ||
499 | break; | ||
500 | } | ||
501 | switch (tmap[state->ti].fmt) | ||
502 | { | ||
503 | case T: | ||
504 | if (data[0] == 0x00) | ||
505 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1], | ||
506 | state->csize - 1, "ISO-8859-1"); | ||
507 | else if (data[0] == 0x01) | ||
508 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1], | ||
509 | state->csize - 1, "UCS-2"); | ||
510 | else if ((state->ver == 0x04) && (data[0] == 0x02)) | ||
511 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1], | ||
512 | state->csize - 1, "UTF-16BE"); | ||
513 | else if ((state->ver == 0x04) && (data[0] == 0x03)) | ||
514 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1], | ||
515 | state->csize - 1, "UTF-8"); | ||
516 | else | ||
517 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
518 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[1], | ||
519 | state->csize - 1, "ISO-8859-1"); | ||
520 | break; | ||
521 | case U: | ||
522 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) data, | ||
523 | state->csize, "ISO-8859-1"); | ||
524 | break; | ||
525 | case UL: | ||
526 | if (state->csize < 6) | ||
527 | { | ||
528 | /* malformed */ | ||
529 | state->state = ID3V2_INVALID; | ||
530 | break; | ||
531 | } | ||
532 | /* find end of description */ | ||
533 | off = 4; | ||
534 | while ((off < state->csize) && (data[off] != '\0')) | ||
535 | off++; | ||
536 | if ((off >= state->csize) || (data[off] != '\0')) | ||
537 | { | ||
538 | /* malformed */ | ||
539 | state->state = ID3V2_INVALID; | ||
540 | break; | ||
541 | } | ||
542 | off++; | ||
543 | if (data[0] == 0x00) | ||
544 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
545 | state->csize - off, "ISO-8859-1"); | ||
546 | else if (data[0] == 0x01) | ||
547 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
548 | state->csize - off, "UCS-2"); | ||
549 | else if ((state->ver == 0x04) && (data[0] == 0x02)) | ||
550 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
551 | state->csize - off, "UTF-16BE"); | ||
552 | else if ((state->ver == 0x04) && (data[0] == 0x03)) | ||
553 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
554 | state->csize - off, "UTF-8"); | ||
555 | else | ||
556 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
557 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
558 | state->csize - off, "ISO-8859-1"); | ||
559 | break; | ||
560 | case SL: | ||
561 | if (state->csize < 7) | ||
562 | { | ||
563 | /* malformed */ | ||
564 | state->state = ID3V2_INVALID; | ||
565 | break; | ||
566 | } | ||
567 | if (data[0] == 0x00) | ||
568 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6], | ||
569 | state->csize - 6, "ISO-8859-1"); | ||
570 | else if (data[0] == 0x01) | ||
571 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6], | ||
572 | state->csize - 6, "UCS-2"); | ||
573 | else if ((state->ver == 0x04) && (data[0] == 0x02)) | ||
574 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6], | ||
575 | state->csize - 6, "UTF-16BE"); | ||
576 | else if ((state->ver == 0x04) && (data[0] == 0x03)) | ||
577 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6], | ||
578 | state->csize - 6, "UTF-8"); | ||
579 | else | ||
580 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
581 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[6], | ||
582 | state->csize - 6, "ISO-8859-1"); | ||
583 | break; | ||
584 | case L: | ||
585 | if (state->csize < 5) | ||
586 | { | ||
587 | /* malformed */ | ||
588 | state->state = ID3V2_INVALID; | ||
589 | break; | ||
590 | } | ||
591 | /* find end of description */ | ||
592 | off = 4; | ||
593 | while ((off < state->csize) && (data[off] != '\0')) | ||
594 | off++; | ||
595 | if ((off >= state->csize) || (data[off] != '\0')) | ||
596 | { | ||
597 | /* malformed */ | ||
598 | state->state = ID3V2_INVALID; | ||
599 | break; | ||
600 | } | ||
601 | off++; | ||
602 | |||
603 | if (data[0] == 0x00) | ||
604 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
605 | state->csize - off, "ISO-8859-1"); | ||
606 | else if (data[0] == 0x01) | ||
607 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
608 | state->csize - off, "UCS-2"); | ||
609 | else if ((state->ver == 0x04) && (data[0] == 0x02)) | ||
610 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
611 | state->csize - off, "UTF-1offBE"); | ||
612 | else if ((state->ver == 0x04) && (data[0] == 0x03)) | ||
613 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
614 | state->csize - off, "UTF-8"); | ||
615 | else | ||
616 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
617 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[off], | ||
618 | state->csize - off, "ISO-8859-1"); | ||
619 | break; | ||
620 | case I: | ||
621 | if ( ( (state->ver == 0x02) && | ||
622 | (state->csize < 7) ) || | ||
623 | ( ( (state->ver == 0x03) || | ||
624 | (state->ver == 0x04)) && (state->csize < 5)) ) | ||
625 | { | ||
626 | /* malformed */ | ||
627 | state->state = ID3V2_INVALID; | ||
628 | break; | ||
629 | } | ||
630 | if (state->mime != NULL) | ||
631 | free (state->mime); | ||
632 | state->mime = NULL; | ||
633 | if (state->ver == 0x02) | ||
634 | { | ||
635 | off = 5; | ||
636 | picture_type = data[4]; | ||
637 | } | ||
638 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
639 | { | ||
640 | off = 1; | ||
641 | while ((off < state->csize) && (data[off] != '\0')) | ||
642 | off++; | ||
643 | if ((off >= state->csize) || (data[off] != '\0')) | ||
644 | { | ||
645 | /* malformed */ | ||
646 | state->state = ID3V2_INVALID; | ||
647 | break; | ||
648 | } | ||
649 | state->mime = malloc (off); | ||
650 | memcpy (state->mime, &data[1], off - 1); | ||
651 | state->mime[off - 1] = '\0'; | ||
652 | off += 1; | ||
653 | picture_type = data[off]; | ||
654 | off += 1; | ||
655 | /* find end of mime type*/ | ||
656 | while ((off < state->csize) && (data[off] != '\0')) | ||
657 | off++; | ||
658 | if ((off >= state->csize) || (data[off] != '\0')) | ||
659 | { | ||
660 | free (state->mime); | ||
661 | state->mime = NULL; | ||
662 | /* malformed */ | ||
663 | state->state = ID3V2_INVALID; | ||
664 | break; | ||
665 | } | ||
666 | off++; | ||
667 | } | ||
668 | switch (picture_type) | ||
669 | { | ||
670 | case 0x03: | ||
671 | case 0x04: | ||
672 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
673 | break; | ||
674 | case 0x07: | ||
675 | case 0x08: | ||
676 | case 0x09: | ||
677 | case 0x0A: | ||
678 | case 0x0B: | ||
679 | case 0x0C: | ||
680 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
681 | break; | ||
682 | case 0x0D: | ||
683 | case 0x0E: | ||
684 | case 0x0F: | ||
685 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
686 | break; | ||
687 | case 0x14: | ||
688 | type = EXTRACTOR_METATYPE_LOGO; | ||
689 | type = EXTRACTOR_METATYPE_LOGO; | ||
690 | break; | ||
691 | default: | ||
692 | type = EXTRACTOR_METATYPE_PICTURE; | ||
693 | break; | ||
694 | } | ||
695 | if (state->ver == 0x02) | ||
696 | { | ||
697 | if (0 == strncasecmp ("PNG", (const char *) &data[1], 3)) | ||
698 | state->mime = strdup ("image/png"); | ||
699 | else if (0 == strncasecmp ("JPG", (const char *) &data[1], 3)) | ||
700 | state->mime = strdup ("image/jpeg"); | ||
701 | else | ||
702 | state->mime = NULL; | ||
703 | } | ||
704 | else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL)) | ||
705 | { | ||
706 | size_t mime_len = strlen (state->mime); | ||
707 | char *type_mime = malloc (mime_len + 6 + 1); | ||
708 | snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime); | ||
709 | free (state->mime); | ||
710 | state->mime = type_mime; | ||
711 | } | ||
712 | if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->"))) | ||
713 | { | ||
714 | /* not supported */ | ||
715 | free (state->mime); | ||
716 | state->mime = NULL; | ||
717 | } | ||
718 | else | ||
719 | { | ||
720 | if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[off], state->csize - off)) | ||
721 | { | ||
722 | if (state->mime != NULL) | ||
723 | free (state->mime); | ||
724 | state->mime = NULL; | ||
725 | return 1; | ||
726 | } | ||
727 | if (state->mime != NULL) | ||
728 | free (state->mime); | ||
729 | state->mime = NULL; | ||
730 | } | ||
731 | word = NULL; | ||
732 | break; | ||
733 | default: | ||
734 | return 1; | ||
735 | } | ||
736 | if ((word != NULL) && (strlen (word) > 0)) | ||
737 | { | ||
738 | if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1)) | ||
739 | { | ||
740 | free (word); | ||
741 | return 1; | ||
742 | } | ||
743 | } | ||
744 | if (word != NULL) | ||
745 | free (word); | ||
746 | state->state = ID3V2_READING_FRAME_HEADER; | ||
747 | break; | ||
748 | } | ||
749 | } | ||
750 | return 1; | ||
751 | } | ||
752 | |||
753 | /* end of id3v2_extractor.c */ | ||