diff options
Diffstat (limited to 'src/plugins/id3v2_extractor.c')
-rw-r--r-- | src/plugins/id3v2_extractor.c | 957 |
1 files changed, 684 insertions, 273 deletions
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c index 4f50d05..0302dc6 100644 --- a/src/plugins/id3v2_extractor.c +++ b/src/plugins/id3v2_extractor.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #endif | 26 | #endif |
27 | #include "convert.h" | 27 | #include "convert.h" |
28 | 28 | ||
29 | #include "extractor_plugins.h" | ||
30 | |||
29 | #define DEBUG_EXTRACT_ID3v2 0 | 31 | #define DEBUG_EXTRACT_ID3v2 0 |
30 | 32 | ||
31 | enum Id3v2Fmt | 33 | enum Id3v2Fmt |
@@ -47,314 +49,723 @@ typedef struct | |||
47 | 49 | ||
48 | static Matches tmap[] = { | 50 | static Matches tmap[] = { |
49 | /* skipping UFI */ | 51 | /* skipping UFI */ |
50 | {"TT1", EXTRACTOR_METATYPE_SECTION, T}, | 52 | {"TT1 ", EXTRACTOR_METATYPE_SECTION, T}, |
51 | {"TT2", EXTRACTOR_METATYPE_TITLE, T}, | 53 | {"TT2 ", EXTRACTOR_METATYPE_TITLE, T}, |
52 | {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | 54 | {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T}, |
53 | {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, | 55 | {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T}, |
54 | {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, | 56 | {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T}, |
55 | {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | 57 | {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T}, |
56 | {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | 58 | {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T}, |
57 | {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, | 59 | {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T}, |
58 | {"TXT", EXTRACTOR_METATYPE_WRITER, T}, | 60 | {"TXT ", EXTRACTOR_METATYPE_WRITER, T}, |
59 | {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, | 61 | {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T}, |
60 | {"TCO", EXTRACTOR_METATYPE_GENRE, T}, | 62 | {"TCO ", EXTRACTOR_METATYPE_GENRE, T}, |
61 | {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, | 63 | {"TAL ", EXTRACTOR_METATYPE_ALBUM, T}, |
62 | {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | 64 | {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T}, |
63 | {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | 65 | {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, |
64 | {"TRC", EXTRACTOR_METATYPE_ISRC, T}, | 66 | {"TRC ", EXTRACTOR_METATYPE_ISRC, T}, |
65 | {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, | 67 | {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, |
66 | /* | 68 | /* |
67 | FIXME: these two and TYE should be combined into | 69 | FIXME: these two and TYE should be combined into |
68 | the actual publication date (if TRD is missing) | 70 | the actual publication date (if TRD is missing) |
69 | {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, | 71 | {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
70 | {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, | 72 | {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
71 | */ | 73 | */ |
72 | {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, | 74 | {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T}, |
73 | {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, | 75 | {"TMT ", EXTRACTOR_METATYPE_SOURCE, T}, |
74 | {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | 76 | {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, |
75 | {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | 77 | {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, |
76 | {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, | 78 | {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T}, |
77 | {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, | 79 | {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T}, |
78 | {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, | 80 | {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T}, |
79 | {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, | 81 | {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, |
80 | {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, | 82 | {"TOF ", EXTRACTOR_METATYPE_FILENAME, T}, |
81 | {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | 83 | {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ |
82 | {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | 84 | {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, |
83 | /* skipping TDY, TKE */ | 85 | /* skipping TDY, TKE */ |
84 | {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | 86 | {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, |
85 | {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | 87 | {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, |
86 | {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | 88 | {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, |
87 | {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | 89 | {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, |
88 | /* skipping TXX */ | 90 | /* skipping TXX */ |
89 | 91 | ||
90 | {"WAF", EXTRACTOR_METATYPE_URL, U}, | 92 | {"WAF ", EXTRACTOR_METATYPE_URL, U}, |
91 | {"WAR", EXTRACTOR_METATYPE_URL, U}, | 93 | {"WAR ", EXTRACTOR_METATYPE_URL, U}, |
92 | {"WAS", EXTRACTOR_METATYPE_URL, U}, | 94 | {"WAS ", EXTRACTOR_METATYPE_URL, U}, |
93 | {"WCM", EXTRACTOR_METATYPE_URL, U}, | 95 | {"WCM ", EXTRACTOR_METATYPE_URL, U}, |
94 | {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, | 96 | {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U}, |
95 | {"WCB", EXTRACTOR_METATYPE_URL, U}, | 97 | {"WCB ", EXTRACTOR_METATYPE_URL, U}, |
96 | /* skipping WXX */ | 98 | /* skipping WXX */ |
97 | {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | 99 | {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, |
98 | /* skipping MCI */ | 100 | /* skipping MCI */ |
99 | /* skipping ETC */ | 101 | /* skipping ETC */ |
100 | /* skipping MLL */ | 102 | /* skipping MLL */ |
101 | /* skipping STC */ | 103 | /* skipping STC */ |
102 | {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, | 104 | {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL}, |
103 | {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, | 105 | {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL}, |
104 | {"COM", EXTRACTOR_METATYPE_COMMENT, L}, | 106 | {"COM ", EXTRACTOR_METATYPE_COMMENT, L}, |
105 | /* skipping RVA */ | 107 | /* skipping RVA */ |
106 | /* skipping EQU */ | 108 | /* skipping EQU */ |
107 | /* skipping REV */ | 109 | /* skipping REV */ |
108 | {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, | 110 | {"PIC ", EXTRACTOR_METATYPE_PICTURE, I}, |
109 | /* skipping GEN */ | 111 | /* skipping GEN */ |
110 | /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ | 112 | /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ |
111 | /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ | 113 | /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ |
112 | /* skipping BUF */ | 114 | /* skipping BUF */ |
113 | /* skipping CRM */ | 115 | /* skipping CRM */ |
114 | /* skipping CRA */ | 116 | /* skipping CRA */ |
115 | /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ | 117 | /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */ |
118 | |||
119 | |||
120 | {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, | ||
121 | {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
122 | {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
123 | {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
124 | {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
125 | {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */ | ||
126 | /* TDLY */ | ||
127 | {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
128 | {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, | ||
129 | {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
130 | /* TIME, idv23 only */ | ||
131 | {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, | ||
132 | {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, | ||
133 | {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
134 | /* TKEY */ | ||
135 | {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
136 | {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
137 | {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, | ||
138 | {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
139 | {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
140 | {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
141 | {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, | ||
142 | {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */ | ||
143 | {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, | ||
144 | {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, | ||
145 | {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
146 | {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
147 | {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
148 | {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
149 | {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
150 | {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
151 | /* TRDA, idv23 only */ | ||
152 | {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, | ||
153 | /* TRSO */ | ||
154 | {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */ | ||
155 | {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, | ||
156 | /* TSSE */ | ||
157 | {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */ | ||
158 | {"WCOM", EXTRACTOR_METATYPE_URL, U}, | ||
159 | {"WCOP", EXTRACTOR_METATYPE_URL, U}, | ||
160 | {"WOAF", EXTRACTOR_METATYPE_URL, U}, | ||
161 | {"WOAS", EXTRACTOR_METATYPE_URL, U}, | ||
162 | {"WORS", EXTRACTOR_METATYPE_URL, U}, | ||
163 | {"WPAY", EXTRACTOR_METATYPE_URL, U}, | ||
164 | {"WPUB", EXTRACTOR_METATYPE_URL, U}, | ||
165 | {"WXXX", EXTRACTOR_METATYPE_URL, T}, | ||
166 | {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */ | ||
167 | /* ... */ | ||
168 | {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, | ||
169 | {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, | ||
170 | {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
171 | /* ... */ | ||
172 | {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
173 | /* ... */ | ||
174 | {"LINK", EXTRACTOR_METATYPE_URL, U}, | ||
175 | /* ... */ | ||
176 | {"USER", EXTRACTOR_METATYPE_LICENSE, T}, | ||
177 | /* ... */ | ||
178 | |||
179 | /* new frames in id3v24 */ | ||
180 | /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */ | ||
181 | {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T}, | ||
182 | /* TDRC, TDRL, TDTG */ | ||
183 | {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
184 | {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T}, | ||
185 | {"TMOO", EXTRACTOR_METATYPE_MOOD, T}, | ||
186 | {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
187 | {"TSOA", EXTRACTOR_METATYPE_ALBUM, T}, | ||
188 | {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
189 | {"TSOT", EXTRACTOR_METATYPE_TITLE, T}, | ||
190 | {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T}, | ||
191 | |||
116 | {NULL, 0, T}, | 192 | {NULL, 0, T}, |
117 | }; | 193 | }; |
118 | 194 | ||
119 | 195 | struct id3v2_state | |
120 | /* mimetype = audio/mpeg */ | ||
121 | int | ||
122 | EXTRACTOR_id3v2_extract (const unsigned char *data, | ||
123 | size_t size, | ||
124 | EXTRACTOR_MetaDataProcessor proc, | ||
125 | void *proc_cls, | ||
126 | const char *options) | ||
127 | { | 196 | { |
197 | int state; | ||
128 | unsigned int tsize; | 198 | unsigned int tsize; |
129 | unsigned int pos; | 199 | size_t csize; |
200 | char id[4]; | ||
201 | int32_t ti; | ||
202 | char ver; | ||
203 | char extended_header; | ||
204 | uint16_t frame_flags; | ||
205 | char *mime; | ||
206 | }; | ||
207 | |||
208 | enum ID3v2State | ||
209 | { | ||
210 | ID3V2_INVALID = -1, | ||
211 | ID3V2_READING_HEADER = 0, | ||
212 | ID3V2_READING_FRAME_HEADER, | ||
213 | ID3V23_READING_EXTENDED_HEADER, | ||
214 | ID3V24_READING_EXTENDED_HEADER, | ||
215 | ID3V2_READING_FRAME | ||
216 | }; | ||
217 | |||
218 | void | ||
219 | EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin) | ||
220 | { | ||
221 | struct id3v2_state *state; | ||
222 | state = plugin->state = malloc (sizeof (struct id3v2_state)); | ||
223 | if (state == NULL) | ||
224 | return; | ||
225 | memset (state, 0, sizeof (struct id3v2_state)); | ||
226 | state->state = ID3V2_READING_HEADER; | ||
227 | state->ti = -1; | ||
228 | state->mime = NULL; | ||
229 | } | ||
230 | |||
231 | void | ||
232 | EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin) | ||
233 | { | ||
234 | struct id3v2_state *state = plugin->state; | ||
235 | if (state != NULL) | ||
236 | { | ||
237 | if (state->mime != NULL) | ||
238 | free (state->mime); | ||
239 | free (state); | ||
240 | } | ||
241 | plugin->state = NULL; | ||
242 | } | ||
243 | |||
244 | static int | ||
245 | find_type (const char *id, size_t len) | ||
246 | { | ||
247 | int i; | ||
248 | for (i = 0; tmap[i].text != NULL; i++) | ||
249 | if (0 == strncmp (tmap[i].text, id, len)) | ||
250 | return i; | ||
251 | return -1; | ||
252 | } | ||
253 | |||
254 | int | ||
255 | EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin, | ||
256 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
257 | { | ||
258 | int64_t file_position; | ||
259 | int64_t file_size; | ||
260 | int64_t offset = 0; | ||
261 | int64_t size; | ||
262 | struct id3v2_state *state; | ||
263 | unsigned char *data; | ||
264 | char *word = NULL; | ||
130 | unsigned int off; | 265 | unsigned int off; |
131 | enum EXTRACTOR_MetaType type; | 266 | enum EXTRACTOR_MetaType type; |
132 | const char *mime; | 267 | unsigned char picture_type; |
133 | 268 | ||
134 | if ((size < 16) || | 269 | if (plugin == NULL || plugin->state == NULL) |
135 | (data[0] != 0x49) || | 270 | return 1; |
136 | (data[1] != 0x44) || | ||
137 | (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00)) | ||
138 | return 0; | ||
139 | /* unsync: (data[5] & 0x80) > 0; */ | ||
140 | tsize = (((data[6] & 0x7F) << 21) | | ||
141 | ((data[7] & 0x7F) << 14) | | ||
142 | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); | ||
143 | 271 | ||
144 | if (tsize + 10 > size) | 272 | state = plugin->state; |
145 | return 0; | 273 | file_position = plugin->position; |
146 | pos = 10; | 274 | file_size = plugin->fsize; |
147 | while (pos < tsize) | 275 | size = plugin->map_size; |
276 | data = plugin->shm_ptr; | ||
277 | |||
278 | if (plugin->seek_request < 0) | ||
279 | return 1; | ||
280 | if (file_position - plugin->seek_request > 0) | ||
281 | { | ||
282 | plugin->seek_request = -1; | ||
283 | return 1; | ||
284 | } | ||
285 | if (plugin->seek_request - file_position < size) | ||
286 | offset = plugin->seek_request - file_position; | ||
287 | |||
288 | while (1) | ||
289 | { | ||
290 | switch (state->state) | ||
148 | { | 291 | { |
149 | size_t csize; | 292 | case ID3V2_INVALID: |
150 | int i; | 293 | plugin->seek_request = -1; |
294 | return 1; | ||
295 | case ID3V2_READING_HEADER: | ||
296 | /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq: | ||
297 | * Q: Where is an ID3v2 tag located in an MP3 file? | ||
298 | * A: It is most likely located at the beginning of the file. Look for the | ||
299 | * marker "ID3" in the first 3 bytes of the file. If it's not there, it | ||
300 | * could be at the end of the file (if the tag is ID3v2.4). Look for the | ||
301 | * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the | ||
302 | * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags | ||
303 | * in the actual MPEG stream, on an MPEG frame boundary. Almost nobody does | |
304 | * this. | ||
305 | * Parsing of such tags will not be completely correct, because we can't | ||
306 | * seek backwards. We will have to seek to file_size - chunk_size instead | ||
307 | * (by the way, chunk size is theoretically unknown, LE is free to use any chunk | ||
308 | * size, even though plugins often make assumptions about chunk size being large | ||
309 | * enough to make one atomic read without seeking, if offset == 0) and search | ||
310 | * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before | ||
311 | * it (or 10 bytes before the end of file, if id3v1 is not there; not sure | ||
312 | * about APETAGs; we should probably just scan byte-by-byte from the end of file, | ||
313 | * until we hit 3DI, or reach the offset == 0), and use it to set offset to the | |
314 | * start of ID3v24 header, adjust the following file_position check and data | ||
315 | * indices (use offset), and otherwise proceed as normal (maybe file size checks | ||
316 | * along the way will have to be adjusted by -1, or made ">" instead of ">="; | ||
317 | * these problems do not arise for tags at the beginning of the file, since | ||
318 | * audio itself is usually at least 1-byte long; when the tag is at the end of | ||
319 | * file, these checks will have to be 100% correct). | ||
320 | * If there are two tags (at the beginning and at the end of the file), | ||
321 | * a SEEK in the one at the beginning of the file can be used to seek to the | ||
322 | * one at the end. | ||
323 | */ | ||
324 | /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that | ||
325 | * tells the parser to augment id3v1 values with the values from id3v2 (if this | |
326 | * flag is not set, id3v2 parser must discard id3v1 data). | ||
327 | * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored. | ||
328 | */ | ||
329 | if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/) | ||
330 | { | ||
331 | state->state = ID3V2_INVALID; | ||
332 | break; | ||
333 | } | ||
334 | state->ver = data[3]; | ||
335 | if (state->ver == 0x02) | ||
336 | { | ||
337 | state->extended_header = 0; | ||
338 | } | ||
339 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
340 | { | ||
341 | if ((data[5] & 0x80) > 0) | ||
342 | { | ||
343 | /* unsync is not supported in id3v23 or id3v24*/ | ||
344 | state->state = ID3V2_INVALID; | ||
345 | break; | ||
346 | } | ||
347 | state->extended_header = (data[5] & 0x40) > 0; | ||
348 | if ((data[5] & 0x20) > 0) | ||
349 | { | ||
350 | /* experimental is not supported in id3v23 or id3v24*/ | ||
351 | state->state = ID3V2_INVALID; | ||
352 | break; | ||
353 | } | ||
354 | } | ||
355 | state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); | ||
356 | if (state->tsize + 10 > file_size) | ||
357 | { | ||
358 | state->state = ID3V2_INVALID; | ||
359 | break; | ||
360 | } | ||
361 | offset = 10; | ||
362 | if (state->ver == 0x03 && state->extended_header) | ||
363 | state->state = ID3V23_READING_EXTENDED_HEADER; | ||
364 | else if (state->ver == 0x04 && state->extended_header) | ||
365 | state->state = ID3V24_READING_EXTENDED_HEADER; | ||
366 | else | ||
367 | state->state = ID3V2_READING_FRAME_HEADER; | ||
368 | break; | ||
369 | case ID3V23_READING_EXTENDED_HEADER: | ||
370 | if (offset + 9 >= size) | ||
371 | { | ||
372 | if (offset == 0) | ||
373 | { | ||
374 | state->state = ID3V2_INVALID; | ||
375 | break; | ||
376 | } | ||
377 | plugin->seek_request = file_position + offset; | ||
378 | return 0; | ||
379 | } | ||
380 | if (state->ver == 0x03 && state->extended_header) | ||
381 | { | ||
382 | uint32_t padding, extended_header_size; | ||
383 | extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0)); | ||
384 | padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0)); | ||
385 | if (data[offset + 4] == 0 && data[offset + 5] == 0) | ||
386 | /* Skip the CRC32 byte after extended header */ | ||
387 | offset += 1; | ||
388 | offset += 4 + extended_header_size; | ||
389 | if (padding < state->tsize) | ||
390 | state->tsize -= padding; | ||
391 | else | ||
392 | { | ||
393 | state->state = ID3V2_INVALID; | ||
394 | break; | ||
395 | } | ||
396 | } | ||
397 | break; | ||
398 | case ID3V24_READING_EXTENDED_HEADER: | ||
399 | if (offset + 6 >= size) | ||
400 | { | ||
401 | if (offset == 0) | ||
402 | { | ||
403 | state->state = ID3V2_INVALID; | ||
404 | break; | ||
405 | } | ||
406 | plugin->seek_request = file_position + offset; | ||
407 | return 0; | ||
408 | } | ||
409 | if ( (state->ver == 0x04) && (state->extended_header)) | ||
410 | { | ||
411 | uint32_t extended_header_size; | ||
151 | 412 | ||
152 | if (pos + 7 > tsize) | 413 | extended_header_size = (((data[offset]) << 24) | |
414 | ((data[offset + 1]) << 16) | | ||
415 | ((data[offset + 2]) << 8) | | ||
416 | ((data[offset + 3]) << 0)); | ||
417 | offset += 4 + extended_header_size; | ||
418 | } | ||
419 | break; | ||
420 | case ID3V2_READING_FRAME_HEADER: | ||
421 | if (file_position + offset > state->tsize || | ||
422 | ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) || | ||
423 | (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize)) | ||
424 | { | ||
425 | state->state = ID3V2_INVALID; | ||
426 | break; | ||
427 | } | ||
428 | if (((state->ver == 0x02) && (offset + 6 >= size)) || | ||
429 | (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size))) | ||
430 | { | ||
431 | plugin->seek_request = file_position + offset; | ||
153 | return 0; | 432 | return 0; |
154 | csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; | 433 | } |
155 | if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) | 434 | if (state->ver == 0x02) |
435 | { | ||
436 | memcpy (state->id, &data[offset], 3); | ||
437 | state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5]; | ||
438 | if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0)) | ||
439 | { | ||
440 | state->state = ID3V2_INVALID; | ||
441 | break; | ||
442 | } | ||
443 | offset += 6; | ||
444 | state->frame_flags = 0; | ||
445 | } | ||
446 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
447 | { | ||
448 | memcpy (state->id, &data[offset], 4); | ||
449 | if (state->ver == 0x03) | ||
450 | state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7]; | ||
451 | else if (state->ver == 0x04) | ||
452 | state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00); | ||
453 | if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0)) | ||
454 | { | ||
455 | state->state = ID3V2_INVALID; | ||
456 | break; | ||
457 | } | ||
458 | state->frame_flags = (data[offset + 8] << 8) + data[offset + 9]; | ||
459 | if (state->ver == 0x03) | ||
460 | { | ||
461 | if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ || | ||
462 | ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */) | ||
463 | { | ||
464 | /* Skip to next frame header */ | ||
465 | offset += 10 + state->csize; | ||
466 | break; | ||
467 | } | ||
468 | } | ||
469 | else if (state->ver == 0x04) | ||
470 | { | ||
471 | if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ || | ||
472 | ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ || | ||
473 | ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */) | ||
474 | { | ||
475 | /* Skip to next frame header */ | ||
476 | offset += 10 + state->csize; | ||
477 | break; | ||
478 | } | ||
479 | if ((state->frame_flags & 0x01) > 0) | ||
480 | { | ||
481 | /* Skip data length indicator */ | ||
482 | state->csize -= 4; | ||
483 | offset += 4; | ||
484 | } | ||
485 | } | ||
486 | offset += 10; | ||
487 | } | ||
488 | |||
489 | state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0)); | ||
490 | if (state->ti == -1) | ||
491 | { | ||
492 | offset += state->csize; | ||
493 | break; | ||
494 | } | ||
495 | state->state = ID3V2_READING_FRAME; | ||
496 | break; | ||
497 | case ID3V2_READING_FRAME: | ||
498 | if (offset == 0 && state->csize > size) | ||
499 | { | ||
500 | /* frame size is larger than the size of one data chunk we get at a time */ | ||
501 | offset += state->csize; | ||
502 | state->state = ID3V2_READING_FRAME_HEADER; | ||
503 | break; | ||
504 | } | ||
505 | if (offset + state->csize > size) | ||
506 | { | ||
507 | plugin->seek_request = file_position + offset; | ||
508 | return 0; | ||
509 | } | ||
510 | word = NULL; | ||
511 | if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) || | ||
512 | ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0))) | ||
513 | { | ||
514 | /* "group" identifier, skip a byte */ | ||
515 | offset++; | ||
516 | state->csize--; | ||
517 | } | ||
518 | switch (tmap[state->ti].fmt) | ||
519 | { | ||
520 | case T: | ||
521 | if (data[offset] == 0x00) | ||
522 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
523 | state->csize - 1, "ISO-8859-1"); | ||
524 | else if (data[offset] == 0x01) | ||
525 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
526 | state->csize - 1, "UCS-2"); | ||
527 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
528 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
529 | state->csize - 1, "UTF-16BE"); | ||
530 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
531 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
532 | state->csize - 1, "UTF-8"); | ||
533 | else | ||
534 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
535 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
536 | state->csize - 1, "ISO-8859-1"); | ||
537 | break; | ||
538 | case U: | ||
539 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset], | ||
540 | state->csize, "ISO-8859-1"); | ||
541 | break; | ||
542 | case UL: | ||
543 | if (state->csize < 6) | ||
544 | { | ||
545 | /* malformed */ | ||
546 | state->state = ID3V2_INVALID; | ||
547 | break; | ||
548 | } | ||
549 | /* find end of description */ | ||
550 | off = 4; | ||
551 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) | ||
552 | off++; | ||
553 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
554 | { | ||
555 | /* malformed */ | ||
556 | state->state = ID3V2_INVALID; | ||
557 | break; | ||
558 | } | ||
559 | off++; | ||
560 | if (data[offset] == 0x00) | ||
561 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
562 | state->csize - off, "ISO-8859-1"); | ||
563 | else if (data[offset] == 0x01) | ||
564 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
565 | state->csize - off, "UCS-2"); | ||
566 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
567 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
568 | state->csize - off, "UTF-16BE"); | ||
569 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
570 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
571 | state->csize - off, "UTF-8"); | ||
572 | else | ||
573 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
574 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
575 | state->csize - off, "ISO-8859-1"); | ||
576 | break; | ||
577 | case SL: | ||
578 | if (state->csize < 7) | ||
579 | { | ||
580 | /* malformed */ | ||
581 | state->state = ID3V2_INVALID; | ||
582 | break; | ||
583 | } | ||
584 | if (data[offset] == 0x00) | ||
585 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
586 | state->csize - 6, "ISO-8859-1"); | ||
587 | else if (data[offset] == 0x01) | ||
588 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
589 | state->csize - 6, "UCS-2"); | ||
590 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
591 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
592 | state->csize - 6, "UTF-16BE"); | ||
593 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
594 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
595 | state->csize - 6, "UTF-8"); | ||
596 | else | ||
597 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
598 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
599 | state->csize - 6, "ISO-8859-1"); | ||
600 | break; | ||
601 | case L: | ||
602 | if (state->csize < 5) | ||
603 | { | ||
604 | /* malformed */ | ||
605 | state->state = ID3V2_INVALID; | ||
606 | break; | ||
607 | } | ||
608 | /* find end of description */ | ||
609 | off = 4; | ||
610 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) | ||
611 | off++; | ||
612 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
613 | { | ||
614 | /* malformed */ | ||
615 | state->state = ID3V2_INVALID; | ||
616 | break; | ||
617 | } | ||
618 | off++; | ||
619 | |||
620 | if (data[offset] == 0x00) | ||
621 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
622 | state->csize - off, "ISO-8859-1"); | ||
623 | else if (data[offset] == 0x01) | ||
624 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
625 | state->csize - off, "UCS-2"); | ||
626 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
627 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
628 | state->csize - off, "UTF-1offBE"); | ||
629 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
630 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
631 | state->csize - off, "UTF-8"); | ||
632 | else | ||
633 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
634 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
635 | state->csize - off, "ISO-8859-1"); | ||
636 | break; | ||
637 | case I: | ||
638 | if ( ( (state->ver == 0x02) && | ||
639 | (state->csize < 7) ) || | ||
640 | ( ( (state->ver == 0x03) || | ||
641 | (state->ver == 0x04)) && (state->csize < 5)) ) | ||
642 | { | ||
643 | /* malformed */ | ||
644 | state->state = ID3V2_INVALID; | ||
645 | break; | ||
646 | } | ||
647 | if (state->mime != NULL) | ||
648 | free (state->mime); | ||
649 | state->mime = NULL; | ||
650 | if (state->ver == 0x02) | ||
651 | { | ||
652 | off = 5; | ||
653 | picture_type = data[offset + 5]; | ||
654 | } | ||
655 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
656 | { | ||
657 | off = 1; | ||
658 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') ) | ||
659 | off++; | ||
660 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
661 | { | ||
662 | /* malformed */ | ||
663 | state->state = ID3V2_INVALID; | ||
664 | break; | ||
665 | } | ||
666 | state->mime = malloc (off); | ||
667 | memcpy (state->mime, &data[offset + 1], off - 1); | ||
668 | state->mime[off - 1] = '\0'; | ||
669 | off += 1; | ||
670 | picture_type = data[offset]; | ||
671 | off += 1; | ||
672 | } | ||
673 | /* find end of description */ | ||
674 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) | ||
675 | off++; | ||
676 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
677 | { | ||
678 | free (state->mime); | ||
679 | state->mime = NULL; | ||
680 | /* malformed */ | ||
681 | state->state = ID3V2_INVALID; | ||
682 | break; | ||
683 | } | ||
684 | off++; | ||
685 | switch (picture_type) | ||
686 | { | ||
687 | case 0x03: | ||
688 | case 0x04: | ||
689 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
690 | break; | ||
691 | case 0x07: | ||
692 | case 0x08: | ||
693 | case 0x09: | ||
694 | case 0x0A: | ||
695 | case 0x0B: | ||
696 | case 0x0C: | ||
697 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
698 | break; | ||
699 | case 0x0D: | ||
700 | case 0x0E: | ||
701 | case 0x0F: | ||
702 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
703 | break; | ||
704 | case 0x14: | ||
705 | type = EXTRACTOR_METATYPE_LOGO; | ||
706 | type = EXTRACTOR_METATYPE_LOGO; | ||
707 | break; | ||
708 | default: | ||
709 | type = EXTRACTOR_METATYPE_PICTURE; | ||
710 | break; | ||
711 | } | ||
712 | if (state->ver == 0x02) | ||
713 | { | ||
714 | if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3)) | ||
715 | state->mime = strdup ("image/png"); | ||
716 | else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3)) | ||
717 | state->mime = strdup ("image/jpeg"); | ||
718 | else | ||
719 | state->mime = NULL; | ||
720 | } | ||
721 | else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL)) | ||
722 | { | ||
723 | size_t mime_len = strlen (state->mime); | ||
724 | char *type_mime = malloc (mime_len + 6 + 1); | ||
725 | snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime); | ||
726 | free (state->mime); | ||
727 | state->mime = type_mime; | ||
728 | } | ||
729 | if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->"))) | ||
730 | { | ||
731 | /* not supported */ | ||
732 | free (state->mime); | ||
733 | state->mime = NULL; | ||
734 | } | ||
735 | else | ||
736 | { | ||
737 | if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off)) | ||
738 | { | ||
739 | if (state->mime != NULL) | ||
740 | free (state->mime); | ||
741 | state->mime = NULL; | ||
742 | return 1; | ||
743 | } | ||
744 | if (state->mime != NULL) | ||
745 | free (state->mime); | ||
746 | state->mime = NULL; | ||
747 | } | ||
748 | word = NULL; | ||
156 | break; | 749 | break; |
157 | i = 0; | 750 | default: |
158 | while (tmap[i].text != NULL) | 751 | return 1; |
752 | } | ||
753 | if ((word != NULL) && (strlen (word) > 0)) | ||
754 | { | ||
755 | if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1)) | ||
159 | { | 756 | { |
160 | if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3)) | 757 | free (word); |
161 | { | 758 | return 1; |
162 | char *word; | ||
163 | /* this byte describes the encoding | ||
164 | try to convert strings to UTF-8 | ||
165 | if it fails, then forget it */ | ||
166 | switch (tmap[i].fmt) | ||
167 | { | ||
168 | case T: | ||
169 | switch (data[pos + 6]) | ||
170 | { | ||
171 | case 0x00: | ||
172 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | ||
173 | csize - 1, "ISO-8859-1"); | ||
174 | break; | ||
175 | case 0x01: | ||
176 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | ||
177 | csize - 1, "UCS-2"); | ||
178 | break; | ||
179 | default: | ||
180 | /* bad encoding byte, | ||
181 | try to convert from iso-8859-1 */ | ||
182 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | ||
183 | csize - 1, "ISO-8859-1"); | ||
184 | break; | ||
185 | } | ||
186 | break; | ||
187 | case U: | ||
188 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6], | ||
189 | csize, "ISO-8859-1"); | ||
190 | break; | ||
191 | case UL: | ||
192 | if (csize < 6) | ||
193 | return 0; /* malformed */ | ||
194 | /* find end of description */ | ||
195 | off = 10; | ||
196 | while ( (off < size) && | ||
197 | (off - pos < csize) && | ||
198 | (data[pos + off] == '\0') ) | ||
199 | off++; | ||
200 | if ( (off >= csize) || | ||
201 | (data[pos+off] != '\0') ) | ||
202 | return 0; /* malformed */ | ||
203 | off++; | ||
204 | switch (data[pos + 6]) | ||
205 | { | ||
206 | case 0x00: | ||
207 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
208 | csize - off, "ISO-8859-1"); | ||
209 | break; | ||
210 | case 0x01: | ||
211 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
212 | csize - off, "UCS-2"); | ||
213 | break; | ||
214 | default: | ||
215 | /* bad encoding byte, | ||
216 | try to convert from iso-8859-1 */ | ||
217 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
218 | csize - off, "ISO-8859-1"); | ||
219 | break; | ||
220 | } | ||
221 | break; | ||
222 | case SL: | ||
223 | if (csize < 7) | ||
224 | return 0; /* malformed */ | ||
225 | /* find end of description */ | ||
226 | switch (data[pos + 6]) | ||
227 | { | ||
228 | case 0x00: | ||
229 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
230 | csize - 6, "ISO-8859-1"); | ||
231 | break; | ||
232 | case 0x01: | ||
233 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
234 | csize - 6, "UCS-2"); | ||
235 | break; | ||
236 | default: | ||
237 | /* bad encoding byte, | ||
238 | try to convert from iso-8859-1 */ | ||
239 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
240 | csize - 6, "ISO-8859-1"); | ||
241 | break; | ||
242 | } | ||
243 | break; | ||
244 | case L: | ||
245 | if (csize < 5) | ||
246 | return 0; /* malformed */ | ||
247 | /* find end of description */ | ||
248 | switch (data[pos + 6]) | ||
249 | { | ||
250 | case 0x00: | ||
251 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
252 | csize - 4, "ISO-8859-1"); | ||
253 | break; | ||
254 | case 0x01: | ||
255 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
256 | csize - 4, "UCS-2"); | ||
257 | break; | ||
258 | default: | ||
259 | /* bad encoding byte, | ||
260 | try to convert from iso-8859-1 */ | ||
261 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
262 | csize - 4, "ISO-8859-1"); | ||
263 | break; | ||
264 | } | ||
265 | break; | ||
266 | case I: | ||
267 | if (csize < 6) | ||
268 | return 0; /* malformed */ | ||
269 | /* find end of description */ | ||
270 | off = 12; | ||
271 | while ( (off < size) && | ||
272 | (off - pos < csize) && | ||
273 | (data[pos + off] == '\0') ) | ||
274 | off++; | ||
275 | if ( (off >= csize) || | ||
276 | (data[pos+off] != '\0') ) | ||
277 | return 0; /* malformed */ | ||
278 | off++; | ||
279 | switch (data[pos+11]) | ||
280 | { | ||
281 | case 0x03: | ||
282 | case 0x04: | ||
283 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
284 | break; | ||
285 | case 0x07: | ||
286 | case 0x08: | ||
287 | case 0x09: | ||
288 | case 0x0A: | ||
289 | case 0x0B: | ||
290 | case 0x0C: | ||
291 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
292 | break; | ||
293 | case 0x0D: | ||
294 | case 0x0E: | ||
295 | case 0x0F: | ||
296 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
297 | break; | ||
298 | case 0x14: | ||
299 | type = EXTRACTOR_METATYPE_LOGO; | ||
300 | type = EXTRACTOR_METATYPE_LOGO; | ||
301 | break; | ||
302 | default: | ||
303 | type = EXTRACTOR_METATYPE_PICTURE; | ||
304 | break; | ||
305 | } | ||
306 | if (0 == strncasecmp ("PNG", | ||
307 | (const char*) &data[pos + 7], 3)) | ||
308 | mime = "image/png"; | ||
309 | else if (0 == strncasecmp ("JPG", | ||
310 | (const char*) &data[pos + 7], 3)) | ||
311 | mime = "image/jpeg"; | ||
312 | else | ||
313 | mime = NULL; | ||
314 | if (0 == strncasecmp ("-->", | ||
315 | (const char*) &data[pos + 7], 3)) | ||
316 | { | ||
317 | /* not supported */ | ||
318 | } | ||
319 | else | ||
320 | { | ||
321 | if (0 != proc (proc_cls, | ||
322 | "id3v2", | ||
323 | type, | ||
324 | EXTRACTOR_METAFORMAT_BINARY, | ||
325 | mime, | ||
326 | (const char*) &data[pos + off], | ||
327 | csize + 6 - off)) | ||
328 | return 1; | ||
329 | } | ||
330 | word = NULL; | ||
331 | break; | ||
332 | default: | ||
333 | return 0; | ||
334 | } | ||
335 | if ((word != NULL) && (strlen (word) > 0)) | ||
336 | { | ||
337 | if (0 != proc (proc_cls, | ||
338 | "id3v2", | ||
339 | tmap[i].type, | ||
340 | EXTRACTOR_METAFORMAT_UTF8, | ||
341 | "text/plain", | ||
342 | word, | ||
343 | strlen(word)+1)) | ||
344 | { | ||
345 | free (word); | ||
346 | return 1; | ||
347 | } | ||
348 | } | ||
349 | if (word != NULL) | ||
350 | free (word); | ||
351 | break; | ||
352 | } | ||
353 | i++; | ||
354 | } | 759 | } |
355 | pos += 6 + csize; | 760 | } |
761 | if (word != NULL) | ||
762 | free (word); | ||
763 | offset = offset + state->csize; | ||
764 | state->state = ID3V2_READING_FRAME_HEADER; | ||
765 | break; | ||
356 | } | 766 | } |
357 | return 0; | 767 | } |
768 | return 1; | ||
358 | } | 769 | } |
359 | 770 | ||
360 | /* end of id3v2_extractor.c */ | 771 | /* end of id3v2_extractor.c */ |