diff options
Diffstat (limited to 'src/plugins/id3v23_extractor.c')
-rw-r--r-- | src/plugins/id3v23_extractor.c | 420 |
1 files changed, 0 insertions, 420 deletions
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c deleted file mode 100644 index c31d63d..0000000 --- a/src/plugins/id3v23_extractor.c +++ /dev/null | |||
@@ -1,420 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | |||
20 | */ | ||
21 | #define DEBUG_EXTRACT_ID3v23 0 | ||
22 | |||
23 | #include "platform.h" | ||
24 | #include "extractor.h" | ||
25 | #include <string.h> | ||
26 | #include <stdio.h> | ||
27 | #include <sys/types.h> | ||
28 | #include <sys/stat.h> | ||
29 | #include <unistd.h> | ||
30 | #include <stdlib.h> | ||
31 | #include <fcntl.h> | ||
32 | #ifndef MINGW | ||
33 | #include <sys/mman.h> | ||
34 | #endif | ||
35 | |||
36 | #include "convert.h" | ||
37 | |||
/**
 * Layout of the body of an ID3v2.3 frame; selects how the
 * extractor decodes the frame's payload.
 */
enum Id3v23Fmt
{
  /* simple, 0-terminated string, prefixed by an encoding byte */
  T = 0,

  /* 0-terminated ASCII string, no encoding byte */
  U = 1,

  /* unsynchronised lyrics */
  UL = 2,

  /* synchronised lyrics */
  SL = 3,

  /* string with language prefix */
  L = 4,

  /* image */
  I = 5
};
47 | |||
48 | typedef struct | ||
49 | { | ||
50 | const char *text; | ||
51 | enum EXTRACTOR_MetaType type; | ||
52 | enum Id3v23Fmt fmt; | ||
53 | } Matches; | ||
54 | |||
55 | static Matches tmap[] = { | ||
56 | {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, | ||
57 | {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
58 | {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
59 | {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
60 | {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
61 | /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */ | ||
62 | /* TDLY */ | ||
63 | {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
64 | {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, | ||
65 | {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
66 | /* TIME */ | ||
67 | {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, | ||
68 | {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, | ||
69 | {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
70 | /* TKEY */ | ||
71 | {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
72 | {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
73 | {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, | ||
74 | {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
75 | {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
76 | {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
77 | {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, | ||
78 | {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | ||
79 | {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, | ||
80 | {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, | ||
81 | {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
82 | {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
83 | {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
84 | {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
85 | {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
86 | {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
87 | /* TRDA */ | ||
88 | {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, | ||
89 | /* TRSO */ | ||
90 | {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | ||
91 | {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, | ||
92 | /* TSSE */ | ||
93 | {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, | ||
94 | {"WCOM", EXTRACTOR_METATYPE_URL, U}, | ||
95 | {"WCOP", EXTRACTOR_METATYPE_URL, U}, | ||
96 | {"WOAF", EXTRACTOR_METATYPE_URL, U}, | ||
97 | {"WOAS", EXTRACTOR_METATYPE_URL, U}, | ||
98 | {"WORS", EXTRACTOR_METATYPE_URL, U}, | ||
99 | {"WPAY", EXTRACTOR_METATYPE_URL, U}, | ||
100 | {"WPUB", EXTRACTOR_METATYPE_URL, U}, | ||
101 | {"WXXX", EXTRACTOR_METATYPE_URL, T}, | ||
102 | {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
103 | /* ... */ | ||
104 | {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, | ||
105 | {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, | ||
106 | {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
107 | /* ... */ | ||
108 | {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
109 | /* ... */ | ||
110 | {"LINK", EXTRACTOR_METATYPE_URL, U}, | ||
111 | /* ... */ | ||
112 | {"USER", EXTRACTOR_METATYPE_LICENSE, T}, | ||
113 | /* ... */ | ||
114 | {NULL, 0, T} | ||
115 | }; | ||
116 | |||
117 | |||
118 | /* mimetype = audio/mpeg */ | ||
119 | int | ||
120 | EXTRACTOR_id3v23_extract (const unsigned char *data, | ||
121 | size_t size, | ||
122 | EXTRACTOR_MetaDataProcessor proc, | ||
123 | void *proc_cls, | ||
124 | const char *options) | ||
125 | { | ||
126 | int unsync; | ||
127 | int extendedHdr; | ||
128 | int experimental; | ||
129 | uint32_t tsize; | ||
130 | uint32_t pos; | ||
131 | uint32_t ehdrSize; | ||
132 | uint32_t padding; | ||
133 | uint32_t csize; | ||
134 | int i; | ||
135 | uint16_t flags; | ||
136 | char *mime; | ||
137 | enum EXTRACTOR_MetaType type; | ||
138 | size_t off; | ||
139 | int obo; | ||
140 | |||
141 | if ((size < 16) || | ||
142 | (data[0] != 0x49) || | ||
143 | (data[1] != 0x44) || | ||
144 | (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00)) | ||
145 | return 0; | ||
146 | unsync = (data[5] & 0x80) > 0; | ||
147 | if (unsync) | ||
148 | return 0; /* not supported */ | ||
149 | extendedHdr = (data[5] & 0x40) > 0; | ||
150 | experimental = (data[5] & 0x20) > 0; | ||
151 | if (experimental) | ||
152 | return 0; | ||
153 | tsize = (((data[6] & 0x7F) << 21) | | ||
154 | ((data[7] & 0x7F) << 14) | | ||
155 | ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); | ||
156 | if (tsize + 10 > size) | ||
157 | return 0; | ||
158 | pos = 10; | ||
159 | padding = 0; | ||
160 | if (extendedHdr) | ||
161 | { | ||
162 | ehdrSize = (((data[10]) << 24) | | ||
163 | ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0)); | ||
164 | |||
165 | padding = (((data[15]) << 24) | | ||
166 | ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0)); | ||
167 | pos += 4 + ehdrSize; | ||
168 | if (padding < tsize) | ||
169 | tsize -= padding; | ||
170 | else | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | |||
175 | while (pos < tsize) | ||
176 | { | ||
177 | if (pos + 10 > tsize) | ||
178 | return 0; | ||
179 | csize = | ||
180 | (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) + | ||
181 | data[pos + 7]; | ||
182 | if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) || | ||
183 | (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos)) | ||
184 | break; | ||
185 | flags = (data[pos + 8] << 8) + data[pos + 9]; | ||
186 | if (((flags & 0x80) > 0) /* compressed, not yet supported */ || | ||
187 | ((flags & 0x40) > 0) /* encrypted, not supported */ ) | ||
188 | { | ||
189 | pos += 10 + csize; | ||
190 | continue; | ||
191 | } | ||
192 | i = 0; | ||
193 | while (tmap[i].text != NULL) | ||
194 | { | ||
195 | if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4)) | ||
196 | { | ||
197 | char *word; | ||
198 | if ((flags & 0x20) > 0) | ||
199 | { | ||
200 | /* "group" identifier, skip a byte */ | ||
201 | pos++; | ||
202 | csize--; | ||
203 | } | ||
204 | switch (tmap[i].fmt) | ||
205 | { | ||
206 | case T: | ||
207 | /* this byte describes the encoding | ||
208 | try to convert strings to UTF-8 | ||
209 | if it fails, then forget it */ | ||
210 | switch (data[pos + 10]) | ||
211 | { | ||
212 | case 0x00: | ||
213 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
214 | csize - 1, "ISO-8859-1"); | ||
215 | break; | ||
216 | case 0x01: | ||
217 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
218 | csize - 1, "UCS-2"); | ||
219 | break; | ||
220 | default: | ||
221 | /* bad encoding byte, | ||
222 | try to convert from iso-8859-1 */ | ||
223 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
224 | csize - 1, "ISO-8859-1"); | ||
225 | break; | ||
226 | } | ||
227 | break; | ||
228 | case U: | ||
229 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
230 | csize, "ISO-8859-1"); | ||
231 | break; | ||
232 | case UL: | ||
233 | if (csize < 6) | ||
234 | return 0; /* malformed */ | ||
235 | /* find end of description */ | ||
236 | off = 14; | ||
237 | while ( (off < size) && | ||
238 | (off - pos < csize) && | ||
239 | (data[pos + off] == '\0') ) | ||
240 | off++; | ||
241 | if ( (off >= csize) || | ||
242 | (data[pos+off] != '\0') ) | ||
243 | return 0; /* malformed */ | ||
244 | off++; | ||
245 | switch (data[pos + 10]) | ||
246 | { | ||
247 | case 0x00: | ||
248 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
249 | csize - off, "ISO-8859-1"); | ||
250 | break; | ||
251 | case 0x01: | ||
252 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
253 | csize - off, "UCS-2"); | ||
254 | break; | ||
255 | default: | ||
256 | /* bad encoding byte, | ||
257 | try to convert from iso-8859-1 */ | ||
258 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
259 | csize - off, "ISO-8859-1"); | ||
260 | break; | ||
261 | } | ||
262 | break; | ||
263 | case SL: | ||
264 | if (csize < 7) | ||
265 | return 0; /* malformed */ | ||
266 | /* find end of description */ | ||
267 | switch (data[pos + 10]) | ||
268 | { | ||
269 | case 0x00: | ||
270 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
271 | csize - 6, "ISO-8859-1"); | ||
272 | break; | ||
273 | case 0x01: | ||
274 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
275 | csize - 6, "UCS-2"); | ||
276 | break; | ||
277 | default: | ||
278 | /* bad encoding byte, | ||
279 | try to convert from iso-8859-1 */ | ||
280 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
281 | csize - 6, "ISO-8859-1"); | ||
282 | break; | ||
283 | } | ||
284 | break; | ||
285 | case L: | ||
286 | if (csize < 5) | ||
287 | return 0; /* malformed */ | ||
288 | /* find end of description */ | ||
289 | obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... */ | ||
290 | if (csize < 6) | ||
291 | obo = 0; | ||
292 | switch (data[pos + 10]) | ||
293 | { | ||
294 | case 0x00: | ||
295 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], | ||
296 | csize - 4 - obo, "ISO-8859-1"); | ||
297 | break; | ||
298 | case 0x01: | ||
299 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], | ||
300 | csize - 4 - obo, "UCS-2"); | ||
301 | break; | ||
302 | default: | ||
303 | /* bad encoding byte, | ||
304 | try to convert from iso-8859-1 */ | ||
305 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], | ||
306 | csize - 4 - obo, "ISO-8859-1"); | ||
307 | break; | ||
308 | } | ||
309 | break; | ||
310 | case I: | ||
311 | if (csize < 2) | ||
312 | return 0; /* malformed */ | ||
313 | /* find end of mime type */ | ||
314 | off = 11; | ||
315 | while ( (off < size) && | ||
316 | (off - pos < csize) && | ||
317 | (data[pos + off] == '\0') ) | ||
318 | off++; | ||
319 | if ( (off >= csize) || | ||
320 | (data[pos+off] != '\0') ) | ||
321 | return 0; /* malformed */ | ||
322 | off++; | ||
323 | mime = strdup ((const char*) &data[pos + 11]); | ||
324 | |||
325 | switch (data[pos+off]) | ||
326 | { | ||
327 | case 0x03: | ||
328 | case 0x04: | ||
329 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
330 | break; | ||
331 | case 0x07: | ||
332 | case 0x08: | ||
333 | case 0x09: | ||
334 | case 0x0A: | ||
335 | case 0x0B: | ||
336 | case 0x0C: | ||
337 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
338 | break; | ||
339 | case 0x0D: | ||
340 | case 0x0E: | ||
341 | case 0x0F: | ||
342 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
343 | break; | ||
344 | case 0x14: | ||
345 | type = EXTRACTOR_METATYPE_LOGO; | ||
346 | type = EXTRACTOR_METATYPE_LOGO; | ||
347 | break; | ||
348 | default: | ||
349 | type = EXTRACTOR_METATYPE_PICTURE; | ||
350 | break; | ||
351 | } | ||
352 | off++; | ||
353 | |||
354 | /* find end of description */ | ||
355 | while ( (off < size) && | ||
356 | (off - pos < csize) && | ||
357 | (data[pos + off] == '\0') ) | ||
358 | off++; | ||
359 | if ( (off >= csize) || | ||
360 | (data[pos+off] != '\0') ) | ||
361 | { | ||
362 | if (mime != NULL) | ||
363 | free (mime); | ||
364 | return 0; /* malformed */ | ||
365 | } | ||
366 | off++; | ||
367 | if ( (mime != NULL) && | ||
368 | (0 == strcasecmp ("-->", | ||
369 | mime)) ) | ||
370 | { | ||
371 | /* not supported */ | ||
372 | } | ||
373 | else | ||
374 | { | ||
375 | if (0 != proc (proc_cls, | ||
376 | "id3v23", | ||
377 | type, | ||
378 | EXTRACTOR_METAFORMAT_BINARY, | ||
379 | mime, | ||
380 | (const char*) &data[pos + off], | ||
381 | csize + 6 - off)) | ||
382 | { | ||
383 | if (mime != NULL) | ||
384 | free (mime); | ||
385 | return 1; | ||
386 | } | ||
387 | } | ||
388 | if (mime != NULL) | ||
389 | free (mime); | ||
390 | word = NULL; | ||
391 | break; | ||
392 | default: | ||
393 | return 0; | ||
394 | } | ||
395 | if ((word != NULL) && (strlen (word) > 0)) | ||
396 | { | ||
397 | if (0 != proc (proc_cls, | ||
398 | "id3v23", | ||
399 | tmap[i].type, | ||
400 | EXTRACTOR_METAFORMAT_UTF8, | ||
401 | "text/plain", | ||
402 | word, | ||
403 | strlen(word)+1)) | ||
404 | { | ||
405 | free (word); | ||
406 | return 1; | ||
407 | } | ||
408 | } | ||
409 | if (word != NULL) | ||
410 | free (word); | ||
411 | break; | ||
412 | } | ||
413 | i++; | ||
414 | } | ||
415 | pos += 10 + csize; | ||
416 | } | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | /* end of id3v23_extractor.c */ | ||