about | summary | refs | log | tree | commit | diff
path: root/src/plugins/id3v23_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/id3v23_extractor.c')
-rw-r--r-- src/plugins/id3v23_extractor.c 420
1 files changed, 0 insertions, 420 deletions
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c
deleted file mode 100644
index c31d63d..0000000
--- a/src/plugins/id3v23_extractor.c
+++ /dev/null
@@ -1,420 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21#define DEBUG_EXTRACT_ID3v23 0
22
23#include "platform.h"
24#include "extractor.h"
25#include <string.h>
26#include <stdio.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <fcntl.h>
32#ifndef MINGW
33#include <sys/mman.h>
34#endif
35
36#include "convert.h"
37
/**
 * Payload layouts of the ID3v2.3 frames handled by this plugin.
 * Each entry of the frame table carries one of these values to tell
 * the extractor how the bytes after the frame header are organised.
 */
enum Id3v23Fmt
{
  /* simple, 0-terminated string, prefixed by encoding */
  T = 0,

  /* 0-terminated ASCII string, no encoding */
  U = 1,

  /* unsync'ed lyrics */
  UL = 2,

  /* sync'ed lyrics */
  SL = 3,

  /* string with language prefix */
  L = 4,

  /* image */
  I = 5
};
47
48typedef struct
49{
50 const char *text;
51 enum EXTRACTOR_MetaType type;
52 enum Id3v23Fmt fmt;
53} Matches;
54
55static Matches tmap[] = {
56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
62 /* TDLY */
63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
66 /* TIME */
67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
70 /* TKEY */
71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
78 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
87 /* TRDA */
88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
89 /* TRSO */
90 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
92 /* TSSE */
93 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 {NULL, 0, T}
115};
116
117
118/* mimetype = audio/mpeg */
119int
120EXTRACTOR_id3v23_extract (const unsigned char *data,
121 size_t size,
122 EXTRACTOR_MetaDataProcessor proc,
123 void *proc_cls,
124 const char *options)
125{
126 int unsync;
127 int extendedHdr;
128 int experimental;
129 uint32_t tsize;
130 uint32_t pos;
131 uint32_t ehdrSize;
132 uint32_t padding;
133 uint32_t csize;
134 int i;
135 uint16_t flags;
136 char *mime;
137 enum EXTRACTOR_MetaType type;
138 size_t off;
139 int obo;
140
141 if ((size < 16) ||
142 (data[0] != 0x49) ||
143 (data[1] != 0x44) ||
144 (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
145 return 0;
146 unsync = (data[5] & 0x80) > 0;
147 if (unsync)
148 return 0; /* not supported */
149 extendedHdr = (data[5] & 0x40) > 0;
150 experimental = (data[5] & 0x20) > 0;
151 if (experimental)
152 return 0;
153 tsize = (((data[6] & 0x7F) << 21) |
154 ((data[7] & 0x7F) << 14) |
155 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
156 if (tsize + 10 > size)
157 return 0;
158 pos = 10;
159 padding = 0;
160 if (extendedHdr)
161 {
162 ehdrSize = (((data[10]) << 24) |
163 ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0));
164
165 padding = (((data[15]) << 24) |
166 ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0));
167 pos += 4 + ehdrSize;
168 if (padding < tsize)
169 tsize -= padding;
170 else
171 return 0;
172 }
173
174
175 while (pos < tsize)
176 {
177 if (pos + 10 > tsize)
178 return 0;
179 csize =
180 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
181 data[pos + 7];
182 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
183 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
184 break;
185 flags = (data[pos + 8] << 8) + data[pos + 9];
186 if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
187 ((flags & 0x40) > 0) /* encrypted, not supported */ )
188 {
189 pos += 10 + csize;
190 continue;
191 }
192 i = 0;
193 while (tmap[i].text != NULL)
194 {
195 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
196 {
197 char *word;
198 if ((flags & 0x20) > 0)
199 {
200 /* "group" identifier, skip a byte */
201 pos++;
202 csize--;
203 }
204 switch (tmap[i].fmt)
205 {
206 case T:
207 /* this byte describes the encoding
208 try to convert strings to UTF-8
209 if it fails, then forget it */
210 switch (data[pos + 10])
211 {
212 case 0x00:
213 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
214 csize - 1, "ISO-8859-1");
215 break;
216 case 0x01:
217 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
218 csize - 1, "UCS-2");
219 break;
220 default:
221 /* bad encoding byte,
222 try to convert from iso-8859-1 */
223 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
224 csize - 1, "ISO-8859-1");
225 break;
226 }
227 break;
228 case U:
229 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
230 csize, "ISO-8859-1");
231 break;
232 case UL:
233 if (csize < 6)
234 return 0; /* malformed */
235 /* find end of description */
236 off = 14;
237 while ( (off < size) &&
238 (off - pos < csize) &&
239 (data[pos + off] == '\0') )
240 off++;
241 if ( (off >= csize) ||
242 (data[pos+off] != '\0') )
243 return 0; /* malformed */
244 off++;
245 switch (data[pos + 10])
246 {
247 case 0x00:
248 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
249 csize - off, "ISO-8859-1");
250 break;
251 case 0x01:
252 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
253 csize - off, "UCS-2");
254 break;
255 default:
256 /* bad encoding byte,
257 try to convert from iso-8859-1 */
258 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
259 csize - off, "ISO-8859-1");
260 break;
261 }
262 break;
263 case SL:
264 if (csize < 7)
265 return 0; /* malformed */
266 /* find end of description */
267 switch (data[pos + 10])
268 {
269 case 0x00:
270 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
271 csize - 6, "ISO-8859-1");
272 break;
273 case 0x01:
274 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
275 csize - 6, "UCS-2");
276 break;
277 default:
278 /* bad encoding byte,
279 try to convert from iso-8859-1 */
280 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
281 csize - 6, "ISO-8859-1");
282 break;
283 }
284 break;
285 case L:
286 if (csize < 5)
287 return 0; /* malformed */
288 /* find end of description */
289 obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... */
290 if (csize < 6)
291 obo = 0;
292 switch (data[pos + 10])
293 {
294 case 0x00:
295 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
296 csize - 4 - obo, "ISO-8859-1");
297 break;
298 case 0x01:
299 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
300 csize - 4 - obo, "UCS-2");
301 break;
302 default:
303 /* bad encoding byte,
304 try to convert from iso-8859-1 */
305 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
306 csize - 4 - obo, "ISO-8859-1");
307 break;
308 }
309 break;
310 case I:
311 if (csize < 2)
312 return 0; /* malformed */
313 /* find end of mime type */
314 off = 11;
315 while ( (off < size) &&
316 (off - pos < csize) &&
317 (data[pos + off] == '\0') )
318 off++;
319 if ( (off >= csize) ||
320 (data[pos+off] != '\0') )
321 return 0; /* malformed */
322 off++;
323 mime = strdup ((const char*) &data[pos + 11]);
324
325 switch (data[pos+off])
326 {
327 case 0x03:
328 case 0x04:
329 type = EXTRACTOR_METATYPE_COVER_PICTURE;
330 break;
331 case 0x07:
332 case 0x08:
333 case 0x09:
334 case 0x0A:
335 case 0x0B:
336 case 0x0C:
337 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
338 break;
339 case 0x0D:
340 case 0x0E:
341 case 0x0F:
342 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
343 break;
344 case 0x14:
345 type = EXTRACTOR_METATYPE_LOGO;
346 type = EXTRACTOR_METATYPE_LOGO;
347 break;
348 default:
349 type = EXTRACTOR_METATYPE_PICTURE;
350 break;
351 }
352 off++;
353
354 /* find end of description */
355 while ( (off < size) &&
356 (off - pos < csize) &&
357 (data[pos + off] == '\0') )
358 off++;
359 if ( (off >= csize) ||
360 (data[pos+off] != '\0') )
361 {
362 if (mime != NULL)
363 free (mime);
364 return 0; /* malformed */
365 }
366 off++;
367 if ( (mime != NULL) &&
368 (0 == strcasecmp ("-->",
369 mime)) )
370 {
371 /* not supported */
372 }
373 else
374 {
375 if (0 != proc (proc_cls,
376 "id3v23",
377 type,
378 EXTRACTOR_METAFORMAT_BINARY,
379 mime,
380 (const char*) &data[pos + off],
381 csize + 6 - off))
382 {
383 if (mime != NULL)
384 free (mime);
385 return 1;
386 }
387 }
388 if (mime != NULL)
389 free (mime);
390 word = NULL;
391 break;
392 default:
393 return 0;
394 }
395 if ((word != NULL) && (strlen (word) > 0))
396 {
397 if (0 != proc (proc_cls,
398 "id3v23",
399 tmap[i].type,
400 EXTRACTOR_METAFORMAT_UTF8,
401 "text/plain",
402 word,
403 strlen(word)+1))
404 {
405 free (word);
406 return 1;
407 }
408 }
409 if (word != NULL)
410 free (word);
411 break;
412 }
413 i++;
414 }
415 pos += 10 + csize;
416 }
417 return 0;
418}
419
420/* end of id3v23_extractor.c */