diff options
Diffstat (limited to 'src/plugins/id3v2_extractor.c')
-rw-r--r-- | src/plugins/id3v2_extractor.c | 327 |
1 files changed, 266 insertions, 61 deletions
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c index fa5fea6..08ba124 100644 --- a/src/plugins/id3v2_extractor.c +++ b/src/plugins/id3v2_extractor.c | |||
@@ -28,46 +28,91 @@ | |||
28 | 28 | ||
29 | #define DEBUG_EXTRACT_ID3v2 0 | 29 | #define DEBUG_EXTRACT_ID3v2 0 |
30 | 30 | ||
31 | enum Id3v2Fmt | ||
32 | { | ||
33 | T, /* simple, 0-terminated string, prefixed by encoding */ | ||
34 | U, /* 0-terminated ASCII string, no encoding */ | ||
35 | UL, /* unsync'ed lyrics */ | ||
36 | SL, /* sync'ed lyrics */ | ||
37 | L, /* string with language prefix */ | ||
38 | I /* image */ | ||
39 | }; | ||
40 | |||
31 | typedef struct | 41 | typedef struct |
32 | { | 42 | { |
33 | const char *text; | 43 | const char *text; |
34 | enum EXTRACTOR_MetaType type; | 44 | enum EXTRACTOR_MetaType type; |
45 | enum Id3v2Fmt fmt; | ||
35 | } Matches; | 46 | } Matches; |
36 | 47 | ||
37 | static Matches tmap[] = { | 48 | static Matches tmap[] = { |
38 | {"TAL", EXTRACTOR_METATYPE_TITLE}, | 49 | /* skipping UFI */ |
39 | {"TT1", EXTRACTOR_METATYPE_GROUP}, | 50 | {"TT1", EXTRACTOR_METATYPE_SECTION, T}, |
40 | {"TT2", EXTRACTOR_METATYPE_TITLE}, | 51 | {"TT2", EXTRACTOR_METATYPE_TITLE, T}, |
41 | {"TT3", EXTRACTOR_METATYPE_TITLE}, | 52 | {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, |
42 | {"TXT", EXTRACTOR_METATYPE_DESCRIPTION}, | 53 | {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, |
43 | {"TPB", EXTRACTOR_METATYPE_PUBLISHER}, | 54 | {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, |
44 | {"WAF", EXTRACTOR_METATYPE_LOCATION}, | 55 | {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, |
45 | {"WAR", EXTRACTOR_METATYPE_LOCATION}, | 56 | {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, |
46 | {"WAS", EXTRACTOR_METATYPE_LOCATION}, | 57 | {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, |
47 | {"WCP", EXTRACTOR_METATYPE_COPYRIGHT}, | 58 | {"TXT", EXTRACTOR_METATYPE_WRITER, T}, |
48 | {"WAF", EXTRACTOR_METATYPE_LOCATION}, | 59 | {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, |
49 | {"WCM", EXTRACTOR_METATYPE_DISCLAIMER}, | 60 | {"TCO", EXTRACTOR_METATYPE_GENRE, T}, |
50 | {"TSS", EXTRACTOR_METATYPE_FORMAT}, | 61 | {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, |
51 | {"TYE", EXTRACTOR_METATYPE_DATE}, | 62 | {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, |
52 | {"TLA", EXTRACTOR_METATYPE_LANGUAGE}, | 63 | {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, |
53 | {"TP1", EXTRACTOR_METATYPE_ARTIST}, | 64 | {"TRC", EXTRACTOR_METATYPE_ISRC, T}, |
54 | {"TP2", EXTRACTOR_METATYPE_ARTIST}, | 65 | {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, |
55 | {"TP3", EXTRACTOR_METATYPE_CONDUCTOR}, | 66 | /* |
56 | {"TP4", EXTRACTOR_METATYPE_INTERPRET}, | 67 | FIXME: these two and TYE should be combined into |
57 | {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR}, | 68 | the actual publication date (if TRD is missing) |
58 | {"TOF", EXTRACTOR_METATYPE_FILENAME}, | 69 | {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
59 | {"TEN", EXTRACTOR_METATYPE_PRODUCER}, | 70 | {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
60 | {"TCO", EXTRACTOR_METATYPE_SUBJECT}, | 71 | */ |
61 | {"TCR", EXTRACTOR_METATYPE_COPYRIGHT}, | 72 | {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, |
62 | {"SLT", EXTRACTOR_METATYPE_LYRICS}, | 73 | {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, |
63 | {"TOA", EXTRACTOR_METATYPE_ARTIST}, | 74 | {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, |
64 | {"TRC", EXTRACTOR_METATYPE_ISRC}, | 75 | {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, |
65 | {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER}, | 76 | {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, |
66 | {"TCM", EXTRACTOR_METATYPE_CREATOR}, | 77 | {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, |
67 | {"TOT", EXTRACTOR_METATYPE_ALBUM}, | 78 | {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, |
68 | {"TOL", EXTRACTOR_METATYPE_AUTHOR}, | 79 | {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, |
69 | {"COM", EXTRACTOR_METATYPE_COMMENT}, | 80 | {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, |
70 | {"", EXTRACTOR_METATYPE_KEYWORDS}, | 81 | {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ |
82 | {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | ||
83 | /* skipping TDY, TKE */ | ||
84 | {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
85 | {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
86 | {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
87 | {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | ||
88 | /* skipping TXX */ | ||
89 | |||
90 | {"WAF", EXTRACTOR_METATYPE_URL, U}, | ||
91 | {"WAR", EXTRACTOR_METATYPE_URL, U}, | ||
92 | {"WAS", EXTRACTOR_METATYPE_URL, U}, | ||
93 | {"WCM", EXTRACTOR_METATYPE_URL, U}, | ||
94 | {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, | ||
95 | {"WCB", EXTRACTOR_METATYPE_URL, U}, | ||
96 | /* skipping WXX */ | ||
97 | {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
98 | /* skipping MCI */ | ||
99 | /* skipping ETC */ | ||
100 | /* skipping MLL */ | ||
101 | /* skipping STC */ | ||
102 | {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, | ||
103 | {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, | ||
104 | {"COM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
105 | /* skipping RVA */ | ||
106 | /* skipping EQU */ | ||
107 | /* skipping REV */ | ||
108 | {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
109 | /* skipping GEN */ | ||
110 | /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ | ||
111 | /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ | ||
112 | /* skipping BUF */ | ||
113 | /* skipping CRM */ | ||
114 | /* skipping CRA */ | ||
115 | /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ | ||
71 | {NULL, 0}, | 116 | {NULL, 0}, |
72 | }; | 117 | }; |
73 | 118 | ||
@@ -83,6 +128,9 @@ EXTRACTOR_id3v2_extract (const unsigned char *data, | |||
83 | int unsync; | 128 | int unsync; |
84 | unsigned int tsize; | 129 | unsigned int tsize; |
85 | unsigned int pos; | 130 | unsigned int pos; |
131 | unsigned int off; | ||
132 | enum EXTRACTOR_MetaType type; | ||
133 | const char *mime; | ||
86 | 134 | ||
87 | if ((size < 16) || | 135 | if ((size < 16) || |
88 | (data[0] != 0x49) || | 136 | (data[0] != 0x49) || |
@@ -102,10 +150,10 @@ EXTRACTOR_id3v2_extract (const unsigned char *data, | |||
102 | size_t csize; | 150 | size_t csize; |
103 | int i; | 151 | int i; |
104 | 152 | ||
105 | if (pos + 6 > tsize) | 153 | if (pos + 7 > tsize) |
106 | return 0; | 154 | return 0; |
107 | csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; | 155 | csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; |
108 | if ((pos + 6 + csize > tsize) || (csize > tsize) || (csize == 0)) | 156 | if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) |
109 | break; | 157 | break; |
110 | i = 0; | 158 | i = 0; |
111 | while (tmap[i].text != NULL) | 159 | while (tmap[i].text != NULL) |
@@ -116,33 +164,190 @@ EXTRACTOR_id3v2_extract (const unsigned char *data, | |||
116 | /* this byte describes the encoding | 164 | /* this byte describes the encoding |
117 | try to convert strings to UTF-8 | 165 | try to convert strings to UTF-8 |
118 | if it fails, then forget it */ | 166 | if it fails, then forget it */ |
119 | switch (data[pos + 6]) | 167 | switch (tmap[i].fmt) |
120 | { | 168 | { |
121 | case 0x00: | 169 | case T: |
122 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | 170 | switch (data[pos + 6]) |
123 | csize, "ISO-8859-1"); | 171 | { |
124 | break; | 172 | case 0x00: |
125 | case 0x01: | 173 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], |
126 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | 174 | csize - 1, "ISO-8859-1"); |
127 | csize, "UCS-2"); | 175 | break; |
128 | break; | 176 | case 0x01: |
129 | default: | 177 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], |
130 | /* bad encoding byte, | 178 | csize - 1, "UCS-2"); |
131 | try to convert from iso-8859-1 */ | 179 | break; |
132 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | 180 | default: |
133 | csize, "ISO-8859-1"); | 181 | /* bad encoding byte, |
134 | break; | 182 | try to convert from iso-8859-1 */ |
135 | } | 183 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], |
136 | pos++; | 184 | csize - 1, "ISO-8859-1"); |
137 | csize--; | 185 | break; |
138 | if ((word != NULL) && (strlen (word) > 0)) | 186 | } |
139 | { | 187 | break; |
140 | prev = addKeyword (prev, word, tmap[i].type); | 188 | case U: |
141 | } | 189 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6], |
142 | else | 190 | csize, "ISO-8859-1"); |
191 | break; | ||
192 | case UL: | ||
193 | if (csize < 6) | ||
194 | return 0; /* malformed */ | ||
195 | /* find end of description */ | ||
196 | off = 10; | ||
197 | while ( (off < size) && | ||
198 | (off - pos < csize) && | ||
199 | (data[pos + off] == '\0') ) | ||
200 | off++; | ||
201 | if ( (off >= csize) || | ||
202 | (data[pos+off] != '\0') ) | ||
203 | return 0; /* malformed */ | ||
204 | off++; | ||
205 | switch (data[pos + 6]) | ||
206 | { | ||
207 | case 0x00: | ||
208 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
209 | csize - off, "ISO-8859-1"); | ||
210 | break; | ||
211 | case 0x01: | ||
212 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
213 | csize - off, "UCS-2"); | ||
214 | break; | ||
215 | default: | ||
216 | /* bad encoding byte, | ||
217 | try to convert from iso-8859-1 */ | ||
218 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
219 | csize - off, "ISO-8859-1"); | ||
220 | break; | ||
221 | } | ||
222 | break; | ||
223 | case SL: | ||
224 | if (csize < 7) | ||
225 | return 0; /* malformed */ | ||
226 | /* skip the first 6 content bytes (pos + 6 .. pos + 11); no description search is done here */ | ||
227 | switch (data[pos + 6]) | ||
228 | { | ||
229 | case 0x00: | ||
230 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
231 | csize - 6, "ISO-8859-1"); | ||
232 | break; | ||
233 | case 0x01: | ||
234 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
235 | csize - 6, "UCS-2"); | ||
236 | break; | ||
237 | default: | ||
238 | /* bad encoding byte, | ||
239 | try to convert from iso-8859-1 */ | ||
240 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
241 | csize - 6, "ISO-8859-1"); | ||
242 | break; | ||
243 | } | ||
244 | break; | ||
245 | case L: | ||
246 | if (csize < 5) | ||
247 | return 0; /* malformed */ | ||
248 | /* skip the first 4 content bytes (pos + 6 .. pos + 9); no description search is done here */ | ||
249 | switch (data[pos + 6]) | ||
250 | { | ||
251 | case 0x00: | ||
252 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
253 | csize - 4, "ISO-8859-1"); | ||
254 | break; | ||
255 | case 0x01: | ||
256 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
257 | csize - 4, "UCS-2"); | ||
258 | break; | ||
259 | default: | ||
260 | /* bad encoding byte, | ||
261 | try to convert from iso-8859-1 */ | ||
262 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
263 | csize - 4, "ISO-8859-1"); | ||
264 | break; | ||
265 | } | ||
266 | break; | ||
267 | case I: | ||
268 | if (csize < 6) | ||
269 | return 0; /* malformed */ | ||
270 | /* find end of description */ | ||
271 | off = 12; | ||
272 | while ( (off < size) && | ||
273 | (off - pos < csize) && | ||
274 | (data[pos + off] == '\0') ) | ||
275 | off++; | ||
276 | if ( (off >= csize) || | ||
277 | (data[pos+off] != '\0') ) | ||
278 | return 0; /* malformed */ | ||
279 | off++; | ||
280 | switch (data[pos+11]) | ||
281 | { | ||
282 | case 0x03: | ||
283 | case 0x04: | ||
284 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
285 | break; | ||
286 | case 0x07: | ||
287 | case 0x08: | ||
288 | case 0x09: | ||
289 | case 0x0A: | ||
290 | case 0x0B: | ||
291 | case 0x0C: | ||
292 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
293 | break; | ||
294 | case 0x0D: | ||
295 | case 0x0E: | ||
296 | case 0x0F: | ||
297 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
298 | break; | ||
299 | case 0x14: | ||
300 | type = EXTRACTOR_METATYPE_LOGO; | ||
301 | type = EXTRACTOR_METATYPE_LOGO; | ||
302 | break; | ||
303 | default: | ||
304 | type = EXTRACTOR_METATYPE_PICTURE; | ||
305 | break; | ||
306 | } | ||
307 | if (0 == strncasecmp ("PNG", | ||
308 | (const char*) &data[pos + 7], 3)) | ||
309 | mime = "image/png"; | ||
310 | else if (0 == strncasecmp ("JPG", | ||
311 | (const char*) &data[pos + 7], 3)) | ||
312 | mime = "image/jpeg"; | ||
313 | else | ||
314 | mime = NULL; | ||
315 | if (0 == strncasecmp ("-->", | ||
316 | (const char*) &data[pos + 7], 3)) | ||
317 | { | ||
318 | /* not supported */ | ||
319 | } | ||
320 | else | ||
321 | { | ||
322 | if (0 != proc (proc_cls, | ||
323 | "id3v2", | ||
324 | type, | ||
325 | EXTRACTOR_METAFORMAT_BINARY, | ||
326 | mime, | ||
327 | (const char*) &data[pos + off], | ||
328 | csize + 6 - off)) | ||
329 | return 1; | ||
330 | } | ||
331 | word = NULL; | ||
332 | break; | ||
333 | default: | ||
334 | return 0; | ||
335 | } | ||
336 | if ((word != NULL) && (strlen (word) > 0)) | ||
143 | { | 337 | { |
144 | free (word); | 338 | if (0 != proc (proc_cls, |
145 | } | 339 | "id3v2", |
340 | type, | ||
341 | EXTRACTOR_METAFORMAT_UTF8, | ||
342 | "text/plain", | ||
343 | word, | ||
344 | strlen(word)+1)) | ||
345 | { | ||
346 | free (word); | ||
347 | return 1; | ||
348 | } | ||
349 | } | ||
350 | free (word); | ||
146 | break; | 351 | break; |
147 | } | 352 | } |
148 | i++; | 353 | i++; |