diff options
author | Christian Grothoff <christian@grothoff.org> | 2009-12-19 21:10:55 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2009-12-19 21:10:55 +0000 |
commit | fa7592ccb96d40353ff0270c57efe28057c81d7f (patch) | |
tree | ebdce5679114076c28136757c7e30d73b47b9741 | |
parent | 5307edba27e65305173177ebbeb5759c0c60217a (diff) | |
download | libextractor-fa7592ccb96d40353ff0270c57efe28057c81d7f.tar.gz libextractor-fa7592ccb96d40353ff0270c57efe28057c81d7f.zip |
id3v2
-rw-r--r-- | src/include/extractor.h | 19 | ||||
-rw-r--r-- | src/main/extractor_metatypes.c | 32 | ||||
-rw-r--r-- | src/plugins/html_extractor.c | 2 | ||||
-rw-r--r-- | src/plugins/id3v2_extractor.c | 327 |
4 files changed, 305 insertions, 75 deletions
diff --git a/src/include/extractor.h b/src/include/extractor.h index e826b8d..9c4ae60 100644 --- a/src/include/extractor.h +++ b/src/include/extractor.h | |||
@@ -136,7 +136,7 @@ enum EXTRACTOR_MetaType | |||
136 | EXTRACTOR_METATYPE_PUBLICATION_DATE = 24, | 136 | EXTRACTOR_METATYPE_PUBLICATION_DATE = 24, |
137 | EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25, | 137 | EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25, |
138 | EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26, | 138 | EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26, |
139 | EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE = 27, | 139 | EXTRACTOR_METATYPE_LANGUAGE = 27, |
140 | EXTRACTOR_METATYPE_CREATION_TIME = 28, | 140 | EXTRACTOR_METATYPE_CREATION_TIME = 28, |
141 | EXTRACTOR_METATYPE_URL = 29, | 141 | EXTRACTOR_METATYPE_URL = 29, |
142 | 142 | ||
@@ -285,12 +285,21 @@ enum EXTRACTOR_MetaType | |||
285 | EXTRACTOR_METATYPE_CHAPTER_NAME = 153, | 285 | EXTRACTOR_METATYPE_CHAPTER_NAME = 153, |
286 | EXTRACTOR_METATYPE_SONG_COUNT = 154, | 286 | EXTRACTOR_METATYPE_SONG_COUNT = 154, |
287 | EXTRACTOR_METATYPE_STARTING_SONG = 155, | 287 | EXTRACTOR_METATYPE_STARTING_SONG = 155, |
288 | EXTRACTOR_METATYPE_PLAY_COUNTER = 156, | ||
289 | EXTRACTOR_METATYPE_CONDUCTOR = 157, | ||
290 | EXTRACTOR_METATYPE_INTERPRETATION = 158, | ||
291 | EXTRACTOR_METATYPE_COMPOSER = 159, | ||
292 | EXTRACTOR_METATYPE_BEATS_PER_MINUTE = 160, | ||
293 | EXTRACTOR_METATYPE_ENCODED_BY = 161, | ||
294 | EXTRACTOR_METATYPE_ORIGINAL_TITLE = 162, | ||
295 | EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163, | ||
296 | EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164, | ||
297 | EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165, | ||
298 | EXTRACTOR_METATYPE_LYRICS = 166, | ||
299 | EXTRACTOR_METATYPE_POPULARITY_METER = 167, | ||
288 | 300 | ||
289 | /* fixme: used up to here! */ | 301 | /* fixme: used up to here! */ |
290 | 302 | ||
291 | EXTRACTOR_METATYPE_LYRICS = 67, | ||
292 | EXTRACTOR_METATYPE_CONDUCTOR = 64, | ||
293 | EXTRACTOR_METATYPE_INTERPRET = 65, | ||
294 | EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117, | 303 | EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117, |
295 | EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 123, | 304 | EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 123, |
296 | 305 | ||
@@ -304,7 +313,6 @@ enum EXTRACTOR_MetaType | |||
304 | 313 | ||
305 | 314 | ||
306 | /* numeric metrics */ | 315 | /* numeric metrics */ |
307 | EXTRACTOR_METATYPE_POPULARITY_METER = 119, | ||
308 | EXTRACTOR_METATYPE_RATING = 145, | 316 | EXTRACTOR_METATYPE_RATING = 145, |
309 | EXTRACTOR_METATYPE_PRIORITY = 60, | 317 | EXTRACTOR_METATYPE_PRIORITY = 60, |
310 | 318 | ||
@@ -316,7 +324,6 @@ enum EXTRACTOR_MetaType | |||
316 | /* misc (see if these are still needed...) */ | 324 | /* misc (see if these are still needed...) */ |
317 | 325 | ||
318 | EXTRACTOR_METATYPE_GENERATOR = 103, | 326 | EXTRACTOR_METATYPE_GENERATOR = 103, |
319 | EXTRACTOR_METATYPE_ENCODED_BY = 121, | ||
320 | 327 | ||
321 | EXTRACTOR_METATYPE_FULL_DATA = 137, | 328 | EXTRACTOR_METATYPE_FULL_DATA = 137, |
322 | 329 | ||
diff --git a/src/main/extractor_metatypes.c b/src/main/extractor_metatypes.c index 158d3a2..b98a395 100644 --- a/src/main/extractor_metatypes.c +++ b/src/main/extractor_metatypes.c | |||
@@ -380,14 +380,32 @@ static const struct MetaTypeDescription meta_type_descriptions[] = { | |||
380 | /* 155 */ | 380 | /* 155 */ |
381 | { gettext_noop ("starting song"), | 381 | { gettext_noop ("starting song"), |
382 | gettext_noop ("number of the first song to play") }, | 382 | gettext_noop ("number of the first song to play") }, |
383 | { gettext_noop (""), | 383 | { gettext_noop ("play counter"), |
384 | gettext_noop ("") }, | 384 | gettext_noop ("number of times the media has been played") }, |
385 | { gettext_noop (""), | 385 | { gettext_noop ("conductor"), |
386 | gettext_noop ("") }, | 386 | gettext_noop ("name of the conductor") }, |
387 | { gettext_noop (""), | 387 | { gettext_noop ("interpretation"), |
388 | gettext_noop ("") }, | 388 | gettext_noop ("information about the people behind interpretations of an existing piece") }, |
389 | { gettext_noop (""), | 389 | { gettext_noop ("composer"), |
390 | gettext_noop ("name of the composer") }, | ||
391 | /* 160 */ | ||
392 | { gettext_noop ("beats per minute"), | ||
390 | gettext_noop ("") }, | 393 | gettext_noop ("") }, |
394 | { gettext_noop ("encoded by"), | ||
395 | gettext_noop ("name of person or organization that encoded the file") }, | ||
396 | { gettext_noop ("original title"), | ||
397 | gettext_noop ("title of the original work") }, | ||
398 | { gettext_noop ("original artist"), | ||
399 | gettext_noop ("name of the original artist") }, | ||
400 | { gettext_noop ("original writer"), | ||
401 | gettext_noop ("name of the original lyricist or writer") }, | ||
402 | /* 165 */ | ||
403 | { gettext_noop ("original release year"), | ||
404 | gettext_noop ("year of the original release") }, | ||
405 | { gettext_noop ("lyrics"), | ||
406 | gettext_noop ("lyrics of the song or text description of vocal activities") }, | ||
407 | { gettext_noop ("popularity"), | ||
408 | gettext_noop ("information about the file's popularity") }, | ||
391 | { gettext_noop (""), | 409 | { gettext_noop (""), |
392 | gettext_noop ("") }, | 410 | gettext_noop ("") }, |
393 | #if 0 | 411 | #if 0 |
diff --git a/src/plugins/html_extractor.c b/src/plugins/html_extractor.c index 435548a..313c4ac 100644 --- a/src/plugins/html_extractor.c +++ b/src/plugins/html_extractor.c | |||
@@ -44,7 +44,7 @@ static struct | |||
44 | { "rights", EXTRACTOR_METATYPE_RIGHTS }, | 44 | { "rights", EXTRACTOR_METATYPE_RIGHTS }, |
45 | { "dc.rights", EXTRACTOR_METATYPE_RIGHTS }, | 45 | { "dc.rights", EXTRACTOR_METATYPE_RIGHTS }, |
46 | { "copyright", EXTRACTOR_METATYPE_COPYRIGHT }, | 46 | { "copyright", EXTRACTOR_METATYPE_COPYRIGHT }, |
47 | { "language", EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE }, | 47 | { "language", EXTRACTOR_METATYPE_LANGUAGE }, |
48 | { "keywords", EXTRACTOR_METATYPE_KEYWORDS }, | 48 | { "keywords", EXTRACTOR_METATYPE_KEYWORDS }, |
49 | { "abstract", EXTRACTOR_METATYPE_ABSTRACT }, | 49 | { "abstract", EXTRACTOR_METATYPE_ABSTRACT }, |
50 | { "formatter", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, | 50 | { "formatter", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, |
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c index fa5fea6..08ba124 100644 --- a/src/plugins/id3v2_extractor.c +++ b/src/plugins/id3v2_extractor.c | |||
@@ -28,46 +28,91 @@ | |||
28 | 28 | ||
29 | #define DEBUG_EXTRACT_ID3v2 0 | 29 | #define DEBUG_EXTRACT_ID3v2 0 |
30 | 30 | ||
31 | enum Id3v2Fmt | ||
32 | { | ||
33 | T, /* simple, 0-terminated string, prefixed by encoding */ | ||
34 | U, /* 0-terminated ASCII string, no encoding */ | ||
35 | UL, /* unsync'ed lyrics */ | ||
36 | SL, /* sync'ed lyrics */ | ||
37 | L, /* string with language prefix */ | ||
38 | I /* image */ | ||
39 | }; | ||
40 | |||
31 | typedef struct | 41 | typedef struct |
32 | { | 42 | { |
33 | const char *text; | 43 | const char *text; |
34 | enum EXTRACTOR_MetaType type; | 44 | enum EXTRACTOR_MetaType type; |
45 | enum Id3v2Fmt fmt; | ||
35 | } Matches; | 46 | } Matches; |
36 | 47 | ||
37 | static Matches tmap[] = { | 48 | static Matches tmap[] = { |
38 | {"TAL", EXTRACTOR_METATYPE_TITLE}, | 49 | /* skipping UFI */ |
39 | {"TT1", EXTRACTOR_METATYPE_GROUP}, | 50 | {"TT1", EXTRACTOR_METATYPE_SECTION, T}, |
40 | {"TT2", EXTRACTOR_METATYPE_TITLE}, | 51 | {"TT2", EXTRACTOR_METATYPE_TITLE, T}, |
41 | {"TT3", EXTRACTOR_METATYPE_TITLE}, | 52 | {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, |
42 | {"TXT", EXTRACTOR_METATYPE_DESCRIPTION}, | 53 | {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, |
43 | {"TPB", EXTRACTOR_METATYPE_PUBLISHER}, | 54 | {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, |
44 | {"WAF", EXTRACTOR_METATYPE_LOCATION}, | 55 | {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, |
45 | {"WAR", EXTRACTOR_METATYPE_LOCATION}, | 56 | {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, |
46 | {"WAS", EXTRACTOR_METATYPE_LOCATION}, | 57 | {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, |
47 | {"WCP", EXTRACTOR_METATYPE_COPYRIGHT}, | 58 | {"TXT", EXTRACTOR_METATYPE_WRITER, T}, |
48 | {"WAF", EXTRACTOR_METATYPE_LOCATION}, | 59 | {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, |
49 | {"WCM", EXTRACTOR_METATYPE_DISCLAIMER}, | 60 | {"TCO", EXTRACTOR_METATYPE_GENRE, T}, |
50 | {"TSS", EXTRACTOR_METATYPE_FORMAT}, | 61 | {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, |
51 | {"TYE", EXTRACTOR_METATYPE_DATE}, | 62 | {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, |
52 | {"TLA", EXTRACTOR_METATYPE_LANGUAGE}, | 63 | {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, |
53 | {"TP1", EXTRACTOR_METATYPE_ARTIST}, | 64 | {"TRC", EXTRACTOR_METATYPE_ISRC, T}, |
54 | {"TP2", EXTRACTOR_METATYPE_ARTIST}, | 65 | {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, |
55 | {"TP3", EXTRACTOR_METATYPE_CONDUCTOR}, | 66 | /* |
56 | {"TP4", EXTRACTOR_METATYPE_INTERPRET}, | 67 | FIXME: these two and TYE should be combined into |
57 | {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR}, | 68 | the actual publication date (if TRD is missing) |
58 | {"TOF", EXTRACTOR_METATYPE_FILENAME}, | 69 | {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
59 | {"TEN", EXTRACTOR_METATYPE_PRODUCER}, | 70 | {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
60 | {"TCO", EXTRACTOR_METATYPE_SUBJECT}, | 71 | */ |
61 | {"TCR", EXTRACTOR_METATYPE_COPYRIGHT}, | 72 | {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, |
62 | {"SLT", EXTRACTOR_METATYPE_LYRICS}, | 73 | {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, |
63 | {"TOA", EXTRACTOR_METATYPE_ARTIST}, | 74 | {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, |
64 | {"TRC", EXTRACTOR_METATYPE_ISRC}, | 75 | {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, |
65 | {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER}, | 76 | {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, |
66 | {"TCM", EXTRACTOR_METATYPE_CREATOR}, | 77 | {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, |
67 | {"TOT", EXTRACTOR_METATYPE_ALBUM}, | 78 | {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, |
68 | {"TOL", EXTRACTOR_METATYPE_AUTHOR}, | 79 | {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, |
69 | {"COM", EXTRACTOR_METATYPE_COMMENT}, | 80 | {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, |
70 | {"", EXTRACTOR_METATYPE_KEYWORDS}, | 81 | {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ |
82 | {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | ||
83 | /* skipping TDY, TKE */ | ||
84 | {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
85 | {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
86 | {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
87 | {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | ||
88 | /* skipping TXX */ | ||
89 | |||
90 | {"WAF", EXTRACTOR_METATYPE_URL, U}, | ||
91 | {"WAR", EXTRACTOR_METATYPE_URL, U}, | ||
92 | {"WAS", EXTRACTOR_METATYPE_URL, U}, | ||
93 | {"WCM", EXTRACTOR_METATYPE_URL, U}, | ||
94 | {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, | ||
95 | {"WCB", EXTRACTOR_METATYPE_URL, U}, | ||
96 | /* skipping WXX */ | ||
97 | {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
98 | /* skipping MCI */ | ||
99 | /* skipping ETC */ | ||
100 | /* skipping MLL */ | ||
101 | /* skipping STC */ | ||
102 | {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, | ||
103 | {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, | ||
104 | {"COM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
105 | /* skipping RVA */ | ||
106 | /* skipping EQU */ | ||
107 | /* skipping REV */ | ||
108 | {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
109 | /* skipping GEN */ | ||
110 | /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ | ||
111 | /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ | ||
112 | /* skipping BUF */ | ||
113 | /* skipping CRM */ | ||
114 | /* skipping CRA */ | ||
115 | /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ | ||
71 | {NULL, 0}, | 116 | {NULL, 0}, |
72 | }; | 117 | }; |
73 | 118 | ||
@@ -83,6 +128,9 @@ EXTRACTOR_id3v2_extract (const unsigned char *data, | |||
83 | int unsync; | 128 | int unsync; |
84 | unsigned int tsize; | 129 | unsigned int tsize; |
85 | unsigned int pos; | 130 | unsigned int pos; |
131 | unsigned int off; | ||
132 | enum EXTRACTOR_MetaType type; | ||
133 | const char *mime; | ||
86 | 134 | ||
87 | if ((size < 16) || | 135 | if ((size < 16) || |
88 | (data[0] != 0x49) || | 136 | (data[0] != 0x49) || |
@@ -102,10 +150,10 @@ EXTRACTOR_id3v2_extract (const unsigned char *data, | |||
102 | size_t csize; | 150 | size_t csize; |
103 | int i; | 151 | int i; |
104 | 152 | ||
105 | if (pos + 6 > tsize) | 153 | if (pos + 7 > tsize) |
106 | return 0; | 154 | return 0; |
107 | csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; | 155 | csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; |
108 | if ((pos + 6 + csize > tsize) || (csize > tsize) || (csize == 0)) | 156 | if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) |
109 | break; | 157 | break; |
110 | i = 0; | 158 | i = 0; |
111 | while (tmap[i].text != NULL) | 159 | while (tmap[i].text != NULL) |
@@ -116,33 +164,190 @@ EXTRACTOR_id3v2_extract (const unsigned char *data, | |||
116 | /* this byte describes the encoding | 164 | /* this byte describes the encoding |
117 | try to convert strings to UTF-8 | 165 | try to convert strings to UTF-8 |
118 | if it fails, then forget it */ | 166 | if it fails, then forget it */ |
119 | switch (data[pos + 6]) | 167 | switch (tmap[i].fmt) |
120 | { | 168 | { |
121 | case 0x00: | 169 | case T: |
122 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | 170 | switch (data[pos + 6]) |
123 | csize, "ISO-8859-1"); | 171 | { |
124 | break; | 172 | case 0x00: |
125 | case 0x01: | 173 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], |
126 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | 174 | csize - 1, "ISO-8859-1"); |
127 | csize, "UCS-2"); | 175 | break; |
128 | break; | 176 | case 0x01: |
129 | default: | 177 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], |
130 | /* bad encoding byte, | 178 | csize - 1, "UCS-2"); |
131 | try to convert from iso-8859-1 */ | 179 | break; |
132 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | 180 | default: |
133 | csize, "ISO-8859-1"); | 181 | /* bad encoding byte, |
134 | break; | 182 | try to convert from iso-8859-1 */ |
135 | } | 183 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], |
136 | pos++; | 184 | csize - 1, "ISO-8859-1"); |
137 | csize--; | 185 | break; |
138 | if ((word != NULL) && (strlen (word) > 0)) | 186 | } |
139 | { | 187 | break; |
140 | prev = addKeyword (prev, word, tmap[i].type); | 188 | case U: |
141 | } | 189 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6], |
142 | else | 190 | csize, "ISO-8859-1"); |
191 | break; | ||
192 | case UL: | ||
193 | if (csize < 6) | ||
194 | return 0; /* malformed */ | ||
195 | /* find end of description */ | ||
196 | off = 10; | ||
197 | while ( (off < size) && | ||
198 | (off - pos < csize) && | ||
199 | (data[pos + off] == '\0') ) | ||
200 | off++; | ||
201 | if ( (off >= csize) || | ||
202 | (data[pos+off] != '\0') ) | ||
203 | return 0; /* malformed */ | ||
204 | off++; | ||
205 | switch (data[pos + 6]) | ||
206 | { | ||
207 | case 0x00: | ||
208 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
209 | csize - off, "ISO-8859-1"); | ||
210 | break; | ||
211 | case 0x01: | ||
212 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
213 | csize - off, "UCS-2"); | ||
214 | break; | ||
215 | default: | ||
216 | /* bad encoding byte, | ||
217 | try to convert from iso-8859-1 */ | ||
218 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
219 | csize - off, "ISO-8859-1"); | ||
220 | break; | ||
221 | } | ||
222 | break; | ||
223 | case SL: | ||
224 | if (csize < 7) | ||
225 | return 0; /* malformed */ | ||
226 | /* find end of description */ | ||
227 | switch (data[pos + 6]) | ||
228 | { | ||
229 | case 0x00: | ||
230 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
231 | csize - 6, "ISO-8859-1"); | ||
232 | break; | ||
233 | case 0x01: | ||
234 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
235 | csize - 6, "UCS-2"); | ||
236 | break; | ||
237 | default: | ||
238 | /* bad encoding byte, | ||
239 | try to convert from iso-8859-1 */ | ||
240 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
241 | csize - 6, "ISO-8859-1"); | ||
242 | break; | ||
243 | } | ||
244 | break; | ||
245 | case L: | ||
246 | if (csize < 5) | ||
247 | return 0; /* malformed */ | ||
248 | /* find end of description */ | ||
249 | switch (data[pos + 6]) | ||
250 | { | ||
251 | case 0x00: | ||
252 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
253 | csize - 4, "ISO-8859-1"); | ||
254 | break; | ||
255 | case 0x01: | ||
256 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
257 | csize - 4, "UCS-2"); | ||
258 | break; | ||
259 | default: | ||
260 | /* bad encoding byte, | ||
261 | try to convert from iso-8859-1 */ | ||
262 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
263 | csize - 4, "ISO-8859-1"); | ||
264 | break; | ||
265 | } | ||
266 | break; | ||
267 | case I: | ||
268 | if (csize < 6) | ||
269 | return 0; /* malformed */ | ||
270 | /* find end of description */ | ||
271 | off = 12; | ||
272 | while ( (off < size) && | ||
273 | (off - pos < csize) && | ||
274 | (data[pos + off] == '\0') ) | ||
275 | off++; | ||
276 | if ( (off >= csize) || | ||
277 | (data[pos+off] != '\0') ) | ||
278 | return 0; /* malformed */ | ||
279 | off++; | ||
280 | switch (data[pos+11]) | ||
281 | { | ||
282 | case 0x03: | ||
283 | case 0x04: | ||
284 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
285 | break; | ||
286 | case 0x07: | ||
287 | case 0x08: | ||
288 | case 0x09: | ||
289 | case 0x0A: | ||
290 | case 0x0B: | ||
291 | case 0x0C: | ||
292 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
293 | break; | ||
294 | case 0x0D: | ||
295 | case 0x0E: | ||
296 | case 0x0F: | ||
297 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
298 | break; | ||
299 | case 0x14: | ||
300 | type = EXTRACTOR_METATYPE_LOGO; | ||
301 | type = EXTRACTOR_METATYPE_LOGO; | ||
302 | break; | ||
303 | default: | ||
304 | type = EXTRACTOR_METATYPE_PICTURE; | ||
305 | break; | ||
306 | } | ||
307 | if (0 == strncasecmp ("PNG", | ||
308 | (const char*) &data[pos + 7], 3)) | ||
309 | mime = "image/png"; | ||
310 | else if (0 == strncasecmp ("JPG", | ||
311 | (const char*) &data[pos + 7], 3)) | ||
312 | mime = "image/jpeg"; | ||
313 | else | ||
314 | mime = NULL; | ||
315 | if (0 == strncasecmp ("-->", | ||
316 | (const char*) &data[pos + 7], 3)) | ||
317 | { | ||
318 | /* not supported */ | ||
319 | } | ||
320 | else | ||
321 | { | ||
322 | if (0 != proc (proc_cls, | ||
323 | "id3v2", | ||
324 | type, | ||
325 | EXTRACTOR_METAFORMAT_BINARY, | ||
326 | mime, | ||
327 | (const char*) &data[pos + off], | ||
328 | csize + 6 - off)) | ||
329 | return 1; | ||
330 | } | ||
331 | word = NULL; | ||
332 | break; | ||
333 | default: | ||
334 | return 0; | ||
335 | } | ||
336 | if ((word != NULL) && (strlen (word) > 0)) | ||
143 | { | 337 | { |
144 | free (word); | 338 | if (0 != proc (proc_cls, |
145 | } | 339 | "id3v2", |
340 | type, | ||
341 | EXTRACTOR_METAFORMAT_UTF8, | ||
342 | "text/plain", | ||
343 | word, | ||
344 | strlen(word)+1)) | ||
345 | { | ||
346 | free (word); | ||
347 | return 1; | ||
348 | } | ||
349 | } | ||
350 | free (word); | ||
146 | break; | 351 | break; |
147 | } | 352 | } |
148 | i++; | 353 | i++; |