aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2009-12-20 00:06:45 +0000
committerChristian Grothoff <christian@grothoff.org>2009-12-20 00:06:45 +0000
commit949dae1583254b789e3dafe569e030140a621846 (patch)
treea6e356030fcb030bd88d96b74d7aa62e1dd5aeb7
parent73b50507dba0570f2182f21f8b1c27a95886e4e7 (diff)
downloadlibextractor-949dae1583254b789e3dafe569e030140a621846.tar.gz
libextractor-949dae1583254b789e3dafe569e030140a621846.zip
id3vx
-rw-r--r--TODO2
-rw-r--r--src/include/extractor.h15
-rw-r--r--src/main/extractor_metatypes.c21
-rw-r--r--src/plugins/id3v23_extractor.c330
-rw-r--r--src/plugins/id3v24_extractor.c432
-rw-r--r--src/plugins/id3v2_extractor.c2
-rw-r--r--src/plugins/odf_extractor.c2
-rw-r--r--src/plugins/ole2_extractor.c4
-rw-r--r--src/plugins/png_extractor.c2
-rw-r--r--src/plugins/qt_extractor.c8
10 files changed, 609 insertions, 209 deletions
diff --git a/TODO b/TODO
index cc8c6eb..12b473b 100644
--- a/TODO
+++ b/TODO
@@ -17,12 +17,12 @@ Core:
17 17
18Incomplete code (missing features): 18Incomplete code (missing features):
19* RIFF (idx1 attribute) 19* RIFF (idx1 attribute)
20* IDv2{3,4} (some attributes, make testcases in test/id3v2/ work)
21* StarOffice sdw (some attributes, see doc/) 20* StarOffice sdw (some attributes, see doc/)
22* man pages (interpret sections for authors, brief description) 21* man pages (interpret sections for authors, brief description)
23* pdf: full-text extraction! 22* pdf: full-text extraction!
24* EXIV2 23* EXIV2
25* ELF: 64-bit support, lists of architectures, OSes, etc. are incomplete 24* ELF: 64-bit support, lists of architectures, OSes, etc. are incomplete
25* ID3v2x: unsynchronization support, (de)compression support, footer support (24)
26 26
27Desirable missing formats: 27Desirable missing formats:
28* mbox / various e-mail formats 28* mbox / various e-mail formats
diff --git a/src/include/extractor.h b/src/include/extractor.h
index 9c4ae60..ffebd5c 100644
--- a/src/include/extractor.h
+++ b/src/include/extractor.h
@@ -280,8 +280,8 @@ enum EXTRACTOR_MetaType
280 EXTRACTOR_METATYPE_PRODUCT_VERSION = 148, 280 EXTRACTOR_METATYPE_PRODUCT_VERSION = 148,
281 EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149, 281 EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149,
282 EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150, 282 EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150,
283 EXTRACTOR_METATYPE_TV_NETWORK_NAME = 151, 283 EXTRACTOR_METATYPE_NETWORK_NAME = 151,
284 EXTRACTOR_METATYPE_TV_SHOW_NAME = 152, 284 EXTRACTOR_METATYPE_SHOW_NAME = 152,
285 EXTRACTOR_METATYPE_CHAPTER_NAME = 153, 285 EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
286 EXTRACTOR_METATYPE_SONG_COUNT = 154, 286 EXTRACTOR_METATYPE_SONG_COUNT = 154,
287 EXTRACTOR_METATYPE_STARTING_SONG = 155, 287 EXTRACTOR_METATYPE_STARTING_SONG = 155,
@@ -295,13 +295,17 @@ enum EXTRACTOR_MetaType
295 EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163, 295 EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
296 EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164, 296 EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
297 EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165, 297 EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
298 EXTRACTOR_METATYPE_LYRICS = 166, 298 EXTRACTOR_METATYPE_ORIGINAL_PERFORMER = 166,
299 EXTRACTOR_METATYPE_POPULARITY_METER = 167, 299 EXTRACTOR_METATYPE_LYRICS = 167,
300 EXTRACTOR_METATYPE_POPULARITY_METER = 168,
301 EXTRACTOR_METATYPE_LICENSEE = 169,
302 EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 170,
303 EXTRACTOR_METATYPE_MOOD = 171,
304 EXTRACTOR_METATYPE_SUBTITLE = 172,
300 305
301 /* fixme: used up to here! */ 306 /* fixme: used up to here! */
302 307
303 EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117, 308 EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117,
304 EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 123,
305 309
306 310
307 EXTRACTOR_METATYPE_SCALE = 108, 311 EXTRACTOR_METATYPE_SCALE = 108,
@@ -342,7 +346,6 @@ enum EXTRACTOR_MetaType
342 EXTRACTOR_METATYPE_FULL_NAME = 113, 346 EXTRACTOR_METATYPE_FULL_NAME = 113,
343 EXTRACTOR_METATYPE_LINK = 116, 347 EXTRACTOR_METATYPE_LINK = 116,
344 EXTRACTOR_METATYPE_TIME = 122, 348 EXTRACTOR_METATYPE_TIME = 122,
345 EXTRACTOR_METATYPE_MOOD = 124,
346 EXTRACTOR_METATYPE_TELEVISION_SYSTEM = 126, 349 EXTRACTOR_METATYPE_TELEVISION_SYSTEM = 126,
347 EXTRACTOR_METATYPE_HARDWARE_DEPENDENCY = 129, 350 EXTRACTOR_METATYPE_HARDWARE_DEPENDENCY = 129,
348 EXTRACTOR_METATYPE_RIPPER = 130, 351 EXTRACTOR_METATYPE_RIPPER = 130,
diff --git a/src/main/extractor_metatypes.c b/src/main/extractor_metatypes.c
index b98a395..c97fc3e 100644
--- a/src/main/extractor_metatypes.c
+++ b/src/main/extractor_metatypes.c
@@ -369,10 +369,10 @@ static const struct MetaTypeDescription meta_type_descriptions[] = {
369 /* 150 */ 369 /* 150 */
370 { gettext_noop ("movie director"), 370 { gettext_noop ("movie director"),
371 gettext_noop ("name of the director") }, 371 gettext_noop ("name of the director") },
372 { gettext_noop ("TV network"), 372 { gettext_noop ("network"),
373 gettext_noop ("name of the broadcasting TV network") }, 373 gettext_noop ("name of the broadcasting network or station") },
374 { gettext_noop ("TV show"), 374 { gettext_noop ("show"),
375 gettext_noop ("name of the TV show") }, 375 gettext_noop ("name of the show") },
376 { gettext_noop ("chapter name"), 376 { gettext_noop ("chapter name"),
377 gettext_noop ("name of the chapter") }, 377 gettext_noop ("name of the chapter") },
378 { gettext_noop ("song count"), 378 { gettext_noop ("song count"),
@@ -402,10 +402,23 @@ static const struct MetaTypeDescription meta_type_descriptions[] = {
402 /* 165 */ 402 /* 165 */
403 { gettext_noop ("original release year"), 403 { gettext_noop ("original release year"),
404 gettext_noop ("year of the original release") }, 404 gettext_noop ("year of the original release") },
405 { gettext_noop ("original performer"),
406 gettext_noop ("name of the original performer") },
405 { gettext_noop ("lyrics"), 407 { gettext_noop ("lyrics"),
406 gettext_noop ("lyrics of the song or text description of vocal activities") }, 408 gettext_noop ("lyrics of the song or text description of vocal activities") },
407 { gettext_noop ("popularity"), 409 { gettext_noop ("popularity"),
408 gettext_noop ("information about the file's popularity") }, 410 gettext_noop ("information about the file's popularity") },
411 { gettext_noop ("licensee"),
412 gettext_noop ("name of the owner or licensee of the file") },
413 /* 170 */
414 { gettext_noop ("musician credit list"),
415 gettext_noop ("names of contributing musicians") },
416 { gettext_noop ("mood"),
417 gettext_noop ("keywords reflecting the mood of the piece") },
418 { gettext_noop ("subtitle"),
419 gettext_noop ("subtitle of this part") },
420 { gettext_noop (""),
421 gettext_noop ("") },
409 { gettext_noop (""), 422 { gettext_noop (""),
410 gettext_noop ("") }, 423 gettext_noop ("") },
411#if 0 424#if 0
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c
index 71553c2..4ab8116 100644
--- a/src/plugins/id3v23_extractor.c
+++ b/src/plugins/id3v23_extractor.c
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007 Vidyut Samanta and Christian Grothoff 3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
@@ -35,54 +35,83 @@
35 35
36#include "convert.h" 36#include "convert.h"
37 37
38enum Id3v23Fmt
39 {
40 T, /* simple, 0-terminated string, prefixed by encoding */
41 U, /* 0-terminated ASCII string, no encoding */
42 UL, /* unsync'ed lyrics */
43 SL, /* sync'ed lyrics */
44 L, /* string with language prefix */
45 I /* image */
46 };
47
38typedef struct 48typedef struct
39{ 49{
40 const char *text; 50 const char *text;
41 enum EXTRACTOR_MetaType type; 51 enum EXTRACTOR_MetaType type;
52 enum Id3v23Fmt fmt;
42} Matches; 53} Matches;
43 54
44static Matches tmap[] = { 55static Matches tmap[] = {
45 {"COMM", EXTRACTOR_METATYPE_COMMENT}, 56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
46 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR}, 57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
47 {"LINK", EXTRACTOR_METATYPE_LINK}, 58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
48 {"MCDI", EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER}, 59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
49 {"PCNT", EXTRACTOR_METATYPE_PLAY_COUNTER}, 60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
50 {"POPM", EXTRACTOR_METATYPE_POPULARITY_METER}, 61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
51 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT}, 62 /* TDLY */
52 {"TDAT", EXTRACTOR_METATYPE_DATE}, 63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
53 {"TCON", EXTRACTOR_METATYPE_CONTENT_TYPE}, 64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
54 {"TIT1", EXTRACTOR_METATYPE_GENRE}, 65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
55 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY}, 66 /* TIME */
56 {"TEXT", EXTRACTOR_METATYPE_LYRICS}, 67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
57 {"TOLY", EXTRACTOR_METATYPE_CONTRIBUTOR}, 68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
58 {"TOPE", EXTRACTOR_METATYPE_CONTRIBUTOR}, 69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
59 {"TOWN", EXTRACTOR_METATYPE_OWNER}, 70 /* TKEY */
60 {"TPE1", EXTRACTOR_METATYPE_ARTIST}, 71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
61 {"TPE2", EXTRACTOR_METATYPE_ARTIST}, 72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
62 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR}, 73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
63 {"TPE4", EXTRACTOR_METATYPE_INTERPRET}, 74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
64 {"TMED", EXTRACTOR_METATYPE_MEDIA_TYPE}, 75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
65 {"TCOM", EXTRACTOR_METATYPE_CREATOR}, 76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
66 {"TIME", EXTRACTOR_METATYPE_TIME}, 77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
67 {"TOFN", EXTRACTOR_METATYPE_FILENAME}, 78 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
68 {"TOPE", EXTRACTOR_METATYPE_ARTIST}, 79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
69 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER}, 80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
70 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER}, 81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
71 {"TRSC", EXTRACTOR_METATYPE_ISRC}, 82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
72 {"TRSN", EXTRACTOR_METATYPE_SOURCE}, 83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
73 {"TRSO", EXTRACTOR_METATYPE_CREATED_FOR}, 84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
74 {"TSRC", EXTRACTOR_METATYPE_RESOURCE_IDENTIFIER}, 85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
75 {"TOAL", EXTRACTOR_METATYPE_ALBUM}, 86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
76 {"TALB", EXTRACTOR_METATYPE_ALBUM}, 87 /* TRDA */
77 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE}, 88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
78 {"TYER", EXTRACTOR_METATYPE_YEAR}, 89 /* TRSO */
79 {"TLEN", EXTRACTOR_METATYPE_DURATION}, 90 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
80 {"TIT2", EXTRACTOR_METATYPE_TITLE}, 91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
81 {"TIT3", EXTRACTOR_METATYPE_DESCRIPTION}, 92 /* TSSE */
82 {"WCOM", EXTRACTOR_METATYPE_RELEASE}, 93 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
83 {"WCOP", EXTRACTOR_METATYPE_DISCLAIMER}, 94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
84 {"", EXTRACTOR_METATYPE_KEYWORDS}, 95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
85 {NULL, 0} 96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 {NULL, 0, T}
86}; 115};
87 116
88 117
@@ -104,6 +133,9 @@ EXTRACTOR_id3v23_extract (const unsigned char *data,
104 uint32_t csize; 133 uint32_t csize;
105 int i; 134 int i;
106 uint16_t flags; 135 uint16_t flags;
136 char *mime;
137 enum EXTRACTOR_MetaType type;
138 size_t off;
107 139
108 if ((size < 16) || 140 if ((size < 16) ||
109 (data[0] != 0x49) || 141 (data[0] != 0x49) ||
@@ -111,12 +143,16 @@ EXTRACTOR_id3v23_extract (const unsigned char *data,
111 (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00)) 143 (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
112 return 0; 144 return 0;
113 unsync = (data[5] & 0x80) > 0; 145 unsync = (data[5] & 0x80) > 0;
146 if (unsync)
147 return 0; /* not supported */
114 extendedHdr = (data[5] & 0x40) > 0; 148 extendedHdr = (data[5] & 0x40) > 0;
115 experimental = (data[5] & 0x20) > 0; 149 experimental = (data[5] & 0x20) > 0;
150 if (experimental)
151 return 0;
116 tsize = (((data[6] & 0x7F) << 21) | 152 tsize = (((data[6] & 0x7F) << 21) |
117 ((data[7] & 0x7F) << 14) | 153 ((data[7] & 0x7F) << 14) |
118 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); 154 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
119 if ((tsize + 10 > size) || (experimental)) 155 if (tsize + 10 > size)
120 return 0; 156 return 0;
121 pos = 10; 157 pos = 10;
122 padding = 0; 158 padding = 0;
@@ -142,7 +178,8 @@ EXTRACTOR_id3v23_extract (const unsigned char *data,
142 csize = 178 csize =
143 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) + 179 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
144 data[pos + 7]; 180 data[pos + 7];
145 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0)) 181 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
182 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
146 break; 183 break;
147 flags = (data[pos + 8] << 8) + data[pos + 9]; 184 flags = (data[pos + 8] << 8) + data[pos + 9];
148 if (((flags & 0x80) > 0) /* compressed, not yet supported */ || 185 if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
@@ -163,32 +200,191 @@ EXTRACTOR_id3v23_extract (const unsigned char *data,
163 pos++; 200 pos++;
164 csize--; 201 csize--;
165 } 202 }
166 csize--; 203 switch (tmap[i].fmt)
167 /* this byte describes the encoding 204 {
168 try to convert strings to UTF-8 205 case T:
169 if it fails, then forget it */ 206 /* this byte describes the encoding
170 switch (data[pos + 10]) 207 try to convert strings to UTF-8
171 { 208 if it fails, then forget it */
172 case 0x00: 209 switch (data[pos + 10])
173 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 210 {
174 csize, "ISO-8859-1"); 211 case 0x00:
175 break; 212 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
176 case 0x01: 213 csize - 1, "ISO-8859-1");
177 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 214 break;
178 csize, "UCS-2"); 215 case 0x01:
179 break; 216 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
180 default: 217 csize - 1, "UCS-2");
181 /* bad encoding byte, 218 break;
182 try to convert from iso-8859-1 */ 219 default:
183 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 220 /* bad encoding byte,
184 csize, "ISO-8859-1"); 221 try to convert from iso-8859-1 */
185 break; 222 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
186 } 223 csize - 1, "ISO-8859-1");
187 pos++; 224 break;
225 }
226 break;
227 case U:
228 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
229 csize, "ISO-8859-1");
230 break;
231 case UL:
232 if (csize < 6)
233 return 0; /* malformed */
234 /* find end of description */
235 off = 14;
236 while ( (off < size) &&
237 (off - pos < csize) &&
238 (data[pos + off] == '\0') )
239 off++;
240 if ( (off >= csize) ||
241 (data[pos+off] != '\0') )
242 return 0; /* malformed */
243 off++;
244 switch (data[pos + 10])
245 {
246 case 0x00:
247 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
248 csize - off, "ISO-8859-1");
249 break;
250 case 0x01:
251 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
252 csize - off, "UCS-2");
253 break;
254 default:
255 /* bad encoding byte,
256 try to convert from iso-8859-1 */
257 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
258 csize - off, "ISO-8859-1");
259 break;
260 }
261 break;
262 case SL:
263 if (csize < 7)
264 return 0; /* malformed */
265 /* find end of description */
266 switch (data[pos + 10])
267 {
268 case 0x00:
269 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
270 csize - 6, "ISO-8859-1");
271 break;
272 case 0x01:
273 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
274 csize - 6, "UCS-2");
275 break;
276 default:
277 /* bad encoding byte,
278 try to convert from iso-8859-1 */
279 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
280 csize - 6, "ISO-8859-1");
281 break;
282 }
283 break;
284 case L:
285 if (csize < 5)
286 return 0; /* malformed */
287 /* find end of description */
288 switch (data[pos + 10])
289 {
290 case 0x00:
291 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
292 csize - 4, "ISO-8859-1");
293 break;
294 case 0x01:
295 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
296 csize - 4, "UCS-2");
297 break;
298 default:
299 /* bad encoding byte,
300 try to convert from iso-8859-1 */
301 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
302 csize - 4, "ISO-8859-1");
303 break;
304 }
305 break;
306 case I:
307 if (csize < 2)
308 return 0; /* malformed */
309 /* find end of mime type */
310 off = 11;
311 while ( (off < size) &&
312 (off - pos < csize) &&
313 (data[pos + off] == '\0') )
314 off++;
315 if ( (off >= csize) ||
316 (data[pos+off] != '\0') )
317 return 0; /* malformed */
318 off++;
319 mime = strdup ((const char*) &data[pos + 11]);
320
321 switch (data[pos+off])
322 {
323 case 0x03:
324 case 0x04:
325 type = EXTRACTOR_METATYPE_COVER_PICTURE;
326 break;
327 case 0x07:
328 case 0x08:
329 case 0x09:
330 case 0x0A:
331 case 0x0B:
332 case 0x0C:
333 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
334 break;
335 case 0x0D:
336 case 0x0E:
337 case 0x0F:
338 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
339 break;
340 case 0x14:
341 type = EXTRACTOR_METATYPE_LOGO;
342 type = EXTRACTOR_METATYPE_LOGO;
343 break;
344 default:
345 type = EXTRACTOR_METATYPE_PICTURE;
346 break;
347 }
348 off++;
349
350 /* find end of description */
351 while ( (off < size) &&
352 (off - pos < csize) &&
353 (data[pos + off] == '\0') )
354 off++;
355 if ( (off >= csize) ||
356 (data[pos+off] != '\0') )
357 return 0; /* malformed */
358 off++;
359 if (0 == strcasecmp ("-->",
360 mime))
361 {
362 /* not supported */
363 }
364 else
365 {
366 if (0 != proc (proc_cls,
367 "id3v23",
368 type,
369 EXTRACTOR_METAFORMAT_BINARY,
370 mime,
371 (const char*) &data[pos + off],
372 csize + 6 - off))
373 {
374 free (mime);
375 return 1;
376 }
377 }
378 free (mime);
379 word = NULL;
380 break;
381 default:
382 return 0;
383 }
188 if ((word != NULL) && (strlen (word) > 0)) 384 if ((word != NULL) && (strlen (word) > 0))
189 { 385 {
190 if (0 != proc (proc_cls, 386 if (0 != proc (proc_cls,
191 "id3v2", 387 "id3v23",
192 tmap[i].type, 388 tmap[i].type,
193 EXTRACTOR_METAFORMAT_UTF8, 389 EXTRACTOR_METAFORMAT_UTF8,
194 "text/plain", 390 "text/plain",
diff --git a/src/plugins/id3v24_extractor.c b/src/plugins/id3v24_extractor.c
index ec11e4a..acc76af 100644
--- a/src/plugins/id3v24_extractor.c
+++ b/src/plugins/id3v24_extractor.c
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2009 Vidyut Samanta and Christian Grothoff 3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
@@ -18,7 +18,6 @@
18 Boston, MA 02111-1307, USA. 18 Boston, MA 02111-1307, USA.
19 19
20 */ 20 */
21
22#define DEBUG_EXTRACT_ID3v24 0 21#define DEBUG_EXTRACT_ID3v24 0
23 22
24#include "platform.h" 23#include "platform.h"
@@ -33,72 +32,98 @@
33#ifndef MINGW 32#ifndef MINGW
34#include <sys/mman.h> 33#include <sys/mman.h>
35#endif 34#endif
35
36#include "convert.h" 36#include "convert.h"
37 37
38 38enum Id3v24Fmt
39static struct EXTRACTOR_Keywords * 39 {
40addKeyword (EXTRACTOR_KeywordList * oldhead, 40 T, /* simple, 0-terminated string, prefixed by encoding */
41 char *phrase, EXTRACTOR_KeywordType type) 41 U, /* 0-terminated ASCII string, no encoding */
42{ 42 UL, /* unsync'ed lyrics */
43 EXTRACTOR_KeywordList *keyword; 43 SL, /* sync'ed lyrics */
44 44 L, /* string with language prefix */
45 keyword = malloc (sizeof (EXTRACTOR_KeywordList)); 45 I /* image */
46 keyword->next = oldhead; 46 };
47 keyword->keyword = phrase;
48 keyword->keywordType = type;
49 return keyword;
50}
51 47
52typedef struct 48typedef struct
53{ 49{
54 char *text; 50 const char *text;
55 enum EXTRACTOR_MetaType type; 51 enum EXTRACTOR_MetaType type;
52 enum Id3v24Fmt fmt;
56} Matches; 53} Matches;
57 54
58static Matches tmap[] = { 55static Matches tmap[] = {
59 {"COMM", EXTRACTOR_METATYPE_COMMENT}, 56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
60 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR}, 57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
61 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR}, 58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
62 {"TMOO", EXTRACTOR_METATYPE_MOOD}, 59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
63 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST}, 60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
64 {"LINK", EXTRACTOR_METATYPE_LINK}, 61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */
65 {"MCDI", EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER}, 62 /* TDLY */
66 {"PCNT", EXTRACTOR_METATYPE_PLAY_COUNTER}, 63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
67 {"POPM", EXTRACTOR_METATYPE_POPULARITY_METER}, 64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
68 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT}, 65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
69 {"TDRC", EXTRACTOR_METATYPE_DATE}, 66 /* TIME, deprecated in 24 */
70 {"TCON", EXTRACTOR_METATYPE_GENRE}, 67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
71 {"TIT1", EXTRACTOR_METATYPE_GENRE}, 68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
72 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY}, 69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
73 {"TEXT", EXTRACTOR_METATYPE_LYRICS}, 70 /* TKEY */
74 {"TOLY", EXTRACTOR_METATYPE_CONTRIBUTOR}, 71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
75 {"TOPE", EXTRACTOR_METATYPE_CONTRIBUTOR}, 72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
76 {"TOWN", EXTRACTOR_METATYPE_OWNER}, 73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
77 {"TPE1", EXTRACTOR_METATYPE_ARTIST}, 74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
78 {"TPE2", EXTRACTOR_METATYPE_ARTIST}, 75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
79 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR}, 76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
80 {"TPE4", EXTRACTOR_METATYPE_INTERPRET}, 77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
81 {"TIME", EXTRACTOR_METATYPE_TIME}, 78 /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */
82 {"TMED", EXTRACTOR_METATYPE_MEDIA_TYPE}, 79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
83 {"TCOM", EXTRACTOR_METATYPE_CREATOR}, 80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
84 {"TOFN", EXTRACTOR_METATYPE_FILENAME}, 81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
85 {"TOPE", EXTRACTOR_METATYPE_ARTIST}, 82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
86 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER}, 83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
87 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER}, 84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
88 {"TRSC", EXTRACTOR_METATYPE_ISRC}, 85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
89 {"TRSN", EXTRACTOR_METATYPE_SOURCE}, 86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
90 {"TRSO", EXTRACTOR_METATYPE_CREATED_FOR}, 87 /* TRDA, deprecated in 24 */
91 {"TSRC", EXTRACTOR_METATYPE_RESOURCE_IDENTIFIER}, 88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
92 {"TYER", EXTRACTOR_METATYPE_YEAR}, 89 /* TRSO */
93 {"TOAL", EXTRACTOR_METATYPE_ALBUM}, 90 /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */
94 {"TALB", EXTRACTOR_METATYPE_ALBUM}, 91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
95 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE}, 92 /* TSSE */
96 {"TIT2", EXTRACTOR_METATYPE_TITLE}, 93 /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */
97 {"TIT3", EXTRACTOR_METATYPE_DESCRIPTION}, 94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
98 {"WCOM", EXTRACTOR_METATYPE_RELEASE}, 95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
99 {"WCOP", EXTRACTOR_METATYPE_DISCLAIMER}, 96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
100 {"", EXTRACTOR_METATYPE_KEYWORDS}, 97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
101 {NULL, 0} 98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 /* new frames in 24 */
115 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
116 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
117 /* TDRC, TDRL, TDTG */
118 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
119 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
120 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
121 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
122 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
123 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
124 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
125 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
126 {NULL, 0, T}
102}; 127};
103 128
104 129
@@ -114,54 +139,60 @@ EXTRACTOR_id3v24_extract (const unsigned char *data,
114 int extendedHdr; 139 int extendedHdr;
115 int experimental; 140 int experimental;
116 int footer; 141 int footer;
117 unsigned int tsize; 142 uint32_t tsize;
118 unsigned int pos; 143 uint32_t pos;
119 unsigned int ehdrSize; 144 uint32_t ehdrSize;
120 unsigned int padding; 145 uint32_t padding;
146 uint32_t csize;
147 int i;
148 uint16_t flags;
149 char *mime;
150 enum EXTRACTOR_MetaType type;
151 size_t off;
121 152
122 if ((size < 16) || 153 if ((size < 16) ||
123 (data[0] != 0x49) || 154 (data[0] != 0x49) ||
124 (data[1] != 0x44) || 155 (data[1] != 0x44) ||
125 (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00)) 156 (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
126 return prev; 157 return 0;
127 unsync = (data[5] & 0x80) > 0; 158 unsync = (data[5] & 0x80) > 0;
159 if (unsync)
160 return 0; /* not supported */
128 extendedHdr = (data[5] & 0x40) > 0; 161 extendedHdr = (data[5] & 0x40) > 0;
129 experimental = (data[5] & 0x20) > 0; 162 experimental = (data[5] & 0x20) > 0;
163 if (experimental)
164 return 0;
130 footer = (data[5] & 0x10) > 0; 165 footer = (data[5] & 0x10) > 0;
131 tsize = (((data[6] & 0x7F) << 21) | 166 tsize = (((data[6] & 0x7F) << 21) |
132 ((data[7] & 0x7F) << 14) | 167 ((data[7] & 0x7F) << 14) |
133 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); 168 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
134 if ((tsize + 10 > size) || (experimental)) 169 if (tsize + 10 > size)
135 return prev; 170 return 0;
136 pos = 10; 171 pos = 10;
137 padding = 0; 172 padding = 0;
138 if (extendedHdr) 173 if (extendedHdr)
139 { 174 {
140 ehdrSize = (((data[10] & 0x7F) << 21) | 175 ehdrSize = (((data[10] & 0x7F) << 21) |
141 ((data[11] & 0x7F) << 14) | 176 ((data[11] & 0x7F) << 14) |
142 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0)); 177 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
143 pos += ehdrSize; 178 pos += 4 + ehdrSize;
179 if (ehdrSize > tsize)
180 return 0;
144 } 181 }
145
146
147 while (pos < tsize) 182 while (pos < tsize)
148 { 183 {
149 size_t csize;
150 int i;
151 unsigned short flags;
152
153 if (pos + 10 > tsize) 184 if (pos + 10 > tsize)
154 return prev; 185 return 0;
155 186 csize =
156 csize = (((data[pos + 4] & 0x7F) << 21) | 187 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
157 ((data[pos + 5] & 0x7F) << 14) | 188 data[pos + 7];
158 ((data[pos + 6] & 0x7F) << 7) | ((data[pos + 7] & 0x7F) << 0)); 189 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
159 190 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
160 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
161 break; 191 break;
162 flags = (data[pos + 8] << 8) + data[pos + 9]; 192 flags = (data[pos + 8] << 8) + data[pos + 9];
163 if (((flags & 0x80) > 0) /* compressed, not yet supported */ || 193 if (((flags & 0x08) > 0) /* compressed, not yet supported */ ||
164 ((flags & 0x40) > 0) /* encrypted, not supported */ ) 194 ((flags & 0x04) > 0) /* encrypted, not supported */ ||
195 ((flags & 0x02) > 0) /* unsynchronized, not supported */ )
165 { 196 {
166 pos += 10 + csize; 197 pos += 10 + csize;
167 continue; 198 continue;
@@ -172,59 +203,216 @@ EXTRACTOR_id3v24_extract (const unsigned char *data,
172 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4)) 203 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
173 { 204 {
174 char *word; 205 char *word;
175 if ((flags & 0x20) > 0) 206 if ((flags & 0x40) > 0)
176 { 207 {
177 /* "group" identifier, skip a byte */ 208 /* "group" identifier, skip a byte */
178 pos++; 209 pos++;
179 csize--; 210 csize--;
180 } 211 }
181 212
182 /* this byte describes the encoding 213 switch (tmap[i].fmt)
183 try to convert strings to UTF-8 214 {
184 if it fails, then forget it */ 215 case T:
185 csize--; 216 /* this byte describes the encoding
186 switch (data[pos + 10]) 217 try to convert strings to UTF-8
187 { 218 if it fails, then forget it */
188 case 0x00: 219 switch (data[pos + 10])
189 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 220 {
190 csize, "ISO-8859-1"); 221 case 0x00:
191 break; 222 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
192 case 0x01: 223 csize - 1, "ISO-8859-1");
193 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 224 break;
194 csize, "UTF-16"); 225 case 0x01:
195 break; 226 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
196 case 0x02: 227 csize - 1, "UCS-2");
197 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 228 break;
198 csize, "UTF-16BE"); 229 default:
199 break; 230 /* bad encoding byte,
200 case 0x03: 231 try to convert from iso-8859-1 */
201 word = malloc (csize + 1); 232 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
202 memcpy (word, &data[pos + 11], csize); 233 csize - 1, "ISO-8859-1");
203 word[csize] = '\0'; 234 break;
204 break; 235 }
205 default: 236 break;
206 /* bad encoding byte, 237 case U:
207 try to convert from iso-8859-1 */ 238 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
208 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], 239 csize, "ISO-8859-1");
209 csize, "ISO-8859-1"); 240 break;
210 break; 241 case UL:
211 } 242 if (csize < 6)
212 pos++; 243 return 0; /* malformed */
244 /* find end of description */
245 off = 14;
246 while ( (off < size) &&
247 (off - pos < csize) &&
248 (data[pos + off] == '\0') )
249 off++;
250 if ( (off >= csize) ||
251 (data[pos+off] != '\0') )
252 return 0; /* malformed */
253 off++;
254 switch (data[pos + 10])
255 {
256 case 0x00:
257 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
258 csize - off, "ISO-8859-1");
259 break;
260 case 0x01:
261 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
262 csize - off, "UCS-2");
263 break;
264 default:
265 /* bad encoding byte,
266 try to convert from iso-8859-1 */
267 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
268 csize - off, "ISO-8859-1");
269 break;
270 }
271 break;
272 case SL:
273 if (csize < 7)
274 return 0; /* malformed */
275 /* find end of description */
276 switch (data[pos + 10])
277 {
278 case 0x00:
279 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
280 csize - 6, "ISO-8859-1");
281 break;
282 case 0x01:
283 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
284 csize - 6, "UCS-2");
285 break;
286 default:
287 /* bad encoding byte,
288 try to convert from iso-8859-1 */
289 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
290 csize - 6, "ISO-8859-1");
291 break;
292 }
293 break;
294 case L:
295 if (csize < 5)
296 return 0; /* malformed */
297 /* find end of description */
298 switch (data[pos + 10])
299 {
300 case 0x00:
301 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
302 csize - 4, "ISO-8859-1");
303 break;
304 case 0x01:
305 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
306 csize - 4, "UCS-2");
307 break;
308 default:
309 /* bad encoding byte,
310 try to convert from iso-8859-1 */
311 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
312 csize - 4, "ISO-8859-1");
313 break;
314 }
315 break;
316 case I:
317 if (csize < 2)
318 return 0; /* malformed */
319 /* find end of mime type */
320 off = 11;
321 while ( (off < size) &&
322 (off - pos < csize) &&
323 (data[pos + off] == '\0') )
324 off++;
325 if ( (off >= csize) ||
326 (data[pos+off] != '\0') )
327 return 0; /* malformed */
328 off++;
329 mime = strdup ((const char*) &data[pos + 11]);
330
331 switch (data[pos+off])
332 {
333 case 0x03:
334 case 0x04:
335 type = EXTRACTOR_METATYPE_COVER_PICTURE;
336 break;
337 case 0x07:
338 case 0x08:
339 case 0x09:
340 case 0x0A:
341 case 0x0B:
342 case 0x0C:
343 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
344 break;
345 case 0x0D:
346 case 0x0E:
347 case 0x0F:
348 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
349 break;
350 case 0x14:
351 type = EXTRACTOR_METATYPE_LOGO;
352 type = EXTRACTOR_METATYPE_LOGO;
353 break;
354 default:
355 type = EXTRACTOR_METATYPE_PICTURE;
356 break;
357 }
358 off++;
359
360 /* find end of description */
361 while ( (off < size) &&
362 (off - pos < csize) &&
363 (data[pos + off] == '\0') )
364 off++;
365 if ( (off >= csize) ||
366 (data[pos+off] != '\0') )
367 return 0; /* malformed */
368 off++;
369 if (0 == strcasecmp ("-->",
370 mime))
371 {
372 /* not supported */
373 }
374 else
375 {
376 if (0 != proc (proc_cls,
377 "id3v24",
378 type,
379 EXTRACTOR_METAFORMAT_BINARY,
380 mime,
381 (const char*) &data[pos + off],
382 csize + 6 - off))
383 {
384 free (mime);
385 return 1;
386 }
387 }
388 free (mime);
389 word = NULL;
390 break;
391 default:
392 return 0;
393 }
213 if ((word != NULL) && (strlen (word) > 0)) 394 if ((word != NULL) && (strlen (word) > 0))
214 { 395 {
215 prev = addKeyword (prev, word, tmap[i].type); 396 if (0 != proc (proc_cls,
216 } 397 "id3v24",
217 else 398 tmap[i].type,
218 { 399 EXTRACTOR_METAFORMAT_UTF8,
219 free (word); 400 "text/plain",
401 word,
402 strlen(word)+1))
403 {
404 free (word);
405 return 1;
406 }
220 } 407 }
408 free (word);
221 break; 409 break;
222 } 410 }
223 i++; 411 i++;
224 } 412 }
225 pos += 10 + csize; 413 pos += 10 + csize;
226 } 414 }
227 return prev; 415 return 0;
228} 416}
229 417
230/* end of id3v24_extractor.c */ 418/* end of id3v24_extractor.c */
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c
index da5c6d9..f0227f1 100644
--- a/src/plugins/id3v2_extractor.c
+++ b/src/plugins/id3v2_extractor.c
@@ -113,7 +113,7 @@ static Matches tmap[] = {
113 /* skipping CRM */ 113 /* skipping CRM */
114 /* skipping CRA */ 114 /* skipping CRA */
115 /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ 115 /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */
116 {NULL, 0}, 116 {NULL, 0, T},
117}; 117};
118 118
119 119
diff --git a/src/plugins/odf_extractor.c b/src/plugins/odf_extractor.c
index 479f5ca..9a8c827 100644
--- a/src/plugins/odf_extractor.c
+++ b/src/plugins/odf_extractor.c
@@ -44,7 +44,7 @@ static Matches tmap[] = {
44 { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE }, 44 { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE },
45 { "dc:date", EXTRACTOR_METATYPE_UNKNOWN_DATE }, 45 { "dc:date", EXTRACTOR_METATYPE_UNKNOWN_DATE },
46 { "dc:creator", EXTRACTOR_METATYPE_CREATOR }, 46 { "dc:creator", EXTRACTOR_METATYPE_CREATOR },
47 { "dc:language", EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE }, 47 { "dc:language", EXTRACTOR_METATYPE_LANGUAGE },
48 { "dc:title", EXTRACTOR_METATYPE_TITLE }, 48 { "dc:title", EXTRACTOR_METATYPE_TITLE },
49 { "dc:description", EXTRACTOR_METATYPE_DESCRIPTION }, 49 { "dc:description", EXTRACTOR_METATYPE_DESCRIPTION },
50 { "dc:subject", EXTRACTOR_METATYPE_SUBJECT }, 50 { "dc:subject", EXTRACTOR_METATYPE_SUBJECT },
diff --git a/src/plugins/ole2_extractor.c b/src/plugins/ole2_extractor.c
index 1b8d5cf..5f6cd01 100644
--- a/src/plugins/ole2_extractor.c
+++ b/src/plugins/ole2_extractor.c
@@ -107,7 +107,7 @@ static Matches tmap[] = {
107 { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, 107 { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
108 { "meta:template", EXTRACTOR_METATYPE_TEMPLATE }, 108 { "meta:template", EXTRACTOR_METATYPE_TEMPLATE },
109 { "meta:editing-cycles", EXTRACTOR_METATYPE_EDITING_CYCLES }, 109 { "meta:editing-cycles", EXTRACTOR_METATYPE_EDITING_CYCLES },
110 /* { "Dictionary", EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE }, */ 110 /* { "Dictionary", EXTRACTOR_METATYPE_LANGUAGE }, */
111 /* { "gsf:security", EXTRACTOR_SECURITY }, */ 111 /* { "gsf:security", EXTRACTOR_SECURITY }, */
112 /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */ 112 /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */
113 /* { "meta:editing-duration", EXTRACTOR_METATYPE_TOTAL_EDITING_TIME }, // encoding? */ 113 /* { "meta:editing-duration", EXTRACTOR_METATYPE_TOTAL_EDITING_TIME }, // encoding? */
@@ -544,7 +544,7 @@ EXTRACTOR_ole2_extract (const char *data,
544 if ( (lang != NULL) && (ret == 0) ) 544 if ( (lang != NULL) && (ret == 0) )
545 ret = addKeyword(proc, proc_cls, 545 ret = addKeyword(proc, proc_cls,
546 lang, 546 lang,
547 EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE); 547 EXTRACTOR_METATYPE_LANGUAGE);
548 if (lcb >= 6) { 548 if (lcb >= 6) {
549 for (i=0;i<gsf_infile_num_children(infile);i++) { 549 for (i=0;i<gsf_infile_num_children(infile);i++) {
550 if (ret != 0) 550 if (ret != 0)
diff --git a/src/plugins/png_extractor.c b/src/plugins/png_extractor.c
index 5fb2b9d..bdba922 100644
--- a/src/plugins/png_extractor.c
+++ b/src/plugins/png_extractor.c
@@ -141,7 +141,7 @@ processiTXt (const char *data,
141 language = &data[pos]; 141 language = &data[pos];
142 ret = 0; 142 ret = 0;
143 if (stnlen (language, length - pos) > 0) 143 if (stnlen (language, length - pos) > 0)
144 ADDF (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE, 144 ADDF (EXTRACTOR_METATYPE_LANGUAGE,
145 stndup (language, length - pos)); 145 stndup (language, length - pos));
146 pos += stnlen (language, length - pos) + 1; 146 pos += stnlen (language, length - pos) + 1;
147 if (pos + 1 >= length) 147 if (pos + 1 >= length)
diff --git a/src/plugins/qt_extractor.c b/src/plugins/qt_extractor.c
index 3abd543..123e0f5 100644
--- a/src/plugins/qt_extractor.c
+++ b/src/plugins/qt_extractor.c
@@ -395,9 +395,9 @@ static ITTagConversionEntry it_to_extr_table[] = {
395 {"catg", EXTRACTOR_METATYPE_SECTION}, 395 {"catg", EXTRACTOR_METATYPE_SECTION},
396 {"keyw", EXTRACTOR_METATYPE_KEYWORDS}, 396 {"keyw", EXTRACTOR_METATYPE_KEYWORDS},
397 {"desc", EXTRACTOR_METATYPE_DESCRIPTION}, 397 {"desc", EXTRACTOR_METATYPE_DESCRIPTION},
398 {"tvnn", EXTRACTOR_METATYPE_TV_NETWORK_NAME}, 398 {"tvnn", EXTRACTOR_METATYPE_NETWORK_NAME},
399 {"tvsh", EXTRACTOR_METATYPE_TV_SHOW_NAME}, 399 {"tvsh", EXTRACTOR_METATYPE_SHOW_NAME},
400 {"tven", EXTRACTOR_METATYPE_TV_NETWORK_NAME}, 400 {"tven", EXTRACTOR_METATYPE_NETWORK_NAME},
401 {NULL, EXTRACTOR_METATYPE_RESERVED} 401 {NULL, EXTRACTOR_METATYPE_RESERVED}
402}; 402};
403 403
@@ -850,7 +850,7 @@ processTextTag (const char *input,
850 lang = ntohs (txt->language); 850 lang = ntohs (txt->language);
851 if (lang >= sizeof (languages) / sizeof (char *)) 851 if (lang >= sizeof (languages) / sizeof (char *))
852 return 0; /* invalid */ 852 return 0; /* invalid */
853 addKeyword (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE, languages[lang], ec); 853 addKeyword (EXTRACTOR_METATYPE_LANGUAGE, languages[lang], ec);
854 854
855 meta = malloc (len + 1); 855 meta = malloc (len + 1);
856 memcpy (meta, &txt[1], len); 856 memcpy (meta, &txt[1], len);