diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/plugins/Makefile.am | 15 | ||||
-rw-r--r-- | src/plugins/zip_extractor.c (renamed from src/plugins/zipextractor.c) | 135 |
2 files changed, 71 insertions, 79 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 9668b87..706da26 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am | |||
@@ -90,7 +90,8 @@ plugin_LTLIBRARIES = \ | |||
90 | libextractor_real.la \ | 90 | libextractor_real.la \ |
91 | $(rpm) \ | 91 | $(rpm) \ |
92 | $(thumbgtk) \ | 92 | $(thumbgtk) \ |
93 | libextractor_wav.la | 93 | libextractor_wav.la \ |
94 | libextractor_zip.la | ||
94 | 95 | ||
95 | libextractor_applefile_la_SOURCES = \ | 96 | libextractor_applefile_la_SOURCES = \ |
96 | applefile_extractor.c | 97 | applefile_extractor.c |
@@ -260,6 +261,11 @@ libextractor_wav_la_LDFLAGS = \ | |||
260 | libextractor_wav_la_LIBADD = \ | 261 | libextractor_wav_la_LIBADD = \ |
261 | $(LE_LIBINTL) | 262 | $(LE_LIBINTL) |
262 | 263 | ||
264 | libextractor_zip_la_SOURCES = \ | ||
265 | zip_extractor.c | ||
266 | libextractor_zip_la_LDFLAGS = \ | ||
267 | $(PLUGINFLAGS) | ||
268 | |||
263 | EXTRA_DIST = template_extractor.c | 269 | EXTRA_DIST = template_extractor.c |
264 | 270 | ||
265 | 271 | ||
@@ -339,13 +345,6 @@ libextractor_tiff_la_LDFLAGS = \ | |||
339 | libextractor_tiff_la_LIBADD = \ | 345 | libextractor_tiff_la_LIBADD = \ |
340 | $(top_builddir)/src/common/libextractor_common.la | 346 | $(top_builddir)/src/common/libextractor_common.la |
341 | 347 | ||
342 | libextractor_zip_la_SOURCES = \ | ||
343 | zipextractor.c | ||
344 | libextractor_zip_la_LDFLAGS = \ | ||
345 | $(PLUGINFLAGS) | ||
346 | libextractor_zip_la_LIBADD = \ | ||
347 | $(top_builddir)/src/main/libextractor.la | ||
348 | |||
349 | libextractor_riff_la_SOURCES = \ | 348 | libextractor_riff_la_SOURCES = \ |
350 | riffextractor.c | 349 | riffextractor.c |
351 | libextractor_riff_la_LDFLAGS = \ | 350 | libextractor_riff_la_LDFLAGS = \ |
diff --git a/src/plugins/zipextractor.c b/src/plugins/zip_extractor.c index ad46637..c7fef95 100644 --- a/src/plugins/zipextractor.c +++ b/src/plugins/zip_extractor.c | |||
@@ -40,7 +40,7 @@ | |||
40 | 40 | ||
41 | /* | 41 | /* |
42 | * This file is part of libextractor. | 42 | * This file is part of libextractor. |
43 | * (C) 2002, 2003 Vidyut Samanta and Christian Grothoff | 43 | * (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff |
44 | * | 44 | * |
45 | * libextractor is free software; you can redistribute it and/or modify | 45 | * libextractor is free software; you can redistribute it and/or modify |
46 | * it under the terms of the GNU General Public License as published | 46 | * it under the terms of the GNU General Public License as published |
@@ -77,10 +77,14 @@ | |||
77 | } zip_entry; | 77 | } zip_entry; |
78 | 78 | ||
79 | /* mimetype = application/zip */ | 79 | /* mimetype = application/zip */ |
80 | struct EXTRACTOR_Keywords * | 80 | int |
81 | libextractor_zip_extract (const char *filename, const unsigned char *data, | 81 | EXTRACTOR_zip_extract (const unsigned char *data, |
82 | size_t size, struct EXTRACTOR_Keywords *prev) | 82 | size_t size, |
83 | EXTRACTOR_MetaDataProcessor proc, | ||
84 | void *proc_cls, | ||
85 | const char *options) | ||
83 | { | 86 | { |
87 | int ret; | ||
84 | void *tmp; | 88 | void *tmp; |
85 | zip_entry * info; | 89 | zip_entry * info; |
86 | zip_entry * start; | 90 | zip_entry * start; |
@@ -90,28 +94,13 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
90 | unsigned int name_length, extra_length, comment_length; | 94 | unsigned int name_length, extra_length, comment_length; |
91 | unsigned int filecomment_length; | 95 | unsigned int filecomment_length; |
92 | unsigned int entry_total, entry_count; | 96 | unsigned int entry_total, entry_count; |
93 | EXTRACTOR_KeywordList * keyword; | 97 | |
94 | const char *mimetype; | 98 | /* I think the smallest zipfile you can have is about 120 bytes */ |
95 | mimetype = EXTRACTOR_extractLast (EXTRACTOR_MIMETYPE, prev); | 99 | if ((NULL == data) || (size < 100)) |
96 | if (NULL != mimetype) | 100 | return 0; |
97 | { | 101 | if (! (('P' == data[0]) && ('K' == data[1]) && (0x03 == data[2]) |
98 | if ((0 != strcmp (mimetype, "application/x-zip")) && | ||
99 | (0 != strcmp (mimetype, "application/zip"))) | ||
100 | { | ||
101 | |||
102 | /* we think we already know what's in here, | ||
103 | and it is not a zip */ | ||
104 | return prev; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* I think the smallest zipfile you can have is about 120 bytes */ | ||
109 | if ((NULL == data) || (size < 100)) | ||
110 | return prev; | ||
111 | if (! | ||
112 | (('P' == data[0]) && ('K' == data[1]) && (0x03 == data[2]) | ||
113 | && (0x04 == data[3]))) | 102 | && (0x04 == data[3]))) |
114 | return prev; | 103 | return 0; |
115 | 104 | ||
116 | /* The filenames for each file in a zipfile are stored in two locations. | 105 | /* The filenames for each file in a zipfile are stored in two locations. |
117 | * There is one at the start of each entry, just before the compressed data, | 106 | * There is one at the start of each entry, just before the compressed data, |
@@ -149,7 +138,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
149 | */ | 138 | */ |
150 | 139 | ||
151 | /* the signature can't be less than 22 bytes from the end */ | 140 | /* the signature can't be less than 22 bytes from the end */ |
152 | offset = size - 22; | 141 | offset = size - 22; |
153 | pos = &data[offset]; | 142 | pos = &data[offset]; |
154 | stop = 0; | 143 | stop = 0; |
155 | if (((signed int) size - 65556) > 0) | 144 | if (((signed int) size - 65556) > 0) |
@@ -158,8 +147,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
158 | /* not using int 0x06054b50 so that we don't have to deal with endianness issues. | 147 | /* not using int 0x06054b50 so that we don't have to deal with endianness issues. |
159 | break out if we go more than 64K backwards and haven't found it, or if we hit the | 148 | break out if we go more than 64K backwards and haven't found it, or if we hit the |
160 | beginning of the file. */ | 149 | beginning of the file. */ |
161 | while ((! | 150 | while ((!(('P' == pos[0]) && ('K' == pos[1]) && (0x05 == pos[2]) |
162 | (('P' == pos[0]) && ('K' == pos[1]) && (0x05 == pos[2]) | ||
163 | && (0x06 == pos[3]))) && (offset > stop)) | 151 | && (0x06 == pos[3]))) && (offset > stop)) |
164 | pos = &data[offset--]; | 152 | pos = &data[offset--]; |
165 | if (offset == stop) | 153 | if (offset == stop) |
@@ -171,7 +159,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
171 | offset); | 159 | offset); |
172 | 160 | ||
173 | #endif /* */ | 161 | #endif /* */ |
174 | return prev; | 162 | return 0; |
175 | } | 163 | } |
176 | 164 | ||
177 | /* offset should now point to the start of the end-of-central directory structure */ | 165 | /* offset should now point to the start of the end-of-central directory structure */ |
@@ -180,7 +168,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
180 | filecomment_length = pos[20] + (pos[21] << 8); | 168 | filecomment_length = pos[20] + (pos[21] << 8); |
181 | if (filecomment_length + offset + 22 > size) | 169 | if (filecomment_length + offset + 22 > size) |
182 | { | 170 | { |
183 | return prev; /* invalid zip file format! */ | 171 | return 0; /* invalid zip file format! */ |
184 | } | 172 | } |
185 | filecomment = NULL; | 173 | filecomment = NULL; |
186 | if (filecomment_length > 0) | 174 | if (filecomment_length > 0) |
@@ -221,9 +209,9 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
221 | { | 209 | { |
222 | 210 | ||
223 | /* not a zip */ | 211 | /* not a zip */ |
224 | if (filecomment != NULL) | 212 | if (filecomment != NULL) |
225 | free (filecomment); | 213 | free (filecomment); |
226 | return prev; | 214 | return 0; |
227 | } | 215 | } |
228 | pos = &data[offset]; /* jump */ | 216 | pos = &data[offset]; /* jump */ |
229 | 217 | ||
@@ -253,8 +241,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
253 | * ?- ? extra field (variable size) | 241 | * ?- ? extra field (variable size) |
254 | * ?- ? file comment (variable size) | 242 | * ?- ? file comment (variable size) |
255 | */ | 243 | */ |
256 | if (! | 244 | if (!(('P' == pos[0]) && ('K' == pos[1]) && (0x01 == pos[2]) |
257 | (('P' == pos[0]) && ('K' == pos[1]) && (0x01 == pos[2]) | ||
258 | && (0x02 == pos[3]))) | 245 | && (0x02 == pos[3]))) |
259 | { | 246 | { |
260 | 247 | ||
@@ -263,10 +250,10 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
263 | "Did not find central directory structure signature. offset: %i\n", | 250 | "Did not find central directory structure signature. offset: %i\n", |
264 | offset); | 251 | offset); |
265 | 252 | ||
266 | #endif /* */ | 253 | #endif |
267 | if (filecomment != NULL) | 254 | if (filecomment != NULL) |
268 | free (filecomment); | 255 | free (filecomment); |
269 | return prev; | 256 | return 0; |
270 | } | 257 | } |
271 | start = NULL; | 258 | start = NULL; |
272 | info = NULL; | 259 | info = NULL; |
@@ -341,7 +328,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
341 | } | 328 | } |
342 | if (filecomment != NULL) | 329 | if (filecomment != NULL) |
343 | free (filecomment); | 330 | free (filecomment); |
344 | return prev; | 331 | return 0; |
345 | } | 332 | } |
346 | } | 333 | } |
347 | while ((0x01 == pos[2]) && (0x02 == pos[3])); | 334 | while ((0x01 == pos[2]) && (0x02 == pos[3])); |
@@ -360,58 +347,64 @@ libextractor_zip_extract (const char *filename, const unsigned char *data, | |||
360 | #endif /* */ | 347 | #endif /* */ |
361 | } | 348 | } |
362 | 349 | ||
363 | /* I'm only putting this in the else clause so that keyword has a local scope */ | 350 | ret = proc (proc_cls, |
364 | keyword = malloc (sizeof (EXTRACTOR_KeywordList)); | 351 | "zip", |
365 | keyword->next = prev; | 352 | EXTRACTOR_METATYPE_MIMETYPE, |
366 | keyword->keyword = strdup ("application/zip"); | 353 | EXTRACTOR_METAFORMAT_UTF8, |
367 | keyword->keywordType = EXTRACTOR_MIMETYPE; | 354 | "text/plain", |
368 | prev = keyword; | 355 | "application/zip", |
369 | if (filecomment != NULL) | 356 | strlen ("application/zip")+1); |
357 | if ( (filecomment != NULL) && (ret != 0) ) | ||
370 | { | 358 | { |
371 | EXTRACTOR_KeywordList * kw = malloc (sizeof (EXTRACTOR_KeywordList)); | 359 | ret = proc (proc_cls, |
372 | kw->next = prev; | 360 | "zip", |
373 | kw->keyword = strdup (filecomment); | 361 | EXTRACTOR_METATYPE_MIMETYPE, |
374 | kw->keywordType = EXTRACTOR_COMMENT; | 362 | EXTRACTOR_METAFORMAT_UTF8, |
375 | prev = kw; | 363 | "text/plain", |
376 | free (filecomment); | 364 | filecomment, |
365 | strlen (filecomment)+1); | ||
377 | } | 366 | } |
367 | free (filecomment); | ||
368 | |||
378 | 369 | ||
379 | /* if we've gotten to here then there is at least one zip entry (see get_zipinfo call above) */ | 370 | /* if we've gotten to here then there is at least one zip entry (see get_zipinfo call above) */ |
380 | /* note: this free()'s the info list as it goes */ | 371 | /* note: this free()'s the info list as it goes */ |
381 | info = start; | 372 | info = start; |
382 | while (NULL != info) | 373 | while (NULL != info) |
383 | { | 374 | { |
384 | if (info->filename != NULL) | 375 | if (info->filename != NULL) |
385 | { | 376 | { |
386 | if (strlen (info->filename)) | 377 | if ( (ret == 0) && (strlen (info->filename)) ) |
387 | { | 378 | { |
388 | EXTRACTOR_KeywordList * keyword = | 379 | ret = proc (proc_cls, |
389 | malloc (sizeof (EXTRACTOR_KeywordList)); | 380 | "zip", |
390 | keyword->next = prev; | 381 | EXTRACTOR_METATYPE_FILENAME, |
391 | keyword->keyword = strdup (info->filename); | 382 | EXTRACTOR_METAFORMAT_UTF8, |
392 | keyword->keywordType = EXTRACTOR_FILENAME; | 383 | "text/plain", |
393 | prev = keyword; | 384 | info->filename, |
385 | strlen (info->filename)+1); | ||
394 | } | 386 | } |
395 | free (info->filename); | ||
396 | } | 387 | } |
397 | if (info->comment != NULL) | 388 | if (info->comment != NULL) |
398 | { | 389 | { |
399 | if (strlen (info->comment) > 0) | 390 | if ( (ret == 0) && (strlen (info->comment) > 0) ) |
400 | { | 391 | { |
401 | EXTRACTOR_KeywordList * keyword = | 392 | ret = proc (proc_cls, |
402 | malloc (sizeof (EXTRACTOR_KeywordList)); | 393 | "zip", |
403 | keyword->next = prev; | 394 | EXTRACTOR_METATYPE_FILENAME, |
404 | keyword->keyword = strdup (info->comment); | 395 | EXTRACTOR_METAFORMAT_UTF8, |
405 | keyword->keywordType = EXTRACTOR_COMMENT; | 396 | "text/plain", |
406 | prev = keyword; | 397 | info->comment, |
398 | strlen (info->comment)+1); | ||
407 | } | 399 | } |
408 | free (info->comment); | ||
409 | } | 400 | } |
401 | free (info->filename); | ||
402 | free (info->comment); | ||
410 | tmp = info; | 403 | tmp = info; |
411 | info = info->next; | 404 | info = info->next; |
412 | free (tmp); | 405 | free (tmp); |
413 | } | 406 | } |
414 | return prev; | 407 | return ret; |
415 | } | 408 | } |
416 | 409 | ||
417 | 410 | ||