aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/plugins/Makefile.am15
-rw-r--r--src/plugins/zip_extractor.c (renamed from src/plugins/zipextractor.c)135
2 files changed, 71 insertions, 79 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index 9668b87..706da26 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -90,7 +90,8 @@ plugin_LTLIBRARIES = \
90 libextractor_real.la \ 90 libextractor_real.la \
91 $(rpm) \ 91 $(rpm) \
92 $(thumbgtk) \ 92 $(thumbgtk) \
93 libextractor_wav.la 93 libextractor_wav.la \
94 libextractor_zip.la
94 95
95libextractor_applefile_la_SOURCES = \ 96libextractor_applefile_la_SOURCES = \
96 applefile_extractor.c 97 applefile_extractor.c
@@ -260,6 +261,11 @@ libextractor_wav_la_LDFLAGS = \
260libextractor_wav_la_LIBADD = \ 261libextractor_wav_la_LIBADD = \
261 $(LE_LIBINTL) 262 $(LE_LIBINTL)
262 263
264libextractor_zip_la_SOURCES = \
265 zip_extractor.c
266libextractor_zip_la_LDFLAGS = \
267 $(PLUGINFLAGS)
268
263EXTRA_DIST = template_extractor.c 269EXTRA_DIST = template_extractor.c
264 270
265 271
@@ -339,13 +345,6 @@ libextractor_tiff_la_LDFLAGS = \
339libextractor_tiff_la_LIBADD = \ 345libextractor_tiff_la_LIBADD = \
340 $(top_builddir)/src/common/libextractor_common.la 346 $(top_builddir)/src/common/libextractor_common.la
341 347
342libextractor_zip_la_SOURCES = \
343 zipextractor.c
344libextractor_zip_la_LDFLAGS = \
345 $(PLUGINFLAGS)
346libextractor_zip_la_LIBADD = \
347 $(top_builddir)/src/main/libextractor.la
348
349libextractor_riff_la_SOURCES = \ 348libextractor_riff_la_SOURCES = \
350 riffextractor.c 349 riffextractor.c
351libextractor_riff_la_LDFLAGS = \ 350libextractor_riff_la_LDFLAGS = \
diff --git a/src/plugins/zipextractor.c b/src/plugins/zip_extractor.c
index ad46637..c7fef95 100644
--- a/src/plugins/zipextractor.c
+++ b/src/plugins/zip_extractor.c
@@ -40,7 +40,7 @@
40 40
41/* 41/*
42 * This file is part of libextractor. 42 * This file is part of libextractor.
43 * (C) 2002, 2003 Vidyut Samanta and Christian Grothoff 43 * (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
44 * 44 *
45 * libextractor is free software; you can redistribute it and/or modify 45 * libextractor is free software; you can redistribute it and/or modify
46 * it under the terms of the GNU General Public License as published 46 * it under the terms of the GNU General Public License as published
@@ -77,10 +77,14 @@
77 } zip_entry; 77 } zip_entry;
78 78
79/* mimetype = application/zip */ 79/* mimetype = application/zip */
80struct EXTRACTOR_Keywords * 80int
81libextractor_zip_extract (const char *filename, const unsigned char *data, 81EXTRACTOR_zip_extract (const unsigned char *data,
82 size_t size, struct EXTRACTOR_Keywords *prev) 82 size_t size,
83 EXTRACTOR_MetaDataProcessor proc,
84 void *proc_cls,
85 const char *options)
83{ 86{
87 int ret;
84 void *tmp; 88 void *tmp;
85 zip_entry * info; 89 zip_entry * info;
86 zip_entry * start; 90 zip_entry * start;
@@ -90,28 +94,13 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
90 unsigned int name_length, extra_length, comment_length; 94 unsigned int name_length, extra_length, comment_length;
91 unsigned int filecomment_length; 95 unsigned int filecomment_length;
92 unsigned int entry_total, entry_count; 96 unsigned int entry_total, entry_count;
93 EXTRACTOR_KeywordList * keyword; 97
94 const char *mimetype; 98 /* I think the smallest zipfile you can have is about 120 bytes */
95 mimetype = EXTRACTOR_extractLast (EXTRACTOR_MIMETYPE, prev); 99 if ((NULL == data) || (size < 100))
96 if (NULL != mimetype) 100 return 0;
97 { 101 if (! (('P' == data[0]) && ('K' == data[1]) && (0x03 == data[2])
98 if ((0 != strcmp (mimetype, "application/x-zip")) &&
99 (0 != strcmp (mimetype, "application/zip")))
100 {
101
102 /* we think we already know what's in here,
103 and it is not a zip */
104 return prev;
105 }
106 }
107
108 /* I think the smallest zipfile you can have is about 120 bytes */
109 if ((NULL == data) || (size < 100))
110 return prev;
111 if (!
112 (('P' == data[0]) && ('K' == data[1]) && (0x03 == data[2])
113 && (0x04 == data[3]))) 102 && (0x04 == data[3])))
114 return prev; 103 return 0;
115 104
116 /* The filenames for each file in a zipfile are stored in two locations. 105 /* The filenames for each file in a zipfile are stored in two locations.
117 * There is one at the start of each entry, just before the compressed data, 106 * There is one at the start of each entry, just before the compressed data,
@@ -149,7 +138,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
149 */ 138 */
150 139
151 /* the signature can't be more than 22 bytes from the end */ 140 /* the signature can't be more than 22 bytes from the end */
152 offset = size - 22; 141 offset = size - 22;
153 pos = &data[offset]; 142 pos = &data[offset];
154 stop = 0; 143 stop = 0;
155 if (((signed int) size - 65556) > 0) 144 if (((signed int) size - 65556) > 0)
@@ -158,8 +147,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
158 /* not using int 0x06054b50 so that we don't have to deal with endianess issues. 147 /* not using int 0x06054b50 so that we don't have to deal with endianess issues.
159 break out if we go more than 64K backwards and havn't found it, or if we hit the 148 break out if we go more than 64K backwards and havn't found it, or if we hit the
160 begining of the file. */ 149 begining of the file. */
161 while ((! 150 while ((!(('P' == pos[0]) && ('K' == pos[1]) && (0x05 == pos[2])
162 (('P' == pos[0]) && ('K' == pos[1]) && (0x05 == pos[2])
163 && (0x06 == pos[3]))) && (offset > stop)) 151 && (0x06 == pos[3]))) && (offset > stop))
164 pos = &data[offset--]; 152 pos = &data[offset--];
165 if (offset == stop) 153 if (offset == stop)
@@ -171,7 +159,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
171 offset); 159 offset);
172 160
173#endif /* */ 161#endif /* */
174 return prev; 162 return 0;
175 } 163 }
176 164
177 /* offset should now point to the start of the end-of-central directory structure */ 165 /* offset should now point to the start of the end-of-central directory structure */
@@ -180,7 +168,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
180 filecomment_length = pos[20] + (pos[21] << 8); 168 filecomment_length = pos[20] + (pos[21] << 8);
181 if (filecomment_length + offset + 22 > size) 169 if (filecomment_length + offset + 22 > size)
182 { 170 {
183 return prev; /* invalid zip file format! */ 171 return 0; /* invalid zip file format! */
184 } 172 }
185 filecomment = NULL; 173 filecomment = NULL;
186 if (filecomment_length > 0) 174 if (filecomment_length > 0)
@@ -221,9 +209,9 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
221 { 209 {
222 210
223 /* not a zip */ 211 /* not a zip */
224 if (filecomment != NULL) 212 if (filecomment != NULL)
225 free (filecomment); 213 free (filecomment);
226 return prev; 214 return 0;
227 } 215 }
228 pos = &data[offset]; /* jump */ 216 pos = &data[offset]; /* jump */
229 217
@@ -253,8 +241,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
253 * ?- ? extra field (variable size) 241 * ?- ? extra field (variable size)
254 * ?- ? file comment (variable size) 242 * ?- ? file comment (variable size)
255 */ 243 */
256 if (! 244 if (!(('P' == pos[0]) && ('K' == pos[1]) && (0x01 == pos[2])
257 (('P' == pos[0]) && ('K' == pos[1]) && (0x01 == pos[2])
258 && (0x02 == pos[3]))) 245 && (0x02 == pos[3])))
259 { 246 {
260 247
@@ -263,10 +250,10 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
263 "Did not find central directory structure signature. offset: %i\n", 250 "Did not find central directory structure signature. offset: %i\n",
264 offset); 251 offset);
265 252
266#endif /* */ 253#endif
267 if (filecomment != NULL) 254 if (filecomment != NULL)
268 free (filecomment); 255 free (filecomment);
269 return prev; 256 return 0;
270 } 257 }
271 start = NULL; 258 start = NULL;
272 info = NULL; 259 info = NULL;
@@ -341,7 +328,7 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
341 } 328 }
342 if (filecomment != NULL) 329 if (filecomment != NULL)
343 free (filecomment); 330 free (filecomment);
344 return prev; 331 return 0;
345 } 332 }
346 } 333 }
347 while ((0x01 == pos[2]) && (0x02 == pos[3])); 334 while ((0x01 == pos[2]) && (0x02 == pos[3]));
@@ -360,58 +347,64 @@ libextractor_zip_extract (const char *filename, const unsigned char *data,
360#endif /* */ 347#endif /* */
361 } 348 }
362 349
363 /* I'm only putting this in the else clause so that keyword has a local scope */ 350 ret = proc (proc_cls,
364 keyword = malloc (sizeof (EXTRACTOR_KeywordList)); 351 "zip",
365 keyword->next = prev; 352 EXTRACTOR_METATYPE_MIMETYPE,
366 keyword->keyword = strdup ("application/zip"); 353 EXTRACTOR_METAFORMAT_UTF8,
367 keyword->keywordType = EXTRACTOR_MIMETYPE; 354 "text/plain",
368 prev = keyword; 355 "application/zip",
369 if (filecomment != NULL) 356 strlen ("application/zip")+1);
357 if ( (filecomment != NULL) && (ret != 0) )
370 { 358 {
371 EXTRACTOR_KeywordList * kw = malloc (sizeof (EXTRACTOR_KeywordList)); 359 ret = proc (proc_cls,
372 kw->next = prev; 360 "zip",
373 kw->keyword = strdup (filecomment); 361 EXTRACTOR_METATYPE_MIMETYPE,
374 kw->keywordType = EXTRACTOR_COMMENT; 362 EXTRACTOR_METAFORMAT_UTF8,
375 prev = kw; 363 "text/plain",
376 free (filecomment); 364 filecomment,
365 strlen (filecomment)+1);
377 } 366 }
367 free (filecomment);
368
378 369
379 /* if we've gotten to here then there is at least one zip entry (see get_zipinfo call above) */ 370 /* if we've gotten to here then there is at least one zip entry (see get_zipinfo call above) */
380 /* note: this free()'s the info list as it goes */ 371 /* note: this free()'s the info list as it goes */
381 info = start; 372 info = start;
382 while (NULL != info) 373 while (NULL != info)
383 { 374 {
384 if (info->filename != NULL) 375 if (info->filename != NULL)
385 { 376 {
386 if (strlen (info->filename)) 377 if ( (ret == 0) && (strlen (info->filename)) )
387 { 378 {
388 EXTRACTOR_KeywordList * keyword = 379 ret = proc (proc_cls,
389 malloc (sizeof (EXTRACTOR_KeywordList)); 380 "zip",
390 keyword->next = prev; 381 EXTRACTOR_METATYPE_FILENAME,
391 keyword->keyword = strdup (info->filename); 382 EXTRACTOR_METAFORMAT_UTF8,
392 keyword->keywordType = EXTRACTOR_FILENAME; 383 "text/plain",
393 prev = keyword; 384 info->filename,
385 strlen (info->filename)+1);
394 } 386 }
395 free (info->filename);
396 } 387 }
397 if (info->comment != NULL) 388 if (info->comment != NULL)
398 { 389 {
399 if (strlen (info->comment) > 0) 390 if ( (ret == 0) && (strlen (info->comment) > 0) )
400 { 391 {
401 EXTRACTOR_KeywordList * keyword = 392 ret = proc (proc_cls,
402 malloc (sizeof (EXTRACTOR_KeywordList)); 393 "zip",
403 keyword->next = prev; 394 EXTRACTOR_METATYPE_FILENAME,
404 keyword->keyword = strdup (info->comment); 395 EXTRACTOR_METAFORMAT_UTF8,
405 keyword->keywordType = EXTRACTOR_COMMENT; 396 "text/plain",
406 prev = keyword; 397 info->comment,
398 strlen (info->comment)+1);
407 } 399 }
408 free (info->comment);
409 } 400 }
401 free (info->filename);
402 free (info->comment);
410 tmp = info; 403 tmp = info;
411 info = info->next; 404 info = info->next;
412 free (tmp); 405 free (tmp);
413 } 406 }
414 return prev; 407 return ret;
415} 408}
416 409
417 410