aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-08-18 22:25:21 +0000
committerChristian Grothoff <christian@grothoff.org>2012-08-18 22:25:21 +0000
commit98605a3ce9ea7c4e00c88833e28eefec5e811dd7 (patch)
treebeeca4fa6bfc80ff3ababb1bc44388fcaf2059a3
parent8f122f1187c5ae585ce00ed02e22ebc5a538882e (diff)
downloadlibextractor-98605a3ce9ea7c4e00c88833e28eefec5e811dd7.tar.gz
libextractor-98605a3ce9ea7c4e00c88833e28eefec5e811dd7.zip
reincarnating tar plugin as archive plugin using libarchive
-rw-r--r--README1
-rw-r--r--TODO1
-rw-r--r--configure.ac7
-rw-r--r--src/plugins/Makefile.am21
-rw-r--r--src/plugins/archive_extractor.c124
-rw-r--r--src/plugins/old/tar_extractor.c855
-rw-r--r--src/plugins/test_archive.c76
-rw-r--r--src/plugins/testdata/archive_test.tar (renamed from test/test.tar)bin10240 -> 10240 bytes
8 files changed, 229 insertions, 856 deletions
diff --git a/README b/README
index 114dfb3..7fc7d79 100644
--- a/README
+++ b/README
@@ -40,6 +40,7 @@ Dependencies
40The following dependencies are all optional, but should be 40The following dependencies are all optional, but should be
41available in order for maximum coverage: 41available in order for maximum coverage:
42 42
43* libarchive
43* libavutil / libavformat / libavcodec / libswscale (ffmpeg) 44* libavutil / libavformat / libavcodec / libswscale (ffmpeg)
44* libbz2 (bzip2) 45* libbz2 (bzip2)
45* libexiv2 46* libexiv2
diff --git a/TODO b/TODO
index b956f60..dab8bb5 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,4 @@
1* Update plugins to new API (and cleanup code): 1* Update plugins to new API (and cleanup code):
2 - tar
3 - elf 2 - elf
4 - applefile 3 - applefile
5 - pdf 4 - pdf
diff --git a/configure.ac b/configure.ac
index 638a802..1a6cd7b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -307,6 +307,13 @@ AC_CHECK_LIB(tiff, TIFFClientOpen,
307 AM_CONDITIONAL(HAVE_TIFF, false))], 307 AM_CONDITIONAL(HAVE_TIFF, false))],
308 AM_CONDITIONAL(HAVE_TIFF, false)) 308 AM_CONDITIONAL(HAVE_TIFF, false))
309 309
310AC_CHECK_LIB(archive, archive_read_open,
311 [AC_CHECK_HEADERS([archive.h],
312 AM_CONDITIONAL(HAVE_ARCHIVE, true)
313 AC_DEFINE(HAVE_ARCHIVE,1,[Have libarchive]),
314 AM_CONDITIONAL(HAVE_ARCHIVE, false))],
315 AM_CONDITIONAL(HAVE_ARCHIVE, false))
316
310AC_MSG_CHECKING(for ImageFactory::iptcData in -lexiv2) 317AC_MSG_CHECKING(for ImageFactory::iptcData in -lexiv2)
311AC_LANG_PUSH(C++) 318AC_LANG_PUSH(C++)
312SAVED_LDFLAGS=$LDFLAGS 319SAVED_LDFLAGS=$LDFLAGS
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index e930ca4..a7e62b8 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -16,6 +16,7 @@ SUBDIRS = .
16 16
17EXTRA_DIST = \ 17EXTRA_DIST = \
18 template_extractor.c \ 18 template_extractor.c \
19 testdata/archive_test.tar \
19 testdata/deb_bzip2.deb \ 20 testdata/deb_bzip2.deb \
20 testdata/dvi_ora.dvi \ 21 testdata/dvi_ora.dvi \
21 testdata/flac_kraftwerk.flac \ 22 testdata/flac_kraftwerk.flac \
@@ -71,6 +72,11 @@ endif
71endif 72endif
72 73
73 74
75if HAVE_ARCHIVE
76PLUGIN_ARCHIVE=libextractor_archive.la
77TEST_ARCHIVE=test_archive
78endif
79
74if HAVE_EXIV2 80if HAVE_EXIV2
75PLUGIN_EXIV2=libextractor_exiv2.la 81PLUGIN_EXIV2=libextractor_exiv2.la
76TEST_EXIV2=test_exiv2 82TEST_EXIV2=test_exiv2
@@ -142,6 +148,7 @@ plugin_LTLIBRARIES = \
142 libextractor_wav.la \ 148 libextractor_wav.la \
143 libextractor_xm.la \ 149 libextractor_xm.la \
144 libextractor_zip.la \ 150 libextractor_zip.la \
151 $(PLUGIN_ARCHIVE) \
145 $(PLUGIN_EXIV2) \ 152 $(PLUGIN_EXIV2) \
146 $(PLUGIN_FFMPEG) \ 153 $(PLUGIN_FFMPEG) \
147 $(PLUGIN_FLAC) \ 154 $(PLUGIN_FLAC) \
@@ -175,6 +182,7 @@ check_PROGRAMS = \
175 test_wav \ 182 test_wav \
176 test_xm \ 183 test_xm \
177 test_zip \ 184 test_zip \
185 $(TEST_ARCHIVE) \
178 $(TEST_EXIV2) \ 186 $(TEST_EXIV2) \
179 $(TEST_FFMPEG) \ 187 $(TEST_FFMPEG) \
180 $(TEST_FLAC) \ 188 $(TEST_FLAC) \
@@ -481,6 +489,19 @@ libextractor_sid_la_LDFLAGS = \
481 $(PLUGINFLAGS) 489 $(PLUGINFLAGS)
482 490
483 491
492libextractor_archive_la_SOURCES = \
493 archive_extractor.c
494libextractor_archive_la_LDFLAGS = \
495 $(PLUGINFLAGS)
496libextractor_archive_la_LIBADD = \
497 -larchive
498
499test_archive_SOURCES = \
500 test_archive.c
501test_archive_LDADD = \
502 $(top_builddir)/src/plugins/libtest.la
503
504
484libextractor_thumbnailffmpeg_la_SOURCES = \ 505libextractor_thumbnailffmpeg_la_SOURCES = \
485 thumbnailffmpeg_extractor.c 506 thumbnailffmpeg_extractor.c
486libextractor_thumbnailffmpeg_la_LDFLAGS = \ 507libextractor_thumbnailffmpeg_la_LDFLAGS = \
diff --git a/src/plugins/archive_extractor.c b/src/plugins/archive_extractor.c
new file mode 100644
index 0000000..ef4e7c7
--- /dev/null
+++ b/src/plugins/archive_extractor.c
@@ -0,0 +1,124 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20/**
21 * @file plugins/archive_extractor.c
22 * @brief plugin to support archives (such as TAR)
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "extractor.h"
27#include <archive.h>
28#include <archive_entry.h>
29
30/**
31 * Callback for libarchive for 'reading'.
32 *
33 * @param a archive handle
34 * @param client_data our 'struct EXTRACTOR_ExtractContext'
35 * @param buff where to store data with pointer to data
36 * @return number of bytes read
37 */
38static ssize_t
39read_cb (struct archive *a,
40 void *client_data,
41 const void **buff)
42{
43 struct EXTRACTOR_ExtractContext *ec = client_data;
44 ssize_t ret;
45
46 *buff = NULL;
47 if (-1 == (ret = ec->read (ec->cls, (void **) buff, 16 * 1024)))
48 return ARCHIVE_FATAL;
49 return ret;
50}
51
52
53/**
54 * Callback for libarchive for 'skipping'.
55 *
56 * @param a archive handle
57 * @param client_data our 'struct EXTRACTOR_ExtractContext'
58 * @param request number of bytes to skip
59 * @return number of bytes skipped
60 */
61static __LA_INT64_T
62skip_cb (struct archive *a,
63 void *client_data,
64 __LA_INT64_T request)
65{
66 struct EXTRACTOR_ExtractContext *ec = client_data;
67
68 if (-1 == ec->seek (ec->cls, request, SEEK_CUR))
69 return 0;
70 return request;
71}
72
73
74/**
75 * Main entry method for the ARCHIVE extraction plugin.
76 *
77 * @param ec extraction context provided to the plugin
78 */
79void
80EXTRACTOR_archive_extract_method (struct EXTRACTOR_ExtractContext *ec)
81{
82 struct archive *a;
83 struct archive_entry *entry;
84 const char *fname;
85 const char *s;
86 char *format;
87
88 format = NULL;
89 a = archive_read_new ();
90 archive_read_support_compression_all (a);
91 archive_read_support_format_all (a);
92 archive_read_open2 (a, ec, NULL, &read_cb, &skip_cb, NULL);
93 while (ARCHIVE_OK == archive_read_next_header(a, &entry))
94 {
95 if ( (NULL == format) &&
96 (NULL != (fname = archive_format_name (a))) )
97 format = strdup (fname);
98 s = archive_entry_pathname (entry);
99 if (0 != ec->proc (ec->cls,
100 "tar",
101 EXTRACTOR_METATYPE_FILENAME,
102 EXTRACTOR_METAFORMAT_UTF8,
103 "text/plain",
104 s, strlen (s) + 1))
105 break;
106 }
107 archive_read_finish (a);
108 if (NULL != format)
109 {
110 if (0 != ec->proc (ec->cls,
111 "tar",
112 EXTRACTOR_METATYPE_FORMAT,
113 EXTRACTOR_METAFORMAT_UTF8,
114 "text/plain", format, strlen (format) + 1))
115 {
116 free (format);
117 return;
118 }
119 free (format);
120 }
121}
122
123
124/* end of archive_extractor.c */
diff --git a/src/plugins/old/tar_extractor.c b/src/plugins/old/tar_extractor.c
deleted file mode 100644
index 2ea0ac9..0000000
--- a/src/plugins/old/tar_extractor.c
+++ /dev/null
@@ -1,855 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23
24/*
25 * Note that this code is not complete!
26 *
27 * References:
28 *
29 * http://www.mkssoftware.com/docs/man4/tar.4.asp
30 * (does document USTAR format common nowadays,
31 * but not other extended formats such as the one produced
32 * by GNU tar 1.13 when very long filenames are met.)
33 *
34 * http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs
35 * (J. Schilling's remarks on TAR formats compatibility issues.)
36 */
37
/*
 * Bit flags for the known TAR archive member variants.
 * Every variant owns exactly one bit so that the variants met
 * inside a single archive can be OR-ed together; in theory
 * different variants can coexist within one TAR file, although
 * this will be uncommon.
 */
#define TAR_V7ORIGINAL_FORMAT    (1 << 0)
#define TAR_V7EXTENDED_FORMAT    (1 << 1)
#define TAR_SCHILLING1985_FORMAT (1 << 2)
#define TAR_POSIX1988_FORMAT     (1 << 3)
#define TAR_GNU1991_FORMAT       (1 << 4)
#define TAR_SCHILLING1994_FORMAT (1 << 5)
#define TAR_GNU1997_FORMAT       (1 << 6)
#define TAR_POSIX2001_FORMAT     (1 << 7)
#define TAR_SCHILLING2001_FORMAT (1 << 8)
#define TAR_SOLARIS2001_FORMAT   (1 << 9)
#define TAR_GNU2004_FORMAT       (1 << 10)
55
/*
 * On-disk layout of one TAR member header, modelled after
 * POSIX.1-1988.  All members are plain character arrays, so the
 * structure maps byte-for-byte onto a header block.
 */
typedef struct
{
  char fileName[100];
  char mode[8];
  char userId[8];
  char groupId[8];
  char fileSize[12];
  char lastModTime[12];
  char chksum[8];
  char link;
  char linkName[100];
  /*
   * All fields below are either zero-filled or undefined
   * for UNIX V7 TAR archive members;
   * their header is always 512 octets long nevertheless.
   */
  char ustarMagic[6];
  char version[2];
  char userName[32];
  char groupName[32];
  char devMajor[8];
  char devMinor[8];
  char prefix[155];
  char filler[12];
} TarHeader;

/* size of one header block in bytes */
#define TAR_HEADER_SIZE (sizeof(TarHeader))
/* sentinel used for "no modification time known" */
#define TAR_TIME_FENCE ((long long) (-(1LL << 62)))
88
89static size_t
90tar_roundup (size_t size)
91{
92 size_t diff = (size % TAR_HEADER_SIZE);
93
94 return (0 == diff) ? size : (size + (TAR_HEADER_SIZE - diff));
95}
96
/*
 * Tell whether any of the first 'length' bytes at 'data' is non-zero.
 * Returns 1 as soon as a non-zero byte is met, 0 otherwise
 * (including for 'length' == 0).
 */
static int
tar_isnonzero (const char *data, unsigned int length)
{
  unsigned int idx;

  for (idx = 0; idx < length; idx++)
    {
      if (0 != data[idx])
        return 1;
    }
  return 0;
}
111
/*
 * Parse an octal number stored in a fixed-width TAR header field.
 *
 * Leading spaces are skipped, then octal digits are accumulated,
 * then trailing NUL bytes and spaces are skipped.
 *
 * Returns the number of bytes consumed from 'data' (counting the
 * skipped padding), or 0 if 'data' is NULL, 'size' is 0 or no octal
 * digit was found.  The parsed value is stored in '*valueptr' only
 * if the result is non-zero and 'valueptr' is not NULL.
 */
static unsigned int
tar_octalvalue (const char *data, size_t size, unsigned long long *valueptr)
{
  const char *end;
  const char *cur;
  unsigned long long value;
  unsigned int consumed;

  if ((NULL == data) || (0 == size))
    return 0;

  end = data + size;
  cur = data;
  while ((cur < end) && (' ' == *cur))
    cur++;

  /* at least one octal digit is required */
  if ((cur >= end) || (*cur < '0') || (*cur > '7'))
    return 0;

  value = 0;
  do
    {
      value = (value * 8) + (unsigned long long) (*cur - '0');
      cur++;
    }
  while ((cur < end) && ('0' <= *cur) && (*cur <= '7'));

  while ((cur < end) && ((0 == *cur) || (' ' == *cur)))
    cur++;

  consumed = (unsigned int) (cur - data);
  if ((0 < consumed) && (NULL != valueptr))
    *valueptr = value;
  return consumed;
}
148
#ifndef EOVERFLOW
#define EOVERFLOW -1
#endif

/*
 * Gregorian leap year rule: divisible by 400, or divisible by 4
 * but not by 100.
 */
static int
tar_is_leap_year (unsigned int year)
{
  if (0 == (year % 400))
    return 1;
  return (0 == (year % 4)) && (0 != (year % 100));
}

/*
 * Format 'timeval' (seconds relative to 1970-01-01T00:00:00Z) as
 * "YYYY-MM-DDThh:mm:ssZ" into 'rtime', which has room for 'rsize'
 * bytes.
 *
 * Returns 0 on success, EDOM if the time lies before the shifted
 * (proleptic) epoch used internally, and EOVERFLOW if the text did
 * not fit into 'rsize' bytes.
 */
static int
tar_time (long long timeval, char *rtime, unsigned int rsize)
{
  static const unsigned int month_lengths[12] =
    { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
  /*
   * shift epoch to proleptic times
   * to make subsequent modulo operations safer.
   */
  const long long shifted = timeval
    + ((long long) ((1970 * 365) + 478) * (long long) 86400);
  unsigned int seconds;
  unsigned int minutes;
  unsigned int hours;
  unsigned int days;
  unsigned int year;
  unsigned int month;
  unsigned int centuries;
  unsigned int span;
  int leap;
  int written;

  if (shifted < (long long) 0)
    return EDOM;

  seconds = (unsigned int) (shifted % 60);
  minutes = (unsigned int) ((shifted / 60) % 60);
  hours = (unsigned int) ((shifted / 3600) % 24);
  days = (unsigned int) (shifted / (24 * 3600));

  /* whole 400-year periods */
  year = 400 * (days / ((365 * 400) + 97));
  days %= ((365 * 400) + 97);

  /* whole 100-year periods */
  centuries = days / ((365 * 100) + 24);
  if (centuries >= 4)
    {
      /* last day of the 400-year period */
      year += 399;
      days = 364;
    }
  else
    {
      year += (100 * centuries);
      days %= ((365 * 100) + 24);
    }

  /* whole 4-year periods */
  year += (4 * (days / ((365 * 4) + 1)));
  days %= ((365 * 4) + 1);

  /* remaining whole years, one at a time */
  for (;;)
    {
      span = tar_is_leap_year (year) ? 366 : 365;
      if (days < span)
        break;
      days -= span;
      year++;
    }

  /* remaining whole months; February has 29 days in leap years */
  leap = tar_is_leap_year (year);
  for (month = 0; month < 12; month++)
    {
      span = month_lengths[month];
      if (leap && (1 == month))
        span = 29;
      if (days < span)
        break;
      days -= span;
    }

  written = snprintf (rtime, rsize, "%04u-%02u-%02uT%02u:%02u:%02uZ",
                      year, month + 1, days + 1, hours, minutes, seconds);

  /* a negative result converts to a huge unsigned value => overflow */
  return ((unsigned int) written < rsize) ? 0 : EOVERFLOW;
}
247
/*
 * ADD(t,s): pass the 0-terminated string 's' as meta data of type 't'
 * to 'proc'; if 'proc' returns non-zero, the value is kept in 'ret'
 * and control jumps to the label FINISH.
 * ADDF(t,s): same, but 's' is released with free() on both paths.
 * Both macros rely on 'proc', 'proc_cls', 'ret' and the label FINISH
 * of the function they are expanded in.
 */
248#define ADD(t,s) do { if (0 != (ret = proc (proc_cls, "tar", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) goto FINISH; } while (0)
249#define ADDF(t,s) do { if (0 != (ret = proc (proc_cls, "tar", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) { free(s); goto FINISH; } free (s); } while (0)
250
/*
 * Extract meta data from an in-memory TAR archive.
 *
 * The buffer is walked one header block (TAR_HEADER_SIZE bytes) at a
 * time.  For each header the stored checksum is compared against four
 * computed variants (signed/unsigned, over 500/512 bytes), the octal
 * fields are validated, the member variant is classified and the
 * member name is determined (from a 'path=' record of an 'x'/'X'
 * extended header, from an 'L' long-name member, or from prefix plus
 * fileName).  Names of members with type flag 0 or '0'..'7' are
 * reported as EXTRACTOR_METATYPE_FILENAME.  The loop ends at the first
 * block that fails one of the checks.
 *
 * After the loop, provided at least one variant was recognised: if a
 * name was reported, the largest modification time seen and a string
 * listing the detected variants (joined by " + ", suffixed " TAR") are
 * reported; finally the mime type "application/x-tar" is reported.
 *
 * @param data    buffer holding the archive
 * @param size    number of bytes in 'data'; must be a non-zero
 *                multiple of TAR_HEADER_SIZE, otherwise 0 is returned
 * @param proc    callback receiving the meta data items
 * @param proc_cls closure passed to 'proc'
 * @param options not referenced by this function
 * @return 0, or the non-zero value with which 'proc' asked to stop
 */
251int
252EXTRACTOR_tar_extract (const char *data,
253 size_t size,
254 EXTRACTOR_MetaDataProcessor proc,
255 void *proc_cls, const char *options)
256{
257 char *fname = NULL;
258 size_t pos;
259 int contents_are_empty = 1;
260 long long maxftime = TAR_TIME_FENCE;
261 unsigned int format_archive = 0;
262 int ret;
263
264 if (512 != TAR_HEADER_SIZE)
265 return 0; /* compiler should remove this when optimising */
266 if (0 != (size % TAR_HEADER_SIZE))
267 return 0; /* cannot be tar! */
268 if (size < TAR_HEADER_SIZE)
269 return 0; /* too short, or somehow truncated */
270
271 ret = 0;
272 pos = 0;
273 while ((pos + TAR_HEADER_SIZE) <= size)
274 {
275 const TarHeader *tar = NULL;
276 unsigned format_member = 0;
277 unsigned long long fmode;
278 unsigned long long fsize;
279 long long ftime = TAR_TIME_FENCE;
280 char typeFlag = -1;
281 const char *nul_pos;
282 unsigned int tar_prefix_length = 0;
283 unsigned int tar_name_length = 0;
284 unsigned int checksum_offset;
285 int checksum_computed_500s = 0;
286 int checksum_computed_512s = 0;
287 unsigned int checksum_computed_500u = 0;
288 unsigned int checksum_computed_512u = 0;
289 unsigned long long checksum_stored = 0;
290
291 /*
292 * Compute TAR header checksum and compare with stored value.
293 * Allow for non-conformant checksums computed with signed values,
294 * such as those produced by early Solaris tar.
295 * Allow for non-conformant checksums computed on first 500 octets,
296 * such as those produced by SunOS 4.x tar according to J. Schilling.
297 * This will also detect EOF marks, since a zero-filled block
298 * cannot possibly hold octal values.
299 */
300 for (checksum_offset = 0; checksum_offset < 148; checksum_offset += 1)
301 {
302 checksum_computed_500u +=
303 (unsigned char) data[pos + checksum_offset];
304 checksum_computed_500s += (signed char) data[pos + checksum_offset];
305 }
306 if (8 >
307 tar_octalvalue (data + pos + checksum_offset, 8, &checksum_stored))
308 break;
309 for (; checksum_offset < 156; checksum_offset += 1)
310 {
311 checksum_computed_500u += (unsigned char) ' ';
312 checksum_computed_500s += (signed char) ' ';
313 }
314 for (; checksum_offset < 500; checksum_offset += 1)
315 {
316 checksum_computed_500u +=
317 (unsigned char) data[pos + checksum_offset];
318 checksum_computed_500s += (signed char) data[pos + checksum_offset];
319 }
320
321 checksum_computed_512u = checksum_computed_500u;
322 checksum_computed_512s = checksum_computed_500s;
323 for (; checksum_offset < TAR_HEADER_SIZE; checksum_offset += 1)
324 {
325 checksum_computed_512u +=
326 (unsigned char) data[pos + checksum_offset];
327 checksum_computed_512s += (signed char) data[pos + checksum_offset];
328 }
329
330 /*
331 * Suggestion: use signed checksum matches to refine
332 * TAR format detection.
333 */
334 if ((checksum_stored != (unsigned long long) checksum_computed_512u)
335 && (checksum_stored != (unsigned long long) checksum_computed_512s)
336 && (checksum_stored != (unsigned long long) checksum_computed_500s)
337 && (checksum_stored != (unsigned long long) checksum_computed_500u))
338 break;
339
340 tar = (const TarHeader *) &data[pos];
341 typeFlag = tar->link;
342 pos += TAR_HEADER_SIZE;
343
344 /*
345 * Checking all octal fields helps reduce
346 * the possibility of false positives ;
347 * only the file size, time and mode are used for now.
348 *
349 * This will fail over GNU and Schilling TAR huge size fields
350 * using non-octal encodings used for very large file lengths (> 8 GB).
351 */
352 if ((12 > tar_octalvalue (tar->fileSize, 12,
353 &fsize))
354 || (12 > tar_octalvalue (tar->lastModTime, 12,
355 (unsigned long long *) &ftime))
356 || (8 > tar_octalvalue (tar->mode, 8,
357 (unsigned long long *) &fmode))
358 || (8 > tar_octalvalue (tar->userId, 8, NULL))
359 || (8 > tar_octalvalue (tar->groupId, 8, NULL)))
360 break;
361
362 /*
363 * Find out which TAR variant is here.
364 */
/* NOTE(review): the comparison below covers 7 bytes, i.e. it reads
   past the 6-byte ustarMagic member into 'version'.  The literal as
   rendered here ("ustar" plus one space) would need a NUL as 7th
   byte; whitespace in this listing appears to be collapsed, so the
   literal presumably carries two spaces in the repository -- TODO
   confirm against the original file. */
365 if (0 == memcmp (tar->ustarMagic, "ustar ", 7))
366 {
367
368 if (' ' == tar->mode[6])
369 format_member = TAR_GNU1991_FORMAT;
370 else if (('K' == typeFlag) || ('L' == typeFlag))
371 {
372 format_member = TAR_GNU1997_FORMAT;
373 ftime = TAR_TIME_FENCE;
374 }
375 else
376 format_member =
377 (((unsigned) fmode) !=
378 (((unsigned) fmode) & 03777)) ? TAR_GNU1997_FORMAT :
379 TAR_GNU2004_FORMAT;
380
381 }
382 else if (0 == memcmp (tar->ustarMagic, "ustar", 6))
383 {
384
385 /*
386 * It is important to perform test for SCHILLING1994 before GNU1997
387 * because certain extension type flags ('L' and 'S' for instance)
388 * are used by both.
389 */
390 if ((0 == tar->prefix[130])
391 && (12 <= tar_octalvalue (tar->prefix + 131, 12, NULL))
392 && (12 <= tar_octalvalue (tar->prefix + 143, 12, NULL))
393 && (0 == tar_isnonzero (tar->filler, 8))
394 && (0 == memcmp (tar->filler + 8, "tar", 4)))
395 {
396
397 format_member = TAR_SCHILLING1994_FORMAT;
398
399 }
400 else if (('D' == typeFlag) || ('K' == typeFlag)
401 || ('L' == typeFlag) || ('M' == typeFlag)
402 || ('N' == typeFlag) || ('S' == typeFlag)
403 || ('V' == typeFlag))
404 {
405
406 format_member = TAR_GNU1997_FORMAT;
407
408 }
409 else if (('g' == typeFlag)
410 || ('x' == typeFlag) || ('X' == typeFlag))
411 {
412
413 format_member = TAR_POSIX2001_FORMAT;
414 ftime = TAR_TIME_FENCE;
415
416 }
417 else
418 {
419
420 format_member = TAR_POSIX1988_FORMAT;
421
422 }
423 }
424 else if ((0 == memcmp (tar->filler + 8, "tar", 4))
425 && (0 == tar_isnonzero (tar->filler, 8)))
426 {
427
428 format_member = TAR_SCHILLING1985_FORMAT;
429
430 }
431 else if (('0' <= typeFlag) && (typeFlag <= '2'))
432 {
433
434 format_member = TAR_V7ORIGINAL_FORMAT;
435
436 }
437 else
438 {
439
440 format_member = TAR_V7EXTENDED_FORMAT;
441
442 }
443
444 /*
445 * Locate the file names.
446 */
447 if ((0 != (format_member & TAR_POSIX2001_FORMAT))
448 && (('x' == typeFlag) || ('X' == typeFlag)))
449 {
450
451 if (size <= pos)
452 break;
453
454 else if ((8 <= fsize) && fsize <= (unsigned long long) (size - pos))
455 {
456 const char *keyptr = data + pos;
457 const char *valptr = NULL;
458 const char *nameptr = NULL;
459 unsigned int keylength = 0;
460 unsigned int namelength = 0;
461
462 while (keyptr < data + pos + (size_t) fsize)
463 {
464 if (('0' > *keyptr) || ('9' < *keyptr))
465 {
466 keyptr += 1;
467 continue;
468 }
469
/* NOTE(review): strtoul() and the memcmp() calls below are not
   limited to the end of the extended header (or of 'data'); whether
   the caller guarantees readable bytes beyond 'size' cannot be told
   from here -- TODO confirm. */
470 keylength =
471 (unsigned int) strtoul (keyptr, (char **) &valptr, 10);
472 if ((0 < keylength) && (NULL != valptr)
473 && (keyptr != valptr))
474 {
475 while ((valptr < data + pos + (size_t) fsize)
476 && (' ' == *valptr))
477 valptr += 1;
478 if (0 == memcmp (valptr, "path=", 5))
479 {
480 nameptr = valptr + 5;
481 namelength = keylength - (nameptr - keyptr);
482 }
483 else
484 {
485
486 if ((keylength > (valptr - keyptr) + 4 + 2)
487 && (0 == memcmp (valptr, "GNU.", 4)))
488 format_archive |= TAR_GNU2004_FORMAT;
489
490 else if ((keylength > (valptr - keyptr) + 7 + 2)
491 && (0 == memcmp (valptr, "SCHILY.", 7)))
492 format_archive |= TAR_SCHILLING2001_FORMAT;
493
494 else if ((keylength > (valptr - keyptr) + 4 + 2)
495 && (0 == memcmp (valptr, "SUN.", 4)))
496 format_archive |= TAR_SOLARIS2001_FORMAT;
497 }
498
499 keyptr += keylength;
500 }
501 else
502 {
503 nameptr = NULL;
504 break;
505 }
506 }
507
508 if ((NULL != nameptr) && (0 != *nameptr)
509 && ((size - (nameptr - data)) >= namelength)
510 && (1 < namelength) )
511 {
512 /*
513 * There is an 1-offset because POSIX.1-2001
514 * field separator is counted in field length.
515 */
516 if (fname != NULL)
517 free (fname);
518 fname = malloc (namelength);
519 if (NULL != fname)
520 {
521 memcpy (fname, nameptr, namelength - 1);
522 fname[namelength - 1] = '\0';
523
524 pos += tar_roundup ((size_t) fsize);
525 format_archive |= format_member;
526 continue;
527 }
528 }
529 }
530 }
531
532 else if ((0 != (format_member
533 & (TAR_SCHILLING1994_FORMAT
534 | TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
535 && ('L' == typeFlag))
536 {
537
538 if (size <= pos)
539 break;
540
541 else if ((0 < fsize) && fsize <= (unsigned long long) (size - pos))
542 {
543
544 size_t length = (size_t) fsize;
545
546 nul_pos = memchr (data + pos, 0, length);
547 if (NULL != nul_pos)
548 length = (nul_pos - (data + pos));
549
550 if (0 < length)
551 {
552 if (fname != NULL)
553 free (fname);
554 fname = malloc (1 + length);
555 if (NULL != fname)
556 {
557 memcpy (fname, data + pos, length);
558 fname[length] = '\0';
559 }
560
561 pos += tar_roundup ((size_t) fsize);
562 format_archive |= format_member;
563 continue;
564 }
565 }
566 }
567 else
568 {
569
570 nul_pos = memchr (tar->fileName, 0, sizeof tar->fileName);
571 tar_name_length = (0 == nul_pos)
572 ? sizeof (tar->fileName) : (nul_pos - tar->fileName);
573
574 if ((0 !=
575 (format_member & (TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT)))
576 && ('S' == typeFlag))
577 {
578
579 if ((0 == tar->prefix[40])
580 && (0 != tar->prefix[137])
581 && (12 <= tar_octalvalue (tar->prefix + 41, 12, NULL))
582 && (12 <= tar_octalvalue (tar->prefix + 53, 12, NULL)))
583 {
584 /*
585 * fsize needs adjustment when there are more than 4 sparse blocks
586 */
587 size_t diffpos = 0;
588 fsize += TAR_HEADER_SIZE;
589
590 while ((pos + diffpos + TAR_HEADER_SIZE < size)
591 && (0 != *(data + pos + diffpos + 504)))
592 {
593 diffpos += TAR_HEADER_SIZE;
594 fsize += TAR_HEADER_SIZE;
595 }
596 }
597
598 typeFlag = '0';
599
600 }
601 else if (0 != (format_member & TAR_SCHILLING1994_FORMAT))
602 {
603
604 nul_pos = memchr (tar->prefix, 0, 130);
605 tar_prefix_length = (0 == nul_pos)
606 ? 130 : (nul_pos - tar->prefix);
607
608 if ('S' == typeFlag)
609 typeFlag = '0';
610
611 }
612 else if (0 != (format_member & TAR_SCHILLING1985_FORMAT))
613 {
614
615 nul_pos = memchr (tar->prefix, 0, 155);
616 tar_prefix_length = (0 == nul_pos)
617 ? 155 : (nul_pos - tar->prefix);
618
619
620 if ('S' == typeFlag)
621 typeFlag = '0';
622
623 }
624 else if (0 != (format_member & TAR_POSIX1988_FORMAT))
625 {
626
627 nul_pos = memchr (tar->prefix, 0, sizeof tar->prefix);
628 tar_prefix_length = (0 == nul_pos)
629 ? sizeof tar->prefix : nul_pos - tar->prefix;
630
631 }
632 }
633
634 /*
635 * Update position so that next loop iteration will find
636 * either a TAR header or TAR EOF mark or just EOF.
637 *
638 * Consider archive member size to be zero
639 * with no data following the header in the following cases :
640 * '1' : hard link, '2' : soft link,
641 * '3' : character device, '4' : block device,
642 * '5' : directory, '6' : named pipe.
643 */
644 if ('1' != typeFlag && '2' != typeFlag
645 && '3' != typeFlag && '4' != typeFlag
646 && '5' != typeFlag && '6' != typeFlag)
647 {
648 if ((fsize > (unsigned long long) size)
649 || (fsize + (unsigned long long) pos >
650 (unsigned long long) size))
651 break;
652
653 pos += tar_roundup ((size_t) fsize);
654 }
655 if (pos - 1 > size)
656 break;
657
658 format_archive |= format_member;
659
660 /*
661 * Store the file name in libextractor list.
662 *
663 * For the time being, only file types listed in POSIX.1-1988 ('0'..'7')
664 * are retained, leaving out labels, access control lists, etc.
665 */
666 if ((0 == typeFlag) || (('0' <= typeFlag) && (typeFlag <= '7')))
667 {
668 if (NULL == fname)
669 {
670 if (0 < tar_prefix_length + tar_name_length)
671 {
672 fname = malloc (2 + tar_prefix_length + tar_name_length);
673
674 if (NULL != fname)
675 {
676 if (0 < tar_prefix_length)
677 {
678 memcpy (fname, tar->prefix, tar_prefix_length);
679
680 if (('/' != tar->prefix[tar_prefix_length - 1])
681 && (0 < tar_name_length)
682 && ('/' != tar->fileName[0]))
683 {
684 fname[tar_prefix_length] = '/';
685 tar_prefix_length += 1;
686 }
687 }
688
689 if (0 < tar_name_length)
690 memcpy (fname + tar_prefix_length, tar->fileName,
691 tar_name_length);
692
693 fname[tar_prefix_length + tar_name_length] = '\0';
694 }
695 }
696 }
697
698 if ((NULL != fname) && (0 != *fname))
699 {
700#if 0
701 fprintf (stdout,
702 "(%u) flag = %c, size = %u, tname = (%s), fname = (%s)\n",
703 __LINE__, typeFlag, (unsigned int) fsize,
704 (NULL == tar->fileName) ? "" : tar->fileName,
705 (NULL == fname) ? "" : fname);
706#endif
707
708 ADDF (EXTRACTOR_METATYPE_FILENAME, fname);
709 fname = NULL;
710 if (ftime > maxftime)
711 maxftime = ftime;
712 contents_are_empty = 0;
713 }
714 }
715
716 if (NULL != fname)
717 {
718 free (fname);
719 fname = NULL;
720 }
721 }
722
723 if (NULL != fname)
724 {
725 free (fname);
726 fname = NULL;
727 }
728
729 /*
730 * Report mimetype; report also format(s) and most recent date
731 * when at least one archive member was found.
732 */
733 if (0 == format_archive)
734 return ret;
735 if (0 == contents_are_empty)
736 {
737
738 const char *formats[5] = { NULL, NULL, NULL, NULL, NULL };
739 unsigned int formats_count = 0;
740 unsigned int formats_u = 0;
741 unsigned int format_length = 0;
742 char *format = NULL;
743
744 if (TAR_TIME_FENCE < maxftime)
745 {
746 char iso8601_time[24];
747
748 if (0 == tar_time (maxftime, iso8601_time, sizeof (iso8601_time)))
749 ADD (EXTRACTOR_METATYPE_CREATION_DATE, iso8601_time);
750 }
751
752 /*
753 * We only keep the most recent POSIX format.
754 */
755 if (0 != (format_archive & TAR_POSIX2001_FORMAT))
756 formats[formats_count++] = "POSIX 2001";
757
758 else if (0 != (format_archive & TAR_POSIX1988_FORMAT))
759 formats[formats_count++] = "POSIX 1988";
760
761 /*
762 * We only keep the most recent GNU format.
763 */
764 if (0 != (format_archive & TAR_GNU2004_FORMAT))
765 formats[formats_count++] = "GNU 2004";
766
767 else if (0 != (format_archive & TAR_GNU1997_FORMAT))
768 formats[formats_count++] = "GNU 1997";
769
770 else if (0 != (format_archive & TAR_GNU1991_FORMAT))
771 formats[formats_count++] = "GNU 1991";
772
773 /*
774 * We only keep the most recent Schilling format.
775 */
776 if (0 != (format_archive & TAR_SCHILLING2001_FORMAT))
777 formats[formats_count++] = "Schilling 2001";
778
779 else if (0 != (format_archive & TAR_SCHILLING1994_FORMAT))
780 formats[formats_count++] = "Schilling 1994";
781
782 else if (0 != (format_archive & TAR_SCHILLING1985_FORMAT))
783 formats[formats_count++] = "Schilling 1985";
784
785 /*
786 * We only keep the most recent Solaris format.
787 */
788 if (0 != (format_archive & TAR_SOLARIS2001_FORMAT))
789 formats[formats_count++] = "Solaris 2001";
790
791 /*
792 * We only keep the (supposedly) most recent UNIX V7 format.
793 */
794 if (0 != (format_archive & TAR_V7EXTENDED_FORMAT))
795 formats[formats_count++] = "UNIX extended V7";
796
797 else if (0 != (format_archive & TAR_V7ORIGINAL_FORMAT))
798 formats[formats_count++] = "UNIX original V7";
799
800 /*
801 * Build the format string
802 */
803 for (formats_u = 0; formats_u < formats_count; formats_u += 1)
804 {
805 if ((NULL != formats[formats_u]) && (0 != *formats[formats_u]))
806 {
807 if (0 < format_length)
808 format_length += 3;
809 format_length += strlen (formats[formats_u]);
810 }
811 }
812
813 if (0 < format_length)
814 {
815 if (fname != NULL)
816 free (fname);
/* room for the joined names plus the " TAR" suffix and the
   terminating NUL (4 + 1 bytes) */
817 format = malloc (format_length + 5);
818
819 if (NULL != format)
820 {
821
822 format_length = 0;
823
824 for (formats_u = 0; formats_u < formats_count; formats_u += 1)
825 {
826 if ((NULL != formats[formats_u])
827 && (0 != *formats[formats_u]))
828 {
829 if (0 < format_length)
830 {
831 strcpy (format + format_length, " + ");
832 format_length += 3;
833 }
834 strcpy (format + format_length, formats[formats_u]);
835 format_length += strlen (formats[formats_u]);
836 }
837 }
838
839 if (0 < format_length)
840 {
841 strcpy (format + format_length, " TAR");
842 ADDF (EXTRACTOR_METATYPE_FORMAT_VERSION, format);
843 }
844 else
845 {
846 free (format);
847 }
848 }
849 }
850 }
851
852 ADD (EXTRACTOR_METATYPE_MIMETYPE, "application/x-tar");
853FINISH:
854 return ret;
855}
diff --git a/src/plugins/test_archive.c b/src/plugins/test_archive.c
new file mode 100644
index 0000000..9d3e2d4
--- /dev/null
+++ b/src/plugins/test_archive.c
@@ -0,0 +1,76 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file plugins/test_archive.c
22 * @brief testcase for archive plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "test_lib.h"
27
28
29/**
30 * Main function for the ARCHIVE testcase.
31 *
32 * @param argc number of arguments (ignored)
33 * @param argv arguments (ignored)
34 * @return 0 on success
35 */
36int
37main (int argc, char *argv[])
38{
39 struct SolutionData tar_archive_sol[] =
40 {
41 {
42 EXTRACTOR_METATYPE_FILENAME,
43 EXTRACTOR_METAFORMAT_UTF8,
44 "text/plain",
45 "test.html",
46 strlen ("test.html") + 1,
47 0
48 },
49 {
50 EXTRACTOR_METATYPE_FILENAME,
51 EXTRACTOR_METAFORMAT_UTF8,
52 "text/plain",
53 "test.jpg",
54 strlen ("test.jpg") + 1,
55 0
56 },
57 {
58 EXTRACTOR_METATYPE_FORMAT,
59 EXTRACTOR_METAFORMAT_UTF8,
60 "text/plain",
61 "GNU tar format",
62 strlen ("GNU tar format") + 1,
63 0
64 },
65 { 0, 0, NULL, NULL, 0, -1 }
66 };
67 struct ProblemSet ps[] =
68 {
69 { "testdata/archive_test.tar",
70 tar_archive_sol },
71 { NULL, NULL }
72 };
73 return ET_main ("archive", ps);
74}
75
76/* end of test_archive.c */
diff --git a/test/test.tar b/src/plugins/testdata/archive_test.tar
index 9eadf23..9eadf23 100644
--- a/test/test.tar
+++ b/src/plugins/testdata/archive_test.tar
Binary files differ