aboutsummaryrefslogtreecommitdiff
path: root/src/plugins
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-08-14 16:53:21 +0000
committerChristian Grothoff <christian@grothoff.org>2012-08-14 16:53:21 +0000
commitb4ef30267b385b51949df757c5ae5a85d764cba3 (patch)
treeb94853803b9cfed4e92a50b1bb88e6442c9fdedf /src/plugins
parentd491e2066f59c74df2b3b8ac4d5450210826618f (diff)
downloadlibextractor-b4ef30267b385b51949df757c5ae5a85d764cba3.tar.gz
libextractor-b4ef30267b385b51949df757c5ae5a85d764cba3.zip
implemented zip, sid, nsf, nsfe and odf extractors
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/Makefile.am71
-rw-r--r--src/plugins/deb_extractor.c466
-rw-r--r--src/plugins/nsf_extractor.c160
-rw-r--r--src/plugins/nsfe_extractor.c330
-rw-r--r--src/plugins/odf_extractor.c85
-rw-r--r--src/plugins/sid_extractor.c198
-rw-r--r--src/plugins/test_deb.c150
-rw-r--r--src/plugins/test_odf.c100
-rw-r--r--src/plugins/test_zip.c108
-rw-r--r--src/plugins/testdata/deb_bzip2.debbin0 -> 49482 bytes
-rw-r--r--src/plugins/testdata/odf_cg.odtbin0 -> 28419 bytes
-rw-r--r--src/plugins/testdata/zip_test.zipbin0 -> 1904 bytes
-rw-r--r--src/plugins/zip_extractor.c469
13 files changed, 1338 insertions, 799 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index 07b0560..8e2af6b 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -30,7 +30,9 @@ EXTRA_DIST = template_extractor.c \
30 testdata/ole2_starwriter40.sdw \ 30 testdata/ole2_starwriter40.sdw \
31 testdata/ole2_blair.doc \ 31 testdata/ole2_blair.doc \
32 testdata/ole2_excel.xls \ 32 testdata/ole2_excel.xls \
33 testdata/png_image.png 33 testdata/png_image.png \
34 testdata/odf_cg.odt \
35 testdata/deb_bzip2.deb
34 36
35if HAVE_VORBISFILE 37if HAVE_VORBISFILE
36PLUGIN_OGG=libextractor_ogg.la 38PLUGIN_OGG=libextractor_ogg.la
@@ -77,13 +79,23 @@ PLUGIN_GSF=libextractor_ole2.la
77TEST_GSF=test_ole2 79TEST_GSF=test_ole2
78endif 80endif
79 81
82if HAVE_ZLIB
83PLUGIN_ZLIB=libextractor_deb.la
84TEST_ZLIB=test_deb
85endif
80 86
81plugin_LTLIBRARIES = \ 87plugin_LTLIBRARIES = \
82 libextractor_it.la \ 88 libextractor_it.la \
89 libextractor_nsf.la \
90 libextractor_nsfe.la \
91 libextractor_odf.la \
83 libextractor_png.la \ 92 libextractor_png.la \
84 libextractor_xm.la \ 93 libextractor_xm.la \
85 libextractor_s3m.la \ 94 libextractor_s3m.la \
95 libextractor_sid.la \
86 libextractor_wav.la \ 96 libextractor_wav.la \
97 libextractor_zip.la \
98 $(PLUGIN_ZLIB) \
87 $(PLUGIN_OGG) \ 99 $(PLUGIN_OGG) \
88 $(PLUGIN_MIME) \ 100 $(PLUGIN_MIME) \
89 $(PLUGIN_GIF) \ 101 $(PLUGIN_GIF) \
@@ -103,6 +115,9 @@ check_PROGRAMS = \
103 test_it \ 115 test_it \
104 test_s3m \ 116 test_s3m \
105 test_png \ 117 test_png \
118 test_odf \
119 test_zip \
120 $(TEST_ZLIB) \
106 $(TEST_OGG) \ 121 $(TEST_OGG) \
107 $(TEST_MIME) \ 122 $(TEST_MIME) \
108 $(TEST_GIF) \ 123 $(TEST_GIF) \
@@ -133,6 +148,55 @@ libextractor_xm_la_LDFLAGS = \
133 $(PLUGINFLAGS) 148 $(PLUGINFLAGS)
134 149
135 150
151libextractor_deb_la_SOURCES = \
152 deb_extractor.c
153libextractor_deb_la_LDFLAGS = \
154 $(PLUGINFLAGS) -lz
155
156test_deb_SOURCES = \
157 test_deb.c
158test_deb_LDADD = \
159 $(top_builddir)/src/plugins/libtest.la
160
161
162libextractor_nsf_la_SOURCES = \
163 nsf_extractor.c
164libextractor_nsf_la_LDFLAGS = \
165 $(PLUGINFLAGS)
166
167
168libextractor_nsfe_la_SOURCES = \
169 nsfe_extractor.c
170libextractor_nsfe_la_LDFLAGS = \
171 $(PLUGINFLAGS)
172
173
174libextractor_odf_la_SOURCES = \
175 odf_extractor.c
176libextractor_odf_la_LDFLAGS = \
177 $(PLUGINFLAGS)
178libextractor_odf_la_LIBADD = \
179 $(top_builddir)/src/common/libextractor_common.la
180
181test_odf_SOURCES = \
182 test_odf.c
183test_odf_LDADD = \
184 $(top_builddir)/src/plugins/libtest.la
185
186
187libextractor_zip_la_SOURCES = \
188 zip_extractor.c
189libextractor_zip_la_LDFLAGS = \
190 $(PLUGINFLAGS)
191libextractor_zip_la_LIBADD = \
192 $(top_builddir)/src/common/libextractor_common.la
193
194test_zip_SOURCES = \
195 test_zip.c
196test_zip_LDADD = \
197 $(top_builddir)/src/plugins/libtest.la
198
199
136libextractor_png_la_SOURCES = \ 200libextractor_png_la_SOURCES = \
137 png_extractor.c 201 png_extractor.c
138libextractor_png_la_LDFLAGS = \ 202libextractor_png_la_LDFLAGS = \
@@ -157,6 +221,11 @@ test_it_LDADD = \
157 $(top_builddir)/src/plugins/libtest.la 221 $(top_builddir)/src/plugins/libtest.la
158 222
159 223
224libextractor_sid_la_SOURCES = \
225 sid_extractor.c
226libextractor_sid_la_LDFLAGS = \
227 $(PLUGINFLAGS)
228
160libextractor_s3m_la_SOURCES = \ 229libextractor_s3m_la_SOURCES = \
161 s3m_extractor.c 230 s3m_extractor.c
162libextractor_s3m_la_LDFLAGS = \ 231libextractor_s3m_la_LDFLAGS = \
diff --git a/src/plugins/deb_extractor.c b/src/plugins/deb_extractor.c
index 2bb90c5..955657e 100644
--- a/src/plugins/deb_extractor.c
+++ b/src/plugins/deb_extractor.c
@@ -1,10 +1,10 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff 3 (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your 7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version. 8 option) any later version.
9 9
10 libextractor is distributed in the hope that it will be useful, but 10 libextractor is distributed in the hope that it will be useful, but
@@ -17,12 +17,11 @@
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330, 17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. 18 Boston, MA 02111-1307, USA.
19 */ 19 */
20 20/**
21#include "platform.h" 21 * @file plugins/deb_extractor.c
22#include "extractor.h" 22 * @brief plugin to support Debian archives
23#include <zlib.h> 23 * @author Christian Grothoff
24 24 *
25/*
26 * The .deb is an ar-chive file. It contains a tar.gz file 25 * The .deb is an ar-chive file. It contains a tar.gz file
27 * named "control.tar.gz" which then contains a file 'control' 26 * named "control.tar.gz" which then contains a file 'control'
28 * that has the meta-data. And which variant of the various 27 * that has the meta-data. And which variant of the various
@@ -33,14 +32,33 @@
33 * http://lists.debian.org/debian-policy/2003/12/msg00000.html 32 * http://lists.debian.org/debian-policy/2003/12/msg00000.html
34 * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html 33 * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
35 */ 34 */
35#include "platform.h"
36#include "extractor.h"
37#include <zlib.h>
38
39
40/**
41 * Maximum file size we allow for control.tar.gz files.
42 * This is a sanity check to avoid allocating huge amounts
43 * of memory.
44 */
45#define MAX_CONTROL_SIZE (1024 * 1024)
36 46
37 47
48/**
49 * Re-implementation of 'strndup'.
50 *
51 * @param str string to duplicate
52 * @param n maximum number of bytes to copy
53 * @return NULL on error, otherwise 0-terminated copy of 'str'
54 * with at most n characters
55 */
38static char * 56static char *
39stndup (const char *str, size_t n) 57stndup (const char *str, size_t n)
40{ 58{
41 char *tmp; 59 char *tmp;
42 tmp = malloc (n + 1); 60
43 if (tmp == NULL) 61 if (NULL == (tmp = malloc (n + 1)))
44 return NULL; 62 return NULL;
45 tmp[n] = '\0'; 63 tmp[n] = '\0';
46 memcpy (tmp, str, n); 64 memcpy (tmp, str, n);
@@ -48,15 +66,29 @@ stndup (const char *str, size_t n)
48} 66}
49 67
50 68
51 69/**
52typedef struct 70 * Entry in the mapping from control data to LE types.
71 */
72struct Matches
53{ 73{
74 /**
75 * Key in the Debian control file.
76 */
54 const char *text; 77 const char *text;
78
79 /**
80 * Corresponding type in LE.
81 */
55 enum EXTRACTOR_MetaType type; 82 enum EXTRACTOR_MetaType type;
56} Matches; 83};
84
57 85
58/* see also: "man 5 deb-control" */ 86/**
59static Matches tmap[] = { 87 * Map from deb-control entries to LE types.
88 *
89 * see also: "man 5 deb-control"
90 */
91static struct Matches tmap[] = {
60 {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME}, 92 {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME},
61 {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION}, 93 {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION},
62 {"Section: ", EXTRACTOR_METATYPE_SECTION}, 94 {"Section: ", EXTRACTOR_METATYPE_SECTION},
@@ -79,7 +111,13 @@ static Matches tmap[] = {
79 111
80 112
81/** 113/**
82 * Process the control file. 114 * Process the "control" file from the control.tar.gz
115 *
116 * @param data decompressed control data
117 * @param size number of bytes in data
118 * @param proc function to call with meta data
119 * @param proc_cls closure for 'proc'
120 * @return 0 to continue extracting, 1 if we are done
83 */ 121 */
84static int 122static int
85processControl (const char *data, 123processControl (const char *data,
@@ -90,62 +128,52 @@ processControl (const char *data,
90 size_t pos; 128 size_t pos;
91 char *key; 129 char *key;
92 char *val; 130 char *val;
93 131 size_t colon;
132 size_t eol;
133 unsigned int i;
134
94 pos = 0; 135 pos = 0;
95 while (pos < size) 136 while (pos < size)
96 { 137 {
97 size_t colon; 138 for (colon = pos; ':' != data[colon]; colon++)
98 size_t eol; 139 if ((colon > size) || ('\n' == data[colon]))
99 int i; 140 return 0;
100
101 colon = pos;
102 while (data[colon] != ':')
103 {
104 if ((colon > size) || (data[colon] == '\n'))
105 return 0;
106 colon++;
107 }
108 colon++; 141 colon++;
109 while ((colon < size) && (isspace ((unsigned char) data[colon]))) 142 while ((colon < size) && (isspace ((unsigned char) data[colon])))
110 colon++; 143 colon++;
111 eol = colon; 144 eol = colon;
112 while ((eol < size) && 145 while ((eol < size) &&
113 ((data[eol] != '\n') || 146 (('\n' != data[eol]) ||
114 ((eol + 1 < size) && (data[eol + 1] == ' ')))) 147 ((eol + 1 < size) && (' ' == data[eol + 1]))))
115 eol++; 148 eol++;
116 if ((eol == colon) || (eol > size)) 149 if ((eol == colon) || (eol > size))
117 return 0; 150 return 0;
118 key = stndup (&data[pos], colon - pos); 151 if (NULL == (key = stndup (&data[pos], colon - pos)))
119 if (key == NULL)
120 return 0; 152 return 0;
121 i = 0; 153 for (i = 0; NULL != tmap[i].text; i++)
122 while (tmap[i].text != NULL)
123 { 154 {
124 if (0 == strcmp (key, tmap[i].text)) 155 if (0 != strcmp (key, tmap[i].text))
125 { 156 continue;
126 val = stndup (&data[colon], eol - colon); 157 if (NULL == (val = stndup (&data[colon], eol - colon)))
127 if (val == NULL) 158 {
128 { 159 free (key);
129 free (key); 160 return 0;
130 return 0; 161 }
131 } 162 if (0 != proc (proc_cls,
132 if (0 != proc (proc_cls, 163 "deb",
133 "deb", 164 tmap[i].type,
134 tmap[i].type, 165 EXTRACTOR_METAFORMAT_UTF8,
135 EXTRACTOR_METAFORMAT_UTF8, 166 "text/plain",
136 "text/plain", 167 val,
137 val, 168 strlen(val) + 1))
138 strlen(val) + 1)) 169 {
139 {
140 free (val);
141 free (key);
142 return 1;
143 }
144 free (val); 170 free (val);
145 break; 171 free (key);
146 } 172 return 1;
147 i++; 173 }
148 } 174 free (val);
175 break;
176 }
149 free (key); 177 free (key);
150 pos = eol + 1; 178 pos = eol + 1;
151 } 179 }
@@ -153,62 +181,142 @@ processControl (const char *data,
153} 181}
154 182
155 183
156typedef struct 184/**
185 * Header of an entry in a TAR file.
186 */
187struct TarHeader
157{ 188{
189 /**
190 * Filename.
191 */
158 char name[100]; 192 char name[100];
193
194 /**
195 * File access modes.
196 */
159 char mode[8]; 197 char mode[8];
198
199 /**
200 * Owner of the file.
201 */
160 char userId[8]; 202 char userId[8];
203
204 /**
205 * Group of the file.
206 */
161 char groupId[8]; 207 char groupId[8];
208
209 /**
210 * Size of the file, in octal.
211 */
162 char filesize[12]; 212 char filesize[12];
213
214 /**
215 * Last modification time.
216 */
163 char lastModTime[12]; 217 char lastModTime[12];
218
219 /**
220 * Checksum of the file.
221 */
164 char chksum[8]; 222 char chksum[8];
223
224 /**
225 * Is the file a link?
226 */
165 char link; 227 char link;
228
229 /**
230 * Destination of the link.
231 */
166 char linkName[100]; 232 char linkName[100];
167} TarHeader; 233};
234
168 235
169typedef struct 236/**
237 * Extended TAR header for USTar format.
238 */
239struct USTarHeader
170{ 240{
171 TarHeader tar; 241 /**
242 * Original TAR header.
243 */
244 struct TarHeader tar;
245
246 /**
247 * Additinal magic for USTar.
248 */
172 char magic[6]; 249 char magic[6];
250
251 /**
252 * Format version.
253 */
173 char version[2]; 254 char version[2];
255
256 /**
257 * User name.
258 */
174 char uname[32]; 259 char uname[32];
260
261 /**
262 * Group name.
263 */
175 char gname[32]; 264 char gname[32];
265
266 /**
267 * Device major number.
268 */
176 char devmajor[8]; 269 char devmajor[8];
270
271 /**
272 * Device minor number.
273 */
177 char devminor[8]; 274 char devminor[8];
275
276 /**
277 * Prefix for the file name (USTar path prefix).
278 */
178 char prefix[155]; 279 char prefix[155];
179} USTarHeader; 280};
281
180 282
181/** 283/**
182 * Process the control.tar file. 284 * Process the control.tar file.
285 *
286 * @param data the inflated (decompressed) control.tar file data
287 * @param size number of bytes in data
288 * @param proc function to call with meta data
289 * @param proc_cls closure for 'proc'
290 * @return 0 to continue extracting, 1 if we are done
183 */ 291 */
184static int 292static int
185processControlTar (const char *data, 293processControlTar (const char *data,
186 const size_t size, 294 size_t size,
187 EXTRACTOR_MetaDataProcessor proc, 295 EXTRACTOR_MetaDataProcessor proc,
188 void *proc_cls) 296 void *proc_cls)
189{ 297{
190 TarHeader *tar; 298 struct TarHeader *tar;
191 USTarHeader *ustar; 299 struct USTarHeader *ustar;
192 size_t pos; 300 size_t pos;
193 301
194 pos = 0; 302 pos = 0;
195 while (pos + sizeof (TarHeader) < size) 303 while (pos + sizeof (struct TarHeader) < size)
196 { 304 {
197 unsigned long long fsize; 305 unsigned long long fsize;
198 char buf[13]; 306 char buf[13];
199 307
200 tar = (TarHeader *) & data[pos]; 308 tar = (struct TarHeader *) & data[pos];
201 if (pos + sizeof (USTarHeader) < size) 309 if (pos + sizeof (struct USTarHeader) < size)
202 { 310 {
203 ustar = (USTarHeader *) & data[pos]; 311 ustar = (struct USTarHeader *) & data[pos];
204 if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar"))) 312 if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar")))
205 pos += 512; /* sizeof(USTarHeader); */ 313 pos += 512; /* sizeof (struct USTarHeader); */
206 else 314 else
207 pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ 315 pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */
208 } 316 }
209 else 317 else
210 { 318 {
211 pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ 319 pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */
212 } 320 }
213 321
214 memcpy (buf, &tar->filesize[0], 12); 322 memcpy (buf, &tar->filesize[0], 12);
@@ -220,9 +328,10 @@ processControlTar (const char *data,
220 328
221 if (0 == strncmp (&tar->name[0], "./control", strlen ("./control"))) 329 if (0 == strncmp (&tar->name[0], "./control", strlen ("./control")))
222 { 330 {
331 /* found the 'control' file we were looking for */
223 return processControl (&data[pos], fsize, proc, proc_cls); 332 return processControl (&data[pos], fsize, proc, proc_cls);
224 } 333 }
225 if ((fsize & 511) != 0) 334 if (0 != (fsize & 511))
226 fsize = (fsize | 511) + 1; /* round up! */ 335 fsize = (fsize | 511) + 1; /* round up! */
227 if (pos + fsize < pos) 336 if (pos + fsize < pos)
228 return 0; 337 return 0;
@@ -231,137 +340,184 @@ processControlTar (const char *data,
231 return 0; 340 return 0;
232} 341}
233 342
234#define MAX_CONTROL_SIZE (1024 * 1024)
235
236static voidpf
237Emalloc (voidpf opaque, uInt items, uInt size)
238{
239 if (SIZE_MAX / size <= items)
240 return NULL;
241 return malloc (size * items);
242}
243
244static void
245Efree (voidpf opaque, voidpf ptr)
246{
247 free (ptr);
248}
249 343
250/** 344/**
251 * Process the control.tar.gz file. 345 * Process the control.tar.gz file.
346 *
347 * @param ec extractor context with control.tar.gz at current read position
348 * @param size number of bytes in the control file
349 * @return 0 to continue extracting, 1 if we are done
252 */ 350 */
253static int 351static int
254processControlTGZ (const unsigned char *data, 352processControlTGZ (struct EXTRACTOR_ExtractContext *ec,
255 size_t size, 353 unsigned long long size)
256 EXTRACTOR_MetaDataProcessor proc,
257 void *proc_cls)
258{ 354{
259 uint32_t bufSize; 355 uint32_t bufSize;
260 char *buf; 356 char *buf;
357 void *data;
358 unsigned char *cdata;
261 z_stream strm; 359 z_stream strm;
262 int ret; 360 int ret;
361 ssize_t sret;
362 unsigned long long off;
263 363
264 bufSize = data[size - 4] + (data[size - 3] << 8) + (data[size - 2] << 16) + (data[size - 1] << 24); 364 if (size > MAX_CONTROL_SIZE)
265 if (bufSize > MAX_CONTROL_SIZE) 365 return 0;
366 if (NULL == (cdata = malloc (size)))
266 return 0; 367 return 0;
368 off = 0;
369 while (off < size)
370 {
371 if (0 >= (sret = ec->read (ec->cls, &data, size - off)))
372 {
373 free (cdata);
374 return 0;
375 }
376 memcpy (&cdata[off], data, sret);
377 off += sret;
378 }
379 bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16) + (cdata[size - 1] << 24);
380 if (bufSize > MAX_CONTROL_SIZE)
381 {
382 free (cdata);
383 return 0;
384 }
385 if (NULL == (buf = malloc (bufSize)))
386 {
387 free (cdata);
388 return 0;
389 }
390 ret = 0;
267 memset (&strm, 0, sizeof (z_stream)); 391 memset (&strm, 0, sizeof (z_stream));
268 strm.next_in = (Bytef *) data; 392 strm.next_in = (Bytef *) data;
269 strm.avail_in = size; 393 strm.avail_in = size;
270 strm.total_in = 0;
271 strm.zalloc = &Emalloc;
272 strm.zfree = &Efree;
273 strm.opaque = NULL;
274
275 if (Z_OK == inflateInit2 (&strm, 15 + 32)) 394 if (Z_OK == inflateInit2 (&strm, 15 + 32))
276 { 395 {
277 buf = malloc (bufSize);
278 if (buf == NULL)
279 {
280 inflateEnd (&strm);
281 return 0;
282 }
283 strm.next_out = (Bytef *) buf; 396 strm.next_out = (Bytef *) buf;
284 strm.avail_out = bufSize; 397 strm.avail_out = bufSize;
285 inflate (&strm, Z_FINISH); 398 inflate (&strm, Z_FINISH);
286 if (strm.total_out > 0) 399 if (strm.total_out > 0)
287 { 400 ret = processControlTar (buf, strm.total_out,
288 ret = processControlTar (buf, strm.total_out, proc, proc_cls); 401 ec->proc, ec->cls);
289 inflateEnd (&strm);
290 free (buf);
291 return ret;
292 }
293 free (buf);
294 inflateEnd (&strm); 402 inflateEnd (&strm);
295 } 403 }
296 return 0; 404 free (buf);
405 free (cdata);
406 return ret;
297} 407}
298 408
299typedef struct 409
410/**
411 * Header of an object in an "AR"chive file.
412 */
413struct ObjectHeader
300{ 414{
415 /**
416 * Name of the file.
417 */
301 char name[16]; 418 char name[16];
419
420 /**
421 * Last modification time for the file.
422 */
302 char lastModTime[12]; 423 char lastModTime[12];
424
425 /**
426 * User ID of the owner.
427 */
303 char userId[6]; 428 char userId[6];
429
430 /**
431 * Group ID of the owner.
432 */
304 char groupId[6]; 433 char groupId[6];
434
435 /**
436 * File access modes.
437 */
305 char modeInOctal[8]; 438 char modeInOctal[8];
439
440 /**
441 * Size of the file (as decimal string)
442 */
306 char filesize[10]; 443 char filesize[10];
444
445 /**
446 * Trailer of the object header ("`\n")
447 */
307 char trailer[2]; 448 char trailer[2];
308} ObjectHeader; 449};
309 450
310 451
311int 452/**
312EXTRACTOR_deb_extract (const char *data, 453 * Main entry method for the DEB extraction plugin.
313 size_t size, 454 *
314 EXTRACTOR_MetaDataProcessor proc, 455 * @param ec extraction context provided to the plugin
315 void *proc_cls, 456 */
316 const char *options) 457void
458EXTRACTOR_deb_extract_method (struct EXTRACTOR_ExtractContext *ec)
317{ 459{
318 size_t pos; 460 uint64_t pos;
319 int done = 0; 461 int done = 0;
320 ObjectHeader *hdr; 462 const struct ObjectHeader *hdr;
321 unsigned long long fsize; 463 uint64_t fsize;
464 unsigned long long csize;
322 char buf[11]; 465 char buf[11];
323 466 void *data;
324 if (size < 128) 467
325 return 0; 468 fsize = ec->get_size (ec->cls);
326 if (0 != strncmp ("!<arch>\n", data, strlen ("!<arch>\n"))) 469 if (fsize < 128)
327 return 0; 470 return;
328 pos = strlen ("!<arch>\n"); 471 if (8 !=
329 while (pos + sizeof (ObjectHeader) < size) 472 ec->read (ec->cls, &data, 8))
473 return;
474 if (0 != strncmp ("!<arch>\n", data, 8))
475 return;
476 pos = 8;
477 while (pos + sizeof (struct ObjectHeader) < fsize)
330 { 478 {
331 hdr = (ObjectHeader *) & data[pos]; 479 if (pos !=
480 ec->seek (ec->cls, pos, SEEK_SET))
481 return;
482 if (sizeof (struct ObjectHeader) !=
483 ec->read (ec->cls, &data, sizeof (struct ObjectHeader)))
484 return;
485 hdr = data;
332 if (0 != strncmp (&hdr->trailer[0], "`\n", 2)) 486 if (0 != strncmp (&hdr->trailer[0], "`\n", 2))
333 return 0; 487 return;
334 memcpy (buf, &hdr->filesize[0], 10); 488 memcpy (buf, &hdr->filesize[0], 10);
335 buf[10] = '\0'; 489 buf[10] = '\0';
336 if (1 != sscanf (buf, "%10llu", &fsize)) 490 if (1 != sscanf (buf, "%10llu", &csize))
337 return 0; 491 return;
338 pos += sizeof (ObjectHeader); 492 pos += sizeof (struct ObjectHeader);
339 if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos)) 493 if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos))
340 return 0; 494 return;
341 if (0 == strncmp (&hdr->name[0], 495 if (0 == strncmp (&hdr->name[0],
342 "control.tar.gz", strlen ("control.tar.gz"))) 496 "control.tar.gz",
497 strlen ("control.tar.gz")))
343 { 498 {
344 if (0 != processControlTGZ ((const unsigned char *) &data[pos], 499 if (0 != processControlTGZ (ec,
345 fsize, proc, proc_cls)) 500 csize))
346 return 1; 501 return;
347 done++; 502 done++;
348 } 503 }
349 if (0 == strncmp (&hdr->name[0], 504 if (0 == strncmp (&hdr->name[0],
350 "debian-binary", strlen ("debian-binary"))) 505 "debian-binary", strlen ("debian-binary")))
351 { 506 {
352 if (0 != proc (proc_cls, 507 if (0 != ec->proc (ec->cls,
353 "deb", 508 "deb",
354 EXTRACTOR_METATYPE_MIMETYPE, 509 EXTRACTOR_METATYPE_MIMETYPE,
355 EXTRACTOR_METAFORMAT_UTF8, 510 EXTRACTOR_METAFORMAT_UTF8,
356 "text/plain", 511 "text/plain",
357 "application/x-debian-package", 512 "application/x-debian-package",
358 strlen ("application/x-debian-package")+1)) 513 strlen ("application/x-debian-package")+1))
359 return 1; 514 return;
360 done++; 515 done++;
361 } 516 }
362 pos += fsize; 517 pos += csize;
363 if (done == 2) 518 if (2 == done)
364 break; /* no need to process the rest of the archive */ 519 break; /* no need to process the rest of the archive */
365 } 520 }
366 return 0;
367} 521}
522
523/* end of deb_extractor.c */
diff --git a/src/plugins/nsf_extractor.c b/src/plugins/nsf_extractor.c
index 448b99d..2475214 100644
--- a/src/plugins/nsf_extractor.c
+++ b/src/plugins/nsf_extractor.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * This file is part of libextractor. 2 * This file is part of libextractor.
3 * (C) 2006, 2009 Toni Ruottu 3 * (C) 2006, 2009, 2012 Toni Ruottu and Christian Grothoff
4 * 4 *
5 * libextractor is free software; you can redistribute it and/or modify 5 * libextractor is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published 6 * it under the terms of the GNU General Public License as published
7 * by the Free Software Foundation; either version 2, or (at your 7 * by the Free Software Foundation; either version 3, or (at your
8 * option) any later version. 8 * option) any later version.
9 * 9 *
10 * libextractor is distributed in the hope that it will be useful, but 10 * libextractor is distributed in the hope that it will be useful, but
@@ -18,21 +18,22 @@
18 * Boston, MA 02111-1307, USA. 18 * Boston, MA 02111-1307, USA.
19 * 19 *
20 */ 20 */
21 21/**
22 * @file plugins/nsf_extractor.c
23 * @brief plugin to support Nes Sound Format files
24 * @author Toni Ruottu
25 * @author Christian Grothoff
26 */
22#include "platform.h" 27#include "platform.h"
23#include "extractor.h" 28#include "extractor.h"
24#include "convert.h"
25 29
26 30
27#define HEADER_SIZE 0x80
28 31
29/* television system flags */ 32/* television system flags */
30
31#define PAL_FLAG 0x01 33#define PAL_FLAG 0x01
32#define DUAL_FLAG 0x02 34#define DUAL_FLAG 0x02
33 35
34/* sound chip flags */ 36/* sound chip flags */
35
36#define VRCVI_FLAG 0x01 37#define VRCVI_FLAG 0x01
37#define VRCVII_FLAG 0x02 38#define VRCVII_FLAG 0x02
38#define FDS_FLAG 0x04 39#define FDS_FLAG 0x04
@@ -40,43 +41,108 @@
40#define NAMCO_FLAG 0x10 41#define NAMCO_FLAG 0x10
41#define SUNSOFT_FLAG 0x20 42#define SUNSOFT_FLAG 0x20
42 43
43#define UINT16 unsigned short
44 44
45/**
46 * Header of an NSF file.
47 */
45struct header 48struct header
46{ 49{
50 /**
51 * Magic code.
52 */
47 char magicid[5]; 53 char magicid[5];
54
55 /**
56 * NSF version number.
57 */
48 char nsfversion; 58 char nsfversion;
49 char songs; 59
50 char firstsong; 60 /**
51 UINT16 loadaddr; 61 * Number of songs.
52 UINT16 initaddr; 62 */
53 UINT16 playaddr; 63 unsigned char songs;
64
65 /**
66 * Starting song.
67 */
68 unsigned char firstsong;
69
70 /**
71 * Unknown.
72 */
73 uint16_t loadaddr;
74
75 /**
76 * Unknown.
77 */
78 uint16_t initaddr;
79
80 /**
81 * Unknown.
82 */
83 uint16_t playaddr;
84
85 /**
86 * Album title.
87 */
54 char title[32]; 88 char title[32];
89
90 /**
91 * Artist name.
92 */
55 char artist[32]; 93 char artist[32];
94
95 /**
96 * Copyright information.
97 */
56 char copyright[32]; 98 char copyright[32];
57 UINT16 ntscspeed; 99
100 /**
101 * Unknown.
102 */
103 uint16_t ntscspeed;
104
105 /**
106 * Unknown.
107 */
58 char bankswitch[8]; 108 char bankswitch[8];
59 UINT16 palspeed; 109
110 /**
111 * Unknown.
112 */
113 uint16_t palspeed;
114
115 /**
116 * Flags for TV encoding.
117 */
60 char tvflags; 118 char tvflags;
119
120 /**
121 * Flags about the decoder chip.
122 */
61 char chipflags; 123 char chipflags;
62}; 124};
63 125
64#define ADD(s,t) do { if (0 != proc (proc_cls, "nsf", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) 126
127/**
128 * Give metadata to LE; return if 'proc' returns non-zero.
129 *
130 * @param s metadata value as UTF8
131 * @param t metadata type to use
132 */
133#define ADD(s,t) do { if (0 != ec->proc (ec->cls, "nsf", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) return; } while (0)
65 134
66 135
67/* "extract" keyword from a Nes Sound Format file 136/**
137 * "extract" meta data from a Nes Sound Format file
68 * 138 *
69 * NSF specification version 1.61 was used, 139 * NSF specification version 1.61 was used, while this piece of
70 * while this piece of software was originally 140 * software was originally written.
71 * written.
72 * 141 *
142 * @param ec extraction context
73 */ 143 */
74int 144void
75EXTRACTOR_nsf_extract (const unsigned char *data, 145EXTRACTOR_nsf_extract_method (struct EXTRACTOR_ExtractContext *ec)
76 size_t size,
77 EXTRACTOR_MetaDataProcessor proc,
78 void *proc_cls,
79 const char *options)
80{ 146{
81 char album[33]; 147 char album[33];
82 char artist[33]; 148 char artist[33];
@@ -85,12 +151,18 @@ EXTRACTOR_nsf_extract (const unsigned char *data,
85 char startingsong[32]; 151 char startingsong[32];
86 char nsfversion[32]; 152 char nsfversion[32];
87 const struct header *head; 153 const struct header *head;
88 154 void *data;
89 if (size < HEADER_SIZE) 155
90 return 0; 156 if (sizeof (struct header) >
91 head = (const struct header *) data; 157 ec->read (ec->cls,
158 &data,
159 sizeof (struct header)))
160 return;
161 head = data;
162
163 /* Check "magic" id bytes */
92 if (memcmp (head->magicid, "NESM\x1a", 5)) 164 if (memcmp (head->magicid, "NESM\x1a", 5))
93 return 0; 165 return;
94 ADD ("audio/x-nsf", EXTRACTOR_METATYPE_MIMETYPE); 166 ADD ("audio/x-nsf", EXTRACTOR_METATYPE_MIMETYPE);
95 snprintf (nsfversion, 167 snprintf (nsfversion,
96 sizeof(nsfversion), 168 sizeof(nsfversion),
@@ -100,50 +172,48 @@ EXTRACTOR_nsf_extract (const unsigned char *data,
100 snprintf (songs, 172 snprintf (songs,
101 sizeof(songs), 173 sizeof(songs),
102 "%d", 174 "%d",
103 head->songs); 175 (int) head->songs);
104 ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT); 176 ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT);
105 snprintf (startingsong, 177 snprintf (startingsong,
106 sizeof(startingsong), 178 sizeof(startingsong),
107 "%d", 179 "%d",
108 head->firstsong); 180 (int) head->firstsong);
109 ADD (startingsong, EXTRACTOR_METATYPE_STARTING_SONG); 181 ADD (startingsong, EXTRACTOR_METATYPE_STARTING_SONG);
110
111 memcpy (&album, head->title, 32); 182 memcpy (&album, head->title, 32);
112 album[32] = '\0'; 183 album[32] = '\0';
113 ADD (album, EXTRACTOR_METATYPE_ALBUM); 184 ADD (album, EXTRACTOR_METATYPE_ALBUM);
114
115 memcpy (&artist, head->artist, 32); 185 memcpy (&artist, head->artist, 32);
116 artist[32] = '\0'; 186 artist[32] = '\0';
117 ADD (artist, EXTRACTOR_METATYPE_ARTIST); 187 ADD (artist, EXTRACTOR_METATYPE_ARTIST);
118
119 memcpy (&copyright, head->copyright, 32); 188 memcpy (&copyright, head->copyright, 32);
120 copyright[32] = '\0'; 189 copyright[32] = '\0';
121 ADD (copyright, EXTRACTOR_METATYPE_COPYRIGHT); 190 ADD (copyright, EXTRACTOR_METATYPE_COPYRIGHT);
122 191
123 if (head->tvflags & DUAL_FLAG) 192 if (0 != (head->tvflags & DUAL_FLAG))
124 { 193 {
125 ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); 194 ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
126 } 195 }
127 else 196 else
128 { 197 {
129 if (head->tvflags & PAL_FLAG) 198 if (0 != (head->tvflags & PAL_FLAG))
130 ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); 199 ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
131 else 200 else
132 ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); 201 ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
133 } 202 }
134 203
135 /* Detect Extra Sound Chips needed to play the files */ 204 /* Detect Extra Sound Chips needed to play the files */
136 if (head->chipflags & VRCVI_FLAG) 205 if (0 != (head->chipflags & VRCVI_FLAG))
137 ADD ("VRCVI", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 206 ADD ("VRCVI", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
138 if (head->chipflags & VRCVII_FLAG) 207 if (0 != (head->chipflags & VRCVII_FLAG))
139 ADD ("VRCVII", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 208 ADD ("VRCVII", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
140 if (head->chipflags & FDS_FLAG) 209 if (0 != (head->chipflags & FDS_FLAG))
141 ADD ("FDS Sound", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 210 ADD ("FDS Sound", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
142 if (head->chipflags & MMC5_FLAG) 211 if (0 != (head->chipflags & MMC5_FLAG))
143 ADD ("MMC5 audio", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 212 ADD ("MMC5 audio", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
144 if (head->chipflags & NAMCO_FLAG) 213 if (0 != (head->chipflags & NAMCO_FLAG))
145 ADD ("Namco 106", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 214 ADD ("Namco 106", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
146 if (head->chipflags & SUNSOFT_FLAG) 215 if (0 != (head->chipflags & SUNSOFT_FLAG))
147 ADD ("Sunsoft FME-07", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 216 ADD ("Sunsoft FME-07", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
148 return 0;
149} 217}
218
219/* end of nsf_extractor.c */
diff --git a/src/plugins/nsfe_extractor.c b/src/plugins/nsfe_extractor.c
index ffd5ded..16c4980 100644
--- a/src/plugins/nsfe_extractor.c
+++ b/src/plugins/nsfe_extractor.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * This file is part of libextractor. 2 * This file is part of libextractor.
3 * (C) 2007, 2009 Toni Ruottu 3 * (C) 2007, 2009, 2012 Toni Ruottu and Christian Grothoff
4 * 4 *
5 * libextractor is free software; you can redistribute it and/or modify 5 * libextractor is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published 6 * it under the terms of the GNU General Public License as published
7 * by the Free Software Foundation; either version 2, or (at your 7 * by the Free Software Foundation; either version 3, or (at your
8 * option) any later version. 8 * option) any later version.
9 * 9 *
10 * libextractor is distributed in the hope that it will be useful, but 10 * libextractor is distributed in the hope that it will be useful, but
@@ -18,20 +18,22 @@
18 * Boston, MA 02111-1307, USA. 18 * Boston, MA 02111-1307, USA.
19 * 19 *
20 */ 20 */
21 21/**
22 * @file plugins/nsfe_extractor.c
23 * @brief plugin to support Extended Nintendo Sound Format (NSFE) files
24 * @author Toni Ruottu
25 * @author Christian Grothoff
26 */
22#include "platform.h" 27#include "platform.h"
23#include "extractor.h" 28#include "extractor.h"
24#include "convert.h" 29#include "convert.h"
25 30
26#define HEADER_SIZE 0x04
27 31
28/* television system flags */ 32/* television system flags */
29
30#define PAL_FLAG 0x01 33#define PAL_FLAG 0x01
31#define DUAL_FLAG 0x02 34#define DUAL_FLAG 0x02
32 35
33/* sound chip flags */ 36/* sound chip flags */
34
35#define VRCVI_FLAG 0x01 37#define VRCVI_FLAG 0x01
36#define VRCVII_FLAG 0x02 38#define VRCVII_FLAG 0x02
37#define FDS_FLAG 0x04 39#define FDS_FLAG 0x04
@@ -39,29 +41,26 @@
39#define NAMCO_FLAG 0x10 41#define NAMCO_FLAG 0x10
40#define SUNSOFT_FLAG 0x20 42#define SUNSOFT_FLAG 0x20
41 43
42#define UINT16 unsigned short 44/**
43 45 * "Header" of an NSFE file.
46 */
44struct header 47struct header
45{ 48{
46 char magicid[4]; 49 char magicid[4];
47}; 50};
48 51
49struct infochunk
50{
51 UINT16 loadaddr;
52 UINT16 initaddr;
53 UINT16 playaddr;
54 char tvflags;
55 char chipflags;
56 char songs;
57 char firstsong;
58};
59 52
60static int 53/**
54 * Read an unsigned integer at the current offset.
55 *
56 * @param data input data to parse
57 * @return parsed integer
58 */
59static uint32_t
61nsfeuint (const char *data) 60nsfeuint (const char *data)
62{ 61{
63 int i; 62 int i;
64 int value = 0; 63 uint32_t value = 0;
65 64
66 for (i = 3; i > 0; i--) 65 for (i = 3; i > 0; i--)
67 { 66 {
@@ -73,8 +72,17 @@ nsfeuint (const char *data)
73} 72}
74 73
75 74
75/**
76 * Copy string starting at 'data' with at most
77 * 'size' bytes. (strndup).
78 *
79 * @param data input data to copy
80 * @param size number of bytes in 'data'
81 * @return copy of the string at data
82 */
76static char * 83static char *
77nsfestring (const char *data, size_t size) 84nsfestring (const char *data,
85 size_t size)
78{ 86{
79 char *s; 87 char *s;
80 size_t length; 88 size_t length;
@@ -83,105 +91,210 @@ nsfestring (const char *data, size_t size)
83 while ( (length < size) && 91 while ( (length < size) &&
84 (data[length] != '\0') ) 92 (data[length] != '\0') )
85 length++; 93 length++;
86 s = malloc (length + 1); 94 if (NULL == (s = malloc (length + 1)))
87 if (s == NULL)
88 return NULL; 95 return NULL;
89 strncpy (s, data, length); 96 memcpy (s, data, length);
90 s[strlen (data)] = '\0'; 97 s[length] = '\0';
91 return s; 98 return s;
92} 99}
93 100
94#define ADD(s,t) do { if (0 != proc (proc_cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
95 101
96#define ADDF(s,t) do { if (0 != proc (proc_cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) { free(s); return 1; } free (s); } while (0) 102/**
103 * Give metadata to LE; return if 'proc' returns non-zero.
104 *
105 * @param s metadata value as UTF8
106 * @param t metadata type to use
107 */
108#define ADD(s,t) do { if (0 != ec->proc (ec->cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) return 1; } while (0)
109
110
111/**
112 * Give metadata to LE; return if 'proc' returns non-zero.
113 *
114 * @param s metadata value as UTF8, free at the end
115 * @param t metadata type to use
116 */
117#define ADDF(s,t) do { if (0 != ec->proc (ec->cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) { free (s); return 1; } free (s); } while (0)
118
119
120/**
121 * Format of an 'INFO' chunk. Last two bytes are optional.
122 */
123struct infochunk
124{
125 /**
126 * Unknown.
127 */
128 uint16_t loadaddr;
129
130 /**
131 * Unknown.
132 */
133 uint16_t initaddr;
134
135 /**
136 * Unknown.
137 */
138 uint16_t playaddr;
139
140 /**
141 * TV encoding flags.
142 */
143 char tvflags;
97 144
145 /**
146 * Chipset encoding flags.
147 */
148 char chipflags;
149
150 /**
151 * Number of songs.
152 */
153 unsigned char songs;
154
155 /**
156 * Starting song.
157 */
158 unsigned char firstsong;
159};
160
161
162/**
163 * Extract data from the INFO chunk.
164 *
165 * @param ec extraction context
166 * @param size number of bytes in INFO chunk
167 * @return 0 to continue extracting
168 */
98static int 169static int
99libextractor_nsfe_info_extract(const char *data, 170info_extract (struct EXTRACTOR_ExtractContext *ec,
100 size_t size, 171 uint32_t size)
101 EXTRACTOR_MetaDataProcessor proc,
102 void *proc_cls)
103{ 172{
173 void *data;
104 const struct infochunk *ichunk; 174 const struct infochunk *ichunk;
105 char songs[32]; 175 char songs[32];
106 176
107 if (size < 8) 177 if (size < 8)
108 return 0; 178 return 0;
109 ichunk = (const struct infochunk *) data; 179 if (size >
110 if (ichunk->tvflags & DUAL_FLAG) 180 ec->read (ec->cls,
181 &data,
182 size))
183 return 1;
184 ichunk = data;
185
186 if (0 != (ichunk->tvflags & DUAL_FLAG))
111 { 187 {
112 ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); 188 ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
113 } 189 }
114 else 190 else
115 { 191 {
116 if (ichunk->tvflags & PAL_FLAG) 192 if (0 != (ichunk->tvflags & PAL_FLAG))
117 ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); 193 ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
118 else 194 else
119 ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); 195 ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
120 } 196 }
121 197
122 if (ichunk->chipflags & VRCVI_FLAG) 198 if (0 != (ichunk->chipflags & VRCVI_FLAG))
123 ADD ("VRCVI", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 199 ADD ("VRCVI", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
124 if (ichunk->chipflags & VRCVII_FLAG) 200 if (0 != (ichunk->chipflags & VRCVII_FLAG))
125 ADD ("VRCVII", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 201 ADD ("VRCVII", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
126 if (ichunk->chipflags & FDS_FLAG) 202 if (0 != (ichunk->chipflags & FDS_FLAG))
127 ADD ("FDS Sound", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 203 ADD ("FDS Sound", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
128 if (ichunk->chipflags & MMC5_FLAG) 204 if (0 != (ichunk->chipflags & MMC5_FLAG))
129 ADD ("MMC5 audio", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 205 ADD ("MMC5 audio", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
130 if (ichunk->chipflags & NAMCO_FLAG) 206 if (0 != (ichunk->chipflags & NAMCO_FLAG))
131 ADD ("Namco 106", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 207 ADD ("Namco 106", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
132 if (ichunk->chipflags & SUNSOFT_FLAG) 208 if (0 != (ichunk->chipflags & SUNSOFT_FLAG))
133 ADD ("Sunsoft FME-07", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 209 ADD ("Sunsoft FME-07", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
134 if (size < 9) 210
211 if (size < sizeof (struct infochunk))
135 { 212 {
136 ADD ("1", EXTRACTOR_METATYPE_SONG_COUNT); 213 ADD ("1", EXTRACTOR_METATYPE_SONG_COUNT);
137 return 0; 214 return 0;
138 } 215 }
139 snprintf (songs, 216 snprintf (songs,
140 sizeof(songs), 217 sizeof (songs),
141 "%d", 218 "%d",
142 ichunk->songs); 219 ichunk->songs);
143 ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT); 220 ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT);
221 snprintf (songs,
222 sizeof (songs),
223 "%d",
224 ichunk->firstsong);
225 ADD (songs, EXTRACTOR_METATYPE_STARTING_SONG);
144 return 0; 226 return 0;
145} 227}
146 228
147 229
230/**
231 * Extract data from the TLBL chunk.
232 *
233 * @param ec extraction context
234 * @param size number of bytes in TLBL chunk
235 * @return 0 to continue extracting
236 */
148static int 237static int
149libextractor_nsfe_tlbl_extract(const char *data, 238tlbl_extract (struct EXTRACTOR_ExtractContext *ec,
150 size_t size, 239 uint32_t size)
151 EXTRACTOR_MetaDataProcessor proc,
152 void *proc_cls)
153
154{ 240{
155 char *title; 241 char *title;
156 ssize_t left; 242 ssize_t left;
157 size_t length; 243 size_t length;
158 244 void *data;
159 for (left = size; left > 0; left -= length) 245 const char *cdata;
246
247 if (size >
248 ec->read (ec->cls,
249 &data,
250 size))
251 return 1;
252 cdata = data;
253
254 left = size;
255 while (left > 0)
160 { 256 {
161 title = nsfestring (&data[size - left], left); 257 title = nsfestring (&cdata[size - left], left);
162 if (title == NULL) 258 if (NULL == title)
163 return 0; 259 return 0;
164 length = strlen (title) + 1; 260 length = strlen (title) + 1;
165 ADDF (title, EXTRACTOR_METATYPE_TITLE); 261 ADDF (title, EXTRACTOR_METATYPE_TITLE);
262 left -= length;
166 } 263 }
167 return 0; 264 return 0;
168} 265}
169 266
267
268/**
269 * Extract data from the AUTH chunk.
270 *
271 * @param ec extraction context
272 * @param size number of bytes in AUTH chunk
273 * @return 0 to continue extracting
274 */
170static int 275static int
171libextractor_nsfe_auth_extract (const char *data, size_t size, 276auth_extract (struct EXTRACTOR_ExtractContext *ec,
172 EXTRACTOR_MetaDataProcessor proc, 277 uint32_t size)
173 void *proc_cls)
174{ 278{
175 char *album; 279 char *album;
176 char *artist; 280 char *artist;
177 char *copyright; 281 char *copyright;
178 char *ripper; 282 char *ripper;
179 int left = size; 283 uint32_t left = size;
284 void *data;
285 const char *cdata;
180 286
181 if (left < 1) 287 if (left < 1)
182 return 0; 288 return 0;
183 album = nsfestring (&data[size - left], left); 289 if (size >
184 if (album != NULL) 290 ec->read (ec->cls,
291 &data,
292 size))
293 return 1;
294 cdata = data;
295
296 album = nsfestring (&cdata[size - left], left);
297 if (NULL != album)
185 { 298 {
186 left -= (strlen (album) + 1); 299 left -= (strlen (album) + 1);
187 ADDF (album, EXTRACTOR_METATYPE_ALBUM); 300 ADDF (album, EXTRACTOR_METATYPE_ALBUM);
@@ -189,8 +302,8 @@ libextractor_nsfe_auth_extract (const char *data, size_t size,
189 return 0; 302 return 0;
190 } 303 }
191 304
192 artist = nsfestring (&data[size - left], left); 305 artist = nsfestring (&cdata[size - left], left);
193 if (artist != NULL) 306 if (NULL != artist)
194 { 307 {
195 left -= (strlen (artist) + 1); 308 left -= (strlen (artist) + 1);
196 ADDF (artist, EXTRACTOR_METATYPE_ARTIST); 309 ADDF (artist, EXTRACTOR_METATYPE_ARTIST);
@@ -198,67 +311,78 @@ libextractor_nsfe_auth_extract (const char *data, size_t size,
198 return 0; 311 return 0;
199 } 312 }
200 313
201 copyright = nsfestring (&data[size - left], left); 314 copyright = nsfestring (&cdata[size - left], left);
202 if (copyright != NULL) 315 if (NULL != copyright)
203 { 316 {
204 left -= (strlen (copyright) + 1); 317 left -= (strlen (copyright) + 1);
205 ADDF (copyright, EXTRACTOR_METATYPE_COPYRIGHT); 318 ADDF (copyright, EXTRACTOR_METATYPE_COPYRIGHT);
206 if (left < 1) 319 if (left < 1)
207 return 0; 320 return 0;
208 } 321 }
209 ripper = nsfestring (&data[size - left], left); 322 ripper = nsfestring (&cdata[size - left], left);
210 if (ripper != NULL) 323 if (NULL != ripper)
211 ADDF (ripper, EXTRACTOR_METATYPE_RIPPER); 324 ADDF (ripper, EXTRACTOR_METATYPE_RIPPER);
212 return 0; 325 return 0;
213} 326}
214 327
215 328
216/* "extract" keyword from an Extended Nintendo Sound Format file 329/**
330 * "extract" meta data from an Extended Nintendo Sound Format file
217 * 331 *
218 * NSFE specification revision 2 (Sep. 3, 2003) 332 * NSFE specification revision 2 (Sep. 3, 2003) was used, while this
219 * was used, while this piece of software was 333 * piece of software was originally written.
220 * originally written.
221 * 334 *
335 * @param ec extraction context
222 */ 336 */
223int 337void
224EXTRACTOR_nsfe_extract (const char *data, 338EXTRACTOR_nsfe_extract_method (struct EXTRACTOR_ExtractContext *ec)
225 size_t size,
226 EXTRACTOR_MetaDataProcessor proc,
227 void *proc_cls,
228 const char *options)
229{ 339{
230 const struct header *head; 340 const struct header *head;
231 int i; 341 void *data;
232 char chunkid[5] = " "; 342 uint64_t off;
343 uint32_t chunksize;
233 int ret; 344 int ret;
234 345
235 if (size < HEADER_SIZE) 346 if (sizeof (struct header) >
236 return 0; 347 ec->read (ec->cls,
237 head = (const struct header *) data; 348 &data,
238 if (memcmp (head->magicid, "NSFE", 4)) 349 sizeof (struct header)))
239 return 0; 350 return;
240 ADD ("audio/x-nsfe", EXTRACTOR_METATYPE_MIMETYPE); 351 head = data;
241 i = 4; /* Jump over magic id */ 352 if (0 != memcmp (head->magicid, "NSFE", 4))
353 return;
354
355 if (0 != ec->proc (ec->cls,
356 "nsfe",
357 EXTRACTOR_METATYPE_MIMETYPE,
358 EXTRACTOR_METAFORMAT_UTF8,
359 "text/plain",
360 "audio/x-nsfe",
361 strlen ("audio/x-nsfe") + 1))
362 return;
363 off = sizeof (struct header);
242 ret = 0; 364 ret = 0;
243 while (i + 7 < size && strncmp (chunkid, "NEND", 4)) /* CHECK */ 365 while (0 == ret)
244 { 366 {
245 unsigned int chunksize = nsfeuint (&data[i]); 367 if (off != ec->seek (ec->cls,
246 368 off,
247 i += 4; /* Jump over chunk size */ 369 SEEK_SET))
248 memcpy (&chunkid, data + i, 4);
249 chunkid[4] = '\0';
250
251 i += 4; /* Jump over chunk id */
252 if (!strncmp (chunkid, "INFO", 4))
253 ret = libextractor_nsfe_info_extract (data + i, chunksize, proc, proc_cls);
254 else if (!strncmp (chunkid, "auth", 4))
255 ret = libextractor_nsfe_auth_extract (data + i, chunksize, proc, proc_cls);
256 else if (!strncmp (chunkid, "tlbl", 4))
257 ret = libextractor_nsfe_tlbl_extract (data + i, chunksize, proc, proc_cls);
258 /* Ignored chunks: DATA, NEND, plst, time, fade, BANK */
259 i += chunksize;
260 if (ret != 0)
261 break; 370 break;
371 if (8 >
372 ec->read (ec->cls,
373 &data,
374 sizeof (struct header)))
375 break;
376 chunksize = nsfeuint (data);
377 off += 4 + chunksize;
378 if (0 == memcmp (data + 4, "INFO", 4))
379 ret = info_extract (ec, chunksize);
380 else if (0 == memcmp (data + 4, "auth", 4))
381 ret = auth_extract (ec, chunksize);
382 else if (0 == memcmp (data + 4, "tlbl", 4))
383 ret = tlbl_extract (ec, chunksize);
384 /* Ignored chunks: DATA, NEND, plst, time, fade, BANK */
262 } 385 }
263 return ret;
264} 386}
387
388/* end of nsfe_extractor.c */
diff --git a/src/plugins/odf_extractor.c b/src/plugins/odf_extractor.c
index 7903ce5..06da7d8 100644
--- a/src/plugins/odf_extractor.c
+++ b/src/plugins/odf_extractor.c
@@ -28,11 +28,6 @@
28#include "unzip.h" 28#include "unzip.h"
29 29
30/** 30/**
31 * Should filenames be treated as case sensitive?
32 */
33#define CASESENSITIVITY 0
34
35/**
36 * Maximum length of a filename allowed inside the ZIP archive. 31 * Maximum length of a filename allowed inside the ZIP archive.
37 */ 32 */
38#define MAXFILENAME 256 33#define MAXFILENAME 256
@@ -90,17 +85,17 @@ static struct Matches tmap[] = {
90 * @return NULL if no mimetype could be found, otherwise the mime type 85 * @return NULL if no mimetype could be found, otherwise the mime type
91 */ 86 */
92static char * 87static char *
93libextractor_oo_getmimetype (EXTRACTOR_unzip_file uf) 88libextractor_oo_getmimetype (struct EXTRACTOR_UnzipFile * uf)
94{ 89{
95 char filename_inzip[MAXFILENAME]; 90 char filename_inzip[MAXFILENAME];
96 EXTRACTOR_unzip_file_info file_info; 91 struct EXTRACTOR_UnzipFileInfo file_info;
97 char *buf; 92 char *buf;
98 size_t buf_size; 93 size_t buf_size;
99 94
100 if (EXTRACTOR_UNZIP_OK != 95 if (EXTRACTOR_UNZIP_OK !=
101 EXTRACTOR_common_unzip_local_file (uf, 96 EXTRACTOR_common_unzip_go_find_local_file (uf,
102 "mimetype", 97 "mimetype",
103 CASESENSITIVITY)) 98 2))
104 return NULL; 99 return NULL;
105 if (EXTRACTOR_UNZIP_OK != 100 if (EXTRACTOR_UNZIP_OK !=
106 EXTRACTOR_common_unzip_get_current_file_info (uf, 101 EXTRACTOR_common_unzip_get_current_file_info (uf,
@@ -113,11 +108,8 @@ libextractor_oo_getmimetype (EXTRACTOR_unzip_file uf)
113 0)) 108 0))
114 return NULL; 109 return NULL;
115 if (EXTRACTOR_UNZIP_OK != 110 if (EXTRACTOR_UNZIP_OK !=
116 EXTRACTOR_common_unzip_open_current_file3 (uf, NULL, NULL, 0)) 111 EXTRACTOR_common_unzip_open_current_file (uf))
117 { 112 return NULL;
118 EXTRACTOR_common_unzip_close_current_file (uf);
119 return NULL;
120 }
121 buf_size = file_info.uncompressed_size; 113 buf_size = file_info.uncompressed_size;
122 if (buf_size > 1024) 114 if (buf_size > 1024)
123 { 115 {
@@ -164,40 +156,25 @@ void
164EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec) 156EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec)
165{ 157{
166 char filename_inzip[MAXFILENAME]; 158 char filename_inzip[MAXFILENAME];
167 EXTRACTOR_unzip_file uf; 159 struct EXTRACTOR_UnzipFile *uf;
168 EXTRACTOR_unzip_file_info file_info; 160 struct EXTRACTOR_UnzipFileInfo file_info;
169 char *buf; 161 char *buf;
170 char *pbuf; 162 char *pbuf;
171 size_t buf_size; 163 size_t buf_size;
172 unsigned int i; 164 unsigned int i;
173 EXTRACTOR_unzip_filefunc_def io;
174 char *mimetype; 165 char *mimetype;
175 166
176 if (size < 100) 167 if (NULL == (uf = EXTRACTOR_common_unzip_open (ec)))
177 return 0;
178 if ( !( ('P'==data[0]) && ('K'==data[1]) && (0x03==data[2]) && (0x04==data[3])) )
179 return 0;
180
181 io.zopen_file = &EXTRACTOR_common_unzip_zlib_open_file_func;
182 io.zread_file = &EXTRACTOR_common_unzip_zlib_read_file_func;
183 io.zwrite_file = NULL;
184 io.ztell_file = &EXTRACTOR_common_unzip_zlib_tell_file_func;
185 io.zseek_file = &EXTRACTOR_common_unzip_zlib_seek_file_func;
186 io.zclose_file = &EXTRACTOR_common_unzip_zlib_close_file_func;
187 io.zerror_file = &EXTRACTOR_common_unzip_zlib_testerror_file_func;
188 io.opaque = ec;
189
190 if (NULL == (uf = EXTRACTOR_common_unzip_open2 ("ERROR", &io)))
191 return; 168 return;
192 if (NULL != (mimetype = libextractor_oo_getmimetype (uf))) 169 if (NULL != (mimetype = libextractor_oo_getmimetype (uf)))
193 { 170 {
194 if (0 != proc (proc_cls, 171 if (0 != ec->proc (ec->cls,
195 "deb", 172 "odf",
196 EXTRACTOR_METATYPE_MIMETYPE, 173 EXTRACTOR_METATYPE_MIMETYPE,
197 EXTRACTOR_METAFORMAT_UTF8, 174 EXTRACTOR_METAFORMAT_UTF8,
198 "text/plain", 175 "text/plain",
199 mimetype, 176 mimetype,
200 strlen (mimetype) + 1)) 177 strlen (mimetype) + 1))
201 { 178 {
202 EXTRACTOR_common_unzip_close (uf); 179 EXTRACTOR_common_unzip_close (uf);
203 free (mimetype); 180 free (mimetype);
@@ -206,9 +183,9 @@ EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec)
206 free (mimetype); 183 free (mimetype);
207 } 184 }
208 if (EXTRACTOR_UNZIP_OK != 185 if (EXTRACTOR_UNZIP_OK !=
209 EXTRACTOR_common_unzip_local_file (uf, 186 EXTRACTOR_common_unzip_go_find_local_file (uf,
210 METAFILE, 187 METAFILE,
211 CASESENSITIVITY)) 188 2))
212 { 189 {
213 /* metafile not found */ 190 /* metafile not found */
214 EXTRACTOR_common_unzip_close (uf); 191 EXTRACTOR_common_unzip_close (uf);
@@ -219,14 +196,14 @@ EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec)
219 &file_info, 196 &file_info,
220 filename_inzip, 197 filename_inzip,
221 sizeof (filename_inzip), 198 sizeof (filename_inzip),
222 NULL,0,NULL,0)) 199 NULL, 0, NULL, 0))
223 { 200 {
224 /* problems accessing metafile */ 201 /* problems accessing metafile */
225 EXTRACTOR_common_unzip_close (uf); 202 EXTRACTOR_common_unzip_close (uf);
226 return; 203 return;
227 } 204 }
228 if (EXTRACTOR_UNZIP_OK != 205 if (EXTRACTOR_UNZIP_OK !=
229 EXTRACTOR_common_unzip_open_current_file3 (uf, NULL, NULL, 0)) 206 EXTRACTOR_common_unzip_open_current_file (uf))
230 { 207 {
231 /* problems with unzip */ 208 /* problems with unzip */
232 EXTRACTOR_common_unzip_close (uf); 209 EXTRACTOR_common_unzip_close (uf);
@@ -320,16 +297,14 @@ EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec)
320 297
321 memcpy(key, spos, epos-spos); 298 memcpy(key, spos, epos-spos);
322 key[epos-spos] = '\0'; 299 key[epos-spos] = '\0';
323 if (0 != proc (proc_cls, 300 if (0 != ec->proc (ec->cls,
324 "odf", 301 "odf",
325 tmap[i].type, 302 tmap[i].type,
326 EXTRACTOR_METAFORMAT_UTF8, 303 EXTRACTOR_METAFORMAT_UTF8,
327 "text/plain", 304 "text/plain",
328 key, 305 key,
329 epos - spos + 1)) 306 epos - spos + 1))
330 { 307 goto CLEANUP;
331 goto CLEANUP;
332 }
333 pbuf = epos; 308 pbuf = epos;
334 } 309 }
335 else 310 else
diff --git a/src/plugins/sid_extractor.c b/src/plugins/sid_extractor.c
index 2465982..afc739b 100644
--- a/src/plugins/sid_extractor.c
+++ b/src/plugins/sid_extractor.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * This file is part of libextractor. 2 * This file is part of libextractor.
3 * (C) 2006, 2007 Toni Ruottu 3 * (C) 2006, 2007, 2012 Vidyut Samanta and Christian Grothoff
4 * 4 *
5 * libextractor is free software; you can redistribute it and/or modify 5 * libextractor is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published 6 * it under the terms of the GNU General Public License as published
7 * by the Free Software Foundation; either version 2, or (at your 7 * by the Free Software Foundation; either version 3, or (at your
8 * option) any later version. 8 * option) any later version.
9 * 9 *
10 * libextractor is distributed in the hope that it will be useful, but 10 * libextractor is distributed in the hope that it will be useful, but
@@ -18,15 +18,17 @@
18 * Boston, MA 02111-1307, USA. 18 * Boston, MA 02111-1307, USA.
19 * 19 *
20 */ 20 */
21/**
22 * @file plugins/sid_extractor.c
23 * @brief plugin to support SID files
24 * @author Toni Ruottu
25 * @author Christian Grothoff
26 */
21#include "platform.h" 27#include "platform.h"
22#include "extractor.h" 28#include "extractor.h"
23 29
24 30
25#define SID1_HEADER_SIZE 0x76 31/* SID flags */
26#define SID2_HEADER_SIZE 0x7c
27
28/* flags */
29
30#define MUSPLAYER_FLAG 0x01 32#define MUSPLAYER_FLAG 0x01
31#define PLAYSID_FLAG 0x02 33#define PLAYSID_FLAG 0x02
32#define PAL_FLAG 0x04 34#define PAL_FLAG 0x04
@@ -34,51 +36,134 @@
34#define MOS6581_FLAG 0x10 36#define MOS6581_FLAG 0x10
35#define MOS8580_FLAG 0x20 37#define MOS8580_FLAG 0x20
36 38
39/**
40 * A "SID word".
41 */
37typedef char sidwrd[2]; 42typedef char sidwrd[2];
43
44/**
45 * A "SID long".
46 */
38typedef char sidlongwrd[4]; 47typedef char sidlongwrd[4];
39 48
49/**
50 * Header of a SID file.
51 */
40struct header 52struct header
41{ 53{
54 /**
55 * Magic string.
56 */
42 char magicid[4]; 57 char magicid[4];
58
59 /**
60 * Version number.
61 */
43 sidwrd sidversion; 62 sidwrd sidversion;
63
64 /**
65 * Unknown.
66 */
44 sidwrd dataoffset; 67 sidwrd dataoffset;
68
69 /**
70 * Unknown.
71 */
45 sidwrd loadaddr; 72 sidwrd loadaddr;
73
74 /**
75 * Unknown.
76 */
46 sidwrd initaddr; 77 sidwrd initaddr;
78
79 /**
80 * Unknown.
81 */
47 sidwrd playaddr; 82 sidwrd playaddr;
83
84 /**
85 * Number of songs in file.
86 */
48 sidwrd songs; 87 sidwrd songs;
88
89 /**
90 * Starting song.
91 */
49 sidwrd firstsong; 92 sidwrd firstsong;
93
94 /**
95 * Unknown.
96 */
50 sidlongwrd speed; 97 sidlongwrd speed;
98
99 /**
100 * Title of the album.
101 */
51 char title[32]; 102 char title[32];
103
104 /**
105 * Name of the artist.
106 */
52 char artist[32]; 107 char artist[32];
108
109 /**
110 * Copyright information.
111 */
53 char copyright[32]; 112 char copyright[32];
54 sidwrd flags; /* version 2 specific fields start */ 113
114 /* version 2 specific fields start */
115
116 /**
117 * Flags
118 */
119 sidwrd flags;
120
121 /**
122 * Unknown.
123 */
55 char startpage; 124 char startpage;
125
126 /**
127 * Unknown.
128 */
56 char pagelength; 129 char pagelength;
130
131 /**
132 * Unknown.
133 */
57 sidwrd reserved; 134 sidwrd reserved;
58}; 135};
59 136
137
138/**
139 * Convert a 'sidword' to an integer.
140 *
141 * @param data the sidword
142 * @return corresponding integer value
143 */
60static int 144static int
61sidword (const sidwrd data) 145sidword (const sidwrd data)
62{ 146{
63 int value = (unsigned char) data[0] * 0x100 + (unsigned char) data[1]; 147 return (unsigned char) data[0] * 0x100 + (unsigned char) data[1];
64 return value;
65
66} 148}
67 149
68#define ADD(s,t) do { if (0 != proc (proc_cls, "sid", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
69 150
70/* "extract" keyword from a SID file 151/**
71 * 152 * Give metadata to LE; return if 'proc' returns non-zero.
72 * This plugin is based on the nsf extractor
73 * 153 *
154 * @param s metadata value as UTF8
155 * @param t metadata type to use
74 */ 156 */
157#define ADD(s,t) do { if (0 != ec->proc (ec->cls, "sid", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) return; } while (0)
158
75 159
76int 160/**
77EXTRACTOR_sid_extract (const char *data, 161 * Extract metadata from SID files.
78 size_t size, 162 *
79 EXTRACTOR_MetaDataProcessor proc, 163 * @param ec extraction context
80 void *proc_cls, 164 */
81 const char *options) 165void
166EXTRACTOR_sid_extract_method (struct EXTRACTOR_ExtractContext *ec)
82{ 167{
83 unsigned int flags; 168 unsigned int flags;
84 int version; 169 int version;
@@ -89,17 +174,19 @@ EXTRACTOR_sid_extract (const char *data,
89 char startingsong[32]; 174 char startingsong[32];
90 char sidversion[32]; 175 char sidversion[32];
91 const struct header *head; 176 const struct header *head;
177 void *data;
92 178
93 /* Check header size */ 179 if (sizeof (struct header) >
94 180 ec->read (ec->cls,
95 if (size < SID1_HEADER_SIZE) 181 &data,
96 return 0; 182 sizeof (struct header)))
97 head = (const struct header *) data; 183 return;
184 head = data;
98 185
99 /* Check "magic" id bytes */ 186 /* Check "magic" id bytes */
100 if (memcmp (head->magicid, "PSID", 4) && 187 if ( (0 != memcmp (head->magicid, "PSID", 4)) &&
101 memcmp (head->magicid, "RSID", 4)) 188 (0 != memcmp (head->magicid, "RSID", 4)) )
102 return 0; 189 return;
103 190
104 /* Mime-type */ 191 /* Mime-type */
105 ADD ("audio/prs.sid", EXTRACTOR_METATYPE_MIMETYPE); 192 ADD ("audio/prs.sid", EXTRACTOR_METATYPE_MIMETYPE);
@@ -107,25 +194,24 @@ EXTRACTOR_sid_extract (const char *data,
107 /* Version of SID format */ 194 /* Version of SID format */
108 version = sidword (head->sidversion); 195 version = sidword (head->sidversion);
109 snprintf (sidversion, 196 snprintf (sidversion,
110 sizeof(sidversion), 197 sizeof (sidversion),
111 "%d", 198 "%d",
112 version); 199 version);
113 ADD (sidversion, EXTRACTOR_METATYPE_FORMAT_VERSION); 200 ADD (sidversion, EXTRACTOR_METATYPE_FORMAT_VERSION);
114 201
115 /* Get song count */ 202 /* Get song count */
116 snprintf (songs, 203 snprintf (songs,
117 sizeof(songs), 204 sizeof (songs),
118 "%d", sidword (head->songs)); 205 "%d", sidword (head->songs));
119 ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT); 206 ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT);
120 207
121 /* Get number of the first song to be played */ 208 /* Get number of the first song to be played */
122 snprintf (startingsong, 209 snprintf (startingsong,
123 sizeof(startingsong), 210 sizeof (startingsong),
124 "%d", 211 "%d",
125 sidword (head->firstsong)); 212 sidword (head->firstsong));
126 ADD (startingsong, EXTRACTOR_METATYPE_STARTING_SONG); 213 ADD (startingsong, EXTRACTOR_METATYPE_STARTING_SONG);
127 214
128
129 /* name, artist, copyright fields */ 215 /* name, artist, copyright fields */
130 memcpy (&album, head->title, 32); 216 memcpy (&album, head->title, 32);
131 album[32] = '\0'; 217 album[32] = '\0';
@@ -139,9 +225,8 @@ EXTRACTOR_sid_extract (const char *data,
139 copyright[32] = '\0'; 225 copyright[32] = '\0';
140 ADD (copyright, EXTRACTOR_METATYPE_COPYRIGHT); 226 ADD (copyright, EXTRACTOR_METATYPE_COPYRIGHT);
141 227
142 228 if (version < 2)
143 if ( (version < 2) || (size < SID2_HEADER_SIZE)) 229 return;
144 return 0;
145 230
146 /* Version 2 specific options follow 231 /* Version 2 specific options follow
147 * 232 *
@@ -150,42 +235,25 @@ EXTRACTOR_sid_extract (const char *data,
150 */ 235 */
151 flags = sidword (head->flags); 236 flags = sidword (head->flags);
152 /* MUS data */ 237 /* MUS data */
153 if (flags & MUSPLAYER_FLAG) 238 if (0 != (flags & MUSPLAYER_FLAG))
154 ADD ("Compute!'s Sidplayer", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE); 239 ADD ("Compute!'s Sidplayer", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE);
155 240
156 /* PlaySID data */ 241 /* PlaySID data */
157 if (flags & PLAYSID_FLAG) 242 if (0 != (flags & PLAYSID_FLAG))
158 ADD ("PlaySID", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE); 243 ADD ("PlaySID", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE);
159 244
160 245
161 /* PAL or NTSC */ 246 /* PAL or NTSC */
162 247 if (0 != (flags & NTSC_FLAG))
163 if (flags & PAL_FLAG) 248 ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
164 { 249 else if (0 != (flags & PAL_FLAG))
165 if (flags & NTSC_FLAG) 250 ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
166 ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
167 else
168 ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
169 }
170 else
171 {
172 if (flags & NTSC_FLAG)
173 ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM);
174 }
175 251
176 /* Detect SID chips suitable for playing the files */ 252 /* Detect SID chips suitable for playing the files */
177 if (flags & MOS6581_FLAG) 253 if (0 != (flags & MOS8580_FLAG))
178 { 254 ADD ("MOS6581/MOS8580", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
179 if (flags & MOS8580_FLAG) 255 else if (0 != (flags & MOS6581_FLAG))
180 ADD ("MOS6581/MOS8580", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); 256 ADD ("MOS6581", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
181 else
182 ADD ("MOS6581", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
183 }
184 else
185 {
186 if (flags & MOS8580_FLAG)
187 ADD ("MOS8580", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE);
188 }
189
190 return 0;
191} 257}
258
259/* end of sid_extractor.c */
diff --git a/src/plugins/test_deb.c b/src/plugins/test_deb.c
new file mode 100644
index 0000000..93a5586
--- /dev/null
+++ b/src/plugins/test_deb.c
@@ -0,0 +1,150 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file plugins/test_deb.c
22 * @brief testcase for deb plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "test_lib.h"
27
28
29/**
30 * Main function for the DEB testcase.
31 *
32 * @param argc number of arguments (ignored)
33 * @param argv arguments (ignored)
34 * @return 0 on success
35 */
36int
37main (int argc, char *argv[])
38{
39 struct SolutionData deb_bzip2_sol[] =
40 {
41 {
42 EXTRACTOR_METATYPE_MIMETYPE,
43 EXTRACTOR_METAFORMAT_UTF8,
44 "text/plain",
45 "application/x-debian-package",
46 strlen ("application/x-debian-package") + 1,
47 0
48 },
49 {
50 EXTRACTOR_METATYPE_PACKAGE_NAME,
51 EXTRACTOR_METAFORMAT_UTF8,
52 "text/plain",
53 "bzip2",
54 strlen ("bzip2") + 1,
55 0
56 },
57 {
58 EXTRACTOR_METATYPE_PACKAGE_VERSION,
59 EXTRACTOR_METAFORMAT_UTF8,
60 "text/plain",
61 "1.0.6-4",
62 strlen ("1.0.6-4") + 1,
63 0
64 },
65 {
66 EXTRACTOR_METATYPE_TARGET_ARCHITECTURE,
67 EXTRACTOR_METAFORMAT_UTF8,
68 "text/plain",
69 "i386",
70 strlen ("i386") + 1,
71 0
72 },
73 {
74 EXTRACTOR_METATYPE_PACKAGE_MAINTAINER,
75 EXTRACTOR_METAFORMAT_UTF8,
76 "text/plain",
77 "Anibal Monsalve Salazar <anibal@debian.org>",
78 strlen ("Anibal Monsalve Salazar <anibal@debian.org>") + 1,
79 0
80 },
81 {
82 EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE,
83 EXTRACTOR_METAFORMAT_UTF8,
84 "text/plain",
85 "113", /* FIXME: should this be 'kb'? */
86 strlen ("113") + 1,
87 0
88 },
89 {
90 EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY,
91 EXTRACTOR_METAFORMAT_UTF8,
92 "text/plain",
93 "libbz2-1.0 (= 1.0.6-4), libc6 (>= 2.4)",
94 strlen ("libbz2-1.0 (= 1.0.6-4), libc6 (>= 2.4)") + 1,
95 0
96 },
97 {
98 EXTRACTOR_METATYPE_PACKAGE_SUGGESTS,
99 EXTRACTOR_METAFORMAT_UTF8,
100 "text/plain",
101 "bzip2-doc",
102 strlen ("bzip2-doc") + 1,
103 0
104 },
105 {
106 EXTRACTOR_METATYPE_PACKAGE_REPLACES,
107 EXTRACTOR_METAFORMAT_UTF8,
108 "text/plain",
109 "libbz2 (<< 0.9.5d-3)",
110 strlen ("libbz2 (<< 0.9.5d-3)") + 1,
111 0
112 },
113 {
114 EXTRACTOR_METATYPE_SECTION,
115 EXTRACTOR_METAFORMAT_UTF8,
116 "text/plain",
117 "utils",
118 strlen ("utils") + 1,
119 0
120 },
121 {
122 EXTRACTOR_METATYPE_UPLOAD_PRIORITY,
123 EXTRACTOR_METAFORMAT_UTF8,
124 "text/plain",
125 "standard",
126 strlen ("standard") + 1,
127 0
128 },
129#if 0
130 {
131 EXTRACTOR_METATYPE_DESCRIPTION,
132 EXTRACTOR_METAFORMAT_UTF8,
133 "text/plain",
134 "",
135 strlen ("") + 1,
136 0
137 },
138#endif
139 { 0, 0, NULL, NULL, 0, -1 }
140 };
141 struct ProblemSet ps[] =
142 {
143 { "testdata/deb_bzip2.deb",
144 deb_bzip2_sol },
145 { NULL, NULL }
146 };
147 return ET_main ("deb", ps);
148}
149
150/* end of test_deb.c */
diff --git a/src/plugins/test_odf.c b/src/plugins/test_odf.c
new file mode 100644
index 0000000..be5205b
--- /dev/null
+++ b/src/plugins/test_odf.c
@@ -0,0 +1,100 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file plugins/test_odf.c
22 * @brief testcase for odf plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "test_lib.h"
27
28
29/**
30 * Main function for the ODF testcase.
31 *
32 * @param argc number of arguments (ignored)
33 * @param argv arguments (ignored)
34 * @return 0 on success
35 */
36int
37main (int argc, char *argv[])
38{
39 struct SolutionData odf_cg_sol[] =
40 {
41 {
42 EXTRACTOR_METATYPE_MIMETYPE,
43 EXTRACTOR_METAFORMAT_UTF8,
44 "text/plain",
45 "application/vnd.oasis.opendocument.text",
46 strlen ("application/vnd.oasis.opendocument.text") + 1,
47 0
48 },
49 {
50 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
51 EXTRACTOR_METAFORMAT_UTF8,
52 "text/plain",
53 "OpenOffice.org/3.2$Unix OpenOffice.org_project/320m12$Build-9483",
54 strlen ("OpenOffice.org/3.2$Unix OpenOffice.org_project/320m12$Build-9483") + 1,
55 0
56 },
57 {
58 EXTRACTOR_METATYPE_PAGE_COUNT,
59 EXTRACTOR_METAFORMAT_UTF8,
60 "text/plain",
61 "1",
62 strlen ("1") + 1,
63 0
64 },
65 {
66 EXTRACTOR_METATYPE_CREATION_DATE,
67 EXTRACTOR_METAFORMAT_UTF8,
68 "text/plain",
69 "2005-11-22T11:44:00",
70 strlen ("2005-11-22T11:44:00") + 1,
71 0
72 },
73 {
74 EXTRACTOR_METATYPE_UNKNOWN_DATE,
75 EXTRACTOR_METAFORMAT_UTF8,
76 "text/plain",
77 "2010-06-09T13:09:34",
78 strlen ("2010-06-09T13:09:34") + 1,
79 0
80 },
81 {
82 EXTRACTOR_METATYPE_TITLE,
83 EXTRACTOR_METAFORMAT_UTF8,
84 "text/plain",
85 "Anhang 1: Profile der beteiligten Wissenschaftler",
86 strlen ("Anhang 1: Profile der beteiligten Wissenschaftler") + 1,
87 0
88 },
89 { 0, 0, NULL, NULL, 0, -1 }
90 };
91 struct ProblemSet ps[] =
92 {
93 { "testdata/odf_cg.odt",
94 odf_cg_sol },
95 { NULL, NULL }
96 };
97 return ET_main ("odf", ps);
98}
99
100/* end of test_odf.c */
diff --git a/src/plugins/test_zip.c b/src/plugins/test_zip.c
new file mode 100644
index 0000000..855a30f
--- /dev/null
+++ b/src/plugins/test_zip.c
@@ -0,0 +1,108 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file plugins/test_zip.c
22 * @brief testcase for zip plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "test_lib.h"
27
28
29/**
30 * Main function for the ZIP testcase.
31 *
32 * @param argc number of arguments (ignored)
33 * @param argv arguments (ignored)
34 * @return 0 on success
35 */
36int
37main (int argc, char *argv[])
38{
39 struct SolutionData zip_test_sol[] =
40 {
41 {
42 EXTRACTOR_METATYPE_MIMETYPE,
43 EXTRACTOR_METAFORMAT_UTF8,
44 "text/plain",
45 "application/zip",
46 strlen ("application/zip") + 1,
47 0
48 },
49 {
50 EXTRACTOR_METATYPE_COMMENT,
51 EXTRACTOR_METAFORMAT_C_STRING,
52 "text/plain",
53 "global zipfile comment",
54 strlen ("global zipfile comment") + 1,
55 0
56 },
57 {
58 EXTRACTOR_METATYPE_FILENAME,
59 EXTRACTOR_METAFORMAT_C_STRING,
60 "text/plain",
61 "ChangeLog",
62 strlen ("ChangeLog") + 1,
63 0
64 },
65 {
66 EXTRACTOR_METATYPE_FILENAME,
67 EXTRACTOR_METAFORMAT_C_STRING,
68 "text/plain",
69 "test.png",
70 strlen ("test.png") + 1,
71 0
72 },
73 {
74 EXTRACTOR_METATYPE_COMMENT,
75 EXTRACTOR_METAFORMAT_C_STRING,
76 "text/plain",
77 "comment for test.png",
78 strlen ("comment for test.png") + 1,
79 0
80 },
81 {
82 EXTRACTOR_METATYPE_FILENAME,
83 EXTRACTOR_METAFORMAT_C_STRING,
84 "text/plain",
85 "test.jpg",
86 strlen ("test.jpg") + 1,
87 0
88 },
89 {
90 EXTRACTOR_METATYPE_COMMENT,
91 EXTRACTOR_METAFORMAT_C_STRING,
92 "text/plain",
93 "comment for test.jpg",
94 strlen ("comment for test.jpg") + 1,
95 0
96 },
97 { 0, 0, NULL, NULL, 0, -1 }
98 };
99 struct ProblemSet ps[] =
100 {
101 { "testdata/zip_test.zip",
102 zip_test_sol },
103 { NULL, NULL }
104 };
105 return ET_main ("zip", ps);
106}
107
108/* end of test_zip.c */
diff --git a/src/plugins/testdata/deb_bzip2.deb b/src/plugins/testdata/deb_bzip2.deb
new file mode 100644
index 0000000..4b74b15
--- /dev/null
+++ b/src/plugins/testdata/deb_bzip2.deb
Binary files differ
diff --git a/src/plugins/testdata/odf_cg.odt b/src/plugins/testdata/odf_cg.odt
new file mode 100644
index 0000000..cfd40e5
--- /dev/null
+++ b/src/plugins/testdata/odf_cg.odt
Binary files differ
diff --git a/src/plugins/testdata/zip_test.zip b/src/plugins/testdata/zip_test.zip
new file mode 100644
index 0000000..564b43a
--- /dev/null
+++ b/src/plugins/testdata/zip_test.zip
Binary files differ
diff --git a/src/plugins/zip_extractor.c b/src/plugins/zip_extractor.c
index 8738295..23da67c 100644
--- a/src/plugins/zip_extractor.c
+++ b/src/plugins/zip_extractor.c
@@ -1,50 +1,10 @@
1/**
2 zipextractor.c version 0.0.2
3
4 Changes from 0.0.1 to 0.0.2
5 -> Searches for central dir struct from end of file if this is a self-extracting executable
6
7
8 This file was based on mp3extractor.c (0.1.2)
9
10 Currently, this only returns a list of the filenames within a zipfile
11 and any comments on each file or the whole file itself. File sizes,
12 modification times, and crc's are currently ignored.
13
14 TODO: Break the comments up into small, atomically, searchable chunks (keywords)
15 - might need some knowledge of English?
16
17 It returns:
18
19 one EXTRACTOR_MIMETYPE
20 multiple EXTRACTOR_FILENAME
21 multiple EXTRACTOR_COMMENT
22
23 ... from a .ZIP file
24
25 TODO: EXTRACTOR_DATE, EXTRACTOR_DESCRIPTION, EXTRACTOR_KEYWORDS, others?
26
27 Does NOT test data integrity (CRCs etc.)
28
29 This version is not recursive (i.e. doesn't look inside zip
30 files within zip files)
31
32 TODO: Run extract on files inside of archive (?) (i.e. gif, mp3, etc.)
33
34 The current .ZIP format description:
35 ftp://ftp.pkware.com/appnote.zip
36
37 No Copyright 2003 Julia Wolf
38
39 */
40
41/* 1/*
42 * This file is part of libextractor. 2 * This file is part of libextractor.
43 * (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff 3 * (C) 2012 Vidyut Samanta and Christian Grothoff
44 * 4 *
45 * libextractor is free software; you can redistribute it and/or modify 5 * libextractor is free software; you can redistribute it and/or modify
46 * it under the terms of the GNU General Public License as published 6 * it under the terms of the GNU General Public License as published
47 * by the Free Software Foundation; either version 2, or (at your 7 * by the Free Software Foundation; either version 3, or (at your
48 * option) any later version. 8 * option) any later version.
49 * 9 *
50 * libextractor is distributed in the hope that it will be useful, but 10 * libextractor is distributed in the hope that it will be useful, but
@@ -57,354 +17,113 @@
57 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 17 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
58 * Boston, MA 02111-1307, USA. 18 * Boston, MA 02111-1307, USA.
59 */ 19 */
60 20/**
21 * @file plugins/zip_extractor.c
22 * @brief plugin to support ZIP files
23 * @author Christian Grothoff
24 */
61#include "platform.h" 25#include "platform.h"
26#include <ctype.h>
62#include "extractor.h" 27#include "extractor.h"
63 28#include "unzip.h"
64#define DEBUG_EXTRACT_ZIP 0
65
66/* In a zipfile there are two kinds of comments. One is a big one for the
67 entire .zip, it's usually a BBS ad. The other is a small comment on each
68 individual file; most people don't use this.
69 */
70
71/* TODO: zip_entry linked list is handeled kinda messily, should clean up (maybe) */
72 typedef struct
73{
74 char *filename;
75 char *comment;
76 void *next;
77 } zip_entry;
78 29
79/* mimetype = application/zip */
80int
81EXTRACTOR_zip_extract (const unsigned char *data,
82 size_t size,
83 EXTRACTOR_MetaDataProcessor proc,
84 void *proc_cls,
85 const char *options)
86{
87 int ret;
88 void *tmp;
89 zip_entry * info;
90 zip_entry * start;
91 char *filecomment;
92 const unsigned char *pos;
93 unsigned int offset, stop;
94 unsigned int name_length;
95 unsigned int extra_length;
96 unsigned int comment_length;
97 unsigned int filecomment_length;
98 unsigned int entry_count;
99#if DEBUG_EXTRACT_ZIP
100 unsigned int entry_total;
101#endif
102
103 /* I think the smallest zipfile you can have is about 120 bytes */
104 if ((NULL == data) || (size < 100))
105 return 0;
106 if (! (('P' == data[0]) && ('K' == data[1]) && (0x03 == data[2])
107 && (0x04 == data[3])))
108 return 0;
109
110 /* The filenames for each file in a zipfile are stored in two locations.
111 * There is one at the start of each entry, just before the compressed data,
112 * and another at the end in a 'central directory structure'.
113 *
114 * In order to catch self-extracting executables, we scan backwards from the end
115 * of the file looking for the central directory structure. The previous version
116 * of this went forewards through the local headers, but that only works for plain
117 * vanilla zip's and I don't feel like writing a special case for each of the dozen
118 * self-extracting executable stubs.
119 *
120 * This assumes that the zip file is considered to be non-corrupt/non-truncated.
121 * If it is truncated then it's not considered to be a zip and skipped.
122 *
123 */
124
125 /* From appnote.iz and appnote.txt (more or less)
126 *
127 * (this is why you always need to put in the last floppy if you span disks)
128 *
129 * 0- 3 end of central dir signature 4 bytes (0x06054b50) P K ^E ^F
130 * 4- 5 number of this disk 2 bytes
131 * 6- 7 number of the disk with the
132 * start of the central directory 2 bytes
133 * 8- 9 total number of entries in
134 * the central dir on this disk 2 bytes
135 * 10-11 total number of entries in
136 * the central dir 2 bytes
137 * 12-15 size of the central directory 4 bytes
138 * 16-19 offset of start of central
139 * directory with respect to
140 * the starting disk number 4 bytes
141 * 20-21 zipfile comment length 2 bytes
142 * 22-?? zipfile comment (variable size) max length 65536 bytes
143 */
144
145 /* the signature can't be more than 22 bytes from the end */
146 offset = size - 22;
147 pos = &data[offset];
148 stop = 0;
149 if (((signed int) size - 65556) > 0)
150 stop = size - 65556;
151
152 /* not using int 0x06054b50 so that we don't have to deal with endianess issues.
153 break out if we go more than 64K backwards and havn't found it, or if we hit the
154 begining of the file. */
155 while ((!(('P' == pos[0]) && ('K' == pos[1]) && (0x05 == pos[2])
156 && (0x06 == pos[3]))) && (offset > stop))
157 pos = &data[offset--];
158 if (offset == stop)
159 {
160#if DEBUG_EXTRACT_ZIP
161 fprintf (stderr,
162 "Did not find end of central directory structure signature. offset: %i\n",
163 offset);
164
165#endif
166 return 0;
167 }
168 30
169 /* offset should now point to the start of the end-of-central directory structure */ 31/**
170 /* and pos[0] should be pointing there too */ 32 * Main entry method for the 'application/zip' extraction plugin.
171 /* so slurp down filecomment while here... */ 33 *
172 filecomment_length = pos[20] + (pos[21] << 8); 34 * @param ec extraction context provided to the plugin
173 if (filecomment_length + offset + 22 > size) 35 */
174 { 36void
175 return 0; /* invalid zip file format! */ 37EXTRACTOR_zip_extract_method (struct EXTRACTOR_ExtractContext *ec)
176 } 38{
177 filecomment = NULL; 39 struct EXTRACTOR_UnzipFile *uf;
178 if (filecomment_length > 0) 40 struct EXTRACTOR_UnzipFileInfo fi;
41 char fname[256];
42 char fcomment[256];
43
44 if (NULL == (uf = EXTRACTOR_common_unzip_open (ec)))
45 return;
46 if ( (EXTRACTOR_UNZIP_OK ==
47 EXTRACTOR_common_unzip_go_find_local_file (uf,
48 "meta.xml",
49 2)) ||
50 (EXTRACTOR_UNZIP_OK ==
51 EXTRACTOR_common_unzip_go_find_local_file (uf,
52 "META-INF/MANIFEST.MF",
53 2)) )
179 { 54 {
180 filecomment = malloc (filecomment_length + 1); 55 /* not a normal zip, might be odf, jar, etc. */
181 if (filecomment != NULL) 56 goto CLEANUP;
182 {
183 memcpy (filecomment, &pos[22], filecomment_length);
184 filecomment[filecomment_length] = '\0';
185 }
186 } 57 }
187 58 if (EXTRACTOR_UNZIP_OK !=
188#if DEBUG_EXTRACT_ZIP 59 EXTRACTOR_common_unzip_go_to_first_file (uf))
189 if ((0 != pos[4]) && (0 != pos[5])) 60 {
190 fprintf (stderr, 61 /* zip malformed? */
191 "WARNING: This seems to be the last disk in a multi-volume" 62 goto CLEANUP;
192 " ZIP archive, and so this might not work.\n"); 63 }
193#endif 64 if (0 !=
194 65 ec->proc (ec->cls,
195#if DEBUG_EXTRACT_ZIP 66 "zip",
196 if ((pos[8] != pos[10]) && (pos[9] != pos[11])) 67 EXTRACTOR_METATYPE_MIMETYPE,
197 fprintf (stderr, 68 EXTRACTOR_METAFORMAT_UTF8,
198 "WARNING: May not be able to find all the files in this" 69 "text/plain",
199 " ZIP archive (no multi-volume support right now).\n"); 70 "application/zip",
200 entry_total = pos[10] + (pos[11] << 8); 71 strlen ("application/zip") + 1))
201#endif 72 goto CLEANUP;
202 entry_count = 0; 73 if (EXTRACTOR_UNZIP_OK ==
203 74 EXTRACTOR_common_unzip_get_global_comment (uf,
204 /* jump to start of central directory, ASSUMING that the starting disk that it's on is disk 0 */ 75 fcomment,
205 /* starting disk would otherwise be pos[6]+pos[7]<<8 */ 76 sizeof (fcomment)))
206 offset = pos[16] + (pos[17] << 8) + (pos[18] << 16) + (pos[19] << 24); /* offset of cent-dir from start of disk 0 */
207
208 /* stop = pos[12] + (pos[13]<<8) + (pos[14]<<16) + (pos[15]<<24); *//* length of central dir */
209 if (offset + 46 > size)
210 { 77 {
211 78 if ( (0 != strlen (fcomment)) &&
212 /* not a zip */ 79 (0 !=
213 if (filecomment != NULL) 80 ec->proc (ec->cls,
214 free (filecomment); 81 "zip",
215 return 0; 82 EXTRACTOR_METATYPE_COMMENT,
83 EXTRACTOR_METAFORMAT_C_STRING,
84 "text/plain",
85 fcomment,
86 strlen (fcomment) + 1)))
87 goto CLEANUP;
216 } 88 }
217 pos = &data[offset]; /* jump */
218
219 /* we should now be at the begining of the central directory structure */
220
221 /* from appnote.txt and appnote.iz (mostly)
222 *
223 * 0- 3 central file header signature 4 bytes (0x02014b50)
224 * 4- 5 version made by 2 bytes
225 * 6- 7 version needed to extract 2 bytes
226 * 8- 9 general purpose bit flag 2 bytes
227 * 10-11 compression method 2 bytes
228 * 12-13 last mod file time 2 bytes
229 * 14-15 last mod file date 2 bytes
230 * 16-19 crc-32 4 bytes
231 * 20-23 compressed size 4 bytes
232 * 24-27 uncompressed size 4 bytes
233 * 28-29 filename length 2 bytes
234 * 30-31 extra field length 2 bytes
235 * 32-33 file comment length 2 bytes
236 * 34-35 disk number start 2 bytes
237 * 36-37 internal file attributes 2 bytes
238 * 38-41 external file attributes 4 bytes
239 * 42-45 relative offset of local header 4 bytes
240 *
241 * 46-?? filename (variable size)
242 * ?- ? extra field (variable size)
243 * ?- ? file comment (variable size)
244 */
245 if (!(('P' == pos[0]) && ('K' == pos[1]) && (0x01 == pos[2])
246 && (0x02 == pos[3])))
247 {
248#if DEBUG_EXTRACT_ZIP
249 fprintf (stderr,
250 "Did not find central directory structure signature. offset: %i\n",
251 offset);
252
253#endif
254 if (filecomment != NULL)
255 free (filecomment);
256 return 0;
257 }
258 start = NULL;
259 info = NULL;
260
261 do 89 do
262 { /* while ( (0x01==pos[2])&&(0x02==pos[3]) ) */
263 entry_count++; /* check to make sure we found everything at the end */
264 name_length = pos[28] + (pos[29] << 8);
265 extra_length = pos[30] + (pos[31] << 8);
266 comment_length = pos[32] + (pos[33] << 8);
267 if (name_length + extra_length + comment_length + offset + 46 > size)
268 {
269
270 /* ok, invalid, abort! */
271 break;
272 }
273
274#if DEBUG_EXTRACT_ZIP
275 fprintf (stderr, "Found filename length %i Comment length: %i\n",
276 name_length, comment_length);
277
278#endif
279 /* yay, finally get filenames */
280 if (start == NULL)
281 {
282 start = malloc (sizeof (zip_entry));
283 if (start == NULL)
284 break;
285 start->next = NULL;
286 info = start;
287 }
288 else
289 {
290 info->next = malloc (sizeof (zip_entry));
291 if (info->next == NULL)
292 break;
293 info = info->next;
294 info->next = NULL;
295 }
296 info->filename = malloc (name_length + 1);
297 info->comment = malloc (comment_length + 1);
298
299 /* (strings in zip files are not null terminated) */
300 if (info->filename != NULL)
301 {
302 memcpy (info->filename, &pos[46], name_length);
303 info->filename[name_length] = '\0';
304 }
305 if (info->comment != NULL)
306 {
307 memcpy (info->comment, &pos[46 + name_length + extra_length],
308 comment_length);
309 info->comment[comment_length] = '\0';
310 }
311 offset += 46 + name_length + extra_length + comment_length;
312 pos = &data[offset];
313 /* check for next header entry (0x02014b50) or (0x06054b50) if at end */
314 if (('P' != pos[0]) && ('K' != pos[1]))
315 {
316#if DEBUG_EXTRACT_ZIP
317 fprintf (stderr,
318 "Did not find next header in central directory.\n");
319
320#endif
321 info = start;
322 while (info != NULL)
323 {
324 start = info->next;
325 if (info->filename != NULL)
326 free (info->filename);
327 if (info->comment != NULL)
328 free (info->comment);
329 free (info);
330 info = start;
331 }
332 if (filecomment != NULL)
333 free (filecomment);
334 return 0;
335 }
336 }
337 while ((0x01 == pos[2]) && (0x02 == pos[3]));
338
339 /* end list */
340
341 /* TODO: should this return an error? indicates corrupt zipfile (or
342 disk missing in middle of multi-disk)? */
343#if DEBUG_EXTRACT_ZIP
344 if (entry_count != entry_total)
345 fprintf (stderr,
346 "WARNING: Did not find all of the zipfile entries that we should have.\n");
347#endif
348
349 ret = proc (proc_cls,
350 "zip",
351 EXTRACTOR_METATYPE_MIMETYPE,
352 EXTRACTOR_METAFORMAT_UTF8,
353 "text/plain",
354 "application/zip",
355 strlen ("application/zip")+1);
356 if ( (filecomment != NULL) && (ret != 0) )
357 {
358 ret = proc (proc_cls,
359 "zip",
360 EXTRACTOR_METATYPE_MIMETYPE,
361 EXTRACTOR_METAFORMAT_UTF8,
362 "text/plain",
363 filecomment,
364 strlen (filecomment)+1);
365 }
366 if (filecomment != NULL)
367 free (filecomment);
368
369
370 /* if we've gotten to here then there is at least one zip entry (see get_zipinfo call above) */
371 /* note: this free()'s the info list as it goes */
372 info = start;
373 while (NULL != info)
374 { 90 {
375 if (info->filename != NULL) 91 if (EXTRACTOR_UNZIP_OK ==
376 { 92 EXTRACTOR_common_unzip_get_current_file_info (uf,
377 if ( (ret == 0) && (strlen (info->filename)) ) 93 &fi,
378 { 94 fname,
379 ret = proc (proc_cls, 95 sizeof (fname),
96 NULL, 0,
97 fcomment,
98 sizeof (fcomment)))
99 {
100 if ( (0 != strlen (fname)) &&
101 (0 !=
102 ec->proc (ec->cls,
380 "zip", 103 "zip",
381 EXTRACTOR_METATYPE_FILENAME, 104 EXTRACTOR_METATYPE_FILENAME,
382 EXTRACTOR_METAFORMAT_UTF8, 105 EXTRACTOR_METAFORMAT_C_STRING,
383 "text/plain", 106 "text/plain",
384 info->filename, 107 fname,
385 strlen (info->filename)+1); 108 strlen (fname) + 1)))
386 } 109 goto CLEANUP;
387 free (info->filename); 110 if ( (0 != strlen (fcomment)) &&
388 } 111 (0 !=
389 if (info->comment != NULL) 112 ec->proc (ec->cls,
390 {
391 if ( (ret == 0) && (strlen (info->comment) > 0) )
392 {
393 ret = proc (proc_cls,
394 "zip", 113 "zip",
395 EXTRACTOR_METATYPE_FILENAME, 114 EXTRACTOR_METATYPE_COMMENT,
396 EXTRACTOR_METAFORMAT_UTF8, 115 EXTRACTOR_METAFORMAT_C_STRING,
397 "text/plain", 116 "text/plain",
398 info->comment, 117 fcomment,
399 strlen (info->comment)+1); 118 strlen (fcomment) + 1)))
400 } 119 goto CLEANUP;
401 free (info->comment); 120 }
402 }
403 tmp = info;
404 info = info->next;
405 free (tmp);
406 } 121 }
407 return ret; 122 while (EXTRACTOR_UNZIP_OK ==
123 EXTRACTOR_common_unzip_go_to_next_file (uf));
124
125CLEANUP:
126 (void) EXTRACTOR_common_unzip_close (uf);
408} 127}
409 128
410 129/* end of zip_extractor.c */