aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/deb_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/deb_extractor.c')
-rw-r--r--src/plugins/deb_extractor.c466
1 files changed, 311 insertions, 155 deletions
diff --git a/src/plugins/deb_extractor.c b/src/plugins/deb_extractor.c
index 2bb90c5..955657e 100644
--- a/src/plugins/deb_extractor.c
+++ b/src/plugins/deb_extractor.c
@@ -1,10 +1,10 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff 3 (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your 7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version. 8 option) any later version.
9 9
10 libextractor is distributed in the hope that it will be useful, but 10 libextractor is distributed in the hope that it will be useful, but
@@ -17,12 +17,11 @@
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330, 17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. 18 Boston, MA 02111-1307, USA.
19 */ 19 */
20 20/**
21#include "platform.h" 21 * @file plugins/deb_extractor.c
22#include "extractor.h" 22 * @brief plugin to support Debian archives
23#include <zlib.h> 23 * @author Christian Grothoff
24 24 *
25/*
26 * The .deb is an ar-chive file. It contains a tar.gz file 25 * The .deb is an ar-chive file. It contains a tar.gz file
27 * named "control.tar.gz" which then contains a file 'control' 26 * named "control.tar.gz" which then contains a file 'control'
28 * that has the meta-data. And which variant of the various 27 * that has the meta-data. And which variant of the various
@@ -33,14 +32,33 @@
33 * http://lists.debian.org/debian-policy/2003/12/msg00000.html 32 * http://lists.debian.org/debian-policy/2003/12/msg00000.html
34 * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html 33 * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
35 */ 34 */
35#include "platform.h"
36#include "extractor.h"
37#include <zlib.h>
38
39
40/**
41 * Maximum file size we allow for control.tar.gz files.
42 * This is a sanity check to avoid allocating huge amounts
43 * of memory.
44 */
45#define MAX_CONTROL_SIZE (1024 * 1024)
36 46
37 47
48/**
49 * Re-implementation of 'strndup'.
50 *
51 * @param str string to duplicate
52 * @param n maximum number of bytes to copy
53 * @return NULL on error, otherwise 0-terminated copy of 'str'
54 * with at most n characters
55 */
38static char * 56static char *
39stndup (const char *str, size_t n) 57stndup (const char *str, size_t n)
40{ 58{
41 char *tmp; 59 char *tmp;
42 tmp = malloc (n + 1); 60
43 if (tmp == NULL) 61 if (NULL == (tmp = malloc (n + 1)))
44 return NULL; 62 return NULL;
45 tmp[n] = '\0'; 63 tmp[n] = '\0';
46 memcpy (tmp, str, n); 64 memcpy (tmp, str, n);
@@ -48,15 +66,29 @@ stndup (const char *str, size_t n)
48} 66}
49 67
50 68
51 69/**
52typedef struct 70 * Entry in the mapping from control data to LE types.
71 */
72struct Matches
53{ 73{
74 /**
75 * Key in the Debian control file.
76 */
54 const char *text; 77 const char *text;
78
79 /**
80 * Corresponding type in LE.
81 */
55 enum EXTRACTOR_MetaType type; 82 enum EXTRACTOR_MetaType type;
56} Matches; 83};
84
57 85
58/* see also: "man 5 deb-control" */ 86/**
59static Matches tmap[] = { 87 * Map from deb-control entries to LE types.
88 *
89 * see also: "man 5 deb-control"
90 */
91static struct Matches tmap[] = {
60 {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME}, 92 {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME},
61 {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION}, 93 {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION},
62 {"Section: ", EXTRACTOR_METATYPE_SECTION}, 94 {"Section: ", EXTRACTOR_METATYPE_SECTION},
@@ -79,7 +111,13 @@ static Matches tmap[] = {
79 111
80 112
81/** 113/**
82 * Process the control file. 114 * Process the "control" file from the control.tar.gz
115 *
116 * @param data decompressed control data
117 * @param size number of bytes in data
118 * @param proc function to call with meta data
119 * @param proc_cls closure for 'proc'
120 * @return 0 to continue extracting, 1 if we are done
83 */ 121 */
84static int 122static int
85processControl (const char *data, 123processControl (const char *data,
@@ -90,62 +128,52 @@ processControl (const char *data,
90 size_t pos; 128 size_t pos;
91 char *key; 129 char *key;
92 char *val; 130 char *val;
93 131 size_t colon;
132 size_t eol;
133 unsigned int i;
134
94 pos = 0; 135 pos = 0;
95 while (pos < size) 136 while (pos < size)
96 { 137 {
97 size_t colon; 138 for (colon = pos; ':' != data[colon]; colon++)
98 size_t eol; 139 if ((colon > size) || ('\n' == data[colon]))
99 int i; 140 return 0;
100
101 colon = pos;
102 while (data[colon] != ':')
103 {
104 if ((colon > size) || (data[colon] == '\n'))
105 return 0;
106 colon++;
107 }
108 colon++; 141 colon++;
109 while ((colon < size) && (isspace ((unsigned char) data[colon]))) 142 while ((colon < size) && (isspace ((unsigned char) data[colon])))
110 colon++; 143 colon++;
111 eol = colon; 144 eol = colon;
112 while ((eol < size) && 145 while ((eol < size) &&
113 ((data[eol] != '\n') || 146 (('\n' != data[eol]) ||
114 ((eol + 1 < size) && (data[eol + 1] == ' ')))) 147 ((eol + 1 < size) && (' ' == data[eol + 1]))))
115 eol++; 148 eol++;
116 if ((eol == colon) || (eol > size)) 149 if ((eol == colon) || (eol > size))
117 return 0; 150 return 0;
118 key = stndup (&data[pos], colon - pos); 151 if (NULL == (key = stndup (&data[pos], colon - pos)))
119 if (key == NULL)
120 return 0; 152 return 0;
121 i = 0; 153 for (i = 0; NULL != tmap[i].text; i++)
122 while (tmap[i].text != NULL)
123 { 154 {
124 if (0 == strcmp (key, tmap[i].text)) 155 if (0 != strcmp (key, tmap[i].text))
125 { 156 continue;
126 val = stndup (&data[colon], eol - colon); 157 if (NULL == (val = stndup (&data[colon], eol - colon)))
127 if (val == NULL) 158 {
128 { 159 free (key);
129 free (key); 160 return 0;
130 return 0; 161 }
131 } 162 if (0 != proc (proc_cls,
132 if (0 != proc (proc_cls, 163 "deb",
133 "deb", 164 tmap[i].type,
134 tmap[i].type, 165 EXTRACTOR_METAFORMAT_UTF8,
135 EXTRACTOR_METAFORMAT_UTF8, 166 "text/plain",
136 "text/plain", 167 val,
137 val, 168 strlen(val) + 1))
138 strlen(val) + 1)) 169 {
139 {
140 free (val);
141 free (key);
142 return 1;
143 }
144 free (val); 170 free (val);
145 break; 171 free (key);
146 } 172 return 1;
147 i++; 173 }
148 } 174 free (val);
175 break;
176 }
149 free (key); 177 free (key);
150 pos = eol + 1; 178 pos = eol + 1;
151 } 179 }
@@ -153,62 +181,142 @@ processControl (const char *data,
153} 181}
154 182
155 183
156typedef struct 184/**
185 * Header of an entry in a TAR file.
186 */
187struct TarHeader
157{ 188{
189 /**
190 * Filename.
191 */
158 char name[100]; 192 char name[100];
193
194 /**
195 * File access modes.
196 */
159 char mode[8]; 197 char mode[8];
198
199 /**
200 * Owner of the file.
201 */
160 char userId[8]; 202 char userId[8];
203
204 /**
205 * Group of the file.
206 */
161 char groupId[8]; 207 char groupId[8];
208
209 /**
210 * Size of the file, in octal.
211 */
162 char filesize[12]; 212 char filesize[12];
213
214 /**
215 * Last modification time.
216 */
163 char lastModTime[12]; 217 char lastModTime[12];
218
219 /**
220 * Checksum of the file.
221 */
164 char chksum[8]; 222 char chksum[8];
223
224 /**
225 * Is the file a link?
226 */
165 char link; 227 char link;
228
229 /**
230 * Destination of the link.
231 */
166 char linkName[100]; 232 char linkName[100];
167} TarHeader; 233};
234
168 235
169typedef struct 236/**
237 * Extended TAR header for USTar format.
238 */
239struct USTarHeader
170{ 240{
171 TarHeader tar; 241 /**
242 * Original TAR header.
243 */
244 struct TarHeader tar;
245
246 /**
247 * Additional magic for USTar.
248 */
172 char magic[6]; 249 char magic[6];
250
251 /**
252 * Format version.
253 */
173 char version[2]; 254 char version[2];
255
256 /**
257 * User name.
258 */
174 char uname[32]; 259 char uname[32];
260
261 /**
262 * Group name.
263 */
175 char gname[32]; 264 char gname[32];
265
266 /**
267 * Device major number.
268 */
176 char devmajor[8]; 269 char devmajor[8];
270
271 /**
272 * Device minor number.
273 */
177 char devminor[8]; 274 char devminor[8];
275
276 /**
277 * Path name prefix (prepended to 'name' for long paths).
278 */
178 char prefix[155]; 279 char prefix[155];
179} USTarHeader; 280};
281
180 282
181/** 283/**
182 * Process the control.tar file. 284 * Process the control.tar file.
285 *
286 * @param data the inflated (decompressed) control.tar file data
287 * @param size number of bytes in data
288 * @param proc function to call with meta data
289 * @param proc_cls closure for 'proc'
290 * @return 0 to continue extracting, 1 if we are done
183 */ 291 */
184static int 292static int
185processControlTar (const char *data, 293processControlTar (const char *data,
186 const size_t size, 294 size_t size,
187 EXTRACTOR_MetaDataProcessor proc, 295 EXTRACTOR_MetaDataProcessor proc,
188 void *proc_cls) 296 void *proc_cls)
189{ 297{
190 TarHeader *tar; 298 struct TarHeader *tar;
191 USTarHeader *ustar; 299 struct USTarHeader *ustar;
192 size_t pos; 300 size_t pos;
193 301
194 pos = 0; 302 pos = 0;
195 while (pos + sizeof (TarHeader) < size) 303 while (pos + sizeof (struct TarHeader) < size)
196 { 304 {
197 unsigned long long fsize; 305 unsigned long long fsize;
198 char buf[13]; 306 char buf[13];
199 307
200 tar = (TarHeader *) & data[pos]; 308 tar = (struct TarHeader *) & data[pos];
201 if (pos + sizeof (USTarHeader) < size) 309 if (pos + sizeof (struct USTarHeader) < size)
202 { 310 {
203 ustar = (USTarHeader *) & data[pos]; 311 ustar = (struct USTarHeader *) & data[pos];
204 if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar"))) 312 if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar")))
205 pos += 512; /* sizeof(USTarHeader); */ 313 pos += 512; /* sizeof (struct USTarHeader); */
206 else 314 else
207 pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ 315 pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */
208 } 316 }
209 else 317 else
210 { 318 {
211 pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ 319 pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */
212 } 320 }
213 321
214 memcpy (buf, &tar->filesize[0], 12); 322 memcpy (buf, &tar->filesize[0], 12);
@@ -220,9 +328,10 @@ processControlTar (const char *data,
220 328
221 if (0 == strncmp (&tar->name[0], "./control", strlen ("./control"))) 329 if (0 == strncmp (&tar->name[0], "./control", strlen ("./control")))
222 { 330 {
331 /* found the 'control' file we were looking for */
223 return processControl (&data[pos], fsize, proc, proc_cls); 332 return processControl (&data[pos], fsize, proc, proc_cls);
224 } 333 }
225 if ((fsize & 511) != 0) 334 if (0 != (fsize & 511))
226 fsize = (fsize | 511) + 1; /* round up! */ 335 fsize = (fsize | 511) + 1; /* round up! */
227 if (pos + fsize < pos) 336 if (pos + fsize < pos)
228 return 0; 337 return 0;
@@ -231,137 +340,184 @@ processControlTar (const char *data,
231 return 0; 340 return 0;
232} 341}
233 342
234#define MAX_CONTROL_SIZE (1024 * 1024)
235
236static voidpf
237Emalloc (voidpf opaque, uInt items, uInt size)
238{
239 if (SIZE_MAX / size <= items)
240 return NULL;
241 return malloc (size * items);
242}
243
244static void
245Efree (voidpf opaque, voidpf ptr)
246{
247 free (ptr);
248}
249 343
250/** 344/**
251 * Process the control.tar.gz file. 345 * Process the control.tar.gz file.
346 *
347 * @param ec extractor context with control.tar.gz at current read position
348 * @param size number of bytes in the control file
349 * @return 0 to continue extracting, 1 if we are done
252 */ 350 */
253static int 351static int
254processControlTGZ (const unsigned char *data, 352processControlTGZ (struct EXTRACTOR_ExtractContext *ec,
255 size_t size, 353 unsigned long long size)
256 EXTRACTOR_MetaDataProcessor proc,
257 void *proc_cls)
258{ 354{
259 uint32_t bufSize; 355 uint32_t bufSize;
260 char *buf; 356 char *buf;
357 void *data;
358 unsigned char *cdata;
261 z_stream strm; 359 z_stream strm;
262 int ret; 360 int ret;
361 ssize_t sret;
362 unsigned long long off;
263 363
264 bufSize = data[size - 4] + (data[size - 3] << 8) + (data[size - 2] << 16) + (data[size - 1] << 24); 364 if (size > MAX_CONTROL_SIZE)
265 if (bufSize > MAX_CONTROL_SIZE) 365 return 0;
366 if (NULL == (cdata = malloc (size)))
266 return 0; 367 return 0;
368 off = 0;
369 while (off < size)
370 {
371 if (0 >= (sret = ec->read (ec->cls, &data, size - off)))
372 {
373 free (cdata);
374 return 0;
375 }
376 memcpy (&cdata[off], data, sret);
377 off += sret;
378 }
379 bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16) + (cdata[size - 1] << 24);
380 if (bufSize > MAX_CONTROL_SIZE)
381 {
382 free (cdata);
383 return 0;
384 }
385 if (NULL == (buf = malloc (bufSize)))
386 {
387 free (cdata);
388 return 0;
389 }
390 ret = 0;
267 memset (&strm, 0, sizeof (z_stream)); 391 memset (&strm, 0, sizeof (z_stream));
268 strm.next_in = (Bytef *) data; 392 strm.next_in = (Bytef *) data;
269 strm.avail_in = size; 393 strm.avail_in = size;
270 strm.total_in = 0;
271 strm.zalloc = &Emalloc;
272 strm.zfree = &Efree;
273 strm.opaque = NULL;
274
275 if (Z_OK == inflateInit2 (&strm, 15 + 32)) 394 if (Z_OK == inflateInit2 (&strm, 15 + 32))
276 { 395 {
277 buf = malloc (bufSize);
278 if (buf == NULL)
279 {
280 inflateEnd (&strm);
281 return 0;
282 }
283 strm.next_out = (Bytef *) buf; 396 strm.next_out = (Bytef *) buf;
284 strm.avail_out = bufSize; 397 strm.avail_out = bufSize;
285 inflate (&strm, Z_FINISH); 398 inflate (&strm, Z_FINISH);
286 if (strm.total_out > 0) 399 if (strm.total_out > 0)
287 { 400 ret = processControlTar (buf, strm.total_out,
288 ret = processControlTar (buf, strm.total_out, proc, proc_cls); 401 ec->proc, ec->cls);
289 inflateEnd (&strm);
290 free (buf);
291 return ret;
292 }
293 free (buf);
294 inflateEnd (&strm); 402 inflateEnd (&strm);
295 } 403 }
296 return 0; 404 free (buf);
405 free (cdata);
406 return ret;
297} 407}
298 408
299typedef struct 409
410/**
411 * Header of an object in an "AR"chive file.
412 */
413struct ObjectHeader
300{ 414{
415 /**
416 * Name of the file.
417 */
301 char name[16]; 418 char name[16];
419
420 /**
421 * Last modification time for the file.
422 */
302 char lastModTime[12]; 423 char lastModTime[12];
424
425 /**
426 * User ID of the owner.
427 */
303 char userId[6]; 428 char userId[6];
429
430 /**
431 * Group ID of the owner.
432 */
304 char groupId[6]; 433 char groupId[6];
434
435 /**
436 * File access modes.
437 */
305 char modeInOctal[8]; 438 char modeInOctal[8];
439
440 /**
441 * Size of the file (as decimal string)
442 */
306 char filesize[10]; 443 char filesize[10];
444
445 /**
446 * Trailer of the object header ("`\n")
447 */
307 char trailer[2]; 448 char trailer[2];
308} ObjectHeader; 449};
309 450
310 451
311int 452/**
312EXTRACTOR_deb_extract (const char *data, 453 * Main entry method for the DEB extraction plugin.
313 size_t size, 454 *
314 EXTRACTOR_MetaDataProcessor proc, 455 * @param ec extraction context provided to the plugin
315 void *proc_cls, 456 */
316 const char *options) 457void
458EXTRACTOR_deb_extract_method (struct EXTRACTOR_ExtractContext *ec)
317{ 459{
318 size_t pos; 460 uint64_t pos;
319 int done = 0; 461 int done = 0;
320 ObjectHeader *hdr; 462 const struct ObjectHeader *hdr;
321 unsigned long long fsize; 463 uint64_t fsize;
464 unsigned long long csize;
322 char buf[11]; 465 char buf[11];
323 466 void *data;
324 if (size < 128) 467
325 return 0; 468 fsize = ec->get_size (ec->cls);
326 if (0 != strncmp ("!<arch>\n", data, strlen ("!<arch>\n"))) 469 if (fsize < 128)
327 return 0; 470 return;
328 pos = strlen ("!<arch>\n"); 471 if (8 !=
329 while (pos + sizeof (ObjectHeader) < size) 472 ec->read (ec->cls, &data, 8))
473 return;
474 if (0 != strncmp ("!<arch>\n", data, 8))
475 return;
476 pos = 8;
477 while (pos + sizeof (struct ObjectHeader) < fsize)
330 { 478 {
331 hdr = (ObjectHeader *) & data[pos]; 479 if (pos !=
480 ec->seek (ec->cls, pos, SEEK_SET))
481 return;
482 if (sizeof (struct ObjectHeader) !=
483 ec->read (ec->cls, &data, sizeof (struct ObjectHeader)))
484 return;
485 hdr = data;
332 if (0 != strncmp (&hdr->trailer[0], "`\n", 2)) 486 if (0 != strncmp (&hdr->trailer[0], "`\n", 2))
333 return 0; 487 return;
334 memcpy (buf, &hdr->filesize[0], 10); 488 memcpy (buf, &hdr->filesize[0], 10);
335 buf[10] = '\0'; 489 buf[10] = '\0';
336 if (1 != sscanf (buf, "%10llu", &fsize)) 490 if (1 != sscanf (buf, "%10llu", &csize))
337 return 0; 491 return;
338 pos += sizeof (ObjectHeader); 492 pos += sizeof (struct ObjectHeader);
339 if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos)) 493 if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos))
340 return 0; 494 return;
341 if (0 == strncmp (&hdr->name[0], 495 if (0 == strncmp (&hdr->name[0],
342 "control.tar.gz", strlen ("control.tar.gz"))) 496 "control.tar.gz",
497 strlen ("control.tar.gz")))
343 { 498 {
344 if (0 != processControlTGZ ((const unsigned char *) &data[pos], 499 if (0 != processControlTGZ (ec,
345 fsize, proc, proc_cls)) 500 csize))
346 return 1; 501 return;
347 done++; 502 done++;
348 } 503 }
349 if (0 == strncmp (&hdr->name[0], 504 if (0 == strncmp (&hdr->name[0],
350 "debian-binary", strlen ("debian-binary"))) 505 "debian-binary", strlen ("debian-binary")))
351 { 506 {
352 if (0 != proc (proc_cls, 507 if (0 != ec->proc (ec->cls,
353 "deb", 508 "deb",
354 EXTRACTOR_METATYPE_MIMETYPE, 509 EXTRACTOR_METATYPE_MIMETYPE,
355 EXTRACTOR_METAFORMAT_UTF8, 510 EXTRACTOR_METAFORMAT_UTF8,
356 "text/plain", 511 "text/plain",
357 "application/x-debian-package", 512 "application/x-debian-package",
358 strlen ("application/x-debian-package")+1)) 513 strlen ("application/x-debian-package")+1))
359 return 1; 514 return;
360 done++; 515 done++;
361 } 516 }
362 pos += fsize; 517 pos += csize;
363 if (done == 2) 518 if (2 == done)
364 break; /* no need to process the rest of the archive */ 519 break; /* no need to process the rest of the archive */
365 } 520 }
366 return 0;
367} 521}
522
523/* end of deb_extractor.c */