deb_extractor.c (12152B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file plugins/deb_extractor.c 22 * @brief plugin to support Debian archives 23 * @author Christian Grothoff 24 * 25 * The .deb is an ar-chive file. It contains a tar.gz file 26 * named "control.tar.gz" which then contains a file 'control' 27 * that has the meta-data. And which variant of the various 28 * ar file formats is used is also not quite certain. Yuck. 29 * 30 * References: 31 * http://www.mkssoftware.com/docs/man4/tar.4.asp 32 * http://lists.debian.org/debian-policy/2003/12/msg00000.html 33 * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html 34 */ 35 #include "platform.h" 36 #include "extractor.h" 37 #include <zlib.h> 38 39 40 /** 41 * Maximum file size we allow for control.tar.gz files. 42 * This is a sanity check to avoid allocating huge amounts 43 * of memory. 44 */ 45 #define MAX_CONTROL_SIZE (1024 * 1024) 46 47 48 /** 49 * Re-implementation of 'strndup'. 50 * 51 * @param str string to duplicate 52 * @param n maximum number of bytes to copy 53 * @return NULL on error, otherwise 0-terminated copy of 'str' 54 * with at most n characters 55 */ 56 static char * 57 stndup (const char *str, size_t n) 58 { 59 char *tmp; 60 61 if (NULL == (tmp = malloc (n + 1))) 62 return NULL; 63 tmp[n] = '\0'; 64 memcpy (tmp, str, n); 65 return tmp; 66 } 67 68 69 /** 70 * Entry in the mapping from control data to LE types. 71 */ 72 struct Matches 73 { 74 /** 75 * Key in the Debian control file. 76 */ 77 const char *text; 78 79 /** 80 * Corresponding type in LE. 81 */ 82 enum EXTRACTOR_MetaType type; 83 }; 84 85 86 /** 87 * Map from deb-control entries to LE types. 88 * 89 * see also: "man 5 deb-control" 90 */ 91 static struct Matches tmap[] = { 92 {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME}, 93 {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION}, 94 {"Section: ", EXTRACTOR_METATYPE_SECTION}, 95 {"Priority: ", EXTRACTOR_METATYPE_UPLOAD_PRIORITY}, 96 {"Architecture: ", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE}, 97 {"Depends: ", EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY}, 98 {"Recommends: ", EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS}, 99 {"Suggests: ", EXTRACTOR_METATYPE_PACKAGE_SUGGESTS}, 100 {"Installed-Size: ",EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE}, 101 {"Maintainer: ", EXTRACTOR_METATYPE_PACKAGE_MAINTAINER}, 102 {"Description: ", EXTRACTOR_METATYPE_DESCRIPTION}, 103 {"Source: ", EXTRACTOR_METATYPE_PACKAGE_SOURCE}, 104 {"Pre-Depends: ", EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY}, 105 {"Conflicts: ", EXTRACTOR_METATYPE_PACKAGE_CONFLICTS}, 106 {"Replaces: ", EXTRACTOR_METATYPE_PACKAGE_REPLACES}, 107 {"Provides: ", EXTRACTOR_METATYPE_PACKAGE_PROVIDES}, 108 {"Essential: ", EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL}, 109 {NULL, 0} 110 }; 111 112 113 /** 114 * Process the "control" file from the control.tar.gz 115 * 116 * @param data decompressed control data 117 * @param size number of bytes in data 118 * @param proc function to call with meta data 119 * @param proc_cls closure for 'proc' 120 * @return 0 to continue extracting, 1 if we are done 121 */ 122 static int 123 processControl (const char *data, 124 const size_t size, 125 EXTRACTOR_MetaDataProcessor proc, 126 void *proc_cls) 127 { 128 size_t pos; 129 char *key; 130 char *val; 131 size_t colon; 132 size_t eol; 133 unsigned int i; 134 135 pos = 0; 136 while (pos < size) 137 { 138 for (colon = pos; ':' != data[colon]; colon++) 139 if ((colon > size) || ('\n' == data[colon])) 140 return 0; 141 colon++; 142 while ((colon < size) && (isspace ((unsigned char) data[colon]))) 143 colon++; 144 eol = colon; 145 while ((eol < size) && 146 (('\n' != data[eol]) || 147 ((eol + 1 < size) && (' ' == data[eol + 1])))) 148 eol++; 149 if ((eol == colon) || (eol > size)) 150 return 0; 151 if (NULL == (key = stndup (&data[pos], colon - pos))) 152 return 0; 153 for (i = 0; NULL != tmap[i].text; i++) 154 { 155 if (0 != strcmp (key, tmap[i].text)) 156 continue; 157 if (NULL == (val = stndup (&data[colon], eol - colon))) 158 { 159 free (key); 160 return 0; 161 } 162 if (0 != proc (proc_cls, 163 "deb", 164 tmap[i].type, 165 EXTRACTOR_METAFORMAT_UTF8, 166 "text/plain", 167 val, 168 strlen (val) + 1)) 169 { 170 free (val); 171 free (key); 172 return 1; 173 } 174 free (val); 175 break; 176 } 177 free (key); 178 pos = eol + 1; 179 } 180 return 0; 181 } 182 183 184 /** 185 * Header of an entry in a TAR file. 186 */ 187 struct TarHeader 188 { 189 /** 190 * Filename. 191 */ 192 char name[100]; 193 194 /** 195 * File access modes. 196 */ 197 char mode[8]; 198 199 /** 200 * Owner of the file. 201 */ 202 char userId[8]; 203 204 /** 205 * Group of the file. 206 */ 207 char groupId[8]; 208 209 /** 210 * Size of the file, in octal. 211 */ 212 char filesize[12]; 213 214 /** 215 * Last modification time. 216 */ 217 char lastModTime[12]; 218 219 /** 220 * Checksum of the file. 221 */ 222 char chksum[8]; 223 224 /** 225 * Is the file a link? 226 */ 227 char link; 228 229 /** 230 * Destination of the link. 231 */ 232 char linkName[100]; 233 }; 234 235 236 /** 237 * Extended TAR header for USTar format. 238 */ 239 struct USTarHeader 240 { 241 /** 242 * Original TAR header. 243 */ 244 struct TarHeader tar; 245 246 /** 247 * Additinal magic for USTar. 248 */ 249 char magic[6]; 250 251 /** 252 * Format version. 253 */ 254 char version[2]; 255 256 /** 257 * User name. 258 */ 259 char uname[32]; 260 261 /** 262 * Group name. 263 */ 264 char gname[32]; 265 266 /** 267 * Device major number. 268 */ 269 char devmajor[8]; 270 271 /** 272 * Device minor number. 273 */ 274 char devminor[8]; 275 276 /** 277 * Unknown (padding?). 278 */ 279 char prefix[155]; 280 }; 281 282 283 /** 284 * Process the control.tar file. 285 * 286 * @param data the deflated control.tar file data 287 * @param size number of bytes in data 288 * @param proc function to call with meta data 289 * @param proc_cls closure for 'proc' 290 * @return 0 to continue extracting, 1 if we are done 291 */ 292 static int 293 processControlTar (const char *data, 294 size_t size, 295 EXTRACTOR_MetaDataProcessor proc, 296 void *proc_cls) 297 { 298 struct TarHeader *tar; 299 struct USTarHeader *ustar; 300 size_t pos; 301 302 pos = 0; 303 while (pos + sizeof (struct TarHeader) < size) 304 { 305 unsigned long long fsize; 306 char buf[13]; 307 308 tar = (struct TarHeader *) &data[pos]; 309 if (pos + sizeof (struct USTarHeader) < size) 310 { 311 ustar = (struct USTarHeader *) &data[pos]; 312 if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar"))) 313 pos += 512; /* sizeof (struct USTarHeader); */ 314 else 315 pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */ 316 } 317 else 318 { 319 pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */ 320 } 321 322 memcpy (buf, &tar->filesize[0], 12); 323 buf[12] = '\0'; 324 if (1 != sscanf (buf, "%12llo", &fsize)) /* octal! Yuck yuck! */ 325 return 0; 326 if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos)) 327 return 0; 328 329 if (0 == strncmp (&tar->name[0], "./control", strlen ("./control"))) 330 { 331 /* found the 'control' file we were looking for */ 332 return processControl (&data[pos], fsize, proc, proc_cls); 333 } 334 if (0 != (fsize & 511)) 335 fsize = (fsize | 511) + 1; /* round up! */ 336 if (pos + fsize < pos) 337 return 0; 338 pos += fsize; 339 } 340 return 0; 341 } 342 343 344 /** 345 * Process the control.tar.gz file. 346 * 347 * @param ec extractor context with control.tar.gz at current read position 348 * @param size number of bytes in the control file 349 * @return 0 to continue extracting, 1 if we are done 350 */ 351 static int 352 processControlTGZ (struct EXTRACTOR_ExtractContext *ec, 353 unsigned long long size) 354 { 355 uint32_t bufSize; 356 char *buf; 357 void *data; 358 unsigned char *cdata; 359 z_stream strm; 360 int ret; 361 ssize_t sret; 362 unsigned long long off; 363 364 if (size > MAX_CONTROL_SIZE) 365 return 0; 366 if (0 == size) 367 return 0; 368 if (size < 4) 369 return 0; 370 if (NULL == (cdata = malloc (size))) 371 return 0; 372 off = 0; 373 while (off < size) 374 { 375 if (0 >= (sret = ec->read (ec->cls, &data, size - off))) 376 { 377 free (cdata); 378 return 0; 379 } 380 memcpy (&cdata[off], 381 data, 382 sret); 383 off += sret; 384 } 385 bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16) 386 + (cdata[size - 1] << 24); 387 if (bufSize > MAX_CONTROL_SIZE) 388 { 389 free (cdata); 390 return 0; 391 } 392 if (NULL == (buf = malloc (bufSize))) 393 { 394 free (cdata); 395 return 0; 396 } 397 ret = 0; 398 memset (&strm, 0, sizeof (z_stream)); 399 strm.next_in = (Bytef *) data; 400 strm.avail_in = size; 401 if (Z_OK == inflateInit2 (&strm, 15 + 32)) 402 { 403 strm.next_out = (Bytef *) buf; 404 strm.avail_out = bufSize; 405 inflate (&strm, Z_FINISH); 406 if (strm.total_out > 0) 407 ret = processControlTar (buf, strm.total_out, 408 ec->proc, ec->cls); 409 inflateEnd (&strm); 410 } 411 free (buf); 412 free (cdata); 413 return ret; 414 } 415 416 417 /** 418 * Header of an object in an "AR"chive file. 419 */ 420 struct ObjectHeader 421 { 422 /** 423 * Name of the file. 424 */ 425 char name[16]; 426 427 /** 428 * Last modification time for the file. 429 */ 430 char lastModTime[12]; 431 432 /** 433 * User ID of the owner. 434 */ 435 char userId[6]; 436 437 /** 438 * Group ID of the owner. 439 */ 440 char groupId[6]; 441 442 /** 443 * File access modes. 444 */ 445 char modeInOctal[8]; 446 447 /** 448 * Size of the file (as decimal string) 449 */ 450 char filesize[10]; 451 452 /** 453 * Tailer of the object header ("`\n") 454 */ 455 char trailer[2]; 456 }; 457 458 459 /** 460 * Main entry method for the DEB extraction plugin. 461 * 462 * @param ec extraction context provided to the plugin 463 */ 464 void 465 EXTRACTOR_deb_extract_method (struct EXTRACTOR_ExtractContext *ec) 466 { 467 uint64_t pos; 468 int done = 0; 469 const struct ObjectHeader *hdr; 470 uint64_t fsize; 471 unsigned long long csize; 472 char buf[11]; 473 void *data; 474 475 fsize = ec->get_size (ec->cls); 476 if (fsize < 128) 477 return; 478 if (8 != 479 ec->read (ec->cls, &data, 8)) 480 return; 481 if (0 != strncmp ("!<arch>\n", data, 8)) 482 return; 483 pos = 8; 484 while (pos + sizeof (struct ObjectHeader) < fsize) 485 { 486 if (pos != 487 ec->seek (ec->cls, pos, SEEK_SET)) 488 return; 489 if (sizeof (struct ObjectHeader) != 490 ec->read (ec->cls, &data, sizeof (struct ObjectHeader))) 491 return; 492 hdr = data; 493 if (0 != strncmp (&hdr->trailer[0], "`\n", 2)) 494 return; 495 memcpy (buf, &hdr->filesize[0], 10); 496 buf[10] = '\0'; 497 if (1 != sscanf (buf, "%10llu", &csize)) 498 return; 499 pos += sizeof (struct ObjectHeader); 500 if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos)) 501 return; 502 if (0 == strncmp (&hdr->name[0], 503 "control.tar.gz", 504 strlen ("control.tar.gz"))) 505 { 506 if (0 != processControlTGZ (ec, 507 csize)) 508 return; 509 done++; 510 } 511 if (0 == strncmp (&hdr->name[0], 512 "debian-binary", strlen ("debian-binary"))) 513 { 514 if (0 != ec->proc (ec->cls, 515 "deb", 516 EXTRACTOR_METATYPE_MIMETYPE, 517 EXTRACTOR_METAFORMAT_UTF8, 518 "text/plain", 519 "application/x-debian-package", 520 strlen ("application/x-debian-package") + 1)) 521 return; 522 done++; 523 } 524 pos += csize; 525 if (2 == done) 526 break; /* no need to process the rest of the archive */ 527 } 528 } 529 530 531 /* end of deb_extractor.c */