libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

deb_extractor.c (12152B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 /**
     21  * @file plugins/deb_extractor.c
     22  * @brief plugin to support Debian archives
     23  * @author Christian Grothoff
     24  *
     25  * The .deb is an ar-chive file.  It contains a tar.gz file
     26  * named "control.tar.gz" which then contains a file 'control'
     27  * that has the meta-data.  And which variant of the various
     28  * ar file formats is used is also not quite certain. Yuck.
     29  *
     30  * References:
     31  * http://www.mkssoftware.com/docs/man4/tar.4.asp
     32  * http://lists.debian.org/debian-policy/2003/12/msg00000.html
     33  * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
     34  */
     35 #include "platform.h"
     36 #include "extractor.h"
     37 #include <zlib.h>
     38 
     39 
/**
 * Maximum size (in bytes) we allow for control.tar.gz files.
 * processControlTGZ() applies the same limit to the decompressed
 * size announced in the last four bytes of the gzip data.
 * This is a sanity check to avoid allocating huge amounts
 * of memory.
 */
#define MAX_CONTROL_SIZE (1024 * 1024)
     46 
     47 
     48 /**
     49  * Re-implementation of 'strndup'.
     50  *
     51  * @param str string to duplicate
     52  * @param n maximum number of bytes to copy
     53  * @return NULL on error, otherwise 0-terminated copy of 'str'
     54  *         with at most n characters
     55  */
     56 static char *
     57 stndup (const char *str, size_t n)
     58 {
     59   char *tmp;
     60 
     61   if (NULL == (tmp = malloc (n + 1)))
     62     return NULL;
     63   tmp[n] = '\0';
     64   memcpy (tmp, str, n);
     65   return tmp;
     66 }
     67 
     68 
/**
 * Entry in the mapping from control data to LE types.
 *
 * The field order matters: the 'tmap' table initializes its
 * entries positionally as { text, type }.
 */
struct Matches
{
  /**
   * Key in the Debian control file.  processControl() compares
   * it with 'strcmp' against the start of a line up to and
   * including the whitespace after the colon, which is why the
   * entries in 'tmap' end in ": ".
   */
  const char *text;

  /**
   * Corresponding type in LE.
   */
  enum EXTRACTOR_MetaType type;
};
     84 
     85 
     86 /**
     87  * Map from deb-control entries to LE types.
     88  *
     89  * see also: "man 5 deb-control"
     90  */
     91 static struct Matches tmap[] = {
     92   {"Package: ",       EXTRACTOR_METATYPE_PACKAGE_NAME},
     93   {"Version: ",       EXTRACTOR_METATYPE_PACKAGE_VERSION},
     94   {"Section: ",       EXTRACTOR_METATYPE_SECTION},
     95   {"Priority: ",      EXTRACTOR_METATYPE_UPLOAD_PRIORITY},
     96   {"Architecture: ",  EXTRACTOR_METATYPE_TARGET_ARCHITECTURE},
     97   {"Depends: ",       EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY},
     98   {"Recommends: ",    EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS},
     99   {"Suggests: ",      EXTRACTOR_METATYPE_PACKAGE_SUGGESTS},
    100   {"Installed-Size: ",EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE},
    101   {"Maintainer: ",    EXTRACTOR_METATYPE_PACKAGE_MAINTAINER},
    102   {"Description: ",   EXTRACTOR_METATYPE_DESCRIPTION},
    103   {"Source: ",        EXTRACTOR_METATYPE_PACKAGE_SOURCE},
    104   {"Pre-Depends: ",   EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY},
    105   {"Conflicts: ",     EXTRACTOR_METATYPE_PACKAGE_CONFLICTS},
    106   {"Replaces: ",      EXTRACTOR_METATYPE_PACKAGE_REPLACES},
    107   {"Provides: ",      EXTRACTOR_METATYPE_PACKAGE_PROVIDES},
    108   {"Essential: ",     EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL},
    109   {NULL, 0}
    110 };
    111 
    112 
    113 /**
    114  * Process the "control" file from the control.tar.gz
    115  *
    116  * @param data decompressed control data
    117  * @param size number of bytes in data
    118  * @param proc function to call with meta data
    119  * @param proc_cls closure for 'proc'
    120  * @return 0 to continue extracting, 1 if we are done
    121  */
    122 static int
    123 processControl (const char *data,
    124                 const size_t size,
    125                 EXTRACTOR_MetaDataProcessor proc,
    126                 void *proc_cls)
    127 {
    128   size_t pos;
    129   char *key;
    130   char *val;
    131   size_t colon;
    132   size_t eol;
    133   unsigned int i;
    134 
    135   pos = 0;
    136   while (pos < size)
    137   {
    138     for (colon = pos; ':' != data[colon]; colon++)
    139       if ((colon > size) || ('\n' == data[colon]))
    140         return 0;
    141     colon++;
    142     while ((colon < size) && (isspace ((unsigned char) data[colon])))
    143       colon++;
    144     eol = colon;
    145     while ((eol < size) &&
    146            (('\n' != data[eol]) ||
    147             ((eol + 1 < size) && (' '  == data[eol + 1]))))
    148       eol++;
    149     if ((eol == colon) || (eol > size))
    150       return 0;
    151     if (NULL == (key = stndup (&data[pos], colon - pos)))
    152       return 0;
    153     for (i = 0; NULL != tmap[i].text; i++)
    154     {
    155       if (0 != strcmp (key, tmap[i].text))
    156         continue;
    157       if (NULL == (val = stndup (&data[colon], eol - colon)))
    158       {
    159         free (key);
    160         return 0;
    161       }
    162       if (0 != proc (proc_cls,
    163                      "deb",
    164                      tmap[i].type,
    165                      EXTRACTOR_METAFORMAT_UTF8,
    166                      "text/plain",
    167                      val,
    168                      strlen (val) + 1))
    169       {
    170         free (val);
    171         free (key);
    172         return 1;
    173       }
    174       free (val);
    175       break;
    176     }
    177     free (key);
    178     pos = eol + 1;
    179   }
    180   return 0;
    181 }
    182 
    183 
/**
 * Header of an entry in a TAR file.
 *
 * processControlTar() overlays this struct directly onto the raw
 * archive bytes, so the order and the sizes of the fields must not
 * be changed.  All members are 'char' (arrays); their sizes add up
 * to 257 bytes, which is the amount processControlTar() skips for
 * a header without the USTar magic.
 */
struct TarHeader
{
  /**
   * Filename.
   */
  char name[100];

  /**
   * File access modes.
   */
  char mode[8];

  /**
   * Owner of the file.
   */
  char userId[8];

  /**
   * Group of the file.
   */
  char groupId[8];

  /**
   * Size of the file, in octal (parsed with "%12llo"; the field
   * is not necessarily 0-terminated).
   */
  char filesize[12];

  /**
   * Last modification time.
   */
  char lastModTime[12];

  /**
   * Checksum of the file.
   */
  char chksum[8];

  /**
   * Is the file a link?
   */
  char link;

  /**
   * Destination of the link.
   */
  char linkName[100];
};
    234 
    235 
/**
 * Extended TAR header for USTar format.
 *
 * Like 'struct TarHeader', this is overlaid onto the raw archive
 * bytes, so the layout must not be changed.  The members add up to
 * 500 bytes; processControlTar() nevertheless advances by 512 bytes
 * when it finds the USTar magic.
 */
struct USTarHeader
{
  /**
   * Original TAR header.
   */
  struct TarHeader tar;

  /**
   * Additional magic for USTar; processControlTar() checks that it
   * starts with "ustar".
   */
  char magic[6];

  /**
   * Format version.
   */
  char version[2];

  /**
   * User name.
   */
  char uname[32];

  /**
   * Group name.
   */
  char gname[32];

  /**
   * Device major number.
   */
  char devmajor[8];

  /**
   * Device minor number.
   */
  char devminor[8];

  /**
   * Path name prefix according to the POSIX ustar format; this
   * plugin never reads it.
   */
  char prefix[155];
};
    281 
    282 
    283 /**
    284  * Process the control.tar file.
    285  *
    286  * @param data the deflated control.tar file data
    287  * @param size number of bytes in data
    288  * @param proc function to call with meta data
    289  * @param proc_cls closure for 'proc'
    290  * @return 0 to continue extracting, 1 if we are done
    291  */
    292 static int
    293 processControlTar (const char *data,
    294                    size_t size,
    295                    EXTRACTOR_MetaDataProcessor proc,
    296                    void *proc_cls)
    297 {
    298   struct TarHeader *tar;
    299   struct USTarHeader *ustar;
    300   size_t pos;
    301 
    302   pos = 0;
    303   while (pos + sizeof (struct TarHeader) < size)
    304   {
    305     unsigned long long fsize;
    306     char buf[13];
    307 
    308     tar = (struct TarHeader *) &data[pos];
    309     if (pos + sizeof (struct USTarHeader) < size)
    310     {
    311       ustar = (struct USTarHeader *) &data[pos];
    312       if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar")))
    313         pos += 512;             /* sizeof (struct USTarHeader); */
    314       else
    315         pos += 257;             /* sizeof (struct TarHeader); minus gcc alignment... */
    316     }
    317     else
    318     {
    319       pos += 257;               /* sizeof (struct TarHeader); minus gcc alignment... */
    320     }
    321 
    322     memcpy (buf, &tar->filesize[0], 12);
    323     buf[12] = '\0';
    324     if (1 != sscanf (buf, "%12llo", &fsize))    /* octal! Yuck yuck! */
    325       return 0;
    326     if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos))
    327       return 0;
    328 
    329     if (0 == strncmp (&tar->name[0], "./control", strlen ("./control")))
    330     {
    331       /* found the 'control' file we were looking for */
    332       return processControl (&data[pos], fsize, proc, proc_cls);
    333     }
    334     if (0 != (fsize & 511))
    335       fsize = (fsize | 511) + 1;        /* round up! */
    336     if (pos + fsize < pos)
    337       return 0;
    338     pos += fsize;
    339   }
    340   return 0;
    341 }
    342 
    343 
    344 /**
    345  * Process the control.tar.gz file.
    346  *
    347  * @param ec extractor context with control.tar.gz at current read position
    348  * @param size number of bytes in the control file
    349  * @return 0 to continue extracting, 1 if we are done
    350  */
    351 static int
    352 processControlTGZ (struct EXTRACTOR_ExtractContext *ec,
    353                    unsigned long long size)
    354 {
    355   uint32_t bufSize;
    356   char *buf;
    357   void *data;
    358   unsigned char *cdata;
    359   z_stream strm;
    360   int ret;
    361   ssize_t sret;
    362   unsigned long long off;
    363 
    364   if (size > MAX_CONTROL_SIZE)
    365     return 0;
    366   if (0 == size)
    367     return 0;
    368   if (size < 4)
    369     return 0;
    370   if (NULL == (cdata = malloc (size)))
    371     return 0;
    372   off = 0;
    373   while (off < size)
    374   {
    375     if (0 >= (sret = ec->read (ec->cls, &data, size - off)))
    376     {
    377       free (cdata);
    378       return 0;
    379     }
    380     memcpy (&cdata[off],
    381             data,
    382             sret);
    383     off += sret;
    384   }
    385   bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16)
    386             + (cdata[size - 1] << 24);
    387   if (bufSize > MAX_CONTROL_SIZE)
    388   {
    389     free (cdata);
    390     return 0;
    391   }
    392   if (NULL == (buf = malloc (bufSize)))
    393   {
    394     free (cdata);
    395     return 0;
    396   }
    397   ret = 0;
    398   memset (&strm, 0, sizeof (z_stream));
    399   strm.next_in = (Bytef *) data;
    400   strm.avail_in = size;
    401   if (Z_OK == inflateInit2 (&strm, 15 + 32))
    402   {
    403     strm.next_out = (Bytef *) buf;
    404     strm.avail_out = bufSize;
    405     inflate (&strm, Z_FINISH);
    406     if (strm.total_out > 0)
    407       ret = processControlTar (buf, strm.total_out,
    408                                ec->proc, ec->cls);
    409     inflateEnd (&strm);
    410   }
    411   free (buf);
    412   free (cdata);
    413   return ret;
    414 }
    415 
    416 
/**
 * Header of an object in an "AR"chive file.
 *
 * EXTRACTOR_deb_extract_method() interprets the bytes it reads from
 * the archive directly as this struct, so the layout must not be
 * changed.  All members are 'char' arrays (60 bytes in total).
 */
struct ObjectHeader
{
  /**
   * Name of the file.
   */
  char name[16];

  /**
   * Last modification time for the file.
   */
  char lastModTime[12];

  /**
   * User ID of the owner.
   */
  char userId[6];

  /**
   * Group ID of the owner.
   */
  char groupId[6];

  /**
   * File access modes.
   */
  char modeInOctal[8];

  /**
   * Size of the file (as decimal string, not 0-terminated)
   */
  char filesize[10];

  /**
   * Trailer of the object header ("`\n")
   */
  char trailer[2];
};
    457 
    458 
    459 /**
    460  * Main entry method for the DEB extraction plugin.
    461  *
    462  * @param ec extraction context provided to the plugin
    463  */
    464 void
    465 EXTRACTOR_deb_extract_method (struct EXTRACTOR_ExtractContext *ec)
    466 {
    467   uint64_t pos;
    468   int done = 0;
    469   const struct ObjectHeader *hdr;
    470   uint64_t fsize;
    471   unsigned long long csize;
    472   char buf[11];
    473   void *data;
    474 
    475   fsize = ec->get_size (ec->cls);
    476   if (fsize < 128)
    477     return;
    478   if (8 !=
    479       ec->read (ec->cls, &data, 8))
    480     return;
    481   if (0 != strncmp ("!<arch>\n", data, 8))
    482     return;
    483   pos = 8;
    484   while (pos + sizeof (struct ObjectHeader) < fsize)
    485   {
    486     if (pos !=
    487         ec->seek (ec->cls, pos, SEEK_SET))
    488       return;
    489     if (sizeof (struct ObjectHeader) !=
    490         ec->read (ec->cls, &data, sizeof (struct ObjectHeader)))
    491       return;
    492     hdr = data;
    493     if (0 != strncmp (&hdr->trailer[0], "`\n", 2))
    494       return;
    495     memcpy (buf, &hdr->filesize[0], 10);
    496     buf[10] = '\0';
    497     if (1 != sscanf (buf, "%10llu", &csize))
    498       return;
    499     pos += sizeof (struct ObjectHeader);
    500     if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos))
    501       return;
    502     if (0 == strncmp (&hdr->name[0],
    503                       "control.tar.gz",
    504                       strlen ("control.tar.gz")))
    505     {
    506       if (0 != processControlTGZ (ec,
    507                                   csize))
    508         return;
    509       done++;
    510     }
    511     if (0 == strncmp (&hdr->name[0],
    512                       "debian-binary", strlen ("debian-binary")))
    513     {
    514       if (0 != ec->proc (ec->cls,
    515                          "deb",
    516                          EXTRACTOR_METATYPE_MIMETYPE,
    517                          EXTRACTOR_METAFORMAT_UTF8,
    518                          "text/plain",
    519                          "application/x-debian-package",
    520                          strlen ("application/x-debian-package") + 1))
    521         return;
    522       done++;
    523     }
    524     pos += csize;
    525     if (2 == done)
    526       break;                    /* no need to process the rest of the archive */
    527   }
    528 }
    529 
    530 
    531 /* end of deb_extractor.c */