libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

png_extractor.c (12990B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002, 2003, 2004, 2005, 2009, 2012 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 /**
     21  * @file plugins/png_extractor.c
     22  * @brief plugin to support PNG files
     23  * @author Christian Grothoff
     24  */
     25 #include "platform.h"
     26 #include <zlib.h>
     27 #include "extractor.h"
     28 #include "convert.h"
     29 
     30 /**
     31  * Header that every PNG file must start with.
     32  */
     33 #define PNG_HEADER "\211PNG\r\n\032\n"
     34 
     35 
     36 /**
     37  * Function to create 0-terminated string from the
     38  * first n characters of the given input.
     39  *
     40  * @param str input string
     41  * @param n length of the input
     42  * @return n-bytes from str followed by 0-termination, NULL on error
     43  */
     44 static char *
     45 stndup (const char *str,
     46         size_t n)
     47 {
     48   char *tmp;
     49 
     50   if (n + 1 < n)
     51     return NULL;
     52   if (NULL == (tmp = malloc (n + 1)))
     53     return NULL;
     54   tmp[n] = '\0';
     55   memcpy (tmp, str, n);
     56   return tmp;
     57 }
     58 
     59 
     60 /**
     61  * strnlen is GNU specific, let's redo it here to be
     62  * POSIX compliant.
     63  *
     64  * @param str input string
     65  * @param maxlen maximum length of str
     66  * @return first position of 0-terminator in str, or maxlen
     67  */
     68 static size_t
     69 stnlen (const char *str,
     70         size_t maxlen)
     71 {
     72   size_t ret;
     73 
     74   ret = 0;
     75   while ( (ret < maxlen) &&
     76           ('\0' != str[ret]) )
     77     ret++;
     78   return ret;
     79 }
     80 
     81 
     82 /**
     83  * Interpret the 4 bytes in 'buf' as a big-endian
     84  * encoded 32-bit integer, convert and return.
     85  *
     86  * @param pos (unaligned) pointer to 4 byte integer
     87  * @return converted integer in host byte order
     88  */
     89 static uint32_t
     90 get_int_at (const void *pos)
     91 {
     92   uint32_t i;
     93 
     94   memcpy (&i, pos, sizeof (i));
     95   return htonl (i);
     96 }
     97 
     98 
     99 /**
    100  * Map from PNG meta data descriptor strings
    101  * to LE types.
    102  */
    103 static struct
    104 {
    105   /**
    106    * PNG name.
    107    */
    108   const char *name;
    109 
    110   /**
    111    * Corresponding LE type.
    112    */
    113   enum EXTRACTOR_MetaType type;
    114 } tagmap[] = {
    115   { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME },
    116   { "Description", EXTRACTOR_METATYPE_DESCRIPTION },
    117   { "Comment", EXTRACTOR_METATYPE_COMMENT },
    118   { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT },
    119   { "Source", EXTRACTOR_METATYPE_SOURCE_DEVICE },
    120   { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE },
    121   { "Title", EXTRACTOR_METATYPE_TITLE },
    122   { "Software", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE },
    123   { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER },
    124   { "Warning", EXTRACTOR_METATYPE_WARNING },
    125   { "Signature", EXTRACTOR_METATYPE_UNKNOWN },
    126   { NULL, EXTRACTOR_METATYPE_RESERVED }
    127 };
    128 
    129 
    130 /**
    131  * Give the given metadata to LE.  Set "ret" to 1 and
    132  * goto 'FINISH' if LE says we are done.
    133  *
    134  * @param t type of the metadata
    135  * @param s utf8 string with the metadata
    136  */
    137 #define ADD(t,s) do { if (0 != (ret = ec->proc (ec->cls, "png", t, \
    138                                                 EXTRACTOR_METAFORMAT_UTF8, \
    139                                                 "text/plain", s, strlen (s) \
    140                                                 + 1))) goto FINISH; \
    141 } while (0)
    142 
    143 
    144 /**
    145  * Give the given metadata to LE and free the memory.  Set "ret" to 1 and
    146  * goto 'FINISH' if LE says we are done.
    147  *
    148  * @param t type of the metadata
    149  * @param s utf8 string with the metadata, to be freed afterwards
    150  */
    151 #define ADDF(t,s) do { if ( (NULL != s) && (0 != (ret = ec->proc (ec->cls, \
    152                                                                   "png", t, \
    153                                                                   EXTRACTOR_METAFORMAT_UTF8, \
    154                                                                   "text/plain", \
    155                                                                   s, strlen (s) \
    156                                                                   + 1))) ) { \
    157                          free (s); goto FINISH; } if (NULL != s) free (s); \
    158 } while (0)
    159 
    160 
    161 /**
    162  * Process EXt tag.
    163  *
    164  * @param ec extraction context
    165  * @param length length of the tag
    166  * @return 0 to continue extracting, 1 if we are done
    167  */
    168 static int
    169 processtEXt (struct EXTRACTOR_ExtractContext *ec,
    170              uint32_t length)
    171 {
    172   void *ptr;
    173   unsigned char *data;
    174   char *keyword;
    175   size_t off;
    176   unsigned int i;
    177   int ret;
    178 
    179   if (length != ec->read (ec->cls, &ptr, length))
    180     return 1;
    181   data = ptr;
    182   off = stnlen ((char*) data, length) + 1;
    183   if (off >= length)
    184     return 0;                /* failed to find '\0' */
    185   if (NULL == (keyword = EXTRACTOR_common_convert_to_utf8 ((char*) &data[off],
    186                                                            length - off,
    187                                                            "ISO-8859-1")))
    188     return 0;
    189   ret = 0;
    190   for (i = 0; NULL != tagmap[i].name; i++)
    191     if (0 == strcmp (tagmap[i].name, (char*) data))
    192     {
    193       ADDF (tagmap[i].type, keyword);
    194       return 0;
    195     }
    196   ADDF (EXTRACTOR_METATYPE_KEYWORDS, keyword);
    197 FINISH:
    198   return ret;
    199 }
    200 
    201 
    202 /**
    203  * Process iTXt tag.
    204  *
    205  * @param ec extraction context
    206  * @param length length of the tag
    207  * @return 0 to continue extracting, 1 if we are done
    208  */
    209 static int
    210 processiTXt (struct EXTRACTOR_ExtractContext *ec,
    211              uint32_t length)
    212 {
    213   void *ptr;
    214   unsigned char *data;
    215   size_t pos;
    216   char *keyword;
    217   const char *language;
    218   const char *translated;
    219   unsigned int i;
    220   int compressed;
    221   char *buf;
    222   char *lan;
    223   uLongf bufLen;
    224   int ret;
    225   int zret;
    226 
    227   if (length != ec->read (ec->cls, &ptr, length))
    228     return 1;
    229   data = ptr;
    230   pos = stnlen ((char *) data, length) + 1;
    231   if (pos >= length)
    232     return 0;
    233   compressed = data[pos++];
    234   if (compressed && (0 != data[pos++]))
    235     return 0;                /* bad compression method */
    236   if (pos > length)
    237     return 0;
    238   language = (char *) &data[pos];
    239   ret = 0;
    240   if ( (stnlen (language, length - pos) > 0) &&
    241        (NULL != (lan = stndup (language, length - pos))) )
    242     ADDF (EXTRACTOR_METATYPE_LANGUAGE, lan);
    243   pos += stnlen (language, length - pos) + 1;
    244   if (pos + 1 >= length)
    245     return 0;
    246   translated = (char*) &data[pos];      /* already in utf-8! */
    247   if ( (stnlen (translated, length - pos) > 0) &&
    248        (NULL != (lan = stndup (translated, length - pos))) )
    249     ADDF (EXTRACTOR_METATYPE_KEYWORDS, lan);
    250   pos += stnlen (translated, length - pos) + 1;
    251   if (pos >= length)
    252     return 0;
    253 
    254   if (compressed)
    255   {
    256     bufLen = 1024 + 2 * (length - pos);
    257     while (1)
    258     {
    259       if (bufLen * 2 < bufLen)
    260         return 0;
    261       bufLen *= 2;
    262       if (bufLen > 50 * (length - pos))
    263       {
    264         /* printf("zlib problem"); */
    265         return 0;
    266       }
    267       if (NULL == (buf = malloc (bufLen)))
    268       {
    269         /* printf("out of memory"); */
    270         return 0;            /* out of memory */
    271       }
    272       if (Z_OK ==
    273           (zret = uncompress ((Bytef *) buf,
    274                               &bufLen,
    275                               (const Bytef *) &data[pos], length - pos)))
    276       {
    277         /* printf("zlib ok"); */
    278         break;
    279       }
    280       free (buf);
    281       if (Z_BUF_ERROR != zret)
    282         return 0;            /* unknown error, abort */
    283     }
    284     keyword = stndup (buf, bufLen);
    285     free (buf);
    286   }
    287   else
    288   {
    289     keyword = stndup ((char *) &data[pos], length - pos);
    290   }
    291   if (NULL == keyword)
    292     return ret;
    293   for (i = 0; NULL != tagmap[i].name; i++)
    294     if (0 == strcmp (tagmap[i].name, (char*) data))
    295     {
    296       ADDF (tagmap[i].type, keyword /* already in utf8 */);
    297       return 0;
    298     }
    299   ADDF (EXTRACTOR_METATYPE_COMMENT, keyword);
    300 FINISH:
    301   return ret;
    302 }
    303 
    304 
    305 /**
    306  * Process IHDR tag.
    307  *
    308  * @param ec extraction context
    309  * @param length length of the tag
    310  * @return 0 to continue extracting, 1 if we are done
    311  */
    312 static int
    313 processIHDR (struct EXTRACTOR_ExtractContext *ec,
    314              uint32_t length)
    315 {
    316   void *ptr;
    317   unsigned char *data;
    318   char tmp[128];
    319   int ret;
    320 
    321   if (length < 12)
    322     return 0;
    323   if (length != ec->read (ec->cls, &ptr, length))
    324     return 1;
    325   data = ptr;
    326   ret = 0;
    327   snprintf (tmp,
    328             sizeof (tmp),
    329             "%ux%u",
    330             get_int_at (data), get_int_at (&data[4]));
    331   ADD (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, tmp);
    332 FINISH:
    333   return ret;
    334 }
    335 
    336 
    337 /**
    338  * Process zTXt tag.
    339  *
    340  * @param ec extraction context
    341  * @param length length of the tag
    342  * @return 0 to continue extracting, 1 if we are done
    343  */
    344 static int
    345 processzTXt (struct EXTRACTOR_ExtractContext *ec,
    346              uint32_t length)
    347 {
    348   void *ptr;
    349   unsigned char *data;
    350   char *keyword;
    351   size_t off;
    352   unsigned int i;
    353   char *buf;
    354   uLongf bufLen;
    355   int zret;
    356   int ret;
    357 
    358   if (length != ec->read (ec->cls, &ptr, length))
    359     return 1;
    360   data = ptr;
    361   off = stnlen ((char *) data, length) + 1;
    362   if (off >= length)
    363     return 0;                /* failed to find '\0' */
    364   if (0 != data[off])
    365     return 0;                /* compression method must be 0 */
    366   off++;
    367   ret = 0;
    368   bufLen = 1024 + 2 * (length - off);
    369   while (1)
    370   {
    371     if (bufLen * 2 < bufLen)
    372       return 0;
    373     bufLen *= 2;
    374     if (bufLen > 50 * (length - off))
    375     {
    376       /* printf("zlib problem"); */
    377       return 0;
    378     }
    379     if (NULL == (buf = malloc (bufLen)))
    380     {
    381       /* printf("out of memory"); */
    382       return 0;              /* out of memory */
    383     }
    384     if (Z_OK ==
    385         (zret = uncompress ((Bytef *) buf,
    386                             &bufLen,
    387                             (const Bytef *) &data[off],
    388                             length - off)))
    389     {
    390       /* printf("zlib ok"); */
    391       break;
    392     }
    393     free (buf);
    394     if (Z_BUF_ERROR != zret)
    395       return 0;              /* unknown error, abort */
    396   }
    397   keyword = EXTRACTOR_common_convert_to_utf8 (buf,
    398                                               bufLen,
    399                                               "ISO-8859-1");
    400   free (buf);
    401   for (i = 0; NULL != tagmap[i].name; i++)
    402     if (0 == strcmp (tagmap[i].name, (char*)  data))
    403     {
    404       ADDF (tagmap[i].type, keyword);
    405       return 0;
    406     }
    407   ADDF (EXTRACTOR_METATYPE_COMMENT, keyword);
    408 FINISH:
    409   return ret;
    410 }
    411 
    412 
    413 /**
    414  * Process IME tag.
    415  *
    416  * @param ec extraction context
    417  * @param length length of the tag
    418  * @return 0 to continue extracting, 1 if we are done
    419  */
    420 static int
    421 processtIME (struct EXTRACTOR_ExtractContext *ec,
    422              uint32_t length)
    423 {
    424   void *ptr;
    425   unsigned char *data;
    426   unsigned short y;
    427   unsigned int year;
    428   unsigned int mo;
    429   unsigned int day;
    430   unsigned int h;
    431   unsigned int m;
    432   unsigned int s;
    433   char val[256];
    434   int ret;
    435 
    436   if (length != 7)
    437     return 0;
    438   if (length != ec->read (ec->cls, &ptr, length))
    439     return 1;
    440   data = ptr;
    441   ret = 0;
    442   memcpy (&y, data, sizeof (uint16_t));
    443   year = ntohs (y);
    444   mo = (unsigned char) data[6];
    445   day = (unsigned char) data[7];
    446   h = (unsigned char) data[8];
    447   m = (unsigned char) data[9];
    448   s = (unsigned char) data[10];
    449   snprintf (val,
    450             sizeof (val),
    451             "%04u-%02u-%02u %02d:%02d:%02d",
    452             year, mo, day, h, m, s);
    453   ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, val);
    454 FINISH:
    455   return ret;
    456 }
    457 
    458 
    459 /**
    460  * Main entry method for the 'image/png' extraction plugin.
    461  *
    462  * @param ec extraction context provided to the plugin
    463  */
    464 void
    465 EXTRACTOR_png_extract_method (struct EXTRACTOR_ExtractContext *ec)
    466 {
    467   void *data;
    468   uint32_t length;
    469   int64_t pos;
    470   int ret;
    471   ssize_t len;
    472 
    473   len = strlen (PNG_HEADER);
    474   if (len != ec->read (ec->cls, &data, len))
    475     return;
    476   if (0 != strncmp ((const char*) data, PNG_HEADER, len))
    477     return;
    478   ADD (EXTRACTOR_METATYPE_MIMETYPE, "image/png");
    479   ret = 0;
    480   while (0 == ret)
    481   {
    482     if (sizeof (uint32_t) + 4 != ec->read (ec->cls,
    483                                            &data,
    484                                            sizeof (uint32_t) + 4))
    485       break;
    486     length = get_int_at (data);
    487     if (0 > (pos = ec->seek (ec->cls, 0, SEEK_CUR)))
    488       break;
    489     pos += length + 4;   /* Chunk type, data, crc */
    490     if (0 == strncmp ((char*) data + sizeof (uint32_t), "IHDR", 4))
    491       ret = processIHDR (ec, length);
    492     if (0 == strncmp ((char*) data + sizeof (uint32_t), "iTXt", 4))
    493       ret = processiTXt (ec, length);
    494     if (0 == strncmp ((char*) data + sizeof (uint32_t), "tEXt", 4))
    495       ret = processtEXt (ec, length);
    496     if (0 == strncmp ((char*) data + sizeof (uint32_t), "zTXt", 4))
    497       ret = processzTXt (ec, length);
    498     if (0 == strncmp ((char*) data + sizeof (uint32_t), "tIME", 4))
    499       ret = processtIME (ec, length);
    500     if (ret != 0)
    501       break;
    502     if (pos != ec->seek (ec->cls, pos, SEEK_SET))
    503       break;
    504   }
    505 FINISH:
    506   return;
    507 }
    508 
    509 
    510 /* end of png_extractor.c */