png_extractor.c (12990B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2002, 2003, 2004, 2005, 2009, 2012 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file plugins/png_extractor.c 22 * @brief plugin to support PNG files 23 * @author Christian Grothoff 24 */ 25 #include "platform.h" 26 #include <zlib.h> 27 #include "extractor.h" 28 #include "convert.h" 29 30 /** 31 * Header that every PNG file must start with. 32 */ 33 #define PNG_HEADER "\211PNG\r\n\032\n" 34 35 36 /** 37 * Function to create 0-terminated string from the 38 * first n characters of the given input. 39 * 40 * @param str input string 41 * @param n length of the input 42 * @return n-bytes from str followed by 0-termination, NULL on error 43 */ 44 static char * 45 stndup (const char *str, 46 size_t n) 47 { 48 char *tmp; 49 50 if (n + 1 < n) 51 return NULL; 52 if (NULL == (tmp = malloc (n + 1))) 53 return NULL; 54 tmp[n] = '\0'; 55 memcpy (tmp, str, n); 56 return tmp; 57 } 58 59 60 /** 61 * strnlen is GNU specific, let's redo it here to be 62 * POSIX compliant. 63 * 64 * @param str input string 65 * @param maxlen maximum length of str 66 * @return first position of 0-terminator in str, or maxlen 67 */ 68 static size_t 69 stnlen (const char *str, 70 size_t maxlen) 71 { 72 size_t ret; 73 74 ret = 0; 75 while ( (ret < maxlen) && 76 ('\0' != str[ret]) ) 77 ret++; 78 return ret; 79 } 80 81 82 /** 83 * Interpret the 4 bytes in 'buf' as a big-endian 84 * encoded 32-bit integer, convert and return. 85 * 86 * @param pos (unaligned) pointer to 4 byte integer 87 * @return converted integer in host byte order 88 */ 89 static uint32_t 90 get_int_at (const void *pos) 91 { 92 uint32_t i; 93 94 memcpy (&i, pos, sizeof (i)); 95 return htonl (i); 96 } 97 98 99 /** 100 * Map from PNG meta data descriptor strings 101 * to LE types. 102 */ 103 static struct 104 { 105 /** 106 * PNG name. 107 */ 108 const char *name; 109 110 /** 111 * Corresponding LE type. 112 */ 113 enum EXTRACTOR_MetaType type; 114 } tagmap[] = { 115 { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME }, 116 { "Description", EXTRACTOR_METATYPE_DESCRIPTION }, 117 { "Comment", EXTRACTOR_METATYPE_COMMENT }, 118 { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT }, 119 { "Source", EXTRACTOR_METATYPE_SOURCE_DEVICE }, 120 { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE }, 121 { "Title", EXTRACTOR_METATYPE_TITLE }, 122 { "Software", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE }, 123 { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER }, 124 { "Warning", EXTRACTOR_METATYPE_WARNING }, 125 { "Signature", EXTRACTOR_METATYPE_UNKNOWN }, 126 { NULL, EXTRACTOR_METATYPE_RESERVED } 127 }; 128 129 130 /** 131 * Give the given metadata to LE. Set "ret" to 1 and 132 * goto 'FINISH' if LE says we are done. 133 * 134 * @param t type of the metadata 135 * @param s utf8 string with the metadata 136 */ 137 #define ADD(t,s) do { if (0 != (ret = ec->proc (ec->cls, "png", t, \ 138 EXTRACTOR_METAFORMAT_UTF8, \ 139 "text/plain", s, strlen (s) \ 140 + 1))) goto FINISH; \ 141 } while (0) 142 143 144 /** 145 * Give the given metadata to LE and free the memory. Set "ret" to 1 and 146 * goto 'FINISH' if LE says we are done. 147 * 148 * @param t type of the metadata 149 * @param s utf8 string with the metadata, to be freed afterwards 150 */ 151 #define ADDF(t,s) do { if ( (NULL != s) && (0 != (ret = ec->proc (ec->cls, \ 152 "png", t, \ 153 EXTRACTOR_METAFORMAT_UTF8, \ 154 "text/plain", \ 155 s, strlen (s) \ 156 + 1))) ) { \ 157 free (s); goto FINISH; } if (NULL != s) free (s); \ 158 } while (0) 159 160 161 /** 162 * Process EXt tag. 163 * 164 * @param ec extraction context 165 * @param length length of the tag 166 * @return 0 to continue extracting, 1 if we are done 167 */ 168 static int 169 processtEXt (struct EXTRACTOR_ExtractContext *ec, 170 uint32_t length) 171 { 172 void *ptr; 173 unsigned char *data; 174 char *keyword; 175 size_t off; 176 unsigned int i; 177 int ret; 178 179 if (length != ec->read (ec->cls, &ptr, length)) 180 return 1; 181 data = ptr; 182 off = stnlen ((char*) data, length) + 1; 183 if (off >= length) 184 return 0; /* failed to find '\0' */ 185 if (NULL == (keyword = EXTRACTOR_common_convert_to_utf8 ((char*) &data[off], 186 length - off, 187 "ISO-8859-1"))) 188 return 0; 189 ret = 0; 190 for (i = 0; NULL != tagmap[i].name; i++) 191 if (0 == strcmp (tagmap[i].name, (char*) data)) 192 { 193 ADDF (tagmap[i].type, keyword); 194 return 0; 195 } 196 ADDF (EXTRACTOR_METATYPE_KEYWORDS, keyword); 197 FINISH: 198 return ret; 199 } 200 201 202 /** 203 * Process iTXt tag. 204 * 205 * @param ec extraction context 206 * @param length length of the tag 207 * @return 0 to continue extracting, 1 if we are done 208 */ 209 static int 210 processiTXt (struct EXTRACTOR_ExtractContext *ec, 211 uint32_t length) 212 { 213 void *ptr; 214 unsigned char *data; 215 size_t pos; 216 char *keyword; 217 const char *language; 218 const char *translated; 219 unsigned int i; 220 int compressed; 221 char *buf; 222 char *lan; 223 uLongf bufLen; 224 int ret; 225 int zret; 226 227 if (length != ec->read (ec->cls, &ptr, length)) 228 return 1; 229 data = ptr; 230 pos = stnlen ((char *) data, length) + 1; 231 if (pos >= length) 232 return 0; 233 compressed = data[pos++]; 234 if (compressed && (0 != data[pos++])) 235 return 0; /* bad compression method */ 236 if (pos > length) 237 return 0; 238 language = (char *) &data[pos]; 239 ret = 0; 240 if ( (stnlen (language, length - pos) > 0) && 241 (NULL != (lan = stndup (language, length - pos))) ) 242 ADDF (EXTRACTOR_METATYPE_LANGUAGE, lan); 243 pos += stnlen (language, length - pos) + 1; 244 if (pos + 1 >= length) 245 return 0; 246 translated = (char*) &data[pos]; /* already in utf-8! */ 247 if ( (stnlen (translated, length - pos) > 0) && 248 (NULL != (lan = stndup (translated, length - pos))) ) 249 ADDF (EXTRACTOR_METATYPE_KEYWORDS, lan); 250 pos += stnlen (translated, length - pos) + 1; 251 if (pos >= length) 252 return 0; 253 254 if (compressed) 255 { 256 bufLen = 1024 + 2 * (length - pos); 257 while (1) 258 { 259 if (bufLen * 2 < bufLen) 260 return 0; 261 bufLen *= 2; 262 if (bufLen > 50 * (length - pos)) 263 { 264 /* printf("zlib problem"); */ 265 return 0; 266 } 267 if (NULL == (buf = malloc (bufLen))) 268 { 269 /* printf("out of memory"); */ 270 return 0; /* out of memory */ 271 } 272 if (Z_OK == 273 (zret = uncompress ((Bytef *) buf, 274 &bufLen, 275 (const Bytef *) &data[pos], length - pos))) 276 { 277 /* printf("zlib ok"); */ 278 break; 279 } 280 free (buf); 281 if (Z_BUF_ERROR != zret) 282 return 0; /* unknown error, abort */ 283 } 284 keyword = stndup (buf, bufLen); 285 free (buf); 286 } 287 else 288 { 289 keyword = stndup ((char *) &data[pos], length - pos); 290 } 291 if (NULL == keyword) 292 return ret; 293 for (i = 0; NULL != tagmap[i].name; i++) 294 if (0 == strcmp (tagmap[i].name, (char*) data)) 295 { 296 ADDF (tagmap[i].type, keyword /* already in utf8 */); 297 return 0; 298 } 299 ADDF (EXTRACTOR_METATYPE_COMMENT, keyword); 300 FINISH: 301 return ret; 302 } 303 304 305 /** 306 * Process IHDR tag. 307 * 308 * @param ec extraction context 309 * @param length length of the tag 310 * @return 0 to continue extracting, 1 if we are done 311 */ 312 static int 313 processIHDR (struct EXTRACTOR_ExtractContext *ec, 314 uint32_t length) 315 { 316 void *ptr; 317 unsigned char *data; 318 char tmp[128]; 319 int ret; 320 321 if (length < 12) 322 return 0; 323 if (length != ec->read (ec->cls, &ptr, length)) 324 return 1; 325 data = ptr; 326 ret = 0; 327 snprintf (tmp, 328 sizeof (tmp), 329 "%ux%u", 330 get_int_at (data), get_int_at (&data[4])); 331 ADD (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, tmp); 332 FINISH: 333 return ret; 334 } 335 336 337 /** 338 * Process zTXt tag. 339 * 340 * @param ec extraction context 341 * @param length length of the tag 342 * @return 0 to continue extracting, 1 if we are done 343 */ 344 static int 345 processzTXt (struct EXTRACTOR_ExtractContext *ec, 346 uint32_t length) 347 { 348 void *ptr; 349 unsigned char *data; 350 char *keyword; 351 size_t off; 352 unsigned int i; 353 char *buf; 354 uLongf bufLen; 355 int zret; 356 int ret; 357 358 if (length != ec->read (ec->cls, &ptr, length)) 359 return 1; 360 data = ptr; 361 off = stnlen ((char *) data, length) + 1; 362 if (off >= length) 363 return 0; /* failed to find '\0' */ 364 if (0 != data[off]) 365 return 0; /* compression method must be 0 */ 366 off++; 367 ret = 0; 368 bufLen = 1024 + 2 * (length - off); 369 while (1) 370 { 371 if (bufLen * 2 < bufLen) 372 return 0; 373 bufLen *= 2; 374 if (bufLen > 50 * (length - off)) 375 { 376 /* printf("zlib problem"); */ 377 return 0; 378 } 379 if (NULL == (buf = malloc (bufLen))) 380 { 381 /* printf("out of memory"); */ 382 return 0; /* out of memory */ 383 } 384 if (Z_OK == 385 (zret = uncompress ((Bytef *) buf, 386 &bufLen, 387 (const Bytef *) &data[off], 388 length - off))) 389 { 390 /* printf("zlib ok"); */ 391 break; 392 } 393 free (buf); 394 if (Z_BUF_ERROR != zret) 395 return 0; /* unknown error, abort */ 396 } 397 keyword = EXTRACTOR_common_convert_to_utf8 (buf, 398 bufLen, 399 "ISO-8859-1"); 400 free (buf); 401 for (i = 0; NULL != tagmap[i].name; i++) 402 if (0 == strcmp (tagmap[i].name, (char*) data)) 403 { 404 ADDF (tagmap[i].type, keyword); 405 return 0; 406 } 407 ADDF (EXTRACTOR_METATYPE_COMMENT, keyword); 408 FINISH: 409 return ret; 410 } 411 412 413 /** 414 * Process IME tag. 415 * 416 * @param ec extraction context 417 * @param length length of the tag 418 * @return 0 to continue extracting, 1 if we are done 419 */ 420 static int 421 processtIME (struct EXTRACTOR_ExtractContext *ec, 422 uint32_t length) 423 { 424 void *ptr; 425 unsigned char *data; 426 unsigned short y; 427 unsigned int year; 428 unsigned int mo; 429 unsigned int day; 430 unsigned int h; 431 unsigned int m; 432 unsigned int s; 433 char val[256]; 434 int ret; 435 436 if (length != 7) 437 return 0; 438 if (length != ec->read (ec->cls, &ptr, length)) 439 return 1; 440 data = ptr; 441 ret = 0; 442 memcpy (&y, data, sizeof (uint16_t)); 443 year = ntohs (y); 444 mo = (unsigned char) data[6]; 445 day = (unsigned char) data[7]; 446 h = (unsigned char) data[8]; 447 m = (unsigned char) data[9]; 448 s = (unsigned char) data[10]; 449 snprintf (val, 450 sizeof (val), 451 "%04u-%02u-%02u %02d:%02d:%02d", 452 year, mo, day, h, m, s); 453 ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, val); 454 FINISH: 455 return ret; 456 } 457 458 459 /** 460 * Main entry method for the 'image/png' extraction plugin. 461 * 462 * @param ec extraction context provided to the plugin 463 */ 464 void 465 EXTRACTOR_png_extract_method (struct EXTRACTOR_ExtractContext *ec) 466 { 467 void *data; 468 uint32_t length; 469 int64_t pos; 470 int ret; 471 ssize_t len; 472 473 len = strlen (PNG_HEADER); 474 if (len != ec->read (ec->cls, &data, len)) 475 return; 476 if (0 != strncmp ((const char*) data, PNG_HEADER, len)) 477 return; 478 ADD (EXTRACTOR_METATYPE_MIMETYPE, "image/png"); 479 ret = 0; 480 while (0 == ret) 481 { 482 if (sizeof (uint32_t) + 4 != ec->read (ec->cls, 483 &data, 484 sizeof (uint32_t) + 4)) 485 break; 486 length = get_int_at (data); 487 if (0 > (pos = ec->seek (ec->cls, 0, SEEK_CUR))) 488 break; 489 pos += length + 4; /* Chunk type, data, crc */ 490 if (0 == strncmp ((char*) data + sizeof (uint32_t), "IHDR", 4)) 491 ret = processIHDR (ec, length); 492 if (0 == strncmp ((char*) data + sizeof (uint32_t), "iTXt", 4)) 493 ret = processiTXt (ec, length); 494 if (0 == strncmp ((char*) data + sizeof (uint32_t), "tEXt", 4)) 495 ret = processtEXt (ec, length); 496 if (0 == strncmp ((char*) data + sizeof (uint32_t), "zTXt", 4)) 497 ret = processzTXt (ec, length); 498 if (0 == strncmp ((char*) data + sizeof (uint32_t), "tIME", 4)) 499 ret = processtIME (ec, length); 500 if (ret != 0) 501 break; 502 if (pos != ec->seek (ec->cls, pos, SEEK_SET)) 503 break; 504 } 505 FINISH: 506 return; 507 } 508 509 510 /* end of png_extractor.c */