libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit e4423c3bcb9b184b037bd5c0c7b70c46a770b83f
parent d502541f439f605f5531f403bd8ce90acbc13608
Author: Christian Grothoff <christian@grothoff.org>
Date:   Wed, 16 Dec 2009 08:54:16 +0000

gif and jpeg

Diffstat:
M src/plugins/Makefile.am | 32 ++++++++++++++++----------------
A src/plugins/gif_extractor.c | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/plugins/gifextractor.c | 249 -------------------------------------------------------------------------------
A src/plugins/jpeg_extractor.c | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D src/plugins/jpegextractor.c | 243 -------------------------------------------------------------------------------
5 files changed, 553 insertions(+), 508 deletions(-)

diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -89,8 +89,10 @@ plugin_LTLIBRARIES = \ libextractor_dvi.la \ libextractor_elf.la \ $(exiv2) \ + libextractor_gif.la \ libextractor_html.la \ libextractor_it.la \ + libextractor_jpeg.la \ libextractor_mime.la \ $(pdf) \ $(rpm) \ @@ -135,6 +137,13 @@ libextractor_exiv2_la_LDFLAGS = \ libextractor_exiv2_la_LIBADD = \ -lexiv2 +libextractor_gif_la_SOURCES = \ + gif_extractor.c +libextractor_gif_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_gif_la_LIBADD = \ + $(top_builddir)/src/common/libextractor_common.la + libextractor_html_la_SOURCES = \ html_extractor.c libextractor_html_la_LDFLAGS = \ @@ -147,6 +156,13 @@ libextractor_it_la_SOURCES = \ libextractor_it_la_LDFLAGS = \ $(PLUGINFLAGS) +libextractor_jpeg_la_SOURCES = \ + jpeg_extractor.c +libextractor_jpeg_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_jpeg_la_LIBADD = \ + $(LE_LIBINTL) + libextractor_mime_la_SOURCES = \ mime_extractor.c libextractor_mime_la_LDFLAGS = \ @@ -183,11 +199,9 @@ OLD_LIBS = \ libextractor_elf.la \ $(extraflac) \ libextractor_flv.la \ - libextractor_gif.la \ libextractor_id3v2.la \ libextractor_id3v24.la \ libextractor_id3v23.la \ - libextractor_jpeg.la \ libextractor_man.la \ libextractor_mp3.la \ $(extrampeg) \ @@ -290,13 +304,6 @@ libextractor_tar_la_LIBADD = \ $(top_builddir)/src/main/libextractor.la -lz endif -libextractor_gif_la_SOURCES = \ - gifextractor.c -libextractor_gif_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_gif_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - libextractor_tiff_la_SOURCES = \ tiffextractor.c libextractor_tiff_la_LDFLAGS = \ @@ -311,13 +318,6 @@ libextractor_zip_la_LDFLAGS = \ libextractor_zip_la_LIBADD = \ $(top_builddir)/src/main/libextractor.la -libextractor_jpeg_la_SOURCES = \ - jpegextractor.c -libextractor_jpeg_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_jpeg_la_LIBADD = \ - $(LE_LIBINTL) - libextractor_flv_la_SOURCES = \ flvextractor.c libextractor_flv_la_LDFLAGS = \ 
diff --git a/src/plugins/gif_extractor.c b/src/plugins/gif_extractor.c @@ -0,0 +1,262 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + +#include "platform.h" +#include "extractor.h" +#include "pack.h" + +#define DEBUG_GIF 0 +#if DEBUG_GIF +#define PRINT(a,b) fprintf(stderr,a,b) +#else +#define PRINT(a,b) +#endif + +typedef struct +{ + char gif[3]; + char version[3]; + unsigned short screen_width; + unsigned short screen_height; + unsigned char flags; +#define HEADER_FLAGS__SIZE_OF_GLOBAL_COLOR_TABLE 0x07 +#define HEADER_FLAGS__SORT_FLAG 0x08 +#define HEADER_FLAGS__COLOR_RESOLUTION 0x70 +#define HEADER_FLAGS__GLOBAL_COLOR_TABLE_FLAG 0x80 + unsigned char background_color_index; + unsigned char pixel_aspect_ratio; +} GIF_HEADER; + +#define GIF_HEADER_SIZE 13 +#define GIF_HEADER_SPEC "3b3bhhbbb" +#define GIF_HEADER_FIELDS(p) \ + &(p)->gif,\ + &(p)->version, \ + &(p)->screen_width, \ + &(p)->screen_height, \ + &(p)->flags, \ + &(p)->background_color_index, \ + &(p)->pixel_aspect_ratio + +typedef struct +{ + unsigned char image_separator; + unsigned short image_left; + unsigned short image_top; + unsigned short image_width; + unsigned short image_height; + unsigned char flags; +#define 
DESCRIPTOR_FLAGS__PIXEL_SIZE 0x07 +#define DESCRIPTOR_FLAGS__RESERVED 0x18 +#define DESCRIPTOR_FLAGS__SORT_FLAG 0x20 +#define DESCRIPTOR_FLAGS__INTERLACE_FLAG 0x40 +#define DESCRIPTOR_FLAGS__LOCAL_COLOR_TABLE_FLAG 0x80 +} GIF_DESCRIPTOR; +#define GIF_DESCRIPTOR_SIZE 10 +#define GIF_DESCRIPTOR_SPEC "chhhhc" +#define GIF_DESCRIPTOR_FIELDS(p) \ + &(p)->image_separator, \ + &(p)->image_left, \ + &(p)->image_top, \ + &(p)->image_width, \ + &(p)->image_height, \ + &(p)->flags + +typedef struct +{ + unsigned char extension_introducer; + unsigned char graphic_control_label; +} GIF_EXTENSION; + +/** + * Skip a data block. + * @return the position after the block + **/ +static size_t +skipDataBlock (const unsigned char *data, size_t pos, const size_t size) +{ + while ((pos < size) && (data[pos] != 0)) + pos += data[pos] + 1; + return pos + 1; +} + +/** + * skip an extention block + * @return the position after the block + **/ +static size_t +skipExtensionBlock (const unsigned char *data, + size_t pos, const size_t size, const GIF_EXTENSION * ext) +{ + return skipDataBlock (data, pos + sizeof (GIF_EXTENSION), size); +} + +/** + * @return the offset after the global color map + **/ +static size_t +skipGlobalColorMap (const unsigned char *data, + const size_t size, const GIF_HEADER * header) +{ + size_t gct_size; + + if ((header->flags & HEADER_FLAGS__GLOBAL_COLOR_TABLE_FLAG) > 0) + gct_size = + 3 * + (1 << ((header->flags & HEADER_FLAGS__SIZE_OF_GLOBAL_COLOR_TABLE) + 1)); + else + gct_size = 0; + return GIF_HEADER_SIZE + gct_size; +} + +/** + * @return the offset after the local color map + **/ +static size_t +skipLocalColorMap (const unsigned char *data, + size_t pos, const size_t size, GIF_DESCRIPTOR * descriptor) +{ + size_t lct_size; + + if (pos + GIF_DESCRIPTOR_SIZE > size) + return size; + if ((descriptor->flags & DESCRIPTOR_FLAGS__LOCAL_COLOR_TABLE_FLAG) > 0) + lct_size = + 3 * (1 << ((descriptor->flags & DESCRIPTOR_FLAGS__PIXEL_SIZE) + 1)); + else + lct_size = 0; + 
return pos + GIF_DESCRIPTOR_SIZE + lct_size; +} + +static int +parseComment (const unsigned char *data, + size_t pos, const size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls) +{ + size_t length = 0; + size_t curr = pos; + char *comment; + int ret; + + while ((data[curr] != 0) && (curr < size)) + { + length += data[curr]; + curr += data[curr] + 1; + } + comment = malloc (length + 1); + curr = pos; + length = 0; + while ((data[curr] != 0) && (curr < size)) + { + length += data[curr]; + if (length >= size) + break; + memcpy (&comment[length - data[curr]], &data[curr] + 1, data[curr]); + comment[length] = '\0'; + curr += data[curr] + 1; + } + ret = proc (proc_cls, + "gif", + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + comment, + length+1); + free (comment); + return ret; +} + + +int +EXTRACTOR_gif_extract (const unsigned char *data, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls, + const char *options) +{ + size_t pos; + GIF_HEADER header; + char tmp[128]; + + if (size < GIF_HEADER_SIZE) + return 0; + EXTRACTOR_common_cat_unpack (data, GIF_HEADER_SPEC, GIF_HEADER_FIELDS (&header)); + if (0 != strncmp (&header.gif[0], "GIF", 3)) + return 0; + if (0 != strncmp (&header.version[0], "89a", 3)) + return 0; /* only 89a has support for comments */ + if (0 != proc (proc_cls, + "gif", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "image/gif", + strlen ("image/gif")+1)) + return 1; + snprintf (tmp, + sizeof(tmp), + "%ux%u", + header.screen_width, header.screen_height); + if (0 != proc (proc_cls, + "gif", + EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + tmp, + strlen (tmp)+1)) + return 1; + pos = skipGlobalColorMap (data, size, &header); + PRINT ("global color map ends at %d\n", pos); + while (pos < size) + { + GIF_DESCRIPTOR gd; + + switch (data[pos]) + { + case ',': /* image descriptor block */ + PRINT ("skipping local color map %d\n", pos); + 
EXTRACTOR_common_cat_unpack (&data[pos], + GIF_DESCRIPTOR_SPEC, + GIF_DESCRIPTOR_FIELDS (&gd)); + pos = skipLocalColorMap (data, pos, size, &gd); + break; + case '!': /* extension block */ + PRINT ("skipping extension block %d\n", pos); + if (data[pos + 1] == (unsigned char) 0xFE) + { + if (0 != parseComment (data, pos + 2, size, proc, proc_cls)) + return 1; + } + pos = skipExtensionBlock (data, pos, size, + (GIF_EXTENSION *) & data[pos]); + break; + case ';': + PRINT ("hit terminator at %d!\n", pos); + return 0; /* terminator! */ + default: /* raster data block */ + PRINT ("skipping data block at %d\n", pos); + pos = skipDataBlock (data, pos + 1, size); + break; + } + } + PRINT ("returning at %d\n", pos); + return 0; +} diff --git a/src/plugins/gifextractor.c b/src/plugins/gifextractor.c @@ -1,249 +0,0 @@ -/* - This file is part of libextractor. - (C) 2002, 2003 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. 
- */ - -#include "platform.h" -#include "extractor.h" -#include "pack.h" - -#define DEBUG_GIF 0 -#if DEBUG_GIF -#define PRINT(a,b) fprintf(stderr,a,b) -#else -#define PRINT(a,b) -#endif - -typedef struct -{ - char gif[3]; - char version[3]; - unsigned short screen_width; - unsigned short screen_height; - unsigned char flags; -#define HEADER_FLAGS__SIZE_OF_GLOBAL_COLOR_TABLE 0x07 -#define HEADER_FLAGS__SORT_FLAG 0x08 -#define HEADER_FLAGS__COLOR_RESOLUTION 0x70 -#define HEADER_FLAGS__GLOBAL_COLOR_TABLE_FLAG 0x80 - unsigned char background_color_index; - unsigned char pixel_aspect_ratio; -} GIF_HEADER; - -#define GIF_HEADER_SIZE 13 -#define GIF_HEADER_SPEC "3b3bhhbbb" -#define GIF_HEADER_FIELDS(p) \ - &(p)->gif,\ - &(p)->version, \ - &(p)->screen_width, \ - &(p)->screen_height, \ - &(p)->flags, \ - &(p)->background_color_index, \ - &(p)->pixel_aspect_ratio - -typedef struct -{ - unsigned char image_separator; - unsigned short image_left; - unsigned short image_top; - unsigned short image_width; - unsigned short image_height; - unsigned char flags; -#define DESCRIPTOR_FLAGS__PIXEL_SIZE 0x07 -#define DESCRIPTOR_FLAGS__RESERVED 0x18 -#define DESCRIPTOR_FLAGS__SORT_FLAG 0x20 -#define DESCRIPTOR_FLAGS__INTERLACE_FLAG 0x40 -#define DESCRIPTOR_FLAGS__LOCAL_COLOR_TABLE_FLAG 0x80 -} GIF_DESCRIPTOR; -#define GIF_DESCRIPTOR_SIZE 10 -#define GIF_DESCRIPTOR_SPEC "chhhhc" -#define GIF_DESCRIPTOR_FIELDS(p) \ - &(p)->image_separator, \ - &(p)->image_left, \ - &(p)->image_top, \ - &(p)->image_width, \ - &(p)->image_height, \ - &(p)->flags - -typedef struct -{ - unsigned char extension_introducer; - unsigned char graphic_control_label; -} GIF_EXTENSION; - -static struct EXTRACTOR_Keywords * -addKeyword (EXTRACTOR_KeywordType type, - char *keyword, struct EXTRACTOR_Keywords *next) -{ - EXTRACTOR_KeywordList *result; - - if (keyword == NULL) - return next; - result = malloc (sizeof (EXTRACTOR_KeywordList)); - result->next = next; - result->keyword = keyword; - result->keywordType = 
type; - return result; -} - -/** - * Skip a data block. - * @return the position after the block - **/ -static size_t -skipDataBlock (const unsigned char *data, size_t pos, const size_t size) -{ - while ((pos < size) && (data[pos] != 0)) - pos += data[pos] + 1; - return pos + 1; -} - -/** - * skip an extention block - * @return the position after the block - **/ -static size_t -skipExtensionBlock (const unsigned char *data, - size_t pos, const size_t size, const GIF_EXTENSION * ext) -{ - return skipDataBlock (data, pos + sizeof (GIF_EXTENSION), size); -} - -/** - * @return the offset after the global color map - **/ -static size_t -skipGlobalColorMap (const unsigned char *data, - const size_t size, const GIF_HEADER * header) -{ - size_t gct_size; - - if ((header->flags & HEADER_FLAGS__GLOBAL_COLOR_TABLE_FLAG) > 0) - gct_size = - 3 * - (1 << ((header->flags & HEADER_FLAGS__SIZE_OF_GLOBAL_COLOR_TABLE) + 1)); - else - gct_size = 0; - return GIF_HEADER_SIZE + gct_size; -} - -/** - * @return the offset after the local color map - **/ -static size_t -skipLocalColorMap (const unsigned char *data, - size_t pos, const size_t size, GIF_DESCRIPTOR * descriptor) -{ - size_t lct_size; - - if (pos + GIF_DESCRIPTOR_SIZE > size) - return size; - if ((descriptor->flags & DESCRIPTOR_FLAGS__LOCAL_COLOR_TABLE_FLAG) > 0) - lct_size = - 3 * (1 << ((descriptor->flags & DESCRIPTOR_FLAGS__PIXEL_SIZE) + 1)); - else - lct_size = 0; - return pos + GIF_DESCRIPTOR_SIZE + lct_size; -} - -static struct EXTRACTOR_Keywords * -parseComment (const unsigned char *data, - size_t pos, const size_t size, struct EXTRACTOR_Keywords *prev) -{ - size_t length = 0; - size_t curr = pos; - char *keyword; - - while ((data[curr] != 0) && (curr < size)) - { - length += data[curr]; - curr += data[curr] + 1; - } - keyword = malloc (length + 1); - curr = pos; - length = 0; - while ((data[curr] != 0) && (curr < size)) - { - length += data[curr]; - if (length >= size) - break; - memcpy (&keyword[length - data[curr]], 
&data[curr] + 1, data[curr]); - keyword[length] = 0; - curr += data[curr] + 1; - } - return addKeyword (EXTRACTOR_COMMENT, keyword, prev); -} - - -struct EXTRACTOR_Keywords * -libextractor_gif_extract (const char *filename, - const unsigned char *data, - const size_t size, struct EXTRACTOR_Keywords *prev) -{ - size_t pos; - struct EXTRACTOR_Keywords *result; - GIF_HEADER header; - char *tmp; - - if (size < GIF_HEADER_SIZE) - return prev; - EXTRACTOR_common_cat_unpack (data, GIF_HEADER_SPEC, GIF_HEADER_FIELDS (&header)); - if (0 != strncmp (&header.gif[0], "GIF", 3)) - return prev; - if (0 != strncmp (&header.version[0], "89a", 3)) - return prev; /* only 89a has support for comments */ - result = prev; - result = addKeyword (EXTRACTOR_MIMETYPE, strdup ("image/gif"), result); - tmp = malloc (128); - snprintf (tmp, 128, "%ux%u", header.screen_width, header.screen_height); - result = addKeyword (EXTRACTOR_SIZE, strdup (tmp), result); - free (tmp); - pos = skipGlobalColorMap (data, size, &header); - PRINT ("global color map ends at %d\n", pos); - while (pos < size) - { - GIF_DESCRIPTOR gd; - - switch (data[pos]) - { - case ',': /* image descriptor block */ - PRINT ("skipping local color map %d\n", pos); - EXTRACTOR_common_cat_unpack (&data[pos], - GIF_DESCRIPTOR_SPEC, GIF_DESCRIPTOR_FIELDS (&gd)); - pos = skipLocalColorMap (data, pos, size, &gd); - break; - case '!': /* extension block */ - PRINT ("skipping extension block %d\n", pos); - if (data[pos + 1] == (unsigned char) 0xFE) - { - result = parseComment (data, pos + 2, size, result); - } - pos = skipExtensionBlock (data, pos, size, - (GIF_EXTENSION *) & data[pos]); - break; - case ';': - PRINT ("hit terminator at %d!\n", pos); - return result; /* terminator! 
*/ - default: /* raster data block */ - PRINT ("skipping data block at %d\n", pos); - pos = skipDataBlock (data, pos + 1, size); - break; - } - } - PRINT ("returning at %d\n", pos); - return result; -} diff --git a/src/plugins/jpeg_extractor.c b/src/plugins/jpeg_extractor.c @@ -0,0 +1,275 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + +#include "platform.h" +#include "extractor.h" + + +#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ +#define M_EOI 0xD9 /* End Of Image (end of datastream) */ +#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ +#define M_APP12 0xEC +#define M_COM 0xFE /* COMment */ +#define M_APP0 0xE0 + +/** + * Get the next character in the sequence and advance + * the pointer *data to the next location in the sequence. + * If we're at the end, return -1. + */ +#define NEXTC(data,end) ((*(data)<(end))?*((*(data))++):-1) + +/* The macro does: +unsigned int NEXTC(unsigned char ** data, char * end) { + if (*data < end) { + char result = **data; + (*data)++; + return result; + } else + return -1; +} +*/ + +/** + * Read length, convert to unsigned int. 
+ * All 2-byte quantities in JPEG markers are MSB first + * @return -1 on error + */ +static int +readLength (const unsigned char **data, const unsigned char *end) +{ + int c1; + int c2; + + c1 = NEXTC (data, end); + if (c1 == -1) + return -1; + c2 = NEXTC (data, end); + if (c2 == -1) + return -1; + return ((((unsigned int) c1) << 8) + ((unsigned int) c2)) - 2; +} + +/** + * @return the next marker or -1 on error. + */ +static int +next_marker (const unsigned char **data, const unsigned char *end) +{ + int c; + c = NEXTC (data, end); + while ((c != 0xFF) && (c != -1)) + c = NEXTC (data, end); + do + { + c = NEXTC (data, end); + } + while ((c == 0xFF) && (c != -1)); + return c; +} + +static void +skip_variable (const unsigned char **data, const unsigned char *end) +{ + int length; + + length = readLength (data, end); + if (length < 0) + { + (*data) = end; /* skip to the end */ + return; + } + /* Skip over length bytes */ + (*data) += length; +} + +static char * +process_COM (const unsigned char **data, const unsigned char *end) +{ + unsigned int length; + int ch; + int pos; + char *comment; + + length = readLength (data, end); + if (length <= 0) + return NULL; + comment = malloc (length + 1); + pos = 0; + while (length > 0) + { + ch = NEXTC (data, end); + if ((ch == '\r') || (ch == '\n')) + comment[pos++] = '\n'; + else if (isprint (ch)) + comment[pos++] = ch; + length--; + } + comment[pos] = '\0'; + return comment; +} + + +int +EXTRACTOR_jpeg_extract (const unsigned char *data, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls, + const char *options) +{ + int c1; + int c2; + int marker; + const unsigned char *end; + char *tmp; + char val[128]; + + if (size < 0x12) + return 0; + end = &data[size]; + c1 = NEXTC (&data, end); + c2 = NEXTC (&data, end); + if ((c1 != 0xFF) || (c2 != M_SOI)) + return 0; /* not a JPEG */ + if (0 != proc (proc_cls, + "jpeg", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "image/jpeg", + 
strlen ("image/jpeg")+1)) + return 1; + while (1) + { + marker = next_marker (&data, end); + switch (marker) + { + case -1: /* end of file */ + case M_SOS: + case M_EOI: + goto RETURN; + case M_APP0: + { + int len = readLength (&data, end); + if (len < 0x8) + goto RETURN; + if (0 == strncmp ((char *) data, "JFIF", 4)) + { + switch (data[0x4]) + { + case 1: /* dots per inch */ + snprintf (val, + sizeof (val), + _("%ux%u dots per inch"), + (data[0x8] << 8) + data[0x9], + (data[0xA] << 8) + data[0xB]); + if (0 != proc (proc_cls, + "jpeg", + EXTRACTOR_METATYPE_IMAGE_RESOLUTION, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + val, + strlen (val)+1)) + return 1; + break; + case 2: /* dots per cm */ + snprintf (val, + sizeof (val), + _("%ux%u dots per cm"), + (data[0x8] << 8) + data[0x9], + (data[0xA] << 8) + data[0xB]); + if (0 != proc (proc_cls, + "jpeg", + EXTRACTOR_METATYPE_IMAGE_RESOLUTION, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + val, + strlen (val)+1)) + return 1; + break; + case 0: /* no unit given */ + snprintf (val, + sizeof (val), + _("%ux%u dots per inch?"), + (data[0x8] << 8) + data[0x9], + (data[0xA] << 8) + data[0xB]); + if (0 != proc (proc_cls, + "jpeg", + EXTRACTOR_METATYPE_IMAGE_RESOLUTION, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + val, + strlen (val)+1)) + return 1; + break; + default: /* unknown unit */ + break; + } + } + data = &data[len]; + break; + } + case 0xC0: + { + int len = readLength (&data, end); + if (len < 0x9) + goto RETURN; + snprintf (val, + sizeof (val), + "%ux%u", + (data[0x3] << 8) + data[0x4], + (data[0x1] << 8) + data[0x2]); + if (0 != proc (proc_cls, + "jpeg", + EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + val, + strlen (val)+1)) + return 1; + data = &data[len]; + break; + } + case M_COM: + case M_APP12: + tmp = process_COM (&data, end); + if (NULL == tmp) + break; + if (0 != proc (proc_cls, + "jpeg", + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + tmp, + 
strlen (tmp)+1)) + { + free (tmp); + return 1; + } + free (tmp); + break; + default: + skip_variable (&data, end); + break; + } + } +RETURN: + return 0; +} diff --git a/src/plugins/jpegextractor.c b/src/plugins/jpegextractor.c @@ -1,243 +0,0 @@ -/* - This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. - */ - -#include "platform.h" -#include "extractor.h" - - -#define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ -#define M_EOI 0xD9 /* End Of Image (end of datastream) */ -#define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ -#define M_APP12 0xEC -#define M_COM 0xFE /* COMment */ -#define M_APP0 0xE0 - -static EXTRACTOR_KeywordList * -addKeyword (EXTRACTOR_KeywordType type, - char *keyword, EXTRACTOR_KeywordList * next) -{ - EXTRACTOR_KeywordList *result; - - if (keyword == NULL) - return next; - result = malloc (sizeof (EXTRACTOR_KeywordList)); - result->next = next; - result->keyword = keyword; - result->keywordType = type; - return result; -} - -/** - * Get the next character in the sequence and advance - * the pointer *data to the next location in the sequence. - * If we're at the end, return -1. 
- */ -#define NEXTC(data,end) ((*(data)<(end))?*((*(data))++):-1) - -/* The macro does: -unsigned int NEXTC(unsigned char ** data, char * end) { - if (*data < end) { - char result = **data; - (*data)++; - return result; - } else - return -1; -} -*/ - -/** - * Read length, convert to unsigned int. - * All 2-byte quantities in JPEG markers are MSB first - * @return -1 on error - */ -static int -readLength (unsigned char **data, unsigned char *end) -{ - int c1; - int c2; - - c1 = NEXTC (data, end); - if (c1 == -1) - return -1; - c2 = NEXTC (data, end); - if (c2 == -1) - return -1; - return ((((unsigned int) c1) << 8) + ((unsigned int) c2)) - 2; -} - -/** - * @return the next marker or -1 on error. - */ -static int -next_marker (unsigned char **data, unsigned char *end) -{ - int c; - c = NEXTC (data, end); - while ((c != 0xFF) && (c != -1)) - c = NEXTC (data, end); - do - { - c = NEXTC (data, end); - } - while ((c == 0xFF) && (c != -1)); - return c; -} - -static void -skip_variable (unsigned char **data, unsigned char *end) -{ - int length; - - length = readLength (data, end); - if (length < 0) - { - (*data) = end; /* skip to the end */ - return; - } - /* Skip over length bytes */ - (*data) += length; -} - -static char * -process_COM (unsigned char **data, unsigned char *end) -{ - unsigned int length; - int ch; - int pos; - char *comment; - - length = readLength (data, end); - if (length <= 0) - return NULL; - comment = malloc (length + 1); - pos = 0; - while (length > 0) - { - ch = NEXTC (data, end); - if ((ch == '\r') || (ch == '\n')) - comment[pos++] = '\n'; - else if (isprint (ch)) - comment[pos++] = ch; - length--; - } - comment[pos] = '\0'; - return comment; -} - -struct EXTRACTOR_Keywords * -libextractor_jpeg_extract (const char *filename, - unsigned char *data, - size_t size, struct EXTRACTOR_Keywords *prev) -{ - int c1; - int c2; - int marker; - unsigned char *end; - struct EXTRACTOR_Keywords *result; - - if (size < 0x12) - return prev; - result = prev; - end 
= &data[size]; - c1 = NEXTC (&data, end); - c2 = NEXTC (&data, end); - if ((c1 != 0xFF) || (c2 != M_SOI)) - return result; /* not a JPEG */ - result = addKeyword (EXTRACTOR_MIMETYPE, strdup ("image/jpeg"), result); - while (1) - { - marker = next_marker (&data, end); - switch (marker) - { - case -1: /* end of file */ - case M_SOS: - case M_EOI: - goto RETURN; /* this used to be "return result", but this - makes certain compilers unhappy... */ - case M_APP0: - { - int len = readLength (&data, end); - if (len < 0x8) - goto RETURN; - if (0 == strncmp ((char *) data, "JFIF", 4)) - { - char *val; - - switch (data[0x4]) - { - case 1: /* dots per inch */ - val = malloc (128); - snprintf (val, 128, - _("%ux%u dots per inch"), - (data[0x8] << 8) + data[0x9], - (data[0xA] << 8) + data[0xB]); - result = addKeyword (EXTRACTOR_RESOLUTION, val, result); - break; - case 2: /* dots per cm */ - val = malloc (128); - snprintf (val, 128, - _("%ux%u dots per cm"), - (data[0x8] << 8) + data[0x9], - (data[0xA] << 8) + data[0xB]); - result = addKeyword (EXTRACTOR_RESOLUTION, val, result); - break; - case 0: /* no unit given */ - val = malloc (128); - snprintf (val, 128, - _("%ux%u dots per inch?"), - (data[0x8] << 8) + data[0x9], - (data[0xA] << 8) + data[0xB]); - result = addKeyword (EXTRACTOR_RESOLUTION, val, result); - break; - default: /* unknown unit */ - break; - } - } - data = &data[len]; - break; - } - case 0xC0: - { - char *val; - int len = readLength (&data, end); - if (len < 0x9) - goto RETURN; - val = malloc (128); - snprintf (val, 128, - "%ux%u", - (data[0x3] << 8) + data[0x4], - (data[0x1] << 8) + data[0x2]); - result = addKeyword (EXTRACTOR_SIZE, val, result); - data = &data[len]; - break; - } - case M_COM: - case M_APP12: - result = addKeyword (EXTRACTOR_COMMENT, - process_COM (&data, end), result); - break; - default: - skip_variable (&data, end); - break; - } - } -RETURN: - return result; -}