libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit fc2f142a02125b0f72dcce3eb14cf98b1eaa89aa
parent e2ec7235a685ebc687a30e68a35c20d7749bf425
Author: Christian Grothoff <christian@grothoff.org>
Date:   Thu, 14 Jul 2005 19:48:48 +0000

API updates

Diffstat:
M doc/libextractor.3 | 6 ++++--
M src/include/extractor.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
M src/include/plibc.h | 30 +++++++++++++++---------------
M src/main/extractor.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/plugins/exiv2/exiv2extractor.cc | 7 ++-----
M src/plugins/filenameextractor.c | 14 ++++++++------
M src/plugins/manextractor.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++----------------
M src/plugins/mp3extractor.c | 2 +-
M src/plugins/oggextractor.c | 10 +---------
M src/plugins/ole2/ole2extractor.c | 16 +++++++++-------
M src/plugins/oo/ooextractor.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M src/plugins/psextractor.c | 2 +-
M src/plugins/tarextractor.c | 47 ++++++++++++++++++++++++++++++++++-------------
D src/plugins/thumbnail/thumbnailextractor-old.c | 253 -------------------------------------------------------------------------------
M src/plugins/thumbnail/thumbnailextractor.c | 98 ++++++++++++-------------------------------------------------------------------
15 files changed, 386 insertions(+), 419 deletions(-)

diff --git a/doc/libextractor.3 b/doc/libextractor.3 @@ -1,6 +1,6 @@ -.TH LIBEXTRACTOR 3 "Apr 5, 2005" +.TH LIBEXTRACTOR 3 "Jul 14, 2005" .SH NAME -libextractor \- meta\-information extraction library 0.5.0 +libextractor \- meta\-information extraction library 0.5.2 .SH SYNOPSIS \fB#include <extractor.h> @@ -28,6 +28,8 @@ libextractor \- meta\-information extraction library 0.5.0 \fBEXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * \fIextractor\fB, const char * \fIfilename\fB); + \fBEXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * \fIextractor\fB, const char * \fIdata\fB, size_t \fIsize\fB); + \fBEXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * \fIlist\fB); \fBEXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList * \fIlist\fB, const unsigned int \fIoptions\fB); diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -161,6 +161,9 @@ typedef struct EXTRACTOR_Keywords { /** * Signature of the extract method that each plugin * must provide. + * + * @param filename MAYBE NULL (!) + * @param data must not be modified (!) */ typedef EXTRACTOR_KeywordList * (*ExtractMethod)(const char * filename, @@ -273,6 +276,22 @@ EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor, /** + * Extract keywords from a buffer in memory + * using the available extractors. + * + * @param extractor the list of extractor libraries + * @param data the data of the file + * @param size the number of bytes in data + * @return the list of keywords found in the file, NULL if none + * were found (or other errors) + */ +EXTRACTOR_KeywordList * +EXTRACTOR_getKeywords2(EXTRACTOR_ExtractorList * extractor, + const char * data, + size_t size); + + +/** * Remove duplicate keywords from the list. * @param list the original keyword list (destroyed in the process!) 
* @param options a set of options (DUPLICATES_XXXX) @@ -339,6 +358,31 @@ const char * EXTRACTOR_extractLastByString(const char * type, unsigned int EXTRACTOR_countKeywords(EXTRACTOR_KeywordList * keywords); +/** + * This function can be used to decode the binary data + * encoded in the libextractor metadata (i.e. for + * the thumbnails). + * + * @param in 0-terminated string from the meta-data + * @return 1 on error, 0 on success + */ +int EXTRACTOR_binaryDecode(const unsigned char * in, + unsigned char ** out, + size_t * outSize); + + +/** + * Encode the given binary data object + * as a 0-terminated C-string according + * to the LE binary data encoding standard. + * + * @return NULL on error, the 0-terminated + * encoding otherwise + */ +char * EXTRACTOR_binaryEncode(const char * data, + size_t size); + + #ifdef __cplusplus } #endif diff --git a/src/include/plibc.h b/src/include/plibc.h @@ -1,20 +1,20 @@ /* - This file is part of PlibC. - (C) 2005 Nils Durner (and other contributing authors) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. + This file is part of PlibC. + (C) 2005 Nils Durner (and other contributing authors) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
- You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /** diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -647,8 +647,11 @@ EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * extractor, return NULL; result = NULL; while (extractor != NULL) { - result = extractor->extractMethod (filename, buffer, size, result, - extractor->options); + result = extractor->extractMethod(filename, + buffer, + size, + result, + extractor->options); extractor = extractor->next; } if (size > 0) @@ -903,4 +906,110 @@ EXTRACTOR_countKeywords (EXTRACTOR_KeywordList * keywords) return count; } +/** + * Encode the given binary data object + * as a 0-terminated C-string according + * to the LE binary data encoding standard. + * + * @return NULL on error, the 0-terminated + * encoding otherwise + */ +char * EXTRACTOR_binaryEncode(const char * data, + size_t size) { + + char * binary; + size_t pos; + size_t end; + size_t wpos; + size_t i; + unsigned int markers[8]; /* 256 bits */ + unsigned char marker; + char * format; + + /* encode! */ + binary = malloc(2 + size + (size+256) / 254); + if (binary == NULL) + return NULL; + + pos = 0; + wpos = 0; + while (pos < size) { + /* find unused value between 1 and 255 in + the next 254 bytes */ + end = pos + 254; + if (end < pos) + break; /* integer overflow! 
*/ + if (end > size) + end = size; + memset(markers, + 0, + sizeof(markers)); + for (i=pos;i<end;i++) + markers[data[i]&7] |= 1 << (data[i] >> 3); + marker = 1; + while (markers[marker&7] & (1 << (marker >> 3))) { + marker++; + if (marker == 0) { + /* assertion failed... */ + free(binary); + return NULL; + } + } + /* recode */ + binary[wpos++] = marker; + for (i=pos;i<end;i++) + binary[wpos++] = data[i] == 0 ? marker : data[i]; + pos = end; + } + binary[wpos++] = 0; /* 0-termination! */ + return binary; +} + + +/** + * This function can be used to decode the binary data + * encoded in the libextractor metadata (i.e. for + * the thumbnails). + * + * @param in 0-terminated string from the meta-data + * @return 1 on error, 0 on success + */ +int EXTRACTOR_binaryDecode(const unsigned char * in, + unsigned char ** out, + size_t * outSize) { + unsigned char * buf; + size_t pos; + size_t wpos; + unsigned char marker; + size_t i; + size_t end; + size_t inSize; + + inSize = strlen(in); + if (inSize == 0) { + *out = NULL; + *outSize = 0; + return 1; + } + + buf = malloc(inSize); /* slightly more than needed ;-) */ + *out = buf; + + pos = 0; + wpos = 0; + while (pos < inSize) { + end = pos + 255; /* 255 here: count the marker! */ + if (end > inSize) + end = inSize; + marker = in[pos++]; + for (i=pos;i<end;i++) + buf[wpos++] = (in[i] == marker) ? 
0 : in[i]; + pos = end; + } + *outSize = wpos; + return 0; +} + + + /* end of extractor.c */ diff --git a/src/plugins/exiv2/exiv2extractor.cc b/src/plugins/exiv2/exiv2extractor.cc @@ -83,7 +83,7 @@ struct EXTRACTOR_Keywords * addExiv2Tag(const Exiv2::ExifData& exifData, extern "C" { - struct EXTRACTOR_Keywords * libextractor_exiv2_extract(char * filename, + struct EXTRACTOR_Keywords * libextractor_exiv2_extract(const char * filename, unsigned char * data, size_t size, struct EXTRACTOR_Keywords * prev) @@ -91,11 +91,8 @@ extern "C" { struct EXTRACTOR_Keywords * result = 0; try { - if (!Exiv2::fileExists(filename, true)) return result; - - - Exiv2::Image::AutoPtr image = Exiv2::ImageFactory::open(filename); + Exiv2::Image::AutoPtr image = Exiv2::ImageFactory::open(data, size); assert(image.get() != 0); image->readMetadata(); Exiv2::ExifData &exifData = image->exifData(); diff --git a/src/plugins/filenameextractor.c b/src/plugins/filenameextractor.c @@ -24,14 +24,17 @@ /* "extract" the 'filename' as a keyword */ -struct EXTRACTOR_Keywords * libextractor_filename_extract(const char * filename, - char * date, - size_t size, - struct EXTRACTOR_Keywords * prev) { +struct EXTRACTOR_Keywords * +libextractor_filename_extract(const char * filename, + char * date, + size_t size, + struct EXTRACTOR_Keywords * prev) { EXTRACTOR_KeywordList * keyword; const char * filenameRoot = filename; int res; + if (filename == NULL) + return prev; for (res=strlen(filename)-1;res>=0;res--) if (filename[res] == DIR_SEPARATOR) { filenameRoot = &filename[res+1]; @@ -41,8 +44,7 @@ struct EXTRACTOR_Keywords * libextractor_filename_extract(const char * filename, keyword->next = prev; keyword->keyword = convertToUtf8(filenameRoot, strlen(filenameRoot), - nl_langinfo(CODESET) - ); + nl_langinfo(CODESET)); keyword->keywordType = EXTRACTOR_FILENAME; return keyword; } diff --git a/src/plugins/manextractor.c b/src/plugins/manextractor.c @@ -203,31 +203,63 @@ static struct EXTRACTOR_Keywords * 
tryParse(const char * buf, return prev; } +static voidpf Emalloc(voidpf opaque, uInt items, uInt size) { + return malloc(size * items); +} + +static void Efree(voidpf opaque, voidpf ptr) { + free(ptr); +} + /** * How many bytes do we actually try to scan? (from the beginning * of the file). */ #define MAX_READ 2048 -struct EXTRACTOR_Keywords * libextractor_man_extract(const char * filename, - char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { - gzFile gz; +struct EXTRACTOR_Keywords * +libextractor_man_extract(const char * filename, + char * data, + size_t size, + struct EXTRACTOR_Keywords * prev) { + z_stream strm; char * buf; int len; - gz = gzopen(filename, "rb"); - buf = malloc(MAX_READ); - len = gzread(gz, buf, MAX_READ); - if (len < 0) { + memset(&strm, + 0, + sizeof(z_stream)); + strm.next_in = (char*) data; + strm.avail_in = size; + strm.total_in = 0; + strm.zalloc = &Emalloc; + strm.zfree = &Efree; + strm.opaque = NULL; + if (Z_OK == inflateInit2(&strm, + 15 + 32)) { + buf = malloc(MAX_READ); + if (buf == NULL) { + inflateEnd(&strm); + return prev; + } + strm.next_out = buf; + strm.avail_out = MAX_READ; + inflate(&strm, + Z_FINISH); + if (strm.total_out > 0) { + prev = tryParse(buf, + strm.total_out, + prev); + inflateEnd(&strm); + free(buf); + return prev; + } free(buf); - gzclose(gz); - return prev; - } - gzclose(gz); - prev = tryParse(buf, len, prev); - free(buf); - return prev; + inflateEnd(&strm); + } + return tryParse(data, + size, + prev); } +/* end of manextractor.c */ diff --git a/src/plugins/mp3extractor.c b/src/plugins/mp3extractor.c @@ -435,7 +435,7 @@ mp3parse(char * data, /* mimetype = audio/mpeg */ struct EXTRACTOR_Keywords * -libextractor_mp3_extract(char * filename, +libextractor_mp3_extract(const char * filename, char * data, size_t size, struct EXTRACTOR_Keywords * klist) { diff --git a/src/plugins/oggextractor.c b/src/plugins/oggextractor.c @@ -74,7 +74,7 @@ static long tellError(void * datasource) { } /* mimetype = 
application/ogg */ -struct EXTRACTOR_Keywords * libextractor_ogg_extract(char * filename, +struct EXTRACTOR_Keywords * libextractor_ogg_extract(const char * filename, char * data, size_t size, struct EXTRACTOR_Keywords * prev) { @@ -94,20 +94,12 @@ struct EXTRACTOR_Keywords * libextractor_ogg_extract(char * filename, callbacks.close_func = &closeOk; callbacks.tell_func = &tellError; if (0 != ov_open_callbacks(NULL, &vf, data, size, callbacks)) { -#if DEBUG_EXTRACT_OGG - fprintf(stderr,"\nError opening file %s as ogg\n",filename); -#endif ov_clear(&vf); return prev; } comments = ov_comment(&vf, -1); if (NULL == comments) { -#if DEBUG_EXTRACT_OGG - fprintf(stderr, - "\nError decoding ogg information of %s, ignoring.\n", - filename); -#endif ov_clear(&vf); return prev; } diff --git a/src/plugins/ole2/ole2extractor.c b/src/plugins/ole2/ole2extractor.c @@ -44,9 +44,10 @@ void __attribute__ ((constructor)) ole_gobject_init(void) { g_type_init(); } -static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordList *oldhead, - const char *phrase, - EXTRACTOR_KeywordType type) { +static struct EXTRACTOR_Keywords * +addKeyword(EXTRACTOR_KeywordList *oldhead, + const char *phrase, + EXTRACTOR_KeywordType type) { EXTRACTOR_KeywordList * keyword; if (strlen(phrase) == 0) @@ -917,10 +918,11 @@ static struct EXTRACTOR_Keywords * processSO(GsfInput * src, return prev; } -struct EXTRACTOR_Keywords * libextractor_ole2_extract(const char * filename, - char * date, - size_t size, - struct EXTRACTOR_Keywords * prev) { +struct EXTRACTOR_Keywords * +libextractor_ole2_extract(const char * filename, + char * date, + size_t size, + struct EXTRACTOR_Keywords * prev) { GsfInput *input; GsfInfile *infile; int i; diff --git a/src/plugins/oo/ooextractor.c b/src/plugins/oo/ooextractor.c @@ -21,6 +21,7 @@ #include "platform.h" #include "extractor.h" #include "unzip.h" +#include "ioapi.h" #define CASESENSITIVITY (0) #define MAXFILENAME (256) @@ -68,11 +69,85 @@ static Matches tmap[] = { { NULL, 0 
}, }; +typedef struct Ecls { + char * data; + size_t size; + size_t pos; +} Ecls; -struct EXTRACTOR_Keywords * libextractor_oo_extract(const char * filename, - char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { +static voidpf Eopen_file_func (voidpf opaque, + const char* filename, + int mode) { + if (0 == strcmp(filename, + "ERROR")) + return opaque; + else + return NULL; +} +static uLong Eread_file_func(voidpf opaque, + voidpf stream, + void* buf, + uLong size) { + Ecls * e = opaque; + uLong ret; + + ret = e->size - e->pos; + if (ret > size) + ret = size; + memcpy(buf, + e->data, + ret); + return ret; +} + +static long Etell_file_func(voidpf opaque, + voidpf stream) { + Ecls * e = opaque; + return e->pos; +} + +static long Eseek_file_func(voidpf opaque, + voidpf stream, + uLong offset, + int origin) { + Ecls * e = opaque; + + switch (origin) { + case ZLIB_FILEFUNC_SEEK_SET: + e->pos = offset; + break; + case ZLIB_FILEFUNC_SEEK_END: + if (offset > e->size) + return -1; + e->pos = e->size - offset; + break; + case ZLIB_FILEFUNC_SEEK_CUR: + if (offset < - e->pos) + return -1; + e->pos += offset; + break; + default: + return -1; + } + return e->pos; +} + +static int Eclose_file_func(voidpf opaque, + voidpf stream) { + Ecls * e = opaque; + return 0; +} +static int Etesterror_file_func(voidpf opaque, + voidpf stream) { + return 0; +} + + +struct EXTRACTOR_Keywords * +libextractor_oo_extract(const char * filename, + char * data, + size_t size, + struct EXTRACTOR_Keywords * prev) { char filename_inzip[MAXFILENAME]; unzFile uf; unz_file_info file_info; @@ -80,13 +155,27 @@ struct EXTRACTOR_Keywords * libextractor_oo_extract(const char * filename, char * pbuf; size_t buf_size; int i; + zlib_filefunc_def io; + Ecls cls; if (size < 100) return prev; if ( !( ('P'==data[0]) && ('K'==data[1]) && (0x03==data[2]) && (0x04==data[3])) ) return prev; - uf = unzOpen(filename); + cls.data = data; + cls.size = size; + cls.pos = 0; + io.zopen_file = &Eopen_file_func; + 
io.zread_file = &Eread_file_func; + io.zwrite_file = NULL; + io.ztell_file = &Etell_file_func; + io.zseek_file = &Eseek_file_func; + io.zclose_file = &Eclose_file_func; + io.zerror_file = &Etesterror_file_func; + io.opaque = &cls; + + uf = unzOpen2("ERROR", &io); if (uf == NULL) return prev; diff --git a/src/plugins/psextractor.c b/src/plugins/psextractor.c @@ -142,7 +142,7 @@ static char * blacklist[] = { }; /* mimetype = application/postscript */ -struct EXTRACTOR_Keywords * libextractor_ps_extract(char * filename, +struct EXTRACTOR_Keywords * libextractor_ps_extract(const char * filename, char * data, size_t size, struct EXTRACTOR_Keywords * prev) { diff --git a/src/plugins/tarextractor.c b/src/plugins/tarextractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -136,20 +136,29 @@ tar_extract(const char * data, return prev; } +static voidpf Emalloc(voidpf opaque, uInt items, uInt size) { + return malloc(size * items); +} + +static void Efree(voidpf opaque, voidpf ptr) { + free(ptr); +} + /* do not decompress tar.gz files > 16 MB */ #define MAX_TGZ_SIZE 16 * 1024 * 1024 -struct EXTRACTOR_Keywords * libextractor_tar_extract(const char * filename, - const unsigned char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { +struct EXTRACTOR_Keywords * +libextractor_tar_extract(const char * filename, + const unsigned char * data, + size_t size, + struct EXTRACTOR_Keywords * prev) { if ( (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08) ) { time_t ctime; char * buf; size_t bufSize; - gzFile gzf; + z_stream strm; /* Creation time */ ctime = (((((( (unsigned int)data[7] << 8) @@ -172,21 +181,33 @@ struct EXTRACTOR_Keywords * libextractor_tar_extract(const char * filename, if (bufSize 
> MAX_TGZ_SIZE) { return prev; } - gzf = gzopen(filename, "rb"); - if (gzf == NULL) { + + memset(&strm, 0, sizeof(z_stream)); + strm.next_in = (char*) data; + strm.avail_in = size; + strm.total_in = 0; + strm.zalloc = &Emalloc; + strm.zfree = &Efree; + strm.opaque = NULL; + if (Z_OK != inflateInit2(&strm, + 15 + 32)) return prev; - } buf = malloc(bufSize); if (buf == NULL) { - gzclose(gzf); + inflateEnd(&strm); return prev; } - if (bufSize != gzread(gzf, buf, bufSize)) { + strm.next_out = buf; + strm.avail_out = bufSize; + inflate(&strm, + Z_FINISH); + if (strm.total_out == 0) { + inflateEnd(&strm); free(buf); - gzclose(gzf); return prev; } - gzclose(gzf); + bufSize = strm.total_out; + inflateEnd(&strm); prev = tar_extract(buf, bufSize, prev); free(buf); return prev; diff --git a/src/plugins/thumbnail/thumbnailextractor-old.c b/src/plugins/thumbnail/thumbnailextractor-old.c @@ -1,253 +0,0 @@ -/* - This file is part of libextractor. - (C) 2005 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. - */ - -/** - * @file thumbnailextractor.c - * @author Christian Grothoff - * @brief this extractor produces a binary (!) encoded - * thumbnail of images (using imagemagick). The bottom - * of the file includes a decoder method that can be used - * to reproduce the 128x128 PNG thumbnails. 
- */ - -#include "platform.h" -#include "extractor.h" -#include <wand/magick_wand.h> - -#define THUMBSIZE 128 - -static EXTRACTOR_KeywordList * addKeyword(EXTRACTOR_KeywordType type, - char * keyword, - EXTRACTOR_KeywordList * next) { - EXTRACTOR_KeywordList * result; - - if (keyword == NULL) - return next; - result = malloc(sizeof(EXTRACTOR_KeywordList)); - result->next = next; - result->keyword = keyword; - result->keywordType = type; - return result; -} - - -/* which mime-types maybe subjected to - the thumbnail extractor (ImageMagick - crashes and/or prints errors for bad - formats, so we need to be rather - conservative here) */ -static char * whitelist[] = { - "image/jpeg", - "image/gif", - "image/miff", - "image/mng", - "image/png", - "image/tiff", - "image/x-bmp", - "image/x-mng", - "image/x-png", - "image/x-xpm", - "image/xcf", - NULL, -}; - -struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename, - const char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { - MagickBooleanType status; - MagickWand * magick_wand; - size_t length; - char * thumb; - unsigned long width; - unsigned long height; - char * binary; - size_t pos; - size_t end; - size_t wpos; - size_t i; - unsigned int markers[8]; /* 256 bits */ - unsigned char marker; - const char * mime; - int j; - - /* if the mime-type of the file is not whitelisted - do not run the thumbnail extactor! 
*/ - mime = EXTRACTOR_extractLast(EXTRACTOR_MIMETYPE, - prev); - if (mime == NULL) - return prev; - j = 0; - while (whitelist[j] != NULL) { - if (0 == strcmp(whitelist[j], mime)) - break; - j++; - } - if (whitelist[j] == NULL) - return prev; - - magick_wand = NewMagickWand(); - status = MagickReadImageBlob(magick_wand, data, size); - if (status == MagickFalse) { - DestroyMagickWand(magick_wand); - return prev; - } - MagickResetIterator(magick_wand); - if (MagickNextImage(magick_wand) == MagickFalse) - return prev; - - height = MagickGetImageHeight(magick_wand); - width = MagickGetImageWidth(magick_wand); - if (height == 0) - height = 1; - if (width == 0) - width = 1; - if ( (height <= THUMBSIZE) && - (width <= THUMBSIZE) ) { - DestroyMagickWand(magick_wand); - return prev; - } - - - if (height > THUMBSIZE) { - width = width * THUMBSIZE / height; - height = THUMBSIZE; - } - if (width > THUMBSIZE) { - height = height * THUMBSIZE / width; - width = THUMBSIZE; - } - MagickResizeImage(magick_wand, height, width, LanczosFilter, 1.0); - MagickSetImageDepth(magick_wand, - 8); - MagickSetImageChannelDepth(magick_wand, - RedChannel, - 2); - MagickCommentImage(magick_wand, ""); - MagickSetImageChannelDepth(magick_wand, - GreenChannel, - 2); - MagickSetImageChannelDepth(magick_wand, - BlueChannel, - 2); - MagickSetImageChannelDepth(magick_wand, - OpacityChannel, - 2); - MagickSetImageInterlaceScheme(magick_wand, - NoInterlace); - - if (MagickFalse == MagickSetImageFormat(magick_wand, "png")) { - DestroyMagickWand(magick_wand); - return prev; - } - thumb = MagickGetImageBlob(magick_wand, &length); - DestroyMagickWand(magick_wand); - if (thumb == NULL) - return prev; - - - /* encode! */ - binary = malloc(2 + length + (length+256) / 254); - if (binary == NULL) - return prev; - - pos = 0; - wpos = 0; - while (pos < length) { - /* find unused value between 1 and 255 in - the next 254 bytes */ - end = pos + 254; - if (end < pos) - break; /* integer overflow! 
*/ - if (end > length) - end = length; - memset(markers, 0, sizeof(markers)); - for (i=pos;i<end;i++) - markers[thumb[i]&7] |= 1 << (thumb[i] >> 3); - marker = 1; - while (markers[marker&7] & (1 << (marker >> 3))) { - marker++; - if (marker == 0) { - /* assertion failed... */ - free(binary); - free(thumb); - return prev; - } - } - /* recode */ - binary[wpos++] = marker; - for (i=pos;i<end;i++) - binary[wpos++] = thumb[i] == 0 ? marker : thumb[i]; - pos = end; - } - binary[wpos++] = 0; /* 0-termination! */ - free(thumb); - return addKeyword(EXTRACTOR_THUMBNAIL_DATA, - binary, - prev); -} - -#if 0 - -/** - * This function can be used to decode the binary data - * stream produced by the thumbnailextractor. - * - * @param in 0-terminated string from the meta-data - * @return 1 on error, 0 on success - */ -int decodeThumbnail(const unsigned char * in, - unsigned char ** out, - size_t * outSize) { - unsigned char * buf; - size_t pos; - size_t wpos; - unsigned char marker; - size_t i; - size_t end; - size_t inSize; - - inSize = strlen(in); - if (inSize == 0) { - *out = NULL; - *outSize = 0; - return 1; - } - - buf = malloc(inSize); /* slightly more than needed ;-) */ - *out = buf; - - pos = 0; - wpos = 0; - while (pos < inSize) { - end = pos + 255; /* 255 here: count the marker! */ - if (end > inSize) - end = inSize; - marker = in[pos++]; - for (i=pos;i<end;i++) - buf[wpos++] = (in[i] == marker) ? 
0 : in[i]; - pos = end; - } - *outSize = wpos; - return 0; -} - - -#endif diff --git a/src/plugins/thumbnail/thumbnailextractor.c b/src/plugins/thumbnail/thumbnailextractor.c @@ -79,6 +79,7 @@ struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename const char * data, size_t size, struct EXTRACTOR_Keywords * prev) { + GdkPixbufLoader * loader; GdkPixbuf * in; GdkPixbuf * out; size_t length; @@ -112,8 +113,14 @@ struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename if (whitelist[j] == NULL) return prev; - in = gdk_pixbuf_new_from_file(filename, - &err); + loader = gdk_pixbuf_loader_new(); + gdk_pixbuf_loader_write(loader, + data, + size, + NULL); + in = gdk_pixbuf_loader_get_pixbuf(loader); + gdk_pixbuf_loader_close(loader, + NULL); if (in == NULL) return prev; height = gdk_pixbuf_get_height(in); @@ -164,92 +171,15 @@ struct EXTRACTOR_Keywords * libextractor_thumbnail_extract(const char * filename if (thumb == NULL) return prev; - - /* encode! */ - binary = malloc(2 + length + (length+256) / 254); + binary + = EXTRACTOR_binaryEncode(thumb, + length); + FREE(thumb); if (binary == NULL) return prev; - - pos = 0; - wpos = 0; - while (pos < length) { - /* find unused value between 1 and 255 in - the next 254 bytes */ - end = pos + 254; - if (end < pos) - break; /* integer overflow! */ - if (end > length) - end = length; - memset(markers, 0, sizeof(markers)); - for (i=pos;i<end;i++) - markers[thumb[i]&7] |= 1 << (thumb[i] >> 3); - marker = 1; - while (markers[marker&7] & (1 << (marker >> 3))) { - marker++; - if (marker == 0) { - /* assertion failed... */ - free(binary); - free(thumb); - return prev; - } - } - /* recode */ - binary[wpos++] = marker; - for (i=pos;i<end;i++) - binary[wpos++] = thumb[i] == 0 ? marker : thumb[i]; - pos = end; - } - binary[wpos++] = 0; /* 0-termination! 
*/ - free(thumb); return addKeyword(EXTRACTOR_THUMBNAIL_DATA, binary, prev); } -#if 0 - -/** - * This function can be used to decode the binary data - * stream produced by the thumbnailextractor. - * - * @param in 0-terminated string from the meta-data - * @return 1 on error, 0 on success - */ -int decodeThumbnail(const unsigned char * in, - unsigned char ** out, - size_t * outSize) { - unsigned char * buf; - size_t pos; - size_t wpos; - unsigned char marker; - size_t i; - size_t end; - size_t inSize; - - inSize = strlen(in); - if (inSize == 0) { - *out = NULL; - *outSize = 0; - return 1; - } - - buf = malloc(inSize); /* slightly more than needed ;-) */ - *out = buf; - - pos = 0; - wpos = 0; - while (pos < inSize) { - end = pos + 255; /* 255 here: count the marker! */ - if (end > inSize) - end = inSize; - marker = in[pos++]; - for (i=pos;i<end;i++) - buf[wpos++] = (in[i] == marker) ? 0 : in[i]; - pos = end; - } - *outSize = wpos; - return 0; -} - - -#endif +/* end of thumbnailextractor.c */