libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit d196e141f9259dad2812a7cb3a0dfb9471f34810
parent 1401a8851546bcd9989beaf3304d4223c7fb2e84
Author: Christian Grothoff <christian@grothoff.org>
Date:   Thu,  8 Sep 2005 04:46:11 +0000

release

Diffstat:
MChangeLog | 7+++++++
Mconfigure.ac | 11++++++++---
Mcontrib/doxygen | 2+-
Mpo/de.po | 28++++++++++++++--------------
Mpo/libextractor.pot | 28++++++++++++++--------------
Mpo/ro.po | 28++++++++++++++--------------
Mpo/rw.po | 28++++++++++++++--------------
Msrc/include/extractor.h | 2+-
Msrc/main/Makefile.am | 9++++++++-
Msrc/main/extractor.c | 213++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
Msrc/plugins/Makefile.am | 3---
Msrc/plugins/manextractor.c | 76+++++++++++++++-------------------------------------------------------------
Msrc/plugins/printable/bloomfilter.c | 40----------------------------------------
Msrc/plugins/printable/dictionary-builder.c | 19+++++++++++++++++++
Msrc/plugins/printable/printableextractor.c | 22++++++++++++++++++++++
Msrc/plugins/tarextractor.c | 93+++++--------------------------------------------------------------------------
16 files changed, 332 insertions(+), 277 deletions(-)

diff --git a/ChangeLog b/ChangeLog @@ -1,3 +1,10 @@ +Wed Sep 7 21:41:35 PDT 2005 + Added decompression of gz and bz2 streams to the LE core library + (avoids need to do this, possibly repeatedly, in plugins and makes + sure that all plugins work with compressed files). Eliminated gz + decompression from man and tar extractors. + Releasing libextractor 0.5.5. + Sun Sep 4 02:08:56 PDT 2005 Changed code to export fewer symbols (refactoring plus linker options, goal is to address Mantis #925. Changed debian extractor to no longer diff --git a/configure.ac b/configure.ac @@ -1,8 +1,8 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ(2.57) -AC_INIT([libextractor], [0.5.4a], [bug-libextractor@gnu.org]) +AC_INIT([libextractor], [0.5.5], [bug-libextractor@gnu.org]) AC_REVISION($Revision: 1.67 $) -AM_INIT_AUTOMAKE([libextractor], [0.5.4a]) +AM_INIT_AUTOMAKE([libextractor], [0.5.5]) AM_CONFIG_HEADER(src/include/config.h) AH_TOP([#define _GNU_SOURCE 1]) @@ -132,6 +132,11 @@ AC_CHECK_LIB(z, inflate, AC_DEFINE(HAVE_ZLIB,1,[Have zlib])], [AM_CONDITIONAL(HAVE_ZLIB, false)]) +AC_CHECK_LIB(bz2, BZ2_decompress, + [AM_CONDITIONAL(HAVE_BZ2, true) + AC_DEFINE(HAVE_LIBBZ2,1,[Have libbz2])], + [AM_CONDITIONAL(HAVE_BZ2, false)]) + # restore LIBS LIBS=$LIBSOLD @@ -156,7 +161,7 @@ AC_HEADER_STDC AC_HEADER_DIRENT AC_HEADER_STDBOOL AC_CHECK_HEADERS([fcntl.h netinet/in.h stdlib.h string.h unistd.h libintl.h limits.h stddef.h zlib.h]) -AC_CHECK_HEADERS([ltdl.h iconv.h]) +AC_CHECK_HEADERS([ltdl.h iconv.h bzlib.h]) # fixme, we need to die here if a header is not found! AC_CHECK_HEADERS([vorbis/vorbisfile.h]) diff --git a/contrib/doxygen b/contrib/doxygen @@ -23,7 +23,7 @@ PROJECT_NAME = libextractor # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 0.5.2 +PROJECT_NUMBER = 0.5.5 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/po/de.po b/po/de.po @@ -9,7 +9,7 @@ msgid "" msgstr "" "Project-Id-Version: libextractor 0.5.0\n" "Report-Msgid-Bugs-To: libextractor@gnu.org\n" -"POT-Creation-Date: 2005-09-04 02:59-0700\n" +"POT-Creation-Date: 2005-09-07 21:46-0700\n" "PO-Revision-Date: 2005-06-22 15:05+0200\n" "Last-Translator: Karl Eichwalder <ke@gnu.franken.de>\n" "Language-Team: German <de@li.org>\n" @@ -28,7 +28,7 @@ msgstr "Quell-RPM %d.%d" msgid "Binary RPM %d.%d" msgstr "Binäres RPM %d.%d" -#: src/plugins/printable/dictionary-builder.c:50 +#: src/plugins/printable/dictionary-builder.c:69 #, c-format msgid "" "Please provide the name of the language you are building\n" @@ -37,12 +37,12 @@ msgstr "" "Bitte geben Sie den Namen der Sprache an, für die Sie ein Wörterbuch\n" "erstellen. Zum Beispiel:\n" -#: src/plugins/printable/dictionary-builder.c:63 +#: src/plugins/printable/dictionary-builder.c:82 #, c-format msgid "Error opening file `%s': %s\n" msgstr "Fehler beim Öffnen der Datei »%s«: %s\n" -#: src/plugins/printable/dictionary-builder.c:71 +#: src/plugins/printable/dictionary-builder.c:90 #, c-format msgid "" "Error allocating: %s\n" @@ -51,44 +51,44 @@ msgstr "" "Fehler beim Allokieren: %s\n" "." -#: src/plugins/printable/dictionary-builder.c:83 +#: src/plugins/printable/dictionary-builder.c:102 #, c-format msgid "Increase ALLOCSIZE (in %s).\n" msgstr "ALLOCSIZE vergrößern (in %s).\n" -#: src/plugins/manextractor.c:128 +#: src/plugins/manextractor.c:140 msgid "Commands" msgstr "Befehle" -#: src/plugins/manextractor.c:133 +#: src/plugins/manextractor.c:145 msgid "System calls" msgstr "Systemaufrufe" -#: src/plugins/manextractor.c:138 +#: src/plugins/manextractor.c:150 msgid "Library calls" msgstr "Bibliotheksaufrufe" -#: src/plugins/manextractor.c:143 +#: src/plugins/manextractor.c:155 msgid "Special files" msgstr "Spezialdateien" -#: src/plugins/manextractor.c:148 +#: src/plugins/manextractor.c:160 msgid "File formats and conventions" msgstr "" -#: src/plugins/manextractor.c:153 +#: src/plugins/manextractor.c:165 msgid "Games" msgstr "Spiele" -#: src/plugins/manextractor.c:158 +#: src/plugins/manextractor.c:170 msgid "Conventions and miscellaneous" msgstr "" -#: src/plugins/manextractor.c:163 +#: src/plugins/manextractor.c:175 msgid "System management commands" msgstr "" -#: src/plugins/manextractor.c:168 +#: src/plugins/manextractor.c:180 msgid "Kernel routines" msgstr "Kernelroutinen" diff --git a/po/libextractor.pot b/po/libextractor.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: libextractor@gnu.org\n" -"POT-Creation-Date: 2005-09-04 02:59-0700\n" +"POT-Creation-Date: 2005-09-07 21:46-0700\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Language-Team: LANGUAGE <LL@li.org>\n" @@ -26,63 +26,63 @@ msgstr "" msgid "Binary RPM %d.%d" msgstr "" -#: src/plugins/printable/dictionary-builder.c:50 +#: src/plugins/printable/dictionary-builder.c:69 #, c-format msgid "" "Please provide the name of the language you are building\n" "a dictionary for. For example:\n" msgstr "" -#: src/plugins/printable/dictionary-builder.c:63 +#: src/plugins/printable/dictionary-builder.c:82 #, c-format msgid "Error opening file `%s': %s\n" msgstr "" -#: src/plugins/printable/dictionary-builder.c:71 +#: src/plugins/printable/dictionary-builder.c:90 #, c-format msgid "" "Error allocating: %s\n" "." msgstr "" -#: src/plugins/printable/dictionary-builder.c:83 +#: src/plugins/printable/dictionary-builder.c:102 #, c-format msgid "Increase ALLOCSIZE (in %s).\n" msgstr "" -#: src/plugins/manextractor.c:128 +#: src/plugins/manextractor.c:140 msgid "Commands" msgstr "" -#: src/plugins/manextractor.c:133 +#: src/plugins/manextractor.c:145 msgid "System calls" msgstr "" -#: src/plugins/manextractor.c:138 +#: src/plugins/manextractor.c:150 msgid "Library calls" msgstr "" -#: src/plugins/manextractor.c:143 +#: src/plugins/manextractor.c:155 msgid "Special files" msgstr "" -#: src/plugins/manextractor.c:148 +#: src/plugins/manextractor.c:160 msgid "File formats and conventions" msgstr "" -#: src/plugins/manextractor.c:153 +#: src/plugins/manextractor.c:165 msgid "Games" msgstr "" -#: src/plugins/manextractor.c:158 +#: src/plugins/manextractor.c:170 msgid "Conventions and miscellaneous" msgstr "" -#: src/plugins/manextractor.c:163 +#: src/plugins/manextractor.c:175 msgid "System management commands" msgstr "" -#: src/plugins/manextractor.c:168 +#: src/plugins/manextractor.c:180 msgid "Kernel routines" msgstr "" diff --git a/po/ro.po b/po/ro.po @@ -9,7 +9,7 @@ msgid "" msgstr "" "Project-Id-Version: libextractor 0.4.2\n" "Report-Msgid-Bugs-To: libextractor@gnu.org\n" -"POT-Creation-Date: 2005-09-04 02:59-0700\n" +"POT-Creation-Date: 2005-09-07 21:46-0700\n" "PO-Revision-Date: 2005-02-25 12:00-0500\n" "Last-Translator: Laurentiu Buzdugan <lbuz@rolix.org>\n" "Language-Team: Romanian <translation-team-ro@lists.sourceforge.net>\n" @@ -28,7 +28,7 @@ msgstr "Surs msgid "Binary RPM %d.%d" msgstr "Binar RPM %d.%d" -#: src/plugins/printable/dictionary-builder.c:50 +#: src/plugins/printable/dictionary-builder.c:69 #, c-format msgid "" "Please provide the name of the language you are building\n" @@ -37,12 +37,12 @@ msgstr "" "Vã rugãm furnizaþi numele limbii pentru care contruiþi\n" "un dicþionar. De exemplu:\n" -#: src/plugins/printable/dictionary-builder.c:63 +#: src/plugins/printable/dictionary-builder.c:82 #, c-format msgid "Error opening file `%s': %s\n" msgstr "Eroare deschidere fiºier `%s': %s\n" -#: src/plugins/printable/dictionary-builder.c:71 +#: src/plugins/printable/dictionary-builder.c:90 #, c-format msgid "" "Error allocating: %s\n" @@ -51,44 +51,44 @@ msgstr "" "Eroare de alocare: %s\n" "." -#: src/plugins/printable/dictionary-builder.c:83 +#: src/plugins/printable/dictionary-builder.c:102 #, c-format msgid "Increase ALLOCSIZE (in %s).\n" msgstr "Creºteþi ALLOCSIZE (în %s).\n" -#: src/plugins/manextractor.c:128 +#: src/plugins/manextractor.c:140 msgid "Commands" msgstr "Comenzi" -#: src/plugins/manextractor.c:133 +#: src/plugins/manextractor.c:145 msgid "System calls" msgstr "Apeluri sistem" -#: src/plugins/manextractor.c:138 +#: src/plugins/manextractor.c:150 msgid "Library calls" msgstr "Apeluri de bibliotecã" -#: src/plugins/manextractor.c:143 +#: src/plugins/manextractor.c:155 msgid "Special files" msgstr "Fiºiere speciale" -#: src/plugins/manextractor.c:148 +#: src/plugins/manextractor.c:160 msgid "File formats and conventions" msgstr "Formate de fiºiere ºi convenþii" -#: src/plugins/manextractor.c:153 +#: src/plugins/manextractor.c:165 msgid "Games" msgstr "Jocuri" -#: src/plugins/manextractor.c:158 +#: src/plugins/manextractor.c:170 msgid "Conventions and miscellaneous" msgstr "Convenþii ºi diverse" -#: src/plugins/manextractor.c:163 +#: src/plugins/manextractor.c:175 msgid "System management commands" msgstr "Comenzi pentru managementul sistemului" -#: src/plugins/manextractor.c:168 +#: src/plugins/manextractor.c:180 msgid "Kernel routines" msgstr "Proceduri kernel" diff --git a/po/rw.po b/po/rw.po @@ -16,7 +16,7 @@ msgid "" msgstr "" "Project-Id-Version: libextractor 0.4.2\n" "Report-Msgid-Bugs-To: libextractor@gnu.org\n" -"POT-Creation-Date: 2005-09-04 02:59-0700\n" +"POT-Creation-Date: 2005-09-07 21:46-0700\n" "PO-Revision-Date: 2005-04-04 10:55-0700\n" "Last-Translator: Steven Michael Murphy <murf@e-tools.com>\n" "Language-Team: Kinyarwanda <translation-team-rw@lists.sourceforge.net>\n" @@ -34,7 +34,7 @@ msgstr "" msgid "Binary RPM %d.%d" msgstr "" -#: src/plugins/printable/dictionary-builder.c:50 +#: src/plugins/printable/dictionary-builder.c:69 #, fuzzy, c-format msgid "" "Please provide the name of the language you are building\n" @@ -42,62 +42,62 @@ msgid "" msgstr "i Izina: Bya i Ururimi Inkoranyamagambo kugirango Urugero" # basctl/source\basicide\basidesh.src:RID_STR_ERROROPENSTORAGE.text -#: src/plugins/printable/dictionary-builder.c:63 +#: src/plugins/printable/dictionary-builder.c:82 #, fuzzy, c-format msgid "Error opening file `%s': %s\n" msgstr "Hari ikibazo mu gufungura dosiye" -#: src/plugins/printable/dictionary-builder.c:71 +#: src/plugins/printable/dictionary-builder.c:90 #, c-format msgid "" "Error allocating: %s\n" "." msgstr "" -#: src/plugins/printable/dictionary-builder.c:83 +#: src/plugins/printable/dictionary-builder.c:102 #, fuzzy, c-format msgid "Increase ALLOCSIZE (in %s).\n" msgstr "in" -#: src/plugins/manextractor.c:128 +#: src/plugins/manextractor.c:140 msgid "Commands" msgstr "amabwiriza" -#: src/plugins/manextractor.c:133 +#: src/plugins/manextractor.c:145 #, fuzzy msgid "System calls" msgstr "Amahamagara:" -#: src/plugins/manextractor.c:138 +#: src/plugins/manextractor.c:150 #, fuzzy msgid "Library calls" msgstr "Amahamagara:" -#: src/plugins/manextractor.c:143 +#: src/plugins/manextractor.c:155 #, fuzzy msgid "Special files" msgstr "Idosiye" -#: src/plugins/manextractor.c:148 +#: src/plugins/manextractor.c:160 #, fuzzy msgid "File formats and conventions" msgstr "Idosiye Imiterere Na" -#: src/plugins/manextractor.c:153 +#: src/plugins/manextractor.c:165 msgid "Games" msgstr "" -#: src/plugins/manextractor.c:158 +#: src/plugins/manextractor.c:170 #, fuzzy msgid "Conventions and miscellaneous" msgstr "Na Binyuranye" -#: src/plugins/manextractor.c:163 +#: src/plugins/manextractor.c:175 #, fuzzy msgid "System management commands" msgstr "Amabwiriza" -#: src/plugins/manextractor.c:168 +#: src/plugins/manextractor.c:180 msgid "Kernel routines" msgstr "" diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -29,7 +29,7 @@ extern "C" { * 0.2.6-1 => 0x00020601 * 4.5.2-0 => 0x04050200 */ -#define EXTRACTOR_VERSION 0x00050401 +#define EXTRACTOR_VERSION 0x00050500 #include <stdio.h> diff --git a/src/main/Makefile.am b/src/main/Makefile.am @@ -23,10 +23,17 @@ if !MINGW dlflag=-ldl endif +#if HAVE_ZLIB + zlib =-lz +#endif +#if HAVE_BZ2 + bz2lib = -lbz2 +#endif + libextractor_la_LDFLAGS = \ -export-dynamic -version-info 2:0:1 $(LIBICONV) libextractor_la_LIBADD = \ - $(LIBLTDL) $(dlflag) + $(LIBLTDL) $(dlflag) $(zlib) $(bz2lib) libextractor_la_DEPENDENCIES = \ $(LIBLTDL) diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -27,6 +27,13 @@ #include <../../libltdl/ltdl.h> #endif +#if HAVE_LIBBZ2 +#include <bzlib.h> +#endif + +#if HAVE_ZLIB +#include <zlib.h> +#endif #define DEBUG 1 @@ -613,6 +620,179 @@ EXTRACTOR_removeAll (EXTRACTOR_ExtractorList * libraries) libraries = EXTRACTOR_removeLibrary (libraries, libraries->libname); } + + +/** + * How many bytes do we actually try to scan? (from the beginning + * of the file). Limit to 1 GB. + */ +#define MAX_READ 1024 * 1024 * 1024 + +/** + * How many bytes do we actually try to decompress? (from the beginning + * of the file). Limit to 16 MB. + */ +#define MAX_DECOMPRESS 16 * 1024 * 1024 + + +static EXTRACTOR_KeywordList * +getKeywords (EXTRACTOR_ExtractorList * extractor, + const char * filename, + const unsigned char * data, + size_t size) { + EXTRACTOR_KeywordList *result; + char * buf; + size_t dsize; +#if HAVE_ZLIB + z_stream strm; + int ret; + size_t pos; +#endif +#if HAVE_LIBBZ2 + bz_stream bstrm; + int bret; + size_t bpos; +#endif + + buf = NULL; + dsize = 0; +#if HAVE_ZLIB + /* try gzip decompression first */ + if ( (data[0] == 0x1f) && + (data[1] == 0x8b) && + (data[2] == 0x08) ) { + memset(&strm, + 0, + sizeof(z_stream)); + strm.next_in = (char*) data; + strm.avail_in = size; + strm.total_in = 0; + strm.zalloc = NULL; + strm.zfree = NULL; + strm.opaque = NULL; + + if (Z_OK == inflateInit2(&strm, + 15 + 32)) { + dsize = 2 * size; + if (dsize > MAX_DECOMPRESS) + dsize = MAX_DECOMPRESS; + buf = malloc(dsize); + pos = 0; + if (buf == NULL) { + inflateEnd(&strm); + } else { + strm.next_out = buf; + strm.avail_out = dsize; + do { + ret = inflate(&strm, + Z_SYNC_FLUSH); + if (ret == Z_OK) { + if (dsize == MAX_DECOMPRESS) + break; + pos += strm.total_out; + dsize *= 2; + if (dsize > MAX_DECOMPRESS) + dsize = MAX_DECOMPRESS; + buf = realloc(buf, dsize); + strm.next_out = &buf[pos]; + strm.avail_out = dsize - pos; + } else if (ret != Z_STREAM_END) { + /* error */ + free(buf); + buf = NULL; + } + } while ( (buf != NULL) && + (ret != Z_STREAM_END) ); + dsize = pos + strm.total_out; + inflateEnd(&strm); + if (dsize == 0) { + free(buf); + buf = NULL; + } + } + } + } +#endif + +#if HAVE_LIBBZ2 + if ( (data[0] == 'B') && + (data[1] == 'Z') && + (data[2] == 'h') ) { + /* now try bz2 decompression */ + memset(&bstrm, + 0, + sizeof(bz_stream)); + bstrm.next_in = (char*) data; + bstrm.avail_in = size; + bstrm.total_in_lo32 = 0; + bstrm.total_in_hi32 = 0; + bstrm.bzalloc = NULL; + bstrm.bzfree = NULL; + bstrm.opaque = NULL; + if ( (buf == NULL) && + (BZ_OK == BZ2_bzDecompressInit(&bstrm, + 0, + 0)) ) { + dsize = 2 * size; + if (dsize > MAX_DECOMPRESS) + dsize = MAX_DECOMPRESS; + buf = malloc(dsize); + bpos = 0; + if (buf == NULL) { + BZ2_bzDecompressEnd(&bstrm); + } else { + bstrm.next_out = buf; + bstrm.avail_out = dsize; + do { + bret = BZ2_bzDecompress(&bstrm); + if (bret == Z_OK) { + if (dsize == MAX_DECOMPRESS) + break; + bpos += bstrm.total_out_lo32; + dsize *= 2; + if (dsize > MAX_DECOMPRESS) + dsize = MAX_DECOMPRESS; + buf = realloc(buf, dsize); + bstrm.next_out = &buf[bpos]; + bstrm.avail_out = dsize - pos; + } else if (bret != BZ_STREAM_END) { + /* error */ + free(buf); + buf = NULL; + } + } while ( (buf != NULL) && + (bret != BZ_STREAM_END) ); + dsize = bpos + bstrm.total_out_lo32; + BZ2_bzDecompressEnd(&bstrm); + if (dsize == 0) { + free(buf); + buf = NULL; + } + } + } + } +#endif + + + /* finally, call plugins */ + if (buf != NULL) { + data = buf; + size = dsize; + } + result = NULL; + while (extractor != NULL) { + result = extractor->extractMethod(filename, + (char*) data, + size, + result, + extractor->options); + extractor = extractor->next; + } + if (buf != NULL) + free(buf); + return result; +} + /** * Extract keywords from a file using the available extractors. * @param extractor the list of extractor libraries @@ -646,21 +826,16 @@ EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * extractor, return NULL; } - if (size > 1* 1024 * 1024 * 1024) - size = 1 * 1024 * 1024 * 1024; /* do not mmap/read more than 1 GB! */ + if (size > MAX_READ) + size = MAX_READ; /* do not mmap/read more than 1 GB! */ buffer = MMAP(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, file, 0); close(file); if ( (buffer == NULL) || (buffer == (void *) -1) ) return NULL; - result = NULL; - while (extractor != NULL) { - result = extractor->extractMethod(filename, - buffer, - size, - result, - extractor->options); - extractor = extractor->next; - } + result = getKeywords(extractor, + filename, + buffer, + size); if (size > 0) MUNMAP (buffer, size); else @@ -684,20 +859,12 @@ EXTRACTOR_KeywordList * EXTRACTOR_getKeywords2(EXTRACTOR_ExtractorList * extractor, const char * data, size_t size) { - EXTRACTOR_KeywordList * result; - if (data == NULL) return NULL; - result = NULL; - while (extractor != NULL) { - result = extractor->extractMethod(NULL, - (char*)data, - size, - result, - extractor->options); - extractor = extractor->next; - } - return result; + return getKeywords(extractor, + NULL, + data, + size); } static void diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -173,7 +173,6 @@ libextractor_tar_la_SOURCES = \ libextractor_tar_la_LDFLAGS = \ $(PLUGINFLAGS) -Wl,--retain-symbols-file -Wl,TAR_SYMBOLS libextractor_tar_la_LIBADD = \ - -lz \ $(top_builddir)/src/main/libextractor.la libextractor_lower_la_SOURCES = \ @@ -253,8 +252,6 @@ libextractor_man_la_SOURCES = \ libextractor_man_la_LDFLAGS = \ $(top_builddir)/src/main/libextractor.la \ $(PLUGINFLAGS) -Wl,--retain-symbols-file -Wl,MAN_SYMBOLS -libextractor_man_la_LIBADD = \ - -lz libextractor_deb_la_SOURCES = \ debextractor.c diff --git a/src/plugins/manextractor.c b/src/plugins/manextractor.c @@ -78,13 +78,25 @@ static void NEXT(size_t * end, (*end) = size+1; } -static struct EXTRACTOR_Keywords * tryParse(const char * buf, - size_t size, - struct EXTRACTOR_Keywords * prev) { +/** + * How many bytes do we actually try to scan? (from the beginning + * of the file). + */ +#define MAX_READ (16 * 1024) + + + +struct EXTRACTOR_Keywords * +libextractor_man_extract(const char * filename, + const char * buf, + size_t size, + struct EXTRACTOR_Keywords * prev) { int pos; size_t xsize; const size_t xlen = strlen(".TH "); + if (size > MAX_READ) + size = MAX_READ; pos = 0; if (size < xlen) return prev; @@ -203,62 +215,4 @@ static struct EXTRACTOR_Keywords * tryParse(const char * buf, return prev; } -static voidpf Emalloc(voidpf opaque, uInt items, uInt size) { - return malloc(size * items); -} - -static void Efree(voidpf opaque, voidpf ptr) { - free(ptr); -} - -/** - * How many bytes do we actually try to scan? (from the beginning - * of the file). - */ -#define MAX_READ 2048 - -struct EXTRACTOR_Keywords * -libextractor_man_extract(const char * filename, - char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { - z_stream strm; - char * buf; - - memset(&strm, - 0, - sizeof(z_stream)); - strm.next_in = (char*) data; - strm.avail_in = size; - strm.total_in = 0; - strm.zalloc = &Emalloc; - strm.zfree = &Efree; - strm.opaque = NULL; - if (Z_OK == inflateInit2(&strm, - 15 + 32)) { - buf = malloc(MAX_READ); - if (buf == NULL) { - inflateEnd(&strm); - return prev; - } - strm.next_out = buf; - strm.avail_out = MAX_READ; - inflate(&strm, - Z_FINISH); - if (strm.total_out > 0) { - prev = tryParse(buf, - strm.total_out, - prev); - inflateEnd(&strm); - free(buf); - return prev; - } - free(buf); - inflateEnd(&strm); - } - return tryParse(data, - size, - prev); -} - /* end of manextractor.c */ diff --git a/src/plugins/printable/bloomfilter.c b/src/plugins/printable/bloomfilter.c @@ -435,44 +435,4 @@ static void testBitCallback(Bloomfilter * bf, *arg = 0; } -/* *********************** INTERFACE **************** */ - -/** - * Test if an element is in the filter. - * - * @param e the element - * @param bf the filter - * @return 1 if the element is in the filter, 0 if not - */ -static int testBloomfilter(Bloomfilter * bf, - HashCode160 * e) { - int res; - - if (NULL == bf) - return 1; - res = 1; - iterateBits(bf, - (BitIterator)&testBitCallback, - &res, - e); - return res; -} - -/** - * Add an element to the filter - * - * @param bf the filter - * @param e the element - */ -static void addToBloomfilter(Bloomfilter * bf, - HashCode160 * e) { - - if (NULL == bf) - return; - iterateBits(bf, - &setBitCallback, - NULL, - e); -} - /* ******************** end of bloomfilter.c *********** */ diff --git a/src/plugins/printable/dictionary-builder.c b/src/plugins/printable/dictionary-builder.c @@ -30,6 +30,25 @@ #include "bloomfilter.h" #include "bloomfilter.c" + +/** + * Add an element to the filter + * + * @param bf the filter + * @param e the element + */ +static void addToBloomfilter(Bloomfilter * bf, + HashCode160 * e) { + + if (NULL == bf) + return; + iterateBits(bf, + &setBitCallback, + NULL, + e); +} + + #define ADDR_PER_ELEMENT 46 int main(int argc, diff --git a/src/plugins/printable/printableextractor.c b/src/plugins/printable/printableextractor.c @@ -32,6 +32,28 @@ #include "extractor.h" #include "bloomfilter.c" +/** + * Test if an element is in the filter. + * + * @param e the element + * @param bf the filter + * @return 1 if the element is in the filter, 0 if not + */ +static int testBloomfilter(Bloomfilter * bf, + HashCode160 * e) { + int res; + + if (NULL == bf) + return 1; + res = 1; + iterateBits(bf, + (BitIterator)&testBitCallback, + &res, + e); + return res; +} + + extern Bloomfilter FILTER_NAME; static char * xstrndup(const char * s, size_t n){ diff --git a/src/plugins/tarextractor.c b/src/plugins/tarextractor.c @@ -81,10 +81,11 @@ typedef struct { } USTarHeader; -static struct EXTRACTOR_Keywords * -tar_extract(const char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { +struct EXTRACTOR_Keywords * +libextractor_tar_extract(const char * filename, + const char * data, + size_t size, + struct EXTRACTOR_Keywords * prev) { TarHeader * tar; USTarHeader * ustar; size_t pos; @@ -135,87 +136,3 @@ tar_extract(const char * data, } return prev; } - -static voidpf Emalloc(voidpf opaque, uInt items, uInt size) { - return malloc(size * items); -} - -static void Efree(voidpf opaque, voidpf ptr) { - free(ptr); -} - -/* do not decompress tar.gz files > 16 MB */ -#define MAX_TGZ_SIZE 16 * 1024 * 1024 - -struct EXTRACTOR_Keywords * -libextractor_tar_extract(const char * filename, - const unsigned char * data, - size_t size, - struct EXTRACTOR_Keywords * prev) { - if ( (data[0] == 0x1f) && - (data[1] == 0x8b) && - (data[2] == 0x08) ) { - time_t ctime; - char * buf; - size_t bufSize; - z_stream strm; - - /* Creation time */ - ctime = (((((( (unsigned int)data[7] << 8) - | (unsigned int)data[6]) << 8) - | (unsigned int)data[5]) << 8) - | (unsigned int)data[4]); - if (ctime) { - struct tm ctm; - char tmbuf[60]; - - ctm = *gmtime(&ctime); - if (strftime(tmbuf, sizeof(tmbuf), - nl_langinfo(D_FMT), - &ctm)) - prev = addKeyword(EXTRACTOR_CREATION_DATE, strdup(tmbuf), prev); - } - - /* try for tar.gz */ - bufSize = data[size-4] + 256 * data[size-3] + 65536 * data[size-2] + 256*65536 * data[size-1]; - if (bufSize > MAX_TGZ_SIZE) { - return prev; - } - - memset(&strm, - 0, - sizeof(z_stream)); - strm.next_in = (char*) data; - strm.avail_in = size; - strm.total_in = 0; - strm.zalloc = &Emalloc; - strm.zfree = &Efree; - strm.opaque = NULL; - if (Z_OK != inflateInit2(&strm, - 15 + 32)) - return prev; - buf = malloc(bufSize); - if (buf == NULL) { - inflateEnd(&strm); - return prev; - } - strm.next_out = buf; - strm.avail_out = bufSize; - inflate(&strm, - Z_FINISH); - if (strm.total_out == 0) { - inflateEnd(&strm); - free(buf); - return prev; - } - bufSize = strm.total_out; - inflateEnd(&strm); - prev = tar_extract(buf, bufSize, prev); - free(buf); - return prev; - } else { - /* try for uncompressed tar */ - return tar_extract(data, size, prev); - } -} -