commit d196e141f9259dad2812a7cb3a0dfb9471f34810
parent 1401a8851546bcd9989beaf3304d4223c7fb2e84
Author: Christian Grothoff <christian@grothoff.org>
Date: Thu, 8 Sep 2005 04:46:11 +0000
release
Diffstat:
16 files changed, 332 insertions(+), 277 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,10 @@
+Wed Sep 7 21:41:35 PDT 2005
+ Added decompression of gz and bz2 streams to the LE core library
+ (avoids need to do this, possibly repeatedly, in plugins and makes
+ sure that all plugins work with compressed files). Eliminated gz
+ decompression from man and tar extractors.
+ Releasing libextractor 0.5.5.
+
Sun Sep 4 02:08:56 PDT 2005
Changed code to export fewer symbols (refactoring plus linker options,
goal is to address Mantis #925. Changed debian extractor to no longer
diff --git a/configure.ac b/configure.ac
@@ -1,8 +1,8 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.57)
-AC_INIT([libextractor], [0.5.4a], [bug-libextractor@gnu.org])
+AC_INIT([libextractor], [0.5.5], [bug-libextractor@gnu.org])
AC_REVISION($Revision: 1.67 $)
-AM_INIT_AUTOMAKE([libextractor], [0.5.4a])
+AM_INIT_AUTOMAKE([libextractor], [0.5.5])
AM_CONFIG_HEADER(src/include/config.h)
AH_TOP([#define _GNU_SOURCE 1])
@@ -132,6 +132,11 @@ AC_CHECK_LIB(z, inflate,
AC_DEFINE(HAVE_ZLIB,1,[Have zlib])],
[AM_CONDITIONAL(HAVE_ZLIB, false)])
+# libbz2 is optional.  Probe for the public decompression entry point
+# (BZ2_bzDecompress) that the library core actually calls.
+AC_CHECK_LIB(bz2, BZ2_bzDecompress,
+ [AM_CONDITIONAL(HAVE_BZ2, true)
+ AC_DEFINE(HAVE_LIBBZ2,1,[Have libbz2])],
+ [AM_CONDITIONAL(HAVE_BZ2, false)])
+
# restore LIBS
LIBS=$LIBSOLD
@@ -156,7 +161,7 @@ AC_HEADER_STDC
AC_HEADER_DIRENT
AC_HEADER_STDBOOL
AC_CHECK_HEADERS([fcntl.h netinet/in.h stdlib.h string.h unistd.h libintl.h limits.h stddef.h zlib.h])
-AC_CHECK_HEADERS([ltdl.h iconv.h])
+AC_CHECK_HEADERS([ltdl.h iconv.h bzlib.h])
# fixme, we need to die here if a header is not found!
AC_CHECK_HEADERS([vorbis/vorbisfile.h])
diff --git a/contrib/doxygen b/contrib/doxygen
@@ -23,7 +23,7 @@ PROJECT_NAME = libextractor
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 0.5.2
+PROJECT_NUMBER = 0.5.5
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
diff --git a/po/de.po b/po/de.po
@@ -9,7 +9,7 @@ msgid ""
msgstr ""
"Project-Id-Version: libextractor 0.5.0\n"
"Report-Msgid-Bugs-To: libextractor@gnu.org\n"
-"POT-Creation-Date: 2005-09-04 02:59-0700\n"
+"POT-Creation-Date: 2005-09-07 21:46-0700\n"
"PO-Revision-Date: 2005-06-22 15:05+0200\n"
"Last-Translator: Karl Eichwalder <ke@gnu.franken.de>\n"
"Language-Team: German <de@li.org>\n"
@@ -28,7 +28,7 @@ msgstr "Quell-RPM %d.%d"
msgid "Binary RPM %d.%d"
msgstr "Binäres RPM %d.%d"
-#: src/plugins/printable/dictionary-builder.c:50
+#: src/plugins/printable/dictionary-builder.c:69
#, c-format
msgid ""
"Please provide the name of the language you are building\n"
@@ -37,12 +37,12 @@ msgstr ""
"Bitte geben Sie den Namen der Sprache an, für die Sie ein Wörterbuch\n"
"erstellen. Zum Beispiel:\n"
-#: src/plugins/printable/dictionary-builder.c:63
+#: src/plugins/printable/dictionary-builder.c:82
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Fehler beim Öffnen der Datei »%s«: %s\n"
-#: src/plugins/printable/dictionary-builder.c:71
+#: src/plugins/printable/dictionary-builder.c:90
#, c-format
msgid ""
"Error allocating: %s\n"
@@ -51,44 +51,44 @@ msgstr ""
"Fehler beim Allokieren: %s\n"
"."
-#: src/plugins/printable/dictionary-builder.c:83
+#: src/plugins/printable/dictionary-builder.c:102
#, c-format
msgid "Increase ALLOCSIZE (in %s).\n"
msgstr "ALLOCSIZE vergrößern (in %s).\n"
-#: src/plugins/manextractor.c:128
+#: src/plugins/manextractor.c:140
msgid "Commands"
msgstr "Befehle"
-#: src/plugins/manextractor.c:133
+#: src/plugins/manextractor.c:145
msgid "System calls"
msgstr "Systemaufrufe"
-#: src/plugins/manextractor.c:138
+#: src/plugins/manextractor.c:150
msgid "Library calls"
msgstr "Bibliotheksaufrufe"
-#: src/plugins/manextractor.c:143
+#: src/plugins/manextractor.c:155
msgid "Special files"
msgstr "Spezialdateien"
-#: src/plugins/manextractor.c:148
+#: src/plugins/manextractor.c:160
msgid "File formats and conventions"
msgstr ""
-#: src/plugins/manextractor.c:153
+#: src/plugins/manextractor.c:165
msgid "Games"
msgstr "Spiele"
-#: src/plugins/manextractor.c:158
+#: src/plugins/manextractor.c:170
msgid "Conventions and miscellaneous"
msgstr ""
-#: src/plugins/manextractor.c:163
+#: src/plugins/manextractor.c:175
msgid "System management commands"
msgstr ""
-#: src/plugins/manextractor.c:168
+#: src/plugins/manextractor.c:180
msgid "Kernel routines"
msgstr "Kernelroutinen"
diff --git a/po/libextractor.pot b/po/libextractor.pot
@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: libextractor@gnu.org\n"
-"POT-Creation-Date: 2005-09-04 02:59-0700\n"
+"POT-Creation-Date: 2005-09-07 21:46-0700\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@@ -26,63 +26,63 @@ msgstr ""
msgid "Binary RPM %d.%d"
msgstr ""
-#: src/plugins/printable/dictionary-builder.c:50
+#: src/plugins/printable/dictionary-builder.c:69
#, c-format
msgid ""
"Please provide the name of the language you are building\n"
"a dictionary for. For example:\n"
msgstr ""
-#: src/plugins/printable/dictionary-builder.c:63
+#: src/plugins/printable/dictionary-builder.c:82
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr ""
-#: src/plugins/printable/dictionary-builder.c:71
+#: src/plugins/printable/dictionary-builder.c:90
#, c-format
msgid ""
"Error allocating: %s\n"
"."
msgstr ""
-#: src/plugins/printable/dictionary-builder.c:83
+#: src/plugins/printable/dictionary-builder.c:102
#, c-format
msgid "Increase ALLOCSIZE (in %s).\n"
msgstr ""
-#: src/plugins/manextractor.c:128
+#: src/plugins/manextractor.c:140
msgid "Commands"
msgstr ""
-#: src/plugins/manextractor.c:133
+#: src/plugins/manextractor.c:145
msgid "System calls"
msgstr ""
-#: src/plugins/manextractor.c:138
+#: src/plugins/manextractor.c:150
msgid "Library calls"
msgstr ""
-#: src/plugins/manextractor.c:143
+#: src/plugins/manextractor.c:155
msgid "Special files"
msgstr ""
-#: src/plugins/manextractor.c:148
+#: src/plugins/manextractor.c:160
msgid "File formats and conventions"
msgstr ""
-#: src/plugins/manextractor.c:153
+#: src/plugins/manextractor.c:165
msgid "Games"
msgstr ""
-#: src/plugins/manextractor.c:158
+#: src/plugins/manextractor.c:170
msgid "Conventions and miscellaneous"
msgstr ""
-#: src/plugins/manextractor.c:163
+#: src/plugins/manextractor.c:175
msgid "System management commands"
msgstr ""
-#: src/plugins/manextractor.c:168
+#: src/plugins/manextractor.c:180
msgid "Kernel routines"
msgstr ""
diff --git a/po/ro.po b/po/ro.po
@@ -9,7 +9,7 @@ msgid ""
msgstr ""
"Project-Id-Version: libextractor 0.4.2\n"
"Report-Msgid-Bugs-To: libextractor@gnu.org\n"
-"POT-Creation-Date: 2005-09-04 02:59-0700\n"
+"POT-Creation-Date: 2005-09-07 21:46-0700\n"
"PO-Revision-Date: 2005-02-25 12:00-0500\n"
"Last-Translator: Laurentiu Buzdugan <lbuz@rolix.org>\n"
"Language-Team: Romanian <translation-team-ro@lists.sourceforge.net>\n"
@@ -28,7 +28,7 @@ msgstr "Surs
msgid "Binary RPM %d.%d"
msgstr "Binar RPM %d.%d"
-#: src/plugins/printable/dictionary-builder.c:50
+#: src/plugins/printable/dictionary-builder.c:69
#, c-format
msgid ""
"Please provide the name of the language you are building\n"
@@ -37,12 +37,12 @@ msgstr ""
"Vã rugãm furnizaþi numele limbii pentru care contruiþi\n"
"un dicþionar. De exemplu:\n"
-#: src/plugins/printable/dictionary-builder.c:63
+#: src/plugins/printable/dictionary-builder.c:82
#, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Eroare deschidere fiºier `%s': %s\n"
-#: src/plugins/printable/dictionary-builder.c:71
+#: src/plugins/printable/dictionary-builder.c:90
#, c-format
msgid ""
"Error allocating: %s\n"
@@ -51,44 +51,44 @@ msgstr ""
"Eroare de alocare: %s\n"
"."
-#: src/plugins/printable/dictionary-builder.c:83
+#: src/plugins/printable/dictionary-builder.c:102
#, c-format
msgid "Increase ALLOCSIZE (in %s).\n"
msgstr "Creºteþi ALLOCSIZE (în %s).\n"
-#: src/plugins/manextractor.c:128
+#: src/plugins/manextractor.c:140
msgid "Commands"
msgstr "Comenzi"
-#: src/plugins/manextractor.c:133
+#: src/plugins/manextractor.c:145
msgid "System calls"
msgstr "Apeluri sistem"
-#: src/plugins/manextractor.c:138
+#: src/plugins/manextractor.c:150
msgid "Library calls"
msgstr "Apeluri de bibliotecã"
-#: src/plugins/manextractor.c:143
+#: src/plugins/manextractor.c:155
msgid "Special files"
msgstr "Fiºiere speciale"
-#: src/plugins/manextractor.c:148
+#: src/plugins/manextractor.c:160
msgid "File formats and conventions"
msgstr "Formate de fiºiere ºi convenþii"
-#: src/plugins/manextractor.c:153
+#: src/plugins/manextractor.c:165
msgid "Games"
msgstr "Jocuri"
-#: src/plugins/manextractor.c:158
+#: src/plugins/manextractor.c:170
msgid "Conventions and miscellaneous"
msgstr "Convenþii ºi diverse"
-#: src/plugins/manextractor.c:163
+#: src/plugins/manextractor.c:175
msgid "System management commands"
msgstr "Comenzi pentru managementul sistemului"
-#: src/plugins/manextractor.c:168
+#: src/plugins/manextractor.c:180
msgid "Kernel routines"
msgstr "Proceduri kernel"
diff --git a/po/rw.po b/po/rw.po
@@ -16,7 +16,7 @@ msgid ""
msgstr ""
"Project-Id-Version: libextractor 0.4.2\n"
"Report-Msgid-Bugs-To: libextractor@gnu.org\n"
-"POT-Creation-Date: 2005-09-04 02:59-0700\n"
+"POT-Creation-Date: 2005-09-07 21:46-0700\n"
"PO-Revision-Date: 2005-04-04 10:55-0700\n"
"Last-Translator: Steven Michael Murphy <murf@e-tools.com>\n"
"Language-Team: Kinyarwanda <translation-team-rw@lists.sourceforge.net>\n"
@@ -34,7 +34,7 @@ msgstr ""
msgid "Binary RPM %d.%d"
msgstr ""
-#: src/plugins/printable/dictionary-builder.c:50
+#: src/plugins/printable/dictionary-builder.c:69
#, fuzzy, c-format
msgid ""
"Please provide the name of the language you are building\n"
@@ -42,62 +42,62 @@ msgid ""
msgstr "i Izina: Bya i Ururimi Inkoranyamagambo kugirango Urugero"
# basctl/source\basicide\basidesh.src:RID_STR_ERROROPENSTORAGE.text
-#: src/plugins/printable/dictionary-builder.c:63
+#: src/plugins/printable/dictionary-builder.c:82
#, fuzzy, c-format
msgid "Error opening file `%s': %s\n"
msgstr "Hari ikibazo mu gufungura dosiye"
-#: src/plugins/printable/dictionary-builder.c:71
+#: src/plugins/printable/dictionary-builder.c:90
#, c-format
msgid ""
"Error allocating: %s\n"
"."
msgstr ""
-#: src/plugins/printable/dictionary-builder.c:83
+#: src/plugins/printable/dictionary-builder.c:102
#, fuzzy, c-format
msgid "Increase ALLOCSIZE (in %s).\n"
msgstr "in"
-#: src/plugins/manextractor.c:128
+#: src/plugins/manextractor.c:140
msgid "Commands"
msgstr "amabwiriza"
-#: src/plugins/manextractor.c:133
+#: src/plugins/manextractor.c:145
#, fuzzy
msgid "System calls"
msgstr "Amahamagara:"
-#: src/plugins/manextractor.c:138
+#: src/plugins/manextractor.c:150
#, fuzzy
msgid "Library calls"
msgstr "Amahamagara:"
-#: src/plugins/manextractor.c:143
+#: src/plugins/manextractor.c:155
#, fuzzy
msgid "Special files"
msgstr "Idosiye"
-#: src/plugins/manextractor.c:148
+#: src/plugins/manextractor.c:160
#, fuzzy
msgid "File formats and conventions"
msgstr "Idosiye Imiterere Na"
-#: src/plugins/manextractor.c:153
+#: src/plugins/manextractor.c:165
msgid "Games"
msgstr ""
-#: src/plugins/manextractor.c:158
+#: src/plugins/manextractor.c:170
#, fuzzy
msgid "Conventions and miscellaneous"
msgstr "Na Binyuranye"
-#: src/plugins/manextractor.c:163
+#: src/plugins/manextractor.c:175
#, fuzzy
msgid "System management commands"
msgstr "Amabwiriza"
-#: src/plugins/manextractor.c:168
+#: src/plugins/manextractor.c:180
msgid "Kernel routines"
msgstr ""
diff --git a/src/include/extractor.h b/src/include/extractor.h
@@ -29,7 +29,7 @@ extern "C" {
* 0.2.6-1 => 0x00020601
* 4.5.2-0 => 0x04050200
*/
-#define EXTRACTOR_VERSION 0x00050401
+#define EXTRACTOR_VERSION 0x00050500
#include <stdio.h>
diff --git a/src/main/Makefile.am b/src/main/Makefile.am
@@ -23,10 +23,17 @@ if !MINGW
dlflag=-ldl
endif
+# Only link against the decompression libraries that configure found;
+# HAVE_ZLIB and HAVE_BZ2 are the automake conditionals set in configure.ac.
+if HAVE_ZLIB
+ zlib = -lz
+endif
+if HAVE_BZ2
+ bz2lib = -lbz2
+endif
+
libextractor_la_LDFLAGS = \
-export-dynamic -version-info 2:0:1 $(LIBICONV)
libextractor_la_LIBADD = \
- $(LIBLTDL) $(dlflag)
+ $(LIBLTDL) $(dlflag) $(zlib) $(bz2lib)
libextractor_la_DEPENDENCIES = \
$(LIBLTDL)
diff --git a/src/main/extractor.c b/src/main/extractor.c
@@ -27,6 +27,13 @@
#include <../../libltdl/ltdl.h>
#endif
+#if HAVE_LIBBZ2
+#include <bzlib.h>
+#endif
+
+#if HAVE_ZLIB
+#include <zlib.h>
+#endif
#define DEBUG 1
@@ -613,6 +620,179 @@ EXTRACTOR_removeAll (EXTRACTOR_ExtractorList * libraries)
libraries = EXTRACTOR_removeLibrary (libraries, libraries->libname);
}
+
+
+/**
+ * How many bytes do we actually try to scan? (from the beginning
+ * of the file).  Limit to 1 GB.
+ */
+#define MAX_READ (1024 * 1024 * 1024)
+
+/**
+ * How many bytes do we actually try to decompress? (from the beginning
+ * of the file).  Limit to 16 MB.
+ */
+#define MAX_DECOMPRESS (16 * 1024 * 1024)
+
+#if HAVE_ZLIB || HAVE_LIBBZ2
+/**
+ * Pick the initial size of a decompression buffer: twice the
+ * compressed size, capped at MAX_DECOMPRESS.  The comparison is
+ * done before the multiplication so that huge inputs cannot overflow.
+ *
+ * @param size number of compressed bytes
+ * @return number of bytes to allocate for the output buffer
+ */
+static size_t
+initialBufferSize (size_t size) {
+  if (size > MAX_DECOMPRESS / 2)
+    return MAX_DECOMPRESS;
+  return 2 * size;
+}
+
+/**
+ * Double the size of a decompression buffer without ever exceeding
+ * MAX_DECOMPRESS bytes.
+ *
+ * @param buf the buffer; updated if the buffer was moved
+ * @param bsize the buffer size; updated on success
+ * @return 1 if the buffer was enlarged, 0 if the limit was already
+ *  reached or memory is exhausted (buffer and size are unchanged)
+ */
+static int
+growBuffer (char ** buf,
+            size_t * bsize) {
+  size_t nsize;
+  char * nbuf;
+
+  if (*bsize >= MAX_DECOMPRESS)
+    return 0;
+  nsize = initialBufferSize(*bsize);
+  /* keep the old pointer until we know that realloc succeeded */
+  nbuf = realloc(*buf, nsize);
+  if (nbuf == NULL)
+    return 0;
+  *buf = nbuf;
+  *bsize = nsize;
+  return 1;
+}
+#endif
+
+#if HAVE_ZLIB
+/**
+ * Try to decompress (the beginning of) a gzip stream.
+ *
+ * @param data the possibly gzip-compressed input
+ * @param size number of bytes in data
+ * @param dsize set to the number of decompressed bytes on success
+ * @return malloc'ed buffer with at most MAX_DECOMPRESS bytes of
+ *  decompressed data (to be freed by the caller), NULL if the input
+ *  is not gzip or could not be decompressed
+ */
+static char *
+decompressGzip (const unsigned char * data,
+                size_t size,
+                size_t * dsize) {
+  z_stream strm;
+  char * buf;
+  size_t bsize;
+  int ret;
+
+  /* gzip magic plus "deflate" compression method */
+  if ( (size < 3) ||
+       (data[0] != 0x1f) ||
+       (data[1] != 0x8b) ||
+       (data[2] != 0x08) )
+    return NULL;
+  memset(&strm,
+         0,
+         sizeof(z_stream));
+  strm.next_in = (Bytef*) data;
+  strm.avail_in = size;
+  strm.total_in = 0;
+  strm.zalloc = NULL;
+  strm.zfree = NULL;
+  strm.opaque = NULL;
+  /* 15 + 32: largest window, automatic zlib/gzip header detection */
+  if (Z_OK != inflateInit2(&strm,
+                           15 + 32))
+    return NULL;
+  bsize = initialBufferSize(size);
+  buf = malloc(bsize);
+  if (buf == NULL) {
+    inflateEnd(&strm);
+    return NULL;
+  }
+  strm.next_out = (Bytef*) buf;
+  strm.avail_out = bsize;
+  do {
+    ret = inflate(&strm,
+                  Z_SYNC_FLUSH);
+    if (ret == Z_OK) {
+      if (strm.avail_out == 0) {
+        /* output full: enlarge, or settle for what we have */
+        if (! growBuffer(&buf, &bsize))
+          break;
+        /* total_out is cumulative, so it is the write offset */
+        strm.next_out = (Bytef*) &buf[strm.total_out];
+        strm.avail_out = bsize - strm.total_out;
+      } else if (strm.avail_in == 0) {
+        /* truncated stream: keep the partial output */
+        break;
+      }
+    } else if (ret != Z_STREAM_END) {
+      /* error */
+      free(buf);
+      buf = NULL;
+    }
+  } while ( (buf != NULL) &&
+            (ret != Z_STREAM_END) );
+  if ( (buf != NULL) &&
+       (strm.total_out == 0) ) {
+    free(buf);
+    buf = NULL;
+  }
+  if (buf != NULL)
+    *dsize = strm.total_out;
+  inflateEnd(&strm);
+  return buf;
+}
+#endif
+
+#if HAVE_LIBBZ2
+/**
+ * Try to decompress (the beginning of) a bzip2 stream.
+ *
+ * @param data the possibly bzip2-compressed input
+ * @param size number of bytes in data
+ * @param dsize set to the number of decompressed bytes on success
+ * @return malloc'ed buffer with at most MAX_DECOMPRESS bytes of
+ *  decompressed data (to be freed by the caller), NULL if the input
+ *  is not bzip2 or could not be decompressed
+ */
+static char *
+decompressBzip2 (const unsigned char * data,
+                 size_t size,
+                 size_t * dsize) {
+  bz_stream bstrm;
+  char * buf;
+  size_t bsize;
+  int bret;
+
+  /* bzip2 magic */
+  if ( (size < 3) ||
+       (data[0] != 'B') ||
+       (data[1] != 'Z') ||
+       (data[2] != 'h') )
+    return NULL;
+  memset(&bstrm,
+         0,
+         sizeof(bz_stream));
+  bstrm.next_in = (char*) data;
+  bstrm.avail_in = size;
+  bstrm.total_in_lo32 = 0;
+  bstrm.total_in_hi32 = 0;
+  bstrm.bzalloc = NULL;
+  bstrm.bzfree = NULL;
+  bstrm.opaque = NULL;
+  if (BZ_OK != BZ2_bzDecompressInit(&bstrm,
+                                    0,
+                                    0))
+    return NULL;
+  bsize = initialBufferSize(size);
+  buf = malloc(bsize);
+  if (buf == NULL) {
+    BZ2_bzDecompressEnd(&bstrm);
+    return NULL;
+  }
+  bstrm.next_out = buf;
+  bstrm.avail_out = bsize;
+  do {
+    bret = BZ2_bzDecompress(&bstrm);
+    if (bret == BZ_OK) {
+      if (bstrm.avail_out == 0) {
+        /* output full: enlarge, or settle for what we have */
+        if (! growBuffer(&buf, &bsize))
+          break;
+        /* the output never exceeds MAX_DECOMPRESS, so the low
+           32 bits of the (cumulative) counter are the write offset */
+        bstrm.next_out = &buf[bstrm.total_out_lo32];
+        bstrm.avail_out = bsize - bstrm.total_out_lo32;
+      } else if (bstrm.avail_in == 0) {
+        /* truncated stream: keep the partial output */
+        break;
+      }
+    } else if (bret != BZ_STREAM_END) {
+      /* error */
+      free(buf);
+      buf = NULL;
+    }
+  } while ( (buf != NULL) &&
+            (bret != BZ_STREAM_END) );
+  if ( (buf != NULL) &&
+       (bstrm.total_out_lo32 == 0) ) {
+    free(buf);
+    buf = NULL;
+  }
+  if (buf != NULL)
+    *dsize = bstrm.total_out_lo32;
+  BZ2_bzDecompressEnd(&bstrm);
+  return buf;
+}
+#endif
+
+/**
+ * Run all extractors over a block of data.  If the data is a gzip
+ * or bzip2 stream (and the respective library is available), the
+ * extractors see the first MAX_DECOMPRESS bytes of the decompressed
+ * data instead of the raw input.
+ *
+ * @param extractor the list of extractor libraries
+ * @param filename name of the file the data comes from, passed on to
+ *  the extractors as is (callers in this file pass NULL for in-memory data)
+ * @param data the data to extract keywords from
+ * @param size number of bytes in data
+ * @return the list of keywords produced by the extractors, NULL if
+ *  there are none
+ */
+static EXTRACTOR_KeywordList *
+getKeywords (EXTRACTOR_ExtractorList * extractor,
+             const char * filename,
+             const unsigned char * data,
+             size_t size) {
+  EXTRACTOR_KeywordList * result;
+  char * buf;
+  size_t dsize;
+
+  buf = NULL;
+  dsize = 0;
+#if HAVE_ZLIB
+  /* try gzip decompression first */
+  buf = decompressGzip(data, size, &dsize);
+#endif
+#if HAVE_LIBBZ2
+  /* now try bz2 decompression */
+  if (buf == NULL)
+    buf = decompressBzip2(data, size, &dsize);
+#endif
+
+  /* finally, call plugins */
+  if (buf != NULL) {
+    data = (const unsigned char*) buf;
+    size = dsize;
+  }
+  result = NULL;
+  while (extractor != NULL) {
+    result = extractor->extractMethod(filename,
+                                      (char*) data,
+                                      size,
+                                      result,
+                                      extractor->options);
+    extractor = extractor->next;
+  }
+  free(buf);
+  return result;
+}
+
/**
* Extract keywords from a file using the available extractors.
* @param extractor the list of extractor libraries
@@ -646,21 +826,16 @@ EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList * extractor,
return NULL;
}
- if (size > 1* 1024 * 1024 * 1024)
- size = 1 * 1024 * 1024 * 1024; /* do not mmap/read more than 1 GB! */
+ if (size > MAX_READ)
+ size = MAX_READ; /* do not mmap/read more than 1 GB! */
buffer = MMAP(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, file, 0);
close(file);
if ( (buffer == NULL) || (buffer == (void *) -1) )
return NULL;
- result = NULL;
- while (extractor != NULL) {
- result = extractor->extractMethod(filename,
- buffer,
- size,
- result,
- extractor->options);
- extractor = extractor->next;
- }
+ result = getKeywords(extractor,
+ filename,
+ buffer,
+ size);
if (size > 0)
MUNMAP (buffer, size);
else
@@ -684,20 +859,12 @@ EXTRACTOR_KeywordList *
EXTRACTOR_getKeywords2(EXTRACTOR_ExtractorList * extractor,
const char * data,
size_t size) {
- EXTRACTOR_KeywordList * result;
-
if (data == NULL)
return NULL;
- result = NULL;
- while (extractor != NULL) {
- result = extractor->extractMethod(NULL,
- (char*)data,
- size,
- result,
- extractor->options);
- extractor = extractor->next;
- }
- return result;
+ return getKeywords(extractor,
+ NULL,
+ data,
+ size);
}
static void
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
@@ -173,7 +173,6 @@ libextractor_tar_la_SOURCES = \
libextractor_tar_la_LDFLAGS = \
$(PLUGINFLAGS) -Wl,--retain-symbols-file -Wl,TAR_SYMBOLS
libextractor_tar_la_LIBADD = \
- -lz \
$(top_builddir)/src/main/libextractor.la
libextractor_lower_la_SOURCES = \
@@ -253,8 +252,6 @@ libextractor_man_la_SOURCES = \
libextractor_man_la_LDFLAGS = \
$(top_builddir)/src/main/libextractor.la \
$(PLUGINFLAGS) -Wl,--retain-symbols-file -Wl,MAN_SYMBOLS
-libextractor_man_la_LIBADD = \
- -lz
libextractor_deb_la_SOURCES = \
debextractor.c
diff --git a/src/plugins/manextractor.c b/src/plugins/manextractor.c
@@ -78,13 +78,25 @@ static void NEXT(size_t * end,
(*end) = size+1;
}
-static struct EXTRACTOR_Keywords * tryParse(const char * buf,
- size_t size,
- struct EXTRACTOR_Keywords * prev) {
+/**
+ * Upper bound, in bytes counted from the start of the input, on how
+ * much data the man page extractor examines (16 KB); larger inputs are cut off here.
+ */
+#define MAX_READ (16 * 1024)
+
+
+
+struct EXTRACTOR_Keywords *
+libextractor_man_extract(const char * filename,
+ const char * buf,
+ size_t size,
+ struct EXTRACTOR_Keywords * prev) {
int pos;
size_t xsize;
const size_t xlen = strlen(".TH ");
+ if (size > MAX_READ)
+ size = MAX_READ;
pos = 0;
if (size < xlen)
return prev;
@@ -203,62 +215,4 @@ static struct EXTRACTOR_Keywords * tryParse(const char * buf,
return prev;
}
-static voidpf Emalloc(voidpf opaque, uInt items, uInt size) {
- return malloc(size * items);
-}
-
-static void Efree(voidpf opaque, voidpf ptr) {
- free(ptr);
-}
-
-/**
- * How many bytes do we actually try to scan? (from the beginning
- * of the file).
- */
-#define MAX_READ 2048
-
-struct EXTRACTOR_Keywords *
-libextractor_man_extract(const char * filename,
- char * data,
- size_t size,
- struct EXTRACTOR_Keywords * prev) {
- z_stream strm;
- char * buf;
-
- memset(&strm,
- 0,
- sizeof(z_stream));
- strm.next_in = (char*) data;
- strm.avail_in = size;
- strm.total_in = 0;
- strm.zalloc = &Emalloc;
- strm.zfree = &Efree;
- strm.opaque = NULL;
- if (Z_OK == inflateInit2(&strm,
- 15 + 32)) {
- buf = malloc(MAX_READ);
- if (buf == NULL) {
- inflateEnd(&strm);
- return prev;
- }
- strm.next_out = buf;
- strm.avail_out = MAX_READ;
- inflate(&strm,
- Z_FINISH);
- if (strm.total_out > 0) {
- prev = tryParse(buf,
- strm.total_out,
- prev);
- inflateEnd(&strm);
- free(buf);
- return prev;
- }
- free(buf);
- inflateEnd(&strm);
- }
- return tryParse(data,
- size,
- prev);
-}
-
/* end of manextractor.c */
diff --git a/src/plugins/printable/bloomfilter.c b/src/plugins/printable/bloomfilter.c
@@ -435,44 +435,4 @@ static void testBitCallback(Bloomfilter * bf,
*arg = 0;
}
-/* *********************** INTERFACE **************** */
-
-/**
- * Test if an element is in the filter.
- *
- * @param e the element
- * @param bf the filter
- * @return 1 if the element is in the filter, 0 if not
- */
-static int testBloomfilter(Bloomfilter * bf,
- HashCode160 * e) {
- int res;
-
- if (NULL == bf)
- return 1;
- res = 1;
- iterateBits(bf,
- (BitIterator)&testBitCallback,
- &res,
- e);
- return res;
-}
-
-/**
- * Add an element to the filter
- *
- * @param bf the filter
- * @param e the element
- */
-static void addToBloomfilter(Bloomfilter * bf,
- HashCode160 * e) {
-
- if (NULL == bf)
- return;
- iterateBits(bf,
- &setBitCallback,
- NULL,
- e);
-}
-
/* ******************** end of bloomfilter.c *********** */
diff --git a/src/plugins/printable/dictionary-builder.c b/src/plugins/printable/dictionary-builder.c
@@ -30,6 +30,25 @@
#include "bloomfilter.h"
#include "bloomfilter.c"
+
+/**
+ * Insert an element into the bloom filter by applying
+ * setBitCallback through iterateBits.  Nothing happens
+ * when no filter is given.
+ *
+ * @param bf the filter to update (may be NULL)
+ * @param e the element to add
+ */
+static void addToBloomfilter(Bloomfilter * bf,
+                             HashCode160 * e) {
+  if (bf != NULL)
+    iterateBits(bf, &setBitCallback, NULL, e);
+}
+
+
#define ADDR_PER_ELEMENT 46
int main(int argc,
diff --git a/src/plugins/printable/printableextractor.c b/src/plugins/printable/printableextractor.c
@@ -32,6 +32,28 @@
#include "extractor.h"
#include "bloomfilter.c"
+/**
+ * Check whether an element is (possibly) contained in the filter.
+ * The answer starts out as "present" and is handed to testBitCallback
+ * through iterateBits, which may clear it.  Without a filter every
+ * element is reported as present.
+ *
+ * @param bf the filter to query (may be NULL)
+ * @param e the element to look up
+ * @return 1 if the element is in the filter, 0 if not
+ */
+static int testBloomfilter(Bloomfilter * bf,
+                           HashCode160 * e) {
+  int present = 1;
+
+  if (bf != NULL)
+    iterateBits(bf, (BitIterator)&testBitCallback, &present, e);
+  return present;
+}
+
+
extern Bloomfilter FILTER_NAME;
static char * xstrndup(const char * s, size_t n){
diff --git a/src/plugins/tarextractor.c b/src/plugins/tarextractor.c
@@ -81,10 +81,11 @@ typedef struct {
} USTarHeader;
-static struct EXTRACTOR_Keywords *
-tar_extract(const char * data,
- size_t size,
- struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords *
+libextractor_tar_extract(const char * filename,
+ const char * data,
+ size_t size,
+ struct EXTRACTOR_Keywords * prev) {
TarHeader * tar;
USTarHeader * ustar;
size_t pos;
@@ -135,87 +136,3 @@ tar_extract(const char * data,
}
return prev;
}
-
-static voidpf Emalloc(voidpf opaque, uInt items, uInt size) {
- return malloc(size * items);
-}
-
-static void Efree(voidpf opaque, voidpf ptr) {
- free(ptr);
-}
-
-/* do not decompress tar.gz files > 16 MB */
-#define MAX_TGZ_SIZE 16 * 1024 * 1024
-
-struct EXTRACTOR_Keywords *
-libextractor_tar_extract(const char * filename,
- const unsigned char * data,
- size_t size,
- struct EXTRACTOR_Keywords * prev) {
- if ( (data[0] == 0x1f) &&
- (data[1] == 0x8b) &&
- (data[2] == 0x08) ) {
- time_t ctime;
- char * buf;
- size_t bufSize;
- z_stream strm;
-
- /* Creation time */
- ctime = (((((( (unsigned int)data[7] << 8)
- | (unsigned int)data[6]) << 8)
- | (unsigned int)data[5]) << 8)
- | (unsigned int)data[4]);
- if (ctime) {
- struct tm ctm;
- char tmbuf[60];
-
- ctm = *gmtime(&ctime);
- if (strftime(tmbuf, sizeof(tmbuf),
- nl_langinfo(D_FMT),
- &ctm))
- prev = addKeyword(EXTRACTOR_CREATION_DATE, strdup(tmbuf), prev);
- }
-
- /* try for tar.gz */
- bufSize = data[size-4] + 256 * data[size-3] + 65536 * data[size-2] + 256*65536 * data[size-1];
- if (bufSize > MAX_TGZ_SIZE) {
- return prev;
- }
-
- memset(&strm,
- 0,
- sizeof(z_stream));
- strm.next_in = (char*) data;
- strm.avail_in = size;
- strm.total_in = 0;
- strm.zalloc = &Emalloc;
- strm.zfree = &Efree;
- strm.opaque = NULL;
- if (Z_OK != inflateInit2(&strm,
- 15 + 32))
- return prev;
- buf = malloc(bufSize);
- if (buf == NULL) {
- inflateEnd(&strm);
- return prev;
- }
- strm.next_out = buf;
- strm.avail_out = bufSize;
- inflate(&strm,
- Z_FINISH);
- if (strm.total_out == 0) {
- inflateEnd(&strm);
- free(buf);
- return prev;
- }
- bufSize = strm.total_out;
- inflateEnd(&strm);
- prev = tar_extract(buf, bufSize, prev);
- free(buf);
- return prev;
- } else {
- /* try for uncompressed tar */
- return tar_extract(data, size, prev);
- }
-}
-