libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit ad806f39482486375ff753613b90ca56b4967006
parent 80cfddc9fd3c0cd33b7f1e0e705e0431e0f07fd3
Author: Christian Grothoff <christian@grothoff.org>
Date:   Wed, 14 Sep 2005 20:41:16 +0000

patch

Diffstat:
M ChangeLog | 4 ++++
M src/main/extractor.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog @@ -1,3 +1,7 @@ +Wed Sep 14 13:50:15 PDT 2005 + Changed code for backwards-compatibility with zlib 1.1 + (thanks to Ronan Melennec). + Tue Sep 13 04:49:43 PDT 2005 Fixed segmentation fault in bz2 processing. diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -658,21 +658,93 @@ getKeywords (EXTRACTOR_ExtractorList * extractor, dsize = 0; #if HAVE_ZLIB /* try gzip decompression first */ - if ( (data[0] == 0x1f) && + if ( (size >= 11) && + (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08) ) { + + /* + * Skip gzip header - we might want to retrieve parts of it as keywords + */ + unsigned gzip_header_length = 10; + + if(data[3] & 0x4) /* FEXTRA set */ + gzip_header_length += 2 + (unsigned) (data[10] & 0xff) + + (((unsigned) (data[10] & 0xff)) * 256); + + if(data[3] & 0x8) /* FNAME set */ + { + const unsigned char * cptr = data + gzip_header_length; + + /* + * stored file name is here + * extremely long file names might break the following code. + */ + + while(cptr < data + size) + { + if('\0' == *cptr) + break; + + cptr += 1; + } + + gzip_header_length = (cptr - data) + 1; + } + + if(data[3] & 0x16) /* FCOMMENT set */ + { + const unsigned char * cptr = data + gzip_header_length; + + /* + * stored comment is here + * extremely long comments might break the following code. 
+ */ + + while(cptr < data + size) + { + if('\0' == *cptr) + break; + + cptr += 1; + } + + gzip_header_length = (cptr - data) + 1; + } + + if(data[3] & 0x2) /* FCHRC set */ + gzip_header_length += 2; + memset(&strm, 0, sizeof(z_stream)); - strm.next_in = (char*) data; - strm.avail_in = size; + + if(size > gzip_header_length) { + strm.next_in = (char*) data + gzip_header_length; + strm.avail_in = size - gzip_header_length; + } else { + strm.next_in = (char*) data; + strm.avail_in = 0; + } strm.total_in = 0; strm.zalloc = NULL; strm.zfree = NULL; strm.opaque = NULL; + /* + * note: maybe plain inflateInit(&strm) is adequate, + * it looks more backward-compatible also ; + * + * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; + * there might be a better check. + */ +#ifdef ZLIB_VERNUM if (Z_OK == inflateInit2(&strm, 15 + 32)) { +#else + if (Z_OK == inflateInit2(&strm, + -MAX_WBITS)) { +#endif dsize = 2 * size; if (dsize > MAX_DECOMPRESS) dsize = MAX_DECOMPRESS; @@ -687,7 +759,7 @@ getKeywords (EXTRACTOR_ExtractorList * extractor, ret = inflate(&strm, Z_SYNC_FLUSH); if (ret == Z_OK) { - if (dsize == MAX_DECOMPRESS) + if (dsize == MAX_DECOMPRESS) break; pos += strm.total_out; strm.total_out = 0;