libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit f2b333dc465e1bf08f4f97eca6f60facfeaa1fe7
parent 690b296d909fa5de839338d415e18857be119fe4
Author: Christian Grothoff <christian@grothoff.org>
Date:   Mon, 12 Dec 2005 07:29:00 +0000

Hello Christian,

You will find attached yet another patch for libextractor (0.5.8).
This one helps reduce the possibility of false positives (i.e.
non-Tar files which are output with an application/x-tar mimetype).
I got rid of sscanf()-based octal to binary conversions in order to
accommodate various padding schemes for octal fields.

I plan to add dates (most recent file date in archive).
I would also like to make it fully compatible with GNU and Schilling's
extended formats (long filenames, etc.), but it won't be finished this
year.

Cordially,
--
Ronan Melennec  


Diffstat:
M src/plugins/tarextractor.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/plugins/tarextractor.c b/src/plugins/tarextractor.c @@ -102,6 +102,48 @@ typedef struct { char prefix[155]; } USTarHeader; +static unsigned +taroctalvalue(const char *data, + size_t size, + unsigned long long *valueptr) +{ + unsigned result = 0; + + if(NULL != data && 0 < size) + { + const char *p = data; + int found = 0; + unsigned long long value = 0; + + while( (p < data + size) && (' ' == *p)) + p += 1; + + while( (p < data + size) && ('0' <= *p) && (*p < '8') ) + { + found = 1; + value *= 8; + value += (*p - '0'); + p += 1; + } + + if(0 != found) + { + while( (p < data + size) && (' ' == *p) ) + p += 1; + + while( (p < data + size) && (0 == *p) ) + p += 1; + + result = (p - data); + } + + if( (0 < result) && (NULL != valueptr) ) + *valueptr = value; + } + + return result; +} + struct EXTRACTOR_Keywords * libextractor_tar_extract(const char * filename, @@ -114,7 +156,7 @@ libextractor_tar_extract(const char * filename, int contents_are_empty = 1; const char * mimetype = NULL; struct EXTRACTOR_Keywords * last; - + last = prev; if (last != NULL) while (last->next != NULL) @@ -128,7 +170,6 @@ libextractor_tar_extract(const char * filename, pos = 0; while (pos + sizeof(TarHeader) < size) { unsigned long long fsize; - char buf[13]; const char * nul_pos; const char * ustar_prefix = NULL; unsigned int ustar_prefix_length = 0; @@ -150,6 +191,19 @@ libextractor_tar_extract(const char * filename, break; tar = (const TarHeader*) &data[pos]; + + /* + * checking all octal fields helps reduce + * the possibility of false positives ; + * only the file size is used for now. + */ + if( (12 > taroctalvalue(tar->filesize, 12, &fsize)) + || (12 > taroctalvalue(tar->lastModTime, 12, NULL)) + || (8 > taroctalvalue(tar->mode, 8, NULL)) + || (8 > taroctalvalue(tar->userId, 8, NULL)) + || (8 > taroctalvalue(tar->groupId, 8, NULL)) ) + break; + /* fixme: we may want to check the header checksum here... 
*/ /* fixme: we attempt to follow MKS document for long file names, but no TAR file was found yet which matched what we understood ! */ @@ -186,10 +240,7 @@ libextractor_tar_extract(const char * filename, } else { pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ } - memcpy(buf, &tar->filesize[0], 12); - buf[12] = '\0'; - if (1 != sscanf(buf, "%12llo", &fsize)) /* octal! Yuck yuck! */ - break; + if ( (pos + fsize > size) || (fsize > size) || (pos + fsize < pos) )