libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit b469f81946127562c859a5c349476af165920ae1
parent 33f759aa8c65ac622f66b7550a4dd8bd6187a82c
Author: Christian Grothoff <christian@grothoff.org>
Date:   Fri, 18 Dec 2009 18:43:20 +0000

tar

Diffstat:
Msrc/plugins/Makefile.am | 13++++++-------
Asrc/plugins/tar_extractor.c | 861+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/plugins/tarextractor.c | 930-------------------------------------------------------------------------------
3 files changed, 867 insertions(+), 937 deletions(-)

diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -89,6 +89,7 @@ plugin_LTLIBRARIES = \ $(pdf) \ libextractor_real.la \ $(rpm) \ + libextractor_tar.la \ $(thumbgtk) \ libextractor_wav.la \ libextractor_zip.la @@ -245,6 +246,11 @@ libextractor_rpm_la_LDFLAGS = \ libextractor_rpm_la_LIBADD = \ -lrpm +libextractor_tar_la_SOURCES = \ + tar_extractor.c +libextractor_tar_la_LDFLAGS = \ + $(PLUGINFLAGS) + libextractor_thumbnailgtk_la_CFLAGS = \ $(GLIB_CFLAGS) $(GTK_CFLAGS) libextractor_thumbnailgtk_la_LIBADD = \ @@ -331,13 +337,6 @@ libextractor_id3v24_la_LDFLAGS = \ libextractor_id3v24_la_LIBADD = \ $(top_builddir)/src/common/libextractor_common.la -libextractor_tar_la_SOURCES = \ - tarextractor.c -libextractor_tar_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_tar_la_LIBADD = \ - $(top_builddir)/src/main/libextractor.la -lz - libextractor_tiff_la_SOURCES = \ tiffextractor.c libextractor_tiff_la_LDFLAGS = \ diff --git a/src/plugins/tar_extractor.c b/src/plugins/tar_extractor.c @@ -0,0 +1,861 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + +#include "platform.h" +#include "extractor.h" + +/* + * Note that this code is not complete! + * + * References: + * + * http://www.mkssoftware.com/docs/man4/tar.4.asp + * (does document USTAR format common nowadays, + * but not other extended formats such as the one produced + * by GNU tar 1.13 when very long filenames are met.) + * + * http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs + * (J. Schilling's remarks on TAR formats compatibility issues.) + */ + +/* + * Define known TAR archive member variants. + * In theory different variants + * can coexist within a single TAR archive file + * although this will be uncommon. + */ +#define TAR_V7ORIGINAL_FORMAT (1) +#define TAR_V7EXTENDED_FORMAT (1 << 1) +#define TAR_SCHILLING1985_FORMAT (1 << 2) +#define TAR_POSIX1988_FORMAT (1 << 3) +#define TAR_GNU1991_FORMAT (1 << 4) +#define TAR_SCHILLING1994_FORMAT (1 << 5) +#define TAR_GNU1997_FORMAT (1 << 6) +#define TAR_POSIX2001_FORMAT (1 << 7) +#define TAR_SCHILLING2001_FORMAT (1 << 8) +#define TAR_SOLARIS2001_FORMAT (1 << 9) +#define TAR_GNU2004_FORMAT (1 << 10) + +/* + * TAR header structure, modelled after POSIX.1-1988 + */ +typedef struct +{ + char fileName[100]; + char mode[8]; + char userId[8]; + char groupId[8]; + char fileSize[12]; + char lastModTime[12]; + char chksum[8]; + char link; + char linkName[100]; + /* + * All fields below are a + * either zero-filled or undefined + * for UNIX V7 TAR archive members ; + * their header is always 512 octets long nevertheless. + */ + char ustarMagic[6]; + char version[2]; + char userName[32]; + char groupName[32]; + char devMajor[8]; + char devMinor[8]; + char prefix[155]; + char filler[12]; +} TarHeader; + +#define TAR_HEADER_SIZE (sizeof(TarHeader)) +#define TAR_TIME_FENCE ((long long) (-(1LL << 62))) + +static size_t +tar_roundup (size_t size) +{ + size_t diff = (size % TAR_HEADER_SIZE); + + return (0 == diff) ? size : (size + (TAR_HEADER_SIZE - diff)); +} + +static int +tar_isnonzero (const char *data, unsigned int length) +{ + unsigned int total = 0; + + while (total < length) + { + if (0 != data[total]) + return 1; + total++; + } + + return 0; +} + +static unsigned int +tar_octalvalue (const char *data, size_t size, unsigned long long *valueptr) +{ + unsigned int result = 0; + + if (NULL != data && 0 < size) + { + const char *p = data; + int found = 0; + unsigned long long value = 0; + + while ((p < data + size) && (' ' == *p)) + p += 1; + + while ((p < data + size) && ('0' <= *p) && (*p < '8')) + { + found = 1; + value *= 8; + value += (*p - '0'); + p += 1; + } + + if (0 != found) + { + while ((p < data + size) && ((0 == *p) || (' ' == *p))) + p += 1; + + result = (p - data); + } + + if ((0 < result) && (NULL != valueptr)) + *valueptr = value; + } + + return result; +} + +#ifndef EOVERFLOW +#define EOVERFLOW -1 +#endif + +static int +tar_time (long long timeval, char *rtime, unsigned int rsize) +{ + int retval = 0; + + /* + * shift epoch to proleptic times + * to make subsequent modulo operations safer. + */ + long long my_timeval = timeval + + ((long long) ((1970 * 365) + 478) * (long long) 86400); + + unsigned int seconds = (unsigned int) (my_timeval % 60); + unsigned int minutes = (unsigned int) ((my_timeval / 60) % 60); + unsigned int hours = (unsigned int) ((my_timeval / 3600) % 24); + + unsigned int year = 0; + unsigned int month = 1; + + unsigned int days = (unsigned int) (my_timeval / (24 * 3600)); + + unsigned int days_in_month[] = + { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + unsigned int diff = 0; + + if ((long long) 0 > my_timeval) + return EDOM; + + /* + * 400-year periods + */ + year += (400 * (days / ((365 * 400) + 97))); + days %= ((365 * 400) + 97); + + /* + * 100-year periods + */ + diff = (days / ((365 * 100) + 24)); + if (4 <= diff) + { + year += 399; + days = 364; + } + else + { + year += (100 * diff); + days %= ((365 * 100) + 24); + } + + /* + * remaining leap years + */ + year += (4 * (days / ((365 * 4) + 1))); + days %= ((365 * 4) + 1); + + while (1) + { + if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100)))) + { + if (366 > days) + { + break; + } + else + { + days -= 366; + year++; + } + } + else + { + if (365 > days) + { + break; + } + else + { + days -= 365; + year++; + } + } + } + + if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100)))) + days_in_month[1] = 29; + + for (month = 0; (month < 12) && (days >= days_in_month[month]); month += 1) + days -= days_in_month[month]; + + retval = snprintf (rtime, rsize, "%04u-%02u-%02uT%02u:%02u:%02uZ", + year, month + 1, days + 1, hours, minutes, seconds); + + return (retval < rsize) ? 0 : EOVERFLOW; +} + +#define ADD(t,s) do { if (0 != (ret = proc (proc_cls, "tar", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) goto FINISH; } while (0) +#define ADDF(t,s) do { if (0 != (ret = proc (proc_cls, "tar", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) { free(s); goto FINISH; } free (s); } while (0) + +int +EXTRACTOR_tar_extract (const char *data, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls, + const char *options) +{ + char *fname = NULL; + size_t pos = 0; + int contents_are_empty = 1; + long long maxftime = TAR_TIME_FENCE; + unsigned int format_archive = 0; + int ret; + + if (512 != TAR_HEADER_SIZE) + return 0; /* compiler should remove this when optimising */ + if (0 != (size % TAR_HEADER_SIZE)) + return 0; /* cannot be tar! */ + if (size < TAR_HEADER_SIZE) + return 0; /* too short, or somehow truncated */ + + ret = 0; + pos = 0; + while ((pos + TAR_HEADER_SIZE) <= size) + { + const TarHeader *tar = NULL; + unsigned format_member = 0; + unsigned long long fmode; + unsigned long long fsize; + long long ftime = TAR_TIME_FENCE; + char typeFlag = -1; + const char *nul_pos; + unsigned int tar_prefix_length = 0; + unsigned int tar_name_length = 0; + unsigned int checksum_offset; + int checksum_computed_500s = 0; + int checksum_computed_512s = 0; + unsigned int checksum_computed_500u = 0; + unsigned int checksum_computed_512u = 0; + unsigned long long checksum_stored = 0; + + /* + * Compute TAR header checksum and compare with stored value. + * Allow for non-conformant checksums computed with signed values, + * such as those produced by early Solaris tar. + * Allow for non-conformant checksums computed on first 500 octets, + * such as those produced by SunOS 4.x tar according to J. Schilling. + * This will also detect EOF marks, since a zero-filled block + * cannot possibly hold octal values. + */ + for (checksum_offset = 0; checksum_offset < 148; checksum_offset += 1) + { + checksum_computed_500u += + (unsigned char) data[pos + checksum_offset]; + checksum_computed_500s += (signed char) data[pos + checksum_offset]; + } + if (8 > + tar_octalvalue (data + pos + checksum_offset, 8, &checksum_stored)) + break; + for (; checksum_offset < 156; checksum_offset += 1) + { + checksum_computed_500u += (unsigned char) ' '; + checksum_computed_500s += (signed char) ' '; + } + for (; checksum_offset < 500; checksum_offset += 1) + { + checksum_computed_500u += + (unsigned char) data[pos + checksum_offset]; + checksum_computed_500s += (signed char) data[pos + checksum_offset]; + } + + checksum_computed_512u = checksum_computed_500u; + checksum_computed_512s = checksum_computed_500s; + for (; checksum_offset < TAR_HEADER_SIZE; checksum_offset += 1) + { + checksum_computed_512u += + (unsigned char) data[pos + checksum_offset]; + checksum_computed_512s += (signed char) data[pos + checksum_offset]; + } + + /* + * Suggestion: use signed checksum matches to refine + * TAR format detection. + */ + if ((checksum_stored != (unsigned long long) checksum_computed_512u) + && (checksum_stored != (unsigned long long) checksum_computed_512s) + && (checksum_stored != (unsigned long long) checksum_computed_500s) + && (checksum_stored != (unsigned long long) checksum_computed_500u)) + break; + + tar = (const TarHeader *) &data[pos]; + typeFlag = tar->link; + pos += TAR_HEADER_SIZE; + + /* + * Checking all octal fields helps reduce + * the possibility of false positives ; + * only the file size, time and mode are used for now. + * + * This will fail over GNU and Schilling TAR huge size fields + * using non-octal encodings used for very large file lengths (> 8 GB). + */ + if ((12 > tar_octalvalue (tar->fileSize, 12, + &fsize)) + || (12 > tar_octalvalue (tar->lastModTime, 12, + (unsigned long long *) &ftime)) + || (8 > tar_octalvalue (tar->mode, 8, + (unsigned long long *) &fmode)) + || (8 > tar_octalvalue (tar->userId, 8, NULL)) + || (8 > tar_octalvalue (tar->groupId, 8, NULL))) + break; + + /* + * Find out which TAR variant is here. + */ + if (0 == memcmp (tar->ustarMagic, "ustar ", 7)) + { + + if (' ' == tar->mode[6]) + format_member = TAR_GNU1991_FORMAT; + else if (('K' == typeFlag) || ('L' == typeFlag)) + { + format_member = TAR_GNU1997_FORMAT; + ftime = TAR_TIME_FENCE; + } + else + format_member = + (((unsigned) fmode) != + (((unsigned) fmode) & 03777)) ? TAR_GNU1997_FORMAT : + TAR_GNU2004_FORMAT; + + } + else if (0 == memcmp (tar->ustarMagic, "ustar", 6)) + { + + /* + * It is important to perform test for SCHILLING1994 before GNU1997 + * because certain extension type flags ('L' and 'S' for instance) + * are used by both. + */ + if ((0 == tar->prefix[130]) + && (12 <= tar_octalvalue (tar->prefix + 131, 12, NULL)) + && (12 <= tar_octalvalue (tar->prefix + 143, 12, NULL)) + && (0 == tar_isnonzero (tar->filler, 8)) + && (0 == memcmp (tar->filler + 8, "tar", 4))) + { + + format_member = TAR_SCHILLING1994_FORMAT; + + } + else if (('D' == typeFlag) || ('K' == typeFlag) + || ('L' == typeFlag) || ('M' == typeFlag) + || ('N' == typeFlag) || ('S' == typeFlag) + || ('V' == typeFlag)) + { + + format_member = TAR_GNU1997_FORMAT; + + } + else if (('g' == typeFlag) + || ('x' == typeFlag) || ('X' == typeFlag)) + { + + format_member = TAR_POSIX2001_FORMAT; + ftime = TAR_TIME_FENCE; + + } + else + { + + format_member = TAR_POSIX1988_FORMAT; + + } + } + else if ((0 == memcmp (tar->filler + 8, "tar", 4)) + && (0 == tar_isnonzero (tar->filler, 8))) + { + + format_member = TAR_SCHILLING1985_FORMAT; + + } + else if (('0' <= typeFlag) && (typeFlag <= '2')) + { + + format_member = TAR_V7ORIGINAL_FORMAT; + + } + else + { + + format_member = TAR_V7EXTENDED_FORMAT; + + } + + /* + * Locate the file names. + */ + if ((0 != (format_member & TAR_POSIX2001_FORMAT)) + && (('x' == typeFlag) || ('X' == typeFlag))) + { + + if (size <= pos) + break; + + else if ((8 <= fsize) && fsize <= (unsigned long long) (size - pos)) + { + const char *keyptr = data + pos; + const char *valptr = NULL; + const char *nameptr = NULL; + unsigned int keylength = 0; + unsigned int namelength = 0; + + while (keyptr < data + pos + (size_t) fsize) + { + if (('0' > *keyptr) || ('9' < *keyptr)) + { + keyptr += 1; + continue; + } + + keylength = + (unsigned int) strtoul (keyptr, (char **) &valptr, 10); + if ((0 < keylength) && (NULL != valptr) + && (keyptr != valptr)) + { + unsigned int difflength = 0; + + while ((valptr < data + pos + (size_t) fsize) + && (' ' == *valptr)) + valptr += 1; + + difflength = (valptr - keyptr); + + if (0 == memcmp (valptr, "path=", 5)) + { + nameptr = valptr + 5; + namelength = keylength - (nameptr - keyptr); + } + else + { + + if ((keylength > (valptr - keyptr) + 4 + 2) + && (0 == memcmp (valptr, "GNU.", 4))) + format_archive |= TAR_GNU2004_FORMAT; + + else if ((keylength > (valptr - keyptr) + 7 + 2) + && (0 == memcmp (valptr, "SCHILY.", 7))) + format_archive |= TAR_SCHILLING2001_FORMAT; + + else if ((keylength > (valptr - keyptr) + 4 + 2) + && (0 == memcmp (valptr, "SUN.", 4))) + format_archive |= TAR_SOLARIS2001_FORMAT; + } + + keyptr += keylength; + } + else + { + nameptr = NULL; + break; + } + } + + if ((NULL != nameptr) && (0 != *nameptr) + && ((size - (nameptr - data)) >= namelength) + && (1 < namelength)) + { + if (NULL != fname) + free (fname); + /* + * There is an 1-offset because POSIX.1-2001 + * field separator is counted in field length. + */ + fname = malloc (namelength); + if (NULL != fname) + { + memcpy (fname, nameptr, namelength - 1); + fname[namelength - 1] = '\0'; + + pos += tar_roundup ((size_t) fsize); + format_archive |= format_member; + continue; + } + } + } + } + + else if ((0 != (format_member + & (TAR_SCHILLING1994_FORMAT + | TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT))) + && ('L' == typeFlag)) + { + + if (size <= pos) + break; + + else if ((0 < fsize) && fsize <= (unsigned long long) (size - pos)) + { + + size_t length = (size_t) fsize; + + nul_pos = memchr (data + pos, 0, length); + if (NULL != nul_pos) + length = (nul_pos - (data + pos)); + + if (0 < length) + { + if (NULL != fname) + free (fname); + fname = malloc (1 + length); + if (NULL != fname) + { + memcpy (fname, data + pos, length); + fname[length] = '\0'; + } + + pos += tar_roundup ((size_t) fsize); + format_archive |= format_member; + continue; + } + } + } + else + { + + nul_pos = memchr (tar->fileName, 0, sizeof tar->fileName); + tar_name_length = (0 == nul_pos) + ? sizeof (tar->fileName) : (nul_pos - tar->fileName); + + if ((0 != + (format_member & (TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT))) + && ('S' == typeFlag)) + { + + if ((0 == tar->prefix[40]) + && (0 != tar->prefix[137]) + && (12 <= tar_octalvalue (tar->prefix + 41, 12, NULL)) + && (12 <= tar_octalvalue (tar->prefix + 53, 12, NULL))) + { + /* + * fsize needs adjustment when there are more than 4 sparse blocks + */ + size_t diffpos = 0; + fsize += TAR_HEADER_SIZE; + + while ((pos + diffpos + TAR_HEADER_SIZE < size) + && (0 != *(data + pos + diffpos + 504))) + { + diffpos += TAR_HEADER_SIZE; + fsize += TAR_HEADER_SIZE; + } + } + + typeFlag = '0'; + + } + else if (0 != (format_member & TAR_SCHILLING1994_FORMAT)) + { + + nul_pos = memchr (tar->prefix, 0, 130); + tar_prefix_length = (0 == nul_pos) + ? 130 : (nul_pos - tar->prefix); + + if ('S' == typeFlag) + typeFlag = '0'; + + } + else if (0 != (format_member & TAR_SCHILLING1985_FORMAT)) + { + + nul_pos = memchr (tar->prefix, 0, 155); + tar_prefix_length = (0 == nul_pos) + ? 155 : (nul_pos - tar->prefix); + + + if ('S' == typeFlag) + typeFlag = '0'; + + } + else if (0 != (format_member & TAR_POSIX1988_FORMAT)) + { + + nul_pos = memchr (tar->prefix, 0, sizeof tar->prefix); + tar_prefix_length = (0 == nul_pos) + ? sizeof tar->prefix : nul_pos - tar->prefix; + + } + } + + /* + * Update position so that next loop iteration will find + * either a TAR header or TAR EOF mark or just EOF. + * + * Consider archive member size to be zero + * with no data following the header in the following cases : + * '1' : hard link, '2' : soft link, + * '3' : character device, '4' : block device, + * '5' : directory, '6' : named pipe. + */ + if ('1' != typeFlag && '2' != typeFlag + && '3' != typeFlag && '4' != typeFlag + && '5' != typeFlag && '6' != typeFlag) + { + if ((fsize > (unsigned long long) size) + || (fsize + (unsigned long long) pos > + (unsigned long long) size)) + break; + + pos += tar_roundup ((size_t) fsize); + } + if (pos - 1 > size) + break; + + format_archive |= format_member; + + /* + * Store the file name in libextractor list. + * + * For the time being, only file types listed in POSIX.1-1988 ('0'..'7') + * are retained, leaving out labels, access control lists, etc. + */ + if ((0 == typeFlag) || (('0' <= typeFlag) && (typeFlag <= '7'))) + { + if (NULL == fname) + { + if (0 < tar_prefix_length + tar_name_length) + { + fname = malloc (2 + tar_prefix_length + tar_name_length); + + if (NULL != fname) + { + if (0 < tar_prefix_length) + { + memcpy (fname, tar->prefix, tar_prefix_length); + + if (('/' != tar->prefix[tar_prefix_length - 1]) + && (0 < tar_name_length) + && ('/' != tar->fileName[0])) + { + fname[tar_prefix_length] = '/'; + tar_prefix_length += 1; + } + } + + if (0 < tar_name_length) + memcpy (fname + tar_prefix_length, tar->fileName, + tar_name_length); + + fname[tar_prefix_length + tar_name_length] = '\0'; + } + } + } + + if ((NULL != fname) && (0 != *fname)) + { +#if 0 + fprintf (stdout, + "(%u) flag = %c, size = %u, tname = (%s), fname = (%s)\n", + __LINE__, typeFlag, (unsigned int) fsize, + (NULL == tar->fileName) ? "" : tar->fileName, + (NULL == fname) ? "" : fname); +#endif + + ADDF (EXTRACTOR_METATYPE_FILENAME, fname); + fname = NULL; + if (ftime > maxftime) + maxftime = ftime; + contents_are_empty = 0; + } + } + + if (NULL != fname) + { + free (fname); + fname = NULL; + } + } + + if (NULL != fname) + { + free (fname); + fname = NULL; + } + + /* + * Report mimetype; report also format(s) and most recent date + * when at least one archive member was found. + */ + if (0 != format_archive) + { + if (0 == contents_are_empty) + { + + const char *formats[5] = { NULL, NULL, NULL, NULL, NULL }; + unsigned int formats_count = 0; + unsigned int formats_u = 0; + unsigned int format_length = 0; + char *format = NULL; + + if (TAR_TIME_FENCE < maxftime) + { + char iso8601_time[24]; + + if (0 == tar_time (maxftime, iso8601_time, sizeof(iso8601_time))) + ADD (EXTRACTOR_METATYPE_CREATION_DATE, iso8601_time); + } + + /* + * We only keep the most recent POSIX format. + */ + if (0 != (format_archive & TAR_POSIX2001_FORMAT)) + formats[formats_count++] = "POSIX 2001"; + + else if (0 != (format_archive & TAR_POSIX1988_FORMAT)) + formats[formats_count++] = "POSIX 1988"; + + /* + * We only keep the most recent GNU format. + */ + if (0 != (format_archive & TAR_GNU2004_FORMAT)) + formats[formats_count++] = "GNU 2004"; + + else if (0 != (format_archive & TAR_GNU1997_FORMAT)) + formats[formats_count++] = "GNU 1997"; + + else if (0 != (format_archive & TAR_GNU1991_FORMAT)) + formats[formats_count++] = "GNU 1991"; + + /* + * We only keep the most recent Schilling format. + */ + if (0 != (format_archive & TAR_SCHILLING2001_FORMAT)) + formats[formats_count++] = "Schilling 2001"; + + else if (0 != (format_archive & TAR_SCHILLING1994_FORMAT)) + formats[formats_count++] = "Schilling 1994"; + + else if (0 != (format_archive & TAR_SCHILLING1985_FORMAT)) + formats[formats_count++] = "Schilling 1985"; + + /* + * We only keep the most recent Solaris format. + */ + if (0 != (format_archive & TAR_SOLARIS2001_FORMAT)) + formats[formats_count++] = "Solaris 2001"; + + /* + * We only keep the (supposedly) most recent UNIX V7 format. + */ + if (0 != (format_archive & TAR_V7EXTENDED_FORMAT)) + formats[formats_count++] = "UNIX extended V7"; + + else if (0 != (format_archive & TAR_V7ORIGINAL_FORMAT)) + formats[formats_count++] = "UNIX original V7"; + + /* + * Build the format string + */ + for (formats_u = 0; formats_u < formats_count; formats_u += 1) + { + if ((NULL != formats[formats_u]) && (0 != *formats[formats_u])) + { + if (0 < format_length) + format_length += 3; + format_length += strlen (formats[formats_u]); + } + } + + if (0 < format_length) + { + format = malloc (format_length + 5); + + if (NULL != format) + { + + format_length = 0; + + for (formats_u = 0; formats_u < formats_count; + formats_u += 1) + { + if ((NULL != formats[formats_u]) + && (0 != *formats[formats_u])) + { + if (0 < format_length) + { + strcpy (format + format_length, " + "); + format_length += 3; + } + strcpy (format + format_length, formats[formats_u]); + format_length += strlen (formats[formats_u]); + } + } + + if (0 < format_length) + { + strcpy (format + format_length, " TAR"); + ADDF (EXTRACTOR_METATYPE_FORMAT_VERSION, format); + } + else + { + free (format); + } + } + } + } + + ADD (EXTRACTOR_METATYPE_MIMETYPE, "application/x-tar"); + } + FINISH: + return ret; +} diff --git a/src/plugins/tarextractor.c b/src/plugins/tarextractor.c @@ -1,930 +0,0 @@ -/* - This file is part of libextractor. - (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. - */ - -#include "platform.h" -#include "extractor.h" - -/* - * Note that this code is not complete! - * - * References: - * - * http://www.mkssoftware.com/docs/man4/tar.4.asp - * (does document USTAR format common nowadays, - * but not other extended formats such as the one produced - * by GNU tar 1.13 when very long filenames are met.) - * - * http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs - * (J. Schilling's remarks on TAR formats compatibility issues.) - */ - -static EXTRACTOR_KeywordList * -addKeyword (EXTRACTOR_KeywordType type, - char *keyword, EXTRACTOR_KeywordList * next) -{ - EXTRACTOR_KeywordList *result = next; - - if (NULL != keyword) - { - if (0 == *keyword) - { - free (keyword); - } - else - { - result = malloc (sizeof (EXTRACTOR_KeywordList)); - if (NULL == result) - { - free (keyword); - } - else - { - result->next = next; - result->keyword = keyword; - result->keywordType = type; - } - } - } - - return result; -} - -static EXTRACTOR_KeywordList * -appendKeyword (EXTRACTOR_KeywordType type, - char *keyword, EXTRACTOR_KeywordList * last) -{ - EXTRACTOR_KeywordList *result = last; - - if (NULL != keyword) - { - if (0 == *keyword) - { - free (keyword); - } - else - { - if ((NULL != last) && (NULL != last->next)) - abort (); - result = malloc (sizeof (EXTRACTOR_KeywordList)); - if (NULL == result) - { - free (keyword); - } - else - { - result->next = NULL; - result->keywordType = type; - result->keyword = keyword; - if (NULL != last) - last->next = result; - } - } - } - - return result; -} - -/* - * Define known TAR archive member variants. - * In theory different variants - * can coexist within a single TAR archive file - * although this will be uncommon. - */ -#define TAR_V7ORIGINAL_FORMAT (1) -#define TAR_V7EXTENDED_FORMAT (1 << 1) -#define TAR_SCHILLING1985_FORMAT (1 << 2) -#define TAR_POSIX1988_FORMAT (1 << 3) -#define TAR_GNU1991_FORMAT (1 << 4) -#define TAR_SCHILLING1994_FORMAT (1 << 5) -#define TAR_GNU1997_FORMAT (1 << 6) -#define TAR_POSIX2001_FORMAT (1 << 7) -#define TAR_SCHILLING2001_FORMAT (1 << 8) -#define TAR_SOLARIS2001_FORMAT (1 << 9) -#define TAR_GNU2004_FORMAT (1 << 10) - -/* - * TAR header structure, modelled after POSIX.1-1988 - */ -typedef struct -{ - char fileName[100]; - char mode[8]; - char userId[8]; - char groupId[8]; - char fileSize[12]; - char lastModTime[12]; - char chksum[8]; - char link; - char linkName[100]; - /* - * All fields below are a - * either zero-filled or undefined - * for UNIX V7 TAR archive members ; - * their header is always 512 octets long nevertheless. - */ - char ustarMagic[6]; - char version[2]; - char userName[32]; - char groupName[32]; - char devMajor[8]; - char devMinor[8]; - char prefix[155]; - char filler[12]; -} TarHeader; - -#define TAR_HEADER_SIZE (sizeof(TarHeader)) -#define TAR_TIME_FENCE ((long long) (-(1LL << 62))) - -static size_t -tar_roundup (size_t size) -{ - size_t diff = (size % TAR_HEADER_SIZE); - - return (0 == diff) ? size : (size + (TAR_HEADER_SIZE - diff)); -} - -static int -tar_isnonzero (const char *data, unsigned int length) -{ - unsigned int total = 0; - - while (total < length) - { - if (0 != data[total]) - return 1; - total++; - } - - return 0; -} - -static unsigned int -tar_octalvalue (const char *data, size_t size, unsigned long long *valueptr) -{ - unsigned int result = 0; - - if (NULL != data && 0 < size) - { - const char *p = data; - int found = 0; - unsigned long long value = 0; - - while ((p < data + size) && (' ' == *p)) - p += 1; - - while ((p < data + size) && ('0' <= *p) && (*p < '8')) - { - found = 1; - value *= 8; - value += (*p - '0'); - p += 1; - } - - if (0 != found) - { - while ((p < data + size) && ((0 == *p) || (' ' == *p))) - p += 1; - - result = (p - data); - } - - if ((0 < result) && (NULL != valueptr)) - *valueptr = value; - } - - return result; -} - -#ifndef EOVERFLOW -#define EOVERFLOW -1 -#endif - -static int -tar_time (long long timeval, char *rtime, unsigned int rsize) -{ - int retval = 0; - - /* - * shift epoch to proleptic times - * to make subsequent modulo operations safer. - */ - long long my_timeval = timeval - + ((long long) ((1970 * 365) + 478) * (long long) 86400); - - unsigned int seconds = (unsigned int) (my_timeval % 60); - unsigned int minutes = (unsigned int) ((my_timeval / 60) % 60); - unsigned int hours = (unsigned int) ((my_timeval / 3600) % 24); - - unsigned int year = 0; - unsigned int month = 1; - - unsigned int days = (unsigned int) (my_timeval / (24 * 3600)); - - unsigned int days_in_month[] = - { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; - unsigned int diff = 0; - - if ((long long) 0 > my_timeval) - return EDOM; - - /* - * 400-year periods - */ - year += (400 * (days / ((365 * 400) + 97))); - days %= ((365 * 400) + 97); - - /* - * 100-year periods - */ - diff = (days / ((365 * 100) + 24)); - if (4 <= diff) - { - year += 399; - days = 364; - } - else - { - year += (100 * diff); - days %= ((365 * 100) + 24); - } - - /* - * remaining leap years - */ - year += (4 * (days / ((365 * 4) + 1))); - days %= ((365 * 4) + 1); - - while (1) - { - if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100)))) - { - if (366 > days) - { - break; - } - else - { - days -= 366; - year++; - } - } - else - { - if (365 > days) - { - break; - } - else - { - days -= 365; - year++; - } - } - } - - if ((0 == (year % 400)) || ((0 == (year % 4)) && (0 != (year % 100)))) - days_in_month[1] = 29; - - for (month = 0; (month < 12) && (days >= days_in_month[month]); month += 1) - days -= days_in_month[month]; - - retval = snprintf (rtime, rsize, "%04u-%02u-%02uT%02u:%02u:%02uZ", - year, month + 1, days + 1, hours, minutes, seconds); - - return (retval < rsize) ? 0 : EOVERFLOW; -} - -struct EXTRACTOR_Keywords * -libextractor_tar_extract (const char *filename, - const char *data, - size_t size, struct EXTRACTOR_Keywords *prev) -{ - char *fname = NULL; - size_t pos = 0; - int contents_are_empty = 1; - long long maxftime = TAR_TIME_FENCE; - unsigned int format_archive = 0; - struct EXTRACTOR_Keywords *last; - - if (512 != TAR_HEADER_SIZE) - return prev; /* compiler should remove this when optimising */ - if (0 != (size % TAR_HEADER_SIZE)) - return prev; /* cannot be tar! */ - if (size < TAR_HEADER_SIZE) - return prev; /* too short, or somehow truncated */ - - last = prev; - if (last != NULL) - while (last->next != NULL) - last = last->next; - - pos = 0; - while ((pos + TAR_HEADER_SIZE) <= size) - { - const TarHeader *tar = NULL; - unsigned format_member = 0; - unsigned long long fmode; - unsigned long long fsize; - long long ftime = TAR_TIME_FENCE; - char typeFlag = -1; - const char *nul_pos; - unsigned int tar_prefix_length = 0; - unsigned int tar_name_length = 0; - unsigned int checksum_offset; - int checksum_computed_500s = 0; - int checksum_computed_512s = 0; - unsigned int checksum_computed_500u = 0; - unsigned int checksum_computed_512u = 0; - unsigned long long checksum_stored = 0; - - /* - * Compute TAR header checksum and compare with stored value. - * Allow for non-conformant checksums computed with signed values, - * such as those produced by early Solaris tar. - * Allow for non-conformant checksums computed on first 500 octets, - * such as those produced by SunOS 4.x tar according to J. Schilling. - * This will also detect EOF marks, since a zero-filled block - * cannot possibly hold octal values. - */ - for (checksum_offset = 0; checksum_offset < 148; checksum_offset += 1) - { - checksum_computed_500u += - (unsigned char) data[pos + checksum_offset]; - checksum_computed_500s += (signed char) data[pos + checksum_offset]; - } - if (8 > - tar_octalvalue (data + pos + checksum_offset, 8, &checksum_stored)) - break; - for (; checksum_offset < 156; checksum_offset += 1) - { - checksum_computed_500u += (unsigned char) ' '; - checksum_computed_500s += (signed char) ' '; - } - for (; checksum_offset < 500; checksum_offset += 1) - { - checksum_computed_500u += - (unsigned char) data[pos + checksum_offset]; - checksum_computed_500s += (signed char) data[pos + checksum_offset]; - } - - checksum_computed_512u = checksum_computed_500u; - checksum_computed_512s = checksum_computed_500s; - for (; checksum_offset < TAR_HEADER_SIZE; checksum_offset += 1) - { - checksum_computed_512u += - (unsigned char) data[pos + checksum_offset]; - checksum_computed_512s += (signed char) data[pos + checksum_offset]; - } - - /* - * Suggestion: use signed checksum matches to refine - * TAR format detection. - */ - if ((checksum_stored != (unsigned long long) checksum_computed_512u) - && (checksum_stored != (unsigned long long) checksum_computed_512s) - && (checksum_stored != (unsigned long long) checksum_computed_500s) - && (checksum_stored != (unsigned long long) checksum_computed_500u)) - break; - - tar = (const TarHeader *) &data[pos]; - typeFlag = tar->link; - pos += TAR_HEADER_SIZE; - - /* - * Checking all octal fields helps reduce - * the possibility of false positives ; - * only the file size, time and mode are used for now. - * - * This will fail over GNU and Schilling TAR huge size fields - * using non-octal encodings used for very large file lengths (> 8 GB). - */ - if ((12 > tar_octalvalue (tar->fileSize, 12, - &fsize)) - || (12 > tar_octalvalue (tar->lastModTime, 12, - (unsigned long long *) &ftime)) - || (8 > tar_octalvalue (tar->mode, 8, - (unsigned long long *) &fmode)) - || (8 > tar_octalvalue (tar->userId, 8, NULL)) - || (8 > tar_octalvalue (tar->groupId, 8, NULL))) - break; - - /* - * Find out which TAR variant is here. - */ - if (0 == memcmp (tar->ustarMagic, "ustar ", 7)) - { - - if (' ' == tar->mode[6]) - format_member = TAR_GNU1991_FORMAT; - else if (('K' == typeFlag) || ('L' == typeFlag)) - { - format_member = TAR_GNU1997_FORMAT; - ftime = TAR_TIME_FENCE; - } - else - format_member = - (((unsigned) fmode) != - (((unsigned) fmode) & 03777)) ? TAR_GNU1997_FORMAT : - TAR_GNU2004_FORMAT; - - } - else if (0 == memcmp (tar->ustarMagic, "ustar", 6)) - { - - /* - * It is important to perform test for SCHILLING1994 before GNU1997 - * because certain extension type flags ('L' and 'S' for instance) - * are used by both. - */ - if ((0 == tar->prefix[130]) - && (12 <= tar_octalvalue (tar->prefix + 131, 12, NULL)) - && (12 <= tar_octalvalue (tar->prefix + 143, 12, NULL)) - && (0 == tar_isnonzero (tar->filler, 8)) - && (0 == memcmp (tar->filler + 8, "tar", 4))) - { - - format_member = TAR_SCHILLING1994_FORMAT; - - } - else if (('D' == typeFlag) || ('K' == typeFlag) - || ('L' == typeFlag) || ('M' == typeFlag) - || ('N' == typeFlag) || ('S' == typeFlag) - || ('V' == typeFlag)) - { - - format_member = TAR_GNU1997_FORMAT; - - } - else if (('g' == typeFlag) - || ('x' == typeFlag) || ('X' == typeFlag)) - { - - format_member = TAR_POSIX2001_FORMAT; - ftime = TAR_TIME_FENCE; - - } - else - { - - format_member = TAR_POSIX1988_FORMAT; - - } - } - else if ((0 == memcmp (tar->filler + 8, "tar", 4)) - && (0 == tar_isnonzero (tar->filler, 8))) - { - - format_member = TAR_SCHILLING1985_FORMAT; - - } - else if (('0' <= typeFlag) && (typeFlag <= '2')) - { - - format_member = TAR_V7ORIGINAL_FORMAT; - - } - else - { - - format_member = TAR_V7EXTENDED_FORMAT; - - } - - /* - * Locate the file names. - */ - if ((0 != (format_member & TAR_POSIX2001_FORMAT)) - && (('x' == typeFlag) || ('X' == typeFlag))) - { - - if (size <= pos) - break; - - else if ((8 <= fsize) && fsize <= (unsigned long long) (size - pos)) - { - const char *keyptr = data + pos; - const char *valptr = NULL; - const char *nameptr = NULL; - unsigned int keylength = 0; - unsigned int namelength = 0; - - while (keyptr < data + pos + (size_t) fsize) - { - if (('0' > *keyptr) || ('9' < *keyptr)) - { - keyptr += 1; - continue; - } - - keylength = - (unsigned int) strtoul (keyptr, (char **) &valptr, 10); - if ((0 < keylength) && (NULL != valptr) - && (keyptr != valptr)) - { - unsigned int difflength = 0; - - while ((valptr < data + pos + (size_t) fsize) - && (' ' == *valptr)) - valptr += 1; - - difflength = (valptr - keyptr); - - if (0 == memcmp (valptr, "path=", 5)) - { - nameptr = valptr + 5; - namelength = keylength - (nameptr - keyptr); - } - else - { - - if ((keylength > (valptr - keyptr) + 4 + 2) - && (0 == memcmp (valptr, "GNU.", 4))) - format_archive |= TAR_GNU2004_FORMAT; - - else if ((keylength > (valptr - keyptr) + 7 + 2) - && (0 == memcmp (valptr, "SCHILY.", 7))) - format_archive |= TAR_SCHILLING2001_FORMAT; - - else if ((keylength > (valptr - keyptr) + 4 + 2) - && (0 == memcmp (valptr, "SUN.", 4))) - format_archive |= TAR_SOLARIS2001_FORMAT; - } - - keyptr += keylength; - } - else - { - nameptr = NULL; - break; - } - } - - if ((NULL != nameptr) && (0 != *nameptr) - && ((size - (nameptr - data)) >= namelength) - && (1 < namelength)) - { - if (NULL != fname) - free (fname); - /* - * There is an 1-offset because POSIX.1-2001 - * field separator is counted in field length. - */ - fname = malloc (namelength); - if (NULL != fname) - { - memcpy (fname, nameptr, namelength - 1); - fname[namelength - 1] = '\0'; - - pos += tar_roundup ((size_t) fsize); - format_archive |= format_member; - continue; - } - } - } - } - - else if ((0 != (format_member - & (TAR_SCHILLING1994_FORMAT - | TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT))) - && ('L' == typeFlag)) - { - - if (size <= pos) - break; - - else if ((0 < fsize) && fsize <= (unsigned long long) (size - pos)) - { - - size_t length = (size_t) fsize; - - nul_pos = memchr (data + pos, 0, length); - if (NULL != nul_pos) - length = (nul_pos - (data + pos)); - - if (0 < length) - { - if (NULL != fname) - free (fname); - fname = malloc (1 + length); - if (NULL != fname) - { - memcpy (fname, data + pos, length); - fname[length] = '\0'; - } - - pos += tar_roundup ((size_t) fsize); - format_archive |= format_member; - continue; - } - } - } - else - { - - nul_pos = memchr (tar->fileName, 0, sizeof tar->fileName); - tar_name_length = (0 == nul_pos) - ? sizeof (tar->fileName) : (nul_pos - tar->fileName); - - if ((0 != - (format_member & (TAR_GNU1997_FORMAT | TAR_GNU2004_FORMAT))) - && ('S' == typeFlag)) - { - - if ((0 == tar->prefix[40]) - && (0 != tar->prefix[137]) - && (12 <= tar_octalvalue (tar->prefix + 41, 12, NULL)) - && (12 <= tar_octalvalue (tar->prefix + 53, 12, NULL))) - { - /* - * fsize needs adjustment when there are more than 4 sparse blocks - */ - size_t diffpos = 0; - fsize += TAR_HEADER_SIZE; - - while ((pos + diffpos + TAR_HEADER_SIZE < size) - && (0 != *(data + pos + diffpos + 504))) - { - diffpos += TAR_HEADER_SIZE; - fsize += TAR_HEADER_SIZE; - } - } - - typeFlag = '0'; - - } - else if (0 != (format_member & TAR_SCHILLING1994_FORMAT)) - { - - nul_pos = memchr (tar->prefix, 0, 130); - tar_prefix_length = (0 == nul_pos) - ? 130 : (nul_pos - tar->prefix); - - if ('S' == typeFlag) - typeFlag = '0'; - - } - else if (0 != (format_member & TAR_SCHILLING1985_FORMAT)) - { - - nul_pos = memchr (tar->prefix, 0, 155); - tar_prefix_length = (0 == nul_pos) - ? 155 : (nul_pos - tar->prefix); - - - if ('S' == typeFlag) - typeFlag = '0'; - - } - else if (0 != (format_member & TAR_POSIX1988_FORMAT)) - { - - nul_pos = memchr (tar->prefix, 0, sizeof tar->prefix); - tar_prefix_length = (0 == nul_pos) - ? sizeof tar->prefix : nul_pos - tar->prefix; - - } - } - - /* - * Update position so that next loop iteration will find - * either a TAR header or TAR EOF mark or just EOF. - * - * Consider archive member size to be zero - * with no data following the header in the following cases : - * '1' : hard link, '2' : soft link, - * '3' : character device, '4' : block device, - * '5' : directory, '6' : named pipe. - */ - if ('1' != typeFlag && '2' != typeFlag - && '3' != typeFlag && '4' != typeFlag - && '5' != typeFlag && '6' != typeFlag) - { - if ((fsize > (unsigned long long) size) - || (fsize + (unsigned long long) pos > - (unsigned long long) size)) - break; - - pos += tar_roundup ((size_t) fsize); - } - if (pos - 1 > size) - break; - - format_archive |= format_member; - - /* - * Store the file name in libextractor list. - * - * For the time being, only file types listed in POSIX.1-1988 ('0'..'7') - * are retained, leaving out labels, access control lists, etc. - */ - if ((0 == typeFlag) || (('0' <= typeFlag) && (typeFlag <= '7'))) - { - if (NULL == fname) - { - if (0 < tar_prefix_length + tar_name_length) - { - fname = malloc (2 + tar_prefix_length + tar_name_length); - - if (NULL != fname) - { - if (0 < tar_prefix_length) - { - memcpy (fname, tar->prefix, tar_prefix_length); - - if (('/' != tar->prefix[tar_prefix_length - 1]) - && (0 < tar_name_length) - && ('/' != tar->fileName[0])) - { - fname[tar_prefix_length] = '/'; - tar_prefix_length += 1; - } - } - - if (0 < tar_name_length) - memcpy (fname + tar_prefix_length, tar->fileName, - tar_name_length); - - fname[tar_prefix_length + tar_name_length] = '\0'; - } - } - } - - if ((NULL != fname) && (0 != *fname)) - { -#if 0 - fprintf (stdout, - "(%u) flag = %c, size = %u, tname = (%s), fname = (%s)\n", - __LINE__, typeFlag, (unsigned int) fsize, - (NULL == tar->fileName) ? "" : tar->fileName, - (NULL == fname) ? "" : fname); -#endif - - last = appendKeyword (EXTRACTOR_FILENAME, fname, last); - fname = NULL; - if (prev == NULL) - prev = last; - if (ftime > maxftime) - maxftime = ftime; - contents_are_empty = 0; - } - } - - if (NULL != fname) - { - free (fname); - fname = NULL; - } - } - - if (NULL != fname) - { - free (fname); - fname = NULL; - } - - /* - * Report mimetype; report also format(s) and most recent date - * when at least one archive member was found. - */ - if (0 != format_archive) - { - if (0 == contents_are_empty) - { - - const char *formats[5] = { NULL, NULL, NULL, NULL, NULL }; - unsigned int formats_count = 0; - unsigned int formats_u = 0; - unsigned int format_length = 0; - char *format = NULL; - - if (TAR_TIME_FENCE < maxftime) - { - char iso8601_time[24]; - - if (0 == tar_time (maxftime, iso8601_time, sizeof iso8601_time)) - prev = - addKeyword (EXTRACTOR_DATE, strdup (iso8601_time), prev); - } - - /* - * We only keep the most recent POSIX format. - */ - if (0 != (format_archive & TAR_POSIX2001_FORMAT)) - formats[formats_count++] = "POSIX 2001"; - - else if (0 != (format_archive & TAR_POSIX1988_FORMAT)) - formats[formats_count++] = "POSIX 1988"; - - /* - * We only keep the most recent GNU format. - */ - if (0 != (format_archive & TAR_GNU2004_FORMAT)) - formats[formats_count++] = "GNU 2004"; - - else if (0 != (format_archive & TAR_GNU1997_FORMAT)) - formats[formats_count++] = "GNU 1997"; - - else if (0 != (format_archive & TAR_GNU1991_FORMAT)) - formats[formats_count++] = "GNU 1991"; - - /* - * We only keep the most recent Schilling format. - */ - if (0 != (format_archive & TAR_SCHILLING2001_FORMAT)) - formats[formats_count++] = "Schilling 2001"; - - else if (0 != (format_archive & TAR_SCHILLING1994_FORMAT)) - formats[formats_count++] = "Schilling 1994"; - - else if (0 != (format_archive & TAR_SCHILLING1985_FORMAT)) - formats[formats_count++] = "Schilling 1985"; - - /* - * We only keep the most recent Solaris format. - */ - if (0 != (format_archive & TAR_SOLARIS2001_FORMAT)) - formats[formats_count++] = "Solaris 2001"; - - /* - * We only keep the (supposedly) most recent UNIX V7 format. - */ - if (0 != (format_archive & TAR_V7EXTENDED_FORMAT)) - formats[formats_count++] = "UNIX extended V7"; - - else if (0 != (format_archive & TAR_V7ORIGINAL_FORMAT)) - formats[formats_count++] = "UNIX original V7"; - - /* - * Build the format string - */ - for (formats_u = 0; formats_u < formats_count; formats_u += 1) - { - if ((NULL != formats[formats_u]) && (0 != *formats[formats_u])) - { - if (0 < format_length) - format_length += 3; - format_length += strlen (formats[formats_u]); - } - } - - if (0 < format_length) - { - format = malloc (format_length + 5); - - if (NULL != format) - { - - format_length = 0; - - for (formats_u = 0; formats_u < formats_count; - formats_u += 1) - { - if ((NULL != formats[formats_u]) - && (0 != *formats[formats_u])) - { - if (0 < format_length) - { - strcpy (format + format_length, " + "); - format_length += 3; - } - strcpy (format + format_length, formats[formats_u]); - format_length += strlen (formats[formats_u]); - } - } - - if (0 < format_length) - { - strcpy (format + format_length, " TAR"); - prev = addKeyword (EXTRACTOR_FORMAT, format, prev); - } - else - { - free (format); - } - } - } - } - - prev = - addKeyword (EXTRACTOR_MIMETYPE, strdup ("application/x-tar"), prev); - } - - return prev; -}