commit 069ebf37916afe510bcb98af4a87db8e929c6053
parent 42b645632bfdde2c24fb8ae7d2e76ecf0f76697f
Author: Christian Grothoff <christian@grothoff.org>
Date: Fri, 23 Sep 2005 03:56:15 +0000
sync
Diffstat:
4 files changed, 150 insertions(+), 98 deletions(-)
diff --git a/AUTHORS b/AUTHORS
@@ -21,7 +21,7 @@ printable - core team based in part on code from GNUnet (bloom filters)
avi - core team based in part on code from avinfo 1.0.0 alpha 11 and bitcollider 0.6.0
mpeg - core team based in part on code from avinfo 1.0.0 alpha 11 and bitcollider 0.6.0
ole2 - core team based on code from libgsf
-tar - core team
+tar - core team and Ronan MELENNEC <ronan.melennec@cena.fr>
tar.gz - core team using zlib
deb - core team using zlib
man - core team using zlib (for man.gz)
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,10 @@
+Thu Sep 22 21:05:53 PDT 2005
+ Improved TAR extractor:
+ - it now accepts old-style (UNIX V7) archives
+ - it produces a mimetype for old-style archives
+ - it outputs the file names in the same order as in the TAR file
+ - its end-of-file mark detection is more robust
+
Wed Sep 21 13:54:19 PDT 2005
Added Irish translation.
diff --git a/po/de.po b/po/de.po
@@ -7,10 +7,10 @@
#
msgid ""
msgstr ""
-"Project-Id-Version: libextractor 0.5.6\n"
+"Project-Id-Version: libextractor 0.5.6a\n"
"Report-Msgid-Bugs-To: libextractor@gnu.org\n"
"POT-Creation-Date: 2005-09-20 23:59-0700\n"
-"PO-Revision-Date: 2005-09-21 07:27+0200\n"
+"PO-Revision-Date: 2005-09-22 10:07+0200\n"
"Last-Translator: Karl Eichwalder <ke@suse.de>\n"
"Language-Team: German <de@li.org>\n"
"MIME-Version: 1.0\n"
@@ -21,7 +21,7 @@ msgstr ""
#: src/plugins/language/language-compiler.c:37
#, c-format
msgid "Please provide a list of klp files as arguments.\n"
-msgstr ""
+msgstr "Geben Sie eine Liste der klp-Dateien als Argument an.\n"
#: src/plugins/language/language-compiler.c:48
#: src/plugins/printable/dictionary-builder.c:113
@@ -228,9 +228,8 @@ msgid "Pranks"
msgstr "Punk"
#: src/plugins/mp3extractor.c:73
-#, fuzzy
msgid "Soundtrack"
-msgstr "Sonate"
+msgstr "Filmmusik (Soundtrack)"
#: src/plugins/mp3extractor.c:74
#, fuzzy
@@ -272,7 +271,7 @@ msgstr "Instrumental"
#: src/plugins/mp3extractor.c:83
msgid "Acid"
-msgstr ""
+msgstr "Acid"
#: src/plugins/mp3extractor.c:84
msgid "House"
@@ -390,9 +389,8 @@ msgid "Christian Rap"
msgstr ""
#: src/plugins/mp3extractor.c:111
-#, fuzzy
msgid "Pop/Funk"
-msgstr "Punk"
+msgstr "Pop/Funk"
#: src/plugins/mp3extractor.c:112
msgid "Jungle"
@@ -415,9 +413,8 @@ msgid "Psychedelic"
msgstr ""
#: src/plugins/mp3extractor.c:117
-#, fuzzy
msgid "Rave"
-msgstr "Reggae"
+msgstr "Rave"
#: src/plugins/mp3extractor.c:118
msgid "Showtunes"
@@ -446,27 +443,24 @@ msgid "Acid Jazz"
msgstr "Jazz"
#: src/plugins/mp3extractor.c:124
-#, fuzzy
msgid "Polka"
-msgstr "Folk"
+msgstr "Polka"
#: src/plugins/mp3extractor.c:125
msgid "Retro"
msgstr ""
#: src/plugins/mp3extractor.c:126
-#, fuzzy
msgid "Musical"
-msgstr "Klassik"
+msgstr "Musical"
#: src/plugins/mp3extractor.c:127
msgid "Rock & Roll"
msgstr "Rock & Roll"
#: src/plugins/mp3extractor.c:128
-#, fuzzy
msgid "Hard Rock"
-msgstr "Rock"
+msgstr "Hard Rock"
#: src/plugins/mp3extractor.c:129
msgid "Folk"
@@ -491,12 +485,11 @@ msgstr "Fusion"
#: src/plugins/mp3extractor.c:134
msgid "Bebob"
-msgstr ""
+msgstr "Bebob"
#: src/plugins/mp3extractor.c:135
-#, fuzzy
msgid "Latin"
-msgstr "Ort"
+msgstr "Latin"
#: src/plugins/mp3extractor.c:136
msgid "Revival"
@@ -513,7 +506,7 @@ msgstr "Blues"
#: src/plugins/mp3extractor.c:139
msgid "Avantgarde"
-msgstr ""
+msgstr "Avantgarde"
#: src/plugins/mp3extractor.c:140
#, fuzzy
@@ -540,11 +533,11 @@ msgstr "Rock"
#: src/plugins/mp3extractor.c:145
msgid "Big Band"
-msgstr ""
+msgstr "Big Band"
#: src/plugins/mp3extractor.c:146
msgid "Chorus"
-msgstr ""
+msgstr "Chor"
#: src/plugins/mp3extractor.c:147
msgid "Easy Listening"
@@ -595,9 +588,8 @@ msgid "Porn Groove"
msgstr ""
#: src/plugins/mp3extractor.c:159
-#, fuzzy
msgid "Satire"
-msgstr "Datum"
+msgstr "Satire"
#: src/plugins/mp3extractor.c:160
msgid "Slow Jam"
@@ -616,9 +608,8 @@ msgid "Samba"
msgstr "Samba"
#: src/plugins/mp3extractor.c:164
-#, fuzzy
msgid "Folklore"
-msgstr "Folk"
+msgstr "Folklore"
#: src/plugins/mp3extractor.c:165
msgid "Ballad"
@@ -704,7 +695,7 @@ msgstr ""
#: src/plugins/mp3extractor.c:184
msgid "Beat"
-msgstr ""
+msgstr "Beat"
#: src/plugins/mp3extractor.c:185
msgid "Christian Gangsta Rap"
@@ -729,9 +720,8 @@ msgid "Contemporary Christian"
msgstr ""
#: src/plugins/mp3extractor.c:190
-#, fuzzy
msgid "Christian Rock"
-msgstr "Klassischer Rock"
+msgstr "Christlicher Rock"
#: src/plugins/mp3extractor.c:191
msgid "Merengue"
@@ -830,8 +820,7 @@ msgstr ""
#: src/main/extract.c:52
#, c-format
-msgid ""
-"Arguments mandatory for long options are also mandatory for short options.\n"
+msgid "Arguments mandatory for long options are also mandatory for short options.\n"
msgstr ""
"Argumente, die für lange Optionen notwendig sind, sind ebenfalls für die\n"
"Optionen in Kurzform notwendig.\n"
@@ -845,12 +834,8 @@ msgid "print output in bibtex format"
msgstr "Ausgabe im BibTeX format"
#: src/main/extract.c:130
-msgid ""
-"use the generic plaintext extractor for the language with the 2-letter "
-"language code LANG"
-msgstr ""
-"generischen Klartext-extractor für die Sprache mit dem 2-Buchstabenkürzel "
-"LANG verwenden"
+msgid "use the generic plaintext extractor for the language with the 2-letter language code LANG"
+msgstr "generischen Klartext-extractor für die Sprache mit dem 2-Buchstabenkürzel LANG verwenden"
#: src/main/extract.c:132
msgid "remove duplicates only if types match"
@@ -858,9 +843,7 @@ msgstr "doppelte Einträge nur entfernen, wenn die Art übereinstimmt"
#: src/main/extract.c:134
msgid "use the filename as a keyword (loads filename-extractor plugin)"
-msgstr ""
-"Dateinamen als Schlüsselwort verwenden (filename-extractor-Erweiterung wird "
-"geladen)"
+msgstr "Dateinamen als Schlüsselwort verwenden (filename-extractor-Erweiterung wird geladen)"
#: src/main/extract.c:136
msgid "print this help"
@@ -868,8 +851,7 @@ msgstr "diese Hilfe anzeigen"
#: src/main/extract.c:138
msgid "compute hash using the given ALGORITHM (currently sha1 or md5)"
-msgstr ""
-"Hash gemäß dem angegebenen ALGORITHMUS errechnen (z.Zt. »sha1« oder »md5«)"
+msgstr "Hash gemäß dem angegebenen ALGORITHMUS errechnen (z.Zt. »sha1« oder »md5«)"
#: src/main/extract.c:140
msgid "load an extractor plugin named LIBRARY"
@@ -885,9 +867,7 @@ msgstr "Standardsatz der extractor-Erweiterungen nicht verwenden"
#: src/main/extract.c:146
msgid "print only keywords of the given TYPE (use -L to get a list)"
-msgstr ""
-"nur Schlüsselwörter einer bestimmten ART ausgeben (mit -L die Liste anzeigen "
-"lassen)"
+msgstr "nur Schlüsselwörter einer bestimmten ART ausgeben (mit -L die Liste anzeigen lassen)"
#: src/main/extract.c:148
msgid "remove duplicates even if keyword types do not match"
@@ -976,11 +956,9 @@ msgid "page count"
msgstr "Seitenanzahl"
#: src/main/extract.c:473
-#, fuzzy, c-format
+#, c-format
msgid "You must specify an argument for the `%s' option (option ignored).\n"
-msgstr ""
-"Sie müssen ein Argument für die Option »%s« angeben (Option wird "
-"ignoriert).\n"
+msgstr "Sie müssen ein Argument für die Option »%s« angeben (Option wird ignoriert).\n"
#: src/main/extract.c:540
#, c-format
@@ -1239,9 +1217,8 @@ msgid "binary thumbnail data"
msgstr ""
#: src/main/extractor.c:115
-#, fuzzy
msgid "publication date"
-msgstr "Datum der Erstellung"
+msgstr "Datum der Veröffentlichung"
#: src/main/extractor.c:116
msgid "camera make"
@@ -1265,7 +1242,7 @@ msgstr ""
#: src/main/extractor.c:121
msgid "flash"
-msgstr ""
+msgstr "Blitz"
#: src/main/extractor.c:122
msgid "flash bias"
@@ -1297,7 +1274,7 @@ msgstr ""
#: src/main/extractor.c:129
msgid "image quality"
-msgstr ""
+msgstr "Bildqualität"
#: src/main/extractor.c:130
msgid "white balance"
@@ -1315,20 +1292,18 @@ msgstr "Initialisierung des Plugin-Mechanismus' ist fehlgeschlagen: %s.\n"
#: src/main/extractor.c:372
#, c-format
-msgid ""
-"Resolving symbol `%s' in library `%s' failed, so I tried `%s', but that "
-"failed also. Errors are: `%s' and `%s'.\n"
+msgid "Resolving symbol `%s' in library `%s' failed, so I tried `%s', but that failed also. Errors are: `%s' and `%s'.\n"
msgstr ""
#: src/main/extractor.c:401
-#, fuzzy, c-format
+#, c-format
msgid "Loading `%s' plugin failed: %s\n"
msgstr "Laden des »%s«-Plugins ist fehlgeschlagen: %s\n"
#: src/main/extractor.c:606
-#, fuzzy, c-format
+#, c-format
msgid "Unloading plugin `%s' failed!\n"
-msgstr "Entladen des »%s«-Erweiterung ist fehlgeschlagen.\n"
+msgstr "Entladen des »%s«-Plugins ist fehlgeschlagen!\n"
#~ msgid "Fatal: could not allocate (%s at %s:%d).\n"
#~ msgstr "Fatal: Allokieren nicht möglich (%s bei %s:%d).\n"
diff --git a/src/plugins/tarextractor.c b/src/plugins/tarextractor.c
@@ -20,43 +20,42 @@
#include "platform.h"
#include "extractor.h"
-#include <zlib.h>
/*
* Note that this code is not complete!
+ * It will not report correct results for very long member filenames
+ * (> 99 octets) when the archive was made with GNU tar or Solaris tar.
*
* References:
* http://www.mkssoftware.com/docs/man4/tar.4.asp
+ * (documents the USTAR format common nowadays,
+ * but not other extended formats, such as the one GNU tar 1.13
+ * produces when it encounters very long filenames.)
*/
-
-static EXTRACTOR_KeywordList * addKeyword(EXTRACTOR_KeywordType type,
- char * keyword,
- EXTRACTOR_KeywordList * next) {
+/*
+ * Append a keyword node to the end of a keyword list.
+ *
+ * Takes ownership of 'keyword': it is either stored in the new node
+ * or freed (when it is empty or the node cannot be allocated).
+ *
+ * type    - keyword type recorded in the new node
+ * keyword - heap-allocated string; NULL or "" appends nothing
+ * last    - current tail of the list (NULL for an empty list);
+ *           abort()s if 'last' still has a successor
+ *
+ * Returns the new tail, or 'last' unchanged if nothing was appended.
+ */
+static EXTRACTOR_KeywordList * appendKeyword(EXTRACTOR_KeywordType type,
+ char * keyword,
+ EXTRACTOR_KeywordList * last) {
EXTRACTOR_KeywordList * result;
+ if ( (last != NULL) &&
+ (last->next != NULL) )
+ abort();
if (keyword == NULL)
- return next;
+ return last;
if (strlen(keyword) == 0) {
free(keyword);
- return next;
+ return last;
}
result = malloc(sizeof(EXTRACTOR_KeywordList));
- result->next = next;
- result->keyword = keyword;
+ if (result == NULL) {
+ /* out of memory: drop the keyword rather than dereference NULL */
+ free(keyword);
+ return last;
+ }
+ /* the new node becomes the tail; pointing it back at 'last' would make the list circular */
+ result->next = NULL;
result->keywordType = type;
+ result->keyword = keyword;
+ if (last != NULL)
+ last->next = result;
return result;
}
-static char * stndup(const char * str,
- size_t n) {
- char * tmp;
- tmp = malloc(n+1);
- tmp[n] = '\0';
- memcpy(tmp, str, n);
- return tmp;
-}
-
typedef struct {
char name[100];
char mode[8];
@@ -86,53 +85,124 @@ libextractor_tar_extract(const char * filename,
const char * data,
size_t size,
struct EXTRACTOR_Keywords * prev) {
- TarHeader * tar;
- USTarHeader * ustar;
+ const TarHeader * tar;
+ const USTarHeader * ustar;
size_t pos;
+ const char * mimetype = NULL;
+ struct EXTRACTOR_Keywords * last;
+
+ last = prev;
+ if (last != NULL)
+ while (last->next != NULL)
+ last = last->next;
if (0 != (size % 512) )
return prev; /* cannot be tar! */
if (size < 1024)
- return prev;
- size -= 1024; /* last 2 blocks are all zeros */
- /* fixme: we may want to check that the last
- 1024 bytes are all zeros here... */
+ return prev; /* too short, or somehow truncated */
pos = 0;
while (pos + sizeof(TarHeader) < size) {
unsigned long long fsize;
char buf[13];
+ const char * nul_pos;
+ const char * ustar_prefix = NULL;
+ unsigned int ustar_prefix_length = 0;
+ unsigned int tar_name_length;
+ unsigned int zeropos;
+ int header_is_empty = 1;
+
+ if (pos + 1024 < size) {
+ const int * idata = (const int*) data;
+ for (zeropos = 0; zeropos < 1024 / sizeof(int); zeropos++) {
+ if(0 != idata[zeropos]) {
+ header_is_empty = 0;
+ break;
+ }
+ }
+ }
- tar = (TarHeader*) &data[pos];
+ if (header_is_empty) /* assume the EOF mark was reached */
+ break;
+
+ tar = (const TarHeader*) &data[pos];
/* fixme: we may want to check the header checksum here... */
+ /* fixme: we attempt to follow the MKS document for long file names,
+ but no TAR file has been found yet that matches our understanding of it! */
if (pos + sizeof(USTarHeader) < size) {
- ustar = (USTarHeader*) &data[pos];
+
+ nul_pos = memchr(data + pos, 0, sizeof tar->name);
+ tar_name_length = (0 == nul_pos)
+ ? sizeof(tar->name)
+ : (nul_pos - (data + pos));
+
+ ustar = (const USTarHeader*) &data[pos];
+
+ if(0 == mimetype) {
+ if(0 == memcmp(ustar->magic, "ustar ", 7))
+ mimetype = "application/x-gtar";
+ else
+ mimetype = "application/x-tar";
+ }
+
if (0 == strncmp("ustar",
- &ustar->magic[0],
- strlen("ustar")))
- pos += 512; /* sizeof(USTarHeader); */
- else
- pos += 257; /* sizeof(TarHeader); minus gcc alignment... */
+ &ustar->magic[0],
+ strlen("ustar"))) {
+ if(0 != *ustar->prefix) {
+ nul_pos = memchr(ustar->prefix, 0, sizeof ustar->prefix);
+
+ ustar_prefix_length = (0 == nul_pos)
+ ? sizeof ustar->prefix
+ : nul_pos - ustar->prefix;
+ ustar_prefix = ustar->prefix;
+ }
+ }
+
+ pos += 512; /* V7 Tar, USTar and GNU Tar usual headers take 512 octets */
} else {
pos += 257; /* sizeof(TarHeader); minus gcc alignment... */
}
memcpy(buf, &tar->filesize[0], 12);
buf[12] = '\0';
if (1 != sscanf(buf, "%12llo", &fsize)) /* octal! Yuck yuck! */
- return prev;
+ break;
if ( (pos + fsize > size) ||
(fsize > size) ||
(pos + fsize < pos) )
- return prev;
- prev = addKeyword(EXTRACTOR_FILENAME,
- stndup(&tar->name[0],
- 100),
- prev);
+ break;
+
+ if (0 < ustar_prefix_length + tar_name_length) {
+ char * fname = malloc(1 + ustar_prefix_length + tar_name_length);
+
+ if(0 != fname) {
+ if(0 < ustar_prefix_length)
+ memcpy(fname, ustar_prefix, ustar_prefix_length);
+ if(0 < tar_name_length)
+ memcpy(fname + ustar_prefix_length, tar->name, tar_name_length);
+ fname[ustar_prefix_length + tar_name_length]= '\0';
+ last = appendKeyword(EXTRACTOR_FILENAME, fname, last);
+ if (prev == NULL)
+ prev = last;
+ }
+ }
+
if ( (fsize & 511) != 0)
fsize = (fsize | 511)+1; /* round up! */
if (pos + fsize < pos)
- return prev;
+ break;
pos += fsize;
}
+
+ /*
+ * a simple guard would be to clobber mimetype to NULL
+ * whenever something bad happens while reading
+ * (check break instructions just above).
+ */
+ if (NULL != mimetype) {
+ last = appendKeyword(EXTRACTOR_MIMETYPE, strdup(mimetype), last);
+ if (prev == NULL)
+ prev = last;
+ }
+
return prev;
}