libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 6ee2ce25363acc1c2f567d3b05788d625a28b989
parent 711f136f3c77e21c0274c2a569f1854c28cb7385
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sat, 12 Jun 2010 22:01:49 +0000

style

Diffstat:
M src/common/convert.c | 2 ++
M src/main/extract.c | 22 ++++++++++++----------
M src/main/extractor.c | 6 ++++++
M src/main/extractor_print.c | 6 ++++--
M src/main/iconv.c | 4 +++-
M src/plugins/deb_extractor.c | 5 +++++
M src/plugins/real_extractor.c | 179 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
7 files changed, 135 insertions(+), 89 deletions(-)

diff --git a/src/common/convert.c b/src/common/convert.c @@ -45,6 +45,8 @@ EXTRACTOR_common_convert_to_utf8 (const char *input, size_t len, const char *cha cd = iconv_open ("UTF-8", charset); if (cd == (iconv_t) - 1) return strdup (i); + if (len > 1024 * 1024) + return NULL; /* too big for meta data */ tmpSize = 3 * len + 4; tmp = malloc (tmpSize); if (tmp == NULL) diff --git a/src/main/extract.c b/src/main/extract.c @@ -91,7 +91,7 @@ static void formatHelp(const char * general, int j; int ml; int p; - char * scp; + char scp[80]; const char * trans; printf(_("Usage: %s\n%s\n\n"), @@ -129,7 +129,6 @@ static void formatHelp(const char * general, while (ml - p > 78 - slen) { for (j=p+78-slen;j>p;j--) { if (isspace( (unsigned char) trans[j])) { - scp = malloc(j-p+1); memcpy(scp, &trans[p], j-p); @@ -138,14 +137,12 @@ static void formatHelp(const char * general, scp, BORDER+2, ""); - free(scp); p = j+1; slen = BORDER+2; goto OUTER; } } /* could not find space to break line */ - scp = malloc(78 - slen + 1); memcpy(scp, &trans[p], 78 - slen); @@ -154,7 +151,6 @@ static void formatHelp(const char * general, scp, BORDER+2, ""); - free(scp); slen = BORDER+2; p = p + 78 - slen; } @@ -230,6 +226,7 @@ print_selected_keywords (void *cls, char * keyword; iconv_t cd; const char *stype; + const char *mt; if (print[type] != YES) return 0; @@ -237,7 +234,8 @@ print_selected_keywords (void *cls, fprintf (stdout, _("Found by `%s' plugin:\n"), plugin_name); - stype = gettext(EXTRACTOR_metatype_to_string(type)); + mt = EXTRACTOR_metatype_to_string(type); + stype = (mt == NULL) ? 
_("unknown") : gettext(mt); switch (format) { case EXTRACTOR_METAFORMAT_UNKNOWN: @@ -312,9 +310,13 @@ print_selected_keywords_grep_friendly (void *cls, { char * keyword; iconv_t cd; + const char *mt; if (print[type] != YES) return 0; + mt = EXTRACTOR_metatype_to_string(type); + if (mt == NULL) + mt = gettext_noop ("unknown"); switch (format) { case EXTRACTOR_METAFORMAT_UNKNOWN: @@ -323,7 +325,7 @@ print_selected_keywords_grep_friendly (void *cls, if (verbose > 1) fprintf (stdout, "%s: ", - gettext(EXTRACTOR_metatype_to_string(type))); + gettext(mt)); cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); if (cd != (iconv_t) -1) keyword = iconv_helper(cd, @@ -333,7 +335,7 @@ print_selected_keywords_grep_friendly (void *cls, if (keyword != NULL) { fprintf (stdout, - "'%s' ", + "`%s' ", keyword); free(keyword); } @@ -346,9 +348,9 @@ print_selected_keywords_grep_friendly (void *cls, if (verbose > 1) fprintf (stdout, "%s ", - gettext(EXTRACTOR_metatype_to_string(type))); + gettext(mt)); fprintf (stdout, - "'%s'", + "`%s'", data); break; default: diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -1220,6 +1220,12 @@ process_requests (struct EXTRACTOR_PluginList *plugin, memset (&hdr, 0, sizeof (hdr)); fin = fdopen (in, "r"); + if (fin == NULL) + { + close (in); + close (out); + return; + } while (NULL != fgets (hfn, sizeof(hfn), fin)) { if (strlen (hfn) <= 1) diff --git a/src/main/extractor_print.c b/src/main/extractor_print.c @@ -51,6 +51,7 @@ EXTRACTOR_meta_data_print(void * handle, iconv_t cd; char * buf; int ret; + const char *mt; if (format != EXTRACTOR_METAFORMAT_UTF8) return 0; @@ -61,10 +62,11 @@ EXTRACTOR_meta_data_print(void * handle, buf = iconv_helper(cd, data); if (buf != NULL) { + mt = EXTRACTOR_metatype_to_string (type); ret = fprintf(handle, "%s - %s\n", - dgettext ("libextractor", - EXTRACTOR_metatype_to_string (type)), + (mt == NULL) ? 
_("unknown") : dgettext ("libextractor", + mt), buf); free(buf); } diff --git a/src/main/iconv.c b/src/main/iconv.c @@ -37,7 +37,9 @@ iconv_helper(iconv_t cd, iconv(cd, NULL, NULL, NULL, NULL); inSize = strlen(in); - outSize = 4 * strlen(in) + 2; + if (inSize > 1024 * 1024) + return NULL; /* too big to be meta data */ + outSize = 4 * inSize + 2; outLeft = outSize - 2; /* make sure we have 2 0-terminations! */ buf = malloc(outSize); if (buf == NULL) diff --git a/src/plugins/deb_extractor.c b/src/plugins/deb_extractor.c @@ -223,10 +223,15 @@ processControlTar (const char *data, } #define MAX_CONTROL_SIZE (1024 * 1024) +#ifndef SIZE_MAX +#define SIZE_MAX ((size_t)-1) +#endif static voidpf Emalloc (voidpf opaque, uInt items, uInt size) { + if (SIZE_MAX / size <= items) + return NULL; return malloc (size * items); } diff --git a/src/plugins/real_extractor.c b/src/plugins/real_extractor.c @@ -162,63 +162,76 @@ processContentDescription (const Content_Description * prop, + sizeof (Content_Description)) return 0; + ret = 0; title = malloc (title_len + 1); - memcpy (title, &prop->data[0], title_len); - title[title_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_TITLE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - title, - strlen (title)+1); - free (title); + if (title != NULL) + { + memcpy (title, &prop->data[0], title_len); + title[title_len] = '\0'; + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_TITLE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + title, + strlen (title)+1); + free (title); + } if (ret != 0) return ret; author = malloc (author_len + 1); - memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len); - author[author_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_AUTHOR_NAME, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - author, - strlen (author)+1); - free (author); + if (author != NULL) + { + memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len); + author[author_len] = '\0'; + 
ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_AUTHOR_NAME, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + author, + strlen (author)+1); + free (author); + } if (ret != 0) return ret; copyright = malloc (copyright_len + 1); - memcpy (copyright, - &prop->data[title_len + sizeof (UINT16) * 2 + author_len], - copyright_len); - copyright[copyright_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_COPYRIGHT, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - copyright, - strlen (copyright)+1); - free (copyright); + if (copyright != NULL) + { + memcpy (copyright, + &prop->data[title_len + sizeof (UINT16) * 2 + author_len], + copyright_len); + copyright[copyright_len] = '\0'; + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_COPYRIGHT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + copyright, + strlen (copyright)+1); + free (copyright); + } if (ret != 0) return ret; comment = malloc (comment_len + 1); - memcpy (comment, - &prop->data[title_len + sizeof (UINT16) * 3 + author_len + - copyright_len], comment_len); - comment[comment_len] = '\0'; - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_COMMENT, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - comment, - strlen (comment)+1); - free (comment); + if (comment != NULL) + { + memcpy (comment, + &prop->data[title_len + sizeof (UINT16) * 3 + author_len + + copyright_len], comment_len); + comment[comment_len] = '\0'; + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + comment, + strlen (comment)+1); + free (comment); + } if (ret != 0) return ret; return 0; @@ -263,6 +276,8 @@ stndup (const char *str, size_t n) { char *tmp; tmp = malloc (n + 1); + if (tmp == NULL) + return NULL; tmp[n] = '\0'; memcpy (tmp, str, n); return tmp; @@ -321,50 +336,62 @@ EXTRACTOR_real_extract (const unsigned char *data, if ( (tlen > 0) && (ret == 0) ) { x = stndup ((const char *) &data[17 + RAFF4_HDR_SIZE], tlen); - ret = proc (proc_cls, - "real", - 
EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x)+1); - free (x); + if (x != NULL) + { + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + x, + strlen (x)+1); + free (x); + } } if ( (alen > 0) && (ret == 0) ) { x = stndup ((const char *) &data[18 + RAFF4_HDR_SIZE + tlen], alen); - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x)+1); - free (x); + if (x != NULL) + { + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + x, + strlen (x)+1); + free (x); + } } if ( (clen > 0) && (ret == 0) ) { x = stndup ((const char *) &data[19 + RAFF4_HDR_SIZE + tlen + alen], clen); - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x)+1); - free (x); + if (x != NULL) + { + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + x, + strlen (x)+1); + free (x); + } } if ( (aplen > 0) && (ret == 0) ) { x = stndup ((const char *) &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen], aplen); - ret = proc (proc_cls, - "real", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - x, - strlen (x)+1); - free (x); + if (x != NULL) + { + ret = proc (proc_cls, + "real", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + x, + strlen (x)+1); + free (x); + } } return ret; }