about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Christian Grothoff <christian@grothoff.org> 2017-10-15 20:22:41 +0200
committer Christian Grothoff <christian@grothoff.org> 2017-10-15 20:22:41 +0200
commit b2610cbbdff71e83f9163064efcacb0eae614d24 (patch)
tree 2e72705bf47fae87ced93786b66cde44d5959f68
parent bb8184768d6b13bd32a3eb2224110f3e00573cbc (diff)
download libextractor-b2610cbbdff71e83f9163064efcacb0eae614d24.tar.gz
libextractor-b2610cbbdff71e83f9163064efcacb0eae614d24.zip
fix NPE in extract for 0-byte values
-rw-r--r-- ChangeLog 3
-rw-r--r-- src/main/extract.c 96
2 files changed, 52 insertions, 47 deletions
diff --git a/ChangeLog b/ChangeLog
index 1a2fb98..65ef35a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,7 +4,8 @@ Sun Oct 15 19:36:41 CEST 2017
4 Make sure to only pass "unsigned char" to functions like isspace(). 4 Make sure to only pass "unsigned char" to functions like isspace().
5 Avoid malloc(0) in DEB extractor under certain conditions. 5 Avoid malloc(0) in DEB extractor under certain conditions.
6 Properly initialize 'duration' in ffmpeg extractor. 6 Properly initialize 'duration' in ffmpeg extractor.
7 Fix potential buffer underflow read in DEB extractor. -CG 7 Fix potential buffer underflow read in DEB extractor.
8 Avoid NPE in 'extract' if extracted value has 0 bytes. -CG
8 9
9Fri Oct 13 12:30:37 CEST 2017 10Fri Oct 13 12:30:37 CEST 2017
10 Properly check read error in NSF plugin (from signedness confusion) found by Leon Zhao. -CG 11 Properly check read error in NSF plugin (from signedness confusion) found by Leon Zhao. -CG
diff --git a/src/main/extract.c b/src/main/extract.c
index fd7cc96..4274355 100644
--- a/src/main/extract.c
+++ b/src/main/extract.c
@@ -79,16 +79,16 @@ ignore_sigpipe ()
79/** 79/**
80 * Information about command-line options. 80 * Information about command-line options.
81 */ 81 */
82struct Help 82struct Help
83{ 83{
84 /** 84 /**
85 * Single-character option name, '\0' for none. 85 * Single-character option name, '\0' for none.
86 */ 86 */
87 char shortArg; 87 char shortArg;
88 88
89 /** 89 /**
90 * Long name of the option. 90 * Long name of the option.
91 */ 91 */
92 const char * longArg; 92 const char * longArg;
93 93
94 /** 94 /**
@@ -116,10 +116,10 @@ struct Help
116 * @param description program description 116 * @param description program description
117 * @param opt program options (NULL-terminated array) 117 * @param opt program options (NULL-terminated array)
118 */ 118 */
119static void 119static void
120format_help (const char *general, 120format_help (const char *general,
121 const char *description, 121 const char *description,
122 const struct Help *opt) 122 const struct Help *opt)
123{ 123{
124 size_t slen; 124 size_t slen;
125 unsigned int i; 125 unsigned int i;
@@ -128,14 +128,14 @@ format_help (const char *general,
128 size_t p; 128 size_t p;
129 char scp[80]; 129 char scp[80];
130 const char *trans; 130 const char *trans;
131 131
132 printf (_("Usage: %s\n%s\n\n"), 132 printf (_("Usage: %s\n%s\n\n"),
133 gettext(general), 133 gettext(general),
134 gettext(description)); 134 gettext(description));
135 printf (_("Arguments mandatory for long options are also mandatory for short options.\n")); 135 printf (_("Arguments mandatory for long options are also mandatory for short options.\n"));
136 slen = 0; 136 slen = 0;
137 i = 0; 137 i = 0;
138 while (NULL != opt[i].description) 138 while (NULL != opt[i].description)
139 { 139 {
140 if (0 == opt[i].shortArg) 140 if (0 == opt[i].shortArg)
141 printf (" "); 141 printf (" ");
@@ -145,18 +145,18 @@ format_help (const char *general,
145 printf ("--%s", 145 printf ("--%s",
146 opt[i].longArg); 146 opt[i].longArg);
147 slen = 8 + strlen(opt[i].longArg); 147 slen = 8 + strlen(opt[i].longArg);
148 if (NULL != opt[i].mandatoryArg) 148 if (NULL != opt[i].mandatoryArg)
149 { 149 {
150 printf ("=%s", 150 printf ("=%s",
151 opt[i].mandatoryArg); 151 opt[i].mandatoryArg);
152 slen += 1+strlen(opt[i].mandatoryArg); 152 slen += 1+strlen(opt[i].mandatoryArg);
153 } 153 }
154 if (slen > BORDER) 154 if (slen > BORDER)
155 { 155 {
156 printf ("\n%*s", BORDER, ""); 156 printf ("\n%*s", BORDER, "");
157 slen = BORDER; 157 slen = BORDER;
158 } 158 }
159 if (slen < BORDER) 159 if (slen < BORDER)
160 { 160 {
161 printf ("%*s", (int) (BORDER - slen), ""); 161 printf ("%*s", (int) (BORDER - slen), "");
162 slen = BORDER; 162 slen = BORDER;
@@ -165,7 +165,7 @@ format_help (const char *general,
165 ml = strlen(trans); 165 ml = strlen(trans);
166 p = 0; 166 p = 0;
167 OUTER: 167 OUTER:
168 while (ml - p > 78 - slen) 168 while (ml - p > 78 - slen)
169 { 169 {
170 for (j=p+78-slen;j>p;j--) 170 for (j=p+78-slen;j>p;j--)
171 { 171 {
@@ -192,7 +192,7 @@ format_help (const char *general,
192 printf ("%s\n%*s", 192 printf ("%s\n%*s",
193 scp, 193 scp,
194 BORDER+2, 194 BORDER+2,
195 ""); 195 "");
196 slen = BORDER+2; 196 slen = BORDER+2;
197 p = p + 78 - slen; 197 p = p + 78 - slen;
198 } 198 }
@@ -211,7 +211,7 @@ format_help (const char *general,
211static void 211static void
212print_help () 212print_help ()
213{ 213{
214 static struct Help help[] = 214 static struct Help help[] =
215 { 215 {
216 { 'b', "bibtex", NULL, 216 { 'b', "bibtex", NULL,
217 gettext_noop("print output in bibtex format") }, 217 gettext_noop("print output in bibtex format") },
@@ -258,13 +258,13 @@ print_help ()
258 * used in the main libextractor library and yielding 258 * used in the main libextractor library and yielding
259 * meta data). 259 * meta data).
260 * @param type libextractor-type describing the meta data 260 * @param type libextractor-type describing the meta data
261 * @param format basic format information about data 261 * @param format basic format information about data
262 * @param data_mime_type mime-type of data (not of the original file); 262 * @param data_mime_type mime-type of data (not of the original file);
263 * can be NULL (if mime-type is not known) 263 * can be NULL (if mime-type is not known)
264 * @param data actual meta-data found 264 * @param data actual meta-data found
265 * @param data_len number of bytes in data 265 * @param data_len number of bytes in data
266 * @return 0 to continue extracting, 1 to abort 266 * @return 0 to continue extracting, 1 to abort
267 */ 267 */
268static int 268static int
269print_selected_keywords (void *cls, 269print_selected_keywords (void *cls,
270 const char *plugin_name, 270 const char *plugin_name,
@@ -273,7 +273,7 @@ print_selected_keywords (void *cls,
273 const char *data_mime_type, 273 const char *data_mime_type,
274 const char *data, 274 const char *data,
275 size_t data_len) 275 size_t data_len)
276{ 276{
277 char *keyword; 277 char *keyword;
278#if HAVE_ICONV 278#if HAVE_ICONV
279 iconv_t cd; 279 iconv_t cd;
@@ -298,6 +298,8 @@ print_selected_keywords (void *cls,
298 (unsigned int) data_len); 298 (unsigned int) data_len);
299 break; 299 break;
300 case EXTRACTOR_METAFORMAT_UTF8: 300 case EXTRACTOR_METAFORMAT_UTF8:
301 if (0 == data_len)
302 break;
301#if HAVE_ICONV 303#if HAVE_ICONV
302 cd = iconv_open (nl_langinfo(CODESET), "UTF-8"); 304 cd = iconv_open (nl_langinfo(CODESET), "UTF-8");
303 if (((iconv_t) -1) != cd) 305 if (((iconv_t) -1) != cd)
@@ -306,7 +308,7 @@ print_selected_keywords (void *cls,
306 data_len); 308 data_len);
307 else 309 else
308#endif 310#endif
309 keyword = strdup (data); 311 keyword = strdup (data);
310 if (NULL != keyword) 312 if (NULL != keyword)
311 { 313 {
312 FPRINTF (stdout, 314 FPRINTF (stdout,
@@ -349,13 +351,13 @@ print_selected_keywords (void *cls,
349 * used in the main libextractor library and yielding 351 * used in the main libextractor library and yielding
350 * meta data). 352 * meta data).
351 * @param type libextractor-type describing the meta data 353 * @param type libextractor-type describing the meta data
352 * @param format basic format information about data 354 * @param format basic format information about data
353 * @param data_mime_type mime-type of data (not of the original file); 355 * @param data_mime_type mime-type of data (not of the original file);
354 * can be NULL (if mime-type is not known) 356 * can be NULL (if mime-type is not known)
355 * @param data actual meta-data found 357 * @param data actual meta-data found
356 * @param data_len number of bytes in data 358 * @param data_len number of bytes in data
357 * @return 0 to continue extracting, 1 to abort 359 * @return 0 to continue extracting, 1 to abort
358 */ 360 */
359static int 361static int
360print_selected_keywords_grep_friendly (void *cls, 362print_selected_keywords_grep_friendly (void *cls,
361 const char *plugin_name, 363 const char *plugin_name,
@@ -364,9 +366,9 @@ print_selected_keywords_grep_friendly (void *cls,
364 const char *data_mime_type, 366 const char *data_mime_type,
365 const char *data, 367 const char *data,
366 size_t data_len) 368 size_t data_len)
367{ 369{
368 char *keyword; 370 char *keyword;
369#if HAVE_ICONV 371#if HAVE_ICONV
370 iconv_t cd; 372 iconv_t cd;
371#endif 373#endif
372 const char *mt; 374 const char *mt;
@@ -378,14 +380,14 @@ print_selected_keywords_grep_friendly (void *cls,
378 mt = gettext_noop ("unknown"); 380 mt = gettext_noop ("unknown");
379 switch (format) 381 switch (format)
380 { 382 {
381 case EXTRACTOR_METAFORMAT_UNKNOWN: 383 case EXTRACTOR_METAFORMAT_UNKNOWN:
382 break; 384 break;
383 case EXTRACTOR_METAFORMAT_UTF8: 385 case EXTRACTOR_METAFORMAT_UTF8:
384 if (verbose > 1) 386 if (verbose > 1)
385 FPRINTF (stdout, 387 FPRINTF (stdout,
386 "%s: ", 388 "%s: ",
387 gettext(mt)); 389 gettext(mt));
388#if HAVE_ICONV 390#if HAVE_ICONV
389 cd = iconv_open (nl_langinfo (CODESET), "UTF-8"); 391 cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
390 if (((iconv_t) -1) != cd) 392 if (((iconv_t) -1) != cd)
391 keyword = iconv_helper (cd, 393 keyword = iconv_helper (cd,
@@ -401,7 +403,7 @@ print_selected_keywords_grep_friendly (void *cls,
401 keyword); 403 keyword);
402 free (keyword); 404 free (keyword);
403 } 405 }
404#if HAVE_ICONV 406#if HAVE_ICONV
405 if (((iconv_t) -1) != cd) 407 if (((iconv_t) -1) != cd)
406 iconv_close (cd); 408 iconv_close (cd);
407#endif 409#endif
@@ -474,7 +476,7 @@ static struct BibTexMap btm[] =
474 { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL }, 476 { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL },
475 { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL}, 477 { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL},
476 { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL }, 478 { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL },
477 { "url", EXTRACTOR_METATYPE_URL, NULL}, 479 { "url", EXTRACTOR_METATYPE_URL, NULL},
478 { "note", EXTRACTOR_METATYPE_COMMENT, NULL}, 480 { "note", EXTRACTOR_METATYPE_COMMENT, NULL},
479 { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL }, 481 { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL },
480 { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL }, 482 { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL },
@@ -485,11 +487,11 @@ static struct BibTexMap btm[] =
485/** 487/**
486 * Clean up the bibtex processor in preparation for the next round. 488 * Clean up the bibtex processor in preparation for the next round.
487 */ 489 */
488static void 490static void
489cleanup_bibtex () 491cleanup_bibtex ()
490{ 492{
491 unsigned int i; 493 unsigned int i;
492 494
493 for (i = 0; NULL != btm[i].bibTexName; i++) 495 for (i = 0; NULL != btm[i].bibTexName; i++)
494 { 496 {
495 free (btm[i].value); 497 free (btm[i].value);
@@ -509,7 +511,7 @@ cleanup_bibtex ()
509 * used in the main libextractor library and yielding 511 * used in the main libextractor library and yielding
510 * meta data). 512 * meta data).
511 * @param type libextractor-type describing the meta data 513 * @param type libextractor-type describing the meta data
512 * @param format basic format information about data 514 * @param format basic format information about data
513 * @param data_mime_type mime-type of data (not of the original file); 515 * @param data_mime_type mime-type of data (not of the original file);
514 * can be NULL (if mime-type is not known) 516 * can be NULL (if mime-type is not known)
515 * @param data actual meta-data found 517 * @param data actual meta-data found
@@ -527,6 +529,8 @@ print_bibtex (void *cls,
527{ 529{
528 unsigned int i; 530 unsigned int i;
529 531
532 if (0 == data_len)
533 return 0;
530 if (YES != print[type]) 534 if (YES != print[type])
531 return 0; 535 return 0;
532 if (EXTRACTOR_METAFORMAT_UTF8 != format) 536 if (EXTRACTOR_METAFORMAT_UTF8 != format)
@@ -563,7 +567,7 @@ finish_bibtex (const char *fn)
563 et = "misc"; 567 et = "misc";
564 if ( (NULL == btm[0].value) || 568 if ( (NULL == btm[0].value) ||
565 (NULL == btm[1].value) || 569 (NULL == btm[1].value) ||
566 (NULL == btm[2].value) ) 570 (NULL == btm[2].value) )
567 FPRINTF (stdout, 571 FPRINTF (stdout,
568 "@%s %s { ", 572 "@%s %s { ",
569 et, 573 et,
@@ -577,9 +581,9 @@ finish_bibtex (const char *fn)
577 btm[1].value, 581 btm[1].value,
578 btm[0].value); 582 btm[0].value);
579 for (n=strlen (temp)-1;n>=0;n-- ) 583 for (n=strlen (temp)-1;n>=0;n-- )
580 if (! isalnum ( (unsigned char) temp[n]) ) 584 if (! isalnum ( (unsigned char) temp[n]) )
581 temp[n] = '_'; 585 temp[n] = '_';
582 else 586 else
583 temp[n] = tolower ( (unsigned char) temp[n]); 587 temp[n] = tolower ( (unsigned char) temp[n]);
584 FPRINTF (stdout, 588 FPRINTF (stdout,
585 "@%s %s { ", 589 "@%s %s { ",
@@ -587,7 +591,7 @@ finish_bibtex (const char *fn)
587 temp); 591 temp);
588 } 592 }
589 for (i=0; NULL != btm[i].bibTexName; i++) 593 for (i=0; NULL != btm[i].bibTexName; i++)
590 if (NULL != btm[i].value) 594 if (NULL != btm[i].value)
591 FPRINTF (stdout, 595 FPRINTF (stdout,
592 "\t%s = {%s},\n", 596 "\t%s = {%s},\n",
593 btm[i].bibTexName, 597 btm[i].bibTexName,
@@ -610,9 +614,9 @@ _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
610 error = GetLastError (); 614 error = GetLastError ();
611 if (len <= 0) 615 if (len <= 0)
612 return -1; 616 return -1;
613 617
614 str = malloc (sizeof (char) * len); 618 str = malloc (sizeof (char) * len);
615 619
616 SetLastError (0); 620 SetLastError (0);
617 lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 || cp == CP_UTF7) ? NULL : &lossy); 621 lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 || cp == CP_UTF7) ? NULL : &lossy);
618 error = GetLastError (); 622 error = GetLastError ();
@@ -758,7 +762,7 @@ main (int argc, char *argv[])
758#endif 762#endif
759 if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ()))) 763 if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ())))
760 { 764 {
761 FPRINTF (stderr, 765 FPRINTF (stderr,
762 "malloc failed: %s\n", 766 "malloc failed: %s\n",
763 strerror (errno)); 767 strerror (errno));
764 return 1; 768 return 1;
@@ -791,7 +795,7 @@ main (int argc, char *argv[])
791 }; 795 };
792 option_index = 0; 796 option_index = 0;
793 c = getopt_long (utf8_argc, 797 c = getopt_long (utf8_argc,
794 utf8_argv, 798 utf8_argv,
795 "abghiml:Lnp:vVx:", 799 "abghiml:Lnp:vVx:",
796 long_options, 800 long_options,
797 &option_index); 801 &option_index);
@@ -848,7 +852,7 @@ main (int argc, char *argv[])
848 nodefault = YES; 852 nodefault = YES;
849 break; 853 break;
850 case 'p': 854 case 'p':
851 if (NULL == optarg) 855 if (NULL == optarg)
852 { 856 {
853 FPRINTF(stderr, 857 FPRINTF(stderr,
854 _("You must specify an argument for the `%s' option (option ignored).\n"), 858 _("You must specify an argument for the `%s' option (option ignored).\n"),
@@ -865,11 +869,11 @@ main (int argc, char *argv[])
865 i = 0; 869 i = 0;
866 while (NULL != EXTRACTOR_metatype_to_string (i)) 870 while (NULL != EXTRACTOR_metatype_to_string (i))
867 { 871 {
868 if ( (0 == strcmp (optarg, 872 if ( (0 == strcmp (optarg,
869 EXTRACTOR_metatype_to_string (i))) || 873 EXTRACTOR_metatype_to_string (i))) ||
870 (0 == strcmp (optarg, 874 (0 == strcmp (optarg,
871 gettext(EXTRACTOR_metatype_to_string (i)))) ) 875 gettext(EXTRACTOR_metatype_to_string (i)))) )
872 876
873 { 877 {
874 print[i] = YES; 878 print[i] = YES;
875 break; 879 break;
@@ -897,9 +901,9 @@ main (int argc, char *argv[])
897 i = 0; 901 i = 0;
898 while (NULL != EXTRACTOR_metatype_to_string (i)) 902 while (NULL != EXTRACTOR_metatype_to_string (i))
899 { 903 {
900 if ( (0 == strcmp (optarg, 904 if ( (0 == strcmp (optarg,
901 EXTRACTOR_metatype_to_string (i))) || 905 EXTRACTOR_metatype_to_string (i))) ||
902 (0 == strcmp (optarg, 906 (0 == strcmp (optarg,
903 gettext(EXTRACTOR_metatype_to_string (i)))) ) 907 gettext(EXTRACTOR_metatype_to_string (i)))) )
904 { 908 {
905 print[i] = NO; 909 print[i] = NO;
@@ -950,7 +954,7 @@ main (int argc, char *argv[])
950 else 954 else
951 plugins = NULL; 955 plugins = NULL;
952 if (NULL != libraries) 956 if (NULL != libraries)
953 plugins = EXTRACTOR_plugin_add_config (plugins, 957 plugins = EXTRACTOR_plugin_add_config (plugins,
954 libraries, 958 libraries,
955 in_process 959 in_process
956 ? EXTRACTOR_OPTION_IN_PROCESS 960 ? EXTRACTOR_OPTION_IN_PROCESS
@@ -962,7 +966,7 @@ main (int argc, char *argv[])
962 if (YES == bibtex) 966 if (YES == bibtex)
963 FPRINTF(stdout, 967 FPRINTF(stdout,
964 "%s", _("% BiBTeX file\n")); 968 "%s", _("% BiBTeX file\n"));
965 for (i = optind; i < utf8_argc; i++) 969 for (i = optind; i < utf8_argc; i++)
966 { 970 {
967 errno = 0; 971 errno = 0;
968 if (YES == grepfriendly) 972 if (YES == grepfriendly)
@@ -1001,7 +1005,7 @@ main (int argc, char *argv[])
1001 } 1005 }
1002 else 1006 else
1003 { 1007 {
1004 if (verbose > 0) 1008 if (verbose > 0)
1005 FPRINTF(stderr, 1009 FPRINTF(stderr,
1006 "%s: %s: %s\n", 1010 "%s: %s: %s\n",
1007 utf8_argv[0], utf8_argv[i], strerror(errno)); 1011 utf8_argv[0], utf8_argv[i], strerror(errno));