diff options
author | Christian Grothoff <christian@grothoff.org> | 2017-10-15 20:03:29 +0200 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2017-10-15 20:03:29 +0200 |
commit | a0268aec1e494ac26b986454803f1c869e0fe58b (patch) | |
tree | 811984cd8487ad206811df113d45c29e8021c997 | |
parent | 71aa4223b2770a9243ddc86457bcd2fdcf47d922 (diff) | |
download | libextractor-a0268aec1e494ac26b986454803f1c869e0fe58b.tar.gz libextractor-a0268aec1e494ac26b986454803f1c869e0fe58b.zip |
fix very hypothetical lack of 0-termination in ole2-extractor iff translation string was more than 10x as long as the English version
-rw-r--r-- | src/plugins/ole2_extractor.c | 105 |
1 file changed, 56 insertions, 49 deletions
diff --git a/src/plugins/ole2_extractor.c b/src/plugins/ole2_extractor.c index 265120a..44a7ba9 100644 --- a/src/plugins/ole2_extractor.c +++ b/src/plugins/ole2_extractor.c | |||
@@ -68,7 +68,7 @@ static int | |||
68 | add_metadata (EXTRACTOR_MetaDataProcessor proc, | 68 | add_metadata (EXTRACTOR_MetaDataProcessor proc, |
69 | void *proc_cls, | 69 | void *proc_cls, |
70 | const char *phrase, | 70 | const char *phrase, |
71 | enum EXTRACTOR_MetaType type) | 71 | enum EXTRACTOR_MetaType type) |
72 | { | 72 | { |
73 | char *tmp; | 73 | char *tmp; |
74 | int ret; | 74 | int ret; |
@@ -83,11 +83,11 @@ add_metadata (EXTRACTOR_MetaDataProcessor proc, | |||
83 | return 0; | 83 | return 0; |
84 | if (NULL == (tmp = strdup (phrase))) | 84 | if (NULL == (tmp = strdup (phrase))) |
85 | return 0; | 85 | return 0; |
86 | 86 | ||
87 | while ( (strlen (tmp) > 0) && | 87 | while ( (strlen (tmp) > 0) && |
88 | (isblank ((unsigned char) tmp [strlen (tmp) - 1])) ) | 88 | (isblank ((unsigned char) tmp [strlen (tmp) - 1])) ) |
89 | tmp [strlen (tmp) - 1] = '\0'; | 89 | tmp [strlen (tmp) - 1] = '\0'; |
90 | ret = proc (proc_cls, | 90 | ret = proc (proc_cls, |
91 | "ole2", | 91 | "ole2", |
92 | type, | 92 | type, |
93 | EXTRACTOR_METAFORMAT_UTF8, | 93 | EXTRACTOR_METAFORMAT_UTF8, |
@@ -103,7 +103,7 @@ add_metadata (EXTRACTOR_MetaDataProcessor proc, | |||
103 | * Entry in the map from OLE meta type strings | 103 | * Entry in the map from OLE meta type strings |
104 | * to LE types. | 104 | * to LE types. |
105 | */ | 105 | */ |
106 | struct Matches | 106 | struct Matches |
107 | { | 107 | { |
108 | /** | 108 | /** |
109 | * OLE description. | 109 | * OLE description. |
@@ -152,7 +152,7 @@ static struct Matches tmap[] = { | |||
152 | { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE }, | 152 | { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE }, |
153 | { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, | 153 | { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, |
154 | { "meta:template", EXTRACTOR_METATYPE_TEMPLATE }, | 154 | { "meta:template", EXTRACTOR_METATYPE_TEMPLATE }, |
155 | { "meta:editing-cycles", EXTRACTOR_METATYPE_EDITING_CYCLES }, | 155 | { "meta:editing-cycles", EXTRACTOR_METATYPE_EDITING_CYCLES }, |
156 | /* { "Dictionary", EXTRACTOR_METATYPE_LANGUAGE }, */ | 156 | /* { "Dictionary", EXTRACTOR_METATYPE_LANGUAGE }, */ |
157 | /* { "gsf:security", EXTRACTOR_SECURITY }, */ | 157 | /* { "gsf:security", EXTRACTOR_SECURITY }, */ |
158 | /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */ | 158 | /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */ |
@@ -192,10 +192,10 @@ struct ProcContext | |||
192 | * @param value the UTF8 representation of the meta data | 192 | * @param value the UTF8 representation of the meta data |
193 | * @param user_data our 'struct ProcContext' (closure) | 193 | * @param user_data our 'struct ProcContext' (closure) |
194 | */ | 194 | */ |
195 | static void | 195 | static void |
196 | process_metadata (gpointer key, | 196 | process_metadata (gpointer key, |
197 | gpointer value, | 197 | gpointer value, |
198 | gpointer user_data) | 198 | gpointer user_data) |
199 | { | 199 | { |
200 | const char *type = key; | 200 | const char *type = key; |
201 | const GsfDocProp *prop = value; | 201 | const GsfDocProp *prop = value; |
@@ -211,7 +211,7 @@ process_metadata (gpointer key, | |||
211 | return; | 211 | return; |
212 | gval = gsf_doc_prop_get_val (prop); | 212 | gval = gsf_doc_prop_get_val (prop); |
213 | 213 | ||
214 | if (G_VALUE_TYPE(gval) == G_TYPE_STRING) | 214 | if (G_VALUE_TYPE(gval) == G_TYPE_STRING) |
215 | { | 215 | { |
216 | contents = strdup (g_value_get_string (gval)); | 216 | contents = strdup (g_value_get_string (gval)); |
217 | } | 217 | } |
@@ -241,8 +241,8 @@ process_metadata (gpointer key, | |||
241 | else if (0 == strncmp(value, "Microsoft Office", 16)) | 241 | else if (0 == strncmp(value, "Microsoft Office", 16)) |
242 | mimetype = "application/vnd.ms-office"; | 242 | mimetype = "application/vnd.ms-office"; |
243 | if (0 != add_metadata (pc->proc, | 243 | if (0 != add_metadata (pc->proc, |
244 | pc->proc_cls, | 244 | pc->proc_cls, |
245 | mimetype, | 245 | mimetype, |
246 | EXTRACTOR_METATYPE_MIMETYPE)) | 246 | EXTRACTOR_METATYPE_MIMETYPE)) |
247 | { | 247 | { |
248 | free (contents); | 248 | free (contents); |
@@ -270,7 +270,7 @@ process_metadata (gpointer key, | |||
270 | /** | 270 | /** |
271 | * Function called on (Document)SummaryInformation OLE | 271 | * Function called on (Document)SummaryInformation OLE |
272 | * streams. | 272 | * streams. |
273 | * | 273 | * |
274 | * @param in the input OLE stream | 274 | * @param in the input OLE stream |
275 | * @param proc function to call on meta data found | 275 | * @param proc function to call on meta data found |
276 | * @param proc_cls closure for proc | 276 | * @param proc_cls closure for proc |
@@ -312,7 +312,7 @@ process (GsfInput *in, | |||
312 | /** | 312 | /** |
313 | * Function called on SfxDocumentInfo OLE | 313 | * Function called on SfxDocumentInfo OLE |
314 | * streams. | 314 | * streams. |
315 | * | 315 | * |
316 | * @param in the input OLE stream | 316 | * @param in the input OLE stream |
317 | * @param proc function to call on meta data found | 317 | * @param proc function to call on meta data found |
318 | * @param proc_cls closure for proc | 318 | * @param proc_cls closure for proc |
@@ -321,11 +321,11 @@ process (GsfInput *in, | |||
321 | static int | 321 | static int |
322 | process_star_office (GsfInput *src, | 322 | process_star_office (GsfInput *src, |
323 | EXTRACTOR_MetaDataProcessor proc, | 323 | EXTRACTOR_MetaDataProcessor proc, |
324 | void *proc_cls) | 324 | void *proc_cls) |
325 | { | 325 | { |
326 | off_t size = gsf_input_size (src); | 326 | off_t size = gsf_input_size (src); |
327 | 327 | ||
328 | if ( (size < 0x374) || | 328 | if ( (size < 0x374) || |
329 | (size > 4*1024*1024) ) /* == 0x375?? */ | 329 | (size > 4*1024*1024) ) /* == 0x375?? */ |
330 | return 0; | 330 | return 0; |
331 | { | 331 | { |
@@ -339,7 +339,7 @@ process_star_office (GsfInput *src, | |||
339 | strlen ("SfxDocumentInfo"))) || | 339 | strlen ("SfxDocumentInfo"))) || |
340 | (buf[0x11] != 0x0B) || | 340 | (buf[0x11] != 0x0B) || |
341 | (buf[0x13] != 0x00) || /* pw protected! */ | 341 | (buf[0x13] != 0x00) || /* pw protected! */ |
342 | (buf[0x12] != 0x00) ) | 342 | (buf[0x12] != 0x00) ) |
343 | return 0; | 343 | return 0; |
344 | buf[0xd3] = '\0'; | 344 | buf[0xd3] = '\0'; |
345 | if ( (buf[0x94] + buf[0x93] > 0) && | 345 | if ( (buf[0x94] + buf[0x93] > 0) && |
@@ -374,7 +374,7 @@ process_star_office (GsfInput *src, | |||
374 | 374 | ||
375 | /** | 375 | /** |
376 | * We use "__" to translate using iso-639. | 376 | * We use "__" to translate using iso-639. |
377 | * | 377 | * |
378 | * @param a string to translate | 378 | * @param a string to translate |
379 | * @return translated string | 379 | * @return translated string |
380 | */ | 380 | */ |
@@ -384,11 +384,11 @@ process_star_office (GsfInput *src, | |||
384 | /** | 384 | /** |
385 | * Get the language string for the given language ID (lid) | 385 | * Get the language string for the given language ID (lid) |
386 | * value. | 386 | * value. |
387 | * | 387 | * |
388 | * @param lid language id value | 388 | * @param lid language id value |
389 | * @return language string corresponding to the lid | 389 | * @return language string corresponding to the lid |
390 | */ | 390 | */ |
391 | static const char * | 391 | static const char * |
392 | lid_to_language (unsigned int lid) | 392 | lid_to_language (unsigned int lid) |
393 | { | 393 | { |
394 | switch (lid) | 394 | switch (lid) |
@@ -554,7 +554,7 @@ history_extract (GsfInput *stream, | |||
554 | nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2; | 554 | nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2; |
555 | where = 6; | 555 | where = 6; |
556 | ret = 0; | 556 | ret = 0; |
557 | for (i=0; i < nRev; i++) | 557 | for (i=0; i < nRev; i++) |
558 | { | 558 | { |
559 | if (where >= lcbSttbSavedBy) | 559 | if (where >= lcbSttbSavedBy) |
560 | break; | 560 | break; |
@@ -568,7 +568,7 @@ history_extract (GsfInput *stream, | |||
568 | where += length * 2 + 1; | 568 | where += length * 2 + 1; |
569 | length = lbuffer[where++]; | 569 | length = lbuffer[where++]; |
570 | if ( (where + 2 * length >= lcbSttbSavedBy) || | 570 | if ( (where + 2 * length >= lcbSttbSavedBy) || |
571 | (where + 2 * length + 1 <= where) ) | 571 | (where + 2 * length + 1 <= where) ) |
572 | { | 572 | { |
573 | if (NULL != author) | 573 | if (NULL != author) |
574 | free(author); | 574 | free(author); |
@@ -581,17 +581,24 @@ history_extract (GsfInput *stream, | |||
581 | if ( (NULL != author) && | 581 | if ( (NULL != author) && |
582 | (NULL != filename) ) | 582 | (NULL != filename) ) |
583 | { | 583 | { |
584 | if (NULL != (rbuf = malloc (strlen (author) + strlen (filename) + 512))) | 584 | size_t bsize; |
585 | |||
586 | bsize = strlen (author) + strlen (filename) + 512; | ||
587 | if (NULL != (rbuf = malloc (bsize)) | ||
585 | { | 588 | { |
586 | snprintf (rbuf, | 589 | if (bsize > |
587 | 512 + strlen (author) + strlen (filename), | 590 | snprintf (rbuf, |
588 | _("Revision #%u: Author `%s' worked on `%s'"), | 591 | bsize, |
589 | i, | 592 | _("Revision #%u: Author `%s' worked on `%s'"), |
590 | author, | 593 | i, |
591 | filename); | 594 | author, |
592 | ret = add_metadata (proc, proc_cls, | 595 | filename)) |
593 | rbuf, | 596 | { |
594 | EXTRACTOR_METATYPE_REVISION_HISTORY); | 597 | ret = add_metadata (proc, |
598 | proc_cls, | ||
599 | rbuf, | ||
600 | EXTRACTOR_METATYPE_REVISION_HISTORY); | ||
601 | } | ||
595 | free (rbuf); | 602 | free (rbuf); |
596 | } | 603 | } |
597 | } | 604 | } |
@@ -619,7 +626,7 @@ history_extract (GsfInput *stream, | |||
619 | /** | 626 | /** |
620 | * Internal state of an "LeInput" object. | 627 | * Internal state of an "LeInput" object. |
621 | */ | 628 | */ |
622 | typedef struct _LeInputPrivate | 629 | typedef struct _LeInputPrivate |
623 | { | 630 | { |
624 | /** | 631 | /** |
625 | * Our extraction context. | 632 | * Our extraction context. |
@@ -631,13 +638,13 @@ typedef struct _LeInputPrivate | |||
631 | /** | 638 | /** |
632 | * Overall state of an "LeInput" object. | 639 | * Overall state of an "LeInput" object. |
633 | */ | 640 | */ |
634 | typedef struct _LeInput | 641 | typedef struct _LeInput |
635 | { | 642 | { |
636 | /** | 643 | /** |
637 | * Inherited state from parent (GsfInput). | 644 | * Inherited state from parent (GsfInput). |
638 | */ | 645 | */ |
639 | GsfInput input; | 646 | GsfInput input; |
640 | 647 | ||
641 | /*< private > */ | 648 | /*< private > */ |
642 | /** | 649 | /** |
643 | * Private state of the LeInput. | 650 | * Private state of the LeInput. |
@@ -665,7 +672,7 @@ typedef struct _LeInputClass | |||
665 | 672 | ||
666 | 673 | ||
667 | /** | 674 | /** |
668 | * Constructor for LeInput objects. | 675 | * Constructor for LeInput objects. |
669 | * | 676 | * |
670 | * @param ec extraction context to use | 677 | * @param ec extraction context to use |
671 | * @return the LeInput, NULL on error | 678 | * @return the LeInput, NULL on error |
@@ -737,15 +744,15 @@ le_input_read (GsfInput *input, | |||
737 | void *buf; | 744 | void *buf; |
738 | uint64_t old_off; | 745 | uint64_t old_off; |
739 | ssize_t ret; | 746 | ssize_t ret; |
740 | 747 | ||
741 | ec = li->priv->ec; | 748 | ec = li->priv->ec; |
742 | old_off = ec->seek (ec->cls, 0, SEEK_CUR); | 749 | old_off = ec->seek (ec->cls, 0, SEEK_CUR); |
743 | if (num_bytes | 750 | if (num_bytes |
744 | != (ret = ec->read (ec->cls, | 751 | != (ret = ec->read (ec->cls, |
745 | &buf, | 752 | &buf, |
746 | num_bytes))) | 753 | num_bytes))) |
747 | { | 754 | { |
748 | /* we don't support partial reads; | 755 | /* we don't support partial reads; |
749 | most other GsfInput implementations in this case | 756 | most other GsfInput implementations in this case |
750 | allocate some huge temporary buffer just to avoid | 757 | allocate some huge temporary buffer just to avoid |
751 | the partial read; we might need to do that as well!? */ | 758 | the partial read; we might need to do that as well!? */ |
@@ -794,7 +801,7 @@ le_input_seek (GsfInput *input, | |||
794 | default: | 801 | default: |
795 | return TRUE; | 802 | return TRUE; |
796 | } | 803 | } |
797 | if (-1 == | 804 | if (-1 == |
798 | (ret = ec->seek (ec->cls, | 805 | (ret = ec->seek (ec->cls, |
799 | offset, | 806 | offset, |
800 | w))) | 807 | w))) |
@@ -869,7 +876,7 @@ le_input_new (struct EXTRACTOR_ExtractContext *ec) | |||
869 | 876 | ||
870 | 877 | ||
871 | /** | 878 | /** |
872 | * Main entry method for the OLE2 extraction plugin. | 879 | * Main entry method for the OLE2 extraction plugin. |
873 | * | 880 | * |
874 | * @param ec extraction context provided to the plugin | 881 | * @param ec extraction context provided to the plugin |
875 | */ | 882 | */ |
@@ -933,7 +940,7 @@ EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
933 | return; | 940 | return; |
934 | } | 941 | } |
935 | ret = 0; | 942 | ret = 0; |
936 | for (i=0;i<gsf_infile_num_children (infile);i++) | 943 | for (i=0;i<gsf_infile_num_children (infile);i++) |
937 | { | 944 | { |
938 | if (0 != ret) | 945 | if (0 != ret) |
939 | break; | 946 | break; |
@@ -944,7 +951,7 @@ EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
944 | (0 == strcmp (name, "\005DocumentSummaryInformation")) ) && | 951 | (0 == strcmp (name, "\005DocumentSummaryInformation")) ) && |
945 | (NULL != (src = gsf_infile_child_by_index (infile, i))) ) | 952 | (NULL != (src = gsf_infile_child_by_index (infile, i))) ) |
946 | ret = process (src, | 953 | ret = process (src, |
947 | ec->proc, | 954 | ec->proc, |
948 | ec->cls); | 955 | ec->cls); |
949 | if ( (0 == strcmp (name, "SfxDocumentInfo")) && | 956 | if ( (0 == strcmp (name, "SfxDocumentInfo")) && |
950 | (NULL != (src = gsf_infile_child_by_index (infile, i))) ) | 957 | (NULL != (src = gsf_infile_child_by_index (infile, i))) ) |
@@ -959,7 +966,7 @@ EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
959 | 966 | ||
960 | if (lcb < 6) | 967 | if (lcb < 6) |
961 | goto CLEANUP; | 968 | goto CLEANUP; |
962 | for (i=0;i<gsf_infile_num_children (infile);i++) | 969 | for (i=0;i<gsf_infile_num_children (infile);i++) |
963 | { | 970 | { |
964 | if (ret != 0) | 971 | if (ret != 0) |
965 | break; | 972 | break; |
@@ -974,7 +981,7 @@ EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
974 | fcb, | 981 | fcb, |
975 | ec->proc, ec->cls); | 982 | ec->proc, ec->cls); |
976 | g_object_unref (G_OBJECT (src)); | 983 | g_object_unref (G_OBJECT (src)); |
977 | } | 984 | } |
978 | } | 985 | } |
979 | CLEANUP: | 986 | CLEANUP: |
980 | g_object_unref (G_OBJECT (infile)); | 987 | g_object_unref (G_OBJECT (infile)); |
@@ -990,11 +997,11 @@ EXTRACTOR_ole2_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
990 | * @param message unused | 997 | * @param message unused |
991 | * @param user_data unused | 998 | * @param user_data unused |
992 | */ | 999 | */ |
993 | static void | 1000 | static void |
994 | nolog (const gchar *log_domain, | 1001 | nolog (const gchar *log_domain, |
995 | GLogLevelFlags log_level, | 1002 | GLogLevelFlags log_level, |
996 | const gchar *message, | 1003 | const gchar *message, |
997 | gpointer user_data) | 1004 | gpointer user_data) |
998 | { | 1005 | { |
999 | /* do nothing */ | 1006 | /* do nothing */ |
1000 | } | 1007 | } |
@@ -1004,8 +1011,8 @@ nolog (const gchar *log_domain, | |||
1004 | * OLE2 plugin constructor. Initializes glib and gsf, in particular | 1011 | * OLE2 plugin constructor. Initializes glib and gsf, in particular |
1005 | * gsf logging is disabled. | 1012 | * gsf logging is disabled. |
1006 | */ | 1013 | */ |
1007 | void __attribute__ ((constructor)) | 1014 | void __attribute__ ((constructor)) |
1008 | ole2_ltdl_init() | 1015 | ole2_ltdl_init() |
1009 | { | 1016 | { |
1010 | #if !GLIB_CHECK_VERSION(2, 35, 0) | 1017 | #if !GLIB_CHECK_VERSION(2, 35, 0) |
1011 | g_type_init (); | 1018 | g_type_init (); |
@@ -1015,7 +1022,7 @@ ole2_ltdl_init() | |||
1015 | #endif | 1022 | #endif |
1016 | /* disable logging -- thanks, Jody! */ | 1023 | /* disable logging -- thanks, Jody! */ |
1017 | g_log_set_handler ("libgsf:msole", | 1024 | g_log_set_handler ("libgsf:msole", |
1018 | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING, | 1025 | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING, |
1019 | &nolog, NULL); | 1026 | &nolog, NULL); |
1020 | } | 1027 | } |
1021 | 1028 | ||
@@ -1024,7 +1031,7 @@ ole2_ltdl_init() | |||
1024 | * OLE2 plugin destructor. Shutdown of gsf. | 1031 | * OLE2 plugin destructor. Shutdown of gsf. |
1025 | */ | 1032 | */ |
1026 | void __attribute__ ((destructor)) | 1033 | void __attribute__ ((destructor)) |
1027 | ole2_ltdl_fini() | 1034 | ole2_ltdl_fini() |
1028 | { | 1035 | { |
1029 | #ifdef HAVE_GSF_INIT | 1036 | #ifdef HAVE_GSF_INIT |
1030 | gsf_shutdown(); | 1037 | gsf_shutdown(); |