diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-08-04 19:44:40 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-08-04 19:44:40 +0000 |
commit | 57eb81f6eeb2c5ae5d847918e8d96f6e12e3e23e (patch) | |
tree | 087ee7403ddc548601dafba2429265619ae4eb7e /src/main/extractor_datasource.c | |
parent | b10d6b203d103ea0335576dc8af1d1e1649fcf06 (diff) | |
download | libextractor-57eb81f6eeb2c5ae5d847918e8d96f6e12e3e23e.tar.gz libextractor-57eb81f6eeb2c5ae5d847918e8d96f6e12e3e23e.zip |
implementing bz2 support
Diffstat (limited to 'src/main/extractor_datasource.c')
-rw-r--r-- | src/main/extractor_datasource.c | 255 |
1 files changed, 150 insertions, 105 deletions
diff --git a/src/main/extractor_datasource.c b/src/main/extractor_datasource.c index d4fd512..7269839 100644 --- a/src/main/extractor_datasource.c +++ b/src/main/extractor_datasource.c | |||
@@ -449,97 +449,6 @@ bfds_read (struct BufferedFileDataSource *bfds, | |||
449 | 449 | ||
450 | #if HAVE_ZLIB | 450 | #if HAVE_ZLIB |
451 | /** | 451 | /** |
452 | * Reset gz-compressed data stream to the beginning. | ||
453 | * | ||
454 | * @return 1 on success, 0 to terminate extraction, | ||
455 | * -1 on decompressor initialization failure | ||
456 | */ | ||
457 | static int | ||
458 | cfs_reset_stream_zlib (struct CompressedFileSource *cfs) | ||
459 | { | ||
460 | if (cfs->gzip_header_length != | ||
461 | bfds_seek (cfs->bfds, cfs->gzip_header_length, SEEK_SET)) | ||
462 | return -1; | ||
463 | memset (&cfs->strm, 0, sizeof (z_stream)); | ||
464 | cfs->strm.avail_out = COM_CHUNK_SIZE; | ||
465 | |||
466 | /* | ||
467 | * note: maybe plain inflateInit(&strm) is adequate, | ||
468 | * it looks more backward-compatible also ; | ||
469 | * | ||
470 | * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; | ||
471 | * there might be a better check. | ||
472 | */ | ||
473 | if (Z_OK != inflateInit2 (&cfs->strm, | ||
474 | #ifdef ZLIB_VERNUM | ||
475 | 15 + 32 | ||
476 | #else | ||
477 | - MAX_WBITS | ||
478 | #endif | ||
479 | )) | ||
480 | { | ||
481 | LOG ("Failed to initialize zlib decompression\n"); | ||
482 | return -1; | ||
483 | } | ||
484 | cfs->fpos = 0; | ||
485 | return 1; | ||
486 | } | ||
487 | #endif | ||
488 | |||
489 | |||
490 | #if HAVE_LIBBZ2 | ||
491 | /** | ||
492 | * Reset bz2-compressed data stream to the beginning. | ||
493 | * | ||
494 | * @return 1 on success, 0 to terminate extraction, | ||
495 | * -1 on decompressor initialization failure | ||
496 | */ | ||
497 | static int | ||
498 | cfs_reset_stream_bz2 (struct CompressedFileSource *cfs) | ||
499 | { | ||
500 | BZ2_bzDecompressEnd (&cfs->bstrm); | ||
501 | if (BZ_OK != | ||
502 | BZ2_bzDecompressInit (&cfs->bstrm, 0, 0)) | ||
503 | { | ||
504 | LOG ("Failed to reinitialize BZ2 decompressor\n"); | ||
505 | return -1; | ||
506 | } | ||
507 | return 1; | ||
508 | } | ||
509 | #endif | ||
510 | |||
511 | |||
512 | /** | ||
513 | * Resets the compression stream to begin uncompressing | ||
514 | * from the beginning. Used at initialization time, and when | ||
515 | * seeking backward. | ||
516 | * | ||
517 | * @param cfs cfs to reset | ||
518 | * @return 1 on success, 0 to terminate extraction, | ||
519 | * -1 on error | ||
520 | */ | ||
521 | static int | ||
522 | cfs_reset_stream (struct CompressedFileSource *cfs) | ||
523 | { | ||
524 | switch (cfs->compression_type) | ||
525 | { | ||
526 | #if HAVE_ZLIB | ||
527 | case COMP_TYPE_ZLIB: | ||
528 | return cfs_reset_stream_zlib (cfs); | ||
529 | #endif | ||
530 | #if HAVE_LIBBZ2 | ||
531 | case COMP_TYPE_BZ2: | ||
532 | return cfs_reset_stream_bz2 (cfs); | ||
533 | #endif | ||
534 | default: | ||
535 | LOG ("invalid compression type selected\n"); | ||
536 | return -1; | ||
537 | } | ||
538 | } | ||
539 | |||
540 | |||
541 | #if HAVE_ZLIB | ||
542 | /** | ||
543 | * Initializes gz-decompression object. Might report metadata about | 452 | * Initializes gz-decompression object. Might report metadata about |
544 | * compresse stream, if available. Resets the stream to the beginning. | 453 | * compresse stream, if available. Resets the stream to the beginning. |
545 | * | 454 | * |
@@ -591,10 +500,11 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, | |||
591 | return -1; | 500 | return -1; |
592 | } | 501 | } |
593 | len = cptr - fname; | 502 | len = cptr - fname; |
594 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, | 503 | if ( (NULL != proc) && |
595 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | 504 | (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, |
596 | fname, | 505 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", |
597 | len)) | 506 | fname, |
507 | len)) ) | ||
598 | return 0; /* done */ | 508 | return 0; /* done */ |
599 | gzip_header_length += len + 1; | 509 | gzip_header_length += len + 1; |
600 | } | 510 | } |
@@ -624,10 +534,11 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, | |||
624 | return -1; | 534 | return -1; |
625 | } | 535 | } |
626 | len = cptr - fcomment; | 536 | len = cptr - fcomment; |
627 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, | 537 | if ( (NULL != proc) && |
628 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | 538 | (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, |
629 | (const char *) fcomment, | 539 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", |
630 | len)) | 540 | (const char *) fcomment, |
541 | len)) ) | ||
631 | return 0; /* done */ | 542 | return 0; /* done */ |
632 | gzip_header_length += len + 1; | 543 | gzip_header_length += len + 1; |
633 | } | 544 | } |
@@ -640,7 +551,33 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, | |||
640 | gzip_header_length = 0; | 551 | gzip_header_length = 0; |
641 | #endif | 552 | #endif |
642 | cfs->gzip_header_length = gzip_header_length; | 553 | cfs->gzip_header_length = gzip_header_length; |
643 | return cfs_reset_stream_zlib (cfs); | 554 | |
555 | if (cfs->gzip_header_length != | ||
556 | bfds_seek (cfs->bfds, cfs->gzip_header_length, SEEK_SET)) | ||
557 | { | ||
558 | LOG ("Failed to seek to start to initialize gzip decompressor\n"); | ||
559 | return -1; | ||
560 | } | ||
561 | cfs->strm.avail_out = COM_CHUNK_SIZE; | ||
562 | /* | ||
563 | * note: maybe plain inflateInit(&strm) is adequate, | ||
564 | * it looks more backward-compatible also ; | ||
565 | * | ||
566 | * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; | ||
567 | * there might be a better check. | ||
568 | */ | ||
569 | if (Z_OK != inflateInit2 (&cfs->strm, | ||
570 | #ifdef ZLIB_VERNUM | ||
571 | 15 + 32 | ||
572 | #else | ||
573 | - MAX_WBITS | ||
574 | #endif | ||
575 | )) | ||
576 | { | ||
577 | LOG ("Failed to initialize zlib decompression\n"); | ||
578 | return -1; | ||
579 | } | ||
580 | return 1; | ||
644 | } | 581 | } |
645 | #endif | 582 | #endif |
646 | 583 | ||
@@ -659,9 +596,20 @@ static int | |||
659 | cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, | 596 | cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, |
660 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 597 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
661 | { | 598 | { |
599 | if (0 != | ||
600 | bfds_seek (cfs->bfds, 0, SEEK_SET)) | ||
601 | { | ||
602 | LOG ("Failed to seek to start to initialize BZ2 decompressor\n"); | ||
603 | return -1; | ||
604 | } | ||
605 | memset (&cfs->bstrm, 0, sizeof (bz_stream)); | ||
662 | if (BZ_OK != | 606 | if (BZ_OK != |
663 | BZ2_bzDecompressInit (&cfs->bstrm, 0, 0)) | 607 | BZ2_bzDecompressInit (&cfs->bstrm, 0, 0)) |
664 | return -1; | 608 | { |
609 | LOG ("Failed to initialize BZ2 decompressor\n"); | ||
610 | return -1; | ||
611 | } | ||
612 | cfs->bstrm.avail_out = COM_CHUNK_SIZE; | ||
665 | return 1; | 613 | return 1; |
666 | } | 614 | } |
667 | #endif | 615 | #endif |
@@ -680,6 +628,8 @@ static int | |||
680 | cfs_init_decompressor (struct CompressedFileSource *cfs, | 628 | cfs_init_decompressor (struct CompressedFileSource *cfs, |
681 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 629 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
682 | { | 630 | { |
631 | cfs->result_pos = 0; | ||
632 | cfs->fpos = 0; | ||
683 | switch (cfs->compression_type) | 633 | switch (cfs->compression_type) |
684 | { | 634 | { |
685 | #if HAVE_ZLIB | 635 | #if HAVE_ZLIB |
@@ -756,6 +706,24 @@ cfs_deinit_decompressor (struct CompressedFileSource *cfs) | |||
756 | 706 | ||
757 | 707 | ||
758 | /** | 708 | /** |
709 | * Resets the compression stream to begin uncompressing | ||
710 | * from the beginning. Used at initialization time, and when | ||
711 | * seeking backward. | ||
712 | * | ||
713 | * @param cfs cfs to reset | ||
714 | * @return 1 on success, 0 to terminate extraction, | ||
715 | * -1 on error | ||
716 | */ | ||
717 | static int | ||
718 | cfs_reset_stream (struct CompressedFileSource *cfs) | ||
719 | { | ||
720 | if (-1 == cfs_deinit_decompressor (cfs)) | ||
721 | return -1; | ||
722 | return cfs_init_decompressor (cfs, NULL, NULL); | ||
723 | } | ||
724 | |||
725 | |||
726 | /** | ||
759 | * Destroy compressed file source. | 727 | * Destroy compressed file source. |
760 | * | 728 | * |
761 | * @param cfs source to destroy | 729 | * @param cfs source to destroy |
@@ -826,7 +794,7 @@ cfs_read_zlib (struct CompressedFileSource *cfs, | |||
826 | int ret; | 794 | int ret; |
827 | size_t rc; | 795 | size_t rc; |
828 | ssize_t in; | 796 | ssize_t in; |
829 | char buf[COM_CHUNK_SIZE]; | 797 | unsigned char buf[COM_CHUNK_SIZE]; |
830 | 798 | ||
831 | if (cfs->fpos == cfs->uncompressed_size) | 799 | if (cfs->fpos == cfs->uncompressed_size) |
832 | { | 800 | { |
@@ -856,7 +824,12 @@ cfs_read_zlib (struct CompressedFileSource *cfs, | |||
856 | LOG ("unexpected EOF\n"); | 824 | LOG ("unexpected EOF\n"); |
857 | return -1; /* unexpected EOF */ | 825 | return -1; /* unexpected EOF */ |
858 | } | 826 | } |
859 | cfs->strm.next_in = (unsigned char *) buf; | 827 | if (0 == in) |
828 | { | ||
829 | cfs->uncompressed_size = cfs->fpos; | ||
830 | return rc; | ||
831 | } | ||
832 | cfs->strm.next_in = buf; | ||
860 | cfs->strm.avail_in = (uInt) in; | 833 | cfs->strm.avail_in = (uInt) in; |
861 | cfs->strm.next_out = (unsigned char *) cfs->result; | 834 | cfs->strm.next_out = (unsigned char *) cfs->result; |
862 | cfs->strm.avail_out = COM_CHUNK_SIZE; | 835 | cfs->strm.avail_out = COM_CHUNK_SIZE; |
@@ -905,8 +878,74 @@ cfs_read_bz2 (struct CompressedFileSource *cfs, | |||
905 | void *data, | 878 | void *data, |
906 | size_t size) | 879 | size_t size) |
907 | { | 880 | { |
908 | LOG ("bz2 decompression not implemented\n"); | 881 | char *dst = data; |
909 | return -1; | 882 | int ret; |
883 | size_t rc; | ||
884 | ssize_t in; | ||
885 | char buf[COM_CHUNK_SIZE]; | ||
886 | |||
887 | if (cfs->fpos == cfs->uncompressed_size) | ||
888 | { | ||
889 | /* end of file */ | ||
890 | return 0; | ||
891 | } | ||
892 | rc = 0; | ||
893 | if (COM_CHUNK_SIZE > cfs->bstrm.avail_out + cfs->result_pos) | ||
894 | { | ||
895 | /* got left-over decompressed data from previous round! */ | ||
896 | in = COM_CHUNK_SIZE - (cfs->bstrm.avail_out + cfs->result_pos); | ||
897 | if (in > size) | ||
898 | in = size; | ||
899 | memcpy (&dst[rc], &cfs->result[cfs->result_pos], in); | ||
900 | cfs->fpos += in; | ||
901 | cfs->result_pos += in; | ||
902 | rc += in; | ||
903 | } | ||
904 | ret = BZ_OK; | ||
905 | while ( (rc < size) && (BZ_STREAM_END != ret) ) | ||
906 | { | ||
907 | /* read block from original data source */ | ||
908 | in = bfds_read (cfs->bfds, | ||
909 | buf, sizeof (buf)); | ||
910 | if (in < 0) | ||
911 | { | ||
912 | LOG ("unexpected EOF\n"); | ||
913 | return -1; /* unexpected EOF */ | ||
914 | } | ||
915 | if (0 == in) | ||
916 | { | ||
917 | cfs->uncompressed_size = cfs->fpos; | ||
918 | return rc; | ||
919 | } | ||
920 | cfs->bstrm.next_in = buf; | ||
921 | cfs->bstrm.avail_in = (uInt) in; | ||
922 | cfs->bstrm.next_out = cfs->result; | ||
923 | cfs->bstrm.avail_out = COM_CHUNK_SIZE; | ||
924 | cfs->result_pos = 0; | ||
925 | ret = BZ2_bzDecompress (&cfs->bstrm); | ||
926 | if ( (BZ_OK != ret) && (BZ_STREAM_END != ret) ) | ||
927 | { | ||
928 | LOG ("unexpected bzip2 decompress error: %d\n", ret); | ||
929 | return -1; /* unexpected error */ | ||
930 | } | ||
931 | /* go backwards by the number of bytes left in the buffer */ | ||
932 | if (-1 == bfds_seek (cfs->bfds, - (int64_t) cfs->bstrm.avail_in, SEEK_CUR)) | ||
933 | { | ||
934 | LOG ("seek failed\n"); | ||
935 | return -1; | ||
936 | } | ||
937 | /* copy decompressed bytes to target buffer */ | ||
938 | in = COM_CHUNK_SIZE - cfs->bstrm.avail_out; | ||
939 | if (in > size - rc) | ||
940 | in = size - rc; | ||
941 | memcpy (&dst[rc], &cfs->result[cfs->result_pos], in); | ||
942 | cfs->fpos += in; | ||
943 | cfs->result_pos += in; | ||
944 | rc += in; | ||
945 | } | ||
946 | if (BZ_STREAM_END == ret) | ||
947 | cfs->uncompressed_size = cfs->fpos; | ||
948 | return rc; | ||
910 | } | 949 | } |
911 | #endif | 950 | #endif |
912 | 951 | ||
@@ -1041,6 +1080,12 @@ cfs_seek (struct CompressedFileSource *cfs, | |||
1041 | LOG ("Failed to read decompressed stream for seek operation\n"); | 1080 | LOG ("Failed to read decompressed stream for seek operation\n"); |
1042 | return -1; | 1081 | return -1; |
1043 | } | 1082 | } |
1083 | if (0 == ret) | ||
1084 | { | ||
1085 | LOG ("Reached unexpected end of stream during seek operation\n"); | ||
1086 | return -1; | ||
1087 | } | ||
1088 | ASSERT (ret <= delta); | ||
1044 | delta -= ret; | 1089 | delta -= ret; |
1045 | } | 1090 | } |
1046 | return cfs->fpos; | 1091 | return cfs->fpos; |