aboutsummaryrefslogtreecommitdiff
path: root/src/main/extractor_datasource.c
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-08-04 19:44:40 +0000
committerChristian Grothoff <christian@grothoff.org>2012-08-04 19:44:40 +0000
commit57eb81f6eeb2c5ae5d847918e8d96f6e12e3e23e (patch)
tree087ee7403ddc548601dafba2429265619ae4eb7e /src/main/extractor_datasource.c
parentb10d6b203d103ea0335576dc8af1d1e1649fcf06 (diff)
downloadlibextractor-57eb81f6eeb2c5ae5d847918e8d96f6e12e3e23e.tar.gz
libextractor-57eb81f6eeb2c5ae5d847918e8d96f6e12e3e23e.zip
implementing bz2 support
Diffstat (limited to 'src/main/extractor_datasource.c')
-rw-r--r--src/main/extractor_datasource.c255
1 files changed, 150 insertions, 105 deletions
diff --git a/src/main/extractor_datasource.c b/src/main/extractor_datasource.c
index d4fd512..7269839 100644
--- a/src/main/extractor_datasource.c
+++ b/src/main/extractor_datasource.c
@@ -449,97 +449,6 @@ bfds_read (struct BufferedFileDataSource *bfds,
449 449
450#if HAVE_ZLIB 450#if HAVE_ZLIB
451/** 451/**
452 * Reset gz-compressed data stream to the beginning.
453 *
454 * @return 1 on success, 0 to terminate extraction,
455 * -1 on decompressor initialization failure
456 */
457static int
458cfs_reset_stream_zlib (struct CompressedFileSource *cfs)
459{
460 if (cfs->gzip_header_length !=
461 bfds_seek (cfs->bfds, cfs->gzip_header_length, SEEK_SET))
462 return -1;
463 memset (&cfs->strm, 0, sizeof (z_stream));
464 cfs->strm.avail_out = COM_CHUNK_SIZE;
465
466 /*
467 * note: maybe plain inflateInit(&strm) is adequate,
468 * it looks more backward-compatible also ;
469 *
470 * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
471 * there might be a better check.
472 */
473 if (Z_OK != inflateInit2 (&cfs->strm,
474#ifdef ZLIB_VERNUM
475 15 + 32
476#else
477 - MAX_WBITS
478#endif
479 ))
480 {
481 LOG ("Failed to initialize zlib decompression\n");
482 return -1;
483 }
484 cfs->fpos = 0;
485 return 1;
486}
487#endif
488
489
490#if HAVE_LIBBZ2
491/**
492 * Reset bz2-compressed data stream to the beginning.
493 *
494 * @return 1 on success, 0 to terminate extraction,
495 * -1 on decompressor initialization failure
496 */
497static int
498cfs_reset_stream_bz2 (struct CompressedFileSource *cfs)
499{
500 BZ2_bzDecompressEnd (&cfs->bstrm);
501 if (BZ_OK !=
502 BZ2_bzDecompressInit (&cfs->bstrm, 0, 0))
503 {
504 LOG ("Failed to reinitialize BZ2 decompressor\n");
505 return -1;
506 }
507 return 1;
508}
509#endif
510
511
512/**
513 * Resets the compression stream to begin uncompressing
514 * from the beginning. Used at initialization time, and when
515 * seeking backward.
516 *
517 * @param cfs cfs to reset
518 * @return 1 on success, 0 to terminate extraction,
519 * -1 on error
520 */
521static int
522cfs_reset_stream (struct CompressedFileSource *cfs)
523{
524 switch (cfs->compression_type)
525 {
526#if HAVE_ZLIB
527 case COMP_TYPE_ZLIB:
528 return cfs_reset_stream_zlib (cfs);
529#endif
530#if HAVE_LIBBZ2
531 case COMP_TYPE_BZ2:
532 return cfs_reset_stream_bz2 (cfs);
533#endif
534 default:
535 LOG ("invalid compression type selected\n");
536 return -1;
537 }
538}
539
540
541#if HAVE_ZLIB
542/**
543 * Initializes gz-decompression object. Might report metadata about 452 * Initializes gz-decompression object. Might report metadata about
544 * compresse stream, if available. Resets the stream to the beginning. 453 * compresse stream, if available. Resets the stream to the beginning.
545 * 454 *
@@ -591,10 +500,11 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs,
591 return -1; 500 return -1;
592 } 501 }
593 len = cptr - fname; 502 len = cptr - fname;
594 if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, 503 if ( (NULL != proc) &&
595 EXTRACTOR_METAFORMAT_C_STRING, "text/plain", 504 (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
596 fname, 505 EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
597 len)) 506 fname,
507 len)) )
598 return 0; /* done */ 508 return 0; /* done */
599 gzip_header_length += len + 1; 509 gzip_header_length += len + 1;
600 } 510 }
@@ -624,10 +534,11 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs,
624 return -1; 534 return -1;
625 } 535 }
626 len = cptr - fcomment; 536 len = cptr - fcomment;
627 if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, 537 if ( (NULL != proc) &&
628 EXTRACTOR_METAFORMAT_C_STRING, "text/plain", 538 (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
629 (const char *) fcomment, 539 EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
630 len)) 540 (const char *) fcomment,
541 len)) )
631 return 0; /* done */ 542 return 0; /* done */
632 gzip_header_length += len + 1; 543 gzip_header_length += len + 1;
633 } 544 }
@@ -640,7 +551,33 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs,
640 gzip_header_length = 0; 551 gzip_header_length = 0;
641#endif 552#endif
642 cfs->gzip_header_length = gzip_header_length; 553 cfs->gzip_header_length = gzip_header_length;
643 return cfs_reset_stream_zlib (cfs); 554
555 if (cfs->gzip_header_length !=
556 bfds_seek (cfs->bfds, cfs->gzip_header_length, SEEK_SET))
557 {
558 LOG ("Failed to seek to start to initialize gzip decompressor\n");
559 return -1;
560 }
561 cfs->strm.avail_out = COM_CHUNK_SIZE;
562 /*
563 * note: maybe plain inflateInit(&strm) is adequate,
564 * it looks more backward-compatible also ;
565 *
566 * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
567 * there might be a better check.
568 */
569 if (Z_OK != inflateInit2 (&cfs->strm,
570#ifdef ZLIB_VERNUM
571 15 + 32
572#else
573 - MAX_WBITS
574#endif
575 ))
576 {
577 LOG ("Failed to initialize zlib decompression\n");
578 return -1;
579 }
580 return 1;
644} 581}
645#endif 582#endif
646 583
@@ -659,9 +596,20 @@ static int
659cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, 596cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs,
660 EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 597 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
661{ 598{
599 if (0 !=
600 bfds_seek (cfs->bfds, 0, SEEK_SET))
601 {
602 LOG ("Failed to seek to start to initialize BZ2 decompressor\n");
603 return -1;
604 }
605 memset (&cfs->bstrm, 0, sizeof (bz_stream));
662 if (BZ_OK != 606 if (BZ_OK !=
663 BZ2_bzDecompressInit (&cfs->bstrm, 0, 0)) 607 BZ2_bzDecompressInit (&cfs->bstrm, 0, 0))
664 return -1; 608 {
609 LOG ("Failed to initialize BZ2 decompressor\n");
610 return -1;
611 }
612 cfs->bstrm.avail_out = COM_CHUNK_SIZE;
665 return 1; 613 return 1;
666} 614}
667#endif 615#endif
@@ -680,6 +628,8 @@ static int
680cfs_init_decompressor (struct CompressedFileSource *cfs, 628cfs_init_decompressor (struct CompressedFileSource *cfs,
681 EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 629 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
682{ 630{
631 cfs->result_pos = 0;
632 cfs->fpos = 0;
683 switch (cfs->compression_type) 633 switch (cfs->compression_type)
684 { 634 {
685#if HAVE_ZLIB 635#if HAVE_ZLIB
@@ -756,6 +706,24 @@ cfs_deinit_decompressor (struct CompressedFileSource *cfs)
756 706
757 707
758/** 708/**
709 * Resets the compression stream to begin uncompressing
710 * from the beginning. Used at initialization time, and when
711 * seeking backward.
712 *
713 * @param cfs cfs to reset
714 * @return 1 on success, 0 to terminate extraction,
715 * -1 on error
716 */
717static int
718cfs_reset_stream (struct CompressedFileSource *cfs)
719{
720 if (-1 == cfs_deinit_decompressor (cfs))
721 return -1;
722 return cfs_init_decompressor (cfs, NULL, NULL);
723}
724
725
726/**
759 * Destroy compressed file source. 727 * Destroy compressed file source.
760 * 728 *
761 * @param cfs source to destroy 729 * @param cfs source to destroy
@@ -826,7 +794,7 @@ cfs_read_zlib (struct CompressedFileSource *cfs,
826 int ret; 794 int ret;
827 size_t rc; 795 size_t rc;
828 ssize_t in; 796 ssize_t in;
829 char buf[COM_CHUNK_SIZE]; 797 unsigned char buf[COM_CHUNK_SIZE];
830 798
831 if (cfs->fpos == cfs->uncompressed_size) 799 if (cfs->fpos == cfs->uncompressed_size)
832 { 800 {
@@ -856,7 +824,12 @@ cfs_read_zlib (struct CompressedFileSource *cfs,
856 LOG ("unexpected EOF\n"); 824 LOG ("unexpected EOF\n");
857 return -1; /* unexpected EOF */ 825 return -1; /* unexpected EOF */
858 } 826 }
859 cfs->strm.next_in = (unsigned char *) buf; 827 if (0 == in)
828 {
829 cfs->uncompressed_size = cfs->fpos;
830 return rc;
831 }
832 cfs->strm.next_in = buf;
860 cfs->strm.avail_in = (uInt) in; 833 cfs->strm.avail_in = (uInt) in;
861 cfs->strm.next_out = (unsigned char *) cfs->result; 834 cfs->strm.next_out = (unsigned char *) cfs->result;
862 cfs->strm.avail_out = COM_CHUNK_SIZE; 835 cfs->strm.avail_out = COM_CHUNK_SIZE;
@@ -905,8 +878,74 @@ cfs_read_bz2 (struct CompressedFileSource *cfs,
905 void *data, 878 void *data,
906 size_t size) 879 size_t size)
907{ 880{
908 LOG ("bz2 decompression not implemented\n"); 881 char *dst = data;
909 return -1; 882 int ret;
883 size_t rc;
884 ssize_t in;
885 char buf[COM_CHUNK_SIZE];
886
887 if (cfs->fpos == cfs->uncompressed_size)
888 {
889 /* end of file */
890 return 0;
891 }
892 rc = 0;
893 if (COM_CHUNK_SIZE > cfs->bstrm.avail_out + cfs->result_pos)
894 {
895 /* got left-over decompressed data from previous round! */
896 in = COM_CHUNK_SIZE - (cfs->bstrm.avail_out + cfs->result_pos);
897 if (in > size)
898 in = size;
899 memcpy (&dst[rc], &cfs->result[cfs->result_pos], in);
900 cfs->fpos += in;
901 cfs->result_pos += in;
902 rc += in;
903 }
904 ret = BZ_OK;
905 while ( (rc < size) && (BZ_STREAM_END != ret) )
906 {
907 /* read block from original data source */
908 in = bfds_read (cfs->bfds,
909 buf, sizeof (buf));
910 if (in < 0)
911 {
912 LOG ("unexpected EOF\n");
913 return -1; /* unexpected EOF */
914 }
915 if (0 == in)
916 {
917 cfs->uncompressed_size = cfs->fpos;
918 return rc;
919 }
920 cfs->bstrm.next_in = buf;
921 cfs->bstrm.avail_in = (uInt) in;
922 cfs->bstrm.next_out = cfs->result;
923 cfs->bstrm.avail_out = COM_CHUNK_SIZE;
924 cfs->result_pos = 0;
925 ret = BZ2_bzDecompress (&cfs->bstrm);
926 if ( (BZ_OK != ret) && (BZ_STREAM_END != ret) )
927 {
928 LOG ("unexpected bzip2 decompress error: %d\n", ret);
929 return -1; /* unexpected error */
930 }
931 /* go backwards by the number of bytes left in the buffer */
932 if (-1 == bfds_seek (cfs->bfds, - (int64_t) cfs->bstrm.avail_in, SEEK_CUR))
933 {
934 LOG ("seek failed\n");
935 return -1;
936 }
937 /* copy decompressed bytes to target buffer */
938 in = COM_CHUNK_SIZE - cfs->bstrm.avail_out;
939 if (in > size - rc)
940 in = size - rc;
941 memcpy (&dst[rc], &cfs->result[cfs->result_pos], in);
942 cfs->fpos += in;
943 cfs->result_pos += in;
944 rc += in;
945 }
946 if (BZ_STREAM_END == ret)
947 cfs->uncompressed_size = cfs->fpos;
948 return rc;
910} 949}
911#endif 950#endif
912 951
@@ -1041,6 +1080,12 @@ cfs_seek (struct CompressedFileSource *cfs,
1041 LOG ("Failed to read decompressed stream for seek operation\n"); 1080 LOG ("Failed to read decompressed stream for seek operation\n");
1042 return -1; 1081 return -1;
1043 } 1082 }
1083 if (0 == ret)
1084 {
1085 LOG ("Reached unexpected end of stream during seek operation\n");
1086 return -1;
1087 }
1088 ASSERT (ret <= delta);
1044 delta -= ret; 1089 delta -= ret;
1045 } 1090 }
1046 return cfs->fpos; 1091 return cfs->fpos;