diff options
Diffstat (limited to 'src/main/extractor_datasource.c')
-rw-r--r-- | src/main/extractor_datasource.c | 912 |
1 files changed, 497 insertions, 415 deletions
diff --git a/src/main/extractor_datasource.c b/src/main/extractor_datasource.c index 2adbdfc..90dd077 100644 --- a/src/main/extractor_datasource.c +++ b/src/main/extractor_datasource.c | |||
@@ -22,10 +22,22 @@ | |||
22 | 22 | ||
23 | #if HAVE_LIBBZ2 | 23 | #if HAVE_LIBBZ2 |
24 | #include <bzlib.h> | 24 | #include <bzlib.h> |
25 | #define MIN_BZ2_HEADER 4 | ||
26 | #ifndef MIN_COMPRESSED_HEADER | ||
27 | #define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER | ||
28 | #endif | ||
25 | #endif | 29 | #endif |
26 | 30 | ||
27 | #if HAVE_ZLIB | 31 | #if HAVE_ZLIB |
28 | #include <zlib.h> | 32 | #include <zlib.h> |
33 | #define MIN_ZLIB_HEADER 12 | ||
34 | #ifndef MIN_COMPRESSED_HEADER | ||
35 | #define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER | ||
36 | #endif | ||
37 | #endif | ||
38 | |||
39 | #ifndef MIN_COMPRESSED_HEADER | ||
40 | #define MIN_COMPRESSED_HEADER -1 | ||
29 | #endif | 41 | #endif |
30 | 42 | ||
31 | #ifndef O_LARGEFILE | 43 | #ifndef O_LARGEFILE |
@@ -37,24 +49,12 @@ | |||
37 | */ | 49 | */ |
38 | #define MAX_READ (4 * 1024 * 1024) | 50 | #define MAX_READ (4 * 1024 * 1024) |
39 | 51 | ||
52 | /** | ||
53 | * Data is read from the source and shoved into decompressor | ||
54 | * in chunks this big. | ||
55 | */ | ||
56 | #define COM_CHUNK_SIZE (10 * 1024) | ||
40 | 57 | ||
41 | #if HAVE_ZLIB | ||
42 | #define MIN_ZLIB_HEADER 12 | ||
43 | #endif | ||
44 | #if HAVE_LIBBZ2 | ||
45 | #define MIN_BZ2_HEADER 4 | ||
46 | #endif | ||
47 | #if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB | ||
48 | #define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER | ||
49 | #endif | ||
50 | #if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2 | ||
51 | #define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER | ||
52 | #endif | ||
53 | #if !defined (MIN_COMPRESSED_HEADER) | ||
54 | #define MIN_COMPRESSED_HEADER -1 | ||
55 | #endif | ||
56 | |||
57 | #define COMPRESSED_DATA_PROBE_SIZE 3 | ||
58 | 58 | ||
59 | /** | 59 | /** |
60 | * Enum with the various possible types of compression supported. | 60 | * Enum with the various possible types of compression supported. |
@@ -106,17 +106,17 @@ struct BufferedFileDataSource | |||
106 | uint64_t fsize; | 106 | uint64_t fsize; |
107 | 107 | ||
108 | /** | 108 | /** |
109 | * Position within the file or the data buffer | 109 | * Position of the buffer in the file. |
110 | */ | 110 | */ |
111 | uint64_t fpos; | 111 | uint64_t fpos; |
112 | 112 | ||
113 | /** | 113 | /** |
114 | * Position within the buffer. | 114 | * Position within the buffer. |
115 | */ | 115 | */ |
116 | uint64_t buffer_pos; | 116 | uint64_t buffer_pos; |
117 | 117 | ||
118 | /** | 118 | /** |
119 | * Number of bytes in the buffer (<= buffer_size) | 119 | * Number of valid bytes in the buffer (<= buffer_size) |
120 | */ | 120 | */ |
121 | uint64_t buffer_bytes; | 121 | uint64_t buffer_bytes; |
122 | 122 | ||
@@ -144,12 +144,22 @@ struct CompressedFileSource | |||
144 | struct BufferedFileDataSource *bfds; | 144 | struct BufferedFileDataSource *bfds; |
145 | 145 | ||
146 | /** | 146 | /** |
147 | * Decompression target buffer. | ||
148 | */ | ||
149 | char result[COM_CHUNK_SIZE]; | ||
150 | |||
151 | /** | ||
152 | * At which offset in 'result' is 'fpos'? | ||
153 | */ | ||
154 | size_t result_pos; | ||
155 | |||
156 | /** | ||
147 | * Size of the source (same as bfds->fsize) | 157 | * Size of the source (same as bfds->fsize) |
148 | */ | 158 | */ |
149 | int64_t fsize; | 159 | int64_t fsize; |
150 | 160 | ||
151 | /** | 161 | /** |
152 | * Position within the source | 162 | * Position within the (decompressed) source |
153 | */ | 163 | */ |
154 | int64_t fpos; | 164 | int64_t fpos; |
155 | 165 | ||
@@ -206,7 +216,7 @@ bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, | |||
206 | return -1; /* invalid */ | 216 | return -1; /* invalid */ |
207 | if (NULL == bfds->buffer) | 217 | if (NULL == bfds->buffer) |
208 | { | 218 | { |
209 | bfds->buffer_bytes = bfds->fsize; | 219 | bfds->buffer_pos = pos; |
210 | return 0; | 220 | return 0; |
211 | } | 221 | } |
212 | #if WINDOWS | 222 | #if WINDOWS |
@@ -219,6 +229,7 @@ bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, | |||
219 | if (position < 0) | 229 | if (position < 0) |
220 | return -1; | 230 | return -1; |
221 | bfds->fpos = position; | 231 | bfds->fpos = position; |
232 | bfds->buffer_pos = 0; | ||
222 | rd = read (bfds->fd, bfds->buffer, bfds->buffer_size); | 233 | rd = read (bfds->fd, bfds->buffer, bfds->buffer_size); |
223 | if (rd < 0) | 234 | if (rd < 0) |
224 | return -1; | 235 | return -1; |
@@ -258,7 +269,7 @@ bfds_new (const void *data, | |||
258 | memset (result, 0, sizeof (struct BufferedFileDataSource)); | 269 | memset (result, 0, sizeof (struct BufferedFileDataSource)); |
259 | result->data = (NULL != data) ? data : &result[1]; | 270 | result->data = (NULL != data) ? data : &result[1]; |
260 | result->buffer = (NULL != data) ? NULL : &result[1]; | 271 | result->buffer = (NULL != data) ? NULL : &result[1]; |
261 | result->buffer_size = (NULL != data) ? fsize : xtra; | 272 | result->buffer_size = (NULL != data) ? fsize : xtra; |
262 | result->fsize = fsize; | 273 | result->fsize = fsize; |
263 | result->fd = fd; | 274 | result->fd = fd; |
264 | bfds_pick_next_buffer_at (result, 0); | 275 | bfds_pick_next_buffer_at (result, 0); |
@@ -297,86 +308,95 @@ bfds_seek (struct BufferedFileDataSource *bfds, | |||
297 | switch (whence) | 308 | switch (whence) |
298 | { | 309 | { |
299 | case SEEK_CUR: | 310 | case SEEK_CUR: |
300 | if (NULL != bfds->buffer) | 311 | if (bfds->fpos + bfds->buffer_pos + pos < 0) |
312 | return -1; | ||
313 | if (bfds->fpos + bfds->buffer_pos + pos > bfds->fsize) | ||
314 | return -1; | ||
315 | if ( (NULL == bfds->buffer) || | ||
316 | ( (bfds->buffer_pos + pos < pos->buffer_bytes) && | ||
317 | (bfds->buffer_pos + pos >= 0) ) ) | ||
301 | { | 318 | { |
302 | if (0 != bfds_pick_next_buffer_at (bfds, | 319 | bfds->buffer_pos += pos; |
303 | bfds->fpos + bfds->buffer_pos + pos)) | 320 | return bfds->buffer_pos; |
304 | return -1; | ||
305 | bfds->buffer_pos = 0; | ||
306 | return bfds->fpos; | ||
307 | } | 321 | } |
308 | bfds->buffer_pos += pos; | 322 | if (0 != bfds_pick_next_buffer_at (bfds, |
309 | return bfds->buffer_pos; | 323 | bfds->fpos + bfds->buffer_pos + pos)) |
324 | return -1; | ||
325 | return bfds->fpos; | ||
326 | case SEEK_END: | ||
327 | if (pos > 0) | ||
328 | return -1; | ||
329 | if (bfds->fsize < - pos) | ||
330 | return -1; | ||
331 | pos = bfds->fsize + pos; | ||
332 | /* fall-through! */ | ||
310 | case SEEK_SET: | 333 | case SEEK_SET: |
311 | if (pos < 0) | 334 | if (pos < 0) |
312 | return -1; | 335 | return -1; |
313 | if (NULL != bfds->buffer) | 336 | if (pos > bfds->fsize) |
314 | { | 337 | return -1; |
315 | if (0 != bfds_pick_next_buffer_at (bfds, pos)) | 338 | if ( (NULL == bfds->buffer) || |
316 | return -1; | 339 | ( (bfds->buffer_pos <= pos) && |
317 | bfds->buffer_pos = 0; | 340 | (bfds->buffer_pos + pos->buffer_bytes > pos) ) ) |
318 | return bfds->fpos; | ||
319 | } | ||
320 | bfds->buffer_pos = pos; | ||
321 | return bfds->buffer_pos; | ||
322 | case SEEK_END: | ||
323 | if (NULL != bfds->buffer) | ||
324 | { | 341 | { |
325 | if (0 != bfds_pick_next_buffer_at (bfds, bfds->fsize + pos)) | 342 | bfds->buffer_pos = pos; |
326 | return -1; | 343 | return bfds->buffer_pos; |
327 | bfds->buffer_pos = 0; | ||
328 | return bfds->fpos; | ||
329 | } | 344 | } |
330 | bfds->buffer_pos = bfds->fsize + pos; | 345 | if (0 != bfds_pick_next_buffer_at (bfds, pos)) |
331 | return bfds->buffer_pos; | 346 | return -1; |
347 | return bfds->fpos; | ||
332 | } | 348 | } |
333 | return -1; | 349 | return -1; |
334 | } | 350 | } |
335 | 351 | ||
336 | 352 | ||
337 | /** | 353 | /** |
338 | * Fills 'buf_ptr' with a chunk of data. | 354 | * Fills 'buf_ptr' with a chunk of data. Will |
339 | * Will seek if necessary. Will fail if 'count' exceeds buffer size. | 355 | * fail if 'count' exceeds buffer size. |
340 | * | 356 | * |
341 | * @param bfds bfds | 357 | * @param bfds bfds |
342 | * @param buf_ptr location to store data | 358 | * @param buf_ptr location to store data |
343 | * @param count number of bytes to read | 359 | * @param count number of bytes to read |
344 | * @return number of bytes (<= count) available at location pointed by buf_ptr | 360 | * @return number of bytes (<= count) available at location pointed by buf_ptr, |
361 | * 0 for end of stream, -1 on error | ||
345 | */ | 362 | */ |
346 | static ssize_t | 363 | static ssize_t |
347 | bfds_read (struct BufferedFileDataSource *bfds, | 364 | bfds_read (struct BufferedFileDataSource *bfds, |
348 | void *buf_ptr, | 365 | void *buf_ptr, |
349 | size_t count) | 366 | size_t count) |
350 | { | 367 | { |
351 | if (count > MAX_READ) | 368 | char *cbuf = buf_ptr; |
352 | return -1; | 369 | uint64_t old_off; |
353 | if (count > bfds->buffer_bytes - bfds->buffer_pos) | 370 | size_t avail; |
371 | size_t ret; | ||
372 | |||
373 | old_off = bfds->fpos + bfds->buffer_pos + bfds->buffer_bytes; | ||
374 | if (old_off == bfds->fsize) | ||
375 | return 0; /* end of stream */ | ||
376 | ret = 0; | ||
377 | while (count > 0) | ||
354 | { | 378 | { |
355 | if (bfds->fpos + bfds->buffer_pos != bfds_seek (bfds, bfds->fpos + bfds->buffer_pos, SEEK_SET)) | 379 | if ( (bfds->buffer_bytes == bfds->buffer_pos) && |
356 | return -1; | 380 | (0 != bfds_pick_next_buffer_at (bfds, |
357 | if (NULL != bfds->buffer) | 381 | bfds->fpos + bfds->buffer_pos + bfds->buffer_bytes)) ) |
358 | { | 382 | { |
359 | *buf_ptr = &bfds->buffer[bfds->buffer_pos]; | 383 | /* revert to original position, invalidate buffer */ |
360 | bfds->buffer_pos += count < bfds->buffer_bytes ? count : bfds->buffer_bytes; | 384 | bfds->fpos = old_off; |
361 | return (count < bfds->buffer_bytes ? count : bfds->buffer_bytes); | 385 | bfds->buffer_bytes = 0; |
362 | } | 386 | bfds->buffer_pos = 0; |
363 | else | 387 | return -1; /* getting more failed */ |
364 | { | ||
365 | int64_t ret = count < (bfds->buffer_bytes - bfds->buffer_pos) ? count : (bfds->buffer_bytes - bfds->buffer_pos); | ||
366 | *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos]; | ||
367 | bfds->buffer_pos += ret; | ||
368 | return ret; | ||
369 | } | 388 | } |
389 | avail = bfds->buffer_bytes - bfds->buffer_pos; | ||
390 | if (avail > count) | ||
391 | avail = count; | ||
392 | if (0 == avail) | ||
393 | abort (); /* must not happen */ | ||
394 | memcpy (&cbuf[ret], &bfds->data[bfds->buffer_pos], avail); | ||
395 | bfds->buffer_pos += avail; | ||
396 | count -= avail; | ||
397 | ret += avail; | ||
370 | } | 398 | } |
371 | else | 399 | return ret; |
372 | { | ||
373 | if (NULL != bfds->buffer) | ||
374 | *buf_ptr = &bfds->buffer[bfds->buffer_pos]; | ||
375 | else | ||
376 | *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos]; | ||
377 | bfds->buffer_pos += count; | ||
378 | return count; | ||
379 | } | ||
380 | } | 400 | } |
381 | 401 | ||
382 | 402 | ||
@@ -395,7 +415,7 @@ cfs_delete (struct CompressedFileSource *cfs) | |||
395 | /** | 415 | /** |
396 | * Reset gz-compressed data stream to the beginning. | 416 | * Reset gz-compressed data stream to the beginning. |
397 | * | 417 | * |
398 | * @return 1 on success, 0 if we failed to seek, | 418 | * @return 1 on success, 0 to terminate extraction, |
399 | * -1 on decompressor initialization failure | 419 | * -1 on decompressor initialization failure |
400 | */ | 420 | */ |
401 | static int | 421 | static int |
@@ -422,13 +442,13 @@ cfs_reset_stream_zlib (struct CompressedFileSource *cfs) | |||
422 | #ifdef ZLIB_VERNUM | 442 | #ifdef ZLIB_VERNUM |
423 | 15 + 32 | 443 | 15 + 32 |
424 | #else | 444 | #else |
425 | -MAX_WBITS | 445 | - MAX_WBITS |
426 | #endif | 446 | #endif |
427 | )) | 447 | )) |
428 | { | 448 | { |
429 | return -1; | 449 | return -1; |
430 | } | 450 | } |
431 | cfs->fpos = cfs->gzip_header_length; | 451 | cfs->fpos = 0; |
432 | cfs->shm_pos = 0; | 452 | cfs->shm_pos = 0; |
433 | cfs->shm_buf_size = 0; | 453 | cfs->shm_buf_size = 0; |
434 | return 1; | 454 | return 1; |
@@ -438,7 +458,7 @@ cfs_reset_stream_zlib (struct CompressedFileSource *cfs) | |||
438 | /** | 458 | /** |
439 | * Reset bz2-compressed data stream to the beginning. | 459 | * Reset bz2-compressed data stream to the beginning. |
440 | * | 460 | * |
441 | * @return 1 on success, 0 if we failed to seek, | 461 | * @return 1 on success, 0 to terminate extraction, |
442 | * -1 on decompressor initialization failure | 462 | * -1 on decompressor initialization failure |
443 | */ | 463 | */ |
444 | static int | 464 | static int |
@@ -455,21 +475,21 @@ cfs_reset_stream_bz2 (struct CompressedFileSource *cfs) | |||
455 | * seeking backward. | 475 | * seeking backward. |
456 | * | 476 | * |
457 | * @param cfs cfs to reset | 477 | * @param cfs cfs to reset |
458 | * @return 1 on success, , 0 if we failed to seek, | 478 | * @return 1 on success, 0 to terminate extraction, |
459 | * -1 on error | 479 | * -1 on error |
460 | */ | 480 | */ |
461 | static int | 481 | static int |
462 | cfs_reset_stream (struct CompressedFileSource *cfs) | 482 | cfs_reset_stream (struct CompressedFileSource *cfs) |
463 | { | 483 | { |
464 | switch (cfs->compression_type) | 484 | switch (cfs->compression_type) |
465 | { | 485 | { |
466 | case COMP_TYPE_ZLIB: | 486 | case COMP_TYPE_ZLIB: |
467 | return cfs_reset_stream_zlib (cfs); | 487 | return cfs_reset_stream_zlib (cfs); |
468 | case COMP_TYPE_BZ2: | 488 | case COMP_TYPE_BZ2: |
469 | return cfs_reset_stream_bz2 (cfs); | 489 | return cfs_reset_stream_bz2 (cfs); |
470 | default: | 490 | default: |
471 | return -1; | 491 | return -1; |
472 | } | 492 | } |
473 | } | 493 | } |
474 | 494 | ||
475 | 495 | ||
@@ -480,98 +500,75 @@ cfs_reset_stream (struct CompressedFileSource *cfs) | |||
480 | * @param cfs cfs to initialize | 500 | * @param cfs cfs to initialize |
481 | * @param proc callback for metadata | 501 | * @param proc callback for metadata |
482 | * @param proc_cls callback cls | 502 | * @param proc_cls callback cls |
483 | * @return 1 on success, -1 on error | 503 | * @return 1 on success, 0 to terminate extraction, -1 on error |
484 | */ | 504 | */ |
485 | static int | 505 | static int |
486 | cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, | 506 | cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, |
487 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 507 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
488 | { | 508 | { |
489 | /* Process gzip header */ | ||
490 | unsigned int gzip_header_length = 10; | 509 | unsigned int gzip_header_length = 10; |
491 | unsigned char data[12]; | 510 | unsigned char hdata[12]; |
492 | int64_t buf_bytes; | ||
493 | int len; | ||
494 | unsigned char *buf; | ||
495 | unsigned char *cptr; | ||
496 | |||
497 | if (sizeof (data) > bfds_read (cfs->bfds, data, sizeof (data))) | ||
498 | return -1; | ||
499 | |||
500 | if (0 != (data[3] & 0x4)) /* FEXTRA set */ | ||
501 | gzip_header_length += 2 + (unsigned) (data[10] & 0xff) + | ||
502 | (((unsigned) (data[11] & 0xff)) * 256); | ||
503 | 511 | ||
504 | if (0 != (data[3] & 0x8)) /* FNAME set */ | 512 | /* Process gzip header */ |
505 | { | 513 | if (sizeof (hdata) > bfds_read (cfs->bfds, hdata, sizeof (hdata))) |
506 | if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) | 514 | return -1; |
507 | return -1; | 515 | if (0 != (hdata[3] & 0x4)) /* FEXTRA set */ |
508 | buf_bytes = bfds_read (cfs->bfds, &buf, 1024); | 516 | gzip_header_length += 2 + (unsigned) (hdata[10] & 0xff) + |
509 | if (buf_bytes <= 0) | 517 | (((unsigned) (hdata[11] & 0xff)) * 256); |
510 | return -1; | ||
511 | cptr = buf; | ||
512 | 518 | ||
513 | len = 0; | 519 | if (0 != (hdata[3] & 0x8)) |
514 | /* stored file name is here */ | ||
515 | while (len < buf_bytes) | ||
516 | { | 520 | { |
517 | if ('\0' == *cptr) | 521 | /* FNAME set */ |
518 | break; | 522 | char fname[1024]; |
519 | cptr++; | 523 | char *cptr; |
520 | len++; | 524 | size_t len; |
525 | ssize_t buf_bytes; | ||
526 | |||
527 | if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) | ||
528 | return -1; | ||
529 | buf_bytes = bfds_read (cfs->bfds, fname, sizeof (fname)); | ||
530 | if (buf_bytes <= 0) | ||
531 | return -1; | ||
532 | if (NULL == (cptr = memchr (fname, 0, buf_bytes))) | ||
533 | return -1; | ||
534 | len = cptr - fname; | ||
535 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, | ||
536 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | ||
537 | fname, | ||
538 | len)) | ||
539 | return 0; /* done */ | ||
540 | gzip_header_length += len + 1; | ||
521 | } | 541 | } |
522 | 542 | ||
523 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, | 543 | if (0 != (hdata[3] & 0x16)) |
524 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | 544 | { |
525 | (const char *) buf, | 545 | /* FCOMMENT set */ |
526 | len)) | 546 | char fcomment[1024]; |
527 | return 0; /* done */ | 547 | char *cptr; |
528 | 548 | ssize_t buf_bytes; | |
529 | /* FIXME: check for correctness */ | 549 | size_t len; |
530 | //gzip_header_length = (cptr - data) + 1; | 550 | |
531 | gzip_header_length += len + 1; | 551 | if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) |
532 | } | 552 | return -1; |
533 | 553 | buf_bytes = bfds_read (cfs->bfds, fcomment, sizeof (fcomment)); | |
534 | if (0 != (data[3] & 0x16)) /* FCOMMENT set */ | 554 | if (buf_bytes <= 0) |
535 | { | 555 | return -1; |
536 | int64_t buf_bytes; | 556 | if (NULL == (cptr = memchr (fcomment, 0, buf_bytes))) |
537 | int len; | 557 | return -1; |
538 | unsigned char *buf; | 558 | len = cptr - fcomment; |
539 | unsigned char *cptr; | 559 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, |
540 | 560 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | |
541 | if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) | 561 | (const char *) fcomment, |
542 | return -1; | 562 | len)) |
543 | buf_bytes = bfds_read (cfs->bfds, &buf, 1024); | 563 | return 0; /* done */ |
544 | if (buf_bytes <= 0) | 564 | gzip_header_length += len + 1; |
545 | return -1; | 565 | } |
546 | cptr = buf; | 566 | if (0 != (hdata[3] & 0x2)) /* FCHRC set */ |
547 | |||
548 | len = 0; | ||
549 | /* stored file name is here */ | ||
550 | while (len < buf_bytes) | ||
551 | { | ||
552 | if ('\0' == *cptr) | ||
553 | break; | ||
554 | cptr++; | ||
555 | len++; | ||
556 | } | ||
557 | |||
558 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, | ||
559 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | ||
560 | (const char *) buf, | ||
561 | len)) | ||
562 | return 0; /* done */ | ||
563 | |||
564 | /* FIXME: check for correctness */ | ||
565 | //gzip_header_length = (cptr - data) + 1; | ||
566 | gzip_header_length += len + 1; | ||
567 | } | ||
568 | |||
569 | if (data[3] & 0x2) /* FCHRC set */ | ||
570 | gzip_header_length += 2; | 567 | gzip_header_length += 2; |
571 | |||
572 | memset (&cfs->strm, 0, sizeof (z_stream)); | 568 | memset (&cfs->strm, 0, sizeof (z_stream)); |
573 | 569 | ||
574 | #ifdef ZLIB_VERNUM | 570 | #ifdef ZLIB_VERNUM |
571 | /* zlib will take care of its header */ | ||
575 | gzip_header_length = 0; | 572 | gzip_header_length = 0; |
576 | #endif | 573 | #endif |
577 | 574 | ||
@@ -604,7 +601,7 @@ cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, | |||
604 | * @param cfs cfs to initialize | 601 | * @param cfs cfs to initialize |
605 | * @param proc callback for metadata | 602 | * @param proc callback for metadata |
606 | * @param proc_cls callback cls | 603 | * @param proc_cls callback cls |
607 | * @return 1 on success, -1 on error | 604 | * @return 1 on success, 0 to terminate extraction, -1 on error |
608 | */ | 605 | */ |
609 | static int | 606 | static int |
610 | cfs_init_decompressor (struct CompressedFileSource *cfs, | 607 | cfs_init_decompressor (struct CompressedFileSource *cfs, |
@@ -671,12 +668,25 @@ cfs_deinit_decompressor (struct CompressedFileSource *cfs) | |||
671 | 668 | ||
672 | 669 | ||
673 | /** | 670 | /** |
671 | * Destroy compressed file source. | ||
672 | * | ||
673 | * @param cfs source to destroy | ||
674 | */ | ||
675 | static void | ||
676 | cfs_destroy (struct CompressedFileSource *cfs) | ||
677 | { | ||
678 | cfs_deinit_decompressor (cfs); | ||
679 | free (cfs); | ||
680 | } | ||
681 | |||
682 | |||
683 | /** | ||
674 | * Allocates and initializes new cfs object. | 684 | * Allocates and initializes new cfs object. |
675 | * | 685 | * |
676 | * @param bfds data source to use | 686 | * @param bfds data source to use |
677 | * @param fsize size of the source | 687 | * @param fsize size of the source |
678 | * @param compression_type type of compression used | 688 | * @param compression_type type of compression used |
679 | * @param proc metadata callback | 689 | * @param proc metadata callback to call with meta data found upon opening |
680 | * @param proc_cls callback cls | 690 | * @param proc_cls callback cls |
681 | * @return newly allocated cfs on success, NULL on error | 691 | * @return newly allocated cfs on success, NULL on error |
682 | */ | 692 | */ |
@@ -686,7 +696,6 @@ cfs_new (struct BufferedFileDataSource *bfds, | |||
686 | enum ExtractorCompressionType compression_type, | 696 | enum ExtractorCompressionType compression_type, |
687 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 697 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
688 | { | 698 | { |
689 | int shm_result; | ||
690 | struct CompressedFileSource *cfs; | 699 | struct CompressedFileSource *cfs; |
691 | 700 | ||
692 | if (NULL == (cfs = malloc (sizeof (struct CompressedFileSource)))) | 701 | if (NULL == (cfs = malloc (sizeof (struct CompressedFileSource)))) |
@@ -696,93 +705,118 @@ cfs_new (struct BufferedFileDataSource *bfds, | |||
696 | cfs->bfds = bfds; | 705 | cfs->bfds = bfds; |
697 | cfs->fsize = fsize; | 706 | cfs->fsize = fsize; |
698 | cfs->uncompressed_size = -1; | 707 | cfs->uncompressed_size = -1; |
708 | if (1 != cfs_init_decompressor (cfs, | ||
709 | proc, proc_cls)) | ||
710 | { | ||
711 | free (cfs); | ||
712 | return NULL; | ||
713 | } | ||
699 | return cfs; | 714 | return cfs; |
700 | } | 715 | } |
701 | 716 | ||
702 | 717 | ||
703 | /** | 718 | /** |
704 | * Data is read from the source and shoved into decompressor | 719 | * Fills 'data' with new uncompressed data. Does the actual |
705 | * in chunks this big. | 720 | * decompression. Will set uncompressed_size on the end of compressed |
706 | */ | 721 | * stream. |
707 | #define COM_CHUNK_SIZE (10*1024) | ||
708 | |||
709 | |||
710 | /** | ||
711 | * Re-fills shm with new uncompressed data, preserving the last | ||
712 | * 'preserve' bytes of existing data as the first 'preserve' bytes | ||
713 | * of the new data. | ||
714 | * Does the actual decompression. Will set uncompressed_size on | ||
715 | * the end of compressed stream. | ||
716 | * | 722 | * |
717 | * @param cfds cfs to read from | 723 | * @param cfds cfs to read from |
718 | * @param preserve number of bytes to preserve (0 to discard all old data) | 724 | * @param data where to copy the data |
719 | * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error | 725 | * @param size number of bytes available in data |
726 | * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error | ||
720 | */ | 727 | */ |
721 | static int | 728 | static ssize_t |
722 | cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve) | 729 | cfs_read_zlib (struct CompressedFileSource *cfs, |
730 | void *data, | ||
731 | size_t size) | ||
723 | { | 732 | { |
733 | char *dst = data; | ||
724 | int ret; | 734 | int ret; |
725 | int64_t rc = preserve; | 735 | size_t rc; |
726 | int64_t total = cfs->strm.total_out; | 736 | ssize_t in; |
737 | char buf[COM_CHUNK_SIZE]; | ||
727 | 738 | ||
728 | if (preserve > 0) | 739 | if (cfs->fpos == cfs->uncompressed_size) |
729 | memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve); | 740 | return 0; |
730 | 741 | rc = 0; | |
731 | while (rc < cfs->shm_size && ret != Z_STREAM_END) | 742 | if (strm.avail_out > 0) |
732 | { | 743 | { |
733 | if (cfs->strm.avail_in == 0) | 744 | /* got left-over decompressed data from previous round! */ |
745 | in = strm.avail_out; | ||
746 | if (in > size) | ||
747 | in = size; | ||
748 | memcpy (&dst[rc], &cfs->result[cfs->result_pos], in); | ||
749 | cfs->fpos += in; | ||
750 | cfs->result_pos += in; | ||
751 | rc += in; | ||
752 | } | ||
753 | ret = Z_OK; | ||
754 | while ( (rc < size) && (Z_STREAM_END != ret) ) | ||
734 | { | 755 | { |
735 | int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in, COM_CHUNK_SIZE); | 756 | /* read block from original data source */ |
736 | if (count <= 0) | 757 | in = bfds_read (cfs->bfds, |
737 | return 0; | 758 | buf, sizeof (buf)); |
759 | if (in <= 0) | ||
760 | return -1; /* unexpected EOF */ | ||
761 | cfs->strm.next_in = buf; | ||
738 | cfs->strm.avail_in = (uInt) count; | 762 | cfs->strm.avail_in = (uInt) count; |
763 | cfs->strm.next_out = cfs->result; | ||
764 | cfs->strm.avail_out = COM_CHUNK_SIZE; | ||
765 | cfs->result_pos = 0; | ||
766 | ret = inflate (&cfs->strm, Z_SYNC_FLUSH); | ||
767 | if ( (Z_OK != ret) && (Z_STREAM_END != ret) ) | ||
768 | return -1; /* unexpected error */ | ||
769 | /* go backwards by the number of bytes left in the buffer */ | ||
770 | if (-1 == bfds_seek (cfs->bfds, - cfs->strm.avail_in, SEEK_CUR)) | ||
771 | return -1; | ||
772 | /* copy decompressed bytes to target buffer */ | ||
773 | in = cfs->strm.total_out; | ||
774 | if (in > size - rc) | ||
775 | in = size - rc; | ||
776 | memcpy (&dst[rc], &cfs->result[cfs->result_pos], in); | ||
777 | cfs->fpos += in; | ||
778 | cfs->result_pos += in; | ||
779 | rc += in; | ||
739 | } | 780 | } |
740 | cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc]; | 781 | if (Z_STREAM_END == ret) |
741 | cfs->strm.avail_out = cfs->shm_size - rc; | 782 | cfs->uncompressed_size = cfs->fpos; |
742 | ret = inflate (&cfs->strm, Z_SYNC_FLUSH); | 783 | return rc; |
743 | if (ret != Z_OK && ret != Z_STREAM_END) | ||
744 | return 0; | ||
745 | rc = cfs->strm.total_out - total; | ||
746 | } | ||
747 | if (ret == Z_STREAM_END) | ||
748 | cfs->uncompressed_size = cfs->strm.total_out; | ||
749 | cfs->shm_pos = preserve; | ||
750 | cfs->shm_buf_size = rc + preserve; | ||
751 | return 1; | ||
752 | } | 784 | } |
753 | 785 | ||
754 | 786 | ||
755 | /** | 787 | /** |
756 | * Re-fills shm with new uncompressed data, preserving the last | 788 | * Fills 'data' with new uncompressed data. Does the actual |
757 | * 'preserve' bytes of existing data as the first 'preserve' bytes | 789 | * decompression. Will set uncompressed_size on the end of compressed |
758 | * of the new data. | 790 | * stream. |
759 | * Does the actual decompression. Will set uncompressed_size on | ||
760 | * the end of compressed stream. | ||
761 | * | 791 | * |
762 | * @param cfds cfs to read from | 792 | * @param cfds cfs to read from |
763 | * @param preserve number of bytes to preserve (0 to discard all old data) | 793 | * @param data where to copy the data |
764 | * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error | 794 | * @param size number of bytes available in data |
795 | * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error | ||
765 | */ | 796 | */ |
766 | static int | 797 | static ssize_t |
767 | cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve) | 798 | cfs_read_bz2 (struct CompressedFileSource *cfs, |
799 | void *data, | ||
800 | size_t size) | ||
768 | { | 801 | { |
769 | return -1; | 802 | return -1; |
770 | } | 803 | } |
771 | 804 | ||
772 | 805 | ||
773 | /** | 806 | /** |
774 | * Re-fills shm with new uncompressed data, preserving the last | 807 | * Fills 'data' with new uncompressed data. Does the actual |
775 | * 'preserve' bytes of existing data as the first 'preserve' bytes | 808 | * decompression. Will set uncompressed_size on the end of compressed |
776 | * of the new data. | 809 | * stream. |
777 | * Does the actual decompression. Will set uncompressed_size on | ||
778 | * the end of compressed stream. | ||
779 | * | 810 | * |
780 | * @param cfds cfs to read from | 811 | * @param cfds cfs to read from |
781 | * @param preserve number of bytes to preserve (0 to discard all old data) | 812 | * @param data where to copy the data |
782 | * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error | 813 | * @param size number of bytes available in data |
814 | * @return number of bytes in data. 0 if no more data can be uncompressed, -1 on error | ||
783 | */ | 815 | */ |
784 | static int64_t | 816 | static ssize_t |
785 | cfs_read (struct CompressedFileSource *cfs, int64_t preserve) | 817 | cfs_read (struct CompressedFileSource *cfs, |
818 | void *data, | ||
819 | size_t size) | ||
786 | { | 820 | { |
787 | switch (cfs->compression_type) | 821 | switch (cfs->compression_type) |
788 | { | 822 | { |
@@ -801,72 +835,44 @@ cfs_read (struct CompressedFileSource *cfs, int64_t preserve) | |||
801 | * requires seeking backwards beyond the boundaries of the buffer, resets the | 835 | * requires seeking backwards beyond the boundaries of the buffer, resets the |
802 | * stream and repeats decompression from the beginning to 'position'. | 836 | * stream and repeats decompression from the beginning to 'position'. |
803 | * | 837 | * |
804 | * @param cfds cfs to seek on | 838 | * @param cfs cfs to seek on |
805 | * @param position new starting point for the buffer | ||
806 | * @return new absolute buffer position, -1 on error or EOS | ||
807 | */ | ||
808 | static int64_t | ||
809 | cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position) | ||
810 | { | ||
811 | int64_t ret; | ||
812 | |||
813 | if (position > cfs->strm.total_out - cfs->shm_buf_size && position < cfs->strm.total_out) | ||
814 | { | ||
815 | ret = cfs_read (cfs, cfs->strm.total_out - position); | ||
816 | if (ret < 0) | ||
817 | return ret; | ||
818 | return position; | ||
819 | } | ||
820 | while (position >= cfs->strm.total_out) | ||
821 | { | ||
822 | if (0 > (ret = cfs_read (cfs, 0))) | ||
823 | return ret; | ||
824 | if (ret == 0) | ||
825 | return position; | ||
826 | } | ||
827 | if (position < cfs->strm.total_out && position > cfs->strm.total_out - cfs->shm_buf_size) | ||
828 | return cfs->strm.total_out - cfs->shm_buf_size; | ||
829 | return -1; | ||
830 | } | ||
831 | |||
832 | |||
833 | /** | ||
834 | * Moves the buffer to 'position' in uncompressed steam. If position | ||
835 | * requires seeking backwards beyond the boundaries of the buffer, resets the | ||
836 | * stream and repeats decompression from the beginning to 'position'. | ||
837 | * | ||
838 | * @param cfds cfs to seek on | ||
839 | * @param position new starting point for the buffer | 839 | * @param position new starting point for the buffer |
840 | * @return new absolute buffer position, -1 on error or EOS | 840 | * @return new absolute buffer position, -1 on error or EOS |
841 | */ | 841 | */ |
842 | static int64_t | 842 | static int64_t |
843 | cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position) | 843 | cfs_seek (struct CompressedFileSource *cfs, |
844 | uint64_t position) | ||
844 | { | 845 | { |
845 | return -1; | 846 | int64_t delta; |
846 | } | 847 | |
847 | 848 | delta = position - cfs->fpos; | |
848 | 849 | if (delta < 0) | |
849 | /** | ||
850 | * Moves the buffer to 'position' in uncompressed steam. If position | ||
851 | * requires seeking backwards beyond the boundaries of the buffer, resets the | ||
852 | * stream and repeats decompression from the beginning to 'position'. | ||
853 | * | ||
854 | * @param cfds cfs to seek on | ||
855 | * @param position new starting point for the buffer | ||
856 | * @return new absolute buffer position, -1 on error or EOS | ||
857 | */ | ||
858 | static int64_t | ||
859 | cfs_seek (struct CompressedFileSource *cfs, int64_t position) | ||
860 | { | ||
861 | switch (cfs->compression_type) | ||
862 | { | 850 | { |
863 | case COMP_TYPE_ZLIB: | 851 | if (result_pos >= - delta) |
864 | return cfs_seek_zlib (cfs, position); | 852 | { |
865 | case COMP_TYPE_BZ2: | 853 | result_pos += delta; |
866 | return cfs_seek_bz2 (cfs, position); | 854 | delta = 0; |
867 | default: | 855 | } |
868 | return -1; | 856 | else |
857 | { | ||
858 | if (-1 == cfs_reset_stream (cfs)) | ||
859 | return -1; | ||
860 | delta = position; | ||
861 | } | ||
862 | } | ||
863 | while (delta > 0) | ||
864 | { | ||
865 | char buf[COM_CHUNK_SIZE]; | ||
866 | size_t max; | ||
867 | int64_t ret; | ||
868 | |||
869 | max = (sizeof (buf) > delta) ? delta : sizeof (buf); | ||
870 | ret = cfs_read (cfs, buf, max); | ||
871 | if (-1 == ret) | ||
872 | return -1; | ||
873 | delta -= ret; | ||
869 | } | 874 | } |
875 | return cfs->fpos; | ||
870 | } | 876 | } |
871 | 877 | ||
872 | 878 | ||
@@ -879,163 +885,239 @@ cfs_seek (struct CompressedFileSource *cfs, int64_t position) | |||
879 | * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression | 885 | * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression |
880 | */ | 886 | */ |
881 | static enum ExtractorCompressionType | 887 | static enum ExtractorCompressionType |
882 | get_compression_type (const unsigned char *data, | 888 | get_compression_type (struct BufferedFileDataSource *bfds) |
883 | int fd, | ||
884 | int64_t fsize) | ||
885 | { | 889 | { |
886 | void *read_data = NULL; | 890 | unsigned char read_data[3]; |
887 | size_t read_data_size = 0; | ||
888 | ssize_t read_result; | ||
889 | enum ExtractorCompressionType result = COMP_TYPE_INVALID; | ||
890 | 891 | ||
891 | if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER)) | 892 | if (0 != bfds_seek (bfds, 0, SEEK_SET)) |
892 | { | ||
893 | return COMP_TYPE_INVALID; | 893 | return COMP_TYPE_INVALID; |
894 | } | 894 | if (sizeof (read_data) != |
895 | if (data == NULL) | 895 | bfds_read (bfds, read_data, sizeof (read_data))) |
896 | { | 896 | return COMP_TYPE_UNDEFINED; |
897 | int64_t position; | 897 | |
898 | read_data_size = COMPRESSED_DATA_PROBE_SIZE; | ||
899 | read_data = malloc (read_data_size); | ||
900 | if (read_data == NULL) | ||
901 | return -1; | ||
902 | #if WINDOWS | ||
903 | position = _lseeki64 (fd, 0, SEEK_CUR); | ||
904 | #elif HAVE_LSEEK64 | ||
905 | position = lseek64 (fd, 0, SEEK_CUR); | ||
906 | #else | ||
907 | position = (int64_t) lseek (fd, 0, SEEK_CUR); | ||
908 | #endif | ||
909 | read_result = READ (fd, read_data, read_data_size); | ||
910 | #if WINDOWS | ||
911 | position = _lseeki64 (fd, position, SEEK_SET); | ||
912 | #elif HAVE_LSEEK64 | ||
913 | position = lseek64 (fd, position, SEEK_SET); | ||
914 | #else | ||
915 | position = lseek (fd, (off_t) position, SEEK_SET); | ||
916 | #endif | ||
917 | if (read_result != read_data_size) | ||
918 | { | ||
919 | free (read_data); | ||
920 | return COMP_TYPE_UNDEFINED; | ||
921 | } | ||
922 | data = (const void *) read_data; | ||
923 | } | ||
924 | #if HAVE_ZLIB | 898 | #if HAVE_ZLIB |
925 | if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08)) | 899 | if ( (bdfs->fsize >= MIN_ZLIB_HEADER) && |
926 | result = COMP_TYPE_ZLIB; | 900 | (data[0] == 0x1f) && |
901 | (data[1] == 0x8b) && | ||
902 | (data[2] == 0x08) ) | ||
903 | return COMP_TYPE_ZLIB; | ||
927 | #endif | 904 | #endif |
928 | #if HAVE_LIBBZ2 | 905 | #if HAVE_LIBBZ2 |
929 | if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h')) | 906 | if ( (bdfs->fsize >= MIN_BZ2_HEADER) && |
930 | result = COMP_TYPE_BZ2; | 907 | (data[0] == 'B') && |
908 | (data[1] == 'Z') && | ||
909 | (data[2] == 'h')) | ||
910 | return COMP_TYPE_BZ2; | ||
931 | #endif | 911 | #endif |
932 | if (read_data != NULL) | 912 | return COMP_TYPE_INVALID; |
933 | free (read_data); | ||
934 | return result; | ||
935 | } | 913 | } |
936 | 914 | ||
937 | 915 | ||
/**
 * Handle to a datasource that is handed to the plugins.
 */
struct EXTRACTOR_Datasource
{

  /**
   * Buffered data source the handle reads from.
   */
  struct BufferedFileDataSource *bfds;

  /**
   * Decompressing source; NULL when not applicable.
   */
  struct CompressedFileSource *cfs;

  /**
   * File descriptor backing the source, or -1 if there is none.
   */
  int fd;
};
937 | |||
938 | |||
939 | /** | ||
940 | * Create a datasource from a file on disk. | ||
941 | * | ||
942 | * @param filename name of the file on disk | ||
943 | * @param proc metadata callback to call with meta data found upon opening | ||
944 | * @param proc_cls callback cls | ||
945 | * @return handle to the datasource, NULL on error | ||
946 | */ | ||
947 | struct EXTRACTOR_Datasource * | ||
948 | EXTRACTOR_datasource_create_from_file_ (const char *filename, | ||
949 | EXTRACTOR_MetaDataProcessor proc, | ||
950 | void *proc_cls) | ||
951 | { | ||
982 | struct BufferedFileDataSource *bfds; | 952 | struct BufferedFileDataSource *bfds; |
983 | bfds = bfds_new (data, fd, fsize); | 953 | struct EXTRACTOR_Datasource *ds; |
984 | if (bfds == NULL) | 954 | enum ExtractorCompressionType ct; |
985 | return; | 955 | int fd; |
986 | 956 | struct stat sb; | |
987 | if (compression_type > 0) | 957 | int64_t fsize; |
988 | { | 958 | |
989 | int icr = 0; | 959 | if (-1 == (fd = open (filename, O_RDONLY | O_LARGEFILE))) |
990 | /* Set up a decompressor. | 960 | return NULL; |
991 | * Will also report compression-related metadata to the caller. | 961 | if ( (0 != fstat (fd, &sb)) || |
992 | */ | 962 | (S_ISDIR (fstatbuf.st_mode)) ) |
993 | cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls); | 963 | { |
994 | if (cfs == NULL) | 964 | (void) close (fd); |
965 | return NULL; | ||
966 | } | ||
967 | fsize = (int64_t) sb.st_size; | ||
968 | if (0 == fsize) | ||
969 | { | ||
970 | (void) close (fd); | ||
971 | return NULL; | ||
972 | } | ||
973 | bfds = bfds_new (NULL, fd, fsize); | ||
974 | if (NULL == bfds) | ||
995 | { | 975 | { |
996 | if (fd != -1) | 976 | (void) close (fd); |
997 | close (fd); | 977 | return NULL; |
998 | errno = EILSEQ; | ||
999 | return; | ||
1000 | } | 978 | } |
1001 | icr = cfs_init_decompressor (cfs, proc, proc_cls); | 979 | if (NULL == (ds = malloc (sizeof (struct EXTRACTOR_Datasource)))) |
1002 | if (icr < 0) | ||
1003 | { | 980 | { |
1004 | if (fd != -1) | 981 | bfds_delete (bfds); |
1005 | close (fd); | 982 | return NULL; |
1006 | errno = EILSEQ; | ||
1007 | return; | ||
1008 | } | 983 | } |
1009 | else if (icr == 0) | 984 | ds->bfds = bfds; |
985 | ds->fd; | ||
986 | ct = get_compression_type (bfds); | ||
987 | if ( (COMP_TYPE_ZLIB == ct) || | ||
988 | (COMP_TYPE_BZ2 == ct) ) | ||
989 | ds->cfs = cfs_new (bfds, fsize, ct, proc, proc_cls); | ||
990 | if (NULL == ds->cfs) | ||
1010 | { | 991 | { |
1011 | if (fd != -1) | 992 | bfds_delete (bfds); |
1012 | close (fd); | 993 | free (ds); |
1013 | errno = 0; | 994 | (void) close (fd); |
1014 | return; | 995 | return NULL; |
1015 | } | 996 | } |
1016 | } | 997 | return ds; |
998 | } | ||
1017 | 999 | ||
1018 | 1000 | ||
1019 | #endif | 1001 | /** |
1002 | * Create a datasource from a buffer in memory. | ||
1003 | * | ||
1004 | * @param buf data in memory | ||
1005 | * @param size number of bytes in 'buf' | ||
1006 | * @param proc metadata callback to call with meta data found upon opening | ||
1007 | * @param proc_cls callback cls | ||
1008 | * @return handle to the datasource | ||
1009 | */ | ||
1010 | struct EXTRACTOR_Datasource * | ||
1011 | EXTRACTOR_datasource_create_from_buffer_ (const char *buf, | ||
1012 | size_t size, | ||
1013 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
1014 | { | ||
1015 | struct BufferedFileDataSource *bfds; | ||
1016 | struct EXTRACTOR_Datasource *ds; | ||
1017 | enum ExtractorCompressionType ct; | ||
1020 | 1018 | ||
1019 | if (0 == size) | ||
1020 | return NULL; | ||
1021 | if (NULL == (bfds = bfds_new (buf, -1, size))) | ||
1022 | return NULL; | ||
1023 | if (NULL == (ds = malloc (sizeof (struct EXTRACTOR_Datasource)))) | ||
1024 | { | ||
1025 | bfds_delete (bfds); | ||
1026 | return NULL; | ||
1027 | } | ||
1028 | ds->bfds = bfds; | ||
1029 | ds->fd; | ||
1030 | ct = get_compression_type (bfds); | ||
1031 | if ( (COMP_TYPE_ZLIB == ct) || | ||
1032 | (COMP_TYPE_BZ2 == ct) ) | ||
1033 | ds->cfs = cfs_new (bfds, fsize, ct, proc, proc_cls); | ||
1034 | if (NULL == ds->cfs) | ||
1035 | { | ||
1036 | bfds_delete (bfds); | ||
1037 | free (ds); | ||
1038 | return NULL; | ||
1039 | } | ||
1040 | return ds; | ||
1041 | } | ||
1021 | 1042 | ||
1022 | 1043 | ||
1023 | /** | 1044 | /** |
1024 | * Destroy a data source. | 1045 | * Destroy a data source. |
1025 | * | 1046 | * |
1026 | * @param datasource source to destroy | 1047 | * @param ds source to destroy |
1027 | */ | 1048 | */ |
1028 | void | 1049 | void |
1029 | EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource) | 1050 | EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *ds) |
1051 | { | ||
1052 | if (NULL != ds->cfs) | ||
1053 | cfs_destroy (ds->cfs); | ||
1054 | bfds_delete (ds->bfds); | ||
1055 | if (-1 != ds->fd) | ||
1056 | (void) close (ds->fd); | ||
1057 | free (ds); | ||
1058 | } | ||
1059 | |||
1060 | |||
1061 | /** | ||
1062 | * Make 'size' bytes of data from the data source available at 'data'. | ||
1063 | * | ||
1064 | * @param cls must be a 'struct EXTRACTOR_Datasource' | ||
1065 | * @param data where the data should be copied to | ||
1066 | * @param size maximum number of bytes requested | ||
1067 | * @return number of bytes now available in data (can be smaller than 'size'), | ||
1068 | * -1 on error | ||
1069 | */ | ||
1070 | ssize_t | ||
1071 | EXTRACTOR_datasource_read_ (void *cls, | ||
1072 | void *data, | ||
1073 | size_t size) | ||
1074 | { | ||
1075 | struct EXTRACTOR_Datasource *ds = cls; | ||
1076 | |||
1077 | if (NULL != ds->cfs) | ||
1078 | return cfs_read (ds->cfs, data, size); | ||
1079 | return bdfs_read (ds->bdfs, data, size); | ||
1080 | } | ||
1081 | |||
1082 | |||
1083 | /** | ||
1084 | * Seek in the datasource. Use 'SEEK_CUR' for whence and 'pos' of 0 to | ||
1085 | * obtain the current position in the file. | ||
1086 | * | ||
1087 | * @param cls must be a 'struct EXTRACTOR_Datasource' | ||
1088 | * @param pos position to seek (see 'man lseek') | ||
1089 | * @param whence how to see (absolute to start, relative, absolute to end) | ||
1090 | * @return new absolute position, UINT64_MAX on error (i.e. desired position | ||
1091 | * does not exist) | ||
1092 | */ | ||
1093 | int64_t | ||
1094 | EXTRACTOR_datasource_seek_ (void *cls, | ||
1095 | uint64_t pos, | ||
1096 | int whence) | ||
1030 | { | 1097 | { |
1031 | if (cfs != NULL) | 1098 | struct EXTRACTOR_Datasource *ds = cls; |
1032 | { | 1099 | |
1033 | cfs_deinit_decompressor (cfs); | 1100 | if (NULL != ds->cfs) |
1034 | cfs_delete (cfs); | 1101 | return cfs_seek (ds->cfs, pos, whence); |
1035 | } | 1102 | return bdfs_seek (ds->bdfs, pos, whence); |
1036 | bfds_delete (bfds); | ||
1037 | if (-1 != fd) | ||
1038 | close(fd); | ||
1039 | } | 1103 | } |
1040 | 1104 | ||
1105 | |||
1106 | /** | ||
1107 | * Determine the overall size of the data source (after compression). | ||
1108 | * | ||
1109 | * @param cls must be a 'struct EXTRACTOR_Datasource' | ||
1110 | * @return overall file size, UINT64_MAX on error or unknown | ||
1111 | */ | ||
1112 | int64_t | ||
1113 | EXTRACTOR_datasource_get_size_ (void *cls) | ||
1114 | { | ||
1115 | struct EXTRACTOR_Datasource *ds = cls; | ||
1116 | |||
1117 | if (NULL != ds->cfs) | ||
1118 | return cfs_seek (ds->cfs, pos, whence); | ||
1119 | return bdfs_seek (ds->bdfs, pos, whence); | ||
1120 | } | ||
1121 | |||
1122 | |||
1041 | /* end of extractor_datasource.c */ | 1123 | /* end of extractor_datasource.c */ |