aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-04-12 16:44:30 +0000
committerChristian Grothoff <christian@grothoff.org>2012-04-12 16:44:30 +0000
commitec05c1c8a5304f221b6fcea24c67c478893e6c4a (patch)
tree8ae643dac478025e7d85b4a6a5f913447d3e57fc
parent3b99af939bfe580c31d02db6aed4aab92cb2643c (diff)
downloadlibextractor-ec05c1c8a5304f221b6fcea24c67c478893e6c4a.tar.gz
libextractor-ec05c1c8a5304f221b6fcea24c67c478893e6c4a.zip
-LRN: minor cleanup, documentation
-rw-r--r--src/main/extractor.c589
-rw-r--r--src/main/extractor_plugins.h51
2 files changed, 570 insertions, 70 deletions
diff --git a/src/main/extractor.c b/src/main/extractor.c
index fc6ca12..cc39e6d 100644
--- a/src/main/extractor.c
+++ b/src/main/extractor.c
@@ -49,16 +49,13 @@
49#define MAX_READ 32 * 1024 * 1024 49#define MAX_READ 32 * 1024 * 1024
50 50
51/** 51/**
52 * How many bytes do we actually try to decompress? (from the beginning
53 * of the file). Limit to 16 MB.
54 */
55#define MAX_DECOMPRESS 16 * 1024 * 1024
56
57/**
58 * Maximum length of a Mime-Type string. 52 * Maximum length of a Mime-Type string.
59 */ 53 */
60#define MAX_MIME_LEN 256 54#define MAX_MIME_LEN 256
61 55
56/**
57 * Maximum length of a shared memory object name
58 */
62#define MAX_SHM_NAME 255 59#define MAX_SHM_NAME 255
63 60
64/** 61/**
@@ -67,15 +64,62 @@
67 */ 64 */
68#define DEBUG 1 65#define DEBUG 1
69 66
67/**
68 * Sent from LE to a plugin to initialize it (open shm,
69 * reset position counters etc).
70 */
70#define MESSAGE_INIT_STATE 0x01 71#define MESSAGE_INIT_STATE 0x01
72
73/**
74 * Sent from LE to a plugin to tell it that shm contents
75 * were updated. Only used for OPMODE_DECOMPRESS.
76 */
71#define MESSAGE_UPDATED_SHM 0x02 77#define MESSAGE_UPDATED_SHM 0x02
78
79/**
80 * Sent from plugin to LE to tell LE that plugin is done
81 * analyzing current file and will send no more data.
82 */
72#define MESSAGE_DONE 0x03 83#define MESSAGE_DONE 0x03
84
85/**
86 * Sent from plugin to LE to tell LE that plugin needs
87 * to read a different part of the source file.
88 */
73#define MESSAGE_SEEK 0x04 89#define MESSAGE_SEEK 0x04
90
91/**
92 * Sent from plugin to LE to tell LE about metadata discovered.
93 */
74#define MESSAGE_META 0x05 94#define MESSAGE_META 0x05
95
96/**
97 * Sent from LE to plugin to make plugin discard its state (unmap
98 * and close shm).
99 */
75#define MESSAGE_DISCARD_STATE 0x06 100#define MESSAGE_DISCARD_STATE 0x06
76 101
102/**
103 * Client provided a memory buffer, analyze it. Creates a shm, copies
104 * buffer contents into it. Does not support seeking (all data comes
105 * in one [big] chunk).
106 */
77#define OPMODE_MEMORY 1 107#define OPMODE_MEMORY 1
108
109/**
110 * Client provided a memory buffer or a file, which contains compressed data.
111 * Creates a shm of limited size and repeatedly fills it with uncompressed
112 * data. Never skips data (has to uncompress every byte, discards unwanted bytes),
113 * can't efficiently seek backwards. Uses MESSAGE_UPDATED_SHM and MESSAGE_SEEK.
114 */
78#define OPMODE_DECOMPRESS 2 115#define OPMODE_DECOMPRESS 2
116
117/**
118 * Client provided a filename. Creates a file-backed shm (on W32) or just
119 * communicates the file name to each plugin, and plugin opens its own file
120 * descriptor of the file (POSIX). Each plugin maps different parts of the
121 * file into its memory independently.
122 */
79#define OPMODE_FILE 3 123#define OPMODE_FILE 3
80 124
81/** 125/**
@@ -92,7 +136,16 @@ struct IpcHeader
92}; 136};
93 137
94#if !WINDOWS 138#if !WINDOWS
95int 139/**
140 * Opens a shared memory object (for later mmapping).
141 * This is the POSIX variant of the plugin_open_* function. Shm is always memory-backed.
142 * If a shm is already opened, closes it before opening a new one.
143 *
144 * @param plugin plugin context
145 * @param shm_name name of the shm.
146 * @return shm id (-1 on error). That is, the result of shm_open() syscall.
147 */
148static int
96plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) 149plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
97{ 150{
98 if (plugin->shm_id != -1) 151 if (plugin->shm_id != -1)
@@ -100,7 +153,17 @@ plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
100 plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); 153 plugin->shm_id = shm_open (shm_name, O_RDONLY, 0);
101 return plugin->shm_id; 154 return plugin->shm_id;
102} 155}
103int 156
157/**
158 * Opens a file (for later mmapping).
159 * This is POSIX variant of the plugin_open_* function.
160 * If a file is already opened, closes it before opening a new one.
161 *
162 * @param plugin plugin context
163 * @param shm_name name of the file to open.
164 * @return file id (-1 on error). That is, the result of open() syscall.
165 */
166static int
104plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) 167plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
105{ 168{
106 if (plugin->shm_id != -1) 169 if (plugin->shm_id != -1)
@@ -109,6 +172,17 @@ plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
109 return plugin->shm_id; 172 return plugin->shm_id;
110} 173}
111#else 174#else
175/**
176 * Opens a shared memory object (for later mmapping).
177 * This is W32 variant of the plugin_open_* function.
178 * Opened shm might be memory-backed or file-backed (depending on how
179 * it was created). shm_name is never a file name, unlike POSIX.
180 * If a shm is already opened, closes it before opening a new one.
181 *
182 * @param plugin plugin context
183 * @param shm_name name of the shared memory object.
184 * @return memory-mapped file handle (NULL on error). That is, the result of OpenFileMapping() syscall.
185 */
112HANDLE 186HANDLE
113plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) 187plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
114{ 188{
@@ -117,6 +191,9 @@ plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
117 plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); 191 plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
118 return plugin->map_handle; 192 return plugin->map_handle;
119} 193}
194/**
195 * Another name for plugin_open_shm().
196 */
120HANDLE 197HANDLE
121plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) 198plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
122{ 199{
@@ -124,6 +201,16 @@ plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name)
124} 201}
125#endif 202#endif
126 203
204/**
205 * Writes @size bytes from @buf into @fd, returns only when
206 * writing is not possible, or when all @size bytes were written
207 * (never does partial writes).
208 *
209 * @param fd fd to write into
210 * @param buf buffer to read from
211 * @param size number of bytes to write
212 * @return number of bytes written (that is - @size), or -1 on error
213 */
127static int 214static int
128write_all (int fd, 215write_all (int fd,
129 const void *buf, 216 const void *buf,
@@ -194,7 +281,20 @@ transmit_reply (void *cls,
194 return 0; 281 return 0;
195} 282}
196 283
197/* init the read/seek wrappers */ 284/**
285 * Initializes an extracting session for a plugin.
286 * opens the file/shm (only in OPMODE_FILE)
287 * sets shm_ptr to NULL (unmaps it, if it was mapped)
288 * sets position to 0
289 * initializes file size to @fsize (may be -1)
290 * sets seek request to 0
291 *
292 * @param plugin plugin context
293 * @param operation_mode the mode of operation (OPMODE_*)
294 * @param fsize size of the source file (may be -1)
295 * @param shm_name name of the shm or file to open
296 * @return 0 on success, non-0 on error.
297 */
198static int 298static int
199init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int64_t fsize, const char *shm_name) 299init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int64_t fsize, const char *shm_name)
200{ 300{
@@ -223,6 +323,14 @@ init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode,
223 return 0; 323 return 0;
224} 324}
225 325
326/**
327 * Deinitializes an extracting session for a plugin.
328 * unmaps shm_ptr (if was mapped)
329 * closes file/shm (if it was opened)
330 * sets map size and shm_ptr to NULL.
331 *
332 * @param plugin plugin context
333 */
226static void 334static void
227discard_state_method (struct EXTRACTOR_PluginList *plugin) 335discard_state_method (struct EXTRACTOR_PluginList *plugin)
228{ 336{
@@ -243,6 +351,15 @@ discard_state_method (struct EXTRACTOR_PluginList *plugin)
243 plugin->shm_ptr = NULL; 351 plugin->shm_ptr = NULL;
244} 352}
245 353
354/**
355 * Main loop function for plugins.
356 * Reads a message from the plugin input pipe and acts on it.
357 * Can be called recursively (once) in OPMODE_DECOMPRESS.
358 * plugin->waiting_for_update == 1 indicates the recursive call.
359 *
360 * @param plugin plugin context
361 * @return 0, always
362 */
246static int 363static int
247process_requests (struct EXTRACTOR_PluginList *plugin) 364process_requests (struct EXTRACTOR_PluginList *plugin)
248{ 365{
@@ -265,6 +382,10 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
265 in = plugin->pipe_in; 382 in = plugin->pipe_in;
266 out = plugin->cpipe_out; 383 out = plugin->cpipe_out;
267 384
385 /* The point of recursing into this function is to request
386 * a seek from LE server and wait for a reply. This snippet
387 * requests a seek.
388 */
268 if (plugin->waiting_for_update == 1) 389 if (plugin->waiting_for_update == 1)
269 { 390 {
270 unsigned char seek_byte = MESSAGE_SEEK; 391 unsigned char seek_byte = MESSAGE_SEEK;
@@ -308,6 +429,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
308 do_break = 1; 429 do_break = 1;
309 break; 430 break;
310 } 431 }
432 /* Fsize may be -1 only in decompression mode */
311 if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0) 433 if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0)
312 { 434 {
313 do_break = 1; 435 do_break = 1;
@@ -329,6 +451,9 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
329 } 451 }
330 shm_name[shm_name_len - 1] = '\0'; 452 shm_name[shm_name_len - 1] = '\0';
331 do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name); 453 do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name);
454 /* in OPMODE_MEMORY and OPMODE_FILE we can start extracting right away,
455 * there won't be UPDATED_SHM message, and we don't need it
456 */
332 if (!do_break && (plugin->operation_mode == OPMODE_MEMORY || 457 if (!do_break && (plugin->operation_mode == OPMODE_MEMORY ||
333 plugin->operation_mode == OPMODE_FILE)) 458 plugin->operation_mode == OPMODE_FILE))
334 { 459 {
@@ -369,6 +494,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
369 break; 494 break;
370 } 495 }
371 /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */ 496 /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */
497 /* Re-map the shm */
372#if !WINDOWS 498#if !WINDOWS
373 if ((-1 == plugin->shm_id) || 499 if ((-1 == plugin->shm_id) ||
374 (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || 500 (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) ||
@@ -387,11 +513,16 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
387#endif 513#endif
388 if (plugin->waiting_for_update == 1) 514 if (plugin->waiting_for_update == 1)
389 { 515 {
516 /* We were only waiting for this one message */
390 do_break = 1; 517 do_break = 1;
391 plugin->waiting_for_update = 2; 518 plugin->waiting_for_update = 2;
392 break; 519 break;
393 } 520 }
521 /* Run extractor on mapped region (recursive call doesn't reach this
522 * point and breaks out earlier).
523 */
394 extract_reply = plugin->extract_method (plugin, transmit_reply, &out); 524 extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
525 /* Unmap the shm */
395#if !WINDOWS 526#if !WINDOWS
396 if ((plugin->shm_ptr != NULL) && 527 if ((plugin->shm_ptr != NULL) &&
397 (plugin->shm_ptr != (void*) -1) ) 528 (plugin->shm_ptr != (void*) -1) )
@@ -403,6 +534,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
403 plugin->shm_ptr = NULL; 534 plugin->shm_ptr = NULL;
404 if (extract_reply == 1) 535 if (extract_reply == 1)
405 { 536 {
537 /* Tell LE that we're done */
406 unsigned char done_byte = MESSAGE_DONE; 538 unsigned char done_byte = MESSAGE_DONE;
407 if (write (out, &done_byte, 1) != 1) 539 if (write (out, &done_byte, 1) != 1)
408 { 540 {
@@ -424,6 +556,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
424 } 556 }
425 else 557 else
426 { 558 {
559 /* Tell LE that we're not done, and we need to seek */
427 unsigned char seek_byte = MESSAGE_SEEK; 560 unsigned char seek_byte = MESSAGE_SEEK;
428 if (write (out, &seek_byte, 1) != 1) 561 if (write (out, &seek_byte, 1) != 1)
429 { 562 {
@@ -439,6 +572,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
439 } 572 }
440 else 573 else
441 { 574 {
575 /* This is mostly to safely skip unrelated messages */
442 int64_t t; 576 int64_t t;
443 size_t t2; 577 size_t t2;
444 read_result2 = read (in, &t, sizeof (int64_t)); 578 read_result2 = read (in, &t, sizeof (int64_t));
@@ -452,9 +586,8 @@ process_requests (struct EXTRACTOR_PluginList *plugin)
452} 586}
453 587
454/** 588/**
455 * 'main' function of the child process. Reads shm-filenames from 589 * 'main' function of the child process. Loads the plugin,
456 * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta 590 * sets up its in and out pipes, then runs the request serving function.
457 * data stream is terminated by an empty entry.
458 * 591 *
459 * @param plugin extractor plugin to use 592 * @param plugin extractor plugin to use
460 * @param in stream to read from 593 * @param in stream to read from
@@ -486,6 +619,7 @@ plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out)
486 close (1); 619 close (1);
487 620
488 plugin->pipe_in = in; 621 plugin->pipe_in = in;
622 /* Compiler will complain, and it's right. This is a kind of hack...*/
489 plugin->cpipe_out = out; 623 plugin->cpipe_out = out;
490 process_requests (plugin); 624 process_requests (plugin);
491 625
@@ -606,11 +740,11 @@ stop_process (struct EXTRACTOR_PluginList *plugin)
606static int 740static int
607write_plugin_data (const struct EXTRACTOR_PluginList *plugin) 741write_plugin_data (const struct EXTRACTOR_PluginList *plugin)
608{ 742{
609 /* only does anything on Windows */ 743 /* This function is only necessary on W32. On POSIX
744 * systems plugin inherits its own data from the parent */
610 return 0; 745 return 0;
611} 746}
612 747
613#define plugin_print(plug, fmt, ...) fprintf (plug->cpipe_in, fmt, ...)
614#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) 748#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size)
615 749
616#else /* WINDOWS */ 750#else /* WINDOWS */
@@ -721,6 +855,28 @@ create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr,
721 return 0; 855 return 0;
722} 856}
723 857
858/**
859 * Writes @size bytes from @buf to @h, using @ov for
860 * overlapped i/o. Deallocates @old_buf and sets it to NULL,
861 * if necessary.
862 * Writes asynchronously, but sequentially (only one writing
863 * operation may be active at any given moment, but it will
864 * be done in background). Thus it is intended to be used
865 * for writing a few big chunks rather than a lot of small pieces.
866 *
867 * The extravagant interface is mainly because this function
868 * does not use a separate struct to group together overlapped
869 * structure, buffer pointer and the handle.
870 *
871 * @param h pipe handle
872 * @param ov overlapped structure pointer
873 * @param buf buffer to read from. Will be copied internally
874 * @param size number of bytes to write
875 * @param old_buf pointer where a copy of previous buffer is stored,
876 * and where a copy of @buf will be stored.
877 *
878 * @return number of bytes written, -1 on error
879 */
724static int 880static int
725write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf) 881write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf)
726{ 882{
@@ -765,39 +921,17 @@ write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsign
765 return -1; 921 return -1;
766} 922}
767 923
768static int
769print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt, ...)
770{
771 va_list va;
772 va_list vacp;
773 size_t size;
774 char *print_buf;
775 int result;
776
777 va_start (va, fmt);
778 va_copy (vacp, va);
779 size = VSNPRINTF (NULL, 0, fmt, vacp) + 1;
780 va_end (vacp);
781 if (size <= 0)
782 {
783 va_end (va);
784 return size;
785 }
786
787 print_buf = malloc (size);
788 if (print_buf == NULL)
789 return -1;
790 VSNPRINTF (print_buf, size, fmt, va);
791 va_end (va);
792
793 result = write_to_pipe (h, ov, print_buf, size, buf);
794 free (buf);
795 return result;
796}
797
798#define plugin_print(plug, fmt, ...) print_to_pipe (plug->cpipe_in, &plug->ov_write, &plug->ov_write_buffer, fmt, ...)
799#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) 924#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer)
800 925
926/**
927 * Communicates plugin data (library name, options) to the plugin
928 * process. This is only necessary on W32, where this information
929 * is not inherited by the plugin, because it is not forked.
930 *
931 * @param plugin plugin context
932 *
933 * @return 0 on success, -1 on failure
934 */
801static int 935static int
802write_plugin_data (struct EXTRACTOR_PluginList *plugin) 936write_plugin_data (struct EXTRACTOR_PluginList *plugin)
803{ 937{
@@ -864,6 +998,14 @@ write_plugin_data (struct EXTRACTOR_PluginList *plugin)
864 return 0; 998 return 0;
865} 999}
866 1000
1001/**
1002 * Reads plugin data from the LE server process.
1003 * Also initializes allocation granularity (duh...).
1004 *
1005 * @param fd the pipe to read from
1006 *
1007 * @return newly allocated plugin context
1008 */
867static struct EXTRACTOR_PluginList * 1009static struct EXTRACTOR_PluginList *
868read_plugin_data (int fd) 1010read_plugin_data (int fd)
869{ 1011{
@@ -995,6 +1137,10 @@ start_process (struct EXTRACTOR_PluginList *plugin)
995 return; 1137 return;
996 } 1138 }
997 1139
1140 /* TODO: write our own plugin-hosting executable? rundll32, for one, has smaller than usual stack size.
1141 * Also, users might freak out seeing over 9000 rundll32 processes (seeing over 9000 processes named
1142 * "libextractor_plugin_helper" is probably less confusing).
1143 */
998 snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh); 1144 snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh);
999 cmd[MAX_PATH] = '\0'; 1145 cmd[MAX_PATH] = '\0';
1000 if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, 1146 if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL,
@@ -1189,10 +1335,10 @@ make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
1189} 1335}
1190 1336
1191/** 1337/**
1192 * Setup a shared memory segment. 1338 * Setup a file-backed shared memory segment.
1193 * 1339 *
1194 * @param ptr set to the location of the map segment
1195 * @param map where to store the map handle 1340 * @param map where to store the map handle
1341 * @param file handle of the file to back the shm
1196 * @param fn name of the mapping 1342 * @param fn name of the mapping
1197 * @param fn_size size available in fn 1343 * @param fn_size size available in fn
1198 * @param size number of bytes to allocated for the mapping 1344 * @param size number of bytes to allocated for the mapping
@@ -1283,32 +1429,109 @@ destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
1283#define O_LARGEFILE 0 1429#define O_LARGEFILE 0
1284#endif 1430#endif
1285 1431
1432/**
1433 * A poor attempt to abstract the data source (file or a memory buffer)
1434 * for the decompressor.
1435 */
1286struct BufferedFileDataSource 1436struct BufferedFileDataSource
1287{ 1437{
1438 /**
1439 * Descriptor of the file to read data from (may be -1)
1440 */
1288 int fd; 1441 int fd;
1442
1443 /**
1444 * Pointer to the buffer to read from (may be NULL)
1445 */
1289 const unsigned char *data; 1446 const unsigned char *data;
1290 1447
1448 /**
1449 * Size of the file (or the data buffer)
1450 */
1291 int64_t fsize; 1451 int64_t fsize;
1452
1453 /**
1454 * Position within the file or the data buffer
1455 */
1292 int64_t fpos; 1456 int64_t fpos;
1293 1457
1458 /**
1459 * A buffer to read into. For fd != -1: when data != NULL,
1460 * data is used directly.
1461 */
1294 unsigned char *buffer; 1462 unsigned char *buffer;
1463
1464 /**
1465 * Position within the buffer.
1466 */
1295 int64_t buffer_pos; 1467 int64_t buffer_pos;
1468
1469 /**
1470 * Number of bytes in the buffer (<= buffer_size)
1471 */
1296 int64_t buffer_bytes; 1472 int64_t buffer_bytes;
1473
1474 /**
1475 * Allocated size of the buffer
1476 */
1297 int64_t buffer_size; 1477 int64_t buffer_size;
1298}; 1478};
1299 1479
1480/**
1481 * Creates a bfds
1482 *
1483 * @param data data buffer to use as a source (NULL if fd != -1)
1484 * @param fd file descriptor to use as a source (-1 if data != NULL)
1485 * @param fsize size of the file (or the buffer)
1486 * @return newly allocated bfds
1487 */
1300struct BufferedFileDataSource * 1488struct BufferedFileDataSource *
1301bfds_new (const unsigned char *data, int fd, int64_t fsize); 1489bfds_new (const unsigned char *data, int fd, int64_t fsize);
1302 1490
1491/**
1492 * Deallocates bfds
1493 *
1494 * @param bfds bfds to deallocate
1495 */
1303void 1496void
1304bfds_delete (struct BufferedFileDataSource *bfds); 1497bfds_delete (struct BufferedFileDataSource *bfds);
1305 1498
1499/**
1500 * Makes bfds seek to @pos and read a chunk of bytes there.
1501 * Changes bfds->fpos, bfds->buffer_bytes and bfds->buffer_pos.
1502 * Does almost nothing for memory-backed bfds.
1503 *
1504 * @param bfds bfds
1505 * @param pos position
1506 * @return 0 on success, -1 on error
1507 */
1306int 1508int
1307bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos); 1509bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos);
1308 1510
1511/**
1512 * Makes bfds seek to @pos in @whence mode.
1513 * Will try to seek within the buffer, will move the buffer location if
1514 * the seek request falls outside of the buffer range.
1515 *
1516 * @param bfds bfds
1517 * @param pos position to seek to
1518 * @param whence one of the seek constants (SEEK_CUR, SEEK_SET, SEEK_END)
1519 * @return new absolute position
1520 */
1309int64_t 1521int64_t
1310bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence); 1522bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence);
1311 1523
1524/**
1525 * Fills @buf_ptr with a pointer to a chunk of data.
1526 * Same as read() but there's no need to allocate or de-allocate the
1527 * memory (since data IS already in memory).
1528 * Will seek if necessary. Will fail if @count exceeds buffer size.
1529 *
1530 * @param bfds bfds
1531 * @param buf_ptr location to store data pointer
1532 * @param count number of bytes to read
1533 * @return number of bytes (<= count) available at location pointed by buf_ptr
1534 */
1312int64_t 1535int64_t
1313bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count); 1536bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count);
1314 1537
@@ -1477,41 +1700,87 @@ enum ExtractorCompressionType
1477 COMP_TYPE_BZ2 = 2 1700 COMP_TYPE_BZ2 = 2
1478}; 1701};
1479 1702
1703/**
1704 * An object from which uncompressed data can be read
1705 */
1480struct CompressedFileSource 1706struct CompressedFileSource
1481{ 1707{
1708 /**
1709 * The type of compression used in the source
1710 */
1482 enum ExtractorCompressionType compression_type; 1711 enum ExtractorCompressionType compression_type;
1712 /**
1713 * The source of data
1714 */
1483 struct BufferedFileDataSource *bfds; 1715 struct BufferedFileDataSource *bfds;
1716 /**
1717 * Size of the source (same as bfds->fsize)
1718 */
1484 int64_t fsize; 1719 int64_t fsize;
1720 /**
1721 * Position within the source
1722 */
1485 int64_t fpos; 1723 int64_t fpos;
1486 1724
1725 /**
1726 * Total size of the uncompressed data. Remains -1 until
1727 * decompression is finished.
1728 */
1487 int64_t uncompressed_size; 1729 int64_t uncompressed_size;
1488 1730
1731 /*
1489 unsigned char *buffer; 1732 unsigned char *buffer;
1490 int64_t buffer_bytes; 1733 int64_t buffer_bytes;
1491 int64_t buffer_len; 1734 int64_t buffer_len;
1735 */
1492 1736
1493#if WINDOWS 1737#if WINDOWS
1738 /**
1739 * W32 handle of the shm into which data is uncompressed
1740 */
1494 HANDLE shm; 1741 HANDLE shm;
1495#else 1742#else
1743 /**
1744 * POSIX id of the shm into which data is uncompressed
1745 */
1496 int shm; 1746 int shm;
1497#endif 1747#endif
1748 /**
1749 * Name of the shm
1750 */
1498 char shm_name[MAX_SHM_NAME + 1]; 1751 char shm_name[MAX_SHM_NAME + 1];
1752 /**
1753 * Pointer to the mapped region of the shm (covers the whole shm)
1754 */
1499 void *shm_ptr; 1755 void *shm_ptr;
1756 /**
1757 * Position within shm
1758 */
1500 int64_t shm_pos; 1759 int64_t shm_pos;
1501 size_t shm_buf_pos; 1760 /**
1761 * Allocated size of the shm
1762 */
1502 int64_t shm_size; 1763 int64_t shm_size;
1764 /**
1765 * Number of bytes in shm (<= shm_size)
1766 */
1503 size_t shm_buf_size; 1767 size_t shm_buf_size;
1504 1768
1505#if HAVE_ZLIB 1769#if HAVE_ZLIB
1770 /**
1771 * ZLIB stream object
1772 */
1506 z_stream strm; 1773 z_stream strm;
1507 int ret; 1774 /**
1508 size_t pos; 1775 * Length of gzip header (may be 0, in that case ZLIB parses the header)
1776 */
1509 int gzip_header_length; 1777 int gzip_header_length;
1510#endif 1778#endif
1511#if HAVE_LIBBZ2 1779#if HAVE_LIBBZ2
1780 /**
1781 * BZ2 stream object
1782 */
1512 bz_stream bstrm; 1783 bz_stream bstrm;
1513 int bret;
1514 size_t bpos;
1515#endif 1784#endif
1516}; 1785};
1517 1786
@@ -1558,13 +1827,10 @@ cfs_reset_stream_zlib (struct CompressedFileSource *cfs)
1558 1827
1559 cfs->fpos = cfs->gzip_header_length; 1828 cfs->fpos = cfs->gzip_header_length;
1560 cfs->shm_pos = 0; 1829 cfs->shm_pos = 0;
1561 cfs->shm_buf_pos = 0;
1562 cfs->shm_buf_size = 0; 1830 cfs->shm_buf_size = 0;
1563 1831
1564#if HAVE_ZLIB 1832#if HAVE_ZLIB
1565 z_stream strm; 1833 z_stream strm;
1566 cfs->ret = 0;
1567 cfs->pos = 0;
1568#endif 1834#endif
1569 return 1; 1835 return 1;
1570} 1836}
@@ -1575,6 +1841,14 @@ cfs_reset_stream_bz2 (struct CompressedFileSource *cfs)
1575 return -1; 1841 return -1;
1576} 1842}
1577 1843
1844/**
1845 * Resets the compression stream to begin uncompressing
1846 * from the beginning. Used at initialization time, and when
1847 * seeking backward.
1848 *
1849 * @param cfs cfs to reset
1850 * @return 1 on success, -1 on error
1851 */
1578int 1852int
1579cfs_reset_stream (struct CompressedFileSource *cfs) 1853cfs_reset_stream (struct CompressedFileSource *cfs)
1580{ 1854{
@@ -1687,10 +1961,11 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, EXTRACTOR_MetaData
1687 return cfs_reset_stream_zlib (cfs); 1961 return cfs_reset_stream_zlib (cfs);
1688} 1962}
1689 1963
1690int 1964static int
1691cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs) 1965cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs)
1692{ 1966{
1693 inflateEnd (&cfs->strm); 1967 inflateEnd (&cfs->strm);
1968 return 1;
1694} 1969}
1695 1970
1696static int 1971static int
@@ -1705,6 +1980,15 @@ cfs_deinit_decompressor_bz2 (struct CompressedFileSource *cfs)
1705 return -1; 1980 return -1;
1706} 1981}
1707 1982
1983/**
1984 * Initializes decompression object. Might report metadata about
1985 * compressed stream, if available. Resets the stream to the beginning.
1986 *
1987 * @param cfs cfs to initialize
1988 * @param proc callback for metadata
1989 * @param proc_cls callback cls
1990 * @return 1 on success, -1 on error
1991 */
1708static int 1992static int
1709cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 1993cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
1710{ 1994{
@@ -1719,6 +2003,12 @@ cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProce
1719 } 2003 }
1720} 2004}
1721 2005
2006/**
2007 * Deinitializes decompression object.
2008 *
2009 * @param cfs cfs to deinitialize
2010 * @return 1 on success, -1 on error
2011 */
1722static int 2012static int
1723cfs_deinit_decompressor (struct CompressedFileSource *cfs) 2013cfs_deinit_decompressor (struct CompressedFileSource *cfs)
1724{ 2014{
@@ -1733,6 +2023,16 @@ cfs_deinit_decompressor (struct CompressedFileSource *cfs)
1733 } 2023 }
1734} 2024}
1735 2025
2026/**
2027 * Allocates and initializes new cfs object.
2028 *
2029 * @param bfds data source to use
2030 * @param fsize size of the source
2031 * @param compression_type type of compression used
2032 * @param proc metadata callback
2033 * @param proc_cls callback cls
2034 * @return newly allocated cfs on success, NULL on error
2035 */
1736struct CompressedFileSource * 2036struct CompressedFileSource *
1737cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 2037cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
1738{ 2038{
@@ -1761,6 +2061,10 @@ cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompr
1761 return cfs; 2061 return cfs;
1762} 2062}
1763 2063
2064/**
2065 * Data is read from the source and shoved into decompressor
2066 * in chunks this big.
2067 */
1764#define COM_CHUNK_SIZE (10*1024) 2068#define COM_CHUNK_SIZE (10*1024)
1765 2069
1766int 2070int
@@ -1801,6 +2105,17 @@ cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve)
1801 return -1; 2105 return -1;
1802} 2106}
1803 2107
2108/**
2109 * Re-fills shm with new uncompressed data, preserving the last
2110 * @preserve bytes of existing data as the first @preserve bytes
2111 * of the new data.
2112 * Does the actual decompression. Will set uncompressed_size on
2113 * the end of compressed stream.
2114 *
2115 * @param cfs cfs to read from
2116 * @param preserve number of bytes to preserve (0 to discard all old data)
2117 * @return number of bytes in shm. 0 if no more data can be uncompressed.
2118 */
1804int64_t 2119int64_t
1805cfs_read (struct CompressedFileSource *cfs, int64_t preserve) 2120cfs_read (struct CompressedFileSource *cfs, int64_t preserve)
1806{ 2121{
@@ -1844,6 +2159,15 @@ cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position)
1844 return -1; 2159 return -1;
1845} 2160}
1846 2161
2162/**
2163 * Moves the buffer to @position in uncompressed stream. If position
2164 * requires seeking backwards beyond the boundaries of the buffer, resets the
2165 * stream and repeats decompression from the beginning to @position.
2166 *
2168 * @param cfs cfs to seek on
2168 * @param position new starting point for the buffer
2169 * @return new absolute buffer position, -1 on error or EOS
2170 */
1847int64_t 2171int64_t
1848cfs_seek (struct CompressedFileSource *cfs, int64_t position) 2172cfs_seek (struct CompressedFileSource *cfs, int64_t position)
1849{ 2173{
@@ -1920,8 +2244,17 @@ get_compression_type (const unsigned char *data, int fd, int64_t fsize)
1920 return result; 2244 return result;
1921} 2245}
1922 2246
2247/**
2248 * Initializes plugin state. Calls init_state_method()
2249 * directly or indirectly.
2250 *
2251 * @param plugin plugin to initialize
2252 * @param operation_mode operation mode
2253 * @param shm_name name of the shm/file
2254 * @param fsize file size (may be -1)
2255 */
1923static void 2256static void
1924init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int fd, const char *shm_name, int64_t fsize) 2257init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, const char *shm_name, int64_t fsize)
1925{ 2258{
1926 int write_result; 2259 int write_result;
1927 int init_state_size; 2260 int init_state_size;
@@ -1970,6 +2303,12 @@ init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode,
1970 } 2303 }
1971} 2304}
1972 2305
2306/**
2307 * Discards plugin state. Calls discard_state_method()
2308 * directly or indirectly.
2309 *
2310 * @param plugin plugin whose state is discarded
2311 */
1973static void 2312static void
1974discard_plugin_state (struct EXTRACTOR_PluginList *plugin) 2313discard_plugin_state (struct EXTRACTOR_PluginList *plugin)
1975{ 2314{
@@ -2002,6 +2341,17 @@ discard_plugin_state (struct EXTRACTOR_PluginList *plugin)
2002 } 2341 }
2003} 2342}
2004 2343
2344/**
2345 * Forces plugin to move the buffer window to @pos.
2346 *
2347 * @param plugin plugin context
2348 * @param pos position to move to
2349 * @param want_start 1 if the caller is interested in the beginning of the
2350 * window, 0 if the caller is interested in its end. Window position
2351 * must be aligned to page size, and this parameter controls the
2352 * direction of window shift. 0 is used mostly by SEEK_END.
2353 * @return 0 on success, -1 on error
2354 */
2005static int 2355static int
2006pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_t want_start) 2356pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_t want_start)
2007{ 2357{
@@ -2114,6 +2464,7 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_
2114 int64_t old_pos; 2464 int64_t old_pos;
2115 old_pos = plugin->fpos + plugin->shm_pos; 2465 old_pos = plugin->fpos + plugin->shm_pos;
2116 plugin->seek_request = pos; 2466 plugin->seek_request = pos;
2467 /* Recurse into request loop to wait for shm update */
2117 while (plugin->fpos != pos) 2468 while (plugin->fpos != pos)
2118 { 2469 {
2119 plugin->waiting_for_update = 1; 2470 plugin->waiting_for_update = 1;
@@ -2127,18 +2478,28 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_
2127 { 2478 {
2128 if (pos < plugin->fpos) 2479 if (pos < plugin->fpos)
2129 { 2480 {
2130 if (1 != cfs_reset_stream (plugin->state)) 2481 if (1 != cfs_reset_stream (plugin->pass_cfs))
2131 return -1; 2482 return -1;
2132 } 2483 }
2133 while (plugin->fpos < pos && plugin->fpos >= 0) 2484 while (plugin->fpos < pos && plugin->fpos >= 0)
2134 plugin->fpos = cfs_seek (plugin->state, pos); 2485 plugin->fpos = cfs_seek (plugin->pass_cfs, pos);
2135 plugin->fsize = ((struct CompressedFileSource *)plugin->state)->uncompressed_size; 2486 plugin->fsize = ((struct CompressedFileSource *)plugin->pass_cfs)->uncompressed_size;
2136 plugin->shm_pos = pos - plugin->fpos; 2487 plugin->shm_pos = pos - plugin->fpos;
2137 } 2488 }
2138 return 0; 2489 return 0;
2139 } 2490 }
2140} 2491}
2141 2492
2493/**
2494 * Moves current absolute buffer position to @pos in @whence mode.
2495 * Will move logical position without shifting the buffer, if possible.
2496 * Will not move beyond the end of file.
2497 *
2498 * @param plugin plugin context
2499 * @param pos position to move to
2500 * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END)
2501 * @return new absolute position, -1 on error
2502 */
2142int64_t 2503int64_t
2143pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) 2504pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence)
2144{ 2505{
@@ -2203,6 +2564,17 @@ pl_get_pos (struct EXTRACTOR_PluginList *plugin)
2203 return plugin->fpos + plugin->shm_pos; 2564 return plugin->fpos + plugin->shm_pos;
2204} 2565}
2205 2566
2567/**
2568 * Fills @data with a pointer to the data buffer.
2569 * Equivalent to read(), except you don't have to allocate and free
2570 * a buffer, since the data is already in memory.
2571 * Will move the buffer, if necessary
2572 *
2573 * @param plugin plugin context
2574 * @param data location to store data pointer
2575 * @param count number of bytes to read
2576 * @return number of bytes (<= count) available in @data, -1 on error
2577 */
2206int64_t 2578int64_t
2207pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) 2579pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count)
2208{ 2580{
@@ -2226,6 +2598,17 @@ pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count
2226 } 2598 }
2227} 2599}
2228 2600
2601/**
2602 * Transmits information about updated shm to plugin.
2603 * For OPMODE_DECOMPRESS only.
2604 *
2605 * @param plugin plugin context
2606 * @param position current absolute position in uncompressed stream
2607 * @param map_size number of bytes that are available in shm
2608 * @param fsize total size of the uncompressed stream (might be -1)
2609 * @param operation_mode mode of operation
2610 * @return 0 on success, 1 on error
2611 */
2229static int 2612static int
2230give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size, int64_t fsize, uint8_t operation_mode) 2613give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size, int64_t fsize, uint8_t operation_mode)
2231{ 2614{
@@ -2272,6 +2655,14 @@ give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_
2272 } 2655 }
2273} 2656}
2274 2657
2658/**
2659 * Calls _extract_method of in-process plugin.
2660 *
2661 * @param plugin plugin context
2662 * @param shm_ptr pointer to the data buffer
2663 * @param proc metadata callback
2664 * @param proc_cls callback cls
2665 */
2275static void 2666static void
2276ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 2667ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
2277{ 2668{
@@ -2297,6 +2688,14 @@ ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRA
2297} 2688}
2298 2689
2299#if !WINDOWS 2690#if !WINDOWS
2691/**
2692 * Receive @size bytes from plugin, store them in @buf
2693 *
2694 * @param plugin plugin context
2695 * @param buf buffer to fill
2696 * @param size number of bytes to read
2697 * @return number of bytes read, 0 on EOS, < 0 on error
2698 */
2300int 2699int
2301plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) 2700plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size)
2302{ 2701{
@@ -2312,6 +2711,14 @@ plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t siz
2312 return read_count; 2711 return read_count;
2313} 2712}
2314#else 2713#else
2714/**
2715 * Receive @size bytes from plugin, store them in @buf
2716 *
2717 * @param plugin plugin context
2718 * @param buf buffer to fill
2719 * @param size number of bytes to read
2720 * @return number of bytes read, 0 on EOS, < 0 on error
2721 */
2315int 2722int
2316plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) 2723plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size)
2317{ 2724{
@@ -2329,6 +2736,14 @@ plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t siz
2329} 2736}
2330#endif 2737#endif
2331 2738
2739/**
2740 * Receive a reply from plugin (seek request, metadata and done message)
2741 *
2742 * @param plugin plugin context
2743 * @param proc metadata callback
2744 * @param proc_cls callback cls
2745 * @return 0 on success, -1 on error
2746 */
2332static int 2747static int
2333receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 2748receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
2334{ 2749{
@@ -2397,6 +2812,19 @@ receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor
2397} 2812}
2398 2813
2399#if !WINDOWS 2814#if !WINDOWS
2815/**
2816 * Wait for one of the plugins to reply.
2817 * Selects on plugin output pipes, runs receive_reply()
2818 * on each activated pipe until it gets a seek request
2819 * or a done message. Called repeatedly by the user until all pipes are dry or
2820 * broken.
2821 *
2822 * @param plugins to select upon
2823 * @param proc metadata callback
2824 * @param proc_cls callback cls
2825 * @return number of dry/broken pipes since last call, -1 on error or if no
2826 * plugins reply in 10 seconds.
2827 */
2400static int 2828static int
2401wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 2829wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
2402{ 2830{
@@ -2464,6 +2892,21 @@ wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcesso
2464 return result; 2892 return result;
2465} 2893}
2466#else 2894#else
2895/**
2896 * Wait for one of the plugins to reply.
2897 * Selects on plugin output pipes, runs receive_reply()
2898 * on each activated pipe until it gets a seek request
2899 * or a done message. Called repeatedly by the user until all pipes are dry or
2900 * broken.
2901 * This W32 version of wait_for_reply() can't select on more than 64 plugins
2902 * at once (returns -1 if there are more than 64 plugins).
2903 *
2904 * @param plugins to select upon
2905 * @param proc metadata callback
2906 * @param proc_cls callback cls
2907 * @return number of dry/broken pipes since last call, -1 on error or if no
2908 * plugins reply in 10 seconds.
2909 */
2467static int 2910static int
2468wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) 2911wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
2469{ 2912{
@@ -2563,6 +3006,16 @@ wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcesso
2563 3006
2564#endif 3007#endif
2565 3008
3009/**
3010 * Checks the seek requests that plugins made, finds the one with
3011 * smallest offset from the beginning of the stream, and satisfies it.
3012 *
3013 * @param plugins to check
3014 * @param cfs compressed file source to seek in
3015 * @param current_position current stream position
3016 * @param map_size number of bytes currently buffered
3017 * @return new stream position, -1 on error
3018 */
2566static int64_t 3019static int64_t
2567seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct CompressedFileSource *cfs, int64_t current_position, int64_t map_size) 3020seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct CompressedFileSource *cfs, int64_t current_position, int64_t map_size)
2568{ 3021{
@@ -2617,9 +3070,9 @@ load_in_process_plugin (struct EXTRACTOR_PluginList *plugin)
2617 * @param plugins the list of plugins to use 3070 * @param plugins the list of plugins to use
2618 * @param data data to process, or NULL if fd is not -1 3071 * @param data data to process, or NULL if fd is not -1
2619 * @param fd file to read data from, or -1 if data is not NULL 3072 * @param fd file to read data from, or -1 if data is not NULL
2620 * @param fsize size of data or size of file 3073 * @param filename name of the file to which fd belongs
2621 * @param buffer a buffer with data alteady read from the file (if fd != -1) 3074 * @param cfs compressed file source for compressed stream (may be NULL)
2622 * @param buffer_size size of buffer 3075 * @param fsize size of the file or data buffer
2623 * @param proc function to call for each meta data item found 3076 * @param proc function to call for each meta data item found
2624 * @param proc_cls cls argument to proc 3077 * @param proc_cls cls argument to proc
2625 */ 3078 */
@@ -2700,21 +3153,21 @@ do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, cons
2700 if (operation_mode == OPMODE_DECOMPRESS) 3153 if (operation_mode == OPMODE_DECOMPRESS)
2701 { 3154 {
2702 for (ppos = plugins; NULL != ppos; ppos = ppos->next) 3155 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
2703 init_plugin_state (ppos, operation_mode, -1, cfs->shm_name, -1); 3156 init_plugin_state (ppos, operation_mode, cfs->shm_name, -1);
2704 } 3157 }
2705 else if (operation_mode == OPMODE_FILE) 3158 else if (operation_mode == OPMODE_FILE)
2706 { 3159 {
2707 for (ppos = plugins; NULL != ppos; ppos = ppos->next) 3160 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
2708#if !WINDOWS 3161#if !WINDOWS
2709 init_plugin_state (ppos, operation_mode, fd, filename, fsize); 3162 init_plugin_state (ppos, operation_mode, filename, fsize);
2710#else 3163#else
2711 init_plugin_state (ppos, operation_mode, fd, shm_name, fsize); 3164 init_plugin_state (ppos, operation_mode, shm_name, fsize);
2712#endif 3165#endif
2713 } 3166 }
2714 else 3167 else
2715 { 3168 {
2716 for (ppos = plugins; NULL != ppos; ppos = ppos->next) 3169 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
2717 init_plugin_state (ppos, operation_mode, -1, shm_name, fsize); 3170 init_plugin_state (ppos, operation_mode, shm_name, fsize);
2718 } 3171 }
2719 3172
2720 if (operation_mode == OPMODE_FILE || operation_mode == OPMODE_MEMORY) 3173 if (operation_mode == OPMODE_FILE || operation_mode == OPMODE_MEMORY)
@@ -2749,7 +3202,7 @@ do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, cons
2749 { 3202 {
2750 /* Pass this way. we'll need it to call cfs functions later on */ 3203 /* Pass this way. we'll need it to call cfs functions later on */
2751 /* This is a special case */ 3204 /* This is a special case */
2752 ppos->state = cfs; 3205 ppos->pass_cfs = cfs;
2753 ask_in_process_plugin (ppos, cfs->shm_ptr, proc, proc_cls); 3206 ask_in_process_plugin (ppos, cfs->shm_ptr, proc, proc_cls);
2754 } 3207 }
2755 while (plugins_not_ready > 0 && !kill_plugins) 3208 while (plugins_not_ready > 0 && !kill_plugins)
diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h
index bb9baec..fc53fd4 100644
--- a/src/main/extractor_plugins.h
+++ b/src/main/extractor_plugins.h
@@ -101,6 +101,10 @@ struct EXTRACTOR_PluginList
101#else 101#else
102 HANDLE cpipe_in; 102 HANDLE cpipe_in;
103#endif 103#endif
104
105 /**
106 * Pipe used by plugin to read from its parent.
107 */
104 int pipe_in; 108 int pipe_in;
105 109
106 /** 110 /**
@@ -110,36 +114,71 @@ struct EXTRACTOR_PluginList
110 int64_t seek_request; 114 int64_t seek_request;
111 115
112#if !WINDOWS 116#if !WINDOWS
117 /**
118 * ID of the shm object
119 */
113 int shm_id; 120 int shm_id;
114#else 121#else
122 /**
123 * Handle of the shm object
124 */
115 HANDLE map_handle; 125 HANDLE map_handle;
116#endif 126#endif
117 127
118 void *state; 128 /**
129 * Used to pass cfs pointer to in-process plugin in OPMODE_DECOMPRESS
130 */
131 void *pass_cfs;
119 132
133 /**
134 * Uncompressed stream size. Initially -1, until file is fully decompressed
135 * (for sources that are not compressed it is set from the start).
136 */
120 int64_t fsize; 137 int64_t fsize;
121 138
139 /**
140 * Absolute position within the stream
141 */
122 int64_t fpos; 142 int64_t fpos;
123 143
144 /**
145 * Pointer to the shared memory segment
146 */
124 unsigned char *shm_ptr; 147 unsigned char *shm_ptr;
125 148
149 /**
150 * Number of bytes in the segment
151 */
126 int64_t map_size; 152 int64_t map_size;
127 153
154 /**
155 * Position within the segment
156 */
128 int64_t shm_pos; 157 int64_t shm_pos;
129 158
159#if !WINDOWS
130 /** 160 /**
131 * Pipe used to read information about extracted meta data from 161 * Pipe used to read information about extracted meta data from
132 * the plugin child process. -1 if not initialized. 162 * the plugin child process. -1 if not initialized.
133 */ 163 */
134#if !WINDOWS
135 int cpipe_out; 164 int cpipe_out;
136#else 165#else
166 /**
167 * Pipe used to read information about extracted meta data from
168 * the plugin child process. -1 if not initialized.
169 */
137 HANDLE cpipe_out; 170 HANDLE cpipe_out;
138#endif 171#endif
139 172
140#if !WINDOWS 173#if !WINDOWS
174 /**
175 * Page size. Mmap offset is a multiple of this number.
176 */
141 long allocation_granularity; 177 long allocation_granularity;
142#else 178#else
179 /**
180 * Page size. Mmap offset is a multiple of this number.
181 */
143 DWORD allocation_granularity; 182 DWORD allocation_granularity;
144#endif 183#endif
145 184
@@ -160,7 +199,15 @@ struct EXTRACTOR_PluginList
160 unsigned char *ov_write_buffer; 199 unsigned char *ov_write_buffer;
161#endif 200#endif
162 201
202 /**
203 * Mode of operation. One of the OPMODE_* constants
204 */
163 uint8_t operation_mode; 205 uint8_t operation_mode;
206
207 /**
208 * 1 if plugin is currently in a recursive process_requests() call,
209 * 0 otherwise
210 */
164 int waiting_for_update; 211 int waiting_for_update;
165}; 212};
166 213