diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-04-12 16:44:30 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-04-12 16:44:30 +0000 |
commit | ec05c1c8a5304f221b6fcea24c67c478893e6c4a (patch) | |
tree | 8ae643dac478025e7d85b4a6a5f913447d3e57fc | |
parent | 3b99af939bfe580c31d02db6aed4aab92cb2643c (diff) | |
download | libextractor-ec05c1c8a5304f221b6fcea24c67c478893e6c4a.tar.gz libextractor-ec05c1c8a5304f221b6fcea24c67c478893e6c4a.zip |
-LRN: minor cleanup, documentation
-rw-r--r-- | src/main/extractor.c | 589 | ||||
-rw-r--r-- | src/main/extractor_plugins.h | 51 |
2 files changed, 570 insertions, 70 deletions
diff --git a/src/main/extractor.c b/src/main/extractor.c index fc6ca12..cc39e6d 100644 --- a/src/main/extractor.c +++ b/src/main/extractor.c | |||
@@ -49,16 +49,13 @@ | |||
49 | #define MAX_READ 32 * 1024 * 1024 | 49 | #define MAX_READ 32 * 1024 * 1024 |
50 | 50 | ||
51 | /** | 51 | /** |
52 | * How many bytes do we actually try to decompress? (from the beginning | ||
53 | * of the file). Limit to 16 MB. | ||
54 | */ | ||
55 | #define MAX_DECOMPRESS 16 * 1024 * 1024 | ||
56 | |||
57 | /** | ||
58 | * Maximum length of a Mime-Type string. | 52 | * Maximum length of a Mime-Type string. |
59 | */ | 53 | */ |
60 | #define MAX_MIME_LEN 256 | 54 | #define MAX_MIME_LEN 256 |
61 | 55 | ||
56 | /** | ||
57 | * Maximum length of a shared memory object name | ||
58 | */ | ||
62 | #define MAX_SHM_NAME 255 | 59 | #define MAX_SHM_NAME 255 |
63 | 60 | ||
64 | /** | 61 | /** |
@@ -67,15 +64,62 @@ | |||
67 | */ | 64 | */ |
68 | #define DEBUG 1 | 65 | #define DEBUG 1 |
69 | 66 | ||
67 | /** | ||
68 | * Sent from LE to a plugin to initialize it (open shm, | ||
69 | * reset position counters etc). | ||
70 | */ | ||
70 | #define MESSAGE_INIT_STATE 0x01 | 71 | #define MESSAGE_INIT_STATE 0x01 |
72 | |||
73 | /** | ||
74 | * Sent from LE to a plugin to tell it that shm contents | ||
75 | * were updated. Only used for OPMODE_DECOMPRESS. | ||
76 | */ | ||
71 | #define MESSAGE_UPDATED_SHM 0x02 | 77 | #define MESSAGE_UPDATED_SHM 0x02 |
78 | |||
79 | /** | ||
80 | * Sent from plugin to LE to tell LE that plugin is done | ||
81 | * analyzing current file and will send no more data. | ||
82 | */ | ||
72 | #define MESSAGE_DONE 0x03 | 83 | #define MESSAGE_DONE 0x03 |
84 | |||
85 | /** | ||
86 | * Sent from plugin to LE to tell LE that plugin needs | ||
87 | * to read a different part of the source file. | ||
88 | */ | ||
73 | #define MESSAGE_SEEK 0x04 | 89 | #define MESSAGE_SEEK 0x04 |
90 | |||
91 | /** | ||
92 | * Sent from plugin to LE to tell LE about metadata discovered. | ||
93 | */ | ||
74 | #define MESSAGE_META 0x05 | 94 | #define MESSAGE_META 0x05 |
95 | |||
96 | /** | ||
97 | * Sent from LE to plugin to make plugin discard its state (unmap | ||
98 | * and close shm). | ||
99 | */ | ||
75 | #define MESSAGE_DISCARD_STATE 0x06 | 100 | #define MESSAGE_DISCARD_STATE 0x06 |
76 | 101 | ||
102 | /** | ||
103 | * Client provided a memory buffer, analyze it. Creates a shm, copies | ||
104 | * buffer contents into it. Does not support seeking (all data comes | ||
105 | * in one [big] chunk). | ||
106 | */ | ||
77 | #define OPMODE_MEMORY 1 | 107 | #define OPMODE_MEMORY 1 |
108 | |||
109 | /** | ||
110 | * Client provided a memory buffer or a file, which contains compressed data. | ||
111 | * Creates a shm of limited size and repeatedly fills it with uncompressed | ||
112 | * data. Never skips data (has to uncompress every byte, discards unwanted bytes), | ||
113 | * can't efficiently seek backwards. Uses MESSAGE_UPDATED_SHM and MESSAGE_SEEK. | ||
114 | */ | ||
78 | #define OPMODE_DECOMPRESS 2 | 115 | #define OPMODE_DECOMPRESS 2 |
116 | |||
117 | /** | ||
118 | * Client provided a filename. Creates a file-backed shm (on W32) or just | ||
119 | * communicates the file name to each plugin, and plugin opens its own file | ||
120 | * descriptor of the file (POSIX). Each plugin maps different parts of the | ||
121 | * file into its memory independently. | ||
122 | */ | ||
79 | #define OPMODE_FILE 3 | 123 | #define OPMODE_FILE 3 |
80 | 124 | ||
81 | /** | 125 | /** |
@@ -92,7 +136,16 @@ struct IpcHeader | |||
92 | }; | 136 | }; |
93 | 137 | ||
94 | #if !WINDOWS | 138 | #if !WINDOWS |
95 | int | 139 | /** |
140 | * Opens a shared memory object (for later mmapping). | ||
141 | * This is the POSIX variant of the plugin_open_* function. Shm is always memory-backed. | ||
142 | * If a shm is already opened, closes it before opening a new one. | ||
143 | * | ||
144 | * @param plugin plugin context | ||
145 | * @param shm_name name of the shm. | ||
146 | * @return shm id (-1 on error). That is, the result of shm_open() syscall. | ||
147 | */ | ||
148 | static int | ||
96 | plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | 149 | plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) |
97 | { | 150 | { |
98 | if (plugin->shm_id != -1) | 151 | if (plugin->shm_id != -1) |
@@ -100,7 +153,17 @@ plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | |||
100 | plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); | 153 | plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); |
101 | return plugin->shm_id; | 154 | return plugin->shm_id; |
102 | } | 155 | } |
103 | int | 156 | |
157 | /** | ||
158 | * Opens a file (for later mmapping). | ||
159 | * This is POSIX variant of the plugin_open_* function. | ||
160 | * If a file is already opened, closes it before opening a new one. | ||
161 | * | ||
162 | * @param plugin plugin context | ||
163 | * @param shm_name name of the file to open. | ||
164 | * @return file id (-1 on error). That is, the result of open() syscall. | ||
165 | */ | ||
166 | static int | ||
104 | plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | 167 | plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) |
105 | { | 168 | { |
106 | if (plugin->shm_id != -1) | 169 | if (plugin->shm_id != -1) |
@@ -109,6 +172,17 @@ plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | |||
109 | return plugin->shm_id; | 172 | return plugin->shm_id; |
110 | } | 173 | } |
111 | #else | 174 | #else |
175 | /** | ||
176 | * Opens a shared memory object (for later mmapping). | ||
177 | * This is W32 variant of the plugin_open_* function. | ||
178 | * Opened shm might be memory-backed or file-backed (depending on how | ||
179 | * it was created). shm_name is never a file name, unlike POSIX. | ||
180 | * If a shm is already opened, closes it before opening a new one. | ||
181 | * | ||
182 | * @param plugin plugin context | ||
183 | * @param shm_name name of the shared memory object. | ||
184 | * @return memory-mapped file handle (NULL on error). That is, the result of OpenFileMapping() syscall. | ||
185 | */ | ||
112 | HANDLE | 186 | HANDLE |
113 | plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | 187 | plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) |
114 | { | 188 | { |
@@ -117,6 +191,9 @@ plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | |||
117 | plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); | 191 | plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); |
118 | return plugin->map_handle; | 192 | return plugin->map_handle; |
119 | } | 193 | } |
194 | /** | ||
195 | * Another name for plugin_open_shm(). | ||
196 | */ | ||
120 | HANDLE | 197 | HANDLE |
121 | plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | 198 | plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) |
122 | { | 199 | { |
@@ -124,6 +201,16 @@ plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) | |||
124 | } | 201 | } |
125 | #endif | 202 | #endif |
126 | 203 | ||
204 | /** | ||
205 | * Writes @size bytes from @buf into @fd, returns only when | ||
206 | * writing is not possible, or when all @size bytes were written | ||
207 | * (never does partial writes). | ||
208 | * | ||
209 | * @param fd fd to write into | ||
210 | * @param buf buffer to read from | ||
211 | * @param size number of bytes to write | ||
212 | * @return number of bytes written (that is - @size), or -1 on error | ||
213 | */ | ||
127 | static int | 214 | static int |
128 | write_all (int fd, | 215 | write_all (int fd, |
129 | const void *buf, | 216 | const void *buf, |
@@ -194,7 +281,20 @@ transmit_reply (void *cls, | |||
194 | return 0; | 281 | return 0; |
195 | } | 282 | } |
196 | 283 | ||
197 | /* init the read/seek wrappers */ | 284 | /** |
285 | * Initializes an extracting session for a plugin. | ||
286 | * opens the file/shm (only in OPMODE_FILE) | ||
287 | * sets shm_ptr to NULL (unmaps it, if it was mapped) | ||
288 | * sets position to 0 | ||
289 | * initializes file size to @fsize (may be -1) | ||
290 | * sets seek request to 0 | ||
291 | * | ||
292 | * @param plugin plugin context | ||
293 | * @param operation_mode the mode of operation (OPMODE_*) | ||
294 | * @param fsize size of the source file (may be -1) | ||
295 | * @param shm_name name of the shm or file to open | ||
296 | * @return 0 on success, non-0 on error. | ||
297 | */ | ||
198 | static int | 298 | static int |
199 | init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int64_t fsize, const char *shm_name) | 299 | init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int64_t fsize, const char *shm_name) |
200 | { | 300 | { |
@@ -223,6 +323,14 @@ init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, | |||
223 | return 0; | 323 | return 0; |
224 | } | 324 | } |
225 | 325 | ||
326 | /** | ||
327 | * Deinitializes an extracting session for a plugin. | ||
328 | * unmaps shm_ptr (if was mapped) | ||
329 | * closes file/shm (if it was opened) | ||
330 | * sets map size and shm_ptr to NULL. | ||
331 | * | ||
332 | * @param plugin plugin context | ||
333 | */ | ||
226 | static void | 334 | static void |
227 | discard_state_method (struct EXTRACTOR_PluginList *plugin) | 335 | discard_state_method (struct EXTRACTOR_PluginList *plugin) |
228 | { | 336 | { |
@@ -243,6 +351,15 @@ discard_state_method (struct EXTRACTOR_PluginList *plugin) | |||
243 | plugin->shm_ptr = NULL; | 351 | plugin->shm_ptr = NULL; |
244 | } | 352 | } |
245 | 353 | ||
354 | /** | ||
355 | * Main loop function for plugins. | ||
356 | * Reads a message from the plugin input pipe and acts on it. | ||
357 | * Can be called recursively (once) in OPMODE_DECOMPRESS. | ||
358 | * plugin->waiting_for_update == 1 indicates the recursive call. | ||
359 | * | ||
360 | * @param plugin plugin context | ||
361 | * @return 0, always | ||
362 | */ | ||
246 | static int | 363 | static int |
247 | process_requests (struct EXTRACTOR_PluginList *plugin) | 364 | process_requests (struct EXTRACTOR_PluginList *plugin) |
248 | { | 365 | { |
@@ -265,6 +382,10 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
265 | in = plugin->pipe_in; | 382 | in = plugin->pipe_in; |
266 | out = plugin->cpipe_out; | 383 | out = plugin->cpipe_out; |
267 | 384 | ||
385 | /* The point of recursing into this function is to request | ||
386 | * a seek from LE server and wait for a reply. This snippet | ||
387 | * requests a seek. | ||
388 | */ | ||
268 | if (plugin->waiting_for_update == 1) | 389 | if (plugin->waiting_for_update == 1) |
269 | { | 390 | { |
270 | unsigned char seek_byte = MESSAGE_SEEK; | 391 | unsigned char seek_byte = MESSAGE_SEEK; |
@@ -308,6 +429,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
308 | do_break = 1; | 429 | do_break = 1; |
309 | break; | 430 | break; |
310 | } | 431 | } |
432 | /* Fsize may be -1 only in decompression mode */ | ||
311 | if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0) | 433 | if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0) |
312 | { | 434 | { |
313 | do_break = 1; | 435 | do_break = 1; |
@@ -329,6 +451,9 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
329 | } | 451 | } |
330 | shm_name[shm_name_len - 1] = '\0'; | 452 | shm_name[shm_name_len - 1] = '\0'; |
331 | do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name); | 453 | do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name); |
454 | /* in OPMODE_MEMORY and OPMODE_FILE we can start extracting right away, | ||
455 | * there won't be UPDATED_SHM message, and we don't need it | ||
456 | */ | ||
332 | if (!do_break && (plugin->operation_mode == OPMODE_MEMORY || | 457 | if (!do_break && (plugin->operation_mode == OPMODE_MEMORY || |
333 | plugin->operation_mode == OPMODE_FILE)) | 458 | plugin->operation_mode == OPMODE_FILE)) |
334 | { | 459 | { |
@@ -369,6 +494,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
369 | break; | 494 | break; |
370 | } | 495 | } |
371 | /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */ | 496 | /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */ |
497 | /* Re-map the shm */ | ||
372 | #if !WINDOWS | 498 | #if !WINDOWS |
373 | if ((-1 == plugin->shm_id) || | 499 | if ((-1 == plugin->shm_id) || |
374 | (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || | 500 | (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || |
@@ -387,11 +513,16 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
387 | #endif | 513 | #endif |
388 | if (plugin->waiting_for_update == 1) | 514 | if (plugin->waiting_for_update == 1) |
389 | { | 515 | { |
516 | /* We were only waiting for this one message */ | ||
390 | do_break = 1; | 517 | do_break = 1; |
391 | plugin->waiting_for_update = 2; | 518 | plugin->waiting_for_update = 2; |
392 | break; | 519 | break; |
393 | } | 520 | } |
521 | /* Run extractor on mapped region (recursive call doesn't reach this | ||
522 | * point and breaks out earlier). | ||
523 | */ | ||
394 | extract_reply = plugin->extract_method (plugin, transmit_reply, &out); | 524 | extract_reply = plugin->extract_method (plugin, transmit_reply, &out); |
525 | /* Unmap the shm */ | ||
395 | #if !WINDOWS | 526 | #if !WINDOWS |
396 | if ((plugin->shm_ptr != NULL) && | 527 | if ((plugin->shm_ptr != NULL) && |
397 | (plugin->shm_ptr != (void*) -1) ) | 528 | (plugin->shm_ptr != (void*) -1) ) |
@@ -403,6 +534,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
403 | plugin->shm_ptr = NULL; | 534 | plugin->shm_ptr = NULL; |
404 | if (extract_reply == 1) | 535 | if (extract_reply == 1) |
405 | { | 536 | { |
537 | /* Tell LE that we're done */ | ||
406 | unsigned char done_byte = MESSAGE_DONE; | 538 | unsigned char done_byte = MESSAGE_DONE; |
407 | if (write (out, &done_byte, 1) != 1) | 539 | if (write (out, &done_byte, 1) != 1) |
408 | { | 540 | { |
@@ -424,6 +556,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
424 | } | 556 | } |
425 | else | 557 | else |
426 | { | 558 | { |
559 | /* Tell LE that we're not done, and we need to seek */ | ||
427 | unsigned char seek_byte = MESSAGE_SEEK; | 560 | unsigned char seek_byte = MESSAGE_SEEK; |
428 | if (write (out, &seek_byte, 1) != 1) | 561 | if (write (out, &seek_byte, 1) != 1) |
429 | { | 562 | { |
@@ -439,6 +572,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
439 | } | 572 | } |
440 | else | 573 | else |
441 | { | 574 | { |
575 | /* This is mostly to safely skip unrelated messages */ | ||
442 | int64_t t; | 576 | int64_t t; |
443 | size_t t2; | 577 | size_t t2; |
444 | read_result2 = read (in, &t, sizeof (int64_t)); | 578 | read_result2 = read (in, &t, sizeof (int64_t)); |
@@ -452,9 +586,8 @@ process_requests (struct EXTRACTOR_PluginList *plugin) | |||
452 | } | 586 | } |
453 | 587 | ||
454 | /** | 588 | /** |
455 | * 'main' function of the child process. Reads shm-filenames from | 589 | * 'main' function of the child process. Loads the plugin, |
456 | * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta | 590 | * sets up its in and out pipes, then runs the request serving function. |
457 | * data stream is terminated by an empty entry. | ||
458 | * | 591 | * |
459 | * @param plugin extractor plugin to use | 592 | * @param plugin extractor plugin to use |
460 | * @param in stream to read from | 593 | * @param in stream to read from |
@@ -486,6 +619,7 @@ plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out) | |||
486 | close (1); | 619 | close (1); |
487 | 620 | ||
488 | plugin->pipe_in = in; | 621 | plugin->pipe_in = in; |
622 | /* Compiler will complain, and it's right. This is a kind of hack...*/ | ||
489 | plugin->cpipe_out = out; | 623 | plugin->cpipe_out = out; |
490 | process_requests (plugin); | 624 | process_requests (plugin); |
491 | 625 | ||
@@ -606,11 +740,11 @@ stop_process (struct EXTRACTOR_PluginList *plugin) | |||
606 | static int | 740 | static int |
607 | write_plugin_data (const struct EXTRACTOR_PluginList *plugin) | 741 | write_plugin_data (const struct EXTRACTOR_PluginList *plugin) |
608 | { | 742 | { |
609 | /* only does anything on Windows */ | 743 | /* This function is only necessary on W32. On POSIX |
744 | * systems plugin inherits its own data from the parent */ | ||
610 | return 0; | 745 | return 0; |
611 | } | 746 | } |
612 | 747 | ||
613 | #define plugin_print(plug, fmt, ...) fprintf (plug->cpipe_in, fmt, ...) | ||
614 | #define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) | 748 | #define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) |
615 | 749 | ||
616 | #else /* WINDOWS */ | 750 | #else /* WINDOWS */ |
@@ -721,6 +855,28 @@ create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr, | |||
721 | return 0; | 855 | return 0; |
722 | } | 856 | } |
723 | 857 | ||
858 | /** | ||
859 | * Writes @size bytes from @buf to @h, using @ov for | ||
860 | * overlapped i/o. Deallocates @old_buf and sets it to NULL, | ||
861 | * if necessary. | ||
862 | * Writes asynchronously, but sequentially (only one writing | ||
863 | * operation may be active at any given moment, but it will | ||
864 | * be done in background). Thus it is intended to be used | ||
865 | * for writing a few big chunks rather than a lot of small pieces. | ||
866 | * | ||
867 | * The extravagant interface is mainly because this function | ||
868 | * does not use a separate struct to group together overlapped | ||
869 | * structure, buffer pointer and the handle. | ||
870 | * | ||
871 | * @param h pipe handle | ||
872 | * @param ov overlapped structure pointer | ||
873 | * @param buf buffer to read from. Will be copied internally | ||
874 | * @param size number of bytes to write | ||
875 | * @param old_buf pointer where a copy of previous buffer is stored, | ||
876 | * and where a copy of @buf will be stored. | ||
877 | * | ||
878 | * @return number of bytes written, -1 on error | ||
879 | */ | ||
724 | static int | 880 | static int |
725 | write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf) | 881 | write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf) |
726 | { | 882 | { |
@@ -765,39 +921,17 @@ write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsign | |||
765 | return -1; | 921 | return -1; |
766 | } | 922 | } |
767 | 923 | ||
768 | static int | ||
769 | print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt, ...) | ||
770 | { | ||
771 | va_list va; | ||
772 | va_list vacp; | ||
773 | size_t size; | ||
774 | char *print_buf; | ||
775 | int result; | ||
776 | |||
777 | va_start (va, fmt); | ||
778 | va_copy (vacp, va); | ||
779 | size = VSNPRINTF (NULL, 0, fmt, vacp) + 1; | ||
780 | va_end (vacp); | ||
781 | if (size <= 0) | ||
782 | { | ||
783 | va_end (va); | ||
784 | return size; | ||
785 | } | ||
786 | |||
787 | print_buf = malloc (size); | ||
788 | if (print_buf == NULL) | ||
789 | return -1; | ||
790 | VSNPRINTF (print_buf, size, fmt, va); | ||
791 | va_end (va); | ||
792 | |||
793 | result = write_to_pipe (h, ov, print_buf, size, buf); | ||
794 | free (buf); | ||
795 | return result; | ||
796 | } | ||
797 | |||
798 | #define plugin_print(plug, fmt, ...) print_to_pipe (plug->cpipe_in, &plug->ov_write, &plug->ov_write_buffer, fmt, ...) | ||
799 | #define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) | 924 | #define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) |
800 | 925 | ||
926 | /** | ||
927 | * Communicates plugin data (library name, options) to the plugin | ||
928 | * process. This is only necessary on W32, where this information | ||
929 | * is not inherited by the plugin, because it is not forked. | ||
930 | * | ||
931 | * @param plugin plugin context | ||
932 | * | ||
933 | * @return 0 on success, -1 on failure | ||
934 | */ | ||
801 | static int | 935 | static int |
802 | write_plugin_data (struct EXTRACTOR_PluginList *plugin) | 936 | write_plugin_data (struct EXTRACTOR_PluginList *plugin) |
803 | { | 937 | { |
@@ -864,6 +998,14 @@ write_plugin_data (struct EXTRACTOR_PluginList *plugin) | |||
864 | return 0; | 998 | return 0; |
865 | } | 999 | } |
866 | 1000 | ||
1001 | /** | ||
1002 | * Reads plugin data from the LE server process. | ||
1003 | * Also initializes allocation granularity (duh...). | ||
1004 | * | ||
1005 | * @param fd the pipe to read from | ||
1006 | * | ||
1007 | * @return newly allocated plugin context | ||
1008 | */ | ||
867 | static struct EXTRACTOR_PluginList * | 1009 | static struct EXTRACTOR_PluginList * |
868 | read_plugin_data (int fd) | 1010 | read_plugin_data (int fd) |
869 | { | 1011 | { |
@@ -995,6 +1137,10 @@ start_process (struct EXTRACTOR_PluginList *plugin) | |||
995 | return; | 1137 | return; |
996 | } | 1138 | } |
997 | 1139 | ||
1140 | /* TODO: write our own plugin-hosting executable? rundll32, for one, has a smaller than usual stack size. | ||
1141 | * Also, users might freak out seeing over 9000 rundll32 processes (seeing over 9000 processes named | ||
1142 | * "libextractor_plugin_helper" is probably less confusing). | ||
1143 | */ | ||
998 | snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh); | 1144 | snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh); |
999 | cmd[MAX_PATH] = '\0'; | 1145 | cmd[MAX_PATH] = '\0'; |
1000 | if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, | 1146 | if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, |
@@ -1189,10 +1335,10 @@ make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size) | |||
1189 | } | 1335 | } |
1190 | 1336 | ||
1191 | /** | 1337 | /** |
1192 | * Setup a shared memory segment. | 1338 | * Setup a file-backed shared memory segment. |
1193 | * | 1339 | * |
1194 | * @param ptr set to the location of the map segment | ||
1195 | * @param map where to store the map handle | 1340 | * @param map where to store the map handle |
1341 | * @param file handle of the file to back the shm | ||
1196 | * @param fn name of the mapping | 1342 | * @param fn name of the mapping |
1197 | * @param fn_size size available in fn | 1343 | * @param fn_size size available in fn |
1198 | * @param size number of bytes to allocate for the mapping | 1344 | * @param size number of bytes to allocate for the mapping |
@@ -1283,32 +1429,109 @@ destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name) | |||
1283 | #define O_LARGEFILE 0 | 1429 | #define O_LARGEFILE 0 |
1284 | #endif | 1430 | #endif |
1285 | 1431 | ||
1432 | /** | ||
1433 | * A poor attempt to abstract the data source (file or a memory buffer) | ||
1434 | * for the decompressor. | ||
1435 | */ | ||
1286 | struct BufferedFileDataSource | 1436 | struct BufferedFileDataSource |
1287 | { | 1437 | { |
1438 | /** | ||
1439 | * Descriptor of the file to read data from (may be -1) | ||
1440 | */ | ||
1288 | int fd; | 1441 | int fd; |
1442 | |||
1443 | /** | ||
1444 | * Pointer to the buffer to read from (may be NULL) | ||
1445 | */ | ||
1289 | const unsigned char *data; | 1446 | const unsigned char *data; |
1290 | 1447 | ||
1448 | /** | ||
1449 | * Size of the file (or the data buffer) | ||
1450 | */ | ||
1291 | int64_t fsize; | 1451 | int64_t fsize; |
1452 | |||
1453 | /** | ||
1454 | * Position within the file or the data buffer | ||
1455 | */ | ||
1292 | int64_t fpos; | 1456 | int64_t fpos; |
1293 | 1457 | ||
1458 | /** | ||
1459 | * A buffer to read into. For fd != -1: when data != NULL, | ||
1460 | * data is used directly. | ||
1461 | */ | ||
1294 | unsigned char *buffer; | 1462 | unsigned char *buffer; |
1463 | |||
1464 | /** | ||
1465 | * Position within the buffer. | ||
1466 | */ | ||
1295 | int64_t buffer_pos; | 1467 | int64_t buffer_pos; |
1468 | |||
1469 | /** | ||
1470 | * Number of bytes in the buffer (<= buffer_size) | ||
1471 | */ | ||
1296 | int64_t buffer_bytes; | 1472 | int64_t buffer_bytes; |
1473 | |||
1474 | /** | ||
1475 | * Allocated size of the buffer | ||
1476 | */ | ||
1297 | int64_t buffer_size; | 1477 | int64_t buffer_size; |
1298 | }; | 1478 | }; |
1299 | 1479 | ||
1480 | /** | ||
1481 | * Creates a bfds | ||
1482 | * | ||
1483 | * @param data data buffer to use as a source (NULL if fd != -1) | ||
1484 | * @param fd file descriptor to use as a source (-1 if data != NULL) | ||
1485 | * @param fsize size of the file (or the buffer) | ||
1486 | * @return newly allocated bfds | ||
1487 | */ | ||
1300 | struct BufferedFileDataSource * | 1488 | struct BufferedFileDataSource * |
1301 | bfds_new (const unsigned char *data, int fd, int64_t fsize); | 1489 | bfds_new (const unsigned char *data, int fd, int64_t fsize); |
1302 | 1490 | ||
1491 | /** | ||
1492 | * Deallocates bfds | ||
1493 | * | ||
1494 | * @param bfds bfds to deallocate | ||
1495 | */ | ||
1303 | void | 1496 | void |
1304 | bfds_delete (struct BufferedFileDataSource *bfds); | 1497 | bfds_delete (struct BufferedFileDataSource *bfds); |
1305 | 1498 | ||
1499 | /** | ||
1500 | * Makes bfds seek to @pos and read a chunk of bytes there. | ||
1501 | * Changes bfds->fpos, bfds->buffer_bytes and bfds->buffer_pos. | ||
1502 | * Does almost nothing for memory-backed bfds. | ||
1503 | * | ||
1504 | * @param bfds bfds | ||
1505 | * @param pos position | ||
1506 | * @return 0 on success, -1 on error | ||
1507 | */ | ||
1306 | int | 1508 | int |
1307 | bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos); | 1509 | bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos); |
1308 | 1510 | ||
1511 | /** | ||
1512 | * Makes bfds seek to @pos in @whence mode. | ||
1513 | * Will try to seek within the buffer, will move the buffer location if | ||
1514 | * the seek request falls outside of the buffer range. | ||
1515 | * | ||
1516 | * @param bfds bfds | ||
1517 | * @param pos position to seek to | ||
1518 | * @param whence one of the seek constants (SEEK_CUR, SEEK_SET, SEEK_END) | ||
1519 | * @return new absolute position | ||
1520 | */ | ||
1309 | int64_t | 1521 | int64_t |
1310 | bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence); | 1522 | bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence); |
1311 | 1523 | ||
1524 | /** | ||
1525 | * Fills @buf_ptr with a pointer to a chunk of data. | ||
1526 | * Same as read() but there's no need to allocate or de-allocate the | ||
1527 | * memory (since data IS already in memory). | ||
1528 | * Will seek if necessary. Will fail if @count exceeds buffer size. | ||
1529 | * | ||
1530 | * @param bfds bfds | ||
1531 | * @param buf_ptr location to store data pointer | ||
1532 | * @param count number of bytes to read | ||
1533 | * @return number of bytes (<= count) available at location pointed by buf_ptr | ||
1534 | */ | ||
1312 | int64_t | 1535 | int64_t |
1313 | bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count); | 1536 | bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count); |
1314 | 1537 | ||
@@ -1477,41 +1700,87 @@ enum ExtractorCompressionType | |||
1477 | COMP_TYPE_BZ2 = 2 | 1700 | COMP_TYPE_BZ2 = 2 |
1478 | }; | 1701 | }; |
1479 | 1702 | ||
1703 | /** | ||
1704 | * An object from which uncompressed data can be read | ||
1705 | */ | ||
1480 | struct CompressedFileSource | 1706 | struct CompressedFileSource |
1481 | { | 1707 | { |
1708 | /** | ||
1709 | * The type of compression used in the source | ||
1710 | */ | ||
1482 | enum ExtractorCompressionType compression_type; | 1711 | enum ExtractorCompressionType compression_type; |
1712 | /** | ||
1713 | * The source of data | ||
1714 | */ | ||
1483 | struct BufferedFileDataSource *bfds; | 1715 | struct BufferedFileDataSource *bfds; |
1716 | /** | ||
1717 | * Size of the source (same as bfds->fsize) | ||
1718 | */ | ||
1484 | int64_t fsize; | 1719 | int64_t fsize; |
1720 | /** | ||
1721 | * Position within the source | ||
1722 | */ | ||
1485 | int64_t fpos; | 1723 | int64_t fpos; |
1486 | 1724 | ||
1725 | /** | ||
1726 | * Total size of the uncompressed data. Remains -1 until | ||
1727 | * decompression is finished. | ||
1728 | */ | ||
1487 | int64_t uncompressed_size; | 1729 | int64_t uncompressed_size; |
1488 | 1730 | ||
1731 | /* | ||
1489 | unsigned char *buffer; | 1732 | unsigned char *buffer; |
1490 | int64_t buffer_bytes; | 1733 | int64_t buffer_bytes; |
1491 | int64_t buffer_len; | 1734 | int64_t buffer_len; |
1735 | */ | ||
1492 | 1736 | ||
1493 | #if WINDOWS | 1737 | #if WINDOWS |
1738 | /** | ||
1739 | * W32 handle of the shm into which data is uncompressed | ||
1740 | */ | ||
1494 | HANDLE shm; | 1741 | HANDLE shm; |
1495 | #else | 1742 | #else |
1743 | /** | ||
1744 | * POSIX id of the shm into which data is uncompressed | ||
1745 | */ | ||
1496 | int shm; | 1746 | int shm; |
1497 | #endif | 1747 | #endif |
1748 | /** | ||
1749 | * Name of the shm | ||
1750 | */ | ||
1498 | char shm_name[MAX_SHM_NAME + 1]; | 1751 | char shm_name[MAX_SHM_NAME + 1]; |
1752 | /** | ||
1753 | * Pointer to the mapped region of the shm (covers the whole shm) | ||
1754 | */ | ||
1499 | void *shm_ptr; | 1755 | void *shm_ptr; |
1756 | /** | ||
1757 | * Position within shm | ||
1758 | */ | ||
1500 | int64_t shm_pos; | 1759 | int64_t shm_pos; |
1501 | size_t shm_buf_pos; | 1760 | /** |
1761 | * Allocated size of the shm | ||
1762 | */ | ||
1502 | int64_t shm_size; | 1763 | int64_t shm_size; |
1764 | /** | ||
1765 | * Number of bytes in shm (<= shm_size) | ||
1766 | */ | ||
1503 | size_t shm_buf_size; | 1767 | size_t shm_buf_size; |
1504 | 1768 | ||
1505 | #if HAVE_ZLIB | 1769 | #if HAVE_ZLIB |
1770 | /** | ||
1771 | * ZLIB stream object | ||
1772 | */ | ||
1506 | z_stream strm; | 1773 | z_stream strm; |
1507 | int ret; | 1774 | /** |
1508 | size_t pos; | 1775 | * Length of gzip header (may be 0, in that case ZLIB parses the header) |
1776 | */ | ||
1509 | int gzip_header_length; | 1777 | int gzip_header_length; |
1510 | #endif | 1778 | #endif |
1511 | #if HAVE_LIBBZ2 | 1779 | #if HAVE_LIBBZ2 |
1780 | /** | ||
1781 | * BZ2 stream object | ||
1782 | */ | ||
1512 | bz_stream bstrm; | 1783 | bz_stream bstrm; |
1513 | int bret; | ||
1514 | size_t bpos; | ||
1515 | #endif | 1784 | #endif |
1516 | }; | 1785 | }; |
1517 | 1786 | ||
@@ -1558,13 +1827,10 @@ cfs_reset_stream_zlib (struct CompressedFileSource *cfs) | |||
1558 | 1827 | ||
1559 | cfs->fpos = cfs->gzip_header_length; | 1828 | cfs->fpos = cfs->gzip_header_length; |
1560 | cfs->shm_pos = 0; | 1829 | cfs->shm_pos = 0; |
1561 | cfs->shm_buf_pos = 0; | ||
1562 | cfs->shm_buf_size = 0; | 1830 | cfs->shm_buf_size = 0; |
1563 | 1831 | ||
1564 | #if HAVE_ZLIB | 1832 | #if HAVE_ZLIB |
1565 | z_stream strm; | 1833 | z_stream strm; |
1566 | cfs->ret = 0; | ||
1567 | cfs->pos = 0; | ||
1568 | #endif | 1834 | #endif |
1569 | return 1; | 1835 | return 1; |
1570 | } | 1836 | } |
@@ -1575,6 +1841,14 @@ cfs_reset_stream_bz2 (struct CompressedFileSource *cfs) | |||
1575 | return -1; | 1841 | return -1; |
1576 | } | 1842 | } |
1577 | 1843 | ||
1844 | /** | ||
1845 | * Resets the compression stream to begin uncompressing | ||
1846 | * from the beginning. Used at initialization time, and when | ||
1847 | * seeking backward. | ||
1848 | * | ||
1849 | * @param cfs cfs to reset | ||
1850 | * @return 1 on success, -1 on error | ||
1851 | */ | ||
1578 | int | 1852 | int |
1579 | cfs_reset_stream (struct CompressedFileSource *cfs) | 1853 | cfs_reset_stream (struct CompressedFileSource *cfs) |
1580 | { | 1854 | { |
@@ -1687,10 +1961,11 @@ cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, EXTRACTOR_MetaData | |||
1687 | return cfs_reset_stream_zlib (cfs); | 1961 | return cfs_reset_stream_zlib (cfs); |
1688 | } | 1962 | } |
1689 | 1963 | ||
1690 | int | 1964 | static int |
1691 | cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs) | 1965 | cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs) |
1692 | { | 1966 | { |
1693 | inflateEnd (&cfs->strm); | 1967 | inflateEnd (&cfs->strm); |
1968 | return 1; | ||
1694 | } | 1969 | } |
1695 | 1970 | ||
1696 | static int | 1971 | static int |
@@ -1705,6 +1980,15 @@ cfs_deinit_decompressor_bz2 (struct CompressedFileSource *cfs) | |||
1705 | return -1; | 1980 | return -1; |
1706 | } | 1981 | } |
1707 | 1982 | ||
1983 | /** | ||
1984 | * Initializes decompression object. Might report metadata about | ||
1985 | * compressed stream, if available. Resets the stream to the beginning. | ||
1986 | * | ||
1987 | * @param cfs cfs to initialize | ||
1988 | * @param proc callback for metadata | ||
1989 | * @param proc_cls callback cls | ||
1990 | * @return 1 on success, -1 on error | ||
1991 | */ | ||
1708 | static int | 1992 | static int |
1709 | cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 1993 | cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
1710 | { | 1994 | { |
@@ -1719,6 +2003,12 @@ cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProce | |||
1719 | } | 2003 | } |
1720 | } | 2004 | } |
1721 | 2005 | ||
2006 | /** | ||
2007 | * Deinitializes decompression object. | ||
2008 | * | ||
2009 | * @param cfs cfs to deinitialize | ||
2010 | * @return 1 on success, -1 on error | ||
2011 | */ | ||
1722 | static int | 2012 | static int |
1723 | cfs_deinit_decompressor (struct CompressedFileSource *cfs) | 2013 | cfs_deinit_decompressor (struct CompressedFileSource *cfs) |
1724 | { | 2014 | { |
@@ -1733,6 +2023,16 @@ cfs_deinit_decompressor (struct CompressedFileSource *cfs) | |||
1733 | } | 2023 | } |
1734 | } | 2024 | } |
1735 | 2025 | ||
2026 | /** | ||
2027 | * Allocates and initializes new cfs object. | ||
2028 | * | ||
2029 | * @param bfds data source to use | ||
2030 | * @param fsize size of the source | ||
2031 | * @param compression_type type of compression used | ||
2032 | * @param proc metadata callback | ||
2033 | * @param proc_cls callback cls | ||
2034 | * @return newly allocated cfs on success, NULL on error | ||
2035 | */ | ||
1736 | struct CompressedFileSource * | 2036 | struct CompressedFileSource * |
1737 | cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 2037 | cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
1738 | { | 2038 | { |
@@ -1761,6 +2061,10 @@ cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompr | |||
1761 | return cfs; | 2061 | return cfs; |
1762 | } | 2062 | } |
1763 | 2063 | ||
2064 | /** | ||
2065 | * Data is read from the source and shoved into decompressor | ||
2066 | * in chunks this big. | ||
2067 | */ | ||
1764 | #define COM_CHUNK_SIZE (10*1024) | 2068 | #define COM_CHUNK_SIZE (10*1024) |
1765 | 2069 | ||
1766 | int | 2070 | int |
@@ -1801,6 +2105,17 @@ cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve) | |||
1801 | return -1; | 2105 | return -1; |
1802 | } | 2106 | } |
1803 | 2107 | ||
2108 | /** | ||
2109 | * Re-fills shm with new uncompressed data, preserving the last | ||
2110 | * @preserve bytes of existing data as the first @preserve bytes | ||
2111 | * of the new data. | ||
2112 | * Does the actual decompression. Will set uncompressed_size on | ||
2113 | * the end of compressed stream. | ||
2114 | * | ||
2115 | * @param cfs cfs to read from | ||
2116 | * @param preserve number of bytes to preserve (0 to discard all old data) | ||
2117 | * @return number of bytes in shm. 0 if no more data can be uncompressed. | ||
2118 | */ | ||
1804 | int64_t | 2119 | int64_t |
1805 | cfs_read (struct CompressedFileSource *cfs, int64_t preserve) | 2120 | cfs_read (struct CompressedFileSource *cfs, int64_t preserve) |
1806 | { | 2121 | { |
@@ -1844,6 +2159,15 @@ cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position) | |||
1844 | return -1; | 2159 | return -1; |
1845 | } | 2160 | } |
1846 | 2161 | ||
2162 | /** | ||
2163 | * Moves the buffer to @position in uncompressed stream. If position | ||
2164 | * requires seeking backwards beyond the boundaries of the buffer, resets the | ||
2165 | * stream and repeats decompression from the beginning to @position. | ||
2166 | * | ||
2167 | * @param cfs cfs to seek on | ||
2168 | * @param position new starting point for the buffer | ||
2169 | * @return new absolute buffer position, -1 on error or EOS | ||
2170 | */ | ||
1847 | int64_t | 2171 | int64_t |
1848 | cfs_seek (struct CompressedFileSource *cfs, int64_t position) | 2172 | cfs_seek (struct CompressedFileSource *cfs, int64_t position) |
1849 | { | 2173 | { |
@@ -1920,8 +2244,17 @@ get_compression_type (const unsigned char *data, int fd, int64_t fsize) | |||
1920 | return result; | 2244 | return result; |
1921 | } | 2245 | } |
1922 | 2246 | ||
2247 | /** | ||
2248 | * Initializes plugin state. Calls init_state_method() | ||
2249 | * directly or indirectly. | ||
2250 | * | ||
2251 | * @param plugin plugin to initialize | ||
2252 | * @param operation_mode operation mode | ||
2253 | * @param shm_name name of the shm/file | ||
2254 | * @param fsize file size (may be -1) | ||
2255 | */ | ||
1923 | static void | 2256 | static void |
1924 | init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int fd, const char *shm_name, int64_t fsize) | 2257 | init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, const char *shm_name, int64_t fsize) |
1925 | { | 2258 | { |
1926 | int write_result; | 2259 | int write_result; |
1927 | int init_state_size; | 2260 | int init_state_size; |
@@ -1970,6 +2303,12 @@ init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, | |||
1970 | } | 2303 | } |
1971 | } | 2304 | } |
1972 | 2305 | ||
2306 | /** | ||
2307 | * Discards plugin state. Calls discard_state_method() | ||
2308 | * directly or indirectly. | ||
2309 | * | ||
2310 | * @param plugin plugin whose state is discarded | ||
2311 | */ | ||
1973 | static void | 2312 | static void |
1974 | discard_plugin_state (struct EXTRACTOR_PluginList *plugin) | 2313 | discard_plugin_state (struct EXTRACTOR_PluginList *plugin) |
1975 | { | 2314 | { |
@@ -2002,6 +2341,17 @@ discard_plugin_state (struct EXTRACTOR_PluginList *plugin) | |||
2002 | } | 2341 | } |
2003 | } | 2342 | } |
2004 | 2343 | ||
2344 | /** | ||
2345 | * Forces plugin to move the buffer window to @pos. | ||
2346 | * | ||
2347 | * @param plugin plugin context | ||
2348 | * @param pos position to move to | ||
2349 | * @param want_start 1 if the caller is interested in the beginning of the | ||
2350 | * window, 0 if the caller is interested in its end. Window position | ||
2351 | * must be aligned to page size, and this parameter controls the | ||
2352 | * direction of window shift. 0 is used mostly by SEEK_END. | ||
2353 | * @return 0 on success, -1 on error | ||
2354 | */ | ||
2005 | static int | 2355 | static int |
2006 | pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_t want_start) | 2356 | pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_t want_start) |
2007 | { | 2357 | { |
@@ -2114,6 +2464,7 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_ | |||
2114 | int64_t old_pos; | 2464 | int64_t old_pos; |
2115 | old_pos = plugin->fpos + plugin->shm_pos; | 2465 | old_pos = plugin->fpos + plugin->shm_pos; |
2116 | plugin->seek_request = pos; | 2466 | plugin->seek_request = pos; |
2467 | /* Recurse into request loop to wait for shm update */ | ||
2117 | while (plugin->fpos != pos) | 2468 | while (plugin->fpos != pos) |
2118 | { | 2469 | { |
2119 | plugin->waiting_for_update = 1; | 2470 | plugin->waiting_for_update = 1; |
@@ -2127,18 +2478,28 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_ | |||
2127 | { | 2478 | { |
2128 | if (pos < plugin->fpos) | 2479 | if (pos < plugin->fpos) |
2129 | { | 2480 | { |
2130 | if (1 != cfs_reset_stream (plugin->state)) | 2481 | if (1 != cfs_reset_stream (plugin->pass_cfs)) |
2131 | return -1; | 2482 | return -1; |
2132 | } | 2483 | } |
2133 | while (plugin->fpos < pos && plugin->fpos >= 0) | 2484 | while (plugin->fpos < pos && plugin->fpos >= 0) |
2134 | plugin->fpos = cfs_seek (plugin->state, pos); | 2485 | plugin->fpos = cfs_seek (plugin->pass_cfs, pos); |
2135 | plugin->fsize = ((struct CompressedFileSource *)plugin->state)->uncompressed_size; | 2486 | plugin->fsize = ((struct CompressedFileSource *)plugin->pass_cfs)->uncompressed_size; |
2136 | plugin->shm_pos = pos - plugin->fpos; | 2487 | plugin->shm_pos = pos - plugin->fpos; |
2137 | } | 2488 | } |
2138 | return 0; | 2489 | return 0; |
2139 | } | 2490 | } |
2140 | } | 2491 | } |
2141 | 2492 | ||
2493 | /** | ||
2494 | * Moves current absolute buffer position to @pos in @whence mode. | ||
2495 | * Will move logical position without shifting the buffer, if possible. | ||
2496 | * Will not move beyond the end of file. | ||
2497 | * | ||
2498 | * @param plugin plugin context | ||
2499 | * @param pos position to move to | ||
2500 | * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END) | ||
2501 | * @return new absolute position, -1 on error | ||
2502 | */ | ||
2142 | int64_t | 2503 | int64_t |
2143 | pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) | 2504 | pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) |
2144 | { | 2505 | { |
@@ -2203,6 +2564,17 @@ pl_get_pos (struct EXTRACTOR_PluginList *plugin) | |||
2203 | return plugin->fpos + plugin->shm_pos; | 2564 | return plugin->fpos + plugin->shm_pos; |
2204 | } | 2565 | } |
2205 | 2566 | ||
2567 | /** | ||
2568 | * Fills @data with a pointer to the data buffer. | ||
2569 | * Equivalent to read(), except you don't have to allocate and free | ||
2570 | * a buffer, since the data is already in memory. | ||
2571 | * Will move the buffer, if necessary | ||
2572 | * | ||
2573 | * @param plugin plugin context | ||
2574 | * @param data location to store data pointer | ||
2575 | * @param count number of bytes to read | ||
2576 | * @return number of bytes (<= count) available in @data, -1 on error | ||
2577 | */ | ||
2206 | int64_t | 2578 | int64_t |
2207 | pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) | 2579 | pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) |
2208 | { | 2580 | { |
@@ -2226,6 +2598,17 @@ pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count | |||
2226 | } | 2598 | } |
2227 | } | 2599 | } |
2228 | 2600 | ||
2601 | /** | ||
2602 | * Transmits information about updated shm to plugin. | ||
2603 | * For OPMODE_DECOMPRESS only. | ||
2604 | * | ||
2605 | * @param plugin plugin context | ||
2606 | * @param position current absolute position in uncompressed stream | ||
2607 | * @param map_size number of bytes that are available in shm | ||
2608 | * @param fsize total size of the uncompressed stream (might be -1) | ||
2609 | * @param operation_mode mode of operation | ||
2610 | * @return 0 on success, 1 on error | ||
2611 | */ | ||
2229 | static int | 2612 | static int |
2230 | give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size, int64_t fsize, uint8_t operation_mode) | 2613 | give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size, int64_t fsize, uint8_t operation_mode) |
2231 | { | 2614 | { |
@@ -2272,6 +2655,14 @@ give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_ | |||
2272 | } | 2655 | } |
2273 | } | 2656 | } |
2274 | 2657 | ||
2658 | /** | ||
2659 | * Calls _extract_method of in-process plugin. | ||
2660 | * | ||
2661 | * @param plugin plugin context | ||
2662 | * @param shm_ptr pointer to the data buffer | ||
2663 | * @param proc metadata callback | ||
2664 | * @param proc_cls callback cls | ||
2665 | */ | ||
2275 | static void | 2666 | static void |
2276 | ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 2667 | ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
2277 | { | 2668 | { |
@@ -2297,6 +2688,14 @@ ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRA | |||
2297 | } | 2688 | } |
2298 | 2689 | ||
2299 | #if !WINDOWS | 2690 | #if !WINDOWS |
2691 | /** | ||
2692 | * Receive @size bytes from plugin, store them in @buf | ||
2693 | * | ||
2694 | * @param plugin plugin context | ||
2695 | * @param buf buffer to fill | ||
2696 | * @param size number of bytes to read | ||
2697 | * @return number of bytes read, 0 on EOS, < 0 on error | ||
2698 | */ | ||
2300 | int | 2699 | int |
2301 | plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) | 2700 | plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) |
2302 | { | 2701 | { |
@@ -2312,6 +2711,14 @@ plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t siz | |||
2312 | return read_count; | 2711 | return read_count; |
2313 | } | 2712 | } |
2314 | #else | 2713 | #else |
2714 | /** | ||
2715 | * Receive @size bytes from plugin, store them in @buf | ||
2716 | * | ||
2717 | * @param plugin plugin context | ||
2718 | * @param buf buffer to fill | ||
2719 | * @param size number of bytes to read | ||
2720 | * @return number of bytes read, 0 on EOS, < 0 on error | ||
2721 | */ | ||
2315 | int | 2722 | int |
2316 | plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) | 2723 | plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) |
2317 | { | 2724 | { |
@@ -2329,6 +2736,14 @@ plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t siz | |||
2329 | } | 2736 | } |
2330 | #endif | 2737 | #endif |
2331 | 2738 | ||
2739 | /** | ||
2740 | * Receive a reply from plugin (seek request, metadata and done message) | ||
2741 | * | ||
2742 | * @param plugin plugin context | ||
2743 | * @param proc metadata callback | ||
2744 | * @param proc_cls callback cls | ||
2745 | * @return 0 on success, -1 on error | ||
2746 | */ | ||
2332 | static int | 2747 | static int |
2333 | receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 2748 | receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
2334 | { | 2749 | { |
@@ -2397,6 +2812,19 @@ receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor | |||
2397 | } | 2812 | } |
2398 | 2813 | ||
2399 | #if !WINDOWS | 2814 | #if !WINDOWS |
2815 | /** | ||
2816 | * Wait for one of the plugins to reply. | ||
2817 | * Selects on plugin output pipes, runs receive_reply() | ||
2818 | * on each activated pipe until it gets a seek request | ||
2819 | * or a done message. Called repeatedly by the user until all pipes are dry or | ||
2820 | * broken. | ||
2821 | * | ||
2822 | * @param plugins to select upon | ||
2823 | * @param proc metadata callback | ||
2824 | * @param proc_cls callback cls | ||
2825 | * @return number of dry/broken pipes since last call, -1 on error or if no | ||
2826 | * plugins reply in 10 seconds. | ||
2827 | */ | ||
2400 | static int | 2828 | static int |
2401 | wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 2829 | wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
2402 | { | 2830 | { |
@@ -2464,6 +2892,21 @@ wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcesso | |||
2464 | return result; | 2892 | return result; |
2465 | } | 2893 | } |
2466 | #else | 2894 | #else |
2895 | /** | ||
2896 | * Wait for one of the plugins to reply. | ||
2897 | * Selects on plugin output pipes, runs receive_reply() | ||
2898 | * on each activated pipe until it gets a seek request | ||
2899 | * or a done message. Called repeatedly by the user until all pipes are dry or | ||
2900 | * broken. | ||
2901 | * This W32 version of wait_for_reply() can't select on more than 64 plugins | ||
2902 | * at once (returns -1 if there are more than 64 plugins). | ||
2903 | * | ||
2904 | * @param plugins to select upon | ||
2905 | * @param proc metadata callback | ||
2906 | * @param proc_cls callback cls | ||
2907 | * @return number of dry/broken pipes since last call, -1 on error or if no | ||
2908 | * plugins reply in 10 seconds. | ||
2909 | */ | ||
2467 | static int | 2910 | static int |
2468 | wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 2911 | wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
2469 | { | 2912 | { |
@@ -2563,6 +3006,16 @@ wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcesso | |||
2563 | 3006 | ||
2564 | #endif | 3007 | #endif |
2565 | 3008 | ||
3009 | /** | ||
3010 | * Checks the seek requests that plugins made, finds the one with | ||
3011 | * smallest offset from the beginning of the stream, and satisfies it. | ||
3012 | * | ||
3013 | * @param plugins to check | ||
3014 | * @param cfs compressed file source to seek in | ||
3015 | * @param current_position current stream position | ||
3016 | * @param map_size number of bytes currently buffered | ||
3017 | * @return new stream position, -1 on error | ||
3018 | */ | ||
2566 | static int64_t | 3019 | static int64_t |
2567 | seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct CompressedFileSource *cfs, int64_t current_position, int64_t map_size) | 3020 | seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct CompressedFileSource *cfs, int64_t current_position, int64_t map_size) |
2568 | { | 3021 | { |
@@ -2617,9 +3070,9 @@ load_in_process_plugin (struct EXTRACTOR_PluginList *plugin) | |||
2617 | * @param plugins the list of plugins to use | 3070 | * @param plugins the list of plugins to use |
2618 | * @param data data to process, or NULL if fd is not -1 | 3071 | * @param data data to process, or NULL if fd is not -1 |
2619 | * @param fd file to read data from, or -1 if data is not NULL | 3072 | * @param fd file to read data from, or -1 if data is not NULL |
2620 | * @param fsize size of data or size of file | 3073 | * @param filename name of the file to which fd belongs |
2621 | * @param buffer a buffer with data alteady read from the file (if fd != -1) | 3074 | * @param cfs compressed file source for compressed stream (may be NULL) |
2622 | * @param buffer_size size of buffer | 3075 | * @param fsize size of the file or data buffer |
2623 | * @param proc function to call for each meta data item found | 3076 | * @param proc function to call for each meta data item found |
2624 | * @param proc_cls cls argument to proc | 3077 | * @param proc_cls cls argument to proc |
2625 | */ | 3078 | */ |
@@ -2700,21 +3153,21 @@ do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, cons | |||
2700 | if (operation_mode == OPMODE_DECOMPRESS) | 3153 | if (operation_mode == OPMODE_DECOMPRESS) |
2701 | { | 3154 | { |
2702 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | 3155 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
2703 | init_plugin_state (ppos, operation_mode, -1, cfs->shm_name, -1); | 3156 | init_plugin_state (ppos, operation_mode, cfs->shm_name, -1); |
2704 | } | 3157 | } |
2705 | else if (operation_mode == OPMODE_FILE) | 3158 | else if (operation_mode == OPMODE_FILE) |
2706 | { | 3159 | { |
2707 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | 3160 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
2708 | #if !WINDOWS | 3161 | #if !WINDOWS |
2709 | init_plugin_state (ppos, operation_mode, fd, filename, fsize); | 3162 | init_plugin_state (ppos, operation_mode, filename, fsize); |
2710 | #else | 3163 | #else |
2711 | init_plugin_state (ppos, operation_mode, fd, shm_name, fsize); | 3164 | init_plugin_state (ppos, operation_mode, shm_name, fsize); |
2712 | #endif | 3165 | #endif |
2713 | } | 3166 | } |
2714 | else | 3167 | else |
2715 | { | 3168 | { |
2716 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | 3169 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
2717 | init_plugin_state (ppos, operation_mode, -1, shm_name, fsize); | 3170 | init_plugin_state (ppos, operation_mode, shm_name, fsize); |
2718 | } | 3171 | } |
2719 | 3172 | ||
2720 | if (operation_mode == OPMODE_FILE || operation_mode == OPMODE_MEMORY) | 3173 | if (operation_mode == OPMODE_FILE || operation_mode == OPMODE_MEMORY) |
@@ -2749,7 +3202,7 @@ do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, cons | |||
2749 | { | 3202 | { |
2750 | /* Pass this way. we'll need it to call cfs functions later on */ | 3203 | /* Pass this way. we'll need it to call cfs functions later on */ |
2751 | /* This is a special case */ | 3204 | /* This is a special case */ |
2752 | ppos->state = cfs; | 3205 | ppos->pass_cfs = cfs; |
2753 | ask_in_process_plugin (ppos, cfs->shm_ptr, proc, proc_cls); | 3206 | ask_in_process_plugin (ppos, cfs->shm_ptr, proc, proc_cls); |
2754 | } | 3207 | } |
2755 | while (plugins_not_ready > 0 && !kill_plugins) | 3208 | while (plugins_not_ready > 0 && !kill_plugins) |
diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h index bb9baec..fc53fd4 100644 --- a/src/main/extractor_plugins.h +++ b/src/main/extractor_plugins.h | |||
@@ -101,6 +101,10 @@ struct EXTRACTOR_PluginList | |||
101 | #else | 101 | #else |
102 | HANDLE cpipe_in; | 102 | HANDLE cpipe_in; |
103 | #endif | 103 | #endif |
104 | |||
105 | /** | ||
106 | * Pipe used by plugin to read from its parent. | ||
107 | */ | ||
104 | int pipe_in; | 108 | int pipe_in; |
105 | 109 | ||
106 | /** | 110 | /** |
@@ -110,36 +114,71 @@ struct EXTRACTOR_PluginList | |||
110 | int64_t seek_request; | 114 | int64_t seek_request; |
111 | 115 | ||
112 | #if !WINDOWS | 116 | #if !WINDOWS |
117 | /** | ||
118 | * ID of the shm object | ||
119 | */ | ||
113 | int shm_id; | 120 | int shm_id; |
114 | #else | 121 | #else |
122 | /** | ||
123 | * Handle of the shm object | ||
124 | */ | ||
115 | HANDLE map_handle; | 125 | HANDLE map_handle; |
116 | #endif | 126 | #endif |
117 | 127 | ||
118 | void *state; | 128 | /** |
129 | * Used to pass cfs pointer to in-process plugin in OPMODE_DECOMPRESS | ||
130 | */ | ||
131 | void *pass_cfs; | ||
119 | 132 | ||
133 | /** | ||
134 | * Uncompressed stream size. Initially -1, until file is fully decompressed | ||
135 | * (for sources that are not compressed it is set from the start). | ||
136 | */ | ||
120 | int64_t fsize; | 137 | int64_t fsize; |
121 | 138 | ||
139 | /** | ||
140 | * Absolute position within the stream | ||
141 | */ | ||
122 | int64_t fpos; | 142 | int64_t fpos; |
123 | 143 | ||
144 | /** | ||
145 | * Pointer to the shared memory segment | ||
146 | */ | ||
124 | unsigned char *shm_ptr; | 147 | unsigned char *shm_ptr; |
125 | 148 | ||
149 | /** | ||
150 | * Number of bytes in the segment | ||
151 | */ | ||
126 | int64_t map_size; | 152 | int64_t map_size; |
127 | 153 | ||
154 | /** | ||
155 | * Position within the segment | ||
156 | */ | ||
128 | int64_t shm_pos; | 157 | int64_t shm_pos; |
129 | 158 | ||
159 | #if !WINDOWS | ||
130 | /** | 160 | /** |
131 | * Pipe used to read information about extracted meta data from | 161 | * Pipe used to read information about extracted meta data from |
132 | * the plugin child process. -1 if not initialized. | 162 | * the plugin child process. -1 if not initialized. |
133 | */ | 163 | */ |
134 | #if !WINDOWS | ||
135 | int cpipe_out; | 164 | int cpipe_out; |
136 | #else | 165 | #else |
166 | /** | ||
167 | * Pipe used to read information about extracted meta data from | ||
168 | * the plugin child process. -1 if not initialized. | ||
169 | */ | ||
137 | HANDLE cpipe_out; | 170 | HANDLE cpipe_out; |
138 | #endif | 171 | #endif |
139 | 172 | ||
140 | #if !WINDOWS | 173 | #if !WINDOWS |
174 | /** | ||
175 | * Page size. Mmap offset is a multiple of this number. | ||
176 | */ | ||
141 | long allocation_granularity; | 177 | long allocation_granularity; |
142 | #else | 178 | #else |
179 | /** | ||
180 | * Page size. Mmap offset is a multiple of this number. | ||
181 | */ | ||
143 | DWORD allocation_granularity; | 182 | DWORD allocation_granularity; |
144 | #endif | 183 | #endif |
145 | 184 | ||
@@ -160,7 +199,15 @@ struct EXTRACTOR_PluginList | |||
160 | unsigned char *ov_write_buffer; | 199 | unsigned char *ov_write_buffer; |
161 | #endif | 200 | #endif |
162 | 201 | ||
202 | /** | ||
203 | * Mode of operation. One of the OPMODE_* constants | ||
204 | */ | ||
163 | uint8_t operation_mode; | 205 | uint8_t operation_mode; |
206 | |||
207 | /** | ||
208 | * 1 if plugin is currently in a recursive process_requests() call, | ||
209 | * 0 otherwise | ||
210 | */ | ||
164 | int waiting_for_update; | 211 | int waiting_for_update; |
165 | }; | 212 | }; |
166 | 213 | ||