libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 8b969da6d45e3a9245320f676b00d87e3768b1a6
parent 38b0cdd4c4d94c644eca757f5736ee8f3f03cc84
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sun, 22 Jul 2012 18:20:40 +0000

-misc hacking on train

Diffstat:
Mconfigure.ac | 9+++++++++
Msrc/include/extractor.h | 2+-
Msrc/main/Makefile.am | 3++-
Asrc/main/TODO | 5+++++
Msrc/main/extract.c | 5+++++
Msrc/main/extractor.c | 576+------------------------------------------------------------------------------
Msrc/main/extractor_datasource.c | 14+++++++-------
Msrc/main/extractor_datasource.h | 5+++++
Asrc/main/extractor_ipc.c | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/main/extractor_ipc.h | 142++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
Msrc/main/extractor_ipc_gnu.c | 640++++++++++++++++++++++++++++++++++---------------------------------------------
Msrc/main/extractor_ipc_w32.c | 184++++++++++++++++++++++++++++++++++++-------------------------------------------
Asrc/main/extractor_plugin_main.c | 625+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main/extractor_plugin_main.h | 43+++++++++++++++++++++++++++++++++++++++++++
Msrc/main/extractor_plugins.c | 9+++++++--
Msrc/main/extractor_plugins.h | 124+++++--------------------------------------------------------------------------
Msrc/main/extractor_plugpath.c | 7++++++-
Msrc/main/extractor_plugpath.h | 28++++++++++++++++++++++++++--
18 files changed, 1381 insertions(+), 1177 deletions(-)

diff --git a/configure.ac b/configure.ac @@ -42,6 +42,7 @@ linux*) AC_DEFINE_UNQUOTED(LINUX,1,[This is a LINUX system]) AM_CONDITIONAL(HAVE_GNU_LD, true) AM_CONDITIONAL(SOMEBSD, false) + AM_CONDITIONAL(WINDOWS, false) XTRA_CPPLIBS=-lstdc++ LIBEXT=.so ;; @@ -50,6 +51,7 @@ freebsd*) AC_CHECK_LIB(c_r, pthread_create) AM_CONDITIONAL(HAVE_GNU_LD, true) AM_CONDITIONAL(SOMEBSD, true) + AM_CONDITIONAL(WINDOWS, false) XTRA_CPPLIBS=-lstdc++ LIBEXT=.so ;; @@ -58,6 +60,7 @@ openbsd*) AC_CHECK_LIB(c_r, pthread_create) AM_CONDITIONAL(HAVE_GNU_LD, true) AM_CONDITIONAL(SOMEBSD, true) + AM_CONDITIONAL(WINDOWS, false) XTRA_CPPLIBS=-lstdc++ LIBEXT=.so ;; @@ -66,6 +69,7 @@ netbsd*) AC_CHECK_LIB(c_r, pthread_create) AM_CONDITIONAL(HAVE_GNU_LD, true) AM_CONDITIONAL(SOMEBSD, true) + AM_CONDITIONAL(WINDOWS, false) XTRA_CPPLIBS=-lstdc++ LIBEXT=.so ;; @@ -75,6 +79,7 @@ netbsd*) XTRA_CPPLIBS=-lstdc++ AM_CONDITIONAL(HAVE_GNU_LD, false) AM_CONDITIONAL(SOMEBSD, false) + AM_CONDITIONAL(WINDOWS, false) CFLAGS="-D_POSIX_PTHREAD_SEMANTICS $CFLAGS" LIBEXT=.so ;; @@ -82,6 +87,7 @@ darwin*) AC_DEFINE_UNQUOTED(DARWIN,1,[This is a Darwin system]) AM_CONDITIONAL(HAVE_GNU_LD, false) AM_CONDITIONAL(SOMEBSD, false) + AM_CONDITIONAL(WINDOWS, false) CFLAGS="-fno-common $CFLAGS" LIBEXT=.so ;; @@ -90,6 +96,7 @@ cygwin*) LDFLAGS="$LDFLAGS -no-undefined" AM_CONDITIONAL(SOMEBSD, false) AM_CONDITIONAL(HAVE_GNU_LD, false) + AM_CONDITIONAL(WINDOWS, false) LIBEXT=.dll ;; mingw*) @@ -148,6 +155,7 @@ mingw*) LIBSOLD=$LIBS AM_CONDITIONAL(HAVE_GNU_LD, true) AM_CONDITIONAL(SOMEBSD, false) + AM_CONDITIONAL(WINDOWS, true) LIBEXT=.dll ;; *) @@ -156,6 +164,7 @@ mingw*) AC_MSG_RESULT(otheros) AM_CONDITIONAL(HAVE_GNU_LD, false) AM_CONDITIONAL(SOMEBSD, false) + AM_CONDITIONAL(WINDOWS, false) LIBEXT=.so ;; esac diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -4,7 +4,7 @@ libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the 
Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but diff --git a/src/main/Makefile.am b/src/main/Makefile.am @@ -41,9 +41,10 @@ libextractor_la_CPPFLAGS = -DPLUGINDIR=\"@RPLUGINDIR@\" -DPLUGININSTDIR=\"${plug libextractor_la_SOURCES = \ extractor.c \ - $(EXTRACTOR_IPC) extractor_ipc.h \ + $(EXTRACTOR_IPC) extractor_ipc.c extractor_ipc.h \ extractor_plugpath.c extractor_plugpath.h \ extractor_plugins.c extractor_plugins.h \ + extractor_plugin_main.c extractor_plugin_main.h \ extractor_metatypes.c \ extractor_print.c diff --git a/src/main/TODO b/src/main/TODO @@ -0,0 +1,5 @@ +* extractor_plugin_main (a lot) +* extractor_ipc.c (a bit) +* extractor.c (a lot) +* extractor_ipc_w32.c (a lot) +* test, test, test (a lot) diff --git a/src/main/extract.c b/src/main/extract.c @@ -17,6 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/** + * @file main/extract.c + * @brief command-line tool to run GNU libextractor + * @author Christian Grothoff + */ #include "platform.h" #include "extractor.h" #include "getopt.h" diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -113,35 +113,6 @@ */ #define OPMODE_FILE 3 -/** - * Header used for our IPC replies. A header - * with all fields being zero is used to indicate - * the end of the stream. - */ -struct IpcHeader -{ - /** - * Type of the meta data. - */ - enum EXTRACTOR_MetaType meta_type; - - /** - * Format of the meta data. - */ - enum EXTRACTOR_MetaFormat meta_format; - - /** - * Number of bytes of meta data (value) - */ - size_t data_len; - - /** - * Length of the mime type string describing the meta data value's mime type, - * including 0-terminator, 0 for mime type of "NULL". 
- */ - size_t mime_len; -}; - /** * Writes 'size' bytes from 'buf' to 'fd', returns only when @@ -173,345 +144,12 @@ write_all (int fd, } -/** - * Function called by a plugin in a child process. Transmits - * the meta data back to the parent process. - * - * @param cls closure, "int*" of the FD for transmission - * @param plugin_name name of the plugin that produced this value; - * special values can be used (i.e. '<zlib>' for zlib being - * used in the main libextractor library and yielding - * meta data). - * @param type libextractor-type describing the meta data - * @param format basic format information about data - * @param data_mime_type mime-type of data (not of the original file); - * can be NULL (if mime-type is not known) - * @param data actual meta-data found - * @param data_len number of bytes in data - * @return 0 to continue extracting, 1 to abort (transmission error) - */ -static int -transmit_reply (void *cls, - const char *plugin_name, - enum EXTRACTOR_MetaType type, - enum EXTRACTOR_MetaFormat format, - const char *data_mime_type, - const char *data, - size_t data_len) -{ - static const unsigned char meta_byte = MESSAGE_META; - int *cpipe_out = cls; - struct IpcHeader hdr; - size_t mime_len; - - if (NULL == data_mime_type) - mime_len = 0; - else - mime_len = strlen (data_mime_type) + 1; - if (mime_len > MAX_MIME_LEN) - mime_len = MAX_MIME_LEN; - hdr.meta_type = type; - hdr.meta_format = format; - hdr.data_len = data_len; - hdr.mime_len = mime_len; - if ( (sizeof (meta_byte) != - write_all (*cpipe_out, - &meta_byte, sizeof (meta_byte))) || - (sizeof (hdr) != - write_all (*cpipe_out, - &hdr, sizeof (hdr))) || - (mime_len != - write_all (*cpipe_out, - data_mime_type, mime_len)) || - (data_len != - write_all (*cpipe_out, - data, data_len)) ) - return 1; - return 0; -} - - -/** - * Main loop function for plugins. - * Reads a message from the plugin input pipe and acts on it. - * Can be called recursively (once) in OPMODE_DECOMPRESS. 
- * plugin->waiting_for_update == 1 indicates the recursive call. - * - * @param plugin plugin context - * @return 0, always - */ -static int -process_requests (struct EXTRACTOR_PluginList *plugin) -{ - int in, out; - int read_result1, read_result2, read_result3, read_result4; - unsigned char code; - char *shm_name = NULL; - size_t shm_name_len; - int extract_reply; - struct IpcHeader hdr; - int do_break; -#ifdef WINDOWS - HANDLE map; - MEMORY_BASIC_INFORMATION mi; -#endif - - in = plugin->pipe_in; - out = plugin->cpipe_out; - - /* The point of recursing into this function is to request - * a seek from LE server and wait for a reply. This snipper - * requests a seek. - */ - if (plugin->waiting_for_update == 1) - { - unsigned char seek_byte = MESSAGE_SEEK; - if (write (out, &seek_byte, 1) != 1) - return -1; - if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t)) - return -1; - } - - memset (&hdr, 0, sizeof (hdr)); - do_break = 0; - while (!do_break) - { - read_result1 = read (in, &code, 1); - if (read_result1 <= 0) - break; - switch (code) - { - case MESSAGE_INIT_STATE: - read_result2 = read (in, &plugin->operation_mode, sizeof (uint8_t)); - read_result3 = read (in, &plugin->fsize, sizeof (int64_t)); - read_result4 = read (in, &shm_name_len, sizeof (size_t)); - if ((read_result2 < sizeof (uint8_t)) || - (read_result3 < sizeof (int64_t)) || - (read_result4 < sizeof (size_t))) - { - do_break = 1; - break; - } - if (plugin->operation_mode != OPMODE_MEMORY && - plugin->operation_mode != OPMODE_DECOMPRESS && - plugin->operation_mode != OPMODE_FILE) - { - do_break = 1; - break; - } - if ((plugin->operation_mode == OPMODE_MEMORY || - plugin->operation_mode == OPMODE_DECOMPRESS) && - shm_name_len > MAX_SHM_NAME) - { - do_break = 1; - break; - } - /* Fsize may be -1 only in decompression mode */ - if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0) - { - do_break = 1; - break; - } - if (shm_name != NULL) - free (shm_name); - 
shm_name = malloc (shm_name_len); - if (shm_name == NULL) - { - do_break = 1; - break; - } - read_result2 = read (in, shm_name, shm_name_len); - if (read_result2 < shm_name_len) - { - do_break = 1; - break; - } - shm_name[shm_name_len - 1] = '\0'; - do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name); - /* in OPMODE_MEMORY and OPMODE_FILE we can start extracting right away, - * there won't be UPDATED_SHM message, and we don't need it - */ - if (!do_break && (plugin->operation_mode == OPMODE_MEMORY || - plugin->operation_mode == OPMODE_FILE)) - { - extract_reply = plugin->extract_method (plugin, transmit_reply, &out); - unsigned char done_byte = MESSAGE_DONE; - if (write (out, &done_byte, 1) != 1) - { - do_break = 1; - break; - } - if ((plugin->specials != NULL) && - (NULL != strstr (plugin->specials, "force-kill"))) - { - /* we're required to die after each file since this - plugin only supports a single file at a time */ -#if !WINDOWS - fsync (out); -#else - _commit (out); -#endif - _exit (0); - } - } - break; - case MESSAGE_DISCARD_STATE: - discard_state_method (plugin); - break; - case MESSAGE_UPDATED_SHM: - if (plugin->operation_mode == OPMODE_DECOMPRESS) - { - read_result2 = read (in, &plugin->fpos, sizeof (int64_t)); - read_result3 = read (in, &plugin->map_size, sizeof (size_t)); - read_result4 = read (in, &plugin->fsize, sizeof (int64_t)); - if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) || - plugin->fpos < 0 || (plugin->operation_mode != OPMODE_DECOMPRESS && (plugin->fsize <= 0 || plugin->fpos >= plugin->fsize))) - { - do_break = 1; - break; - } - /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */ - /* Re-map the shm */ -#if !WINDOWS - if ((-1 == plugin->shm_id) || - (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || - (plugin->shm_ptr == (void *) -1)) - { - do_break = 1; - break; - } -#else - if 
((plugin->map_handle == 0) || - (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0)))) - { - do_break = 1; - break; - } -#endif - if (plugin->waiting_for_update == 1) - { - /* We were only waiting for this one message */ - do_break = 1; - plugin->waiting_for_update = 2; - break; - } - /* Run extractor on mapped region (recursive call doesn't reach this - * point and breaks out earlier. - */ - extract_reply = plugin->extract_method (plugin, transmit_reply, &out); - /* Unmap the shm */ -#if !WINDOWS - if ((plugin->shm_ptr != NULL) && - (plugin->shm_ptr != (void*) -1) ) - munmap (plugin->shm_ptr, plugin->map_size); -#else - if (plugin->shm_ptr != NULL) - UnmapViewOfFile (plugin->shm_ptr); -#endif - plugin->shm_ptr = NULL; - if (extract_reply == 1) - { - /* Tell LE that we're done */ - unsigned char done_byte = MESSAGE_DONE; - if (write (out, &done_byte, 1) != 1) - { - do_break = 1; - break; - } - if ((plugin->specials != NULL) && - (NULL != strstr (plugin->specials, "force-kill"))) - { - /* we're required to die after each file since this - plugin only supports a single file at a time */ -#if !WINDOWS - fsync (out); -#else - _commit (out); -#endif - _exit (0); - } - } - else - { - /* Tell LE that we're not done, and we need to seek */ - unsigned char seek_byte = MESSAGE_SEEK; - if (write (out, &seek_byte, 1) != 1) - { - do_break = 1; - break; - } - if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t)) - { - do_break = 1; - break; - } - } - } - else - { - /* This is mostly to safely skip unrelated messages */ - int64_t t; - size_t t2; - read_result2 = read (in, &t, sizeof (int64_t)); - read_result3 = read (in, &t2, sizeof (size_t)); - read_result4 = read (in, &t, sizeof (int64_t)); - } - break; - } - } - return 0; -} - - -/** - * 'main' function of the child process. Loads the plugin, - * sets up its in and out pipes, then runs the request serving function. 
- * - * @param plugin extractor plugin to use - * @param in stream to read from - * @param out stream to write to - */ -static void -plugin_main (struct EXTRACTOR_PluginList *plugin, - int in, int out) -{ - if (plugin == NULL) - { - close (in); - close (out); - return; - } - if (0 != EXTRACTOR_plugin_load_ (plugin)) - { - close (in); - close (out); -#if DEBUG - fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname); -#endif - return; - } - if ((plugin->specials != NULL) && - (NULL != strstr (plugin->specials, "close-stderr"))) - close (2); - if ((plugin->specials != NULL) && - (NULL != strstr (plugin->specials, "close-stdout"))) - close (1); - - plugin->pipe_in = in; - /* Compiler will complain, and it's right. This is a kind of hack...*/ - plugin->cpipe_out = out; - process_requests (plugin); - - close (in); - close (out); -} - /** * Open a file */ static int -file_open(const char *filename, int oflag, ...) +file_open (const char *filename, int oflag, ...) { int mode; const char *fn; @@ -794,112 +432,6 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, } -/** - * Moves current absolute buffer position to @pos in @whence mode. - * Will move logical position withouth shifting the buffer, if possible. - * Will not move beyond the end of file. 
- * - * @param plugin plugin context - * @param pos position to move to - * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END) - * @return new absolute position, -1 on error - */ -static int64_t -pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) -{ - switch (whence) - { - case SEEK_CUR: - if (plugin->shm_pos + pos < plugin->map_size && plugin->shm_pos + pos >= 0) - { - plugin->shm_pos += pos; - return plugin->fpos + plugin->shm_pos; - } - if (0 != pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->shm_pos + pos, 1)) - return -1; - plugin->shm_pos += pos; - return plugin->fpos + plugin->shm_pos; - break; - case SEEK_SET: - if (pos < 0) - return -1; - if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size) - { - plugin->shm_pos = pos - plugin->fpos; - return pos; - } - if (0 != pl_pick_next_buffer_at (plugin, pos, 1)) - return -1; - if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size) - { - plugin->shm_pos = pos - plugin->fpos; - return pos; - } - return -1; - break; - case SEEK_END: - while (plugin->fsize == -1) - { - pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->map_size + pos, 0); - } - if (plugin->fsize + pos - 1 < plugin->fpos || plugin->fsize + pos - 1 > plugin->fpos + plugin->map_size) - { - if (0 != pl_pick_next_buffer_at (plugin, plugin->fsize - MAX_READ, 0)) - return -1; - } - plugin->shm_pos = plugin->fsize + pos - plugin->fpos; - if (plugin->shm_pos < 0) - plugin->shm_pos = 0; - else if (plugin->shm_pos >= plugin->map_size) - plugin->shm_pos = plugin->map_size - 1; - return plugin->fpos + plugin->shm_pos - 1; - break; - } - return -1; -} - - -static int64_t -pl_get_fsize (struct EXTRACTOR_PluginList *plugin) -{ - return plugin->fsize; -} - - -/** - * Fills @data with a pointer to the data buffer. - * Equivalent to read(), except you don't have to allocate and free - * a buffer, since the data is already in memory. 
- * Will move the buffer, if necessary - * - * @param plugin plugin context - * @param data location to store data pointer - * @param count number of bytes to read - * @return number of bytes (<= count) avalable in @data, -1 on error - */ -static int64_t -pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) -{ - *data = NULL; - if (count > MAX_READ) - return -1; - if (count > plugin->map_size - plugin->shm_pos) - { - int64_t actual_count; - if (plugin->fpos + plugin->shm_pos != pl_seek (plugin, plugin->fpos + plugin->shm_pos, SEEK_SET)) - return -1; - *data = &plugin->shm_ptr[plugin->shm_pos]; - actual_count = (count < plugin->map_size - plugin->shm_pos) ? count : (plugin->map_size - plugin->shm_pos); - plugin->shm_pos += actual_count; - return actual_count; - } - else - { - *data = &plugin->shm_ptr[plugin->shm_pos]; - plugin->shm_pos += count; - return count; - } -} /** @@ -1002,111 +534,7 @@ ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, } -/** - * Receive a reply from plugin (seek request, metadata and done message) - * - * @param plugin plugin context - * @param proc metadata callback - * @param proc_cls callback cls - * @return 0 on success, -1 on error - */ -static int -receive_reply (struct EXTRACTOR_PluginList *plugin, - EXTRACTOR_MetaDataProcessor proc, void *proc_cls) -{ - int read_result; - unsigned char code; - int64_t seek_position; - struct IpcHeader hdr; - char *mime_type; - char *data; - int must_read = 1; - - while (must_read) - { - read_result = plugin_read (plugin, &code, 1); - if (read_result < 1) - return -1; - switch (code) - { - case MESSAGE_DONE: /* Done */ - plugin->seek_request = -1; - must_read = 0; - break; - case MESSAGE_SEEK: /* Seek */ - read_result = plugin_read (plugin, - &seek_position, sizeof (int64_t)); - if (read_result < sizeof (int64_t)) - return -1; - plugin->seek_request = seek_position; - must_read = 0; - break; - case MESSAGE_META: /* Meta */ - read_result = plugin_read (plugin, - 
&hdr, sizeof (hdr)); - if (read_result < sizeof (hdr)) - return -1; - /* FIXME: check hdr for sanity */ - if (hdr.data_len > MAX_META_DATA) - return -1; /* not allowing more than MAX_META_DATA meta data */ - if (0 == hdr.mime_len) - { - mime_type = NULL; - } - else - { - if (NULL == (mime_type = malloc (hdr.mime_len))) - return -1; - read_result = plugin_read (plugin, - mime_type, - hdr.mime_len); - if ( (read_result < hdr.mime_len) || - ('\0' != mime_type[hdr.mime_len-1]) ) - { - if (NULL != mime_type) - free (mime_type); - return -1; - } - } - if (0 == hdr.data_len) - { - data = NULL; - } - else - { - if (NULL == (data = malloc (hdr.data_len))) - { - if (NULL != mime_type) - free (mime_type); - return -1; - } - read_result = plugin_read (plugin, - data, hdr.data_len); - if (read_result < hdr.data_len) - { - if (NULL != mime_type) - free (mime_type); - free (data); - return -1; - } - } - read_result = proc (proc_cls, - plugin->short_libname, - hdr.meta_type, hdr.meta_format, - mime_type, data, hdr.data_len); - if (NULL != mime_type) - free (mime_type); - if (NULL != data) - free (data); - if (0 != read_result) - return 1; - break; - default: - return -1; - } - } - return 0; -} + /** diff --git a/src/main/extractor_datasource.c b/src/main/extractor_datasource.c @@ -17,8 +17,14 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ +/** + * @file main/extractor_datasource.c + * @brief random access and possibly decompression of data from buffer in memory or file on disk + * @author Christian Grothoff + */ #include "platform.h" +#include "extractor_datasource.h" #if HAVE_LIBBZ2 #include <bzlib.h> @@ -219,13 +225,7 @@ bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, bfds->buffer_pos = pos; return 0; } -#if WINDOWS - position = _lseeki64 (bfds->fd, pos, SEEK_SET); -#elif HAVE_LSEEK64 - position = lseek64 (bfds->fd, pos, SEEK_SET); -#else - position = (int64_t) lseek (bfds->fd, pos, SEEK_SET); -#endif + position = (int64_t) LSEEK (bfds->fd, pos, SEEK_SET); if (position < 0) return -1; bfds->fpos = position; diff --git a/src/main/extractor_datasource.h b/src/main/extractor_datasource.h @@ -17,6 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/** + * @file main/extractor_datasource.h + * @brief random access and possibly decompression of data from buffer in memory or file on disk + * @author Christian Grothoff + */ #ifndef EXTRACTOR_DATASOURCE_H #define EXTRACTOR_DATASOURCE_H diff --git a/src/main/extractor_ipc.c b/src/main/extractor_ipc.c @@ -0,0 +1,137 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. 
If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ +/** + * @file main/extractor_ipc.c + * @brief IPC with plugin (OS-independent parts) + * @author Christian Grothoff + */ +#include "platform.h" + + +/** + * Process a reply from channel (seek request, metadata and done message) + * + * @param buf buffer with data from IPC channel + * @param size number of bytes in buffer + * @param proc metadata callback + * @param proc_cls callback cls + * @return number of bytes processed, -1 on error + */ +ssize_t +EXTRACTOR_IPC_process_reply_ (const void *data, + size_t size, + EXTRACTOR_ChannelMessageProcessor proc, + void *proc_cls) +{ + int read_result; + unsigned char code; + int64_t seek_position; + struct IpcHeader hdr; + char *mime_type; + char *data; + int must_read = 1; + + while (must_read) + { + read_result = plugin_read (plugin, &code, 1); + if (read_result < 1) + return -1; + switch (code) + { + case MESSAGE_DONE: /* Done */ + plugin->seek_request = -1; + must_read = 0; + break; + case MESSAGE_SEEK: /* Seek */ + read_result = plugin_read (plugin, + &seek_position, sizeof (int64_t)); + if (read_result < sizeof (int64_t)) + return -1; + plugin->seek_request = seek_position; + must_read = 0; + break; + case MESSAGE_META: /* Meta */ + read_result = plugin_read (plugin, + &hdr, sizeof (hdr)); + if (read_result < sizeof (hdr)) + return -1; + /* FIXME: check hdr for sanity */ + if (hdr.data_len > MAX_META_DATA) + return -1; /* not allowing more than MAX_META_DATA meta data */ + if (0 == hdr.mime_len) + { + mime_type = NULL; + } + else + { + if (NULL == (mime_type = malloc (hdr.mime_len))) + return -1; + read_result = plugin_read (plugin, + mime_type, + hdr.mime_len); + if ( (read_result < hdr.mime_len) || + ('\0' != mime_type[hdr.mime_len-1]) ) + { + if (NULL != mime_type) + free (mime_type); + return -1; + } + } + if (0 == hdr.data_len) + { + data = NULL; + } + else + { + if (NULL == (data = malloc 
(hdr.data_len))) + { + if (NULL != mime_type) + free (mime_type); + return -1; + } + read_result = plugin_read (plugin, + data, hdr.data_len); + if (read_result < hdr.data_len) + { + if (NULL != mime_type) + free (mime_type); + free (data); + return -1; + } + } + read_result = proc (proc_cls, + plugin->short_libname, + hdr.meta_type, hdr.meta_format, + mime_type, data, hdr.data_len); + if (NULL != mime_type) + free (mime_type); + if (NULL != data) + free (data); + if (0 != read_result) + return 1; + break; + default: + return -1; + } + } + return 0; +} + +/* end of extractor_ipc.c */ diff --git a/src/main/extractor_ipc.h b/src/main/extractor_ipc.h @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + (C) 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -17,6 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/** + * @file main/extractor_ipc.h + * @brief IPC with plugin (OS-independent API) + * @author Christian Grothoff + */ #ifndef EXTRACTOR_IPC_H #define EXTRACTOR_IPC_H @@ -26,16 +31,89 @@ */ struct EXTRACTOR_Channel; +/** + * Definition of a shared memory area. + */ +struct EXTRACTOR_SharedMemory; + + +/** + * Header used for our IPC replies. A header + * with all fields being zero is used to indicate + * the end of the stream. + */ +struct IpcHeader +{ + /** + * Type of the meta data. + */ + enum EXTRACTOR_MetaType meta_type; + + /** + * Format of the meta data. + */ + enum EXTRACTOR_MetaFormat meta_format; + + /** + * Number of bytes of meta data (value) + */ + size_t data_len; + + /** + * Length of the mime type string describing the meta data value's mime type, + * including 0-terminator, 0 for mime type of "NULL". + */ + size_t mime_len; +}; + + +/** + * Create a shared memory area. 
+ * + * @param size size of the shared area + * @return NULL on error + */ +struct EXTRACTOR_SharedMemory * +EXTRACTOR_IPC_shared_memory_create_ (size_t size); + + +/** + * Destroy shared memory area. + * + * @param shm memory area to destroy + * @return NULL on error + */ +void +EXTRACTOR_IPC_shared_memory_destroy_ (struct EXTRACTOR_SharedMemory *shm); + + +/** + * Initialize shared memory area from data source. + * + * @param shm memory area to initialize + * @param ds data source to use for initialization + * @param off offset to use in data source + * @param size number of bytes to copy + * @return -1 on error, otherwise number of bytes copied + */ +ssize_t +EXTRACTOR_IPC_shared_memory_set_ (struct EXTRACTOR_SharedMemory *shm, + struct EXTRACTOR_Datasource *ds, + uint64_t off, + size_t size); + /** * Create a channel to communicate with a process wrapping * the plugin of the given name. Starts the process as well. * * @param short_libname name of the plugin + * @param shm memory to share with the process * @return NULL on error, otherwise IPC channel */ struct EXTRACTOR_Channel * -EXTRACTOR_IPC_channel_create_ (const char *short_libname); +EXTRACTOR_IPC_channel_create_ (const char *short_libname, + struct EXTRACTOR_SharedMemory *shm); /** @@ -49,13 +127,67 @@ EXTRACTOR_IPC_channel_destroy_ (struct EXTRACTOR_Channel *channel); /** - * Map the given buffer + * Send data via the given IPC channel (blocking). * * @param channel channel to communicate with the plugin + * @param buf data to send + * @param size number of bytes in buf to send + * @return -1 on error, number of bytes sent on success + * (never does partial writes) */ -void -EXTRACTOR_IPC_channel_xxx_ (struct EXTRACTOR_Channel *channel); +ssize_t +EXTRACTOR_IPC_channel_send_ (struct EXTRACTOR_Channel *channel, + const void *data, + size_t size); + + +/** + * Handler for a message from one of the plugins. 
+ * + * @param cls closure + * @param short_libname library name of the channel sending the message + * @param msg header of the message from the plugin + * @param value 'data' send from the plugin + * @param mime mime string send from the plugin + */ +typedef void (*EXTRACTOR_ChannelMessageProcessor) (void *cls, + const char *short_libname, + const struct IpcHeader *msg, + const void *value, + const char *mime); + +/** + * Process a reply from channel (seek request, metadata and done message) + * + * @param buf buffer with data from IPC channel + * @param size number of bytes in buffer + * @param proc metadata callback + * @param proc_cls callback cls + * @return number of bytes processed, -1 on error + */ +ssize_t +EXTRACTOR_IPC_process_reply_ (const void *data, + size_t size, + EXTRACTOR_ChannelMessageProcessor proc, + void *proc_cls); + +/** + * Receive data from any of the given IPC channels (blocking). + * Wait for one of the plugins to reply. + * + * @param channels array of channels, channels that break may be set to NULL + * @param num_channels length of the 'channels' array + * @param proc function to call to process messages (may be called + * more than once) + * @param proc_cls closure for 'proc' + * @return -1 on error (i.e. no response in 10s), 1 on success + */ +int +EXTRACTOR_IPC_channel_recv_ (struct EXTRACTOR_Channel **channels, + unsigned int num_channels, + EXTRACTOR_ChannelMessageProcessor proc, + void *proc_cls); #endif diff --git a/src/main/extractor_ipc_gnu.c b/src/main/extractor_ipc_gnu.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + (C) 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -17,24 +17,51 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - +/** + * @file main/extractor_ipc_gnu.c + * @brief IPC with plugin for GNU/POSIX systems + * @author Christian Grothoff + */ #include "platform.h" #include "plibc.h" #include "extractor.h" +#include "extractor_datasource.h" +#include "extractor_ipc.h" #include <dirent.h> #include <sys/types.h> #include <sys/wait.h> #include <sys/shm.h> #include <signal.h> +/** + * Size of the channel buffer; determines largest IPC message that + * is going to be allowed. FIXME: we might want to grow this + * buffer dynamically instead... + */ +#define CHANNEL_BUFFER_SIZE (1024 * 256) /** - * Definition of an IPC communication channel with - * some plugin. + * A shared memory resource (often shared with several + * other processes). */ -struct EXTRACTOR_Channel +struct EXTRACTOR_SharedMemory { /** + * Pointer to the mapped region of the shm (covers the whole shm) + */ + void *shm_ptr; + + /** + * Page size. Mmap offset is a multiple of this number. + */ + long allocation_granularity; + + /** + * Allocated size of the shm + */ + size_t shm_size; + + /** * POSIX id of the shm into which data is uncompressed */ int shm; @@ -44,447 +71,328 @@ struct EXTRACTOR_Channel */ char shm_name[MAX_SHM_NAME + 1]; +}; + + +/** + * Definition of an IPC communication channel with + * some plugin. + */ +struct EXTRACTOR_Channel +{ + /** - * Pointer to the mapped region of the shm (covers the whole shm) - */ - void *shm_ptr; + * Buffer for reading data from the plugin. + */ + char data[CHANNEL_BUFFER_SIZE]; /** - * Position within shm - */ - int64_t shm_pos; + * Memory segment shared with this process. + */ + struct EXTRACTOR_SharedMemory *shm; /** - * Allocated size of the shm - */ - int64_t shm_size; + * Name of the plugin to use for this channel. + */ + const char *short_libname; /** - * Number of bytes in shm (<= shm_size) - */ - size_t shm_buf_size; + * Pipe used to communicate information to the plugin child process. + * NULL if not initialized. 
+ */ + int cpipe_in; + /** + * Number of valid bytes in the channel's buffer. + */ + size_t size; -}; + /** + * Pipe used to read information about extracted meta data from + * the plugin child process. -1 if not initialized. + */ + int cpipe_out; + /** + * Process ID of the child process for this plugin. 0 for none. + */ + pid_t cpid; -/** - * Opens a shared memory object (for later mmapping). - * This is POSIX variant of the the plugin_open_* function. Shm is always memory-backed. - * Closes a shm is already opened, closes it before opening a new one. - * - * @param plugin plugin context - * @param shm_name name of the shm. - * @return shm id (-1 on error). That is, the result of shm_open() syscall. - */ -static int -plugin_open_shm (struct EXTRACTOR_PluginList *plugin, - const char *shm_name) -{ - if (plugin->shm_id != -1) - close (plugin->shm_id); - plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); - return plugin->shm_id; -} +}; /** - * Opens a file (for later mmapping). - * This is POSIX variant of the plugin_open_* function. - * Closes a file is already opened, closes it before opening a new one. + * Create a shared memory area. * - * @param plugin plugin context - * @param shm_name name of the file to open. - * @return file id (-1 on error). That is, the result of open() syscall. - */ -static int -plugin_open_file (struct EXTRACTOR_PluginList *plugin, - const char *shm_name) + * @param size size of the shared area + * @return NULL on error + */ +struct EXTRACTOR_SharedMemory * +EXTRACTOR_IPC_shared_memory_create_ (size_t size) { - if (plugin->shm_id != -1) - close (plugin->shm_id); - plugin->shm_id = open (shm_name, O_RDONLY, 0); - return plugin->shm_id; + struct EXTRACTOR_SharedMemory *shm; + const char *tpath; + + if (NULL == (shm = malloc (sizeof (struct EXTRACTOR_SharedMemory)))) + return NULL; +#if SOMEBSD + /* this works on FreeBSD, not sure about others... 
*/ + tpath = getenv ("TMPDIR"); + if (tpath == NULL) + tpath = "/tmp/"; +#else + tpath = "/"; /* Linux */ +#endif + snprintf (shm->shm_name, + MAX_SHM_NAME, + "%slibextractor-shm-%u-%u", + tpath, getpid (), + (unsigned int) RANDOM()); + if (-1 == (shm->shm_id = shm_open (shm->shm_name, + O_RDWR | O_CREAT, S_IRUSR | S_IWUSR))) + { + free (shm); + return NULL; + } + if ( (0 != ftruncate (shm->shm_id, size)) || + (NULL == (shm->shm_ptr = mmap (NULL, size, + PROT_WRITE, MAP_SHARED, + shm->shm_id, 0))) || + (((void*) -1) == shm->shm_ptr) ) + { + (void) close (shm->shm_id); + (void) shm_unlink (shm->shm_name); + free (shm); + return NULL; + } + shm->shm_size = size; + return shm; } /** - * Initializes an extracting session for a plugin. - * opens the file/shm (only in OPMODE_FILE) - * sets shm_ptr to NULL (unmaps it, if it was mapped) - * sets position to 0 - * initializes file size to 'fsize' (may be -1) - * sets seek request to 0 + * Destroy shared memory area. * - * @param plugin plugin context - * @param operation_mode the mode of operation (OPMODE_*) - * @param fsize size of the source file (may be -1) - * @param shm_name name of the shm or file to open - * @return 0 on success, non-0 on error. 
- */ -static int -init_state_method (struct EXTRACTOR_PluginList *plugin, - uint8_t operation_mode, - int64_t fsize, - const char *shm_name) + * @param shm memory area to destroy + * @return NULL on error + */ +void +EXTRACTOR_IPC_shared_memory_destroy_ (struct EXTRACTOR_SharedMemory *shm) { - plugin->seek_request = 0; - if (plugin->shm_ptr != NULL) - munmap (plugin->shm_ptr, plugin->map_size); - plugin->shm_ptr = NULL; - if (operation_mode == OPMODE_FILE) - { - if (-1 == plugin_open_file (plugin, shm_name)) - return 1; - } - else if (-1 == plugin_open_shm (plugin, shm_name)) - return 1; - plugin->fsize = fsize; - plugin->shm_pos = 0; - plugin->fpos = 0; - return 0; + munmap (shm->shm_ptr, shm->map_size); + (void) close (plugin->shm_id); + (void) shm_unlink (shm->shm_name); + free (shm); } /** - * Deinitializes an extracting session for a plugin. - * unmaps shm_ptr (if was mapped) - * closes file/shm (if it was opened) - * sets map size and shm_ptr to NULL. + * Initialize shared memory area from data source. * - * @param plugin plugin context - */ -static void -discard_state_method (struct EXTRACTOR_PluginList *plugin) + * @param shm memory area to initialize + * @param ds data source to use for initialization + * @param off offset to use in data source + * @param size number of bytes to copy + * @return -1 on error, otherwise number of bytes copied + */ +ssize_t +EXTRACTOR_IPC_shared_memory_set_ (struct EXTRACTOR_SharedMemory *shm, + struct EXTRACTOR_Datasource *ds, + uint64_t off, + size_t size) { - if (plugin->shm_ptr != NULL && plugin->map_size > 0) - munmap (plugin->shm_ptr, plugin->map_size); - if (plugin->shm_id != -1) - close (plugin->shm_id); - plugin->shm_id = -1; - plugin->map_size = 0; - plugin->shm_ptr = NULL; + if (-1 == + EXTRACTOR_datasource_seek_ (ds, off, SEEK_SET)) + return -1; + if (size > shm->map_size) + size = shm->map_size; + return EXTRACTOR_datasource_read_ (ds, + shm->shm_ptr, + size); } - /** - * Start the process for the given plugin. 
+ * Create a channel to communicate with a process wrapping + * the plugin of the given name. Starts the process as well. + * + * @param short_libname name of the plugin + * @param shm memory to share with the process + * @return NULL on error, otherwise IPC channel */ -static void -start_process (struct EXTRACTOR_PluginList *plugin) +struct EXTRACTOR_Channel * +EXTRACTOR_IPC_channel_create_ (const char *short_libname, + struct EXTRACTOR_SharedMemory *shm) { + struct EXTRACTOR_Channel *channel; int p1[2]; int p2[2]; pid_t pid; int status; - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - if (-1 != plugin->cpid && 0 != plugin->cpid) - return; - break; - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (0 != plugin->cpid) - return; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - return; - break; - case EXTRACTOR_OPTION_DISABLED: - return; - break; - } - - plugin->cpid = -1; + if (NULL == (channel = malloc (sizeof (struct EXTRACTOR_Channel)))) + return NULL; + channel->shm = shm; + channel->short_libname = short_libname; if (0 != pipe (p1)) - { - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } + { + free (channel); + return NULL; + } if (0 != pipe (p2)) - { - close (p1[0]); - close (p1[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } + { + (void) close (p1[0]); + (void) close (p1[1]); + free (channel); + return; + } pid = fork (); - plugin->cpid = pid; if (pid == -1) - { - close (p1[0]); - close (p1[1]); - close (p2[0]); - close (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - if (pid == 0) - { - close (p1[1]); - close (p2[0]); - plugin_main (plugin, p1[0], p2[1]); - _exit (0); - } - close (p1[0]); - close (p2[1]); - plugin->cpipe_in = fdopen (p1[1], "w"); - if (plugin->cpipe_in == NULL) - { - perror ("fdopen"); - (void) kill (plugin->cpid, SIGKILL); - waitpid (plugin->cpid, &status, 0); - close (p1[1]); - close (p2[0]); - plugin->cpid = -1; - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } 
- plugin->cpipe_out = p2[0]; -} - - -/** - * Stop the child process of this plugin. - */ -static void -stop_process (struct EXTRACTOR_PluginList *plugin) -{ - int status; - -#if DEBUG - if (plugin->cpid == -1) - fprintf (stderr, - "Plugin `%s' choked on this input\n", - plugin->short_libname); -#endif - if ( (plugin->cpid == -1) || - (plugin->cpid == 0) ) - return; - kill (plugin->cpid, SIGKILL); - waitpid (plugin->cpid, &status, 0); - plugin->cpid = -1; - close (plugin->cpipe_out); - fclose (plugin->cpipe_in); - plugin->cpipe_out = -1; - plugin->cpipe_in = NULL; - - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - - plugin->seek_request = -1; -} - - -static int -write_plugin_data (const struct EXTRACTOR_PluginList *plugin) -{ - /* This function is only necessary on W32. On POSIX - * systems plugin inherits its own data from the parent */ - return 0; -} - - -/** - * Initializes an extracting session for a plugin. - * opens the file/shm (only in OPMODE_FILE) - * sets shm_ptr to NULL (unmaps it, if it was mapped) - * sets position to 0 - * initializes file size to 'fsize' (may be -1) - * sets seek request to 0 - * - * @param plugin plugin context - * @param operation_mode the mode of operation (OPMODE_*) - * @param fsize size of the source file (may be -1) - * @param shm_name name of the shm or file to open - * @return 0 on success, non-0 on error. 
- */ -static int -init_state_method (struct EXTRACTOR_PluginList *plugin, - uint8_t operation_mode, - int64_t fsize, - const char *shm_name) -{ - plugin->seek_request = 0; - if (plugin->shm_ptr != NULL) - munmap (plugin->shm_ptr, plugin->map_size); - plugin->shm_ptr = NULL; - if (operation_mode == OPMODE_FILE) - { - if (-1 == plugin_open_file (plugin, shm_name)) - return 1; - } - else if (-1 == plugin_open_shm (plugin, shm_name)) - return 1; - plugin->fsize = fsize; - plugin->shm_pos = 0; - plugin->fpos = 0; - return 0; + { + (void) close (p1[0]); + (void) close (p1[1]); + (void) close (p2[0]); + (void) close (p2[1]); + free (channel); + return NULL; + } + if (0 == pid) + { + (void) close (p1[1]); + (void) close (p2[0]); + EXTRACTOR_plugin_main_ (short_libname, p1[0], p2[1]); + _exit (0); + } + (void) close (p1[0]); + (void) close (p2[1]); + channel->cpipe_in = p1[1]; + channel->cpipe_out = p2[0]; + channel->cpid = pid; + return channel; } /** - * Setup a shared memory segment. + * Destroy communication channel with a plugin/process. Also + * destroys the process. * - * @param ptr set to the location of the shm segment - * @param shmid where to store the shm ID - * @param fn name of the shared segment - * @param fn_size size available in fn - * @param size number of bytes to allocated for the segment - * @return 0 on success + * @param channel channel to communicate with the plugin */ -static int -make_shm_posix (void **ptr, - int *shmid, - char *fn, - size_t fn_size, size_t size) +void +EXTRACTOR_IPC_channel_destroy_ (struct EXTRACTOR_Channel *channel) { - const char *tpath; -#if SOMEBSD - /* this works on FreeBSD, not sure about others... 
*/ - tpath = getenv ("TMPDIR"); - if (tpath == NULL) - tpath = "/tmp/"; -#else - tpath = "/"; /* Linux */ -#endif - snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), - (unsigned int) RANDOM()); - *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - *ptr = NULL; - if (-1 == *shmid) - return 1; - if ((0 != ftruncate (*shmid, size)) || - (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || - (*ptr == (void*) -1) ) - { - close (*shmid); - *shmid = -1; - shm_unlink (fn); - return 1; - } - return 0; -} - + int status; -static void -destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name) -{ - if (NULL != ptr) - munmap (ptr, size); - if (shm_id != -1) - close (shm_id); - shm_unlink (shm_name); + (void) kill (channel->cpid, SIGKILL); + (void) waitpid (channel->cpid, &status, 0); + (void) close (channel->cpipe_out); + (void) close (channel->cpipe_in); + free (channel); } /** - * Receive 'size' bytes from plugin, store them in 'buf' + * Send data via the given IPC channel (blocking). 
* - * @param plugin plugin context - * @param buf buffer to fill - * @param size number of bytes to read - * @return number of bytes read, 0 on EOS, < 0 on error + * @param channel channel to communicate with the plugin + * @param buf data to send + * @param size number of bytes in buf to send + * @return -1 on error, number of bytes sent on success + * (never does partial writes) */ -static int -plugin_read (struct EXTRACTOR_PluginList *plugin, - void *buf, - size_t size) +ssize_t +EXTRACTOR_IPC_channel_send_ (struct EXTRACTOR_Channel *channel, + const void *data, + size_t size) { - char *rb = buf; - ssize_t read_result; - size_t read_count = 0; + const char *cdata = data; + size_t off = 0; + ssize_t ret; - while (read_count < size) - { - read_result = read (plugin->cpipe_out, - &rb[read_count], size - read_count); - if (read_result <= 0) - return read_result; - read_count += read_result; - } - return read_count; + while (off < size) + { + ret = write (channel->cpipe_in, &cdata[off], size - off); + if (ret <= 0) + return -1; + off += ret; + } + return size; } /** + * Receive data from any of the given IPC channels (blocking). * Wait for one of the plugins to reply. - * Selects on plugin output pipes, runs receive_reply() + * Selects on plugin output pipes, runs 'receive_reply' * on each activated pipe until it gets a seek request * or a done message. Called repeatedly by the user until all pipes are dry or * broken. * - * @param plugins to select upon - * @param proc metadata callback - * @param proc_cls callback cls - * @return number of dry/broken pipes since last call, -1 on error or if no - * plugins reply in 10 seconds. 
+ * @param channels array of channels, channels that break may be set to NULL + * @param num_channels length of the 'channels' array + * @param proc function to call to process messages (may be called + * more than once) + * @param proc_cls closure for 'proc' + * @return -1 on error, 1 on success */ -static int -wait_for_reply (struct EXTRACTOR_PluginList *plugins, - EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +int +EXTRACTOR_IPC_channel_recv_ (struct EXTRACTOR_Channel **channels, + unsigned int num_channels, + EXTRACTOR_ChannelMessageProcessor proc, + void *proc_cls) { - int ready; - int result; struct timeval tv; fd_set to_check; - int highest = 0; - int read_result; - struct EXTRACTOR_PluginList *ppos; + int max; + unsigned int i; + struct EXTRACTOR_Channel *channel; + ssize_t ret; + ssize_t iret; FD_ZERO (&to_check); - for (ppos = plugins; NULL != ppos; ppos = ppos->next) + max = -1; + for (i=0;i<num_channels;i++) { - switch (ppos->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (ppos->seek_request == -1) - continue; - FD_SET (ppos->cpipe_out, &to_check); - if (highest < ppos->cpipe_out) - highest = ppos->cpipe_out; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - break; - case EXTRACTOR_OPTION_DISABLED: - break; - } + channel = channels[i]; + if (NULL == channel) + continue; + FD_SET (channel->cpipe_out, &to_check); + if (max < channel->cpipe_out) + max = channel->cpipe_out; } - tv.tv_sec = 10; tv.tv_usec = 0; - ready = select (highest + 1, &to_check, NULL, NULL, &tv); - if (ready <= 0) - /* an error or timeout -> something's wrong or all plugins hung up */ - return -1; - - result = 0; - for (ppos = plugins; NULL != ppos; ppos = ppos->next) + if (-1 == select (max + 1, &to_check, NULL, NULL, &tv)) { - switch (ppos->flags) + /* an error or timeout -> something's wrong or all plugins hung up */ + return -1; + } + for (i=0;i<num_channels;i++) + { + channel = channels[i]; + if (NULL == channel) + 
continue; + if (! FD_ISSET (channel->cpipe_out, &to_check)) + continue; + if ( (-1 == (iret = read (channel->cpipe_out, + &channel->data[channel->size], + CHANNEL_BUFFER_SIZE - channel->size)) ) || + (ret = EXTRACTOR_IPC_process_reply_ (channel->data, + channel->size + iret, + proc, proc_cls)) ) + { + EXTRACTOR_IPC_channel_destroy (channel); + channels[i] = NULL; + } + else { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (ppos->seek_request == -1) - continue; - if (FD_ISSET (ppos->cpipe_out, &to_check)) - { - read_result = receive_reply (ppos, proc, proc_cls); - if (read_result < 0) - { - stop_process (ppos); - } - result += 1; - } - break; - case EXTRACTOR_OPTION_IN_PROCESS: - break; - case EXTRACTOR_OPTION_DISABLED: - break; + memmove (channel->data, + &channel->data[ret], + channel->size + iret - ret); + channel->size = channel->size + iret - ret; } } - return result; + return 1; } + +/* end of extractor_ipc_gnu.c */ diff --git a/src/main/extractor_ipc_w32.c b/src/main/extractor_ipc_w32.c @@ -20,10 +20,8 @@ /** - * Definition of an IPC communication channel with - * some plugin. */ -struct EXTRACTOR_Channel +struct EXTRACTOR_SharedMemory { /** @@ -60,6 +58,57 @@ struct EXTRACTOR_Channel }; +/** + * Definition of an IPC communication channel with + * some plugin. + */ +struct EXTRACTOR_Channel +{ + + /** + * Process ID of the child process for this plugin. 0 for none. + */ + HANDLE hProcess; + + /** + * Pipe used to communicate information to the plugin child process. + * NULL if not initialized. + */ + HANDLE cpipe_in; + + /** + * Handle of the shm object + */ + HANDLE map_handle; + + /** + * Pipe used to read information about extracted meta data from + * the plugin child process. -1 if not initialized. + */ + HANDLE cpipe_out; + + /** + * Page size. Mmap offset is a multiple of this number. + */ + DWORD allocation_granularity; + + /** + * A structure for overlapped reads on W32. 
+ */ + OVERLAPPED ov_read; + + /** + * A structure for overlapped writes on W32. + */ + OVERLAPPED ov_write; + + /** + * A write buffer for overlapped writes on W32 + */ + unsigned char *ov_write_buffer; + +}; + /** * Initializes an extracting session for a plugin. @@ -437,71 +486,6 @@ write_plugin_data (struct EXTRACTOR_PluginList *plugin) /** - * Reads plugin data from the LE server process. - * Also initializes allocation granularity (duh...). - * - * @param fd the pipe to read from - * - * @return newly allocated plugin context - */ -static struct EXTRACTOR_PluginList * -read_plugin_data (int fd) -{ - struct EXTRACTOR_PluginList *ret; - size_t i; - - ret = malloc (sizeof (struct EXTRACTOR_PluginList)); - if (ret == NULL) - return NULL; - read (fd, &i, sizeof (size_t)); - ret->libname = malloc (i); - if (ret->libname == NULL) - { - free (ret); - return NULL; - } - read (fd, ret->libname, i); - ret->libname[i - 1] = '\0'; - - read (fd, &i, sizeof (size_t)); - ret->short_libname = malloc (i); - if (ret->short_libname == NULL) - { - free (ret->libname); - free (ret); - return NULL; - } - read (fd, ret->short_libname, i); - ret->short_libname[i - 1] = '\0'; - - read (fd, &i, sizeof (size_t)); - if (i == 0) - { - ret->plugin_options = NULL; - } - else - { - ret->plugin_options = malloc (i); - if (ret->plugin_options == NULL) - { - free (ret->short_libname); - free (ret->libname); - free (ret); - return NULL; - } - read (fd, ret->plugin_options, i); - ret->plugin_options[i - 1] = '\0'; - } - { - SYSTEM_INFO si; - GetSystemInfo (&si); - ret->allocation_granularity = si.dwAllocationGranularity; - } - return ret; -} - - -/** * Start the process for the given plugin. 
*/ static void @@ -513,7 +497,8 @@ start_process (struct EXTRACTOR_PluginList *plugin) PROCESS_INFORMATION proc; char cmd[MAX_PATH + 1]; char arg1[10], arg2[10]; - HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; + HANDLE p10_os_inh = INVALID_HANDLE_VALUE; + HANDLE p21_os_inh = INVALID_HANDLE_VALUE; SECURITY_ATTRIBUTES sa; switch (plugin->flags) @@ -614,6 +599,35 @@ start_process (struct EXTRACTOR_PluginList *plugin) /** + * Receive 'size' bytes from channel, store them in 'buf' + * + * @param plugin plugin context + * @param buf buffer to fill + * @param size number of bytes to read + * @return number of bytes read, 0 on EOS, < 0 on error + */ +static int +plugin_read (struct EXTRACTOR_PluginList *plugin, + void *buf, + size_t size) +{ + char *rb = buf; + ssize_t read_result; + size_t read_count = 0; + + while (read_count < size) + { + read_result = read (plugin->cpipe_out, + &rb[read_count], size - read_count); + if (read_result <= 0) + return read_result; + read_count += read_result; + } + return read_count; +} + + +/** * Stop the child process of this plugin. 
*/ static void @@ -731,38 +745,6 @@ destroy_file_backed_shm_w32 (HANDLE map) #define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) -void CALLBACK -RundllEntryPoint (HWND hwnd, - HINSTANCE hinst, - LPSTR lpszCmdLine, - int nCmdShow) -{ - intptr_t in_h; - intptr_t out_h; - int in; - int out; - - sscanf (lpszCmdLine, "%lu %lu", &in_h, &out_h); - in = _open_osfhandle (in_h, _O_RDONLY); - out = _open_osfhandle (out_h, 0); - setmode (in, _O_BINARY); - setmode (out, _O_BINARY); - plugin_main (read_plugin_data (in), - in, out); -} - - -void CALLBACK -RundllEntryPointA (HWND hwnd, - HINSTANCE hinst, - LPSTR lpszCmdLine, - int nCmdShow) -{ - return RundllEntryPoint (hwnd, hinst, lpszCmdLine, nCmdShow); -} - - - /** * Receive 'size' bytes from plugin, store them in 'buf' * diff --git a/src/main/extractor_plugin_main.c b/src/main/extractor_plugin_main.c @@ -0,0 +1,625 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. 
+ */ +/** + * @file main/extractor_plugin_main.c + * @brief main loop for an out-of-process plugin + * @author Christian Grothoff + */ + +#include "platform.h" +#include "plibc.h" +#include "extractor.h" +#include "extractor_datasource.h" +#include <dirent.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/shm.h> +#include <signal.h> + + + +/** + * Opens a file (for later mmapping). + * This is POSIX variant of the plugin_open_* function. + * Closes a file is already opened, closes it before opening a new one. + * Destroy shared memory area. + * + * @param plugin plugin context + * @param shm_name name of the file to open. + * @return file id (-1 on error). That is, the result of open() syscall. + */ +static int +plugin_open_file (struct EXTRACTOR_PluginList *plugin, + const char *shm_name) +{ + if (plugin->shm_id != -1) + close (plugin->shm_id); + plugin->shm_id = open (shm_name, O_RDONLY, 0); + return plugin->shm_id; +} + + +/** + * Moves current absolute buffer position to @pos in @whence mode. + * Will move logical position withouth shifting the buffer, if possible. + * Will not move beyond the end of file. 
+ * + * @param plugin plugin context + * @param pos position to move to + * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END) + * @return new absolute position, -1 on error + */ +static int64_t +pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) +{ + switch (whence) + { + case SEEK_CUR: + if (plugin->shm_pos + pos < plugin->map_size && plugin->shm_pos + pos >= 0) + { + plugin->shm_pos += pos; + return plugin->fpos + plugin->shm_pos; + } + if (0 != pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->shm_pos + pos, 1)) + return -1; + plugin->shm_pos += pos; + return plugin->fpos + plugin->shm_pos; + break; + case SEEK_SET: + if (pos < 0) + return -1; + if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size) + { + plugin->shm_pos = pos - plugin->fpos; + return pos; + } + if (0 != pl_pick_next_buffer_at (plugin, pos, 1)) + return -1; + if (pos >= plugin->fpos && pos < plugin->fpos + plugin->map_size) + { + plugin->shm_pos = pos - plugin->fpos; + return pos; + } + return -1; + break; + case SEEK_END: + while (plugin->fsize == -1) + { + pl_pick_next_buffer_at (plugin, plugin->fpos + plugin->map_size + pos, 0); + } + if (plugin->fsize + pos - 1 < plugin->fpos || plugin->fsize + pos - 1 > plugin->fpos + plugin->map_size) + { + if (0 != pl_pick_next_buffer_at (plugin, plugin->fsize - MAX_READ, 0)) + return -1; + } + plugin->shm_pos = plugin->fsize + pos - plugin->fpos; + if (plugin->shm_pos < 0) + plugin->shm_pos = 0; + else if (plugin->shm_pos >= plugin->map_size) + plugin->shm_pos = plugin->map_size - 1; + return plugin->fpos + plugin->shm_pos - 1; + break; + } + return -1; +} + + +static int64_t +pl_get_fsize (struct EXTRACTOR_PluginList *plugin) +{ + return plugin->fsize; +} + + +/** + * Fills @data with a pointer to the data buffer. + * Equivalent to read(), except you don't have to allocate and free + * a buffer, since the data is already in memory. 
+ * Will move the buffer, if necessary + * + * @param plugin plugin context + * @param data location to store data pointer + * @param count number of bytes to read + * @return number of bytes (<= count) avalable in @data, -1 on error + */ +static int64_t +pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) +{ + *data = NULL; + if (count > MAX_READ) + return -1; + if (count > plugin->map_size - plugin->shm_pos) + { + int64_t actual_count; + if (plugin->fpos + plugin->shm_pos != pl_seek (plugin, plugin->fpos + plugin->shm_pos, SEEK_SET)) + return -1; + *data = &plugin->shm_ptr[plugin->shm_pos]; + actual_count = (count < plugin->map_size - plugin->shm_pos) ? count : (plugin->map_size - plugin->shm_pos); + plugin->shm_pos += actual_count; + return actual_count; + } + else + { + *data = &plugin->shm_ptr[plugin->shm_pos]; + plugin->shm_pos += count; + return count; + } +} + + +/** + * Initializes an extracting session for a plugin. + * opens the file/shm (only in OPMODE_FILE) + * sets shm_ptr to NULL (unmaps it, if it was mapped) + * sets position to 0 + * initializes file size to 'fsize' (may be -1) + * sets seek request to 0 + * + * @param plugin plugin context + * @param operation_mode the mode of operation (OPMODE_*) + * @param fsize size of the source file (may be -1) + * @param shm_name name of the shm or file to open + * @return 0 on success, non-0 on error. 
+ */ +static int +init_state_method (struct EXTRACTOR_PluginList *plugin, + uint8_t operation_mode, + int64_t fsize, + const char *shm_name) +{ + plugin->seek_request = 0; + if (plugin->shm_ptr != NULL) + munmap (plugin->shm_ptr, plugin->map_size); + plugin->shm_ptr = NULL; + if (operation_mode == OPMODE_FILE) + { + if (-1 == plugin_open_file (plugin, shm_name)) + return 1; + } + else if (-1 == plugin_open_shm (plugin, shm_name)) + return 1; + plugin->fsize = fsize; + plugin->shm_pos = 0; + plugin->fpos = 0; + return 0; +} + + +/** + * Function called by a plugin in a child process. Transmits + * the meta data back to the parent process. + * + * @param cls closure, "int*" of the FD for transmission + * @param plugin_name name of the plugin that produced this value; + * special values can be used (i.e. '<zlib>' for zlib being + * used in the main libextractor library and yielding + * meta data). + * @param type libextractor-type describing the meta data + * @param format basic format information about data + * @param data_mime_type mime-type of data (not of the original file); + * can be NULL (if mime-type is not known) + * @param data actual meta-data found + * @param data_len number of bytes in data + * @return 0 to continue extracting, 1 to abort (transmission error) + */ +static int +transmit_reply (void *cls, + const char *plugin_name, + enum EXTRACTOR_MetaType type, + enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, + const char *data, + size_t data_len) +{ + static const unsigned char meta_byte = MESSAGE_META; + int *cpipe_out = cls; + struct IpcHeader hdr; + size_t mime_len; + + if (NULL == data_mime_type) + mime_len = 0; + else + mime_len = strlen (data_mime_type) + 1; + if (mime_len > MAX_MIME_LEN) + mime_len = MAX_MIME_LEN; + hdr.meta_type = type; + hdr.meta_format = format; + hdr.data_len = data_len; + hdr.mime_len = mime_len; + if ( (sizeof (meta_byte) != + write_all (*cpipe_out, + &meta_byte, sizeof (meta_byte))) || + (sizeof (hdr) != + 
write_all (*cpipe_out, + &hdr, sizeof (hdr))) || + (mime_len != + write_all (*cpipe_out, + data_mime_type, mime_len)) || + (data_len != + write_all (*cpipe_out, + data, data_len)) ) + return 1; + return 0; +} + + +/** + * Main loop function for plugins. Reads a message from the plugin + * input pipe and acts on it. + * + * @param plugin plugin context + * @param in input stream with incoming requests + * @param out output stream for sending responses + */ +static void +process_requests (struct EXTRACTOR_PluginList *plugin, + int in, + int out) +{ + int read_result1; + int read_result2; + int read_result3; + int read_result4; + unsigned char code; + char *shm_name = NULL; + size_t shm_name_len; + int extract_reply; + struct IpcHeader hdr; + int do_break; +#ifdef WINDOWS + HANDLE map; + MEMORY_BASIC_INFORMATION mi; +#endif + + /* The point of recursing into this function is to request + * a seek from LE server and wait for a reply. This snipper + * requests a seek. + */ + if (plugin->waiting_for_update == 1) + { + unsigned char seek_byte = MESSAGE_SEEK; + if (write (out, &seek_byte, 1) != 1) + return -1; + if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t)) + return -1; + } + + memset (&hdr, 0, sizeof (hdr)); + do_break = 0; + while (!do_break) + { + read_result1 = read (in, &code, 1); + if (read_result1 <= 0) + break; + switch (code) + { + case MESSAGE_INIT_STATE: + read_result2 = read (in, &plugin->operation_mode, sizeof (uint8_t)); + read_result3 = read (in, &plugin->fsize, sizeof (int64_t)); + read_result4 = read (in, &shm_name_len, sizeof (size_t)); + if ((read_result2 < sizeof (uint8_t)) || + (read_result3 < sizeof (int64_t)) || + (read_result4 < sizeof (size_t))) + { + do_break = 1; + break; + } + if (plugin->operation_mode != OPMODE_MEMORY && + plugin->operation_mode != OPMODE_DECOMPRESS && + plugin->operation_mode != OPMODE_FILE) + { + do_break = 1; + break; + } + if ((plugin->operation_mode == OPMODE_MEMORY || + 
plugin->operation_mode == OPMODE_DECOMPRESS) && + shm_name_len > MAX_SHM_NAME) + { + do_break = 1; + break; + } + /* Fsize may be -1 only in decompression mode */ + if (plugin->operation_mode != OPMODE_DECOMPRESS && plugin->fsize <= 0) + { + do_break = 1; + break; + } + if (shm_name != NULL) + free (shm_name); + shm_name = malloc (shm_name_len); + if (shm_name == NULL) + { + do_break = 1; + break; + } + read_result2 = read (in, shm_name, shm_name_len); + if (read_result2 < shm_name_len) + { + do_break = 1; + break; + } + shm_name[shm_name_len - 1] = '\0'; + do_break = init_state_method (plugin, plugin->operation_mode, plugin->fsize, shm_name); + /* in OPMODE_MEMORY and OPMODE_FILE we can start extracting right away, + * there won't be UPDATED_SHM message, and we don't need it + */ + if (!do_break && (plugin->operation_mode == OPMODE_MEMORY || + plugin->operation_mode == OPMODE_FILE)) + { + extract_reply = plugin->extract_method (plugin, transmit_reply, &out); + unsigned char done_byte = MESSAGE_DONE; + if (write (out, &done_byte, 1) != 1) + { + do_break = 1; + break; + } + if ((plugin->specials != NULL) && + (NULL != strstr (plugin->specials, "force-kill"))) + { + /* we're required to die after each file since this + plugin only supports a single file at a time */ +#if !WINDOWS + fsync (out); +#else + _commit (out); +#endif + _exit (0); + } + } + break; + case MESSAGE_DISCARD_STATE: + discard_state_method (plugin); + break; + case MESSAGE_UPDATED_SHM: + if (plugin->operation_mode == OPMODE_DECOMPRESS) + { + read_result2 = read (in, &plugin->fpos, sizeof (int64_t)); + read_result3 = read (in, &plugin->map_size, sizeof (size_t)); + read_result4 = read (in, &plugin->fsize, sizeof (int64_t)); + if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) || + plugin->fpos < 0 || (plugin->operation_mode != OPMODE_DECOMPRESS && (plugin->fsize <= 0 || plugin->fpos >= plugin->fsize))) + { + do_break = 1; + break; + } + /* FIXME: also check mapped region size 
(lseek for *nix, VirtualQuery for W32) */ + /* Re-map the shm */ +#if !WINDOWS + if ((-1 == plugin->shm_id) || + (NULL == (plugin->shm_ptr = mmap (NULL, plugin->map_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || + (plugin->shm_ptr == (void *) -1)) + { + do_break = 1; + break; + } +#else + if ((plugin->map_handle == 0) || + (NULL == (plugin->shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0)))) + { + do_break = 1; + break; + } +#endif + if (plugin->waiting_for_update == 1) + { + /* We were only waiting for this one message */ + do_break = 1; + plugin->waiting_for_update = 2; + break; + } + /* Run extractor on mapped region (recursive call doesn't reach this + * point and breaks out earlier. + */ + extract_reply = plugin->extract_method (plugin, transmit_reply, &out); + /* Unmap the shm */ +#if !WINDOWS + if ((plugin->shm_ptr != NULL) && + (plugin->shm_ptr != (void*) -1) ) + munmap (plugin->shm_ptr, plugin->map_size); +#else + if (plugin->shm_ptr != NULL) + UnmapViewOfFile (plugin->shm_ptr); +#endif + plugin->shm_ptr = NULL; + if (extract_reply == 1) + { + /* Tell LE that we're done */ + unsigned char done_byte = MESSAGE_DONE; + if (write (out, &done_byte, 1) != 1) + { + do_break = 1; + break; + } + if ((plugin->specials != NULL) && + (NULL != strstr (plugin->specials, "force-kill"))) + { + /* we're required to die after each file since this + plugin only supports a single file at a time */ +#if !WINDOWS + fsync (out); +#else + _commit (out); +#endif + _exit (0); + } + } + else + { + /* Tell LE that we're not done, and we need to seek */ + unsigned char seek_byte = MESSAGE_SEEK; + if (write (out, &seek_byte, 1) != 1) + { + do_break = 1; + break; + } + if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t)) + { + do_break = 1; + break; + } + } + } + else + { + /* This is mostly to safely skip unrelated messages */ + int64_t t; + size_t t2; + read_result2 = read (in, &t, sizeof (int64_t)); + read_result3 = read (in, &t2, 
sizeof (size_t)); + read_result4 = read (in, &t, sizeof (int64_t)); + } + break; + } + } + return 0; +}
+
+
+/**
+ * 'main' function of the child process.  Loads the plugin, closes
+ * stderr and/or stdout if the plugin's 'specials' string asks for it
+ * ("close-stderr" / "close-stdout"), then runs the request serving
+ * function.
+ *
+ * @param plugin extractor plugin to use
+ * @param in stream to read from
+ * @param out stream to write to
+ */
+void
+EXTRACTOR_plugin_main_ (struct EXTRACTOR_PluginList *plugin,
+                        int in, int out)
+{
+  if (0 != EXTRACTOR_plugin_load_ (plugin))
+    {
+#if DEBUG
+      fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
+#endif
+      return;
+    }
+  if (NULL != plugin->specials)
+    {
+      if (NULL != strstr (plugin->specials, "close-stderr"))
+        close (2);
+      if (NULL != strstr (plugin->specials, "close-stdout"))
+        close (1);
+    }
+  process_requests (plugin, in, out);
+}
+
+
+#if WINDOWS
+/**
+ * Read exactly 'size' bytes from 'fd' into 'buf', looping over
+ * short reads.
+ *
+ * @param fd the pipe to read from
+ * @param buf where to store the data
+ * @param size number of bytes to read
+ * @return 0 on success, -1 on read error or premature end of stream
+ */
+static int
+plugin_data_read_all (int fd, void *buf, size_t size)
+{
+  /* keep each request well inside the range of 'int' */
+  const size_t max_chunk = 1024 * 1024;
+  char *pos = buf;
+  size_t want;
+  int got;
+
+  while (size > 0)
+    {
+      want = (size > max_chunk) ? max_chunk : size;
+      got = read (fd, pos, want);
+      if (got <= 0)
+        return -1;
+      pos += got;
+      size -= (size_t) got;
+    }
+  return 0;
+}
+
+
+/**
+ * Read one length-prefixed string from 'fd': a 'size_t' byte count
+ * followed by that many bytes.  The last byte is forced to 0 so that
+ * the result is always a terminated C string.
+ * NOTE(review): presumably the sender's count already includes the
+ * 0-terminator -- confirm against the writing side.
+ *
+ * @param fd the pipe to read from
+ * @param str set to the newly allocated string, or to NULL if the
+ *        transmitted length was 0 (or on failure)
+ * @return 0 on success, -1 on read error or allocation failure
+ */
+static int
+plugin_data_read_string (int fd, char **str)
+{
+  size_t len;
+  char *buf;
+
+  *str = NULL;
+  if (0 != plugin_data_read_all (fd, &len, sizeof (len)))
+    return -1;
+  if (0 == len)
+    return 0;
+  if (NULL == (buf = malloc (len)))
+    return -1;
+  if (0 != plugin_data_read_all (fd, buf, len))
+    {
+      free (buf);
+      return -1;
+    }
+  buf[len - 1] = '\0';
+  *str = buf;
+  return 0;
+}
+
+
+/**
+ * Reads plugin data from the LE server process.
+ * Also initializes allocation granularity (duh...).
+ *
+ * Three length-prefixed strings are read in this order: library name,
+ * short library name, plugin options.  The two names are mandatory;
+ * the options may be absent (length 0), in which case
+ * 'plugin_options' is NULL.  All other fields of the returned
+ * structure are zero-initialized.
+ *
+ * @param fd the pipe to read from
+ * @return newly allocated plugin context, NULL on error
+ */
+static struct EXTRACTOR_PluginList *
+read_plugin_data (int fd)
+{
+  struct EXTRACTOR_PluginList *ret;
+  SYSTEM_INFO si;
+
+  /* calloc: 'specials' and friends are inspected later and must not
+     contain garbage */
+  if (NULL == (ret = calloc (1, sizeof (struct EXTRACTOR_PluginList))))
+    return NULL;
+  GetSystemInfo (&si);
+  /* NOTE(review): the extractor_plugins.h hunk of this same commit
+     removes 'allocation_granularity' from struct EXTRACTOR_PluginList;
+     confirm that this field still exists. */
+  ret->allocation_granularity = si.dwAllocationGranularity;
+  if ( (0 != plugin_data_read_string (fd, &ret->libname)) ||
+       (NULL == ret->libname) ||
+       (0 != plugin_data_read_string (fd, &ret->short_libname)) ||
+       (NULL == ret->short_libname) ||
+       (0 != plugin_data_read_string (fd, &ret->plugin_options)) )
+    {
+      /* members that were not read yet are still NULL */
+      free (ret->short_libname);
+      free (ret->libname);
+      free (ret);
+      return NULL;
+    }
+  return ret;
+}
+
+
+/**
+ * Entry point of the plugin child process on W32.  The signature
+ * matches what rundll32 expects (presumably the parent starts the
+ * child that way -- confirm against the W32 IPC code).
+ *
+ * The command line must contain two unsigned decimal numbers: the OS
+ * handle of the pipe to read from and the OS handle of the pipe to
+ * write to.  Both are converted to C runtime file descriptors, the
+ * plugin description is read from the input pipe and the plugin main
+ * loop is run.
+ *
+ * @param hwnd unused
+ * @param hinst unused
+ * @param lpszCmdLine command line with the two handle values
+ * @param nCmdShow unused
+ */
+void CALLBACK
+RundllEntryPoint (HWND hwnd,
+                  HINSTANCE hinst,
+                  LPSTR lpszCmdLine,
+                  int nCmdShow)
+{
+  struct EXTRACTOR_PluginList *plugin;
+  unsigned long in_ul;
+  unsigned long out_ul;
+  int in;
+  int out;
+
+  (void) hwnd;
+  (void) hinst;
+  (void) nCmdShow;
+  /* scan into 'unsigned long' (what "%lu" really stores) and widen
+     afterwards; 'intptr_t' is wider than 'long' on 64-bit W32 */
+  if ( (NULL == lpszCmdLine) ||
+       (2 != sscanf (lpszCmdLine, "%lu %lu", &in_ul, &out_ul)) )
+    return;
+  in = _open_osfhandle ((intptr_t) in_ul, _O_RDONLY);
+  out = _open_osfhandle ((intptr_t) out_ul, 0);
+  if ( (-1 == in) || (-1 == out) )
+    {
+      if (-1 != in)
+        close (in);
+      if (-1 != out)
+        close (out);
+      return;
+    }
+  setmode (in, _O_BINARY);
+  setmode (out, _O_BINARY);
+  /* NOTE(review): as before, 'plugin' is never released here;
+     presumably the process terminates right afterwards -- confirm. */
+  if (NULL != (plugin = read_plugin_data (in)))
+    EXTRACTOR_plugin_main_ (plugin,
+                            in, out);
+  close (in);
+  close (out);
+}
+
+
+/**
+ * ANSI-suffixed alias of 'RundllEntryPoint'; simply forwards all
+ * arguments.
+ *
+ * @param hwnd passed through
+ * @param hinst passed through
+ * @param lpszCmdLine passed through
+ * @param nCmdShow passed through
+ */
+void CALLBACK
+RundllEntryPointA (HWND hwnd,
+                   HINSTANCE hinst,
+                   LPSTR lpszCmdLine,
+                   int nCmdShow)
+{
+  RundllEntryPoint (hwnd, hinst, lpszCmdLine, nCmdShow);
+}
+
+
+#endif
diff --git a/src/main/extractor_plugin_main.h b/src/main/extractor_plugin_main.h @@ -0,0 +1,43 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA.
+ */ +/** + * @file main/extractor_plugin_main.h + * @brief main loop for an out-of-process plugin + * @author Christian Grothoff + */ +#ifndef EXTRACTOR_PLUGIN_MAIN_H +#define EXTRACTOR_PLUGIN_MAIN_H + +#include "extractor.h" + + +/** + * 'main' function of the child process. Loads the plugin, + * sets up its in and out pipes, then runs the request serving function. + * + * @param plugin extractor plugin to use + * @param in stream to read from + * @param out stream to write to + */ +void +EXTRACTOR_plugin_main_ (struct EXTRACTOR_PluginList *plugin, + int in, int out); + +#endif diff --git a/src/main/extractor_plugins.c b/src/main/extractor_plugins.c @@ -4,7 +4,7 @@ libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -17,6 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/** + * @file main/extractor_plugins.c + * @brief code to load plugins + * @author Christian Grothoff + */ #include "extractor_plugins.h" #include "extractor_plugpath.h" @@ -330,7 +335,7 @@ EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev, default: abort (); } - if (cpy[last] == '-') + if ('-' == cpy[last]) { last++; prev = EXTRACTOR_plugin_remove (prev, diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h @@ -4,7 +4,7 @@ libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version.
libextractor is distributed in the hope that it will be useful, but @@ -17,17 +17,17 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +/** + * @file main/extractor_plugins.h + * @brief code to load plugins + * @author Christian Grothoff + */ #ifndef EXTRACTOR_PLUGINS_H #define EXTRACTOR_PLUGINS_H #include "platform.h" #include "plibc.h" #include "extractor.h" -#ifndef WINDOWS -#include <sys/wait.h> -#include <sys/shm.h> -#endif #include <signal.h> #include <ltdl.h> @@ -47,7 +47,7 @@ struct EXTRACTOR_PluginList /** * Pointer to the plugin (as returned by lt_dlopen). */ - void * libraryHandle; + void *libraryHandle; /** * Name of the library (i.e., 'libextractor_foo.so') @@ -82,121 +82,11 @@ struct EXTRACTOR_PluginList enum EXTRACTOR_Options flags; /** - * Process ID of the child process for this plugin. 0 for none. - */ -#if !WINDOWS - int cpid; -#else - HANDLE hProcess; -#endif - - /** - * Pipe used to communicate information to the plugin child process. - * NULL if not initialized. - */ -#if !WINDOWS - FILE *cpipe_in; -#else - HANDLE cpipe_in; -#endif - - /** - * Pipe used by plugin to read from its parent. - */ - int pipe_in; - - /** * A position this plugin wants us to seek to. -1 if it's finished. * Starts at 0; */ int64_t seek_request; -#if !WINDOWS - /** - * ID of the shm object - */ - int shm_id; -#else - /** - * Handle of the shm object - */ - HANDLE map_handle; -#endif - - /** - * Used to pass cfs pointer to in-process plugin in OPMODE_DECOMPRESS - */ - void *pass_cfs; - - /** - * Uncompressed stream size. Initially -1, until file is fully decompressed - * (for sources that are not compressed it is set from the start). 
- */ - int64_t fsize; - - /** - * Absolute position within the stream - */ - int64_t fpos; - - /** - * Pointer to the shared memory segment - */ - unsigned char *shm_ptr; - - /** - * Number of bytes in the segment - */ - int64_t map_size; - - /** - * Position within the segment - */ - int64_t shm_pos; - -#if !WINDOWS - /** - * Pipe used to read information about extracted meta data from - * the plugin child process. -1 if not initialized. - */ - int cpipe_out; -#else - /** - * Pipe used to read information about extracted meta data from - * the plugin child process. -1 if not initialized. - */ - HANDLE cpipe_out; -#endif - -#if !WINDOWS - /** - * Page size. Mmap offset is a multiple of this number. - */ - long allocation_granularity; -#else - /** - * Page size. Mmap offset is a multiple of this number. - */ - DWORD allocation_granularity; -#endif - -#if WINDOWS - /** - * A structure for overlapped reads on W32. - */ - OVERLAPPED ov_read; - - /** - * A structure for overlapped writes on W32. - */ - OVERLAPPED ov_write; - - /** - * A write buffer for overlapped writes on W32 - */ - unsigned char *ov_write_buffer; -#endif - /** * Mode of operation. One of the OPMODE_* constants */ diff --git a/src/main/extractor_plugpath.c b/src/main/extractor_plugpath.c @@ -4,7 +4,7 @@ libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -17,6 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ +/** + * @file main/extractor_plugpath.c + * @brief determine path where plugins are installed + * @author Christian Grothoff + */ #include "platform.h" #include "plibc.h" diff --git a/src/main/extractor_plugpath.h b/src/main/extractor_plugpath.h @@ -1,3 +1,27 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ +/** + * @file main/extractor_plugpath.h + * @brief determine path where plugins are installed + * @author Christian Grothoff + */ #ifndef EXTRACTOR_PLUGPATH_H #define EXTRACTOR_PLUGPATH_H @@ -7,8 +31,8 @@ * @param cls closure * @param path a directory path */ -typedef void (*EXTRACTOR_PathProcessor)(void *cls, - const char *path); +typedef void (*EXTRACTOR_PathProcessor) (void *cls, + const char *path); /**