libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit a9ce3d5f0c7a493ab94c724f1a60021f4bdb0533
parent 2ae590d7ef42e827f7700f02bca519b6bb5acbe5
Author: Christian Grothoff <christian@grothoff.org>
Date:   Tue, 27 Mar 2012 13:05:17 +0000

LRN is refactoring the plugin API, and hell breaks loose

Diffstat:
MAUTHORS | 1+
MChangeLog | 5+++++
Mconfigure.ac | 6+++++-
Msrc/include/extractor.h | 13+++++++------
Msrc/main/extractor.c | 3225++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
Msrc/main/extractor_plugins.c | 24++++++++++++++++++++----
Msrc/main/extractor_plugins.h | 62+++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Msrc/plugins/Makefile.am | 385++-----------------------------------------------------------------------------
Msrc/plugins/id3_extractor.c | 149+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Dsrc/plugins/id3v23_extractor.c | 420-------------------------------------------------------------------------------
Dsrc/plugins/id3v24_extractor.c | 455-------------------------------------------------------------------------------
Msrc/plugins/id3v2_extractor.c | 957++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Msrc/plugins/mp3_extractor.c | 425+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Msrc/plugins/template_extractor.c | 122+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
14 files changed, 3244 insertions(+), 3005 deletions(-)

diff --git a/AUTHORS b/AUTHORS @@ -1,6 +1,7 @@ Core Team: Christian Grothoff <christian@grothoff.org> Nils Durner <durner@gnunet.org> +LRN <lrn1986@gmail.com> Formats: html - core team with code from libhtmlparse 0.1.13, http://msalem.translator.cx/libhtmlparse.html diff --git a/ChangeLog b/ChangeLog @@ -1,3 +1,8 @@ +Tue Mar 27 15:04:00 CEST 2012 + Refactoring plugin API to allow seeks to arbitrary positions in the + file (breaks existing plugins, so the current version will not + work). -LRN + Sun Jan 29 17:27:08 CET 2012 Documented recently discovered issues with pthreads and out-of-process plugin executions in the manual. -CG diff --git a/configure.ac b/configure.ac @@ -101,6 +101,8 @@ mingw*) if test "x$mingw32_ws2" = "xno" -a "x$mingw64_ws2" = "xno"; then AC_MSG_ERROR([libextractor requires Winsock2]) fi + # Sufficiently new Windows XP + CFLAGS="-D__MSVCRT_VERSION__=0x0601 $CFLAGS" AC_MSG_CHECKING(for PlibC) plibc=0 @@ -136,6 +138,8 @@ mingw*) if test $plibc -ne 1; then AC_MSG_ERROR([libextractor requires PlibC]) + else + LIBS="$LIBS -lplibc" fi LDFLAGS="$LDFLAGS -Wl,-no-undefined -Wl,--export-all-symbols" @@ -336,7 +340,7 @@ AC_FUNC_STAT AC_FUNC_ERROR_AT_LINE AC_SEARCH_LIBS(dlopen, dl) AC_SEARCH_LIBS(shm_open, rt) -AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup ftruncate shm_open shm_unlink]) +AC_CHECK_FUNCS([mkstemp strndup munmap strcasecmp strdup strncasecmp memmove memset strtoul floor getcwd pow setenv sqrt strchr strcspn strrchr strnlen strndup ftruncate shm_open shm_unlink lseek64]) LE_LIB_LIBS=$LIBS LIBS=$LIBSOLD diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -392,12 +392,6 @@ typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls, * @param options options for this plugin; can be NULL * @return 0 if all calls to proc returned 0, otherwise 1 */ -typedef int (*EXTRACTOR_ExtractMethod)(const char *data, - size_t datasize, - 
EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options); - /** * Linked list of extractor plugins. An application builds this list @@ -407,6 +401,13 @@ typedef int (*EXTRACTOR_ExtractMethod)(const char *data, */ struct EXTRACTOR_PluginList; +typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls); + +typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList *plugin); +typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList *plugin); + + /** * Load the default set of plugins. The default can be changed diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -23,7 +23,7 @@ #include "extractor.h" #include <dirent.h> #include <sys/types.h> -#ifndef WINDOWS +#if !WINDOWS #include <sys/wait.h> #include <sys/shm.h> #endif @@ -59,117 +59,53 @@ */ #define MAX_MIME_LEN 256 +#define MAX_SHM_NAME 255 + /** * Set to 1 to get failure info, * 2 for actual debug info. */ #define DEBUG 1 +#define MESSAGE_INIT_STATE 0x01 +#define MESSAGE_UPDATED_SHM 0x02 +#define MESSAGE_DONE 0x03 +#define MESSAGE_SEEK 0x04 +#define MESSAGE_META 0x05 +#define MESSAGE_DISCARD_STATE 0x06 /** - * Stop the child process of this plugin. + * Header used for our IPC replies. A header + * with all fields being zero is used to indicate + * the end of the stream. 
*/ -static void -stop_process (struct EXTRACTOR_PluginList *plugin) +struct IpcHeader { - int status; -#ifdef WINDOWS - HANDLE process; -#endif - -#if DEBUG -#ifndef WINDOWS - if (plugin->cpid == -1) -#else - if (plugin->hProcess == INVALID_HANDLE_VALUE) -#endif - fprintf (stderr, - "Plugin `%s' choked on this input\n", - plugin->short_libname); -#endif -#ifndef WINDOWS - if ( (plugin->cpid == -1) || - (plugin->cpid == 0) ) - return; - kill (plugin->cpid, SIGKILL); - waitpid (plugin->cpid, &status, 0); - plugin->cpid = -1; - close (plugin->cpipe_out); - fclose (plugin->cpipe_in); -#else - if (plugin->hProcess == INVALID_HANDLE_VALUE || - plugin->hProcess == NULL) - return; - TerminateProcess (plugin->hProcess, 0); - CloseHandle (plugin->hProcess); - plugin->hProcess = INVALID_HANDLE_VALUE; - close (plugin->cpipe_out); - fclose (plugin->cpipe_in); -#endif - plugin->cpipe_out = -1; - plugin->cpipe_in = NULL; -} - + enum EXTRACTOR_MetaType meta_type; + enum EXTRACTOR_MetaFormat meta_format; + size_t data_len; + size_t mime_len; +}; -/** - * Remove a plugin from a list. 
- * - * @param prev the current list of plugins - * @param library the name of the plugin to remove - * @return the reduced list, unchanged if the plugin was not loaded - */ -struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, - const char * library) +#if !WINDOWS +int +plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name) { - struct EXTRACTOR_PluginList *pos; - struct EXTRACTOR_PluginList *first; - - pos = prev; - first = prev; - while ((pos != NULL) && (0 != strcmp (pos->short_libname, library))) - { - prev = pos; - pos = pos->next; - } - if (pos != NULL) - { - /* found, close library */ - if (first == pos) - first = pos->next; - else - prev->next = pos->next; - /* found */ - stop_process (pos); - free (pos->short_libname); - free (pos->libname); - free (pos->plugin_options); - if (NULL != pos->libraryHandle) - lt_dlclose (pos->libraryHandle); - free (pos); - } -#if DEBUG - else - fprintf(stderr, - "Unloading plugin `%s' failed!\n", - library); -#endif - return first; + if (plugin->shm_id != -1) + close (plugin->shm_id); + plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); + return plugin->shm_id; } - - -/** - * Remove all plugins from the given list (destroys the list). 
- * - * @param plugin the list of plugins - */ -void -EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins) +#else +HANDLE +plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name) { - while (plugins != NULL) - plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); + if (plugin->map_handle != 0) + CloseHandle (plugin->map_handle); + plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); + return plugin->map_handle; } - +#endif static int write_all (int fd, @@ -187,44 +123,9 @@ write_all (int fd, return -1; off += ret; } - return 0; -} - - -static int -read_all (int fd, - void *buf, - size_t size) -{ - char *data = buf; - size_t off = 0; - ssize_t ret; - - while (off < size) - { - ret = read (fd, &data[off], size - off); - if (ret <= 0) - return -1; - off += ret; - } - return 0; + return size; } - -/** - * Header used for our IPC replies. A header - * with all fields being zero is used to indicate - * the end of the stream. - */ -struct IpcHeader -{ - enum EXTRACTOR_MetaType type; - enum EXTRACTOR_MetaFormat format; - size_t data_len; - size_t mime_len; -}; - - /** * Function called by a plugin in a child process. Transmits * the meta data back to the parent process. @@ -254,6 +155,8 @@ transmit_reply (void *cls, int *cpipe_out = cls; struct IpcHeader hdr; size_t mime_len; + unsigned char meta_byte = MESSAGE_META; + unsigned char zero_byte = 0; if (data_mime_type == NULL) mime_len = 0; @@ -261,23 +164,19 @@ transmit_reply (void *cls, mime_len = strlen (data_mime_type) + 1; if (mime_len > MAX_MIME_LEN) mime_len = MAX_MIME_LEN; - hdr.type = type; - hdr.format = format; + hdr.meta_type = type; + hdr.meta_format = format; hdr.data_len = data_len; hdr.mime_len = mime_len; - if ( (hdr.type == 0) && - (hdr.format == 0) && - (hdr.data_len == 0) && - (hdr.mime_len == 0) ) - return 0; /* better skip this one, would signal termination... 
*/ - if ( (0 != write_all (*cpipe_out, &hdr, sizeof(hdr))) || - (0 != write_all (*cpipe_out, data_mime_type, mime_len)) || - (0 != write_all (*cpipe_out, data, data_len)) ) - return 1; + if ((1 != write_all (*cpipe_out, &meta_byte, 1)) || + (sizeof(hdr) != write_all (*cpipe_out, &hdr, sizeof(hdr))) || + (mime_len -1 != write_all (*cpipe_out, data_mime_type, mime_len - 1)) || + (1 != write_all (*cpipe_out, &zero_byte, 1)) || + (data_len != write_all (*cpipe_out, data, data_len))) + return 1; return 0; } - /** * 'main' function of the child process. Reads shm-filenames from * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta @@ -288,23 +187,20 @@ transmit_reply (void *cls, * @param out stream to write to */ static void -process_requests (struct EXTRACTOR_PluginList *plugin, - int in, - int out) +process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out) { - char hfn[256]; - char tfn[256]; - char sze[256]; - size_t hfn_len; - size_t tfn_len; - size_t sze_len; - char *fn; - FILE *fin; - void *ptr; - int shmid; + int read_result1, read_result2, read_result3; + unsigned char code; + int64_t fsize = -1; + int64_t position = 0; + void *shm_ptr = NULL; + size_t shm_size = 0; + char *shm_name = NULL; + size_t shm_name_len; + + int extract_reply; + struct IpcHeader hdr; - size_t size; - int want_tail; int do_break; #ifdef WINDOWS HANDLE map; @@ -312,1156 +208,1888 @@ process_requests (struct EXTRACTOR_PluginList *plugin, #endif if (plugin == NULL) - { - close (in); - close (out); - return; - } + { + close (in); + close (out); + return; + } if (0 != plugin_load (plugin)) - { - close (in); - close (out); + { + close (in); + close (out); #if DEBUG - fprintf (stderr, - "Plugin `%s' failed to load!\n", - plugin->short_libname); + fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname); #endif - return; - } - want_tail = 0; - if ( (plugin->specials != NULL) && - (NULL != strstr (plugin->specials, - "want-tail")) ) - { - want_tail = 1; 
- } - if ( (plugin->specials != NULL) && - (NULL != strstr (plugin->specials, - "close-stderr")) ) - { - close (2); - } - if ( (plugin->specials != NULL) && - (NULL != strstr (plugin->specials, - "close-stdout")) ) - { - close (1); - } + return; + } + if ((plugin->specials != NULL) && + (NULL != strstr (plugin->specials, "close-stderr"))) + close (2); + if ((plugin->specials != NULL) && + (NULL != strstr (plugin->specials, "close-stdout"))) + close (1); memset (&hdr, 0, sizeof (hdr)); - fin = fdopen (in, "r"); - if (fin == NULL) - { - close (in); - close (out); - return; - } - while (NULL != fgets (hfn, sizeof(hfn), fin)) + do_break = 0; + while (!do_break) + { + read_result1 = read (in, &code, 1); + if (read_result1 <= 0) + break; + switch (code) { - hfn_len = strlen (hfn); - if (hfn_len <= 1) - break; - ptr = NULL; - hfn[--hfn_len] = '\0'; /* kill newline */ - if (NULL == fgets (tfn, sizeof(tfn), fin)) - break; - if ('!' != tfn[0]) - break; - tfn_len = strlen (tfn); - tfn[--tfn_len] = '\0'; /* kill newline */ - if ( (want_tail) && - (tfn_len > 1) ) - { - fn = &tfn[1]; - } - else - { - fn = hfn; - } - if (NULL == fgets (sze, sizeof(sze), fin)) - break; - if ('s' != sze[0]) - break; - sze_len = strlen (sze); - sze[--sze_len] = '\0'; /* kill newline */ - size = strtol (&sze[1], NULL, 10); - if (size == LONG_MIN || size == LONG_MAX || size == 0) + case MESSAGE_INIT_STATE: + read_result2 = read (in, &fsize, sizeof (int64_t)); + read_result3 = read (in, &shm_name_len, sizeof (size_t)); + if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) || + shm_name_len > MAX_SHM_NAME || fsize <= 0) + { + do_break = 1; + break; + } + if (shm_name != NULL) + free (shm_name); + shm_name = malloc (shm_name_len); + if (shm_name == NULL) + { + do_break = 1; + break; + } + read_result2 = read (in, shm_name, shm_name_len); + if (read_result2 < shm_name_len) + { + do_break = 1; + break; + } + shm_name[shm_name_len - 1] = '\0'; +#if !WINDOWS + if (shm_ptr != NULL) + 
munmap (shm_ptr, shm_size); + if (-1 == plugin_open_shm (plugin, shm_name)) + { + do_break = 1; break; - do_break = 0; -#ifndef WINDOWS - if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) && - (SIZE_MAX != (size = lseek (shmid, 0, SEEK_END))) && - (NULL != (ptr = mmap (NULL, size, PROT_READ, MAP_SHARED, shmid, 0))) && - (ptr != (void*) -1) ) + } #else - /* Despite the obvious, this must be READWRITE, not READONLY */ - map = OpenFileMapping (PAGE_READWRITE, FALSE, fn); - ptr = MapViewOfFile (map, FILE_MAP_READ, 0, 0, 0); - if (ptr != NULL) + if (shm_ptr != NULL) + UnmapViewOfFile (shm_ptr); + if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name)) { - if (0 == VirtualQuery (ptr, &mi, sizeof (mi)) || mi.RegionSize < size) - { - UnmapViewOfFile (ptr); - ptr = NULL; - } + do_break = 1; + break; + } +#endif + plugin->fsize = fsize; + plugin->init_state_method (plugin); + break; + case MESSAGE_DISCARD_STATE: + plugin->discard_state_method (plugin); +#if !WINDOWS + if (shm_ptr != NULL && shm_size > 0) + munmap (shm_ptr, shm_size); + if (plugin->shm_id != -1) + close (plugin->shm_id); + plugin->shm_id = -1; + shm_size = 0; +#else + if (shm_ptr != NULL) + UnmapViewOfFile (shm_ptr); + if (plugin->map_handle != 0) + CloseHandle (plugin->map_handle); + plugin->map_handle = 0; +#endif + shm_ptr = NULL; + break; + case MESSAGE_UPDATED_SHM: + read_result2 = read (in, &position, sizeof (int64_t)); + read_result3 = read (in, &shm_size, sizeof (size_t)); + if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) || + position < 0 || fsize <= 0 || position >= fsize) + { + do_break = 1; + break; + } + /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */ +#if !WINDOWS + if ((-1 == plugin->shm_id) || + (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || + (shm_ptr == (void *) -1)) + { + do_break = 1; + break; + } +#else + if ((plugin->map_handle == 0) || + (NULL == (shm_ptr = MapViewOfFile 
(plugin->map_handle, FILE_MAP_READ, 0, 0, 0)))) + { + do_break = 1; + break; } - if (ptr != NULL) #endif - { - if ( ( (plugin->extractMethod != NULL) && - (0 != plugin->extractMethod (ptr, - size, - &transmit_reply, - &out, - plugin->plugin_options)) ) || - (0 != write_all (out, &hdr, sizeof(hdr))) ) - do_break = 1; - } -#ifndef WINDOWS - if ( (ptr != NULL) && - (ptr != (void*) -1) ) - munmap (ptr, size); - if (-1 != shmid) - close (shmid); + plugin->position = position; + plugin->shm_ptr = shm_ptr; + plugin->map_size = shm_size; + /* Now, ideally a plugin would do reads and seeks on a virtual "plugin" object + * completely transparently, and the underlying code would return bytes from + * the memory map, or would block and wait for a seek to happen. + * That, however, requires somewhat different architecture, and even more wrapping + * and hand-helding. It's easier to make plugins aware of the fact that they work + * with discrete in-memory buffers with expensive seeking, not continuous files. 
+ */ + extract_reply = plugin->extract_method (plugin, transmit_reply, &out); +#if !WINDOWS + if ((shm_ptr != NULL) && + (shm_ptr != (void*) -1) ) + munmap (shm_ptr, shm_size); +#else + if (shm_ptr != NULL) + UnmapViewOfFile (shm_ptr); +#endif + if (extract_reply == 1) + { + unsigned char done_byte = MESSAGE_DONE; + if (write (out, &done_byte, 1) != 1) + { + do_break = 1; + break; + } + if ((plugin->specials != NULL) && + (NULL != strstr (plugin->specials, "force-kill"))) + { + /* we're required to die after each file since this + plugin only supports a single file at a time */ +#if !WINDOWS + fsync (out); #else - if (ptr != NULL && ptr != (void*) -1) - UnmapViewOfFile (ptr); - if (map != NULL) - CloseHandle (map); + _commit (out); #endif - if (do_break) - break; - if ( (plugin->specials != NULL) && - (NULL != strstr (plugin->specials, - "force-kill")) ) - { - /* we're required to die after each file since this - plugin only supports a single file at a time */ - _exit (0); - } + _exit (0); + } + } + else + { + unsigned char seek_byte = MESSAGE_SEEK; + if (write (out, &seek_byte, 1) != 1) + { + do_break = 1; + break; + } + if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t)) + { + do_break = 1; + break; + } + } + break; } - fclose (fin); + } + close (in); close (out); } +#if !WINDOWS -#ifdef WINDOWS +/** + * Start the process for the given plugin. 
+ */ static void -write_plugin_data (int fd, const struct EXTRACTOR_PluginList *plugin) -{ - size_t i; - DWORD len; - char *str; - - i = strlen (plugin->libname) + 1; - write (fd, &i, sizeof (size_t)); - write (fd, plugin->libname, i); - i = strlen (plugin->short_libname) + 1; - write (fd, &i, sizeof (size_t)); - write (fd, plugin->short_libname, i); - if (plugin->plugin_options != NULL) - { - i = strlen (plugin->plugin_options) + 1; - str = plugin->plugin_options; - } - else - { - i = 0; - } - write (fd, &i, sizeof (size_t)); - if (i > 0) - write (fd, str, i); -} - -static struct EXTRACTOR_PluginList * -read_plugin_data (int fd) +start_process (struct EXTRACTOR_PluginList *plugin) { - struct EXTRACTOR_PluginList *ret; - size_t i; + int p1[2]; + int p2[2]; + pid_t pid; + int status; - ret = malloc (sizeof (struct EXTRACTOR_PluginList)); - if (ret == NULL) - return NULL; - read (fd, &i, sizeof (size_t)); - ret->libname = malloc (i); - if (ret->libname == NULL) - { - free (ret); - return NULL; - } - read (fd, ret->libname, i); + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + if (-1 != plugin->cpid && 0 != plugin->cpid) + return; + break; + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (0 != plugin->cpid) + return; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + return; + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; + } - read (fd, &i, sizeof (size_t)); - ret->short_libname = malloc (i); - if (ret->short_libname == NULL) + plugin->cpid = -1; + if (0 != pipe (p1)) + { + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + if (0 != pipe (p2)) + { + close (p1[0]); + close (p1[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + pid = fork (); + plugin->cpid = pid; + if (pid == -1) + { + close (p1[0]); + close (p1[1]); + close (p2[0]); + close (p2[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + if (pid == 0) + { + close (p1[1]); + close (p2[0]); + process_requests (plugin, p1[0], p2[1]); + 
_exit (0); + } + close (p1[0]); + close (p2[1]); + plugin->cpipe_in = fdopen (p1[1], "w"); + if (plugin->cpipe_in == NULL) + { + perror ("fdopen"); + (void) kill (plugin->cpid, SIGKILL); + waitpid (plugin->cpid, &status, 0); + close (p1[1]); + close (p2[0]); + plugin->cpid = -1; + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + plugin->cpipe_out = p2[0]; +} + +/** + * Stop the child process of this plugin. + */ +static void +stop_process (struct EXTRACTOR_PluginList *plugin) +{ + int status; + +#if DEBUG + if (plugin->cpid == -1) + fprintf (stderr, + "Plugin `%s' choked on this input\n", + plugin->short_libname); +#endif + if ( (plugin->cpid == -1) || + (plugin->cpid == 0) ) + return; + kill (plugin->cpid, SIGKILL); + waitpid (plugin->cpid, &status, 0); + plugin->cpid = -1; + close (plugin->cpipe_out); + fclose (plugin->cpipe_in); + plugin->cpipe_out = -1; + plugin->cpipe_in = NULL; + + if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) + plugin->flags = EXTRACTOR_OPTION_DISABLED; + + plugin->seek_request = -1; +} + +static int +write_plugin_data (const struct EXTRACTOR_PluginList *plugin) +{ + /* only does anything on Windows */ + return 0; +} + +#define plugin_print(plug, fmt, ...) fprintf (plug->cpipe_in, fmt, ...) +#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) + +#else /* WINDOWS */ + +#ifndef PIPE_BUF +#define PIPE_BUF 512 +#endif + +/* Copyright Bob Byrnes <byrnes <at> curl.com> + http://permalink.gmane.org/gmane.os.cygwin.patches/2121 +*/ +/* Create a pipe, and return handles to the read and write ends, + just like CreatePipe, but ensure that the write end permits + FILE_READ_ATTRIBUTES access, on later versions of win32 where + this is supported. This access is needed by NtQueryInformationFile, + which is used to implement select and nonblocking writes. + Note that the return value is either NO_ERROR or GetLastError, + unlike CreatePipe, which returns a bool for success or failure. 
*/ +static int +create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr, + LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize, + DWORD dwReadMode, DWORD dwWriteMode) +{ + /* Default to error. */ + *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE; + + HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE; + + /* Ensure that there is enough pipe buffer space for atomic writes. */ + if (psize < PIPE_BUF) + psize = PIPE_BUF; + + char pipename[MAX_PATH]; + + /* Retry CreateNamedPipe as long as the pipe name is in use. + * Retrying will probably never be necessary, but we want + * to be as robust as possible. */ + while (1) + { + static volatile LONG pipe_unique_id; + + snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld", + getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id)); + /* Use CreateNamedPipe instead of CreatePipe, because the latter + * returns a write handle that does not permit FILE_READ_ATTRIBUTES + * access, on versions of win32 earlier than WinXP SP2. + * CreatePipe also stupidly creates a full duplex pipe, which is + * a waste, since only a single direction is actually used. + * It's important to only allow a single instance, to ensure that + * the pipe was not created earlier by some other process, even if + * the pid has been reused. We avoid FILE_FLAG_FIRST_PIPE_INSTANCE + * because that is only available for Win2k SP2 and WinXP. */ + read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1, /* max instances */ + psize, /* output buffer size */ + psize, /* input buffer size */ + NMPWAIT_USE_DEFAULT_WAIT, sa_ptr); + + if (read_pipe != INVALID_HANDLE_VALUE) + { + break; + } + + DWORD err = GetLastError (); + + switch (err) + { + case ERROR_PIPE_BUSY: + /* The pipe is already open with compatible parameters. + * Pick a new name and retry. */ + continue; + case ERROR_ACCESS_DENIED: + /* The pipe is already open with incompatible parameters. 
+ * Pick a new name and retry. */ + continue; + case ERROR_CALL_NOT_IMPLEMENTED: + /* We are on an older Win9x platform without named pipes. + * Return an anonymous pipe as the best approximation. */ + if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize)) + { + return 0; + } + err = GetLastError (); + return err; + default: + return err; + } + /* NOTREACHED */ + } + + /* Open the named pipe for writing. + * Be sure to permit FILE_READ_ATTRIBUTES access. */ + write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0, /* share mode */ + sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and attributes */ + 0); /* handle to template file */ + + if (write_pipe == INVALID_HANDLE_VALUE) + { + /* Failure. */ + DWORD err = GetLastError (); + + CloseHandle (read_pipe); + return err; + } + + /* Success. */ + *read_pipe_ptr = read_pipe; + *write_pipe_ptr = write_pipe; + return 0; +} + +static int +write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf) +{ + DWORD written; + BOOL bresult; + DWORD err; + + if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE)) + return -1; + + ResetEvent (ov->hEvent); + + if (*old_buf != NULL) + free (*old_buf); + + *old_buf = malloc (size); + if (*old_buf == NULL) + return -1; + memcpy (*old_buf, buf, size); + written = 0; + ov->Offset = 0; + ov->OffsetHigh = 0; + ov->Pointer = 0; + ov->Internal = 0; + ov->InternalHigh = 0; + bresult = WriteFile (h, *old_buf, size, &written, ov); + + if (bresult == TRUE) + { + SetEvent (ov->hEvent); + free (*old_buf); + *old_buf = NULL; + return written; + } + + err = GetLastError (); + if (err == ERROR_IO_PENDING) + return size; + SetEvent (ov->hEvent); + *old_buf = NULL; + SetLastError (err); + return -1; +} + +static int +print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt, ...) 
+{ + va_list va; + va_list vacp; + size_t size; + char *print_buf; + int result; + + va_start (va, fmt); + va_copy (vacp, va); + size = VSNPRINTF (NULL, 0, fmt, vacp) + 1; + va_end (vacp); + if (size <= 0) + { + va_end (va); + return size; + } + + print_buf = malloc (size); + if (print_buf == NULL) + return -1; + VSNPRINTF (print_buf, size, fmt, va); + va_end (va); + + result = write_to_pipe (h, ov, print_buf, size, buf); + free (buf); + return result; +} + +#define plugin_print(plug, fmt, ...) print_to_pipe (plug->cpipe_in, &plug->ov_write, &plug->ov_write_buffer, fmt, ...) +#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) + +static int +write_plugin_data (struct EXTRACTOR_PluginList *plugin) +{ + size_t libname_len, shortname_len, opts_len; + DWORD len; + char *str; + size_t total_len = 0; + unsigned char *buf, *ptr; + + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + break; + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + break; + case EXTRACTOR_OPTION_IN_PROCESS: + return 0; + break; + case EXTRACTOR_OPTION_DISABLED: + return 0; + break; + } + + libname_len = strlen (plugin->libname) + 1; + total_len += sizeof (size_t) + libname_len; + shortname_len = strlen (plugin->short_libname) + 1; + total_len += sizeof (size_t) + shortname_len; + if (plugin->plugin_options != NULL) + { + opts_len = strlen (plugin->plugin_options) + 1; + total_len += opts_len; + } + else + { + opts_len = 0; + } + total_len += sizeof (size_t); + + buf = malloc (total_len); + if (buf == NULL) + return -1; + ptr = buf; + memcpy (ptr, &libname_len, sizeof (size_t)); + ptr += sizeof (size_t); + memcpy (ptr, plugin->libname, libname_len); + ptr += libname_len; + memcpy (ptr, &shortname_len, sizeof (size_t)); + ptr += sizeof (size_t); + memcpy (ptr, plugin->short_libname, shortname_len); + ptr += shortname_len; + memcpy (ptr, &opts_len, sizeof (size_t)); + ptr += sizeof (size_t); + if (opts_len > 0) + 
{ + memcpy (ptr, plugin->plugin_options, opts_len); + ptr += opts_len; + } + if (total_len != write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf, total_len, &plugin->ov_write_buffer)) + { + free (buf); + return -1; + } + free (buf); + return 0; +} + +static struct EXTRACTOR_PluginList * +read_plugin_data (int fd) +{ + struct EXTRACTOR_PluginList *ret; + size_t i; + + ret = malloc (sizeof (struct EXTRACTOR_PluginList)); + if (ret == NULL) + return NULL; + read (fd, &i, sizeof (size_t)); + ret->libname = malloc (i); + if (ret->libname == NULL) + { + free (ret); + return NULL; + } + read (fd, ret->libname, i); + ret->libname[i - 1] = '\0'; + + read (fd, &i, sizeof (size_t)); + ret->short_libname = malloc (i); + if (ret->short_libname == NULL) + { + free (ret->libname); + free (ret); + return NULL; + } + read (fd, ret->short_libname, i); + ret->short_libname[i - 1] = '\0'; + + read (fd, &i, sizeof (size_t)); + if (i == 0) + { + ret->plugin_options = NULL; + } + else + { + ret->plugin_options = malloc (i); + if (ret->plugin_options == NULL) { + free (ret->short_libname); free (ret->libname); free (ret); return NULL; } - read (fd, ret->short_libname, i); + read (fd, ret->plugin_options, i); + ret->plugin_options[i - 1] = '\0'; + } + return ret; +} + +/** + * Start the process for the given plugin. 
+ */ +static void +start_process (struct EXTRACTOR_PluginList *plugin) +{ + HANDLE p1[2]; + HANDLE p2[2]; + STARTUPINFO startup; + PROCESS_INFORMATION proc; + char cmd[MAX_PATH + 1]; + char arg1[10], arg2[10]; + HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; + SECURITY_ATTRIBUTES sa; + + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0) + return; + break; + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (plugin->hProcess != 0) + return; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + return; + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; + } + + sa.nLength = sizeof (sa); + sa.lpSecurityDescriptor = NULL; + sa.bInheritHandle = FALSE; + + plugin->hProcess = NULL; + + if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) + { + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) + { + CloseHandle (p1[0]); + CloseHandle (p1[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + + memset (&startup, 0, sizeof (STARTUPINFO)); + + if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (), + &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS) + || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (), + &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) + { + if (p10_os_inh != INVALID_HANDLE_VALUE) + CloseHandle (p10_os_inh); + if (p21_os_inh != INVALID_HANDLE_VALUE) + CloseHandle (p21_os_inh); + CloseHandle (p1[0]); + CloseHandle (p1[1]); + CloseHandle (p2[0]); + CloseHandle (p2[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + + snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh); + cmd[MAX_PATH] = '\0'; + if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, + 
&startup, &proc)) + { + plugin->hProcess = proc.hProcess; + CloseHandle (proc.hThread); + } + else + { + CloseHandle (p1[0]); + CloseHandle (p1[1]); + CloseHandle (p2[0]); + CloseHandle (p2[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + CloseHandle (p1[0]); + CloseHandle (p2[1]); + CloseHandle (p10_os_inh); + CloseHandle (p21_os_inh); + + plugin->cpipe_in = p1[1]; + plugin->cpipe_out = p2[0]; + + memset (&plugin->ov_read, 0, sizeof (OVERLAPPED)); + memset (&plugin->ov_write, 0, sizeof (OVERLAPPED)); + + plugin->ov_write_buffer = NULL; + + plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); + plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); +} + +/** + * Stop the child process of this plugin. + */ +static void +stop_process (struct EXTRACTOR_PluginList *plugin) +{ + int status; + HANDLE process; + +#if DEBUG + if (plugin->hProcess == INVALID_HANDLE_VALUE) + fprintf (stderr, + "Plugin `%s' choked on this input\n", + plugin->short_libname); +#endif + if (plugin->hProcess == INVALID_HANDLE_VALUE || + plugin->hProcess == NULL) + return; + TerminateProcess (plugin->hProcess, 0); + CloseHandle (plugin->hProcess); + plugin->hProcess = INVALID_HANDLE_VALUE; + CloseHandle (plugin->cpipe_out); + CloseHandle (plugin->cpipe_in); + plugin->cpipe_out = INVALID_HANDLE_VALUE; + plugin->cpipe_in = INVALID_HANDLE_VALUE; + CloseHandle (plugin->ov_read.hEvent); + CloseHandle (plugin->ov_write.hEvent); + if (plugin->ov_write_buffer != NULL) + { + free (plugin->ov_write_buffer); + plugin->ov_write_buffer = NULL; + } + + if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) + plugin->flags = EXTRACTOR_OPTION_DISABLED; + + plugin->seek_request = -1; +} + +#endif /* WINDOWS */ + +/** + * Remove a plugin from a list. 
+ * + * @param prev the current list of plugins + * @param library the name of the plugin to remove + * @return the reduced list, unchanged if the plugin was not loaded + */ +struct EXTRACTOR_PluginList * +EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, + const char * library) +{ + struct EXTRACTOR_PluginList *pos; + struct EXTRACTOR_PluginList *first; + + pos = prev; + first = prev; + while ((pos != NULL) && (0 != strcmp (pos->short_libname, library))) + { + prev = pos; + pos = pos->next; + } + if (pos != NULL) + { + /* found, close library */ + if (first == pos) + first = pos->next; + else + prev->next = pos->next; + /* found */ + stop_process (pos); + free (pos->short_libname); + free (pos->libname); + free (pos->plugin_options); + if (NULL != pos->libraryHandle) + lt_dlclose (pos->libraryHandle); + free (pos); + } +#if DEBUG + else + fprintf(stderr, + "Unloading plugin `%s' failed!\n", + library); +#endif + return first; +} + + +/** + * Remove all plugins from the given list (destroys the list). + * + * @param plugin the list of plugins + */ +void +EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins) +{ + while (plugins != NULL) + plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); +} + + + +/** + * Open a file + */ +static int file_open(const char *filename, int oflag, ...) 
+{ + int mode; + const char *fn; +#ifdef MINGW + char szFile[_MAX_PATH + 1]; + long lRet; + + if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS) + { + errno = ENOENT; + SetLastError(lRet); + return -1; + } + fn = szFile; +#else + fn = filename; +#endif + mode = 0; +#ifdef MINGW + /* Set binary mode */ + mode |= O_BINARY; +#endif + return OPEN(fn, oflag, mode); +} + +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 +#endif + +#if HAVE_ZLIB +#define MIN_ZLIB_HEADER 12 +#endif +#if HAVE_LIBBZ2 +#define MIN_BZ2_HEADER 4 +#endif +#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB +#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER +#endif +#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2 +#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER +#endif +#if !defined (MIN_COMPRESSED_HEADER) +#define MIN_COMPRESSED_HEADER -1 +#endif + +#define COMPRESSED_DATA_PROBE_SIZE 3 + +/** + * Try to decompress compressed data + * + * @param data data to decompress, or NULL (if fd is not -1) + * @param fd file to read data from, or -1 (if data is not NULL) + * @param fsize size of data (if data is not NULL) or size of fd file (if fd is not -1) + * @param compression_type type of compression, as returned by get_compression_type () + * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it. 
+ * @param buffer_size a pointer to buffer size + * @param proc callback for metadata + * @param proc_cls cls for proc + * @return 0 on success, anything else on error + */ +static int +try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int compression_type, void **buffer, size_t *buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + unsigned char *new_buffer; + ssize_t read_result; + + unsigned char *buf; + unsigned char *rbuf; + size_t dsize; +#if HAVE_ZLIB + z_stream strm; + int ret; + size_t pos; +#endif +#if HAVE_LIBBZ2 + bz_stream bstrm; + int bret; + size_t bpos; +#endif + + if (fd != -1) + { + if (fsize > *buffer_size) + { + /* Read the rest of the file. Can't de-compress it partially anyway */ + /* Memory mapping is not useful here, because memory mapping ALSO takes up + * memory (even more than a buffer, since it might be aligned), and + * because we need to read every byte anyway (lazy on-demand reads into + * memory provided by memory mapping won't help). 
+ */ + new_buffer = realloc (*buffer, fsize); + if (new_buffer == NULL) + { + free (*buffer); + return -1; + } + read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size); + if (read_result != fsize - *buffer_size) + { + free (*buffer); + return -1; + } + *buffer = new_buffer; + *buffer_size = fsize; + } + data = (const unsigned char *) new_buffer; + } + +#if HAVE_ZLIB + if (compression_type == 1) + { + /* Process gzip header */ + unsigned int gzip_header_length = 10; + + if (data[3] & 0x4) /* FEXTRA set */ + gzip_header_length += 2 + (unsigned) (data[10] & 0xff) + + (((unsigned) (data[11] & 0xff)) * 256); + + if (data[3] & 0x8) /* FNAME set */ + { + const unsigned char *cptr = data + gzip_header_length; + + /* stored file name is here */ + while ((cptr - data) < fsize) + { + if ('\0' == *cptr) + break; + cptr++; + } + + if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, + EXTRACTOR_METAFORMAT_C_STRING, "text/plain", + (const char *) (data + gzip_header_length), + cptr - (data + gzip_header_length))) + return 0; /* done */ + + gzip_header_length = (cptr - data) + 1; + } + + if (data[3] & 0x16) /* FCOMMENT set */ + { + const unsigned char * cptr = data + gzip_header_length; + + /* stored comment is here */ + while (cptr < data + fsize) + { + if ('\0' == *cptr) + break; + cptr ++; + } + + if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, "text/plain", + (const char *) (data + gzip_header_length), + cptr - (data + gzip_header_length))) + return 0; /* done */ + + gzip_header_length = (cptr - data) + 1; + } + + if (data[3] & 0x2) /* FCHRC set */ + gzip_header_length += 2; + + memset (&strm, 0, sizeof (z_stream)); + +#ifdef ZLIB_VERNUM + gzip_header_length = 0; +#endif + + if (fsize > gzip_header_length) + { + strm.next_in = (Bytef *) data + gzip_header_length; + strm.avail_in = fsize - gzip_header_length; + } + else + { + strm.next_in = (Bytef *) data; + strm.avail_in = 0; + } + strm.total_in = 0; 
+ strm.zalloc = NULL; + strm.zfree = NULL; + strm.opaque = NULL; + + /* + * note: maybe plain inflateInit(&strm) is adequate, + * it looks more backward-compatible also ; + * + * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; + * there might be a better check. + */ + if (Z_OK == inflateInit2 (&strm, +#ifdef ZLIB_VERNUM + 15 + 32 +#else + -MAX_WBITS +#endif + )) + { + pos = 0; + dsize = 2 * fsize; + if ( (dsize > MAX_DECOMPRESS) || + (dsize < fsize) ) + dsize = MAX_DECOMPRESS; + buf = malloc (dsize); + + if (buf != NULL) + { + strm.next_out = (Bytef *) buf; + strm.avail_out = dsize; + + do + { + ret = inflate (&strm, Z_SYNC_FLUSH); + if (ret == Z_OK) + { + if (dsize == MAX_DECOMPRESS) + break; + + pos += strm.total_out; + strm.total_out = 0; + dsize *= 2; + + if (dsize > MAX_DECOMPRESS) + dsize = MAX_DECOMPRESS; + + rbuf = realloc (buf, dsize); + if (rbuf == NULL) + { + free (buf); + buf = NULL; + break; + } + + buf = rbuf; + strm.next_out = (Bytef *) &buf[pos]; + strm.avail_out = dsize - pos; + } + else if (ret != Z_STREAM_END) + { + /* error */ + free (buf); + buf = NULL; + } + } while ((buf != NULL) && (ret != Z_STREAM_END)); + + dsize = pos + strm.total_out; + if ((dsize == 0) && (buf != NULL)) + { + free (buf); + buf = NULL; + } + } + + inflateEnd (&strm); + + if (fd != -1) + if (*buffer != NULL) + free (*buffer); + + if (buf == NULL) + { + return -1; + } + else + { + *buffer = buf; + *buffer_size = dsize; + return 0; + } + } + } +#endif + +#if HAVE_LIBBZ2 + if (compression_type == 2) + { + memset(&bstrm, 0, sizeof (bz_stream)); + bstrm.next_in = (char *) data; + bstrm.avail_in = fsize; + bstrm.total_in_lo32 = 0; + bstrm.total_in_hi32 = 0; + bstrm.bzalloc = NULL; + bstrm.bzfree = NULL; + bstrm.opaque = NULL; + if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0)) + { + bpos = 0; + dsize = 2 * fsize; + if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) ) + dsize = MAX_DECOMPRESS; + buf = malloc (dsize); + + if (buf != NULL) + { + bstrm.next_out = (char *) buf; + 
bstrm.avail_out = dsize; + + do + { + bret = BZ2_bzDecompress (&bstrm); + if (bret == Z_OK) + { + if (dsize == MAX_DECOMPRESS) + break; + bpos += bstrm.total_out_lo32; + bstrm.total_out_lo32 = 0; + + dsize *= 2; + if (dsize > MAX_DECOMPRESS) + dsize = MAX_DECOMPRESS; + + rbuf = realloc(buf, dsize); + if (rbuf == NULL) + { + free (buf); + buf = NULL; + break; + } + + buf = rbuf; + bstrm.next_out = (char*) &buf[bpos]; + bstrm.avail_out = dsize - bpos; + } + else if (bret != BZ_STREAM_END) + { + /* error */ + free (buf); + buf = NULL; + } + } while ((buf != NULL) && (bret != BZ_STREAM_END)); + + dsize = bpos + bstrm.total_out_lo32; + if ((dsize == 0) && (buf != NULL)) + { + free (buf); + buf = NULL; + } + } + + BZ2_bzDecompressEnd (&bstrm); - read (fd, &i, sizeof (size_t)); - if (i == 0) - { - ret->plugin_options = NULL; + if (fd != -1) + if (*buffer != NULL) + free (*buffer); + + if (buf == NULL) + { + return -1; + } + else + { + *buffer = buf; + *buffer_size = dsize; + return 0; + } } - else + } +#endif + return -1; +} + +/** + * Detect if we have compressed data on our hands. 
+ * + * @param data pointer to a data buffer or NULL (in case fd is not -1) + * @param fd a file to read data from, or -1 (if data is not NULL) + * @param fsize size of data (if data is not NULL) or of file (if fd is not -1) + * @param buffer will receive a pointer to the data that this function read + * @param buffer_size will receive size of the buffer + * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression + */ +static int +get_compression_type (const unsigned char *data, int fd, int64_t fsize, void **buffer, size_t *buffer_size) +{ + void *read_data = NULL; + size_t read_data_size = 0; + ssize_t read_result; + + if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER)) + { + *buffer = NULL; + return 0; + } + if (data == NULL) + { + read_data_size = COMPRESSED_DATA_PROBE_SIZE; + read_data = malloc (read_data_size); + if (read_data == NULL) + return -1; + read_result = READ (fd, read_data, read_data_size); + if (read_result != read_data_size) { - ret->plugin_options = malloc (i); - if (ret->plugin_options == NULL) - { - free (ret->short_libname); - free (ret->libname); - free (ret); - return NULL; - } - read (fd, ret->plugin_options, i); + free (read_data); + return -1; } - return ret; + *buffer = read_data; + *buffer_size = read_data_size; + data = (const void *) read_data; + } +#if HAVE_ZLIB + if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08)) + return 1; +#endif +#if HAVE_LIBBZ2 + if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h')) + return 2; +#endif + return 0; } +#if WINDOWS -void CALLBACK -RundllEntryPoint (HWND hwnd, - HINSTANCE hinst, - LPSTR lpszCmdLine, - int nCmdShow) +/** + * Setup a shared memory segment. 
+ * + * @param ptr set to the location of the map segment + * @param map where to store the map handle + * @param fn name of the mapping + * @param fn_size size available in fn + * @param size number of bytes to allocated for the mapping + * @return 0 on success + */ +static int +make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size) { - intptr_t in_h; - intptr_t out_h; - int in, out; - - sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h); - in = _open_osfhandle (in_h, _O_RDONLY); - out = _open_osfhandle (out_h, 0); - setmode (in, _O_BINARY); - setmode (out, _O_BINARY); - process_requests (read_plugin_data (in), - in, out); + const char *tpath = "Local\\"; + snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), + (unsigned int) RANDOM()); + *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); + *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size); + if (*ptr == NULL) + { + CloseHandle (*map); + return 1; + } + return 0; } -void CALLBACK -RundllEntryPointA (HWND hwnd, - HINSTANCE hinst, - LPSTR lpszCmdLine, - int nCmdShow) +static void +destroy_shm_w32 (void *ptr, HANDLE map) { - return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow); + UnmapViewOfFile (ptr); + CloseHandle (map); } -#endif +#else /** - * Start the process for the given plugin. - */ + * Setup a shared memory segment. + * + * @param ptr set to the location of the shm segment + * @param shmid where to store the shm ID + * @param fn name of the shared segment + * @param fn_size size available in fn + * @param size number of bytes to allocated for the segment + * @return 0 on success + */ +static int +make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size) +{ + const char *tpath; +#if SOMEBSD + /* this works on FreeBSD, not sure about others... 
*/ + tpath = getenv ("TMPDIR"); + if (tpath == NULL) + tpath = "/tmp/"; +#else + tpath = "/"; /* Linux */ +#endif + snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), + (unsigned int) RANDOM()); + *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + *ptr = NULL; + if (-1 == *shmid) + return 1; + if ((0 != ftruncate (*shmid, size)) || + (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || + (*ptr == (void*) -1) ) + { + close (*shmid); + *shmid = -1; + shm_unlink (fn); + return 1; + } + return 0; +} + static void -start_process (struct EXTRACTOR_PluginList *plugin) +destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name) { -#if !WINDOWS - int p1[2]; - int p2[2]; - pid_t pid; - int status; + if (NULL != ptr) + munmap (ptr, size); + if (shm_id != -1) + close (shm_id); + shm_unlink (shm_name); +} +#endif - plugin->cpid = -1; - if (0 != pipe (p1)) - { - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - if (0 != pipe (p2)) - { - close (p1[0]); - close (p1[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - pid = fork (); - plugin->cpid = pid; - if (pid == -1) - { - close (p1[0]); - close (p1[1]); - close (p2[0]); - close (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - if (pid == 0) - { - close (p1[1]); - close (p2[0]); - process_requests (plugin, p1[0], p2[1]); - _exit (0); - } - close (p1[0]); - close (p2[1]); - plugin->cpipe_in = fdopen (p1[1], "w"); - if (plugin->cpipe_in == NULL) - { - perror ("fdopen"); - (void) kill (plugin->cpid, SIGKILL); - waitpid (plugin->cpid, &status, 0); - close (p1[1]); - close (p2[0]); - plugin->cpid = -1; - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - plugin->cpipe_out = p2[0]; -#else - int p1[2]; - int p2[2]; - STARTUPINFO startup; - PROCESS_INFORMATION proc; - char cmd[MAX_PATH + 1]; - char arg1[10], arg2[10]; - HANDLE p10_os = INVALID_HANDLE_VALUE, p21_os = INVALID_HANDLE_VALUE; - HANDLE p10_os_inh = 
INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; - plugin->hProcess = NULL; - if (0 != _pipe (p1, 0, _O_BINARY | _O_NOINHERIT)) +static void +init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name, int64_t fsize) +{ + int write_result; + int init_state_size; + unsigned char *init_state; + int t; + size_t shm_name_len = strlen (shm_name) + 1; + init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t); + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + init_state = malloc (init_state_size); + if (init_state == NULL) { - plugin->flags = EXTRACTOR_OPTION_DISABLED; + stop_process (plugin); return; } - if (0 != _pipe (p2, 0, _O_BINARY | _O_NOINHERIT)) + t = 0; + init_state[t] = MESSAGE_INIT_STATE; + t += 1; + memcpy (&init_state[t], &fsize, sizeof (int64_t)); + t += sizeof (int64_t); + memcpy (&init_state[t], &shm_name_len, sizeof (size_t)); + t += sizeof (size_t); + memcpy (&init_state[t], shm_name, shm_name_len); + t += shm_name_len; + write_result = plugin_write (plugin, init_state, init_state_size); + free (init_state); + if (write_result < init_state_size) { - close (p1[0]); - close (p1[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; + stop_process (plugin); return; } - - memset (&startup, 0, sizeof (STARTUPINFO)); - - p10_os = (HANDLE) _get_osfhandle (p1[0]); - p21_os = (HANDLE) _get_osfhandle (p2[1]); - - if (p10_os == INVALID_HANDLE_VALUE || p21_os == INVALID_HANDLE_VALUE) - { - close (p1[0]); - close (p1[1]); - close (p2[0]); - close (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; + plugin->seek_request = 0; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + plugin_open_shm (plugin, shm_name); + plugin->fsize = fsize; + plugin->init_state_method (plugin); + plugin->seek_request = 0; + return; + break; + case EXTRACTOR_OPTION_DISABLED: return; + break; } +} - if (!DuplicateHandle (GetCurrentProcess (), p10_os, GetCurrentProcess (), - &p10_os_inh, 0, 
TRUE, DUPLICATE_SAME_ACCESS) - || !DuplicateHandle (GetCurrentProcess (), p21_os, GetCurrentProcess (), - &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) +static void +discard_plugin_state (struct EXTRACTOR_PluginList *plugin) +{ + int write_result; + unsigned char discard_state = MESSAGE_DISCARD_STATE; + switch (plugin->flags) { - if (p10_os_inh != INVALID_HANDLE_VALUE) - CloseHandle (p10_os_inh); - if (p21_os_inh != INVALID_HANDLE_VALUE) - CloseHandle (p21_os_inh); - close (p1[0]); - close (p1[1]); - close (p2[0]); - close (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + /* This is somewhat clumsy, but it's the only stop-indicating + * non-W32/POSIX-specific field i could think of... + */ + if (plugin->cpipe_out != -1) + { + write_result = plugin_write (plugin, &discard_state, 1); + if (write_result < 1) + { + stop_process (plugin); + return; + } + } + break; + case EXTRACTOR_OPTION_IN_PROCESS: + plugin->discard_state_method (plugin); return; + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; } +} - snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh); - cmd[MAX_PATH] = '\0'; - if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, - &startup, &proc)) +static int +give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size) +{ + int write_result; + int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t); + unsigned char updated_shm[updated_shm_size]; + int t = 0; + updated_shm[t] = MESSAGE_UPDATED_SHM; + t += 1; + memcpy (&updated_shm[t], &position, sizeof (int64_t)); + t += sizeof (int64_t); + memcpy (&updated_shm[t], &map_size, sizeof (size_t)); + t += sizeof (size_t); + switch (plugin->flags) { - plugin->hProcess = proc.hProcess; - CloseHandle (proc.hThread); - } - else + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case 
EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (plugin->seek_request < 0) + return 0; + write_result = plugin_write (plugin, updated_shm, updated_shm_size); + if (write_result < updated_shm_size) { - close (p1[0]); - close (p1[1]); - close (p2[0]); - close (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; + stop_process (plugin); + return 0; } - close (p1[0]); - close (p2[1]); - CloseHandle (p10_os_inh); - CloseHandle (p21_os_inh); - - write_plugin_data (p1[1], plugin); + return 1; + case EXTRACTOR_OPTION_IN_PROCESS: + plugin->position = position; + plugin->map_size = map_size; + return 0; + case EXTRACTOR_OPTION_DISABLED: + return 0; + default: + return 1; + } +} - plugin->cpipe_in = fdopen (p1[1], "w"); - if (plugin->cpipe_in == NULL) +static void +ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int extract_reply; + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + return; + case EXTRACTOR_OPTION_IN_PROCESS: + if (plugin->seek_request >= 0) { - perror ("fdopen"); - TerminateProcess (plugin->hProcess, 0); - WaitForSingleObject (plugin->hProcess, INFINITE); - CloseHandle (plugin->hProcess); - close (p1[1]); - close (p2[0]); - plugin->hProcess = INVALID_HANDLE_VALUE; - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; + plugin->shm_ptr = shm_ptr; + extract_reply = plugin->extract_method (plugin, proc, proc_cls); + if (extract_reply == 1) + plugin->seek_request = -1; } - plugin->cpipe_out = p2[0]; -#endif + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; + } } +#if !WINDOWS +int +plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) +{ + ssize_t read_result; + size_t read_count = 0; + while (read_count < size) + { + read_result = read (plugin->cpipe_out, &buf[read_count], size - read_count); + if (read_result <= 0) + return read_result; + 
read_count += read_result; + } + return read_count; +} +#else +int +plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) +{ + DWORD bytes_read; + BOOL bresult; + size_t read_count = 0; + while (read_count < size) + { + bresult = ReadFile (plugin->cpipe_out, &buf[read_count], size - read_count, &bytes_read, NULL); + if (!bresult) + return -1; + read_count += bytes_read; + } + return read_count; +} +#endif -/** - * Extract meta data using the given plugin, running the - * actual code of the plugin out-of-process. - * - * @param plugin which plugin to call - * @param size size of the file mapped by shmfn or tshmfn - * @param shmfn file name of the shared memory segment - * @param tshmfn file name of the shared memory segment for the end of the data - * @param proc function to call on the meta data - * @param proc_cls cls for proc - * @return 0 if proc did not return non-zero - */ static int -extract_oop (struct EXTRACTOR_PluginList *plugin, - size_t size, - const char *shmfn, - const char *tshmfn, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { + int read_result; + unsigned char code; + int must_read = 1; + + int64_t seek_position; struct IpcHeader hdr; - char mimetype[MAX_MIME_LEN + 1]; + char *mime_type; char *data; -#ifndef WINDOWS - if (plugin->cpid == -1) -#else - if (plugin->hProcess == INVALID_HANDLE_VALUE) -#endif - return 0; - if (0 >= fprintf (plugin->cpipe_in, - "%s\n", - shmfn)) - { - stop_process (plugin); -#ifndef WINDOWS - plugin->cpid = -1; -#else - plugin->hProcess = INVALID_HANDLE_VALUE; -#endif - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return 0; - } - if (0 >= fprintf (plugin->cpipe_in, - "!%s\n", - (tshmfn != NULL) ? 
tshmfn : "")) - { - stop_process (plugin); -#ifndef WINDOWS - plugin->cpid = -1; -#else - plugin->hProcess = INVALID_HANDLE_VALUE; -#endif - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return 0; - } - if (0 >= fprintf (plugin->cpipe_in, - "s%lu\n", - size)) - { - stop_process (plugin); -#ifndef WINDOWS - plugin->cpid = -1; -#else - plugin->hProcess = INVALID_HANDLE_VALUE; -#endif - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return 0; - } - fflush (plugin->cpipe_in); - while (1) + while (must_read) + { + read_result = plugin_read (plugin, &code, 1); + if (read_result < 1) + return -1; + switch (code) { - if (0 != read_all (plugin->cpipe_out, - &hdr, - sizeof(hdr))) - { - stop_process (plugin); -#ifndef WINDOWS - plugin->cpid = -1; -#else - plugin->hProcess = INVALID_HANDLE_VALUE; -#endif - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return 0; - } - if ( (hdr.type == 0) && - (hdr.format == 0) && - (hdr.data_len == 0) && - (hdr.mime_len == 0) ) - break; - if (hdr.mime_len > MAX_MIME_LEN) - { - stop_process (plugin); -#ifndef WINDOWS - plugin->cpid = -1; -#else - plugin->hProcess = INVALID_HANDLE_VALUE; -#endif - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return 0; - } + case MESSAGE_DONE: /* Done */ + plugin->seek_request = -1; + must_read = 0; + break; + case MESSAGE_SEEK: /* Seek */ + read_result = plugin_read (plugin, (unsigned char *) &seek_position, sizeof (int64_t)); + if (read_result < sizeof (int64_t)) + return -1; + plugin->seek_request = seek_position; + must_read = 0; + break; + case MESSAGE_META: /* Meta */ + read_result = plugin_read (plugin, (unsigned char *) &hdr, sizeof (hdr)); + if (read_result < sizeof (hdr)) /* FIXME: check hdr for sanity */ + return -1; + mime_type = malloc (hdr.mime_len + 1); + if (mime_type == NULL) + 
return -1; + read_result = plugin_read (plugin, (unsigned char *) mime_type, hdr.mime_len); + if (read_result < hdr.mime_len) + return -1; + mime_type[hdr.mime_len] = '\0'; data = malloc (hdr.data_len); if (data == NULL) - { - stop_process (plugin); - return 1; - } - if ( (0 != (read_all (plugin->cpipe_out, - mimetype, - hdr.mime_len))) || - (0 != (read_all (plugin->cpipe_out, - data, - hdr.data_len))) ) - { - stop_process (plugin); -#ifndef WINDOWS - plugin->cpid = -1; -#else - plugin->hProcess = INVALID_HANDLE_VALUE; -#endif - free (data); - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return 0; - } - mimetype[hdr.mime_len] = '\0'; - if ( (proc != NULL) && - (0 != proc (proc_cls, - plugin->short_libname, - hdr.type, - hdr.format, - mimetype, - data, - hdr.data_len)) ) - proc = NULL; + { + free (mime_type); + return -1; + } + read_result = plugin_read (plugin, (unsigned char *) data, hdr.data_len); + if (read_result < hdr.data_len) + { + free (mime_type); + free (data); + return -1; + } + read_result = proc (proc_cls, plugin->short_libname, hdr.meta_type, hdr.meta_format, mime_type, data, hdr.data_len); + free (mime_type); free (data); + if (read_result != 0) + return 1; + break; + default: + return -1; } - if (NULL == proc) - return 1; + } return 0; -} - +} -/** - * Setup a shared memory segment. 
- * - * @param ptr set to the location of the shm segment - * @param shmid where to store the shm ID - * @param fn name of the shared segment - * @param fn_size size available in fn - * @param size number of bytes to allocated for the segment - * @return 0 on success - */ +#if !WINDOWS static int -make_shm (int is_tail, - void **ptr, -#ifndef WINDOWS - int *shmid, -#else - HANDLE *map, -#endif - char *fn, - size_t fn_size, - size_t size) +wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - const char *tpath; -#ifdef WINDOWS - tpath = "Local\\"; -#elif SOMEBSD - const char *tpath; - /* this works on FreeBSD, not sure about others... */ - tpath = getenv ("TMPDIR"); - if (tpath == NULL) - tpath = "/tmp/"; -#else - tpath = "/"; /* Linux */ -#endif - snprintf (fn, - fn_size, - "%slibextractor-%sshm-%u-%u", - tpath, - (is_tail) ? "t" : "", - getpid(), - (unsigned int) RANDOM()); -#ifndef WINDOWS - *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - *ptr = NULL; - if (-1 == (*shmid)) - return 1; - if ( (0 != ftruncate (*shmid, size)) || - (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || - (*ptr == (void*) -1) ) + int ready; + int result; + struct timeval tv; + fd_set to_check; + int highest = 0; + int read_result; + struct EXTRACTOR_PluginList *ppos; + + FD_ZERO (&to_check); + + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + switch (ppos->flags) { - close (*shmid); - *shmid = -1; - shm_unlink (fn); - return 1; + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (ppos->seek_request == -1) + continue; + FD_SET (ppos->cpipe_out, &to_check); + if (highest < ppos->cpipe_out) + highest = ppos->cpipe_out; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; } - return 0; -#else - *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); - *ptr = MapViewOfFile 
(*map, FILE_MAP_WRITE, 0, 0, size); - if (*ptr == NULL) + } + + tv.tv_sec = 10; + tv.tv_usec = 0; + ready = select (highest + 1, &to_check, NULL, NULL, &tv); + if (ready <= 0) + /* an error or timeout -> something's wrong or all plugins hung up */ + return -1; + + result = 0; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + switch (ppos->flags) { - CloseHandle (*map); - return 1; + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (ppos->seek_request == -1) + continue; + if (FD_ISSET (ppos->cpipe_out, &to_check)) + { + read_result = receive_reply (ppos, proc, proc_cls); + if (read_result < 0) + { + stop_process (ppos); + } + result += 1; + } + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; } - return 0; -#endif + } + return result; } - - -/** - * Extract keywords using the given set of plugins. - * - * @param plugins the list of plugins to use - * @param data data to process, never NULL - * @param size number of bytes in data, ignored if data is NULL - * @param tdata end of file data, or NULL - * @param tsize number of bytes in tdata - * @param proc function to call for each meta data item found - * @param proc_cls cls argument to proc - */ -static void -extract (struct EXTRACTOR_PluginList *plugins, - const char * data, - size_t size, - const char * tdata, - size_t tsize, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +#else +static int +wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { + int result; + DWORD ms; + DWORD first_ready; + DWORD dwresult; + DWORD bytes_read; + BOOL bresult; + int i; + HANDLE events[MAXIMUM_WAIT_OBJECTS]; + + struct EXTRACTOR_PluginList *ppos; - enum EXTRACTOR_Options flags; - void *ptr; - void *tptr; - char fn[255]; - char tfn[255]; - int want_shm; - int want_tail; -#ifndef WINDOWS - int shmid; - int tshmid; -#else - HANDLE map; - HANDLE tmap; -#endif - want_shm = 0; - 
ppos = plugins; - while (NULL != ppos) - { - switch (ppos->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: -#ifndef WINDOWS - if ( (0 == ppos->cpid) || - (-1 == ppos->cpid) ) -#else - if (ppos->hProcess == NULL || ppos->hProcess == INVALID_HANDLE_VALUE) -#endif - start_process (ppos); - want_shm = 1; - break; - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: -#ifndef WINDOWS - if (0 == ppos->cpid) -#else - if (ppos->hProcess == NULL) -#endif - start_process (ppos); - want_shm = 1; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - break; - case EXTRACTOR_OPTION_DISABLED: - break; - } - ppos = ppos->next; - } - ptr = NULL; - tptr = NULL; - if (want_shm) + i = 0; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + if (i == MAXIMUM_WAIT_OBJECTS) + return -1; + if (ppos->seek_request == -1) + continue; + switch (ppos->flags) { - if (size > MAX_READ) - size = MAX_READ; - if (0 == make_shm (0, - &ptr, -#ifndef WINDOWS - &shmid, -#else - &map, -#endif - fn, sizeof(fn), size)) - { - memcpy (ptr, data, size); - if ( (tdata != NULL) && - (0 == make_shm (1, - &tptr, -#ifndef WINDOWS - &tshmid, -#else - &tmap, -#endif - tfn, sizeof(tfn), tsize)) ) - { - memcpy (tptr, tdata, tsize); - } - else - { - tptr = NULL; - } - } - else - { - want_shm = 0; - } + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0) + { + ResetEvent (ppos->ov_read.hEvent); + bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read, &ppos->ov_read); + if (bresult == TRUE) + { + SetEvent (ppos->ov_read.hEvent); + } + else + { + DWORD err = GetLastError (); + if (err != ERROR_IO_PENDING) + SetEvent (ppos->ov_read.hEvent); + } + } + events[i] = ppos->ov_read.hEvent; + i++; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; } - ppos = plugins; - while (NULL != ppos) + } + + ms = 10000; + first_ready = WaitForMultipleObjects (i, events, FALSE, ms); + if 
(first_ready == WAIT_TIMEOUT || first_ready == WAIT_FAILED) + /* an error or timeout -> something's wrong or all plugins hung up */ + return -1; + + i = 0; + result = 0; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + int read_result; + switch (ppos->flags) { - flags = ppos->flags; - if (! want_shm) - flags = EXTRACTOR_OPTION_IN_PROCESS; - switch (flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, - (tptr != NULL) ? tfn : NULL, - proc, proc_cls)) - { - ppos = NULL; - break; - } -#ifndef WINDOWS - if (ppos->cpid == -1) -#else - if (ppos->hProcess == INVALID_HANDLE_VALUE) -#endif - { - start_process (ppos); - if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, - (tptr != NULL) ? tfn : NULL, - proc, proc_cls)) - { - ppos = NULL; - break; - } - } - break; - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, - (tptr != NULL) ? tfn : NULL, - proc, proc_cls)) - { - ppos = NULL; - break; - } - break; - case EXTRACTOR_OPTION_IN_PROCESS: - want_tail = ( (ppos->specials != NULL) && - (NULL != strstr (ppos->specials, - "want-tail"))); - if (NULL == ppos->extractMethod) - plugin_load (ppos); - if ( ( (ppos->specials == NULL) || - (NULL == strstr (ppos->specials, - "oop-only")) ) ) - { - if (want_tail) - { - if ( (NULL != ppos->extractMethod) && - (tdata != NULL) && - (0 != ppos->extractMethod (tdata, - tsize, - proc, - proc_cls, - ppos->plugin_options)) ) - { - ppos = NULL; - break; - } - } - else - { - if ( (NULL != ppos->extractMethod) && - (0 != ppos->extractMethod (data, - size, - proc, - proc_cls, - ppos->plugin_options)) ) - { - ppos = NULL; - break; - } - } - } - break; - case EXTRACTOR_OPTION_DISABLED: - break; - } - if (ppos == NULL) - break; - ppos = ppos->next; + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (ppos->seek_request == -1) + continue; + if (i < first_ready) 
+ { + i += 1; + continue; + } + dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0); + read_result = 0; + if (dwresult == WAIT_OBJECT_0) + { + read_result = receive_reply (ppos, proc, proc_cls); + result += 1; + } + if (dwresult == WAIT_FAILED || read_result < 0) + { + stop_process (ppos); + if (dwresult == WAIT_FAILED) + result += 1; + } + i++; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; } - if (want_shm) + } + return result; +} + +#endif + +static int64_t +seek_to_new_position (struct EXTRACTOR_PluginList *plugins, int fd, int64_t fsize, int64_t current_position) +{ + int64_t min_pos = fsize; + struct EXTRACTOR_PluginList *ppos; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + switch (ppos->flags) { -#ifndef WINDOWS - if (NULL != ptr) - munmap (ptr, size); - if (shmid != -1) - close (shmid); - shm_unlink (fn); - if (NULL != tptr) - { - munmap (tptr, tsize); - shm_unlink (tfn); - if (tshmid != -1) - close (tshmid); - } + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + case EXTRACTOR_OPTION_IN_PROCESS: + if (ppos->seek_request > 0 && ppos->seek_request >= current_position && + ppos->seek_request <= min_pos) + min_pos = ppos->seek_request; + break; + case EXTRACTOR_OPTION_DISABLED: + break; + } + } + if (min_pos >= fsize) + return -1; +#if WINDOWS + _lseeki64 (fd, min_pos, SEEK_SET); +#elif !HAVE_SEEK64 + lseek64 (fd, min_pos, SEEK_SET); #else - UnmapViewOfFile (ptr); - CloseHandle (map); - if (tptr != NULL) - { - UnmapViewOfFile (tptr); - CloseHandle (tmap); - } + if (min_pos >= INT_MAX) + return -1; + lseek (fd, (ssize_t) min_pos, SEEK_SET); #endif - } + return min_pos; } +static void +load_in_process_plugin (struct EXTRACTOR_PluginList *plugin) +{ + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + case EXTRACTOR_OPTION_DISABLED: + break; + case EXTRACTOR_OPTION_IN_PROCESS: + 
plugin_load (plugin); + break; + } +} /** - * If the given data is compressed using gzip or bzip2, decompress - * it. Run 'extract' on the decompressed contents (or the original - * contents if they were not compressed). + * Extract keywords using the given set of plugins. * * @param plugins the list of plugins to use - * @param data data to process, never NULL - * @param size number of bytes in data - * @param tdata end of file data, or NULL - * @param tsize number of bytes in tdata + * @param data data to process, or NULL if fds is not -1 + * @param fd file to read data from, or -1 if data is not NULL + * @param fsize size of data or size of file + * @param buffer a buffer with data alteady read from the file (if fd != -1) + * @param buffer_size size of buffer * @param proc function to call for each meta data item found * @param proc_cls cls argument to proc */ static void -decompress_and_extract (struct EXTRACTOR_PluginList *plugins, - const unsigned char * data, - size_t size, - const char * tdata, - size_t tsize, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) { - unsigned char * buf; - unsigned char * rbuf; - size_t dsize; -#if HAVE_ZLIB - z_stream strm; - int ret; - size_t pos; +do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int shm_result; + unsigned char *shm_ptr; +#if !WINDOWS + int shm_id; +#else + HANDLE map_handle; #endif -#if HAVE_LIBBZ2 - bz_stream bstrm; - int bret; - size_t bpos; + char shm_name[MAX_SHM_NAME + 1]; + + struct EXTRACTOR_PluginList *ppos; + + int64_t position = 0; + size_t map_size; + ssize_t read_result; + int kill_plugins = 0; + + map_size = (fd == -1) ? fsize : MAX_READ; + + /* Make a shared memory object. Even if we're running in-process. 
Simpler that way */ +#if !WINDOWS + shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name, MAX_SHM_NAME, + map_size); +#else + shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name, MAX_SHM_NAME, + map_size); #endif + if (shm_result != 0) + return; - buf = NULL; - dsize = 0; -#if HAVE_ZLIB - /* try gzip decompression first */ - if ( (size >= 12) && - (data[0] == 0x1f) && - (data[1] == 0x8b) && - (data[2] == 0x08) ) + /* This three-loops-instead-of-one construction is intended to increase parallelism */ + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + start_process (ppos); + + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + load_in_process_plugin (ppos); + + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + write_plugin_data (ppos); + + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + init_plugin_state (ppos, shm_name, fsize); + + while (1) + { + int plugins_not_ready = 0; + if (fd != -1) { - /* Process gzip header */ - unsigned int gzip_header_length = 10; - - if (data[3] & 0x4) /* FEXTRA set */ - gzip_header_length += 2 + (unsigned) (data[10] & 0xff) - + (((unsigned) (data[11] & 0xff)) * 256); - - if (data[3] & 0x8) /* FNAME set */ - { - const unsigned char * cptr = data + gzip_header_length; - /* stored file name is here */ - while (cptr < data + size) - { - if ('\0' == *cptr) - break; - cptr++; - } - if (0 != proc (proc_cls, - "<zlib>", - EXTRACTOR_METATYPE_FILENAME, - EXTRACTOR_METAFORMAT_C_STRING, - "text/plain", - (const char*) (data + gzip_header_length), - cptr - (data + gzip_header_length))) - return; /* done */ - gzip_header_length = (cptr - data) + 1; - } - if (data[3] & 0x16) /* FCOMMENT set */ - { - const unsigned char * cptr = data + gzip_header_length; - /* stored comment is here */ - while (cptr < data + size) - { - if('\0' == *cptr) - break; - cptr ++; - } - if (0 != proc (proc_cls, - "<zlib>", - EXTRACTOR_METATYPE_COMMENT, - EXTRACTOR_METAFORMAT_C_STRING, - "text/plain", - (const char*) 
(data + gzip_header_length), - cptr - (data + gzip_header_length))) - return; /* done */ - gzip_header_length = (cptr - data) + 1; - } - if(data[3] & 0x2) /* FCHRC set */ - gzip_header_length += 2; - memset(&strm, - 0, - sizeof(z_stream)); -#ifdef ZLIB_VERNUM - gzip_header_length = 0; -#endif - if (size > gzip_header_length) - { - strm.next_in = (Bytef*) data + gzip_header_length; - strm.avail_in = size - gzip_header_length; - } + /* fill the share buffer with data from the file */ + if (buffer_size > 0) + memcpy (shm_ptr, buffer, buffer_size); + read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size); + if (read_result <= 0) + break; else - { - strm.next_in = (Bytef*) data; - strm.avail_in = 0; - } - strm.total_in = 0; - strm.zalloc = NULL; - strm.zfree = NULL; - strm.opaque = NULL; - - /* - * note: maybe plain inflateInit(&strm) is adequate, - * it looks more backward-compatible also ; - * - * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; - * there might be a better check. 
- */ - if (Z_OK == inflateInit2(&strm, -#ifdef ZLIB_VERNUM - 15 + 32 -#else - -MAX_WBITS -#endif - )) { - dsize = 2 * size; - if (dsize > MAX_DECOMPRESS) - dsize = MAX_DECOMPRESS; - buf = malloc(dsize); - pos = 0; - if (buf == NULL) - { - inflateEnd(&strm); - } - else - { - strm.next_out = (Bytef*) buf; - strm.avail_out = dsize; - do - { - ret = inflate(&strm, - Z_SYNC_FLUSH); - if (ret == Z_OK) - { - if (dsize == MAX_DECOMPRESS) - break; - pos += strm.total_out; - strm.total_out = 0; - dsize *= 2; - if (dsize > MAX_DECOMPRESS) - dsize = MAX_DECOMPRESS; - rbuf = realloc(buf, dsize); - if (rbuf == NULL) - { - free (buf); - buf = NULL; - break; - } - buf = rbuf; - strm.next_out = (Bytef*) &buf[pos]; - strm.avail_out = dsize - pos; - } - else if (ret != Z_STREAM_END) - { - /* error */ - free(buf); - buf = NULL; - } - } while ( (buf != NULL) && - (ret != Z_STREAM_END) ); - dsize = pos + strm.total_out; - inflateEnd(&strm); - if ( (dsize == 0) && - (buf != NULL) ) - { - free(buf); - buf = NULL; - } - } - } + map_size = read_result + buffer_size; + if (buffer_size > 0) + buffer_size = 0; } -#endif - -#if HAVE_LIBBZ2 - if ( (size >= 4) && - (data[0] == 'B') && - (data[1] == 'Z') && - (data[2] == 'h') ) + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + plugins_not_ready += give_shm_to_plugin (ppos, position, map_size); + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls); + while (plugins_not_ready > 0 && !kill_plugins) { - /* now try bz2 decompression */ - memset(&bstrm, - 0, - sizeof(bz_stream)); - bstrm.next_in = (char*) data; - bstrm.avail_in = size; - bstrm.total_in_lo32 = 0; - bstrm.total_in_hi32 = 0; - bstrm.bzalloc = NULL; - bstrm.bzfree = NULL; - bstrm.opaque = NULL; - if ( (buf == NULL) && - (BZ_OK == BZ2_bzDecompressInit(&bstrm, - 0, - 0)) ) - { - dsize = 2 * size; - if (dsize > MAX_DECOMPRESS) - dsize = MAX_DECOMPRESS; - buf = malloc(dsize); - bpos = 0; - if (buf == NULL) - { - 
BZ2_bzDecompressEnd(&bstrm); - } - else - { - bstrm.next_out = (char*) buf; - bstrm.avail_out = dsize; - do { - bret = BZ2_bzDecompress(&bstrm); - if (bret == Z_OK) - { - if (dsize == MAX_DECOMPRESS) - break; - bpos += bstrm.total_out_lo32; - bstrm.total_out_lo32 = 0; - dsize *= 2; - if (dsize > MAX_DECOMPRESS) - dsize = MAX_DECOMPRESS; - rbuf = realloc(buf, dsize); - if (rbuf == NULL) - { - free (buf); - buf = NULL; - break; - } - buf = rbuf; - bstrm.next_out = (char*) &buf[bpos]; - bstrm.avail_out = dsize - bpos; - } - else if (bret != BZ_STREAM_END) - { - /* error */ - free(buf); - buf = NULL; - } - } while ( (buf != NULL) && - (bret != BZ_STREAM_END) ); - dsize = bpos + bstrm.total_out_lo32; - BZ2_bzDecompressEnd(&bstrm); - if ( (dsize == 0) && - (buf != NULL) ) - { - free(buf); - buf = NULL; - } - } - } + int ready = wait_for_reply (plugins, proc, proc_cls); + if (ready <= 0) + kill_plugins = 1; + plugins_not_ready -= ready; } -#endif - if (buf != NULL) + if (kill_plugins) + break; + if (fd != -1) { - data = buf; - size = dsize; + position += map_size; + position = seek_to_new_position (plugins, fd, fsize, position); + if (position < 0) + break; } - extract (plugins, - (const char*) data, - size, - tdata, - tsize, - proc, - proc_cls); - if (buf != NULL) - free(buf); - errno = 0; /* kill transient errors */ -} - + else + break; + } -/** - * Open a file - */ -static int file_open(const char *filename, int oflag, ...) 
-{ - int mode; - const char *fn; -#ifdef MINGW - char szFile[_MAX_PATH + 1]; - long lRet; + if (kill_plugins) + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + stop_process (ppos); + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + discard_plugin_state (ppos); - if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS) - { - errno = ENOENT; - SetLastError(lRet); - return -1; - } - fn = szFile; +#if WINDOWS + destroy_shm_w32 (shm_ptr, map_handle); #else - fn = filename; -#endif - mode = 0; -#ifdef MINGW - /* Set binary mode */ - mode |= O_BINARY; + destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name); #endif - return OPEN(fn, oflag, mode); } -#ifndef O_LARGEFILE -#define O_LARGEFILE 0 -#endif - - /** * Extract keywords from a file using the given set of plugins. * If needed, opens the file and loads its data (via mmap). Then @@ -1478,92 +2106,151 @@ static int file_open(const char *filename, int oflag, ...) */ void EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, - const char *filename, - const void *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) + const char *filename, + const void *data, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls) { - int fd; - void * buffer; - void * tbuffer; - struct stat fstatbuf; - size_t fsize; - size_t tsize; - int eno; - off_t offset; - long pg; -#ifdef WINDOWS - SYSTEM_INFO sys; -#endif - - fd = -1; - buffer = NULL; - if ( (data == NULL) && - (filename != NULL) && - (0 == STAT(filename, &fstatbuf)) && - (!S_ISDIR(fstatbuf.st_mode)) && - (-1 != (fd = file_open (filename, - O_RDONLY | O_LARGEFILE))) ) - { - fsize = (fstatbuf.st_size > 0xFFFFFFFF) ? 
0xFFFFFFFF : fstatbuf.st_size; - if (fsize == 0) - { - close(fd); - return; - } - if (fsize > MAX_READ) - fsize = MAX_READ; - buffer = MMAP(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0); - if ( (buffer == NULL) || (buffer == (void *) -1) ) - { - eno = errno; - close(fd); - errno = eno; - return; - } + int fd = -1; + struct stat64 fstatbuf; + int64_t fsize = 0; + int memory_only = 1; + int compression_type = -1; + void *buffer = NULL; + size_t buffer_size; + int decompression_result; + + /* If data is not given, then we need to read it from the file. Try opening it */ + if ((data == NULL) && + (filename != NULL) && + (0 == STAT64(filename, &fstatbuf)) && + (!S_ISDIR(fstatbuf.st_mode)) && + (-1 != (fd = file_open (filename, + O_RDONLY | O_LARGEFILE)))) + { + /* Empty files are of no interest */ + fsize = fstatbuf.st_size; + if (fsize == 0) + { + close(fd); + return; } - if ( (buffer == NULL) && - (data == NULL) ) + /* File is too big -> can't read it into memory */ + if (fsize > MAX_READ) + memory_only = 0; + } + + /* Data is not given, and we've failed to open the file with data -> exit */ + if ((fsize == 0) && (data == NULL)) return; - /* for footer extraction */ - tsize = 0; - tbuffer = NULL; - if ( (data == NULL) && - (fstatbuf.st_size > fsize) && - (fstatbuf.st_size > MAX_READ) ) + /* fsize is now size of the data OR size of the file */ + if (data != NULL) + fsize = size; + + errno = 0; + /* Peek at first few bytes of the file (or of the data), and see if it's compressed. + * If data is NULL, buffer is allocated by the function and holds the first few bytes + * of the file, buffer_size is set too. 
+ */ + compression_type = get_compression_type (data, fd, fsize, &buffer, &buffer_size); + if (compression_type < 0) + { + /* errno is set by get_compression_type () */ + if (fd != -1) + close (fd); + return; + } + if (compression_type > 0) + { + /* Don't assume that MAX_DECOMPRESS < MAX_READ */ + if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ)) { - pg = SYSCONF (_SC_PAGE_SIZE); - if ( (pg > 0) && - (pg < MAX_READ) ) - { - offset = (1 + (fstatbuf.st_size - MAX_READ) / pg) * pg; - if (offset < fstatbuf.st_size) - { - tsize = fstatbuf.st_size - offset; - tbuffer = MMAP (NULL, tsize, PROT_READ, MAP_PRIVATE, fd, offset); - if ( (tbuffer == NULL) || (tbuffer == (void *) -1) ) - { - tsize = 0; - tbuffer = NULL; - } - } - } + /* File or data is to big to be decompressed in-memory (the only kind of decompression we do) */ + errno = EFBIG; + if (fd != -1) + close (fd); + if (buffer != NULL) + free (buffer); + return; + } + /* Decompress data (or file contents + what we've read so far. Either way it writes a new + * pointer to buffer, sets buffer_size, and frees the old buffer (if it wasn't NULL). + * In case of failure it cleans up the buffer after itself. + * Will also report compression-related metadata to the caller. + */ + decompression_result = try_to_decompress (data, fd, fsize, compression_type, &buffer, &buffer_size, proc, proc_cls); + if (decompression_result != 0) + { + /* Buffer is taken care of already */ + close (fd); + errno = EILSEQ; + return; } - decompress_and_extract (plugins, - buffer != NULL ? buffer : data, - buffer != NULL ? 
fsize : size, - tbuffer, - tsize, - proc, - proc_cls); + else + { + close (fd); + fd = -1; + } + } + + /* Now we either have a non-NULL data of fsize bytes + * OR a valid fd to read from and a small buffer of buffer_size bytes + * OR an invalid fd and a big buffer of buffer_size bytes + * Simplify this situation a bit: + */ + if ((data == NULL) && (fd == -1) && (buffer_size > 0)) + { + data = (const void *) buffer; + fsize = buffer_size; + } + + /* Now we either have a non-NULL data of fsize bytes + * OR a valid fd to read from and a small buffer of buffer_size bytes + * and we might need to free the buffer later in either case + */ + + /* do_extract () might set errno itself, but from our point of view everything is OK */ + errno = 0; + + do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls); + if (buffer != NULL) - MUNMAP (buffer, fsize); - if (tbuffer != NULL) - MUNMAP (tbuffer, tsize); + free (buffer); if (-1 != fd) - close(fd); + close(fd); +} + + +#if WINDOWS +void CALLBACK +RundllEntryPoint (HWND hwnd, + HINSTANCE hinst, + LPSTR lpszCmdLine, + int nCmdShow) +{ + intptr_t in_h; + intptr_t out_h; + int in, out; + + sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h); + in = _open_osfhandle (in_h, _O_RDONLY); + out = _open_osfhandle (out_h, 0); + setmode (in, _O_BINARY); + setmode (out, _O_BINARY); + process_requests (read_plugin_data (in), + in, out); +} + +void CALLBACK +RundllEntryPointA (HWND hwnd, + HINSTANCE hinst, + LPSTR lpszCmdLine, + int nCmdShow) +{ + return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow); } +#endif /** * Initialize gettext and libltdl (and W32 if needed). 
@@ -1579,12 +2266,12 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() { if (err > 0) { #if DEBUG fprintf(stderr, - _("Initialization of plugin mechanism failed: %s!\n"), - lt_dlerror()); + _("Initialization of plugin mechanism failed: %s!\n"), + lt_dlerror()); #endif return; } -#ifdef MINGW +#if WINDOWS plibc_init("GNU", PACKAGE); #endif } @@ -1594,12 +2281,10 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() { * Deinit. */ void __attribute__ ((destructor)) EXTRACTOR_ltdl_fini() { -#ifdef MINGW +#if WINDOWS plibc_shutdown(); #endif lt_dlexit (); } - - /* end of extractor.c */ diff --git a/src/main/extractor_plugins.c b/src/main/extractor_plugins.c @@ -204,15 +204,24 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) plugin->flags = EXTRACTOR_OPTION_DISABLED; return -1; } - plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle, - "_EXTRACTOR_%s_extract", + plugin->extract_method = get_symbol_with_prefix (plugin->libraryHandle, + "_EXTRACTOR_%s_extract_method", plugin->libname, &plugin->specials); - if (plugin->extractMethod == NULL) + plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle, + "_EXTRACTOR_%s_init_state_method", + plugin->libname, + &plugin->specials); + plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle, + "_EXTRACTOR_%s_discard_state_method", + plugin->libname, + &plugin->specials); + if (plugin->extract_method == NULL || plugin->init_state_method == NULL || + plugin->discard_state_method == NULL) { #if DEBUG fprintf (stderr, - "Resolving `extract' method of plugin `%s' failed: %s\n", + "Resolving `extract', 'init_state' or 'discard_state' method(s) of plugin `%s' failed: %s\n", plugin->short_libname, lt_dlerror ()); #endif @@ -243,8 +252,15 @@ EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, enum EXTRACTOR_Options flags) { struct EXTRACTOR_PluginList *result; + struct EXTRACTOR_PluginList *i; char *libname; + for (i = prev; i != NULL; i = i->next) + { + if 
(strcmp (i->short_libname, library) == 0) + return prev; + } + libname = find_plugin (library); if (libname == NULL) { diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h @@ -64,7 +64,9 @@ struct EXTRACTOR_PluginList /** * Pointer to the function used for meta data extraction. */ - EXTRACTOR_ExtractMethod extractMethod; + EXTRACTOR_extract_method extract_method; + EXTRACTOR_init_state_method init_state_method; + EXTRACTOR_discard_state_method discard_state_method; /** * Options for the plugin. @@ -84,26 +86,72 @@ struct EXTRACTOR_PluginList enum EXTRACTOR_Options flags; /** - * Process ID of the child process for this plugin. 0 for - * none. + * Process ID of the child process for this plugin. 0 for none. */ -#ifndef WINDOWS +#if !WINDOWS int cpid; #else HANDLE hProcess; #endif /** - * Pipe used to send information about shared memory segments to - * the child process. NULL if not initialized. + * Pipe used to communicate information to the plugin child process. + * NULL if not initialized. */ +#if !WINDOWS FILE *cpipe_in; +#else + HANDLE cpipe_in; +#endif + + /** + * A position this plugin wants us to seek to. -1 if it's finished. + * Starts at 0; + */ + int64_t seek_request; + +#if !WINDOWS + int shm_id; +#else + HANDLE map_handle; +#endif + + void *state; + + int64_t fsize; + + int64_t position; + + unsigned char *shm_ptr; + + size_t map_size; /** * Pipe used to read information about extracted meta data from - * the child process. -1 if not initialized. + * the plugin child process. -1 if not initialized. */ +#if !WINDOWS int cpipe_out; +#else + HANDLE cpipe_out; +#endif + +#if WINDOWS + /** + * A structure for overlapped reads on W32. + */ + OVERLAPPED ov_read; + + /** + * A structure for overlapped writes on W32. 
+ */ + OVERLAPPED ov_write; + + /** + * A write buffer for overlapped writes on W32 + */ + unsigned char *ov_write_buffer; +#endif }; /** diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -1,4 +1,4 @@ -INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common +INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common -I$(top_srcdir)/src/main # install plugins under: plugindir = $(libdir)/@RPLUGINDIR@ @@ -11,183 +11,23 @@ PLUGINFLAGS = $(makesymbolic) $(LE_PLUGIN_LDFLAGS) SUBDIRS = . -if HAVE_FFMPEG - thumbffmpeg=libextractor_thumbnailffmpeg.la -endif - -if HAVE_LIBRPM - rpm=libextractor_rpm.la -endif - -if HAVE_GLIB -if WITH_GSF - ole2=libextractor_ole2.la -endif -if HAVE_GTK - thumbgtk=libextractor_thumbnailgtk.la -endif -endif - -if HAVE_QT - thumbqt=libextractor_thumbnailqt.la - qtflags=-lQtGui -lQtCore -lpthread -else -if HAVE_QT4 - thumbqt=libextractor_thumbnailqt.la - qtflags=-lQtGui4 -lQtCore4 -endif -endif - -if HAVE_QT_SVG - svgflags = -lQtSvg -else -if HAVE_QT_SVG4 - svgflags = -lQtSvg4 -endif -endif - -if HAVE_CXX -if HAVE_EXIV2 - exiv2=libextractor_exiv2.la -endif -if HAVE_POPPLER - pdf=libextractor_pdf.la -endif -endif - -if HAVE_MPEG2 - mpeg = libextractor_mpeg.la -endif - -if HAVE_VORBISFILE - ogg = libextractor_ogg.la -endif - -if HAVE_FLAC - flac = libextractor_flac.la -endif - -if NEED_VORBIS - vorbisflag = -lvorbis -endif - -if NEED_OGG - flacoggflag = -logg -endif - plugin_LTLIBRARIES = \ - libextractor_applefile.la \ - libextractor_asf.la \ - libextractor_deb.la \ - libextractor_dvi.la \ - libextractor_elf.la \ - $(exiv2) \ - $(flac) \ - libextractor_flv.la \ - libextractor_gif.la \ - libextractor_html.la \ libextractor_id3.la \ libextractor_id3v2.la \ - libextractor_id3v23.la \ - libextractor_id3v24.la \ - libextractor_it.la \ - libextractor_jpeg.la \ - libextractor_man.la \ - libextractor_mime.la \ - libextractor_mkv.la \ - libextractor_mp3.la \ - $(mpeg) \ - libextractor_nsf.la \ - libextractor_nsfe.la \ - 
libextractor_odf.la \ - $(ogg) \ - $(ole2) \ - $(pdf) \ - libextractor_png.la \ - libextractor_ps.la \ - libextractor_qt.la \ - libextractor_real.la \ - libextractor_riff.la \ - $(rpm) \ - libextractor_s3m.la \ - libextractor_sid.la \ - libextractor_tar.la \ - $(thumbgtk) \ - $(thumbqt) \ - $(thumbffmpeg) \ - libextractor_tiff.la \ - libextractor_wav.la \ - libextractor_xm.la \ - libextractor_zip.la + libextractor_mp3.la -libextractor_applefile_la_SOURCES = \ - applefile_extractor.c -libextractor_applefile_la_LDFLAGS = \ +libextractor_mp3_la_SOURCES = \ + mp3_extractor.c +libextractor_mp3_la_LDFLAGS = \ $(PLUGINFLAGS) -libextractor_applefile_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la \ - $(LE_LIBINTL) - -libextractor_asf_la_SOURCES = \ - asf_extractor.c -libextractor_asf_la_LDFLAGS = \ +libextractor_mp3_la_LIBADD = \ $(top_builddir)/src/common/libextractor_common.la \ - $(PLUGINFLAGS) - -libextractor_deb_la_SOURCES = \ - deb_extractor.c -libextractor_deb_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_deb_la_LIBADD = \ - -lz - -libextractor_dvi_la_SOURCES = \ - dvi_extractor.c -libextractor_dvi_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_elf_la_SOURCES = \ - elf_extractor.c -libextractor_elf_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_elf_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - -libextractor_exiv2_la_SOURCES = \ - exiv2_extractor.cc -libextractor_exiv2_la_LDFLAGS = \ - $(XTRA_CPPLIBS) $(PLUGINFLAGS) -libextractor_exiv2_la_LIBADD = \ - -lexiv2 - -libextractor_flac_la_SOURCES = \ - flac_extractor.c -libextractor_flac_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_flac_la_LIBADD = \ - -lFLAC $(flacoggflag) \ $(LE_LIBINTL) -libextractor_flv_la_SOURCES = \ - flv_extractor.c -libextractor_flv_la_LDFLAGS = \ +libextractor_ebml_la_SOURCES = \ + ebml_extractor.c +libextractor_ebml_la_LDFLAGS = \ $(PLUGINFLAGS) -libextractor_flv_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - 
-libextractor_gif_la_SOURCES = \ - gif_extractor.c -libextractor_gif_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_gif_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - -libextractor_html_la_SOURCES = \ - html_extractor.c -libextractor_html_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_html_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la libextractor_id3_la_SOURCES = \ id3_extractor.c @@ -204,211 +44,4 @@ libextractor_id3v2_la_LDFLAGS = \ libextractor_id3v2_la_LIBADD = \ $(top_builddir)/src/common/libextractor_common.la -libextractor_id3v23_la_SOURCES = \ - id3v23_extractor.c -libextractor_id3v23_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_id3v23_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - -libextractor_id3v24_la_SOURCES = \ - id3v24_extractor.c -libextractor_id3v24_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_id3v24_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - -libextractor_it_la_SOURCES = \ - it_extractor.c -libextractor_it_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_jpeg_la_SOURCES = \ - jpeg_extractor.c -libextractor_jpeg_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_jpeg_la_LIBADD = \ - $(LE_LIBINTL) - -libextractor_man_la_SOURCES = \ - man_extractor.c -libextractor_man_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_man_la_LIBADD = \ - $(LE_LIBINTL) - -libextractor_mime_la_SOURCES = \ - mime_extractor.c -libextractor_mime_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_mkv_la_SOURCES = \ - mkv_extractor.c -libextractor_mkv_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_mp3_la_SOURCES = \ - mp3_extractor.c -libextractor_mp3_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_mp3_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la \ - $(LE_LIBINTL) - -libextractor_mpeg_la_SOURCES = \ - mpeg_extractor.c -libextractor_mpeg_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_mpeg_la_LIBADD = \ - -lmpeg2 - -libextractor_nsf_la_SOURCES = \ - nsf_extractor.c 
-libextractor_nsf_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_nsfe_la_SOURCES = \ - nsfe_extractor.c -libextractor_nsfe_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_odf_la_SOURCES = \ - odf_extractor.c -libextractor_odf_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_odf_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la \ - -lz - -libextractor_ogg_la_SOURCES = \ - ogg_extractor.c -libextractor_ogg_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_ogg_la_LIBADD = \ - -lvorbisfile $(vorbisflag) -logg - -libextractor_ole2_la_SOURCES = \ - ole2_extractor.c -libextractor_ole2_la_CFLAGS = \ - $(GSF_CFLAGS) -libextractor_ole2_la_LIBADD = \ - $(LIBADD) $(GSF_LIBS) \ - $(top_builddir)/src/common/libextractor_common.la -libextractor_ole2_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_pdf_la_SOURCES = \ - pdf_extractor.cc -libextractor_pdf_la_LDFLAGS = \ - $(XTRA_CPPLIBS) $(PLUGINFLAGS) -libextractor_pdf_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la \ - -lpoppler - -libextractor_png_la_SOURCES = \ - png_extractor.c -libextractor_png_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_png_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la \ - -lz - -libextractor_ps_la_SOURCES = \ - ps_extractor.c -libextractor_ps_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_qt_la_SOURCES = \ - qt_extractor.c -libextractor_qt_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_qt_la_LIBADD = \ - -lz -lm - -libextractor_real_la_SOURCES = \ - real_extractor.c -libextractor_real_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_riff_la_SOURCES = \ - riff_extractor.c -libextractor_riff_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_riff_la_LIBADD = \ - $(LE_LIBINTL) \ - -lm - -libextractor_rpm_la_SOURCES = \ - rpm_extractor.c -libextractor_rpm_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_rpm_la_LIBADD = \ - -lrpm - -libextractor_s3m_la_SOURCES = \ - s3m_extractor.c -libextractor_s3m_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_sid_la_SOURCES = \ - 
sid_extractor.c -libextractor_sid_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_tar_la_SOURCES = \ - tar_extractor.c -libextractor_tar_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_thumbnailffmpeg_la_SOURCES = \ - thumbnailffmpeg_extractor.c -libextractor_thumbnailffmpeg_la_LIBADD = \ - -lavformat -lavcodec -lswscale -lavutil -lz -lbz2 -libextractor_thumbnailffmpeg_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_thumbnailgtk_la_CFLAGS = \ - $(GLIB_CFLAGS) $(GTK_CFLAGS) -libextractor_thumbnailgtk_la_LIBADD = \ - $(LIBADD) -lgobject-2.0 @GTK_LIBS@ -libextractor_thumbnailgtk_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_thumbnailgtk_la_SOURCES = \ - thumbnailgtk_extractor.c - -libextractor_thumbnailqt_la_SOURCES = \ - thumbnailqt_extractor.cc -libextractor_thumbnailqt_la_LDFLAGS = \ - $(QT_LDFLAGS) \ - $(PLUGINFLAGS) -libextractor_thumbnailqt_la_LIBADD = \ - $(qtflags) $(svgflags) -libextractor_thumbnailqt_la_CPPFLAGS = \ - $(QT_CPPFLAGS) \ - $(QT_CFLAGS) $(QT_SVG_CFLAGS) - -libextractor_tiff_la_SOURCES = \ - tiff_extractor.c -libextractor_tiff_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_tiff_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la - -libextractor_wav_la_SOURCES = \ - wav_extractor.c -libextractor_wav_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_wav_la_LIBADD = \ - $(LE_LIBINTL) - -libextractor_xm_la_SOURCES = \ - xm_extractor.c -libextractor_xm_la_LDFLAGS = \ - $(PLUGINFLAGS) - -libextractor_zip_la_SOURCES = \ - zip_extractor.c -libextractor_zip_la_LDFLAGS = \ - $(PLUGINFLAGS) - EXTRA_DIST = template_extractor.c diff --git a/src/plugins/id3_extractor.c b/src/plugins/id3_extractor.c @@ -29,6 +29,8 @@ #include <unistd.h> #include <stdlib.h> +#include "extractor_plugins.h" + typedef struct { char *title; @@ -199,6 +201,46 @@ static const char *const genre_names[] = { #define OK 0 #define INVALID_ID3 1 +struct id3_state +{ + int state; + id3tag info; +}; + +enum ID3State +{ + ID3_INVALID = -1, + ID3_SEEKING_TO_TAIL = 0, + ID3_READING_TAIL 
= 1 +}; + +void +EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin) +{ + struct id3_state *state; + state = plugin->state = malloc (sizeof (struct id3_state)); + if (state == NULL) + return; + memset (state, 0, sizeof (struct id3_state)); + state->state = ID3_SEEKING_TO_TAIL; +} + +void +EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin) +{ + struct id3_state *state = plugin->state; + if (state != NULL) + { + if (state->info.title != NULL) free (state->info.title); + if (state->info.year != NULL) free (state->info.year); + if (state->info.album != NULL) free (state->info.album); + if (state->info.artist != NULL) free (state->info.artist); + if (state->info.comment != NULL) free (state->info.comment); + free (state); + } + plugin->state = NULL; +} + static void trim (char *k) { @@ -209,14 +251,14 @@ trim (char *k) } static int -get_id3 (const char *data, size_t size, id3tag * id3) +get_id3 (const char *data, int64_t offset, int64_t size, id3tag *id3) { const char *pos; if (size < 128) return INVALID_ID3; - pos = &data[size - 128]; + pos = &data[offset]; if (0 != strncmp ("TAG", pos, 3)) return INVALID_ID3; pos += 3; @@ -253,49 +295,82 @@ get_id3 (const char *data, size_t size, id3tag * id3) } -#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != (ret = proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)))) goto FINISH; } while (0) +#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) return 1; } while (0) -const char * -EXTRACTOR_id3_options () +int +EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - return "want-tail"; -} + int64_t file_position; + int64_t file_size; + int64_t offset = 0; + int64_t size; + struct id3_state *state; + char *data; + + char track[16]; + if (plugin == NULL || plugin->state == NULL) + 
return 1; -int -EXTRACTOR_id3_extract (const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) -{ - id3tag info; - char track[16]; - int ret; + state = plugin->state; + file_position = plugin->position; + file_size = plugin->fsize; + size = plugin->map_size; + data = (char *) plugin->shm_ptr; + + if (plugin->seek_request < 0) + return 1; + if (file_position - plugin->seek_request > 0) + { + plugin->seek_request = -1; + return 1; + } + if (plugin->seek_request - file_position < size) + offset = plugin->seek_request - file_position; - ret = 0; - if (OK != get_id3 (data, size, &info)) - return 0; - ADD (info.title, EXTRACTOR_METATYPE_TITLE); - ADD (info.artist, EXTRACTOR_METATYPE_ARTIST); - ADD (info.album, EXTRACTOR_METATYPE_ALBUM); - ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR); - ADD (info.genre, EXTRACTOR_METATYPE_GENRE); - ADD (info.comment, EXTRACTOR_METATYPE_COMMENT); - if (info.track_number != 0) + while (1) + { + switch (state->state) { - snprintf(track, - sizeof(track), "%u", info.track_number); - ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); + case ID3_INVALID: + plugin->seek_request = -1; + return 1; + case ID3_SEEKING_TO_TAIL: + offset = file_size - 128 - file_position; + if (offset > size) + { + state->state = ID3_READING_TAIL; + plugin->seek_request = file_position + offset; + return 0; + } + else if (offset < 0) + { + state->state = ID3_INVALID; + break; + } + state->state = ID3_READING_TAIL; + break; + case ID3_READING_TAIL: + if (OK != get_id3 (data, offset, size - offset, &state->info)) + return 1; + ADD (state->info.title, EXTRACTOR_METATYPE_TITLE); + ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST); + ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM); + ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR); + ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE); + ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT); + if (state->info.track_number != 0) + { + snprintf(track, + 
sizeof(track), "%u", state->info.track_number); + ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); + } + state->state = ID3_INVALID; } -FINISH: - if (info.title != NULL) free (info.title); - if (info.year != NULL) free (info.year); - if (info.album != NULL) free (info.album); - if (info.artist != NULL) free (info.artist); - if (info.comment != NULL) free (info.comment); - return ret; + } + return 1; } /* end of id3_extractor.c */ diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c @@ -1,420 +0,0 @@ -/* - This file is part of libextractor. - (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. 
- - */ -#define DEBUG_EXTRACT_ID3v23 0 - -#include "platform.h" -#include "extractor.h" -#include <string.h> -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <stdlib.h> -#include <fcntl.h> -#ifndef MINGW -#include <sys/mman.h> -#endif - -#include "convert.h" - -enum Id3v23Fmt - { - T, /* simple, 0-terminated string, prefixed by encoding */ - U, /* 0-terminated ASCII string, no encoding */ - UL, /* unsync'ed lyrics */ - SL, /* sync'ed lyrics */ - L, /* string with language prefix */ - I /* image */ - }; - -typedef struct -{ - const char *text; - enum EXTRACTOR_MetaType type; - enum Id3v23Fmt fmt; -} Matches; - -static Matches tmap[] = { - {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, - {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, - {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, - {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, - {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, - /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */ - /* TDLY */ - {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, - {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, - {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, - /* TIME */ - {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, - {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, - {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, - /* TKEY */ - {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, - {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ - {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, - {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, - {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, - {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, - {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, - {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, - {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, - {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, - {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, - {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, - {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, - {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, - 
{"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, - {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, - /* TRDA */ - {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, - /* TRSO */ - {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, - {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, - /* TSSE */ - {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, - {"WCOM", EXTRACTOR_METATYPE_URL, U}, - {"WCOP", EXTRACTOR_METATYPE_URL, U}, - {"WOAF", EXTRACTOR_METATYPE_URL, U}, - {"WOAS", EXTRACTOR_METATYPE_URL, U}, - {"WORS", EXTRACTOR_METATYPE_URL, U}, - {"WPAY", EXTRACTOR_METATYPE_URL, U}, - {"WPUB", EXTRACTOR_METATYPE_URL, U}, - {"WXXX", EXTRACTOR_METATYPE_URL, T}, - {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, - /* ... */ - {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, - {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, - {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, - /* ... */ - {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, - /* ... */ - {"LINK", EXTRACTOR_METATYPE_URL, U}, - /* ... */ - {"USER", EXTRACTOR_METATYPE_LICENSE, T}, - /* ... */ - {NULL, 0, T} -}; - - -/* mimetype = audio/mpeg */ -int -EXTRACTOR_id3v23_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) -{ - int unsync; - int extendedHdr; - int experimental; - uint32_t tsize; - uint32_t pos; - uint32_t ehdrSize; - uint32_t padding; - uint32_t csize; - int i; - uint16_t flags; - char *mime; - enum EXTRACTOR_MetaType type; - size_t off; - int obo; - - if ((size < 16) || - (data[0] != 0x49) || - (data[1] != 0x44) || - (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00)) - return 0; - unsync = (data[5] & 0x80) > 0; - if (unsync) - return 0; /* not supported */ - extendedHdr = (data[5] & 0x40) > 0; - experimental = (data[5] & 0x20) > 0; - if (experimental) - return 0; - tsize = (((data[6] & 0x7F) << 21) | - ((data[7] & 0x7F) << 14) | - ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); - if (tsize + 10 > size) - return 0; - pos = 10; - padding = 0; - if (extendedHdr) - { - 
ehdrSize = (((data[10]) << 24) | - ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0)); - - padding = (((data[15]) << 24) | - ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0)); - pos += 4 + ehdrSize; - if (padding < tsize) - tsize -= padding; - else - return 0; - } - - - while (pos < tsize) - { - if (pos + 10 > tsize) - return 0; - csize = - (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) + - data[pos + 7]; - if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) || - (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos)) - break; - flags = (data[pos + 8] << 8) + data[pos + 9]; - if (((flags & 0x80) > 0) /* compressed, not yet supported */ || - ((flags & 0x40) > 0) /* encrypted, not supported */ ) - { - pos += 10 + csize; - continue; - } - i = 0; - while (tmap[i].text != NULL) - { - if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4)) - { - char *word; - if ((flags & 0x20) > 0) - { - /* "group" identifier, skip a byte */ - pos++; - csize--; - } - switch (tmap[i].fmt) - { - case T: - /* this byte describes the encoding - try to convert strings to UTF-8 - if it fails, then forget it */ - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "ISO-8859-1"); - break; - } - break; - case U: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], - csize, "ISO-8859-1"); - break; - case UL: - if (csize < 6) - return 0; /* malformed */ - /* find end of description */ - off = 14; - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - return 
0; /* malformed */ - off++; - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "ISO-8859-1"); - break; - } - break; - case SL: - if (csize < 7) - return 0; /* malformed */ - /* find end of description */ - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "ISO-8859-1"); - break; - } - break; - case L: - if (csize < 5) - return 0; /* malformed */ - /* find end of description */ - obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... 
*/ - if (csize < 6) - obo = 0; - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], - csize - 4 - obo, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], - csize - 4 - obo, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], - csize - 4 - obo, "ISO-8859-1"); - break; - } - break; - case I: - if (csize < 2) - return 0; /* malformed */ - /* find end of mime type */ - off = 11; - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - return 0; /* malformed */ - off++; - mime = strdup ((const char*) &data[pos + 11]); - - switch (data[pos+off]) - { - case 0x03: - case 0x04: - type = EXTRACTOR_METATYPE_COVER_PICTURE; - break; - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; - break; - case 0x0D: - case 0x0E: - case 0x0F: - type = EXTRACTOR_METATYPE_EVENT_PICTURE; - break; - case 0x14: - type = EXTRACTOR_METATYPE_LOGO; - type = EXTRACTOR_METATYPE_LOGO; - break; - default: - type = EXTRACTOR_METATYPE_PICTURE; - break; - } - off++; - - /* find end of description */ - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - { - if (mime != NULL) - free (mime); - return 0; /* malformed */ - } - off++; - if ( (mime != NULL) && - (0 == strcasecmp ("-->", - mime)) ) - { - /* not supported */ - } - else - { - if (0 != proc (proc_cls, - "id3v23", - type, - EXTRACTOR_METAFORMAT_BINARY, - mime, - (const char*) &data[pos + off], - csize + 6 - off)) - { - if (mime != NULL) - free (mime); - return 1; - } - } - if (mime != NULL) - free (mime); - word = NULL; - break; - default: - return 
0; - } - if ((word != NULL) && (strlen (word) > 0)) - { - if (0 != proc (proc_cls, - "id3v23", - tmap[i].type, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - word, - strlen(word)+1)) - { - free (word); - return 1; - } - } - if (word != NULL) - free (word); - break; - } - i++; - } - pos += 10 + csize; - } - return 0; -} - -/* end of id3v23_extractor.c */ diff --git a/src/plugins/id3v24_extractor.c b/src/plugins/id3v24_extractor.c @@ -1,455 +0,0 @@ -/* - This file is part of libextractor. - (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. 
- - */ -#define DEBUG_EXTRACT_ID3v24 0 - -#include "platform.h" -#include "extractor.h" -#include <string.h> -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <stdlib.h> -#include <fcntl.h> -#ifndef MINGW -#include <sys/mman.h> -#endif - -#include "convert.h" - -enum Id3v24Fmt - { - T, /* simple, 0-terminated string, prefixed by encoding */ - U, /* 0-terminated ASCII string, no encoding */ - UL, /* unsync'ed lyrics */ - SL, /* sync'ed lyrics */ - L, /* string with language prefix */ - I /* image */ - }; - -typedef struct -{ - const char *text; - enum EXTRACTOR_MetaType type; - enum Id3v24Fmt fmt; -} Matches; - -static Matches tmap[] = { - {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, - {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, - {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, - {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, - {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, - /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */ - /* TDLY */ - {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, - {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, - {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, - /* TIME, deprecated in 24 */ - {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, - {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, - {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, - /* TKEY */ - {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, - {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ - {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, - {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, - {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, - {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, - {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, - /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */ - {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, - {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, - {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, - {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, - {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, 
T}, - {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, - {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, - {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, - /* TRDA, deprecated in 24 */ - {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, - /* TRSO */ - /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */ - {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, - /* TSSE */ - /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */ - {"WCOM", EXTRACTOR_METATYPE_URL, U}, - {"WCOP", EXTRACTOR_METATYPE_URL, U}, - {"WOAF", EXTRACTOR_METATYPE_URL, U}, - {"WOAS", EXTRACTOR_METATYPE_URL, U}, - {"WORS", EXTRACTOR_METATYPE_URL, U}, - {"WPAY", EXTRACTOR_METATYPE_URL, U}, - {"WPUB", EXTRACTOR_METATYPE_URL, U}, - {"WXXX", EXTRACTOR_METATYPE_URL, T}, - /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */ - /* ... */ - {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, - {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, - {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, - /* ... */ - {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, - /* ... */ - {"LINK", EXTRACTOR_METATYPE_URL, U}, - /* ... */ - {"USER", EXTRACTOR_METATYPE_LICENSE, T}, - /* ... 
*/ - /* new frames in 24 */ - /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */ - {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T}, - /* TDRC, TDRL, TDTG */ - {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, - {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T}, - {"TMOO", EXTRACTOR_METATYPE_MOOD, T}, - {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T}, - {"TSOA", EXTRACTOR_METATYPE_ALBUM, T}, - {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T}, - {"TSOT", EXTRACTOR_METATYPE_TITLE, T}, - {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T}, - {NULL, 0, T} -}; - - -/* mimetype = audio/mpeg */ -int -EXTRACTOR_id3v24_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) -{ - int unsync; - int extendedHdr; - int experimental; - uint32_t tsize; - uint32_t pos; - uint32_t ehdrSize; - uint32_t csize; - int i; - uint16_t flags; - char *mime; - enum EXTRACTOR_MetaType type; - size_t off; - - if ((size < 16) || - (data[0] != 0x49) || - (data[1] != 0x44) || - (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00)) - return 0; - unsync = (data[5] & 0x80) > 0; - if (unsync) - return 0; /* not supported */ - extendedHdr = (data[5] & 0x40) > 0; - experimental = (data[5] & 0x20) > 0; - if (experimental) - return 0; - /* footer = (data[5] & 0x10) > 0; */ - tsize = (((data[6] & 0x7F) << 21) | - ((data[7] & 0x7F) << 14) | - ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); - if (tsize + 10 > size) - return 0; - pos = 10; - if (extendedHdr) - { - ehdrSize = (((data[10] & 0x7F) << 21) | - ((data[11] & 0x7F) << 14) | - ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0)); - pos += 4 + ehdrSize; - if (ehdrSize > tsize) - return 0; - } - while (pos < tsize) - { - if (pos + 10 > tsize) - return 0; - csize = - (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) + - data[pos + 7]; - if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) || - (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos)) - break; - flags = 
(data[pos + 8] << 8) + data[pos + 9]; - if (((flags & 0x08) > 0) /* compressed, not yet supported */ || - ((flags & 0x04) > 0) /* encrypted, not supported */ || - ((flags & 0x02) > 0) /* unsynchronized, not supported */ ) - { - pos += 10 + csize; - continue; - } - i = 0; - while (tmap[i].text != NULL) - { - if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4)) - { - char *word; - if ((flags & 0x40) > 0) - { - /* "group" identifier, skip a byte */ - pos++; - csize--; - } - - switch (tmap[i].fmt) - { - case T: - /* this byte describes the encoding - try to convert strings to UTF-8 - if it fails, then forget it */ - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "UTF-16"); - break; - case 0x02: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "UTF-16BE"); - break; - case 0x03: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "UTF-8"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], - csize - 1, "ISO-8859-1"); - break; - } - break; - case U: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], - csize, "ISO-8859-1"); - break; - case UL: - if (csize < 6) - return 0; /* malformed */ - /* find end of description */ - off = 14; - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - return 0; /* malformed */ - off++; - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - 
off, "UTF-16"); - break; - case 0x02: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "UTF-16BE"); - break; - case 0x03: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "UTF-8"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "ISO-8859-1"); - break; - } - break; - case SL: - if (csize < 7) - return 0; /* malformed */ - /* find end of description */ - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "UTF-16"); - break; - case 0x02: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "UTF-16BE"); - break; - case 0x03: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "UTF-8"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], - csize - 6, "ISO-8859-1"); - break; - } - break; - case L: - if (csize < 5) - return 0; /* malformed */ - /* find end of description */ - switch (data[pos + 10]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], - csize - 4, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], - csize - 4, "UTF-16"); - break; - case 0x02: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], - csize - 4, "UTF-16BE"); - break; - case 0x03: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], - csize - 4, "UTF-8"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 
((const char *) &data[pos + 14], - csize - 4, "ISO-8859-1"); - break; - } - break; - case I: - if (csize < 2) - return 0; /* malformed */ - /* find end of mime type */ - off = 11; - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - return 0; /* malformed */ - off++; - mime = strdup ((const char*) &data[pos + 11]); - - switch (data[pos+off]) - { - case 0x03: - case 0x04: - type = EXTRACTOR_METATYPE_COVER_PICTURE; - break; - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; - break; - case 0x0D: - case 0x0E: - case 0x0F: - type = EXTRACTOR_METATYPE_EVENT_PICTURE; - break; - case 0x14: - type = EXTRACTOR_METATYPE_LOGO; - type = EXTRACTOR_METATYPE_LOGO; - break; - default: - type = EXTRACTOR_METATYPE_PICTURE; - break; - } - off++; - - /* find end of description */ - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - { - if (mime != NULL) - free (mime); - return 0; /* malformed */ - } - off++; - if ( (mime != NULL) && - (0 == strcasecmp ("-->", - mime)) ) - { - /* not supported */ - } - else - { - if (0 != proc (proc_cls, - "id3v24", - type, - EXTRACTOR_METAFORMAT_BINARY, - mime, - (const char*) &data[pos + off], - csize + 6 - off)) - { - if (mime != NULL) - free (mime); - return 1; - } - } - if (mime != NULL) - free (mime); - word = NULL; - break; - default: - return 0; - } - if ((word != NULL) && (strlen (word) > 0)) - { - if (0 != proc (proc_cls, - "id3v24", - tmap[i].type, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - word, - strlen(word)+1)) - { - free (word); - return 1; - } - } - if (word != NULL) - free (word); - break; - } - i++; - } - pos += 10 + csize; - } - return 0; -} - -/* end of id3v24_extractor.c */ diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c @@ -26,6 +26,8 @@ #endif 
#include "convert.h" +#include "extractor_plugins.h" + #define DEBUG_EXTRACT_ID3v2 0 enum Id3v2Fmt @@ -47,314 +49,723 @@ typedef struct static Matches tmap[] = { /* skipping UFI */ - {"TT1", EXTRACTOR_METATYPE_SECTION, T}, - {"TT2", EXTRACTOR_METATYPE_TITLE, T}, - {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, - {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, - {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, - {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, - {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, - {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, - {"TXT", EXTRACTOR_METATYPE_WRITER, T}, - {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, - {"TCO", EXTRACTOR_METATYPE_GENRE, T}, - {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, - {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, - {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, - {"TRC", EXTRACTOR_METATYPE_ISRC, T}, - {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, + {"TT1 ", EXTRACTOR_METATYPE_SECTION, T}, + {"TT2 ", EXTRACTOR_METATYPE_TITLE, T}, + {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T}, + {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T}, + {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T}, + {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T}, + {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T}, + {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T}, + {"TXT ", EXTRACTOR_METATYPE_WRITER, T}, + {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T}, + {"TCO ", EXTRACTOR_METATYPE_GENRE, T}, + {"TAL ", EXTRACTOR_METATYPE_ALBUM, T}, + {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T}, + {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, + {"TRC ", EXTRACTOR_METATYPE_ISRC, T}, + {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* FIXME: these two and TYE should be combined into the actual publication date (if TRD is missing) - {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, - {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, + {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, + {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, */ - {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, - {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, - {"TFT", 
EXTRACTOR_METATYPE_FORMAT_VERSION, T}, - {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, - {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, - {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, - {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, - {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, - {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, - {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ - {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, + {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T}, + {"TMT ", EXTRACTOR_METATYPE_SOURCE, T}, + {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, + {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, + {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T}, + {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T}, + {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T}, + {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, + {"TOF ", EXTRACTOR_METATYPE_FILENAME, T}, + {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ + {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* skipping TDY, TKE */ - {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, - {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, - {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, - {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, + {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, + {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, + {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, + {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* skipping TXX */ - {"WAF", EXTRACTOR_METATYPE_URL, U}, - {"WAR", EXTRACTOR_METATYPE_URL, U}, - {"WAS", EXTRACTOR_METATYPE_URL, U}, - {"WCM", EXTRACTOR_METATYPE_URL, U}, - {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, - {"WCB", EXTRACTOR_METATYPE_URL, U}, + {"WAF ", EXTRACTOR_METATYPE_URL, U}, + {"WAR ", EXTRACTOR_METATYPE_URL, U}, + {"WAS ", EXTRACTOR_METATYPE_URL, U}, + {"WCM ", EXTRACTOR_METATYPE_URL, U}, + {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U}, + {"WCB ", EXTRACTOR_METATYPE_URL, U}, /* skipping WXX */ - {"IPL", 
EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, + {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* skipping MCI */ /* skipping ETC */ /* skipping MLL */ /* skipping STC */ - {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, - {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, - {"COM", EXTRACTOR_METATYPE_COMMENT, L}, + {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL}, + {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL}, + {"COM ", EXTRACTOR_METATYPE_COMMENT, L}, /* skipping RVA */ /* skipping EQU */ /* skipping REV */ - {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, + {"PIC ", EXTRACTOR_METATYPE_PICTURE, I}, /* skipping GEN */ - /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ - /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ + /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ + /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ /* skipping BUF */ /* skipping CRM */ /* skipping CRA */ - /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ + /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */ + + + {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, + {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, + {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, + {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, + {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, + {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */ + /* TDLY */ + {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, + {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, + {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, + /* TIME, idv23 only */ + {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, + {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, + {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, + /* TKEY */ + {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, + {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ + {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, + {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, + {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, + {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, + {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, + {"TORY", 
EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */ + {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, + {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, + {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, + {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, + {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, + {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, + {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, + {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, + /* TRDA, idv23 only */ + {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, + /* TRSO */ + {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */ + {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, + /* TSSE */ + {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */ + {"WCOM", EXTRACTOR_METATYPE_URL, U}, + {"WCOP", EXTRACTOR_METATYPE_URL, U}, + {"WOAF", EXTRACTOR_METATYPE_URL, U}, + {"WOAS", EXTRACTOR_METATYPE_URL, U}, + {"WORS", EXTRACTOR_METATYPE_URL, U}, + {"WPAY", EXTRACTOR_METATYPE_URL, U}, + {"WPUB", EXTRACTOR_METATYPE_URL, U}, + {"WXXX", EXTRACTOR_METATYPE_URL, T}, + {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */ + /* ... */ + {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, + {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, + {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, + /* ... */ + {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, + /* ... */ + {"LINK", EXTRACTOR_METATYPE_URL, U}, + /* ... */ + {"USER", EXTRACTOR_METATYPE_LICENSE, T}, + /* ... 
*/ + + /* new frames in id3v24 */ + /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */ + {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T}, + /* TDRC, TDRL, TDTG */ + {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, + {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T}, + {"TMOO", EXTRACTOR_METATYPE_MOOD, T}, + {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T}, + {"TSOA", EXTRACTOR_METATYPE_ALBUM, T}, + {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T}, + {"TSOT", EXTRACTOR_METATYPE_TITLE, T}, + {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T}, + {NULL, 0, T}, }; - -/* mimetype = audio/mpeg */ -int -EXTRACTOR_id3v2_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +struct id3v2_state { + int state; unsigned int tsize; - unsigned int pos; + size_t csize; + char id[4]; + int32_t ti; + char ver; + char extended_header; + uint16_t frame_flags; + char *mime; +}; + +enum ID3v2State +{ + ID3V2_INVALID = -1, + ID3V2_READING_HEADER = 0, + ID3V2_READING_FRAME_HEADER, + ID3V23_READING_EXTENDED_HEADER, + ID3V24_READING_EXTENDED_HEADER, + ID3V2_READING_FRAME +}; + +void +EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin) +{ + struct id3v2_state *state; + state = plugin->state = malloc (sizeof (struct id3v2_state)); + if (state == NULL) + return; + memset (state, 0, sizeof (struct id3v2_state)); + state->state = ID3V2_READING_HEADER; + state->ti = -1; + state->mime = NULL; +} + +void +EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin) +{ + struct id3v2_state *state = plugin->state; + if (state != NULL) + { + if (state->mime != NULL) + free (state->mime); + free (state); + } + plugin->state = NULL; +} + +static int +find_type (const char *id, size_t len) +{ + int i; + for (i = 0; tmap[i].text != NULL; i++) + if (0 == strncmp (tmap[i].text, id, len)) + return i; + return -1; +} + +int +EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, 
void *proc_cls) +{ + int64_t file_position; + int64_t file_size; + int64_t offset = 0; + int64_t size; + struct id3v2_state *state; + unsigned char *data; + char *word = NULL; unsigned int off; enum EXTRACTOR_MetaType type; - const char *mime; + unsigned char picture_type; - if ((size < 16) || - (data[0] != 0x49) || - (data[1] != 0x44) || - (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00)) - return 0; - /* unsync: (data[5] & 0x80) > 0; */ - tsize = (((data[6] & 0x7F) << 21) | - ((data[7] & 0x7F) << 14) | - ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); + if (plugin == NULL || plugin->state == NULL) + return 1; - if (tsize + 10 > size) - return 0; - pos = 10; - while (pos < tsize) + state = plugin->state; + file_position = plugin->position; + file_size = plugin->fsize; + size = plugin->map_size; + data = plugin->shm_ptr; + + if (plugin->seek_request < 0) + return 1; + if (file_position - plugin->seek_request > 0) + { + plugin->seek_request = -1; + return 1; + } + if (plugin->seek_request - file_position < size) + offset = plugin->seek_request - file_position; + + while (1) + { + switch (state->state) { - size_t csize; - int i; + case ID3V2_INVALID: + plugin->seek_request = -1; + return 1; + case ID3V2_READING_HEADER: + /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq: + * Q: Where is an ID3v2 tag located in an MP3 file? + * A: It is most likely located at the beginning of the file. Look for the + * marker "ID3" in the first 3 bytes of the file. If it's not there, it + * could be at the end of the file (if the tag is ID3v2.4). Look for the + * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the + * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags + * in the actual MPEG stream, on an MPEG frame boundry. Almost nobody does + * this. + * Parsing of such tags will not be completely correct, because we can't + * seek backwards. 
We will have to seek to file_size - chunk_size instead + * (by the way, chunk size is theoretically unknown, LE is free to use any chunk + * size, even though plugins often make assumptions about chunk size being large + * enough to make one atomic read without seeking, if offset == 0) and search + * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before + * it (or 10 bytes before the end of file, if id3v1 is not there; not sure + * about APETAGs; we should probably just scan byte-by-byte from the end of file, + * until we hit 3DI, or reach the offset == 0), and use it to set offset to the + * start of ID3v24 header, adjust the following file_position check and data + * indices (use offset), and otherwise proceed as normal (maybe file size checks + * along the way will have to be adjusted by -1, or made ">" instead of ">="; + * these problems do not arise for tags at the beginning of the file, since + * audio itself is usually at least 1-byte long; when the tag is at the end of + * file, these checks will have to be 100% correct). + * If there are two tags (at the beginning and at the end of the file), + * a SEEK in the one at the beginning of the file can be used to seek to the + * one at the end. + */ + /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that + * tells the parser to augment id3v1 values with the values from id3v2 (if this + * flag is not set, id3v2 parser must discard id3v1 data). + * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored. 
+ */ + if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/) + { + state->state = ID3V2_INVALID; + break; + } + state->ver = data[3]; + if (state->ver == 0x02) + { + state->extended_header = 0; + } + else if ((state->ver == 0x03) || (state->ver == 0x04)) + { + if ((data[5] & 0x80) > 0) + { + /* unsync is not supported in id3v23 or id3v24*/ + state->state = ID3V2_INVALID; + break; + } + state->extended_header = (data[5] & 0x40) > 0; + if ((data[5] & 0x20) > 0) + { + /* experimental is not supported in id3v23 or id3v24*/ + state->state = ID3V2_INVALID; + break; + } + } + state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); + if (state->tsize + 10 > file_size) + { + state->state = ID3V2_INVALID; + break; + } + offset = 10; + if (state->ver == 0x03 && state->extended_header) + state->state = ID3V23_READING_EXTENDED_HEADER; + else if (state->ver == 0x04 && state->extended_header) + state->state = ID3V24_READING_EXTENDED_HEADER; + else + state->state = ID3V2_READING_FRAME_HEADER; + break; + case ID3V23_READING_EXTENDED_HEADER: + if (offset + 9 >= size) + { + if (offset == 0) + { + state->state = ID3V2_INVALID; + break; + } + plugin->seek_request = file_position + offset; + return 0; + } + if (state->ver == 0x03 && state->extended_header) + { + uint32_t padding, extended_header_size; + extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0)); + padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0)); + if (data[offset + 4] == 0 && data[offset + 5] == 0) + /* Skip the CRC32 byte after extended header */ + offset += 1; + offset += 4 + extended_header_size; + if (padding < state->tsize) + 
state->tsize -= padding; + else + { + state->state = ID3V2_INVALID; + break; + } + } + break; + case ID3V24_READING_EXTENDED_HEADER: + if (offset + 6 >= size) + { + if (offset == 0) + { + state->state = ID3V2_INVALID; + break; + } + plugin->seek_request = file_position + offset; + return 0; + } + if ( (state->ver == 0x04) && (state->extended_header)) + { + uint32_t extended_header_size; - if (pos + 7 > tsize) + extended_header_size = (((data[offset]) << 24) | + ((data[offset + 1]) << 16) | + ((data[offset + 2]) << 8) | + ((data[offset + 3]) << 0)); + offset += 4 + extended_header_size; + } + break; + case ID3V2_READING_FRAME_HEADER: + if (file_position + offset > state->tsize || + ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) || + (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize)) + { + state->state = ID3V2_INVALID; + break; + } + if (((state->ver == 0x02) && (offset + 6 >= size)) || + (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size))) + { + plugin->seek_request = file_position + offset; return 0; - csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; - if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) + } + if (state->ver == 0x02) + { + memcpy (state->id, &data[offset], 3); + state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5]; + if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0)) + { + state->state = ID3V2_INVALID; + break; + } + offset += 6; + state->frame_flags = 0; + } + else if ((state->ver == 0x03) || (state->ver == 0x04)) + { + memcpy (state->id, &data[offset], 4); + if (state->ver == 0x03) + state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7]; + else if (state->ver == 0x04) + state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 
0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00); + if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0)) + { + state->state = ID3V2_INVALID; + break; + } + state->frame_flags = (data[offset + 8] << 8) + data[offset + 9]; + if (state->ver == 0x03) + { + if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ || + ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */) + { + /* Skip to next frame header */ + offset += 10 + state->csize; + break; + } + } + else if (state->ver == 0x04) + { + if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ || + ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ || + ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */) + { + /* Skip to next frame header */ + offset += 10 + state->csize; + break; + } + if ((state->frame_flags & 0x01) > 0) + { + /* Skip data length indicator */ + state->csize -= 4; + offset += 4; + } + } + offset += 10; + } + + state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 
4 : 0)); + if (state->ti == -1) + { + offset += state->csize; + break; + } + state->state = ID3V2_READING_FRAME; + break; + case ID3V2_READING_FRAME: + if (offset == 0 && state->csize > size) + { + /* frame size is larger than the size of one data chunk we get at a time */ + offset += state->csize; + state->state = ID3V2_READING_FRAME_HEADER; + break; + } + if (offset + state->csize > size) + { + plugin->seek_request = file_position + offset; + return 0; + } + word = NULL; + if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) || + ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0))) + { + /* "group" identifier, skip a byte */ + offset++; + state->csize--; + } + switch (tmap[state->ti].fmt) + { + case T: + if (data[offset] == 0x00) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], + state->csize - 1, "ISO-8859-1"); + else if (data[offset] == 0x01) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], + state->csize - 1, "UCS-2"); + else if ((state->ver == 0x04) && (data[offset] == 0x02)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], + state->csize - 1, "UTF-16BE"); + else if ((state->ver == 0x04) && (data[offset] == 0x03)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], + state->csize - 1, "UTF-8"); + else + /* bad encoding byte, try to convert from iso-8859-1 */ + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], + state->csize - 1, "ISO-8859-1"); + break; + case U: + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset], + state->csize, "ISO-8859-1"); + break; + case UL: + if (state->csize < 6) + { + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + /* find end of description */ + off = 4; + while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) + off++; + if ((off >= state->csize) || (data[offset + off] != '\0')) + { + /* malformed */ + state->state = 
ID3V2_INVALID; + break; + } + off++; + if (data[offset] == 0x00) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "ISO-8859-1"); + else if (data[offset] == 0x01) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "UCS-2"); + else if ((state->ver == 0x04) && (data[offset] == 0x02)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "UTF-16BE"); + else if ((state->ver == 0x04) && (data[offset] == 0x03)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "UTF-8"); + else + /* bad encoding byte, try to convert from iso-8859-1 */ + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "ISO-8859-1"); + break; + case SL: + if (state->csize < 7) + { + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + if (data[offset] == 0x00) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], + state->csize - 6, "ISO-8859-1"); + else if (data[offset] == 0x01) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], + state->csize - 6, "UCS-2"); + else if ((state->ver == 0x04) && (data[offset] == 0x02)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], + state->csize - 6, "UTF-16BE"); + else if ((state->ver == 0x04) && (data[offset] == 0x03)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], + state->csize - 6, "UTF-8"); + else + /* bad encoding byte, try to convert from iso-8859-1 */ + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], + state->csize - 6, "ISO-8859-1"); + break; + case L: + if (state->csize < 5) + { + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + /* find end of description */ + off = 4; + while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) + 
off++; + if ((off >= state->csize) || (data[offset + off] != '\0')) + { + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + off++; + + if (data[offset] == 0x00) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "ISO-8859-1"); + else if (data[offset] == 0x01) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "UCS-2"); + else if ((state->ver == 0x04) && (data[offset] == 0x02)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "UTF-1offBE"); + else if ((state->ver == 0x04) && (data[offset] == 0x03)) + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "UTF-8"); + else + /* bad encoding byte, try to convert from iso-8859-1 */ + word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], + state->csize - off, "ISO-8859-1"); + break; + case I: + if ( ( (state->ver == 0x02) && + (state->csize < 7) ) || + ( ( (state->ver == 0x03) || + (state->ver == 0x04)) && (state->csize < 5)) ) + { + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + if (state->mime != NULL) + free (state->mime); + state->mime = NULL; + if (state->ver == 0x02) + { + off = 5; + picture_type = data[offset + 5]; + } + else if ((state->ver == 0x03) || (state->ver == 0x04)) + { + off = 1; + while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') ) + off++; + if ((off >= state->csize) || (data[offset + off] != '\0')) + { + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + state->mime = malloc (off); + memcpy (state->mime, &data[offset + 1], off - 1); + state->mime[off - 1] = '\0'; + off += 1; + picture_type = data[offset]; + off += 1; + } + /* find end of description */ + while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) + off++; + if ((off >= state->csize) || (data[offset + off] != '\0')) + { 
+ free (state->mime); + state->mime = NULL; + /* malformed */ + state->state = ID3V2_INVALID; + break; + } + off++; + switch (picture_type) + { + case 0x03: + case 0x04: + type = EXTRACTOR_METATYPE_COVER_PICTURE; + break; + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; + break; + case 0x0D: + case 0x0E: + case 0x0F: + type = EXTRACTOR_METATYPE_EVENT_PICTURE; + break; + case 0x14: + type = EXTRACTOR_METATYPE_LOGO; + type = EXTRACTOR_METATYPE_LOGO; + break; + default: + type = EXTRACTOR_METATYPE_PICTURE; + break; + } + if (state->ver == 0x02) + { + if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3)) + state->mime = strdup ("image/png"); + else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3)) + state->mime = strdup ("image/jpeg"); + else + state->mime = NULL; + } + else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL)) + { + size_t mime_len = strlen (state->mime); + char *type_mime = malloc (mime_len + 6 + 1); + snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime); + free (state->mime); + state->mime = type_mime; + } + if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->"))) + { + /* not supported */ + free (state->mime); + state->mime = NULL; + } + else + { + if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off)) + { + if (state->mime != NULL) + free (state->mime); + state->mime = NULL; + return 1; + } + if (state->mime != NULL) + free (state->mime); + state->mime = NULL; + } + word = NULL; break; - i = 0; - while (tmap[i].text != NULL) + default: + return 1; + } + if ((word != NULL) && (strlen (word) > 0)) + { + if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1)) { - if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3)) - { - char *word; 
- /* this byte describes the encoding - try to convert strings to UTF-8 - if it fails, then forget it */ - switch (tmap[i].fmt) - { - case T: - switch (data[pos + 6]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], - csize - 1, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], - csize - 1, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], - csize - 1, "ISO-8859-1"); - break; - } - break; - case U: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6], - csize, "ISO-8859-1"); - break; - case UL: - if (csize < 6) - return 0; /* malformed */ - /* find end of description */ - off = 10; - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - return 0; /* malformed */ - off++; - switch (data[pos + 6]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], - csize - off, "ISO-8859-1"); - break; - } - break; - case SL: - if (csize < 7) - return 0; /* malformed */ - /* find end of description */ - switch (data[pos + 6]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], - csize - 6, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], - csize - 6, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], - 
csize - 6, "ISO-8859-1"); - break; - } - break; - case L: - if (csize < 5) - return 0; /* malformed */ - /* find end of description */ - switch (data[pos + 6]) - { - case 0x00: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], - csize - 4, "ISO-8859-1"); - break; - case 0x01: - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], - csize - 4, "UCS-2"); - break; - default: - /* bad encoding byte, - try to convert from iso-8859-1 */ - word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], - csize - 4, "ISO-8859-1"); - break; - } - break; - case I: - if (csize < 6) - return 0; /* malformed */ - /* find end of description */ - off = 12; - while ( (off < size) && - (off - pos < csize) && - (data[pos + off] == '\0') ) - off++; - if ( (off >= csize) || - (data[pos+off] != '\0') ) - return 0; /* malformed */ - off++; - switch (data[pos+11]) - { - case 0x03: - case 0x04: - type = EXTRACTOR_METATYPE_COVER_PICTURE; - break; - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; - break; - case 0x0D: - case 0x0E: - case 0x0F: - type = EXTRACTOR_METATYPE_EVENT_PICTURE; - break; - case 0x14: - type = EXTRACTOR_METATYPE_LOGO; - type = EXTRACTOR_METATYPE_LOGO; - break; - default: - type = EXTRACTOR_METATYPE_PICTURE; - break; - } - if (0 == strncasecmp ("PNG", - (const char*) &data[pos + 7], 3)) - mime = "image/png"; - else if (0 == strncasecmp ("JPG", - (const char*) &data[pos + 7], 3)) - mime = "image/jpeg"; - else - mime = NULL; - if (0 == strncasecmp ("-->", - (const char*) &data[pos + 7], 3)) - { - /* not supported */ - } - else - { - if (0 != proc (proc_cls, - "id3v2", - type, - EXTRACTOR_METAFORMAT_BINARY, - mime, - (const char*) &data[pos + off], - csize + 6 - off)) - return 1; - } - word = NULL; - break; - default: - return 0; - } - if ((word != NULL) && (strlen (word) > 0)) - { - if (0 != proc (proc_cls, - "id3v2", - tmap[i].type, - 
EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - word, - strlen(word)+1)) - { - free (word); - return 1; - } - } - if (word != NULL) - free (word); - break; - } - i++; + free (word); + return 1; } - pos += 6 + csize; + } + if (word != NULL) + free (word); + offset = offset + state->csize; + state->state = ID3V2_READING_FRAME_HEADER; + break; } - return 0; + } + return 1; } /* end of id3v2_extractor.c */ diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c @@ -36,8 +36,41 @@ #include <unistd.h> #include <stdlib.h> -#define MAX_MP3_SCAN_DEEP 16768 -const int max_frames_scan = 1024; +#include "extractor_plugins.h" + +#if WINDOWS +#include <sys/param.h> /* #define BYTE_ORDER */ +#endif +#ifndef __BYTE_ORDER +#ifdef _BYTE_ORDER +#define __BYTE_ORDER _BYTE_ORDER +#else +#ifdef BYTE_ORDER +#define __BYTE_ORDER BYTE_ORDER +#endif +#endif +#endif +#ifndef __BIG_ENDIAN +#ifdef _BIG_ENDIAN +#define __BIG_ENDIAN _BIG_ENDIAN +#else +#ifdef BIG_ENDIAN +#define __BIG_ENDIAN BIG_ENDIAN +#endif +#endif +#endif +#ifndef __LITTLE_ENDIAN +#ifdef _LITTLE_ENDIAN +#define __LITTLE_ENDIAN _LITTLE_ENDIAN +#else +#ifdef LITTLE_ENDIAN +#define __LITTLE_ENDIAN LITTLE_ENDIAN +#endif +#endif +#endif + +#define LARGEST_FRAME_SIZE 8065 + enum { MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 }; @@ -45,6 +78,11 @@ enum { LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 }; #define MPA_SYNC_MASK ((unsigned int) 0xFFE00000) +#if __BYTE_ORDER == __BIG_ENDIAN +#define MPA_SYNC_MASK_MEM ((unsigned int) 0xFFE00000) +#else +#define MPA_SYNC_MASK_MEM ((unsigned int) 0x0000E0FF) +#endif #define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000) #define MPA_VERSION_MASK ((unsigned int) 0x00080000) #define MPA_LAYER_MASK ((unsigned int) 0x3) @@ -106,169 +144,274 @@ static const char * const layer_names[3] = { #define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) -/* mimetype = audio/mpeg */ -int 
-EXTRACTOR_mp3_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +struct mp3_state +{ + int state; + + uint32_t header; + int sample_rate; + char mpeg_ver; + char layer; + char vbr_flag; + int ch; + char copyright_flag; + char original_flag; + int avg_bps; + int bitrate; + + int64_t number_of_frames; + int64_t number_of_valid_frames; +}; + +enum MP3State +{ + MP3_LOOKING_FOR_FRAME = 0, + MP3_READING_FRAME = 1, +}; + +void +EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin) +{ + struct mp3_state *state; + state = plugin->state = malloc (sizeof (struct mp3_state)); + if (state == NULL) + return; + state->header = 0; + state->sample_rate = 0; + state->number_of_frames = 0; + state->number_of_valid_frames = 0; + state->mpeg_ver = 0; + state->layer = 0; + state->vbr_flag = 0; + state->ch = 0; + state->copyright_flag = 0; + state->original_flag = 0; + state->avg_bps = 0; + state->bitrate = 0; + state->state = 0; +} + +void +EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin) +{ + if (plugin->state != NULL) + { + free (plugin->state); + } + plugin->state = NULL; +} + +static int +calculate_frame_statistics_and_maybe_report_it (struct EXTRACTOR_PluginList *plugin, + struct mp3_state *state, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int length; + char format[512]; + + if (((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5 || + state->number_of_valid_frames < 2) + /* Unlikely to be an mp3 file */ + return 0; + ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE); + state->avg_bps = state->avg_bps / state->number_of_valid_frames; + if (state->sample_rate > 0) + length = 1152 * state->number_of_valid_frames / state->sample_rate; + else if (state->avg_bps > 0 || state->bitrate > 0) + length = plugin->fsize / (state->avg_bps ? state->avg_bps : state->bitrate ? 
state->bitrate : 1) / 125; + else + length = 0; + + ADDR (mpeg_versions[state->mpeg_ver - 1], EXTRACTOR_METATYPE_FORMAT_VERSION); + snprintf (format, + sizeof (format), + "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s", + mpeg_versions[state->mpeg_ver - 1], + layer_names[state->layer - 1], + state->avg_bps, + state->vbr_flag ? _("VBR") : _("CBR"), + state->sample_rate, + channel_modes[state->ch], + state->copyright_flag ? _("copyright") : _("no copyright"), + state->original_flag ? _("original") : _("copy") ); + + ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); + snprintf (format, + sizeof (format), "%dm%02d", + length / 60, length % 60); + ADDR (format, EXTRACTOR_METATYPE_DURATION); + return 0; +} + +int +EXTRACTOR_mp3_extract_method (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls) { - unsigned int header; - int counter = 0; + int64_t file_position; + int64_t file_size; + size_t offset = 0; + size_t size; + unsigned char *data; + struct mp3_state *state; + + size_t frames_found_in_this_round = 0; + int start_anew = 0; + char mpeg_ver = 0; char layer = 0; int idx_num = 0; int bitrate = 0; /*used for each frame */ - int avg_bps = 0; /*average bitrate */ - int vbr_flag = 0; int copyright_flag = 0; int original_flag = 0; - int length = 0; int sample_rate = 0; int ch = 0; int frame_size; - int frames = 0; - size_t pos = 0; - char format[512]; - do - { - /* seek for frame start */ - if (pos + sizeof (header) > size) - { - return 0; - } /*unable to find header */ - header = (data[pos] << 24) | (data[pos+1] << 16) | - (data[pos+2] << 8) | data[pos+3]; - if ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK) - break; /*found header sync */ - pos++; - counter++; /*next try */ - } - while (counter < MAX_MP3_SCAN_DEEP); - if (counter >= MAX_MP3_SCAN_DEEP) - return 0; + if (plugin == NULL || plugin->state == NULL) + return 1; - do - { /*ok, now we found a mp3 frame header */ - frames++; - switch (header & (MPA_LAST_SYNC_BIT_MASK | 
MPA_VERSION_MASK)) - { - case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK): - mpeg_ver = MPEG_V1; - break; - case (MPA_LAST_SYNC_BIT_MASK): - mpeg_ver = MPEG_V2; - break; - case 0: - mpeg_ver = MPEG_V25; - break; - case (MPA_VERSION_MASK): - default: - return 0; - } - switch (header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT)) + state = plugin->state; + file_position = plugin->position; + file_size = plugin->fsize; + size = plugin->map_size; + data = plugin->shm_ptr; + + if (plugin->seek_request < 0) + return 1; + if (file_position - plugin->seek_request > 0) + { + plugin->seek_request = -1; + return 1; + } + if (plugin->seek_request - file_position < size) + offset = plugin->seek_request - file_position; + + while (1) + { + switch (state->state) + { + case MP3_LOOKING_FOR_FRAME: + /* Look for a frame header */ + while (offset + sizeof (state->header) < size && (((*((uint32_t *) &data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM)) + offset += 1; + if (offset + sizeof (state->header) >= size) + { + /* Alternative: (frames_found_in_this_round < (size / LARGEST_FRAME_SIZE / 2)) is to generous */ + if ((file_position == 0 && ((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5) || + file_position + offset + sizeof (state->header) >= file_size) { - case (0x1 << MPA_LAYER_SHIFT): - layer = LAYER_3; - break; - case (0x2 << MPA_LAYER_SHIFT): - layer = LAYER_2; - break; - case (0x3 << MPA_LAYER_SHIFT): - layer = LAYER_1; - break; - case 0x0: - default: - return 0; + calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls); + return 1; } + plugin->seek_request = file_position + offset; + return 0; + } + state->header = (data[offset] << 24) | (data[offset + 1] << 16) | + (data[offset + 2] << 8) | data[offset + 3]; + if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK) + { + state->state = MP3_READING_FRAME; + break; + } + break; + case MP3_READING_FRAME: + state->number_of_frames += 1; + start_anew = 0; + switch 
(state->header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK)) + { + case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK): + mpeg_ver = MPEG_V1; + break; + case (MPA_LAST_SYNC_BIT_MASK): + mpeg_ver = MPEG_V2; + break; + case 0: + mpeg_ver = MPEG_V25; + break; + case (MPA_VERSION_MASK): + default: + state->state = MP3_LOOKING_FOR_FRAME; + offset += 1; + start_anew = 1; + } + if (start_anew) + break; + switch (state->header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT)) + { + case (0x1 << MPA_LAYER_SHIFT): + layer = LAYER_3; + break; + case (0x2 << MPA_LAYER_SHIFT): + layer = LAYER_2; + break; + case (0x3 << MPA_LAYER_SHIFT): + layer = LAYER_1; + break; + case 0x0: + default: + state->state = MP3_LOOKING_FOR_FRAME; + offset += 1; + start_anew = 1; + } + if (start_anew) + break; if (mpeg_ver < MPEG_V25) idx_num = (mpeg_ver - 1) * 3 + layer - 1; else idx_num = 2 + layer; - bitrate = 1000 * bitrate_table[(header >> MPA_BITRATE_SHIFT) & + bitrate = 1000 * bitrate_table[(state->header >> MPA_BITRATE_SHIFT) & MPA_BITRATE_MASK][idx_num]; if (bitrate < 0) - { - frames--; - break; - } /*error in header */ - sample_rate = freq_table[(header >> MPA_FREQ_SHIFT) & + { + /*error in header */ + state->state = MP3_LOOKING_FOR_FRAME; + offset += 1; + break; + } + sample_rate = freq_table[(state->header >> MPA_FREQ_SHIFT) & MPA_FREQ_MASK][mpeg_ver - 1]; - if (sample_rate < 0) - { - frames--; - break; - } /*error in header */ - ch = ((header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK); - copyright_flag = (header >> MPA_COPYRIGHT_SHIFT) & 0x1; - original_flag = (header >> MPA_ORIGINAL_SHIFT) & 0x1; - frame_size = - 144 * bitrate / (sample_rate ? 
sample_rate : 1) + - ((header >> MPA_PADDING_SHIFT) & 0x1); + if (sample_rate <= 0) + { + /*error in header */ + state->state = MP3_LOOKING_FOR_FRAME; + offset += 1; + break; + } + ch = ((state->header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK); + copyright_flag = (state->header >> MPA_COPYRIGHT_SHIFT) & 0x1; + original_flag = (state->header >> MPA_ORIGINAL_SHIFT) & 0x1; + if (layer == LAYER_1) + frame_size = (12 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1)) * 4; + else + frame_size = 144 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1); if (frame_size <= 0) - { - /* Technically, bitrate can be 0. However, but this particular - * extractor is incapable of correctly processing 0-bitrate files - * anyway. And bitrate == 0 might also mean that this is just a - * random binary sequence, which is far more likely to be true. - * - * amatus suggests to use a different algorithm and parse significant - * part of the file, then count the number of correct mpeg frames. - * If the the percentage of correct frames is below a threshold, - * then this is not an mpeg file at all. - */ - frames -= 1; - break; - } - avg_bps += bitrate / 1000; - - pos += frame_size - 4; - if (frames > max_frames_scan) - break; /*optimization */ - if (avg_bps / frames != bitrate / 1000) - vbr_flag = 1; - if (pos + sizeof (header) > size) - break; /* EOF */ - header = (data[pos] << 24) | (data[pos+1] << 16) | - (data[pos+2] << 8) | data[pos+3]; - } - while ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK); - - if (frames < 2) - return 0; /*no valid frames */ - ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE); - avg_bps = avg_bps / frames; - if (max_frames_scan) - { /*if not all frames scaned */ - length = - size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125; - } - else - { - length = 1152 * frames / (sample_rate ? 
sample_rate : 0xFFFFFFFF); - } + { + /*error in header */ + state->state = MP3_LOOKING_FOR_FRAME; + offset += 1; + break; + } - ADDR (mpeg_versions[mpeg_ver-1], EXTRACTOR_METATYPE_FORMAT_VERSION); - snprintf (format, - sizeof(format), - "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s", - mpeg_versions[mpeg_ver-1], - layer_names[layer-1], - avg_bps, - vbr_flag ? _("VBR") : _("CBR"), - sample_rate, - channel_modes[ch], - copyright_flag ? _("copyright") : _("no copyright"), - original_flag ? _("original") : _("copy") ); + /* Only save data from valid frames in the state */ + state->avg_bps += bitrate / 1000; + state->sample_rate = sample_rate; + state->mpeg_ver = mpeg_ver; + state->layer = layer; + state->ch = ch; + state->copyright_flag = copyright_flag; + state->original_flag = original_flag; + state->bitrate = bitrate; - ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); - snprintf (format, - sizeof (format), "%dm%02d", - length / 60, length % 60); - ADDR (format, EXTRACTOR_METATYPE_DURATION); - return 0; + frames_found_in_this_round += 1; + state->number_of_valid_frames += 1; + if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000) + state->vbr_flag = 1; + offset += frame_size; + state->state = MP3_LOOKING_FOR_FRAME; + break; + } + } + return 1; } /* end of mp3_extractor.c */ diff --git a/src/plugins/template_extractor.c b/src/plugins/template_extractor.c @@ -21,21 +21,113 @@ #include "platform.h" #include "extractor.h" -int -EXTRACTOR_template_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +#include "extractor_plugins.h" + +struct template_state +{ + int state; + + /* more state fields here + * all variables that should survive more than one atomic read + * from the "file" are to be placed here. 
+ */ +}; + +enum TemplateState +{ + TEMPLATE_INVALID = -1, + TEMPLATE_LOOKING_FOR_FOO = 0, + TEMPLATE_READING_FOO, + TEMPLATE_READING_BAR, + TEMPLATE_SEEKING_TO_ZOOL +}; + +void +EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin) { - if (0 != proc (proc_cls, - "template", - EXTRACTOR_METATYPE_RESERVED, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - "foo", - strlen ("foo")+1)) + struct template_state *state; + state = plugin->state = malloc (sizeof (struct template_state)); + if (state == NULL) + return; + state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */ + /* initialize other fields to their "uninitialized" values or defaults */ +} + +void +EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin) +{ + if (plugin->state != NULL) + { + /* free other state fields that are heap-allocated */ + free (plugin->state); + } + plugin->state = NULL; +} + +int +EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int64_t file_position; + int64_t file_size; + size_t offset = 0; + size_t size; + unsigned char *data; + unsigned char *ff; + struct mp3_state *state; + + /* temporary variables are declared here */ + + if (plugin == NULL || plugin->state == NULL) return 1; - /* insert more here */ - return 0; + + /* for easier access (and conforms better with the old plugins var names) */ + state = plugin->state; + file_position = plugin->position; + file_size = plugin->fsize; + size = plugin->map_size; + data = plugin->shm_ptr; + + /* sanity checks */ + if (plugin->seek_request < 0) + return 1; + if (file_position - plugin->seek_request > 0) + { + plugin->seek_request = -1; + return 1; + } + if (plugin->seek_request - file_position < size) + offset = plugin->seek_request - file_position; + + while (1) + { + switch (state->state) + { + case TEMPLATE_INVALID: + plugin->seek_request = -1; + return 1; + case TEMPLATE_LOOKING_FOR_FOO: + /* 
Find FOO in data buffer. + * If found, set offset to its position and set state to TEMPLATE_READING_FOO + * If not found, set seek_request to file_position + offset and return 1 + * (but it's better to give up as early as possible, to avoid reading the whole + * file byte-by-byte). + */ + break; + case TEMPLATE_READING_FOO: + /* See if offset + sizeof(foo) < size, otherwise set seek_request to file_position + offset and return 1; + * If file_position is 0, and size is still too small, give up. + * Read FOO, maybe increase offset to reflect that (depends on the parser logic). + * Either process FOO right here, or jump to another state (see ebml plugin for an example of complex + * state-jumps). + * If FOO says you need to seek somewhere - set offset to seek_target - file_position and set the + * next state (next state will check that offset < size; all states that do reading should do that, + * and also check for EOF). + */ + /* ... */ + break; + } + } + /* Should not reach this */ + return 1; }