libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 804080ef9e088be8ce3d4a9e5e5cdd2636b17fcd
parent e19e624671c12726b511e092ea01f977722624d7
Author: Christian Grothoff <christian@grothoff.org>
Date:   Fri, 13 Apr 2012 07:26:16 +0000

-LRN: misc patches:
/home/grothoff/0001-Rewrite-the-template-more-like-documentation-now.patch  /home/grothoff/0003-Minimally-ported-s3m-extractor.patch
/home/grothoff/0002-New-header-for-arch-definitions.patch                   /home/grothoff/0004-Fixed-template-doc-added-architecture-header.patch



Diffstat:
Msrc/plugins/Makefile.am | 9+++++++++
Msrc/plugins/ebml_extractor.c | 33++-------------------------------
Msrc/plugins/mp3_extractor.c | 31+------------------------------
Msrc/plugins/s3m_extractor.c | 132+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Msrc/plugins/template_extractor.c | 143+++++++++++++++++++++++++++----------------------------------------------------
5 files changed, 139 insertions(+), 209 deletions(-)

diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -15,6 +15,7 @@ plugin_LTLIBRARIES = \ libextractor_id3.la \ libextractor_id3v2.la \ libextractor_ebml.la \ + libextractor_s3m.la \ libextractor_mp3.la libextractor_mp3_la_SOURCES = \ @@ -49,4 +50,12 @@ libextractor_id3v2_la_LIBADD = \ $(top_builddir)/src/main/libextractor.la \ $(top_builddir)/src/common/libextractor_common.la +libextractor_s3m_la_SOURCES = \ + s3m_extractor.c +libextractor_s3m_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_s3m_la_LIBADD = \ + $(top_builddir)/src/main/libextractor.la \ + $(top_builddir)/src/common/libextractor_common.la + EXTRA_DIST = template_extractor.c diff --git a/src/plugins/ebml_extractor.c b/src/plugins/ebml_extractor.c @@ -28,42 +28,13 @@ #include "extractor.h" #include <stdint.h> +#include "le_architecture.h" + #ifndef DEBUG_EBML # define DEBUG_EBML 0 #endif #if WINDOWS -#include <sys/param.h> /* #define BYTE_ORDER */ -#endif -#ifndef __BYTE_ORDER -#ifdef _BYTE_ORDER -#define __BYTE_ORDER _BYTE_ORDER -#else -#ifdef BYTE_ORDER -#define __BYTE_ORDER BYTE_ORDER -#endif -#endif -#endif -#ifndef __BIG_ENDIAN -#ifdef _BIG_ENDIAN -#define __BIG_ENDIAN _BIG_ENDIAN -#else -#ifdef BIG_ENDIAN -#define __BIG_ENDIAN BIG_ENDIAN -#endif -#endif -#endif -#ifndef __LITTLE_ENDIAN -#ifdef _LITTLE_ENDIAN -#define __LITTLE_ENDIAN _LITTLE_ENDIAN -#else -#ifdef LITTLE_ENDIAN -#define __LITTLE_ENDIAN LITTLE_ENDIAN -#endif -#endif -#endif - -#if WINDOWS /* According to http://old.nabble.com/Porting-localtime_r-and-gmtime_r-td15282276.html * msvcrt.dll does have thread-safe gmtime implementation, * even though the documentation says otherwise. 
diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c @@ -38,36 +38,7 @@ #include "extractor_plugins.h" -#if WINDOWS -#include <sys/param.h> /* #define BYTE_ORDER */ -#endif -#ifndef __BYTE_ORDER -#ifdef _BYTE_ORDER -#define __BYTE_ORDER _BYTE_ORDER -#else -#ifdef BYTE_ORDER -#define __BYTE_ORDER BYTE_ORDER -#endif -#endif -#endif -#ifndef __BIG_ENDIAN -#ifdef _BIG_ENDIAN -#define __BIG_ENDIAN _BIG_ENDIAN -#else -#ifdef BIG_ENDIAN -#define __BIG_ENDIAN BIG_ENDIAN -#endif -#endif -#endif -#ifndef __LITTLE_ENDIAN -#ifdef _LITTLE_ENDIAN -#define __LITTLE_ENDIAN _LITTLE_ENDIAN -#else -#ifdef LITTLE_ENDIAN -#define __LITTLE_ENDIAN LITTLE_ENDIAN -#endif -#endif -#endif +#include "le_architecture.h" #define LARGEST_FRAME_SIZE 8065 diff --git a/src/plugins/s3m_extractor.c b/src/plugins/s3m_extractor.c @@ -1,68 +1,94 @@ /* - * This file is part of libextractor. - * (C) 2008 Toni Ruottu - * - * libextractor is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2, or (at your - * option) any later version. - * - * libextractor is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with libextractor; see the file COPYING. If not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - * + This file is part of libextractor. + (C) 2002, 2003, 2004, 2009 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. 
+ + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ #include "platform.h" #include "extractor.h" -#include "convert.h" -#define HEADER_SIZE 0x70 +#include "extractor_plugins.h" +#include "le_architecture.h" + +/* Based upon ST 3.20 spec at http://16-bits.org/s3m/ */ +/* Looks like the format was defined by the software implementation, + * and that implementation was for little-endian platform, which means + * that the format is little-endian. + */ -struct header +LE_NETWORK_STRUCT_BEGIN +struct S3MHeader { - char title[28]; - char something[16]; - char magicid[4]; + char song_name[28]; + uint8_t byte_1A; + uint8_t file_type; /* 0x10 == ST3 module */ + uint8_t unknown1[2]; + uint16_t number_of_orders; /* should be even */ + uint16_t number_of_instruments; + uint16_t number_of_patterns; + uint16_t flags; + uint16_t created_with_version; + uint16_t file_format_info; + char SCRM[4]; + uint8_t global_volume; + uint8_t initial_speed; + uint8_t initial_tempo; + uint8_t master_volume; + uint8_t ultra_click_removal; + uint8_t default_channel_positions; + uint8_t unknown2[8]; + uint16_t special; + uint8_t channel_settings[32]; }; +LE_NETWORK_STRUCT_END -#define ADD(s,t) do { if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) - +#define ADD(s,t) if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s) + 1)) return 1 +#define ADDL(s,t,l) if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, l)) return 1 -/* "extract" keyword from a Scream 
Tracker 3 Module - * - * "Scream Tracker 3.01 BETA File Formats And Mixing Info" - * was used, while this piece of software was originally - * written. - * - */ -int -EXTRACTOR_s3m_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +int +EXTRACTOR_s3m_extract_method (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - char title[29]; - const struct header *head; - - /* Check header size */ + int64_t offset; + unsigned char *data; + struct S3MHeader header; + char song_name_NT[29]; - if (size < HEADER_SIZE) - return 0; - head = (const struct header *) data; - if (memcmp (head->magicid, "SCRM", 4)) - return 0; - ADD ("audio/x-s3m", EXTRACTOR_METATYPE_MIMETYPE); + if (plugin == NULL) + return 1; + if (sizeof (header) != pl_read (plugin, &data, sizeof (header))) + return 1; + memcpy (&header, data, sizeof (header)); + if (header.byte_1A != 0x1A || memcmp (header.SCRM, "SCRM", 4) != 0) + return 1; + header.number_of_orders = LE_le16toh (header.number_of_orders); + header.number_of_instruments = LE_le16toh (header.number_of_instruments); + header.number_of_patterns = LE_le16toh (header.number_of_patterns); + header.flags = LE_le16toh (header.flags); + header.created_with_version = LE_le16toh (header.created_with_version); + header.file_format_info = LE_le16toh (header.file_format_info); + header.special = LE_le16toh (header.special); + memcpy (song_name_NT, header.song_name, 28); + song_name_NT[28] = '\0'; - memcpy (&title, head->title, 28); - title[28] = '\0'; - ADD (title, EXTRACTOR_METATYPE_TITLE); - return 0; + ADD("audio/x-s3m", EXTRACTOR_METATYPE_MIMETYPE); + ADD(song_name_NT, EXTRACTOR_METATYPE_TITLE); + /* TODO: turn other header data into useful metadata (i.e. RESOURCE_TYPE). + * Also, disabled instruments can be (and are) used to carry user-defined text. 
+ */ + return 1; } diff --git a/src/plugins/template_extractor.c b/src/plugins/template_extractor.c @@ -22,112 +22,65 @@ #include "extractor.h" #include "extractor_plugins.h" - -struct template_state -{ - int state; - - /* more state fields here - * all variables that should survive more than one atomic read - * from the "file" are to be placed here. - */ -}; - -enum TemplateState -{ - TEMPLATE_INVALID = -1, - TEMPLATE_LOOKING_FOR_FOO = 0, - TEMPLATE_READING_FOO, - TEMPLATE_READING_BAR, - TEMPLATE_SEEKING_TO_ZOOL -}; - -void -EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin) -{ - struct template_state *state; - state = plugin->state = malloc (sizeof (struct template_state)); - if (state == NULL) - return; - state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */ - /* initialize other fields to their "uninitialized" values or defaults */ -} - -void -EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin) -{ - if (plugin->state != NULL) - { - /* free other state fields that are heap-allocated */ - free (plugin->state); - } - plugin->state = NULL; -} +#include "le_architecture.h" int EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - int64_t file_position; - int64_t file_size; - size_t offset = 0; - size_t size; + int64_t offset; unsigned char *data; - unsigned char *ff; - struct mp3_state *state; /* temporary variables are declared here */ - if (plugin == NULL || plugin->state == NULL) + if (plugin == NULL) return 1; - /* for easier access (and conforms better with the old plugins var names) */ - state = plugin->state; - file_position = plugin->position; - file_size = plugin->fsize; - size = plugin->map_size; - data = plugin->shm_ptr; - - /* sanity checks */ - if (plugin->seek_request < 0) - return 1; - if (file_position - plugin->seek_request > 0) - { - plugin->seek_request = -1; - return 1; - } - if (plugin->seek_request - 
file_position < size) - offset = plugin->seek_request - file_position; + /* initialize state here */ - while (1) - { - switch (state->state) - { - case TEMPLATE_INVALID: - plugin->seek_request = -1; - return 1; - case TEMPLATE_LOOKING_FOR_FOO: - /* Find FOO in data buffer. - * If found, set offset to its position and set state to TEMPLATE_READING_FOO - * If not found, set seek_request to file_position + offset and return 1 - * (but it's better to give up as early as possible, to avoid reading the whole - * file byte-by-byte). - */ - break; - case TEMPLATE_READING_FOO: - /* See if offset + sizeof(foo) < size, otherwise set seek_request to offset and return 1; - * If file_position is 0, and size is still to small, give up. - * Read FOO, maybe increase offset to reflect that (depends on the parser logic). - * Either process FOO right here, or jump to another state (see ebml plugin for an example of complex - * state-jumps). - * If FOO says you need to seek somewhere - set offset to seek_target - file_position and set the - * next state (next state will check that offset < size; all states that do reading should do that, - * and also check for EOF). - */ - /* ... */ - break; - } - } - /* Should not reach this */ + /* Call pl_seek (plugin, POSITION, WHENCE) to seek (if you know where + * data starts). + */ + /* Call pl_read (plugin, &data, COUNT) to read COUNT bytes (will be stored + * as data[0]..data[COUNT-1], no need to allocate data or free it; but it + * "goes away" when you make another read call, so store interesting values + * somewhere once you find them). + */ + /* If you need to search for a magic id that is not at the beginning of the + * file, do pl_read() calls, reading sizable (1 megabyte or so) chunks, + * then use memchr() on them to find first byte of the magic sequence, + * then compare the rest of the sequence, if found. 
+ * Mind the fact that you need to iterate over COUNT - SEQUENCE_LENGTH chars, + * and seek to POS + COUNT - SEQUENCE_LENGTH once you run out of bytes, + * otherwise you'd have a chance to skip bytes at chunk boundaries. + */ + /* Do try to make a reasonable assumption about the amount of data you're + * going to search through. Iterating over the whole file, byte-by-byte is + * NOT a good idea, if the search itself is slow. Try to make the search as + * efficient as possible. + */ + /* Avoid making long seeks backwards (for performance reasons) + */ + /* pl_get_pos (plugin) will return current offset from the beginning of + * the file (i.e. index of the data[0] in the file, if you call pl_read + * at that point). You might need it to calculate forward-searches, if + * there are offsets stored within the file. + * pl_get_fsize (plugin) will return file size OR -1 if it is not known + * yet (file is not decompressed completely). Don't rely on fsize. + */ + /* Seeking forward is safe + */ + /* If you asked to read X bytes, but got less - it's EOF + */ + /* Seeking backward a bit shouldn't hurt performance (i.e. read 4 bytes, + * then immediately seek 4 bytes back). + */ + /* Don't read too much (you can't read more than MAX_READ from extractor.c, + * which is 32MB at the moment) in one call. + */ + /* Once you find something, call proc(). If it returns non-0 - you're done. + */ + /* Return 1 to indicate that you're done. */ + /* Don't forget to free anything you've allocated before returning! */ return 1; }