diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-04-13 07:26:16 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-04-13 07:26:16 +0000 |
commit | 804080ef9e088be8ce3d4a9e5e5cdd2636b17fcd (patch) | |
tree | 008cf03f0b3a06a30f20198ec810f536b9f147fb | |
parent | e19e624671c12726b511e092ea01f977722624d7 (diff) | |
download | libextractor-804080ef9e088be8ce3d4a9e5e5cdd2636b17fcd.tar.gz libextractor-804080ef9e088be8ce3d4a9e5e5cdd2636b17fcd.zip |
-LRN: misc patches:
/home/grothoff/0001-Rewrite-the-template-more-like-documentation-now.patch /home/grothoff/0003-Minimally-ported-s3m-extractor.patch
/home/grothoff/0002-New-header-for-arch-definitions.patch /home/grothoff/0004-Fixed-template-doc-added-architecture-header.patch
-rw-r--r-- | src/plugins/Makefile.am | 9 | ||||
-rw-r--r-- | src/plugins/ebml_extractor.c | 33 | ||||
-rw-r--r-- | src/plugins/mp3_extractor.c | 31 | ||||
-rw-r--r-- | src/plugins/s3m_extractor.c | 132 | ||||
-rw-r--r-- | src/plugins/template_extractor.c | 143 |
5 files changed, 139 insertions, 209 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 3c6851f..bdf8e2b 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am | |||
@@ -15,6 +15,7 @@ plugin_LTLIBRARIES = \ | |||
15 | libextractor_id3.la \ | 15 | libextractor_id3.la \ |
16 | libextractor_id3v2.la \ | 16 | libextractor_id3v2.la \ |
17 | libextractor_ebml.la \ | 17 | libextractor_ebml.la \ |
18 | libextractor_s3m.la \ | ||
18 | libextractor_mp3.la | 19 | libextractor_mp3.la |
19 | 20 | ||
20 | libextractor_mp3_la_SOURCES = \ | 21 | libextractor_mp3_la_SOURCES = \ |
@@ -49,4 +50,12 @@ libextractor_id3v2_la_LIBADD = \ | |||
49 | $(top_builddir)/src/main/libextractor.la \ | 50 | $(top_builddir)/src/main/libextractor.la \ |
50 | $(top_builddir)/src/common/libextractor_common.la | 51 | $(top_builddir)/src/common/libextractor_common.la |
51 | 52 | ||
53 | libextractor_s3m_la_SOURCES = \ | ||
54 | s3m_extractor.c | ||
55 | libextractor_s3m_la_LDFLAGS = \ | ||
56 | $(PLUGINFLAGS) | ||
57 | libextractor_s3m_la_LIBADD = \ | ||
58 | $(top_builddir)/src/main/libextractor.la \ | ||
59 | $(top_builddir)/src/common/libextractor_common.la | ||
60 | |||
52 | EXTRA_DIST = template_extractor.c | 61 | EXTRA_DIST = template_extractor.c |
diff --git a/src/plugins/ebml_extractor.c b/src/plugins/ebml_extractor.c index 9dfbbfa..4661804 100644 --- a/src/plugins/ebml_extractor.c +++ b/src/plugins/ebml_extractor.c | |||
@@ -28,42 +28,13 @@ | |||
28 | #include "extractor.h" | 28 | #include "extractor.h" |
29 | #include <stdint.h> | 29 | #include <stdint.h> |
30 | 30 | ||
31 | #include "le_architecture.h" | ||
32 | |||
31 | #ifndef DEBUG_EBML | 33 | #ifndef DEBUG_EBML |
32 | # define DEBUG_EBML 0 | 34 | # define DEBUG_EBML 0 |
33 | #endif | 35 | #endif |
34 | 36 | ||
35 | #if WINDOWS | 37 | #if WINDOWS |
36 | #include <sys/param.h> /* #define BYTE_ORDER */ | ||
37 | #endif | ||
38 | #ifndef __BYTE_ORDER | ||
39 | #ifdef _BYTE_ORDER | ||
40 | #define __BYTE_ORDER _BYTE_ORDER | ||
41 | #else | ||
42 | #ifdef BYTE_ORDER | ||
43 | #define __BYTE_ORDER BYTE_ORDER | ||
44 | #endif | ||
45 | #endif | ||
46 | #endif | ||
47 | #ifndef __BIG_ENDIAN | ||
48 | #ifdef _BIG_ENDIAN | ||
49 | #define __BIG_ENDIAN _BIG_ENDIAN | ||
50 | #else | ||
51 | #ifdef BIG_ENDIAN | ||
52 | #define __BIG_ENDIAN BIG_ENDIAN | ||
53 | #endif | ||
54 | #endif | ||
55 | #endif | ||
56 | #ifndef __LITTLE_ENDIAN | ||
57 | #ifdef _LITTLE_ENDIAN | ||
58 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN | ||
59 | #else | ||
60 | #ifdef LITTLE_ENDIAN | ||
61 | #define __LITTLE_ENDIAN LITTLE_ENDIAN | ||
62 | #endif | ||
63 | #endif | ||
64 | #endif | ||
65 | |||
66 | #if WINDOWS | ||
67 | /* According to http://old.nabble.com/Porting-localtime_r-and-gmtime_r-td15282276.html | 38 | /* According to http://old.nabble.com/Porting-localtime_r-and-gmtime_r-td15282276.html |
68 | * msvcrt.dll does have thread-safe gmtime implementation, | 39 | * msvcrt.dll does have thread-safe gmtime implementation, |
69 | * even though the documentation says otherwise. | 40 | * even though the documentation says otherwise. |
diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c index 3af0c37..68b0fce 100644 --- a/src/plugins/mp3_extractor.c +++ b/src/plugins/mp3_extractor.c | |||
@@ -38,36 +38,7 @@ | |||
38 | 38 | ||
39 | #include "extractor_plugins.h" | 39 | #include "extractor_plugins.h" |
40 | 40 | ||
41 | #if WINDOWS | 41 | #include "le_architecture.h" |
42 | #include <sys/param.h> /* #define BYTE_ORDER */ | ||
43 | #endif | ||
44 | #ifndef __BYTE_ORDER | ||
45 | #ifdef _BYTE_ORDER | ||
46 | #define __BYTE_ORDER _BYTE_ORDER | ||
47 | #else | ||
48 | #ifdef BYTE_ORDER | ||
49 | #define __BYTE_ORDER BYTE_ORDER | ||
50 | #endif | ||
51 | #endif | ||
52 | #endif | ||
53 | #ifndef __BIG_ENDIAN | ||
54 | #ifdef _BIG_ENDIAN | ||
55 | #define __BIG_ENDIAN _BIG_ENDIAN | ||
56 | #else | ||
57 | #ifdef BIG_ENDIAN | ||
58 | #define __BIG_ENDIAN BIG_ENDIAN | ||
59 | #endif | ||
60 | #endif | ||
61 | #endif | ||
62 | #ifndef __LITTLE_ENDIAN | ||
63 | #ifdef _LITTLE_ENDIAN | ||
64 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN | ||
65 | #else | ||
66 | #ifdef LITTLE_ENDIAN | ||
67 | #define __LITTLE_ENDIAN LITTLE_ENDIAN | ||
68 | #endif | ||
69 | #endif | ||
70 | #endif | ||
71 | 42 | ||
72 | #define LARGEST_FRAME_SIZE 8065 | 43 | #define LARGEST_FRAME_SIZE 8065 |
73 | 44 | ||
diff --git a/src/plugins/s3m_extractor.c b/src/plugins/s3m_extractor.c index 7e8ae40..9146042 100644 --- a/src/plugins/s3m_extractor.c +++ b/src/plugins/s3m_extractor.c | |||
@@ -1,68 +1,94 @@ | |||
1 | /* | 1 | /* |
2 | * This file is part of libextractor. | 2 | This file is part of libextractor. |
3 | * (C) 2008 Toni Ruottu | 3 | (C) 2002, 2003, 2004, 2009 Vidyut Samanta and Christian Grothoff |
4 | * | 4 | |
5 | * libextractor is free software; you can redistribute it and/or modify | 5 | libextractor is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published | 6 | it under the terms of the GNU General Public License as published |
7 | * by the Free Software Foundation; either version 2, or (at your | 7 | by the Free Software Foundation; either version 2, or (at your |
8 | * option) any later version. | 8 | option) any later version. |
9 | * | 9 | |
10 | * libextractor is distributed in the hope that it will be useful, but | 10 | libextractor is distributed in the hope that it will be useful, but |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * General Public License for more details. | 13 | General Public License for more details. |
14 | * | 14 | |
15 | * You should have received a copy of the GNU General Public License | 15 | You should have received a copy of the GNU General Public License |
16 | * along with libextractor; see the file COPYING. If not, write to the | 16 | along with libextractor; see the file COPYING. If not, write to the |
17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
18 | * Boston, MA 02111-1307, USA. | 18 | Boston, MA 02111-1307, USA. |
19 | * | ||
20 | */ | 19 | */ |
21 | 20 | ||
22 | #include "platform.h" | 21 | #include "platform.h" |
23 | #include "extractor.h" | 22 | #include "extractor.h" |
24 | #include "convert.h" | ||
25 | 23 | ||
26 | #define HEADER_SIZE 0x70 | 24 | #include "extractor_plugins.h" |
25 | #include "le_architecture.h" | ||
26 | |||
27 | /* Based upon ST 3.20 spec at http://16-bits.org/s3m/ */ | ||
28 | /* Looks like the format was defined by the software implementation, | ||
29 | * and that implementation was for a little-endian platform, which means | ||
30 | * that the format is little-endian. | ||
31 | */ | ||
27 | 32 | ||
28 | struct header | 33 | LE_NETWORK_STRUCT_BEGIN |
34 | struct S3MHeader | ||
29 | { | 35 | { |
30 | char title[28]; | 36 | char song_name[28]; |
31 | char something[16]; | 37 | uint8_t byte_1A; |
32 | char magicid[4]; | 38 | uint8_t file_type; /* 0x10 == ST3 module */ |
39 | uint8_t unknown1[2]; | ||
40 | uint16_t number_of_orders; /* should be even */ | ||
41 | uint16_t number_of_instruments; | ||
42 | uint16_t number_of_patterns; | ||
43 | uint16_t flags; | ||
44 | uint16_t created_with_version; | ||
45 | uint16_t file_format_info; | ||
46 | char SCRM[4]; | ||
47 | uint8_t global_volume; | ||
48 | uint8_t initial_speed; | ||
49 | uint8_t initial_tempo; | ||
50 | uint8_t master_volume; | ||
51 | uint8_t ultra_click_removal; | ||
52 | uint8_t default_channel_positions; | ||
53 | uint8_t unknown2[8]; | ||
54 | uint16_t special; | ||
55 | uint8_t channel_settings[32]; | ||
33 | }; | 56 | }; |
57 | LE_NETWORK_STRUCT_END | ||
34 | 58 | ||
35 | #define ADD(s,t) do { if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) | 59 | #define ADD(s,t) if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s) + 1)) return 1 |
36 | 60 | #define ADDL(s,t,l) if (0 != proc (proc_cls, "s3m", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, l)) return 1 | |
37 | 61 | ||
38 | /* "extract" keyword from a Scream Tracker 3 Module | 62 | int |
39 | * | 63 | EXTRACTOR_s3m_extract_method (struct EXTRACTOR_PluginList *plugin, |
40 | * "Scream Tracker 3.01 BETA File Formats And Mixing Info" | 64 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
41 | * was used, while this piece of software was originally | ||
42 | * written. | ||
43 | * | ||
44 | */ | ||
45 | int | ||
46 | EXTRACTOR_s3m_extract (const unsigned char *data, | ||
47 | size_t size, | ||
48 | EXTRACTOR_MetaDataProcessor proc, | ||
49 | void *proc_cls, | ||
50 | const char *options) | ||
51 | { | 65 | { |
52 | char title[29]; | 66 | int64_t offset; |
53 | const struct header *head; | 67 | unsigned char *data; |
54 | 68 | struct S3MHeader header; | |
55 | /* Check header size */ | 69 | char song_name_NT[29]; |
56 | 70 | ||
57 | if (size < HEADER_SIZE) | 71 | if (plugin == NULL) |
58 | return 0; | 72 | return 1; |
59 | head = (const struct header *) data; | 73 | if (sizeof (header) != pl_read (plugin, &data, sizeof (header))) |
60 | if (memcmp (head->magicid, "SCRM", 4)) | 74 | return 1; |
61 | return 0; | 75 | memcpy (&header, data, sizeof (header)); |
62 | ADD ("audio/x-s3m", EXTRACTOR_METATYPE_MIMETYPE); | 76 | if (header.byte_1A != 0x1A || memcmp (header.SCRM, "SCRM", 4) != 0) |
77 | return 1; | ||
78 | header.number_of_orders = LE_le16toh (header.number_of_orders); | ||
79 | header.number_of_instruments = LE_le16toh (header.number_of_instruments); | ||
80 | header.number_of_patterns = LE_le16toh (header.number_of_patterns); | ||
81 | header.flags = LE_le16toh (header.flags); | ||
82 | header.created_with_version = LE_le16toh (header.created_with_version); | ||
83 | header.file_format_info = LE_le16toh (header.file_format_info); | ||
84 | header.special = LE_le16toh (header.special); | ||
85 | memcpy (song_name_NT, header.song_name, 28); | ||
86 | song_name_NT[28] = '\0'; | ||
63 | 87 | ||
64 | memcpy (&title, head->title, 28); | 88 | ADD("audio/x-s3m", EXTRACTOR_METATYPE_MIMETYPE); |
65 | title[28] = '\0'; | 89 | ADD(song_name_NT, EXTRACTOR_METATYPE_TITLE); |
66 | ADD (title, EXTRACTOR_METATYPE_TITLE); | 90 | /* TODO: turn other header data into useful metadata (i.e. RESOURCE_TYPE). |
67 | return 0; | 91 | * Also, disabled instruments can be (and are) used to carry user-defined text. |
92 | */ | ||
93 | return 1; | ||
68 | } | 94 | } |
diff --git a/src/plugins/template_extractor.c b/src/plugins/template_extractor.c index b6f3371..4f5dc1e 100644 --- a/src/plugins/template_extractor.c +++ b/src/plugins/template_extractor.c | |||
@@ -22,112 +22,65 @@ | |||
22 | #include "extractor.h" | 22 | #include "extractor.h" |
23 | 23 | ||
24 | #include "extractor_plugins.h" | 24 | #include "extractor_plugins.h" |
25 | 25 | #include "le_architecture.h" | |
26 | struct template_state | ||
27 | { | ||
28 | int state; | ||
29 | |||
30 | /* more state fields here | ||
31 | * all variables that should survive more than one atomic read | ||
32 | * from the "file" are to be placed here. | ||
33 | */ | ||
34 | }; | ||
35 | |||
36 | enum TemplateState | ||
37 | { | ||
38 | TEMPLATE_INVALID = -1, | ||
39 | TEMPLATE_LOOKING_FOR_FOO = 0, | ||
40 | TEMPLATE_READING_FOO, | ||
41 | TEMPLATE_READING_BAR, | ||
42 | TEMPLATE_SEEKING_TO_ZOOL | ||
43 | }; | ||
44 | |||
45 | void | ||
46 | EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin) | ||
47 | { | ||
48 | struct template_state *state; | ||
49 | state = plugin->state = malloc (sizeof (struct template_state)); | ||
50 | if (state == NULL) | ||
51 | return; | ||
52 | state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */ | ||
53 | /* initialize other fields to their "uninitialized" values or defaults */ | ||
54 | } | ||
55 | |||
56 | void | ||
57 | EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin) | ||
58 | { | ||
59 | if (plugin->state != NULL) | ||
60 | { | ||
61 | /* free other state fields that are heap-allocated */ | ||
62 | free (plugin->state); | ||
63 | } | ||
64 | plugin->state = NULL; | ||
65 | } | ||
66 | 26 | ||
67 | int | 27 | int |
68 | EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin, | 28 | EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin, |
69 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | 29 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
70 | { | 30 | { |
71 | int64_t file_position; | 31 | int64_t offset; |
72 | int64_t file_size; | ||
73 | size_t offset = 0; | ||
74 | size_t size; | ||
75 | unsigned char *data; | 32 | unsigned char *data; |
76 | unsigned char *ff; | ||
77 | struct mp3_state *state; | ||
78 | 33 | ||
79 | /* temporary variables are declared here */ | 34 | /* temporary variables are declared here */ |
80 | 35 | ||
81 | if (plugin == NULL || plugin->state == NULL) | 36 | if (plugin == NULL) |
82 | return 1; | 37 | return 1; |
83 | 38 | ||
84 | /* for easier access (and conforms better with the old plugins var names) */ | 39 | /* initialize state here */ |
85 | state = plugin->state; | ||
86 | file_position = plugin->position; | ||
87 | file_size = plugin->fsize; | ||
88 | size = plugin->map_size; | ||
89 | data = plugin->shm_ptr; | ||
90 | |||
91 | /* sanity checks */ | ||
92 | if (plugin->seek_request < 0) | ||
93 | return 1; | ||
94 | if (file_position - plugin->seek_request > 0) | ||
95 | { | ||
96 | plugin->seek_request = -1; | ||
97 | return 1; | ||
98 | } | ||
99 | if (plugin->seek_request - file_position < size) | ||
100 | offset = plugin->seek_request - file_position; | ||
101 | 40 | ||
102 | while (1) | 41 | /* Call pl_seek (plugin, POSITION, WHENCE) to seek (if you know where |
103 | { | 42 | * data starts). |
104 | switch (state->state) | 43 | */ |
105 | { | 44 | /* Call pl_read (plugin, &data, COUNT) to read COUNT bytes (will be stored |
106 | case TEMPLATE_INVALID: | 45 | * as data[0]..data[COUNT-1], no need to allocate data or free it; but it |
107 | plugin->seek_request = -1; | 46 | * "goes away" when you make another read call, so store interesting values |
108 | return 1; | 47 | * somewhere once you find them). |
109 | case TEMPLATE_LOOKING_FOR_FOO: | 48 | */ |
110 | /* Find FOO in data buffer. | 49 | /* If you need to search for a magic id that is not at the beginning of the |
111 | * If found, set offset to its position and set state to TEMPLATE_READING_FOO | 50 | * file, do pl_read() calls, reading sizable (1 megabyte or so) chunks, |
112 | * If not found, set seek_request to file_position + offset and return 1 | 51 | * then use memchr() on them to find first byte of the magic sequence, |
113 | * (but it's better to give up as early as possible, to avoid reading the whole | 52 | * then compare the rest of the sequence, if found. |
114 | * file byte-by-byte). | 53 | * Mind the fact that you need to iterate over COUNT - SEQUENCE_LENGTH chars, |
115 | */ | 54 | * and seek to POS + COUNT - SEQUENCE_LENGTH once you run out of bytes, |
116 | break; | 55 | * otherwise you'd have a chance to skip bytes at chunk boundaries. |
117 | case TEMPLATE_READING_FOO: | 56 | */ |
118 | /* See if offset + sizeof(foo) < size, otherwise set seek_request to offset and return 1; | 57 | /* Do try to make a reasonable assumption about the amount of data you're |
119 | * If file_position is 0, and size is still to small, give up. | 58 | * going to search through. Iterating over the whole file, byte-by-byte is |
120 | * Read FOO, maybe increase offset to reflect that (depends on the parser logic). | 59 | * NOT a good idea, if the search itself is slow. Try to make the search as |
121 | * Either process FOO right here, or jump to another state (see ebml plugin for an example of complex | 60 | * efficient as possible. |
122 | * state-jumps). | 61 | */ |
123 | * If FOO says you need to seek somewhere - set offset to seek_target - file_position and set the | 62 | /* Avoid making long seeks backwards (for performance reasons) |
124 | * next state (next state will check that offset < size; all states that do reading should do that, | 63 | */ |
125 | * and also check for EOF). | 64 | /* pl_get_pos (plugin) will return current offset from the beginning of |
126 | */ | 65 | * the file (i.e. index of the data[0] in the file, if you call pl_read |
127 | /* ... */ | 66 | * at that point). You might need it to calculate forward-searches, if |
128 | break; | 67 | * there are offsets stored within the file. |
129 | } | 68 | * pl_get_fsize (plugin) will return file size OR -1 if it is not known |
130 | } | 69 | * yet (file is not decompressed completely). Don't rely on fsize. |
131 | /* Should not reach this */ | 70 | */ |
71 | /* Seeking forward is safe | ||
72 | */ | ||
73 | /* If you asked to read X bytes, but got less - it's EOF | ||
74 | */ | ||
75 | /* Seeking backward a bit shouldn't hurt performance (i.e. read 4 bytes, | ||
76 | * then immediately seek 4 bytes back). | ||
77 | */ | ||
78 | /* Don't read too much (you can't read more than MAX_READ from extractor.c, | ||
79 | * which is 32MB at the moment) in one call. | ||
80 | */ | ||
81 | /* Once you find something, call proc(). If it returns non-0 - you're done. | ||
82 | */ | ||
83 | /* Return 1 to indicate that you're done. */ | ||
84 | /* Don't forget to free anything you've allocated before returning! */ | ||
132 | return 1; | 85 | return 1; |
133 | } | 86 | } |