diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/include/extractor.h | 13 | ||||
-rw-r--r-- | src/main/extractor.c | 3071 | ||||
-rw-r--r-- | src/main/extractor_plugins.c | 24 | ||||
-rw-r--r-- | src/main/extractor_plugins.h | 62 | ||||
-rw-r--r-- | src/plugins/Makefile.am | 385 | ||||
-rw-r--r-- | src/plugins/id3_extractor.c | 149 | ||||
-rw-r--r-- | src/plugins/id3v23_extractor.c | 420 | ||||
-rw-r--r-- | src/plugins/id3v24_extractor.c | 455 | ||||
-rw-r--r-- | src/plugins/id3v2_extractor.c | 957 | ||||
-rw-r--r-- | src/plugins/mp3_extractor.c | 425 | ||||
-rw-r--r-- | src/plugins/template_extractor.c | 122 |
11 files changed, 3156 insertions, 2927 deletions
diff --git a/src/include/extractor.h b/src/include/extractor.h index 522463b..c2fec5b 100644 --- a/src/include/extractor.h +++ b/src/include/extractor.h | |||
@@ -392,12 +392,6 @@ typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls, | |||
392 | * @param options options for this plugin; can be NULL | 392 | * @param options options for this plugin; can be NULL |
393 | * @return 0 if all calls to proc returned 0, otherwise 1 | 393 | * @return 0 if all calls to proc returned 0, otherwise 1 |
394 | */ | 394 | */ |
395 | typedef int (*EXTRACTOR_ExtractMethod)(const char *data, | ||
396 | size_t datasize, | ||
397 | EXTRACTOR_MetaDataProcessor proc, | ||
398 | void *proc_cls, | ||
399 | const char *options); | ||
400 | |||
401 | 395 | ||
402 | /** | 396 | /** |
403 | * Linked list of extractor plugins. An application builds this list | 397 | * Linked list of extractor plugins. An application builds this list |
@@ -407,6 +401,13 @@ typedef int (*EXTRACTOR_ExtractMethod)(const char *data, | |||
407 | */ | 401 | */ |
408 | struct EXTRACTOR_PluginList; | 402 | struct EXTRACTOR_PluginList; |
409 | 403 | ||
404 | typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin, | ||
405 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls); | ||
406 | |||
407 | typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList *plugin); | ||
408 | typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList *plugin); | ||
409 | |||
410 | |||
410 | 411 | ||
411 | /** | 412 | /** |
412 | * Load the default set of plugins. The default can be changed | 413 | * Load the default set of plugins. The default can be changed |
diff --git a/src/main/extractor.c b/src/main/extractor.c index 17ba1d2..a4ccfa1 100644 --- a/src/main/extractor.c +++ b/src/main/extractor.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include "extractor.h" | 23 | #include "extractor.h" |
24 | #include <dirent.h> | 24 | #include <dirent.h> |
25 | #include <sys/types.h> | 25 | #include <sys/types.h> |
26 | #ifndef WINDOWS | 26 | #if !WINDOWS |
27 | #include <sys/wait.h> | 27 | #include <sys/wait.h> |
28 | #include <sys/shm.h> | 28 | #include <sys/shm.h> |
29 | #endif | 29 | #endif |
@@ -59,117 +59,53 @@ | |||
59 | */ | 59 | */ |
60 | #define MAX_MIME_LEN 256 | 60 | #define MAX_MIME_LEN 256 |
61 | 61 | ||
62 | #define MAX_SHM_NAME 255 | ||
63 | |||
62 | /** | 64 | /** |
63 | * Set to 1 to get failure info, | 65 | * Set to 1 to get failure info, |
64 | * 2 for actual debug info. | 66 | * 2 for actual debug info. |
65 | */ | 67 | */ |
66 | #define DEBUG 1 | 68 | #define DEBUG 1 |
67 | 69 | ||
70 | #define MESSAGE_INIT_STATE 0x01 | ||
71 | #define MESSAGE_UPDATED_SHM 0x02 | ||
72 | #define MESSAGE_DONE 0x03 | ||
73 | #define MESSAGE_SEEK 0x04 | ||
74 | #define MESSAGE_META 0x05 | ||
75 | #define MESSAGE_DISCARD_STATE 0x06 | ||
68 | 76 | ||
69 | /** | 77 | /** |
70 | * Stop the child process of this plugin. | 78 | * Header used for our IPC replies. A header |
79 | * with all fields being zero is used to indicate | ||
80 | * the end of the stream. | ||
71 | */ | 81 | */ |
72 | static void | 82 | struct IpcHeader |
73 | stop_process (struct EXTRACTOR_PluginList *plugin) | ||
74 | { | 83 | { |
75 | int status; | 84 | enum EXTRACTOR_MetaType meta_type; |
76 | #ifdef WINDOWS | 85 | enum EXTRACTOR_MetaFormat meta_format; |
77 | HANDLE process; | 86 | size_t data_len; |
78 | #endif | 87 | size_t mime_len; |
79 | 88 | }; | |
80 | #if DEBUG | ||
81 | #ifndef WINDOWS | ||
82 | if (plugin->cpid == -1) | ||
83 | #else | ||
84 | if (plugin->hProcess == INVALID_HANDLE_VALUE) | ||
85 | #endif | ||
86 | fprintf (stderr, | ||
87 | "Plugin `%s' choked on this input\n", | ||
88 | plugin->short_libname); | ||
89 | #endif | ||
90 | #ifndef WINDOWS | ||
91 | if ( (plugin->cpid == -1) || | ||
92 | (plugin->cpid == 0) ) | ||
93 | return; | ||
94 | kill (plugin->cpid, SIGKILL); | ||
95 | waitpid (plugin->cpid, &status, 0); | ||
96 | plugin->cpid = -1; | ||
97 | close (plugin->cpipe_out); | ||
98 | fclose (plugin->cpipe_in); | ||
99 | #else | ||
100 | if (plugin->hProcess == INVALID_HANDLE_VALUE || | ||
101 | plugin->hProcess == NULL) | ||
102 | return; | ||
103 | TerminateProcess (plugin->hProcess, 0); | ||
104 | CloseHandle (plugin->hProcess); | ||
105 | plugin->hProcess = INVALID_HANDLE_VALUE; | ||
106 | close (plugin->cpipe_out); | ||
107 | fclose (plugin->cpipe_in); | ||
108 | #endif | ||
109 | plugin->cpipe_out = -1; | ||
110 | plugin->cpipe_in = NULL; | ||
111 | } | ||
112 | |||
113 | 89 | ||
114 | /** | 90 | #if !WINDOWS |
115 | * Remove a plugin from a list. | 91 | int |
116 | * | 92 | plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name) |
117 | * @param prev the current list of plugins | ||
118 | * @param library the name of the plugin to remove | ||
119 | * @return the reduced list, unchanged if the plugin was not loaded | ||
120 | */ | ||
121 | struct EXTRACTOR_PluginList * | ||
122 | EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, | ||
123 | const char * library) | ||
124 | { | 93 | { |
125 | struct EXTRACTOR_PluginList *pos; | 94 | if (plugin->shm_id != -1) |
126 | struct EXTRACTOR_PluginList *first; | 95 | close (plugin->shm_id); |
127 | 96 | plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); | |
128 | pos = prev; | 97 | return plugin->shm_id; |
129 | first = prev; | ||
130 | while ((pos != NULL) && (0 != strcmp (pos->short_libname, library))) | ||
131 | { | ||
132 | prev = pos; | ||
133 | pos = pos->next; | ||
134 | } | ||
135 | if (pos != NULL) | ||
136 | { | ||
137 | /* found, close library */ | ||
138 | if (first == pos) | ||
139 | first = pos->next; | ||
140 | else | ||
141 | prev->next = pos->next; | ||
142 | /* found */ | ||
143 | stop_process (pos); | ||
144 | free (pos->short_libname); | ||
145 | free (pos->libname); | ||
146 | free (pos->plugin_options); | ||
147 | if (NULL != pos->libraryHandle) | ||
148 | lt_dlclose (pos->libraryHandle); | ||
149 | free (pos); | ||
150 | } | ||
151 | #if DEBUG | ||
152 | else | ||
153 | fprintf(stderr, | ||
154 | "Unloading plugin `%s' failed!\n", | ||
155 | library); | ||
156 | #endif | ||
157 | return first; | ||
158 | } | 98 | } |
159 | 99 | #else | |
160 | 100 | HANDLE | |
161 | /** | 101 | plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name) |
162 | * Remove all plugins from the given list (destroys the list). | ||
163 | * | ||
164 | * @param plugin the list of plugins | ||
165 | */ | ||
166 | void | ||
167 | EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins) | ||
168 | { | 102 | { |
169 | while (plugins != NULL) | 103 | if (plugin->map_handle != 0) |
170 | plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); | 104 | CloseHandle (plugin->map_handle); |
105 | plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); | ||
106 | return plugin->map_handle; | ||
171 | } | 107 | } |
172 | 108 | #endif | |
173 | 109 | ||
174 | static int | 110 | static int |
175 | write_all (int fd, | 111 | write_all (int fd, |
@@ -187,44 +123,9 @@ write_all (int fd, | |||
187 | return -1; | 123 | return -1; |
188 | off += ret; | 124 | off += ret; |
189 | } | 125 | } |
190 | return 0; | 126 | return size; |
191 | } | ||
192 | |||
193 | |||
194 | static int | ||
195 | read_all (int fd, | ||
196 | void *buf, | ||
197 | size_t size) | ||
198 | { | ||
199 | char *data = buf; | ||
200 | size_t off = 0; | ||
201 | ssize_t ret; | ||
202 | |||
203 | while (off < size) | ||
204 | { | ||
205 | ret = read (fd, &data[off], size - off); | ||
206 | if (ret <= 0) | ||
207 | return -1; | ||
208 | off += ret; | ||
209 | } | ||
210 | return 0; | ||
211 | } | 127 | } |
212 | 128 | ||
213 | |||
214 | /** | ||
215 | * Header used for our IPC replies. A header | ||
216 | * with all fields being zero is used to indicate | ||
217 | * the end of the stream. | ||
218 | */ | ||
219 | struct IpcHeader | ||
220 | { | ||
221 | enum EXTRACTOR_MetaType type; | ||
222 | enum EXTRACTOR_MetaFormat format; | ||
223 | size_t data_len; | ||
224 | size_t mime_len; | ||
225 | }; | ||
226 | |||
227 | |||
228 | /** | 129 | /** |
229 | * Function called by a plugin in a child process. Transmits | 130 | * Function called by a plugin in a child process. Transmits |
230 | * the meta data back to the parent process. | 131 | * the meta data back to the parent process. |
@@ -254,6 +155,8 @@ transmit_reply (void *cls, | |||
254 | int *cpipe_out = cls; | 155 | int *cpipe_out = cls; |
255 | struct IpcHeader hdr; | 156 | struct IpcHeader hdr; |
256 | size_t mime_len; | 157 | size_t mime_len; |
158 | unsigned char meta_byte = MESSAGE_META; | ||
159 | unsigned char zero_byte = 0; | ||
257 | 160 | ||
258 | if (data_mime_type == NULL) | 161 | if (data_mime_type == NULL) |
259 | mime_len = 0; | 162 | mime_len = 0; |
@@ -261,23 +164,19 @@ transmit_reply (void *cls, | |||
261 | mime_len = strlen (data_mime_type) + 1; | 164 | mime_len = strlen (data_mime_type) + 1; |
262 | if (mime_len > MAX_MIME_LEN) | 165 | if (mime_len > MAX_MIME_LEN) |
263 | mime_len = MAX_MIME_LEN; | 166 | mime_len = MAX_MIME_LEN; |
264 | hdr.type = type; | 167 | hdr.meta_type = type; |
265 | hdr.format = format; | 168 | hdr.meta_format = format; |
266 | hdr.data_len = data_len; | 169 | hdr.data_len = data_len; |
267 | hdr.mime_len = mime_len; | 170 | hdr.mime_len = mime_len; |
268 | if ( (hdr.type == 0) && | 171 | if ((1 != write_all (*cpipe_out, &meta_byte, 1)) || |
269 | (hdr.format == 0) && | 172 | (sizeof(hdr) != write_all (*cpipe_out, &hdr, sizeof(hdr))) || |
270 | (hdr.data_len == 0) && | 173 | (mime_len -1 != write_all (*cpipe_out, data_mime_type, mime_len - 1)) || |
271 | (hdr.mime_len == 0) ) | 174 | (1 != write_all (*cpipe_out, &zero_byte, 1)) || |
272 | return 0; /* better skip this one, would signal termination... */ | 175 | (data_len != write_all (*cpipe_out, data, data_len))) |
273 | if ( (0 != write_all (*cpipe_out, &hdr, sizeof(hdr))) || | 176 | return 1; |
274 | (0 != write_all (*cpipe_out, data_mime_type, mime_len)) || | ||
275 | (0 != write_all (*cpipe_out, data, data_len)) ) | ||
276 | return 1; | ||
277 | return 0; | 177 | return 0; |
278 | } | 178 | } |
279 | 179 | ||
280 | |||
281 | /** | 180 | /** |
282 | * 'main' function of the child process. Reads shm-filenames from | 181 | * 'main' function of the child process. Reads shm-filenames from |
283 | * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta | 182 | * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta |
@@ -288,23 +187,20 @@ transmit_reply (void *cls, | |||
288 | * @param out stream to write to | 187 | * @param out stream to write to |
289 | */ | 188 | */ |
290 | static void | 189 | static void |
291 | process_requests (struct EXTRACTOR_PluginList *plugin, | 190 | process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out) |
292 | int in, | ||
293 | int out) | ||
294 | { | 191 | { |
295 | char hfn[256]; | 192 | int read_result1, read_result2, read_result3; |
296 | char tfn[256]; | 193 | unsigned char code; |
297 | char sze[256]; | 194 | int64_t fsize = -1; |
298 | size_t hfn_len; | 195 | int64_t position = 0; |
299 | size_t tfn_len; | 196 | void *shm_ptr = NULL; |
300 | size_t sze_len; | 197 | size_t shm_size = 0; |
301 | char *fn; | 198 | char *shm_name = NULL; |
302 | FILE *fin; | 199 | size_t shm_name_len; |
303 | void *ptr; | 200 | |
304 | int shmid; | 201 | int extract_reply; |
202 | |||
305 | struct IpcHeader hdr; | 203 | struct IpcHeader hdr; |
306 | size_t size; | ||
307 | int want_tail; | ||
308 | int do_break; | 204 | int do_break; |
309 | #ifdef WINDOWS | 205 | #ifdef WINDOWS |
310 | HANDLE map; | 206 | HANDLE map; |
@@ -312,165 +208,554 @@ process_requests (struct EXTRACTOR_PluginList *plugin, | |||
312 | #endif | 208 | #endif |
313 | 209 | ||
314 | if (plugin == NULL) | 210 | if (plugin == NULL) |
315 | { | 211 | { |
316 | close (in); | 212 | close (in); |
317 | close (out); | 213 | close (out); |
318 | return; | 214 | return; |
319 | } | 215 | } |
320 | if (0 != plugin_load (plugin)) | 216 | if (0 != plugin_load (plugin)) |
321 | { | 217 | { |
322 | close (in); | 218 | close (in); |
323 | close (out); | 219 | close (out); |
324 | #if DEBUG | 220 | #if DEBUG |
325 | fprintf (stderr, | 221 | fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname); |
326 | "Plugin `%s' failed to load!\n", | ||
327 | plugin->short_libname); | ||
328 | #endif | 222 | #endif |
329 | return; | 223 | return; |
330 | } | 224 | } |
331 | want_tail = 0; | 225 | if ((plugin->specials != NULL) && |
332 | if ( (plugin->specials != NULL) && | 226 | (NULL != strstr (plugin->specials, "close-stderr"))) |
333 | (NULL != strstr (plugin->specials, | 227 | close (2); |
334 | "want-tail")) ) | 228 | if ((plugin->specials != NULL) && |
335 | { | 229 | (NULL != strstr (plugin->specials, "close-stdout"))) |
336 | want_tail = 1; | 230 | close (1); |
337 | } | ||
338 | if ( (plugin->specials != NULL) && | ||
339 | (NULL != strstr (plugin->specials, | ||
340 | "close-stderr")) ) | ||
341 | { | ||
342 | close (2); | ||
343 | } | ||
344 | if ( (plugin->specials != NULL) && | ||
345 | (NULL != strstr (plugin->specials, | ||
346 | "close-stdout")) ) | ||
347 | { | ||
348 | close (1); | ||
349 | } | ||
350 | 231 | ||
351 | memset (&hdr, 0, sizeof (hdr)); | 232 | memset (&hdr, 0, sizeof (hdr)); |
352 | fin = fdopen (in, "r"); | 233 | do_break = 0; |
353 | if (fin == NULL) | 234 | while (!do_break) |
354 | { | 235 | { |
355 | close (in); | 236 | read_result1 = read (in, &code, 1); |
356 | close (out); | 237 | if (read_result1 <= 0) |
357 | return; | 238 | break; |
358 | } | 239 | switch (code) |
359 | while (NULL != fgets (hfn, sizeof(hfn), fin)) | ||
360 | { | 240 | { |
361 | hfn_len = strlen (hfn); | 241 | case MESSAGE_INIT_STATE: |
362 | if (hfn_len <= 1) | 242 | read_result2 = read (in, &fsize, sizeof (int64_t)); |
363 | break; | 243 | read_result3 = read (in, &shm_name_len, sizeof (size_t)); |
364 | ptr = NULL; | 244 | if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) || |
365 | hfn[--hfn_len] = '\0'; /* kill newline */ | 245 | shm_name_len > MAX_SHM_NAME || fsize <= 0) |
366 | if (NULL == fgets (tfn, sizeof(tfn), fin)) | 246 | { |
367 | break; | 247 | do_break = 1; |
368 | if ('!' != tfn[0]) | 248 | break; |
369 | break; | 249 | } |
370 | tfn_len = strlen (tfn); | 250 | if (shm_name != NULL) |
371 | tfn[--tfn_len] = '\0'; /* kill newline */ | 251 | free (shm_name); |
372 | if ( (want_tail) && | 252 | shm_name = malloc (shm_name_len); |
373 | (tfn_len > 1) ) | 253 | if (shm_name == NULL) |
374 | { | 254 | { |
375 | fn = &tfn[1]; | 255 | do_break = 1; |
376 | } | ||
377 | else | ||
378 | { | ||
379 | fn = hfn; | ||
380 | } | ||
381 | if (NULL == fgets (sze, sizeof(sze), fin)) | ||
382 | break; | ||
383 | if ('s' != sze[0]) | ||
384 | break; | ||
385 | sze_len = strlen (sze); | ||
386 | sze[--sze_len] = '\0'; /* kill newline */ | ||
387 | size = strtol (&sze[1], NULL, 10); | ||
388 | if (size == LONG_MIN || size == LONG_MAX || size == 0) | ||
389 | break; | 256 | break; |
390 | do_break = 0; | 257 | } |
391 | #ifndef WINDOWS | 258 | read_result2 = read (in, shm_name, shm_name_len); |
392 | if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) && | 259 | if (read_result2 < shm_name_len) |
393 | (SIZE_MAX != (size = lseek (shmid, 0, SEEK_END))) && | 260 | { |
394 | (NULL != (ptr = mmap (NULL, size, PROT_READ, MAP_SHARED, shmid, 0))) && | 261 | do_break = 1; |
395 | (ptr != (void*) -1) ) | 262 | break; |
263 | } | ||
264 | shm_name[shm_name_len - 1] = '\0'; | ||
265 | #if !WINDOWS | ||
266 | if (shm_ptr != NULL) | ||
267 | munmap (shm_ptr, shm_size); | ||
268 | if (-1 == plugin_open_shm (plugin, shm_name)) | ||
269 | { | ||
270 | do_break = 1; | ||
271 | break; | ||
272 | } | ||
396 | #else | 273 | #else |
397 | /* Despite the obvious, this must be READWRITE, not READONLY */ | 274 | if (shm_ptr != NULL) |
398 | map = OpenFileMapping (PAGE_READWRITE, FALSE, fn); | 275 | UnmapViewOfFile (shm_ptr); |
399 | ptr = MapViewOfFile (map, FILE_MAP_READ, 0, 0, 0); | 276 | if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name)) |
400 | if (ptr != NULL) | ||
401 | { | 277 | { |
402 | if (0 == VirtualQuery (ptr, &mi, sizeof (mi)) || mi.RegionSize < size) | 278 | do_break = 1; |
403 | { | 279 | break; |
404 | UnmapViewOfFile (ptr); | 280 | } |
405 | ptr = NULL; | 281 | #endif |
406 | } | 282 | plugin->fsize = fsize; |
283 | plugin->init_state_method (plugin); | ||
284 | break; | ||
285 | case MESSAGE_DISCARD_STATE: | ||
286 | plugin->discard_state_method (plugin); | ||
287 | #if !WINDOWS | ||
288 | if (shm_ptr != NULL && shm_size > 0) | ||
289 | munmap (shm_ptr, shm_size); | ||
290 | if (plugin->shm_id != -1) | ||
291 | close (plugin->shm_id); | ||
292 | plugin->shm_id = -1; | ||
293 | shm_size = 0; | ||
294 | #else | ||
295 | if (shm_ptr != NULL) | ||
296 | UnmapViewOfFile (shm_ptr); | ||
297 | if (plugin->map_handle != 0) | ||
298 | CloseHandle (plugin->map_handle); | ||
299 | plugin->map_handle = 0; | ||
300 | #endif | ||
301 | shm_ptr = NULL; | ||
302 | break; | ||
303 | case MESSAGE_UPDATED_SHM: | ||
304 | read_result2 = read (in, &position, sizeof (int64_t)); | ||
305 | read_result3 = read (in, &shm_size, sizeof (size_t)); | ||
306 | if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) || | ||
307 | position < 0 || fsize <= 0 || position >= fsize) | ||
308 | { | ||
309 | do_break = 1; | ||
310 | break; | ||
311 | } | ||
312 | /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */ | ||
313 | #if !WINDOWS | ||
314 | if ((-1 == plugin->shm_id) || | ||
315 | (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) || | ||
316 | (shm_ptr == (void *) -1)) | ||
317 | { | ||
318 | do_break = 1; | ||
319 | break; | ||
320 | } | ||
321 | #else | ||
322 | if ((plugin->map_handle == 0) || | ||
323 | (NULL == (shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0)))) | ||
324 | { | ||
325 | do_break = 1; | ||
326 | break; | ||
407 | } | 327 | } |
408 | if (ptr != NULL) | ||
409 | #endif | 328 | #endif |
410 | { | 329 | plugin->position = position; |
411 | if ( ( (plugin->extractMethod != NULL) && | 330 | plugin->shm_ptr = shm_ptr; |
412 | (0 != plugin->extractMethod (ptr, | 331 | plugin->map_size = shm_size; |
413 | size, | 332 | /* Now, ideally a plugin would do reads and seeks on a virtual "plugin" object |
414 | &transmit_reply, | 333 | * completely transparently, and the underlying code would return bytes from |
415 | &out, | 334 | * the memory map, or would block and wait for a seek to happen. |
416 | plugin->plugin_options)) ) || | 335 | * That, however, requires a somewhat different architecture, and even more wrapping |
417 | (0 != write_all (out, &hdr, sizeof(hdr))) ) | 336 | * and hand-holding. It's easier to make plugins aware of the fact that they work |
418 | do_break = 1; | 337 | * with discrete in-memory buffers with expensive seeking, not continuous files. |
419 | } | 338 | */ |
420 | #ifndef WINDOWS | 339 | extract_reply = plugin->extract_method (plugin, transmit_reply, &out); |
421 | if ( (ptr != NULL) && | 340 | #if !WINDOWS |
422 | (ptr != (void*) -1) ) | 341 | if ((shm_ptr != NULL) && |
423 | munmap (ptr, size); | 342 | (shm_ptr != (void*) -1) ) |
424 | if (-1 != shmid) | 343 | munmap (shm_ptr, shm_size); |
425 | close (shmid); | ||
426 | #else | 344 | #else |
427 | if (ptr != NULL && ptr != (void*) -1) | 345 | if (shm_ptr != NULL) |
428 | UnmapViewOfFile (ptr); | 346 | UnmapViewOfFile (shm_ptr); |
429 | if (map != NULL) | ||
430 | CloseHandle (map); | ||
431 | #endif | 347 | #endif |
432 | if (do_break) | 348 | if (extract_reply == 1) |
433 | break; | 349 | { |
434 | if ( (plugin->specials != NULL) && | 350 | unsigned char done_byte = MESSAGE_DONE; |
435 | (NULL != strstr (plugin->specials, | 351 | if (write (out, &done_byte, 1) != 1) |
436 | "force-kill")) ) | 352 | { |
437 | { | 353 | do_break = 1; |
438 | /* we're required to die after each file since this | 354 | break; |
439 | plugin only supports a single file at a time */ | 355 | } |
440 | _exit (0); | 356 | if ((plugin->specials != NULL) && |
441 | } | 357 | (NULL != strstr (plugin->specials, "force-kill"))) |
358 | { | ||
359 | /* we're required to die after each file since this | ||
360 | plugin only supports a single file at a time */ | ||
361 | #if !WINDOWS | ||
362 | fsync (out); | ||
363 | #else | ||
364 | _commit (out); | ||
365 | #endif | ||
366 | _exit (0); | ||
367 | } | ||
368 | } | ||
369 | else | ||
370 | { | ||
371 | unsigned char seek_byte = MESSAGE_SEEK; | ||
372 | if (write (out, &seek_byte, 1) != 1) | ||
373 | { | ||
374 | do_break = 1; | ||
375 | break; | ||
376 | } | ||
377 | if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t)) | ||
378 | { | ||
379 | do_break = 1; | ||
380 | break; | ||
381 | } | ||
382 | } | ||
383 | break; | ||
442 | } | 384 | } |
443 | fclose (fin); | 385 | } |
386 | close (in); | ||
444 | close (out); | 387 | close (out); |
445 | } | 388 | } |
446 | 389 | ||
390 | #if !WINDOWS | ||
447 | 391 | ||
448 | #ifdef WINDOWS | 392 | /** |
393 | * Start the process for the given plugin. | ||
394 | */ | ||
449 | static void | 395 | static void |
450 | write_plugin_data (int fd, const struct EXTRACTOR_PluginList *plugin) | 396 | start_process (struct EXTRACTOR_PluginList *plugin) |
451 | { | 397 | { |
452 | size_t i; | 398 | int p1[2]; |
453 | DWORD len; | 399 | int p2[2]; |
454 | char *str; | 400 | pid_t pid; |
401 | int status; | ||
455 | 402 | ||
456 | i = strlen (plugin->libname) + 1; | 403 | switch (plugin->flags) |
457 | write (fd, &i, sizeof (size_t)); | 404 | { |
458 | write (fd, plugin->libname, i); | 405 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
459 | i = strlen (plugin->short_libname) + 1; | 406 | if (-1 != plugin->cpid && 0 != plugin->cpid) |
460 | write (fd, &i, sizeof (size_t)); | 407 | return; |
461 | write (fd, plugin->short_libname, i); | 408 | break; |
462 | if (plugin->plugin_options != NULL) | 409 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
410 | if (0 != plugin->cpid) | ||
411 | return; | ||
412 | break; | ||
413 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
414 | return; | ||
415 | break; | ||
416 | case EXTRACTOR_OPTION_DISABLED: | ||
417 | return; | ||
418 | break; | ||
419 | } | ||
420 | |||
421 | plugin->cpid = -1; | ||
422 | if (0 != pipe (p1)) | ||
423 | { | ||
424 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
425 | return; | ||
426 | } | ||
427 | if (0 != pipe (p2)) | ||
428 | { | ||
429 | close (p1[0]); | ||
430 | close (p1[1]); | ||
431 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
432 | return; | ||
433 | } | ||
434 | pid = fork (); | ||
435 | plugin->cpid = pid; | ||
436 | if (pid == -1) | ||
437 | { | ||
438 | close (p1[0]); | ||
439 | close (p1[1]); | ||
440 | close (p2[0]); | ||
441 | close (p2[1]); | ||
442 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
443 | return; | ||
444 | } | ||
445 | if (pid == 0) | ||
446 | { | ||
447 | close (p1[1]); | ||
448 | close (p2[0]); | ||
449 | process_requests (plugin, p1[0], p2[1]); | ||
450 | _exit (0); | ||
451 | } | ||
452 | close (p1[0]); | ||
453 | close (p2[1]); | ||
454 | plugin->cpipe_in = fdopen (p1[1], "w"); | ||
455 | if (plugin->cpipe_in == NULL) | ||
456 | { | ||
457 | perror ("fdopen"); | ||
458 | (void) kill (plugin->cpid, SIGKILL); | ||
459 | waitpid (plugin->cpid, &status, 0); | ||
460 | close (p1[1]); | ||
461 | close (p2[0]); | ||
462 | plugin->cpid = -1; | ||
463 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
464 | return; | ||
465 | } | ||
466 | plugin->cpipe_out = p2[0]; | ||
467 | } | ||
468 | |||
469 | /** | ||
470 | * Stop the child process of this plugin. | ||
471 | */ | ||
472 | static void | ||
473 | stop_process (struct EXTRACTOR_PluginList *plugin) | ||
474 | { | ||
475 | int status; | ||
476 | |||
477 | #if DEBUG | ||
478 | if (plugin->cpid == -1) | ||
479 | fprintf (stderr, | ||
480 | "Plugin `%s' choked on this input\n", | ||
481 | plugin->short_libname); | ||
482 | #endif | ||
483 | if ( (plugin->cpid == -1) || | ||
484 | (plugin->cpid == 0) ) | ||
485 | return; | ||
486 | kill (plugin->cpid, SIGKILL); | ||
487 | waitpid (plugin->cpid, &status, 0); | ||
488 | plugin->cpid = -1; | ||
489 | close (plugin->cpipe_out); | ||
490 | fclose (plugin->cpipe_in); | ||
491 | plugin->cpipe_out = -1; | ||
492 | plugin->cpipe_in = NULL; | ||
493 | |||
494 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | ||
495 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
496 | |||
497 | plugin->seek_request = -1; | ||
498 | } | ||
499 | |||
500 | static int | ||
501 | write_plugin_data (const struct EXTRACTOR_PluginList *plugin) | ||
502 | { | ||
503 | /* only does anything on Windows */ | ||
504 | return 0; | ||
505 | } | ||
506 | |||
507 | #define plugin_print(plug, fmt, ...) fprintf (plug->cpipe_in, fmt, ...) | ||
508 | #define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) | ||
509 | |||
510 | #else /* WINDOWS */ | ||
511 | |||
512 | #ifndef PIPE_BUF | ||
513 | #define PIPE_BUF 512 | ||
514 | #endif | ||
515 | |||
516 | /* Copyright Bob Byrnes <byrnes <at> curl.com> | ||
517 | http://permalink.gmane.org/gmane.os.cygwin.patches/2121 | ||
518 | */ | ||
519 | /* Create a pipe, and return handles to the read and write ends, | ||
520 | just like CreatePipe, but ensure that the write end permits | ||
521 | FILE_READ_ATTRIBUTES access, on later versions of win32 where | ||
522 | this is supported. This access is needed by NtQueryInformationFile, | ||
523 | which is used to implement select and nonblocking writes. | ||
524 | Note that the return value is either NO_ERROR or GetLastError, | ||
525 | unlike CreatePipe, which returns a bool for success or failure. */ | ||
526 | static int | ||
527 | create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr, | ||
528 | LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize, | ||
529 | DWORD dwReadMode, DWORD dwWriteMode) | ||
530 | { | ||
531 | /* Default to error. */ | ||
532 | *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE; | ||
533 | |||
534 | HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE; | ||
535 | |||
536 | /* Ensure that there is enough pipe buffer space for atomic writes. */ | ||
537 | if (psize < PIPE_BUF) | ||
538 | psize = PIPE_BUF; | ||
539 | |||
540 | char pipename[MAX_PATH]; | ||
541 | |||
542 | /* Retry CreateNamedPipe as long as the pipe name is in use. | ||
543 | * Retrying will probably never be necessary, but we want | ||
544 | * to be as robust as possible. */ | ||
545 | while (1) | ||
546 | { | ||
547 | static volatile LONG pipe_unique_id; | ||
548 | |||
549 | snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld", | ||
550 | getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id)); | ||
551 | /* Use CreateNamedPipe instead of CreatePipe, because the latter | ||
552 | * returns a write handle that does not permit FILE_READ_ATTRIBUTES | ||
553 | * access, on versions of win32 earlier than WinXP SP2. | ||
554 | * CreatePipe also stupidly creates a full duplex pipe, which is | ||
555 | * a waste, since only a single direction is actually used. | ||
556 | * It's important to only allow a single instance, to ensure that | ||
557 | * the pipe was not created earlier by some other process, even if | ||
558 | * the pid has been reused. We avoid FILE_FLAG_FIRST_PIPE_INSTANCE | ||
559 | * because that is only available for Win2k SP2 and WinXP. */ | ||
560 | read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1, /* max instances */ | ||
561 | psize, /* output buffer size */ | ||
562 | psize, /* input buffer size */ | ||
563 | NMPWAIT_USE_DEFAULT_WAIT, sa_ptr); | ||
564 | |||
565 | if (read_pipe != INVALID_HANDLE_VALUE) | ||
463 | { | 566 | { |
464 | i = strlen (plugin->plugin_options) + 1; | 567 | break; |
465 | str = plugin->plugin_options; | ||
466 | } | 568 | } |
467 | else | 569 | |
570 | DWORD err = GetLastError (); | ||
571 | |||
572 | switch (err) | ||
468 | { | 573 | { |
469 | i = 0; | 574 | case ERROR_PIPE_BUSY: |
575 | /* The pipe is already open with compatible parameters. | ||
576 | * Pick a new name and retry. */ | ||
577 | continue; | ||
578 | case ERROR_ACCESS_DENIED: | ||
579 | /* The pipe is already open with incompatible parameters. | ||
580 | * Pick a new name and retry. */ | ||
581 | continue; | ||
582 | case ERROR_CALL_NOT_IMPLEMENTED: | ||
583 | /* We are on an older Win9x platform without named pipes. | ||
584 | * Return an anonymous pipe as the best approximation. */ | ||
585 | if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize)) | ||
586 | { | ||
587 | return 0; | ||
588 | } | ||
589 | err = GetLastError (); | ||
590 | return err; | ||
591 | default: | ||
592 | return err; | ||
470 | } | 593 | } |
471 | write (fd, &i, sizeof (size_t)); | 594 | /* NOTREACHED */ |
472 | if (i > 0) | 595 | } |
473 | write (fd, str, i); | 596 | |
597 | /* Open the named pipe for writing. | ||
598 | * Be sure to permit FILE_READ_ATTRIBUTES access. */ | ||
599 | write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0, /* share mode */ | ||
600 | sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and attributes */ | ||
601 | 0); /* handle to template file */ | ||
602 | |||
603 | if (write_pipe == INVALID_HANDLE_VALUE) | ||
604 | { | ||
605 | /* Failure. */ | ||
606 | DWORD err = GetLastError (); | ||
607 | |||
608 | CloseHandle (read_pipe); | ||
609 | return err; | ||
610 | } | ||
611 | |||
612 | /* Success. */ | ||
613 | *read_pipe_ptr = read_pipe; | ||
614 | *write_pipe_ptr = write_pipe; | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static int | ||
619 | write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf) | ||
620 | { | ||
621 | DWORD written; | ||
622 | BOOL bresult; | ||
623 | DWORD err; | ||
624 | |||
625 | if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE)) | ||
626 | return -1; | ||
627 | |||
628 | ResetEvent (ov->hEvent); | ||
629 | |||
630 | if (*old_buf != NULL) | ||
631 | free (*old_buf); | ||
632 | |||
633 | *old_buf = malloc (size); | ||
634 | if (*old_buf == NULL) | ||
635 | return -1; | ||
636 | memcpy (*old_buf, buf, size); | ||
637 | written = 0; | ||
638 | ov->Offset = 0; | ||
639 | ov->OffsetHigh = 0; | ||
640 | ov->Pointer = 0; | ||
641 | ov->Internal = 0; | ||
642 | ov->InternalHigh = 0; | ||
643 | bresult = WriteFile (h, *old_buf, size, &written, ov); | ||
644 | |||
645 | if (bresult == TRUE) | ||
646 | { | ||
647 | SetEvent (ov->hEvent); | ||
648 | free (*old_buf); | ||
649 | *old_buf = NULL; | ||
650 | return written; | ||
651 | } | ||
652 | |||
653 | err = GetLastError (); | ||
654 | if (err == ERROR_IO_PENDING) | ||
655 | return size; | ||
656 | SetEvent (ov->hEvent); | ||
657 | *old_buf = NULL; | ||
658 | SetLastError (err); | ||
659 | return -1; | ||
660 | } | ||
661 | |||
662 | static int | ||
663 | print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt, ...) | ||
664 | { | ||
665 | va_list va; | ||
666 | va_list vacp; | ||
667 | size_t size; | ||
668 | char *print_buf; | ||
669 | int result; | ||
670 | |||
671 | va_start (va, fmt); | ||
672 | va_copy (vacp, va); | ||
673 | size = VSNPRINTF (NULL, 0, fmt, vacp) + 1; | ||
674 | va_end (vacp); | ||
675 | if (size <= 0) | ||
676 | { | ||
677 | va_end (va); | ||
678 | return size; | ||
679 | } | ||
680 | |||
681 | print_buf = malloc (size); | ||
682 | if (print_buf == NULL) | ||
683 | return -1; | ||
684 | VSNPRINTF (print_buf, size, fmt, va); | ||
685 | va_end (va); | ||
686 | |||
687 | result = write_to_pipe (h, ov, print_buf, size, buf); | ||
688 | free (buf); | ||
689 | return result; | ||
690 | } | ||
691 | |||
692 | #define plugin_print(plug, fmt, ...) print_to_pipe (plug->cpipe_in, &plug->ov_write, &plug->ov_write_buffer, fmt, ...) | ||
693 | #define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) | ||
694 | |||
695 | static int | ||
696 | write_plugin_data (struct EXTRACTOR_PluginList *plugin) | ||
697 | { | ||
698 | size_t libname_len, shortname_len, opts_len; | ||
699 | DWORD len; | ||
700 | char *str; | ||
701 | size_t total_len = 0; | ||
702 | unsigned char *buf, *ptr; | ||
703 | |||
704 | switch (plugin->flags) | ||
705 | { | ||
706 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | ||
707 | break; | ||
708 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
709 | break; | ||
710 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
711 | return 0; | ||
712 | break; | ||
713 | case EXTRACTOR_OPTION_DISABLED: | ||
714 | return 0; | ||
715 | break; | ||
716 | } | ||
717 | |||
718 | libname_len = strlen (plugin->libname) + 1; | ||
719 | total_len += sizeof (size_t) + libname_len; | ||
720 | shortname_len = strlen (plugin->short_libname) + 1; | ||
721 | total_len += sizeof (size_t) + shortname_len; | ||
722 | if (plugin->plugin_options != NULL) | ||
723 | { | ||
724 | opts_len = strlen (plugin->plugin_options) + 1; | ||
725 | total_len += opts_len; | ||
726 | } | ||
727 | else | ||
728 | { | ||
729 | opts_len = 0; | ||
730 | } | ||
731 | total_len += sizeof (size_t); | ||
732 | |||
733 | buf = malloc (total_len); | ||
734 | if (buf == NULL) | ||
735 | return -1; | ||
736 | ptr = buf; | ||
737 | memcpy (ptr, &libname_len, sizeof (size_t)); | ||
738 | ptr += sizeof (size_t); | ||
739 | memcpy (ptr, plugin->libname, libname_len); | ||
740 | ptr += libname_len; | ||
741 | memcpy (ptr, &shortname_len, sizeof (size_t)); | ||
742 | ptr += sizeof (size_t); | ||
743 | memcpy (ptr, plugin->short_libname, shortname_len); | ||
744 | ptr += shortname_len; | ||
745 | memcpy (ptr, &opts_len, sizeof (size_t)); | ||
746 | ptr += sizeof (size_t); | ||
747 | if (opts_len > 0) | ||
748 | { | ||
749 | memcpy (ptr, plugin->plugin_options, opts_len); | ||
750 | ptr += opts_len; | ||
751 | } | ||
752 | if (total_len != write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf, total_len, &plugin->ov_write_buffer)) | ||
753 | { | ||
754 | free (buf); | ||
755 | return -1; | ||
756 | } | ||
757 | free (buf); | ||
758 | return 0; | ||
474 | } | 759 | } |
475 | 760 | ||
476 | static struct EXTRACTOR_PluginList * | 761 | static struct EXTRACTOR_PluginList * |
@@ -485,183 +770,112 @@ read_plugin_data (int fd) | |||
485 | read (fd, &i, sizeof (size_t)); | 770 | read (fd, &i, sizeof (size_t)); |
486 | ret->libname = malloc (i); | 771 | ret->libname = malloc (i); |
487 | if (ret->libname == NULL) | 772 | if (ret->libname == NULL) |
488 | { | 773 | { |
489 | free (ret); | 774 | free (ret); |
490 | return NULL; | 775 | return NULL; |
491 | } | 776 | } |
492 | read (fd, ret->libname, i); | 777 | read (fd, ret->libname, i); |
778 | ret->libname[i - 1] = '\0'; | ||
493 | 779 | ||
494 | read (fd, &i, sizeof (size_t)); | 780 | read (fd, &i, sizeof (size_t)); |
495 | ret->short_libname = malloc (i); | 781 | ret->short_libname = malloc (i); |
496 | if (ret->short_libname == NULL) | 782 | if (ret->short_libname == NULL) |
497 | { | 783 | { |
498 | free (ret->libname); | 784 | free (ret->libname); |
499 | free (ret); | 785 | free (ret); |
500 | return NULL; | 786 | return NULL; |
501 | } | 787 | } |
502 | read (fd, ret->short_libname, i); | 788 | read (fd, ret->short_libname, i); |
789 | ret->short_libname[i - 1] = '\0'; | ||
503 | 790 | ||
504 | read (fd, &i, sizeof (size_t)); | 791 | read (fd, &i, sizeof (size_t)); |
505 | if (i == 0) | 792 | if (i == 0) |
506 | { | 793 | { |
507 | ret->plugin_options = NULL; | 794 | ret->plugin_options = NULL; |
508 | } | 795 | } |
509 | else | 796 | else |
797 | { | ||
798 | ret->plugin_options = malloc (i); | ||
799 | if (ret->plugin_options == NULL) | ||
510 | { | 800 | { |
511 | ret->plugin_options = malloc (i); | 801 | free (ret->short_libname); |
512 | if (ret->plugin_options == NULL) | 802 | free (ret->libname); |
513 | { | 803 | free (ret); |
514 | free (ret->short_libname); | 804 | return NULL; |
515 | free (ret->libname); | ||
516 | free (ret); | ||
517 | return NULL; | ||
518 | } | ||
519 | read (fd, ret->plugin_options, i); | ||
520 | } | 805 | } |
806 | read (fd, ret->plugin_options, i); | ||
807 | ret->plugin_options[i - 1] = '\0'; | ||
808 | } | ||
521 | return ret; | 809 | return ret; |
522 | } | 810 | } |
523 | 811 | ||
524 | |||
525 | void CALLBACK | ||
526 | RundllEntryPoint (HWND hwnd, | ||
527 | HINSTANCE hinst, | ||
528 | LPSTR lpszCmdLine, | ||
529 | int nCmdShow) | ||
530 | { | ||
531 | intptr_t in_h; | ||
532 | intptr_t out_h; | ||
533 | int in, out; | ||
534 | |||
535 | sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h); | ||
536 | in = _open_osfhandle (in_h, _O_RDONLY); | ||
537 | out = _open_osfhandle (out_h, 0); | ||
538 | setmode (in, _O_BINARY); | ||
539 | setmode (out, _O_BINARY); | ||
540 | process_requests (read_plugin_data (in), | ||
541 | in, out); | ||
542 | } | ||
543 | |||
544 | void CALLBACK | ||
545 | RundllEntryPointA (HWND hwnd, | ||
546 | HINSTANCE hinst, | ||
547 | LPSTR lpszCmdLine, | ||
548 | int nCmdShow) | ||
549 | { | ||
550 | return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow); | ||
551 | } | ||
552 | #endif | ||
553 | |||
554 | |||
555 | /** | 812 | /** |
556 | * Start the process for the given plugin. | 813 | * Start the process for the given plugin. |
557 | */ | 814 | */ |
558 | static void | 815 | static void |
559 | start_process (struct EXTRACTOR_PluginList *plugin) | 816 | start_process (struct EXTRACTOR_PluginList *plugin) |
560 | { | 817 | { |
561 | #if !WINDOWS | 818 | HANDLE p1[2]; |
562 | int p1[2]; | 819 | HANDLE p2[2]; |
563 | int p2[2]; | ||
564 | pid_t pid; | ||
565 | int status; | ||
566 | |||
567 | plugin->cpid = -1; | ||
568 | if (0 != pipe (p1)) | ||
569 | { | ||
570 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
571 | return; | ||
572 | } | ||
573 | if (0 != pipe (p2)) | ||
574 | { | ||
575 | close (p1[0]); | ||
576 | close (p1[1]); | ||
577 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
578 | return; | ||
579 | } | ||
580 | pid = fork (); | ||
581 | plugin->cpid = pid; | ||
582 | if (pid == -1) | ||
583 | { | ||
584 | close (p1[0]); | ||
585 | close (p1[1]); | ||
586 | close (p2[0]); | ||
587 | close (p2[1]); | ||
588 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
589 | return; | ||
590 | } | ||
591 | if (pid == 0) | ||
592 | { | ||
593 | close (p1[1]); | ||
594 | close (p2[0]); | ||
595 | process_requests (plugin, p1[0], p2[1]); | ||
596 | _exit (0); | ||
597 | } | ||
598 | close (p1[0]); | ||
599 | close (p2[1]); | ||
600 | plugin->cpipe_in = fdopen (p1[1], "w"); | ||
601 | if (plugin->cpipe_in == NULL) | ||
602 | { | ||
603 | perror ("fdopen"); | ||
604 | (void) kill (plugin->cpid, SIGKILL); | ||
605 | waitpid (plugin->cpid, &status, 0); | ||
606 | close (p1[1]); | ||
607 | close (p2[0]); | ||
608 | plugin->cpid = -1; | ||
609 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
610 | return; | ||
611 | } | ||
612 | plugin->cpipe_out = p2[0]; | ||
613 | #else | ||
614 | int p1[2]; | ||
615 | int p2[2]; | ||
616 | STARTUPINFO startup; | 820 | STARTUPINFO startup; |
617 | PROCESS_INFORMATION proc; | 821 | PROCESS_INFORMATION proc; |
618 | char cmd[MAX_PATH + 1]; | 822 | char cmd[MAX_PATH + 1]; |
619 | char arg1[10], arg2[10]; | 823 | char arg1[10], arg2[10]; |
620 | HANDLE p10_os = INVALID_HANDLE_VALUE, p21_os = INVALID_HANDLE_VALUE; | ||
621 | HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; | 824 | HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; |
825 | SECURITY_ATTRIBUTES sa; | ||
622 | 826 | ||
623 | plugin->hProcess = NULL; | 827 | switch (plugin->flags) |
624 | if (0 != _pipe (p1, 0, _O_BINARY | _O_NOINHERIT)) | 828 | { |
625 | { | 829 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
626 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 830 | if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0) |
627 | return; | 831 | return; |
628 | } | 832 | break; |
629 | if (0 != _pipe (p2, 0, _O_BINARY | _O_NOINHERIT)) | 833 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
630 | { | 834 | if (plugin->hProcess != 0) |
631 | close (p1[0]); | ||
632 | close (p1[1]); | ||
633 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
634 | return; | 835 | return; |
635 | } | 836 | break; |
837 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
838 | return; | ||
839 | break; | ||
840 | case EXTRACTOR_OPTION_DISABLED: | ||
841 | return; | ||
842 | break; | ||
843 | } | ||
636 | 844 | ||
637 | memset (&startup, 0, sizeof (STARTUPINFO)); | 845 | sa.nLength = sizeof (sa); |
846 | sa.lpSecurityDescriptor = NULL; | ||
847 | sa.bInheritHandle = FALSE; | ||
638 | 848 | ||
639 | p10_os = (HANDLE) _get_osfhandle (p1[0]); | 849 | plugin->hProcess = NULL; |
640 | p21_os = (HANDLE) _get_osfhandle (p2[1]); | ||
641 | 850 | ||
642 | if (p10_os == INVALID_HANDLE_VALUE || p21_os == INVALID_HANDLE_VALUE) | 851 | if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) |
643 | { | 852 | { |
644 | close (p1[0]); | ||
645 | close (p1[1]); | ||
646 | close (p2[0]); | ||
647 | close (p2[1]); | ||
648 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 853 | plugin->flags = EXTRACTOR_OPTION_DISABLED; |
649 | return; | 854 | return; |
650 | } | 855 | } |
856 | if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) | ||
857 | { | ||
858 | CloseHandle (p1[0]); | ||
859 | CloseHandle (p1[1]); | ||
860 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
861 | return; | ||
862 | } | ||
863 | |||
864 | memset (&startup, 0, sizeof (STARTUPINFO)); | ||
651 | 865 | ||
652 | if (!DuplicateHandle (GetCurrentProcess (), p10_os, GetCurrentProcess (), | 866 | if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (), |
653 | &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS) | 867 | &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS) |
654 | || !DuplicateHandle (GetCurrentProcess (), p21_os, GetCurrentProcess (), | 868 | || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (), |
655 | &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) | 869 | &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) |
656 | { | 870 | { |
657 | if (p10_os_inh != INVALID_HANDLE_VALUE) | 871 | if (p10_os_inh != INVALID_HANDLE_VALUE) |
658 | CloseHandle (p10_os_inh); | 872 | CloseHandle (p10_os_inh); |
659 | if (p21_os_inh != INVALID_HANDLE_VALUE) | 873 | if (p21_os_inh != INVALID_HANDLE_VALUE) |
660 | CloseHandle (p21_os_inh); | 874 | CloseHandle (p21_os_inh); |
661 | close (p1[0]); | 875 | CloseHandle (p1[0]); |
662 | close (p1[1]); | 876 | CloseHandle (p1[1]); |
663 | close (p2[0]); | 877 | CloseHandle (p2[0]); |
664 | close (p2[1]); | 878 | CloseHandle (p2[1]); |
665 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 879 | plugin->flags = EXTRACTOR_OPTION_DISABLED; |
666 | return; | 880 | return; |
667 | } | 881 | } |
@@ -675,186 +889,584 @@ start_process (struct EXTRACTOR_PluginList *plugin) | |||
675 | CloseHandle (proc.hThread); | 889 | CloseHandle (proc.hThread); |
676 | } | 890 | } |
677 | else | 891 | else |
678 | { | 892 | { |
679 | close (p1[0]); | 893 | CloseHandle (p1[0]); |
680 | close (p1[1]); | 894 | CloseHandle (p1[1]); |
681 | close (p2[0]); | 895 | CloseHandle (p2[0]); |
682 | close (p2[1]); | 896 | CloseHandle (p2[1]); |
683 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 897 | plugin->flags = EXTRACTOR_OPTION_DISABLED; |
684 | return; | 898 | return; |
685 | } | 899 | } |
686 | close (p1[0]); | 900 | CloseHandle (p1[0]); |
687 | close (p2[1]); | 901 | CloseHandle (p2[1]); |
688 | CloseHandle (p10_os_inh); | 902 | CloseHandle (p10_os_inh); |
689 | CloseHandle (p21_os_inh); | 903 | CloseHandle (p21_os_inh); |
690 | 904 | ||
691 | write_plugin_data (p1[1], plugin); | 905 | plugin->cpipe_in = p1[1]; |
906 | plugin->cpipe_out = p2[0]; | ||
692 | 907 | ||
693 | plugin->cpipe_in = fdopen (p1[1], "w"); | 908 | memset (&plugin->ov_read, 0, sizeof (OVERLAPPED)); |
694 | if (plugin->cpipe_in == NULL) | 909 | memset (&plugin->ov_write, 0, sizeof (OVERLAPPED)); |
910 | |||
911 | plugin->ov_write_buffer = NULL; | ||
912 | |||
913 | plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); | ||
914 | plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); | ||
915 | } | ||
916 | |||
917 | /** | ||
918 | * Stop the child process of this plugin. | ||
919 | */ | ||
920 | static void | ||
921 | stop_process (struct EXTRACTOR_PluginList *plugin) | ||
922 | { | ||
923 | int status; | ||
924 | HANDLE process; | ||
925 | |||
926 | #if DEBUG | ||
927 | if (plugin->hProcess == INVALID_HANDLE_VALUE) | ||
928 | fprintf (stderr, | ||
929 | "Plugin `%s' choked on this input\n", | ||
930 | plugin->short_libname); | ||
931 | #endif | ||
932 | if (plugin->hProcess == INVALID_HANDLE_VALUE || | ||
933 | plugin->hProcess == NULL) | ||
934 | return; | ||
935 | TerminateProcess (plugin->hProcess, 0); | ||
936 | CloseHandle (plugin->hProcess); | ||
937 | plugin->hProcess = INVALID_HANDLE_VALUE; | ||
938 | CloseHandle (plugin->cpipe_out); | ||
939 | CloseHandle (plugin->cpipe_in); | ||
940 | plugin->cpipe_out = INVALID_HANDLE_VALUE; | ||
941 | plugin->cpipe_in = INVALID_HANDLE_VALUE; | ||
942 | CloseHandle (plugin->ov_read.hEvent); | ||
943 | CloseHandle (plugin->ov_write.hEvent); | ||
944 | if (plugin->ov_write_buffer != NULL) | ||
945 | { | ||
946 | free (plugin->ov_write_buffer); | ||
947 | plugin->ov_write_buffer = NULL; | ||
948 | } | ||
949 | |||
950 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | ||
951 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
952 | |||
953 | plugin->seek_request = -1; | ||
954 | } | ||
955 | |||
956 | #endif /* WINDOWS */ | ||
957 | |||
958 | /** | ||
959 | * Remove a plugin from a list. | ||
960 | * | ||
961 | * @param prev the current list of plugins | ||
962 | * @param library the name of the plugin to remove | ||
963 | * @return the reduced list, unchanged if the plugin was not loaded | ||
964 | */ | ||
965 | struct EXTRACTOR_PluginList * | ||
966 | EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, | ||
967 | const char * library) | ||
968 | { | ||
969 | struct EXTRACTOR_PluginList *pos; | ||
970 | struct EXTRACTOR_PluginList *first; | ||
971 | |||
972 | pos = prev; | ||
973 | first = prev; | ||
974 | while ((pos != NULL) && (0 != strcmp (pos->short_libname, library))) | ||
695 | { | 975 | { |
696 | perror ("fdopen"); | 976 | prev = pos; |
697 | TerminateProcess (plugin->hProcess, 0); | 977 | pos = pos->next; |
698 | WaitForSingleObject (plugin->hProcess, INFINITE); | ||
699 | CloseHandle (plugin->hProcess); | ||
700 | close (p1[1]); | ||
701 | close (p2[0]); | ||
702 | plugin->hProcess = INVALID_HANDLE_VALUE; | ||
703 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
704 | return; | ||
705 | } | 978 | } |
706 | plugin->cpipe_out = p2[0]; | 979 | if (pos != NULL) |
980 | { | ||
981 | /* found, close library */ | ||
982 | if (first == pos) | ||
983 | first = pos->next; | ||
984 | else | ||
985 | prev->next = pos->next; | ||
986 | /* found */ | ||
987 | stop_process (pos); | ||
988 | free (pos->short_libname); | ||
989 | free (pos->libname); | ||
990 | free (pos->plugin_options); | ||
991 | if (NULL != pos->libraryHandle) | ||
992 | lt_dlclose (pos->libraryHandle); | ||
993 | free (pos); | ||
994 | } | ||
995 | #if DEBUG | ||
996 | else | ||
997 | fprintf(stderr, | ||
998 | "Unloading plugin `%s' failed!\n", | ||
999 | library); | ||
707 | #endif | 1000 | #endif |
1001 | return first; | ||
708 | } | 1002 | } |
709 | 1003 | ||
710 | 1004 | ||
711 | /** | 1005 | /** |
712 | * Extract meta data using the given plugin, running the | 1006 | * Remove all plugins from the given list (destroys the list). |
713 | * actual code of the plugin out-of-process. | ||
714 | * | 1007 | * |
715 | * @param plugin which plugin to call | 1008 | * @param plugin the list of plugins |
716 | * @param size size of the file mapped by shmfn or tshmfn | 1009 | */ |
717 | * @param shmfn file name of the shared memory segment | 1010 | void |
718 | * @param tshmfn file name of the shared memory segment for the end of the data | 1011 | EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins) |
719 | * @param proc function to call on the meta data | 1012 | { |
1013 | while (plugins != NULL) | ||
1014 | plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); | ||
1015 | } | ||
1016 | |||
1017 | |||
1018 | |||
/**
 * Open a file
 *
 * On MinGW the name is first converted to a Windows path and the file
 * is opened in binary mode.
 *
 * NOTE(review): the variadic arguments are not consumed, so a creation
 * mode supplied together with O_CREAT is ignored and 0 is passed
 * instead -- confirm that no caller relies on O_CREAT.
 *
 * @param filename name of the file to open
 * @param oflag open flags, handed on to OPEN
 * @return whatever OPEN returns; -1 with errno set to ENOENT if the
 *         path conversion fails (MinGW only)
 */
static int file_open(const char *filename, int oflag, ...)
{
  const char *fn;
#ifdef MINGW
  char szFile[_MAX_PATH + 1];
  long lRet;

  if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS)
  {
    errno = ENOENT;
    SetLastError(lRet);
    return -1;
  }
  fn = szFile;
  /* Set binary mode: O_BINARY is an open flag, not a permission bit,
   * so it has to go into oflag rather than into the mode argument. */
  oflag |= O_BINARY;
#else
  fn = filename;
#endif
  return OPEN(fn, oflag, 0);
}
1047 | |||
1048 | #ifndef O_LARGEFILE | ||
1049 | #define O_LARGEFILE 0 | ||
1050 | #endif | ||
1051 | |||
1052 | #if HAVE_ZLIB | ||
1053 | #define MIN_ZLIB_HEADER 12 | ||
1054 | #endif | ||
1055 | #if HAVE_LIBBZ2 | ||
1056 | #define MIN_BZ2_HEADER 4 | ||
1057 | #endif | ||
1058 | #if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB | ||
1059 | #define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER | ||
1060 | #endif | ||
1061 | #if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2 | ||
1062 | #define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER | ||
1063 | #endif | ||
1064 | #if !defined (MIN_COMPRESSED_HEADER) | ||
1065 | #define MIN_COMPRESSED_HEADER -1 | ||
1066 | #endif | ||
1067 | |||
1068 | #define COMPRESSED_DATA_PROBE_SIZE 3 | ||
1069 | |||
1070 | /** | ||
1071 | * Try to decompress compressed data | ||
1072 | * | ||
1073 | * @param data data to decompress, or NULL (if fd is not -1) | ||
1074 | * @param fd file to read data from, or -1 (if data is not NULL) | ||
1075 | * @param fsize size of data (if data is not NULL) or size of fd file (if fd is not -1) | ||
1076 | * @param compression_type type of compression, as returned by get_compression_type () | ||
1077 | * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it. | ||
1078 | * @param buffer_size a pointer to buffer size | ||
1079 | * @param proc callback for metadata | ||
720 | * @param proc_cls cls for proc | 1080 | * @param proc_cls cls for proc |
721 | * @return 0 if proc did not return non-zero | 1081 | * @return 0 on success, anything else on error |
722 | */ | 1082 | */ |
723 | static int | 1083 | static int |
724 | extract_oop (struct EXTRACTOR_PluginList *plugin, | 1084 | try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int compression_type, void **buffer, size_t *buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
725 | size_t size, | ||
726 | const char *shmfn, | ||
727 | const char *tshmfn, | ||
728 | EXTRACTOR_MetaDataProcessor proc, | ||
729 | void *proc_cls) | ||
730 | { | 1085 | { |
731 | struct IpcHeader hdr; | 1086 | unsigned char *new_buffer; |
732 | char mimetype[MAX_MIME_LEN + 1]; | 1087 | ssize_t read_result; |
733 | char *data; | ||
734 | 1088 | ||
735 | #ifndef WINDOWS | 1089 | unsigned char *buf; |
736 | if (plugin->cpid == -1) | 1090 | unsigned char *rbuf; |
737 | #else | 1091 | size_t dsize; |
738 | if (plugin->hProcess == INVALID_HANDLE_VALUE) | 1092 | #if HAVE_ZLIB |
1093 | z_stream strm; | ||
1094 | int ret; | ||
1095 | size_t pos; | ||
739 | #endif | 1096 | #endif |
740 | return 0; | 1097 | #if HAVE_LIBBZ2 |
741 | if (0 >= fprintf (plugin->cpipe_in, | 1098 | bz_stream bstrm; |
742 | "%s\n", | 1099 | int bret; |
743 | shmfn)) | 1100 | size_t bpos; |
744 | { | ||
745 | stop_process (plugin); | ||
746 | #ifndef WINDOWS | ||
747 | plugin->cpid = -1; | ||
748 | #else | ||
749 | plugin->hProcess = INVALID_HANDLE_VALUE; | ||
750 | #endif | 1101 | #endif |
751 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | 1102 | |
752 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 1103 | if (fd != -1) |
753 | return 0; | 1104 | { |
1105 | if (fsize > *buffer_size) | ||
1106 | { | ||
1107 | /* Read the rest of the file. Can't de-compress it partially anyway */ | ||
1108 | /* Memory mapping is not useful here, because memory mapping ALSO takes up | ||
1109 | * memory (even more than a buffer, since it might be aligned), and | ||
1110 | * because we need to read every byte anyway (lazy on-demand reads into | ||
1111 | * memory provided by memory mapping won't help). | ||
1112 | */ | ||
1113 | new_buffer = realloc (*buffer, fsize); | ||
1114 | if (new_buffer == NULL) | ||
1115 | { | ||
1116 | free (*buffer); | ||
1117 | return -1; | ||
1118 | } | ||
1119 | read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size); | ||
1120 | if (read_result != fsize - *buffer_size) | ||
1121 | { | ||
1122 | free (*buffer); | ||
1123 | return -1; | ||
1124 | } | ||
1125 | *buffer = new_buffer; | ||
1126 | *buffer_size = fsize; | ||
754 | } | 1127 | } |
755 | if (0 >= fprintf (plugin->cpipe_in, | 1128 | data = (const unsigned char *) new_buffer; |
756 | "!%s\n", | 1129 | } |
757 | (tshmfn != NULL) ? tshmfn : "")) | 1130 | |
1131 | #if HAVE_ZLIB | ||
1132 | if (compression_type == 1) | ||
1133 | { | ||
1134 | /* Process gzip header */ | ||
1135 | unsigned int gzip_header_length = 10; | ||
1136 | |||
1137 | if (data[3] & 0x4) /* FEXTRA set */ | ||
1138 | gzip_header_length += 2 + (unsigned) (data[10] & 0xff) + | ||
1139 | (((unsigned) (data[11] & 0xff)) * 256); | ||
1140 | |||
1141 | if (data[3] & 0x8) /* FNAME set */ | ||
758 | { | 1142 | { |
759 | stop_process (plugin); | 1143 | const unsigned char *cptr = data + gzip_header_length; |
760 | #ifndef WINDOWS | 1144 | |
761 | plugin->cpid = -1; | 1145 | /* stored file name is here */ |
762 | #else | 1146 | while ((cptr - data) < fsize) |
763 | plugin->hProcess = INVALID_HANDLE_VALUE; | 1147 | { |
764 | #endif | 1148 | if ('\0' == *cptr) |
765 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | 1149 | break; |
766 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 1150 | cptr++; |
767 | return 0; | 1151 | } |
1152 | |||
1153 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, | ||
1154 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | ||
1155 | (const char *) (data + gzip_header_length), | ||
1156 | cptr - (data + gzip_header_length))) | ||
1157 | return 0; /* done */ | ||
1158 | |||
1159 | gzip_header_length = (cptr - data) + 1; | ||
768 | } | 1160 | } |
769 | if (0 >= fprintf (plugin->cpipe_in, | 1161 | |
770 | "s%lu\n", | 1162 | if (data[3] & 0x16) /* FCOMMENT set */ |
771 | size)) | ||
772 | { | 1163 | { |
773 | stop_process (plugin); | 1164 | const unsigned char * cptr = data + gzip_header_length; |
774 | #ifndef WINDOWS | 1165 | |
775 | plugin->cpid = -1; | 1166 | /* stored comment is here */ |
776 | #else | 1167 | while (cptr < data + fsize) |
777 | plugin->hProcess = INVALID_HANDLE_VALUE; | 1168 | { |
1169 | if ('\0' == *cptr) | ||
1170 | break; | ||
1171 | cptr ++; | ||
1172 | } | ||
1173 | |||
1174 | if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, | ||
1175 | EXTRACTOR_METAFORMAT_C_STRING, "text/plain", | ||
1176 | (const char *) (data + gzip_header_length), | ||
1177 | cptr - (data + gzip_header_length))) | ||
1178 | return 0; /* done */ | ||
1179 | |||
1180 | gzip_header_length = (cptr - data) + 1; | ||
1181 | } | ||
1182 | |||
1183 | if (data[3] & 0x2) /* FCHRC set */ | ||
1184 | gzip_header_length += 2; | ||
1185 | |||
1186 | memset (&strm, 0, sizeof (z_stream)); | ||
1187 | |||
1188 | #ifdef ZLIB_VERNUM | ||
1189 | gzip_header_length = 0; | ||
778 | #endif | 1190 | #endif |
779 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | 1191 | |
780 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 1192 | if (fsize > gzip_header_length) |
781 | return 0; | 1193 | { |
1194 | strm.next_in = (Bytef *) data + gzip_header_length; | ||
1195 | strm.avail_in = fsize - gzip_header_length; | ||
782 | } | 1196 | } |
783 | fflush (plugin->cpipe_in); | 1197 | else |
784 | while (1) | ||
785 | { | 1198 | { |
786 | if (0 != read_all (plugin->cpipe_out, | 1199 | strm.next_in = (Bytef *) data; |
787 | &hdr, | 1200 | strm.avail_in = 0; |
788 | sizeof(hdr))) | 1201 | } |
789 | { | 1202 | strm.total_in = 0; |
790 | stop_process (plugin); | 1203 | strm.zalloc = NULL; |
791 | #ifndef WINDOWS | 1204 | strm.zfree = NULL; |
792 | plugin->cpid = -1; | 1205 | strm.opaque = NULL; |
1206 | |||
1207 | /* | ||
1208 | * note: maybe plain inflateInit(&strm) is adequate, | ||
1209 | * it looks more backward-compatible also ; | ||
1210 | * | ||
1211 | * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; | ||
1212 | * there might be a better check. | ||
1213 | */ | ||
1214 | if (Z_OK == inflateInit2 (&strm, | ||
1215 | #ifdef ZLIB_VERNUM | ||
1216 | 15 + 32 | ||
793 | #else | 1217 | #else |
794 | plugin->hProcess = INVALID_HANDLE_VALUE; | 1218 | -MAX_WBITS |
795 | #endif | 1219 | #endif |
796 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | 1220 | )) |
797 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 1221 | { |
798 | return 0; | 1222 | pos = 0; |
799 | } | 1223 | dsize = 2 * fsize; |
800 | if ( (hdr.type == 0) && | 1224 | if ( (dsize > MAX_DECOMPRESS) || |
801 | (hdr.format == 0) && | 1225 | (dsize < fsize) ) |
802 | (hdr.data_len == 0) && | 1226 | dsize = MAX_DECOMPRESS; |
803 | (hdr.mime_len == 0) ) | 1227 | buf = malloc (dsize); |
804 | break; | 1228 | |
805 | if (hdr.mime_len > MAX_MIME_LEN) | 1229 | if (buf != NULL) |
806 | { | 1230 | { |
807 | stop_process (plugin); | 1231 | strm.next_out = (Bytef *) buf; |
808 | #ifndef WINDOWS | 1232 | strm.avail_out = dsize; |
809 | plugin->cpid = -1; | 1233 | |
810 | #else | 1234 | do |
811 | plugin->hProcess = INVALID_HANDLE_VALUE; | 1235 | { |
1236 | ret = inflate (&strm, Z_SYNC_FLUSH); | ||
1237 | if (ret == Z_OK) | ||
1238 | { | ||
1239 | if (dsize == MAX_DECOMPRESS) | ||
1240 | break; | ||
1241 | |||
1242 | pos += strm.total_out; | ||
1243 | strm.total_out = 0; | ||
1244 | dsize *= 2; | ||
1245 | |||
1246 | if (dsize > MAX_DECOMPRESS) | ||
1247 | dsize = MAX_DECOMPRESS; | ||
1248 | |||
1249 | rbuf = realloc (buf, dsize); | ||
1250 | if (rbuf == NULL) | ||
1251 | { | ||
1252 | free (buf); | ||
1253 | buf = NULL; | ||
1254 | break; | ||
1255 | } | ||
1256 | |||
1257 | buf = rbuf; | ||
1258 | strm.next_out = (Bytef *) &buf[pos]; | ||
1259 | strm.avail_out = dsize - pos; | ||
1260 | } | ||
1261 | else if (ret != Z_STREAM_END) | ||
1262 | { | ||
1263 | /* error */ | ||
1264 | free (buf); | ||
1265 | buf = NULL; | ||
1266 | } | ||
1267 | } while ((buf != NULL) && (ret != Z_STREAM_END)); | ||
1268 | |||
1269 | dsize = pos + strm.total_out; | ||
1270 | if ((dsize == 0) && (buf != NULL)) | ||
1271 | { | ||
1272 | free (buf); | ||
1273 | buf = NULL; | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | inflateEnd (&strm); | ||
1278 | |||
1279 | if (fd != -1) | ||
1280 | if (*buffer != NULL) | ||
1281 | free (*buffer); | ||
1282 | |||
1283 | if (buf == NULL) | ||
1284 | { | ||
1285 | return -1; | ||
1286 | } | ||
1287 | else | ||
1288 | { | ||
1289 | *buffer = buf; | ||
1290 | *buffer_size = dsize; | ||
1291 | return 0; | ||
1292 | } | ||
1293 | } | ||
1294 | } | ||
812 | #endif | 1295 | #endif |
813 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | 1296 | |
814 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 1297 | #if HAVE_LIBBZ2 |
815 | return 0; | 1298 | if (compression_type == 2) |
816 | } | 1299 | { |
817 | data = malloc (hdr.data_len); | 1300 | memset(&bstrm, 0, sizeof (bz_stream)); |
818 | if (data == NULL) | 1301 | bstrm.next_in = (char *) data; |
819 | { | 1302 | bstrm.avail_in = fsize; |
820 | stop_process (plugin); | 1303 | bstrm.total_in_lo32 = 0; |
821 | return 1; | 1304 | bstrm.total_in_hi32 = 0; |
822 | } | 1305 | bstrm.bzalloc = NULL; |
823 | if ( (0 != (read_all (plugin->cpipe_out, | 1306 | bstrm.bzfree = NULL; |
824 | mimetype, | 1307 | bstrm.opaque = NULL; |
825 | hdr.mime_len))) || | 1308 | if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0)) |
826 | (0 != (read_all (plugin->cpipe_out, | 1309 | { |
827 | data, | 1310 | bpos = 0; |
828 | hdr.data_len))) ) | 1311 | dsize = 2 * fsize; |
829 | { | 1312 | if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) ) |
830 | stop_process (plugin); | 1313 | dsize = MAX_DECOMPRESS; |
831 | #ifndef WINDOWS | 1314 | buf = malloc (dsize); |
832 | plugin->cpid = -1; | 1315 | |
833 | #else | 1316 | if (buf != NULL) |
834 | plugin->hProcess = INVALID_HANDLE_VALUE; | 1317 | { |
1318 | bstrm.next_out = (char *) buf; | ||
1319 | bstrm.avail_out = dsize; | ||
1320 | |||
1321 | do | ||
1322 | { | ||
1323 | bret = BZ2_bzDecompress (&bstrm); | ||
1324 | if (bret == Z_OK) | ||
1325 | { | ||
1326 | if (dsize == MAX_DECOMPRESS) | ||
1327 | break; | ||
1328 | bpos += bstrm.total_out_lo32; | ||
1329 | bstrm.total_out_lo32 = 0; | ||
1330 | |||
1331 | dsize *= 2; | ||
1332 | if (dsize > MAX_DECOMPRESS) | ||
1333 | dsize = MAX_DECOMPRESS; | ||
1334 | |||
1335 | rbuf = realloc(buf, dsize); | ||
1336 | if (rbuf == NULL) | ||
1337 | { | ||
1338 | free (buf); | ||
1339 | buf = NULL; | ||
1340 | break; | ||
1341 | } | ||
1342 | |||
1343 | buf = rbuf; | ||
1344 | bstrm.next_out = (char*) &buf[bpos]; | ||
1345 | bstrm.avail_out = dsize - bpos; | ||
1346 | } | ||
1347 | else if (bret != BZ_STREAM_END) | ||
1348 | { | ||
1349 | /* error */ | ||
1350 | free (buf); | ||
1351 | buf = NULL; | ||
1352 | } | ||
1353 | } while ((buf != NULL) && (bret != BZ_STREAM_END)); | ||
1354 | |||
1355 | dsize = bpos + bstrm.total_out_lo32; | ||
1356 | if ((dsize == 0) && (buf != NULL)) | ||
1357 | { | ||
1358 | free (buf); | ||
1359 | buf = NULL; | ||
1360 | } | ||
1361 | } | ||
1362 | |||
1363 | BZ2_bzDecompressEnd (&bstrm); | ||
1364 | |||
1365 | if (fd != -1) | ||
1366 | if (*buffer != NULL) | ||
1367 | free (*buffer); | ||
1368 | |||
1369 | if (buf == NULL) | ||
1370 | { | ||
1371 | return -1; | ||
1372 | } | ||
1373 | else | ||
1374 | { | ||
1375 | *buffer = buf; | ||
1376 | *buffer_size = dsize; | ||
1377 | return 0; | ||
1378 | } | ||
1379 | } | ||
1380 | } | ||
835 | #endif | 1381 | #endif |
836 | free (data); | 1382 | return -1; |
837 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | 1383 | } |
838 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 1384 | |
839 | return 0; | 1385 | /** |
840 | } | 1386 | * Detect if we have compressed data on our hands. |
841 | mimetype[hdr.mime_len] = '\0'; | 1387 | * |
842 | if ( (proc != NULL) && | 1388 | * @param data pointer to a data buffer or NULL (in case fd is not -1) |
843 | (0 != proc (proc_cls, | 1389 | * @param fd a file to read data from, or -1 (if data is not NULL) |
844 | plugin->short_libname, | 1390 | * @param fsize size of data (if data is not NULL) or of file (if fd is not -1) |
845 | hdr.type, | 1391 | * @param buffer will receive a pointer to the data that this function read |
846 | hdr.format, | 1392 | * @param buffer_size will receive size of the buffer |
847 | mimetype, | 1393 | * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression |
848 | data, | 1394 | */ |
849 | hdr.data_len)) ) | 1395 | static int |
850 | proc = NULL; | 1396 | get_compression_type (const unsigned char *data, int fd, int64_t fsize, void **buffer, size_t *buffer_size) |
851 | free (data); | 1397 | { |
1398 | void *read_data = NULL; | ||
1399 | size_t read_data_size = 0; | ||
1400 | ssize_t read_result; | ||
1401 | |||
1402 | if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER)) | ||
1403 | { | ||
1404 | *buffer = NULL; | ||
1405 | return 0; | ||
1406 | } | ||
1407 | if (data == NULL) | ||
1408 | { | ||
1409 | read_data_size = COMPRESSED_DATA_PROBE_SIZE; | ||
1410 | read_data = malloc (read_data_size); | ||
1411 | if (read_data == NULL) | ||
1412 | return -1; | ||
1413 | read_result = READ (fd, read_data, read_data_size); | ||
1414 | if (read_result != read_data_size) | ||
1415 | { | ||
1416 | free (read_data); | ||
1417 | return -1; | ||
852 | } | 1418 | } |
853 | if (NULL == proc) | 1419 | *buffer = read_data; |
1420 | *buffer_size = read_data_size; | ||
1421 | data = (const void *) read_data; | ||
1422 | } | ||
1423 | #if HAVE_ZLIB | ||
1424 | if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08)) | ||
1425 | return 1; | ||
1426 | #endif | ||
1427 | #if HAVE_LIBBZ2 | ||
1428 | if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h')) | ||
1429 | return 2; | ||
1430 | #endif | ||
1431 | return 0; | ||
1432 | } | ||
1433 | |||
1434 | #if WINDOWS | ||
1435 | |||
1436 | /** | ||
1437 | * Setup a shared memory segment. | ||
1438 | * | ||
1439 | * @param ptr set to the location of the map segment | ||
1440 | * @param map where to store the map handle | ||
1441 | * @param fn name of the mapping | ||
1442 | * @param fn_size size available in fn | ||
1443 | * @param size number of bytes to allocated for the mapping | ||
1444 | * @return 0 on success | ||
1445 | */ | ||
1446 | static int | ||
1447 | make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size) | ||
1448 | { | ||
1449 | const char *tpath = "Local\\"; | ||
1450 | snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), | ||
1451 | (unsigned int) RANDOM()); | ||
1452 | *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); | ||
1453 | *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size); | ||
1454 | if (*ptr == NULL) | ||
1455 | { | ||
1456 | CloseHandle (*map); | ||
854 | return 1; | 1457 | return 1; |
1458 | } | ||
855 | return 0; | 1459 | return 0; |
856 | } | 1460 | } |
1461 | |||
1462 | static void | ||
1463 | destroy_shm_w32 (void *ptr, HANDLE map) | ||
1464 | { | ||
1465 | UnmapViewOfFile (ptr); | ||
1466 | CloseHandle (map); | ||
1467 | } | ||
857 | 1468 | ||
1469 | #else | ||
858 | 1470 | ||
859 | /** | 1471 | /** |
860 | * Setup a shared memory segment. | 1472 | * Setup a shared memory segment. |
@@ -867,22 +1479,10 @@ extract_oop (struct EXTRACTOR_PluginList *plugin, | |||
867 | * @return 0 on success | 1479 | * @return 0 on success |
868 | */ | 1480 | */ |
869 | static int | 1481 | static int |
870 | make_shm (int is_tail, | 1482 | make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size) |
871 | void **ptr, | ||
872 | #ifndef WINDOWS | ||
873 | int *shmid, | ||
874 | #else | ||
875 | HANDLE *map, | ||
876 | #endif | ||
877 | char *fn, | ||
878 | size_t fn_size, | ||
879 | size_t size) | ||
880 | { | 1483 | { |
881 | const char *tpath; | 1484 | const char *tpath; |
882 | #ifdef WINDOWS | 1485 | #if SOMEBSD |
883 | tpath = "Local\\"; | ||
884 | #elif SOMEBSD | ||
885 | const char *tpath; | ||
886 | /* this works on FreeBSD, not sure about others... */ | 1486 | /* this works on FreeBSD, not sure about others... */ |
887 | tpath = getenv ("TMPDIR"); | 1487 | tpath = getenv ("TMPDIR"); |
888 | if (tpath == NULL) | 1488 | if (tpath == NULL) |
@@ -890,578 +1490,606 @@ make_shm (int is_tail, | |||
890 | #else | 1490 | #else |
891 | tpath = "/"; /* Linux */ | 1491 | tpath = "/"; /* Linux */ |
892 | #endif | 1492 | #endif |
893 | snprintf (fn, | 1493 | snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), |
894 | fn_size, | 1494 | (unsigned int) RANDOM()); |
895 | "%slibextractor-%sshm-%u-%u", | ||
896 | tpath, | ||
897 | (is_tail) ? "t" : "", | ||
898 | getpid(), | ||
899 | (unsigned int) RANDOM()); | ||
900 | #ifndef WINDOWS | ||
901 | *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); | 1495 | *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); |
902 | *ptr = NULL; | 1496 | *ptr = NULL; |
903 | if (-1 == (*shmid)) | 1497 | if (-1 == *shmid) |
904 | return 1; | 1498 | return 1; |
905 | if ( (0 != ftruncate (*shmid, size)) || | 1499 | if ((0 != ftruncate (*shmid, size)) || |
906 | (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || | 1500 | (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || |
907 | (*ptr == (void*) -1) ) | 1501 | (*ptr == (void*) -1) ) |
1502 | { | ||
1503 | close (*shmid); | ||
1504 | *shmid = -1; | ||
1505 | shm_unlink (fn); | ||
1506 | return 1; | ||
1507 | } | ||
1508 | return 0; | ||
1509 | } | ||
1510 | |||
1511 | static void | ||
1512 | destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name) | ||
1513 | { | ||
1514 | if (NULL != ptr) | ||
1515 | munmap (ptr, size); | ||
1516 | if (shm_id != -1) | ||
1517 | close (shm_id); | ||
1518 | shm_unlink (shm_name); | ||
1519 | } | ||
1520 | #endif | ||
1521 | |||
1522 | |||
1523 | static void | ||
1524 | init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name, int64_t fsize) | ||
1525 | { | ||
1526 | int write_result; | ||
1527 | int init_state_size; | ||
1528 | unsigned char *init_state; | ||
1529 | int t; | ||
1530 | size_t shm_name_len = strlen (shm_name) + 1; | ||
1531 | init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t); | ||
1532 | switch (plugin->flags) | ||
1533 | { | ||
1534 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | ||
1535 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
1536 | init_state = malloc (init_state_size); | ||
1537 | if (init_state == NULL) | ||
908 | { | 1538 | { |
909 | close (*shmid); | 1539 | stop_process (plugin); |
910 | *shmid = -1; | 1540 | return; |
911 | shm_unlink (fn); | ||
912 | return 1; | ||
913 | } | 1541 | } |
914 | return 0; | 1542 | t = 0; |
915 | #else | 1543 | init_state[t] = MESSAGE_INIT_STATE; |
916 | *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); | 1544 | t += 1; |
917 | *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size); | 1545 | memcpy (&init_state[t], &fsize, sizeof (int64_t)); |
918 | if (*ptr == NULL) | 1546 | t += sizeof (int64_t); |
1547 | memcpy (&init_state[t], &shm_name_len, sizeof (size_t)); | ||
1548 | t += sizeof (size_t); | ||
1549 | memcpy (&init_state[t], shm_name, shm_name_len); | ||
1550 | t += shm_name_len; | ||
1551 | write_result = plugin_write (plugin, init_state, init_state_size); | ||
1552 | free (init_state); | ||
1553 | if (write_result < init_state_size) | ||
919 | { | 1554 | { |
920 | CloseHandle (*map); | 1555 | stop_process (plugin); |
921 | return 1; | 1556 | return; |
922 | } | 1557 | } |
923 | return 0; | 1558 | plugin->seek_request = 0; |
924 | #endif | 1559 | break; |
1560 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
1561 | plugin_open_shm (plugin, shm_name); | ||
1562 | plugin->fsize = fsize; | ||
1563 | plugin->init_state_method (plugin); | ||
1564 | plugin->seek_request = 0; | ||
1565 | return; | ||
1566 | break; | ||
1567 | case EXTRACTOR_OPTION_DISABLED: | ||
1568 | return; | ||
1569 | break; | ||
1570 | } | ||
1571 | } | ||
1572 | |||
1573 | static void | ||
1574 | discard_plugin_state (struct EXTRACTOR_PluginList *plugin) | ||
1575 | { | ||
1576 | int write_result; | ||
1577 | unsigned char discard_state = MESSAGE_DISCARD_STATE; | ||
1578 | switch (plugin->flags) | ||
1579 | { | ||
1580 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | ||
1581 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
1582 | /* This is somewhat clumsy, but it's the only stop-indicating | ||
1583 | * non-W32/POSIX-specific field i could think of... | ||
1584 | */ | ||
1585 | if (plugin->cpipe_out != -1) | ||
1586 | { | ||
1587 | write_result = plugin_write (plugin, &discard_state, 1); | ||
1588 | if (write_result < 1) | ||
1589 | { | ||
1590 | stop_process (plugin); | ||
1591 | return; | ||
1592 | } | ||
1593 | } | ||
1594 | break; | ||
1595 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
1596 | plugin->discard_state_method (plugin); | ||
1597 | return; | ||
1598 | break; | ||
1599 | case EXTRACTOR_OPTION_DISABLED: | ||
1600 | return; | ||
1601 | break; | ||
1602 | } | ||
925 | } | 1603 | } |
926 | 1604 | ||
1605 | static int | ||
1606 | give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size) | ||
1607 | { | ||
1608 | int write_result; | ||
1609 | int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t); | ||
1610 | unsigned char updated_shm[updated_shm_size]; | ||
1611 | int t = 0; | ||
1612 | updated_shm[t] = MESSAGE_UPDATED_SHM; | ||
1613 | t += 1; | ||
1614 | memcpy (&updated_shm[t], &position, sizeof (int64_t)); | ||
1615 | t += sizeof (int64_t); | ||
1616 | memcpy (&updated_shm[t], &map_size, sizeof (size_t)); | ||
1617 | t += sizeof (size_t); | ||
1618 | switch (plugin->flags) | ||
1619 | { | ||
1620 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | ||
1621 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
1622 | if (plugin->seek_request < 0) | ||
1623 | return 0; | ||
1624 | write_result = plugin_write (plugin, updated_shm, updated_shm_size); | ||
1625 | if (write_result < updated_shm_size) | ||
1626 | { | ||
1627 | stop_process (plugin); | ||
1628 | return 0; | ||
1629 | } | ||
1630 | return 1; | ||
1631 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
1632 | plugin->position = position; | ||
1633 | plugin->map_size = map_size; | ||
1634 | return 0; | ||
1635 | case EXTRACTOR_OPTION_DISABLED: | ||
1636 | return 0; | ||
1637 | default: | ||
1638 | return 1; | ||
1639 | } | ||
1640 | } | ||
927 | 1641 | ||
928 | /** | ||
929 | * Extract keywords using the given set of plugins. | ||
930 | * | ||
931 | * @param plugins the list of plugins to use | ||
932 | * @param data data to process, never NULL | ||
933 | * @param size number of bytes in data, ignored if data is NULL | ||
934 | * @param tdata end of file data, or NULL | ||
935 | * @param tsize number of bytes in tdata | ||
936 | * @param proc function to call for each meta data item found | ||
937 | * @param proc_cls cls argument to proc | ||
938 | */ | ||
939 | static void | 1642 | static void |
940 | extract (struct EXTRACTOR_PluginList *plugins, | 1643 | ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
941 | const char * data, | ||
942 | size_t size, | ||
943 | const char * tdata, | ||
944 | size_t tsize, | ||
945 | EXTRACTOR_MetaDataProcessor proc, | ||
946 | void *proc_cls) | ||
947 | { | 1644 | { |
948 | struct EXTRACTOR_PluginList *ppos; | 1645 | int extract_reply; |
949 | enum EXTRACTOR_Options flags; | 1646 | switch (plugin->flags) |
950 | void *ptr; | 1647 | { |
951 | void *tptr; | 1648 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
952 | char fn[255]; | 1649 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
953 | char tfn[255]; | 1650 | return; |
954 | int want_shm; | 1651 | case EXTRACTOR_OPTION_IN_PROCESS: |
955 | int want_tail; | 1652 | if (plugin->seek_request >= 0) |
956 | #ifndef WINDOWS | 1653 | { |
957 | int shmid; | 1654 | plugin->shm_ptr = shm_ptr; |
958 | int tshmid; | 1655 | extract_reply = plugin->extract_method (plugin, proc, proc_cls); |
959 | #else | 1656 | if (extract_reply == 1) |
960 | HANDLE map; | 1657 | plugin->seek_request = -1; |
961 | HANDLE tmap; | 1658 | } |
962 | #endif | 1659 | break; |
1660 | case EXTRACTOR_OPTION_DISABLED: | ||
1661 | return; | ||
1662 | break; | ||
1663 | } | ||
1664 | } | ||
963 | 1665 | ||
964 | want_shm = 0; | 1666 | #if !WINDOWS |
965 | ppos = plugins; | 1667 | int |
966 | while (NULL != ppos) | 1668 | plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) |
967 | { | 1669 | { |
968 | switch (ppos->flags) | 1670 | ssize_t read_result; |
969 | { | 1671 | size_t read_count = 0; |
970 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | 1672 | while (read_count < size) |
971 | #ifndef WINDOWS | 1673 | { |
972 | if ( (0 == ppos->cpid) || | 1674 | read_result = read (plugin->cpipe_out, &buf[read_count], size - read_count); |
973 | (-1 == ppos->cpid) ) | 1675 | if (read_result <= 0) |
974 | #else | 1676 | return read_result; |
975 | if (ppos->hProcess == NULL || ppos->hProcess == INVALID_HANDLE_VALUE) | 1677 | read_count += read_result; |
976 | #endif | 1678 | } |
977 | start_process (ppos); | 1679 | return read_count; |
978 | want_shm = 1; | 1680 | } |
979 | break; | ||
980 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
981 | #ifndef WINDOWS | ||
982 | if (0 == ppos->cpid) | ||
983 | #else | 1681 | #else |
984 | if (ppos->hProcess == NULL) | 1682 | int |
1683 | plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) | ||
1684 | { | ||
1685 | DWORD bytes_read; | ||
1686 | BOOL bresult; | ||
1687 | size_t read_count = 0; | ||
1688 | while (read_count < size) | ||
1689 | { | ||
1690 | bresult = ReadFile (plugin->cpipe_out, &buf[read_count], size - read_count, &bytes_read, NULL); | ||
1691 | if (!bresult) | ||
1692 | return -1; | ||
1693 | read_count += bytes_read; | ||
1694 | } | ||
1695 | return read_count; | ||
1696 | } | ||
985 | #endif | 1697 | #endif |
986 | start_process (ppos); | 1698 | |
987 | want_shm = 1; | 1699 | static int |
988 | break; | 1700 | receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
989 | case EXTRACTOR_OPTION_IN_PROCESS: | 1701 | { |
990 | break; | 1702 | int read_result; |
991 | case EXTRACTOR_OPTION_DISABLED: | 1703 | unsigned char code; |
992 | break; | 1704 | int must_read = 1; |
993 | } | 1705 | |
994 | ppos = ppos->next; | 1706 | int64_t seek_position; |
1707 | struct IpcHeader hdr; | ||
1708 | char *mime_type; | ||
1709 | char *data; | ||
1710 | |||
1711 | while (must_read) | ||
1712 | { | ||
1713 | read_result = plugin_read (plugin, &code, 1); | ||
1714 | if (read_result < 1) | ||
1715 | return -1; | ||
1716 | switch (code) | ||
1717 | { | ||
1718 | case MESSAGE_DONE: /* Done */ | ||
1719 | plugin->seek_request = -1; | ||
1720 | must_read = 0; | ||
1721 | break; | ||
1722 | case MESSAGE_SEEK: /* Seek */ | ||
1723 | read_result = plugin_read (plugin, (unsigned char *) &seek_position, sizeof (int64_t)); | ||
1724 | if (read_result < sizeof (int64_t)) | ||
1725 | return -1; | ||
1726 | plugin->seek_request = seek_position; | ||
1727 | must_read = 0; | ||
1728 | break; | ||
1729 | case MESSAGE_META: /* Meta */ | ||
1730 | read_result = plugin_read (plugin, (unsigned char *) &hdr, sizeof (hdr)); | ||
1731 | if (read_result < sizeof (hdr)) /* FIXME: check hdr for sanity */ | ||
1732 | return -1; | ||
1733 | mime_type = malloc (hdr.mime_len + 1); | ||
1734 | if (mime_type == NULL) | ||
1735 | return -1; | ||
1736 | read_result = plugin_read (plugin, (unsigned char *) mime_type, hdr.mime_len); | ||
1737 | if (read_result < hdr.mime_len) | ||
1738 | return -1; | ||
1739 | mime_type[hdr.mime_len] = '\0'; | ||
1740 | data = malloc (hdr.data_len); | ||
1741 | if (data == NULL) | ||
1742 | { | ||
1743 | free (mime_type); | ||
1744 | return -1; | ||
1745 | } | ||
1746 | read_result = plugin_read (plugin, (unsigned char *) data, hdr.data_len); | ||
1747 | if (read_result < hdr.data_len) | ||
1748 | { | ||
1749 | free (mime_type); | ||
1750 | free (data); | ||
1751 | return -1; | ||
1752 | } | ||
1753 | read_result = proc (proc_cls, plugin->short_libname, hdr.meta_type, hdr.meta_format, mime_type, data, hdr.data_len); | ||
1754 | free (mime_type); | ||
1755 | free (data); | ||
1756 | if (read_result != 0) | ||
1757 | return 1; | ||
1758 | break; | ||
1759 | default: | ||
1760 | return -1; | ||
995 | } | 1761 | } |
996 | ptr = NULL; | 1762 | } |
997 | tptr = NULL; | 1763 | return 0; |
998 | if (want_shm) | 1764 | } |
1765 | |||
1766 | #if !WINDOWS | ||
1767 | static int | ||
1768 | wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
1769 | { | ||
1770 | int ready; | ||
1771 | int result; | ||
1772 | struct timeval tv; | ||
1773 | fd_set to_check; | ||
1774 | int highest = 0; | ||
1775 | int read_result; | ||
1776 | struct EXTRACTOR_PluginList *ppos; | ||
1777 | |||
1778 | FD_ZERO (&to_check); | ||
1779 | |||
1780 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | ||
1781 | { | ||
1782 | switch (ppos->flags) | ||
999 | { | 1783 | { |
1000 | if (size > MAX_READ) | 1784 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
1001 | size = MAX_READ; | 1785 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
1002 | if (0 == make_shm (0, | 1786 | if (ppos->seek_request == -1) |
1003 | &ptr, | 1787 | continue; |
1004 | #ifndef WINDOWS | 1788 | FD_SET (ppos->cpipe_out, &to_check); |
1005 | &shmid, | 1789 | if (highest < ppos->cpipe_out) |
1006 | #else | 1790 | highest = ppos->cpipe_out; |
1007 | &map, | 1791 | break; |
1008 | #endif | 1792 | case EXTRACTOR_OPTION_IN_PROCESS: |
1009 | fn, sizeof(fn), size)) | 1793 | break; |
1010 | { | 1794 | case EXTRACTOR_OPTION_DISABLED: |
1011 | memcpy (ptr, data, size); | 1795 | break; |
1012 | if ( (tdata != NULL) && | ||
1013 | (0 == make_shm (1, | ||
1014 | &tptr, | ||
1015 | #ifndef WINDOWS | ||
1016 | &tshmid, | ||
1017 | #else | ||
1018 | &tmap, | ||
1019 | #endif | ||
1020 | tfn, sizeof(tfn), tsize)) ) | ||
1021 | { | ||
1022 | memcpy (tptr, tdata, tsize); | ||
1023 | } | ||
1024 | else | ||
1025 | { | ||
1026 | tptr = NULL; | ||
1027 | } | ||
1028 | } | ||
1029 | else | ||
1030 | { | ||
1031 | want_shm = 0; | ||
1032 | } | ||
1033 | } | 1796 | } |
1034 | ppos = plugins; | 1797 | } |
1035 | while (NULL != ppos) | 1798 | |
1799 | tv.tv_sec = 10; | ||
1800 | tv.tv_usec = 0; | ||
1801 | ready = select (highest + 1, &to_check, NULL, NULL, &tv); | ||
1802 | if (ready <= 0) | ||
1803 | /* an error or timeout -> something's wrong or all plugins hung up */ | ||
1804 | return -1; | ||
1805 | |||
1806 | result = 0; | ||
1807 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | ||
1808 | { | ||
1809 | switch (ppos->flags) | ||
1036 | { | 1810 | { |
1037 | flags = ppos->flags; | 1811 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
1038 | if (! want_shm) | 1812 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
1039 | flags = EXTRACTOR_OPTION_IN_PROCESS; | 1813 | if (ppos->seek_request == -1) |
1040 | switch (flags) | 1814 | continue; |
1041 | { | 1815 | if (FD_ISSET (ppos->cpipe_out, &to_check)) |
1042 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | 1816 | { |
1043 | if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, | 1817 | read_result = receive_reply (ppos, proc, proc_cls); |
1044 | (tptr != NULL) ? tfn : NULL, | 1818 | if (read_result < 0) |
1045 | proc, proc_cls)) | 1819 | { |
1046 | { | 1820 | stop_process (ppos); |
1047 | ppos = NULL; | 1821 | } |
1048 | break; | 1822 | result += 1; |
1049 | } | 1823 | } |
1050 | #ifndef WINDOWS | 1824 | break; |
1051 | if (ppos->cpid == -1) | 1825 | case EXTRACTOR_OPTION_IN_PROCESS: |
1826 | break; | ||
1827 | case EXTRACTOR_OPTION_DISABLED: | ||
1828 | break; | ||
1829 | } | ||
1830 | } | ||
1831 | return result; | ||
1832 | } | ||
1052 | #else | 1833 | #else |
1053 | if (ppos->hProcess == INVALID_HANDLE_VALUE) | 1834 | static int |
1054 | #endif | 1835 | wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
1055 | { | 1836 | { |
1056 | start_process (ppos); | 1837 | int result; |
1057 | if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, | 1838 | DWORD ms; |
1058 | (tptr != NULL) ? tfn : NULL, | 1839 | DWORD first_ready; |
1059 | proc, proc_cls)) | 1840 | DWORD dwresult; |
1060 | { | 1841 | DWORD bytes_read; |
1061 | ppos = NULL; | 1842 | BOOL bresult; |
1062 | break; | 1843 | int i; |
1063 | } | 1844 | HANDLE events[MAXIMUM_WAIT_OBJECTS]; |
1064 | } | 1845 | |
1065 | break; | 1846 | |
1066 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | 1847 | struct EXTRACTOR_PluginList *ppos; |
1067 | if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, | 1848 | |
1068 | (tptr != NULL) ? tfn : NULL, | 1849 | i = 0; |
1069 | proc, proc_cls)) | 1850 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1070 | { | 1851 | { |
1071 | ppos = NULL; | 1852 | if (i == MAXIMUM_WAIT_OBJECTS) |
1072 | break; | 1853 | return -1; |
1073 | } | 1854 | if (ppos->seek_request == -1) |
1074 | break; | 1855 | continue; |
1075 | case EXTRACTOR_OPTION_IN_PROCESS: | 1856 | switch (ppos->flags) |
1076 | want_tail = ( (ppos->specials != NULL) && | 1857 | { |
1077 | (NULL != strstr (ppos->specials, | 1858 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
1078 | "want-tail"))); | 1859 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
1079 | if (NULL == ppos->extractMethod) | 1860 | if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0) |
1080 | plugin_load (ppos); | 1861 | { |
1081 | if ( ( (ppos->specials == NULL) || | 1862 | ResetEvent (ppos->ov_read.hEvent); |
1082 | (NULL == strstr (ppos->specials, | 1863 | bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read, &ppos->ov_read); |
1083 | "oop-only")) ) ) | 1864 | if (bresult == TRUE) |
1084 | { | 1865 | { |
1085 | if (want_tail) | 1866 | SetEvent (ppos->ov_read.hEvent); |
1086 | { | 1867 | } |
1087 | if ( (NULL != ppos->extractMethod) && | 1868 | else |
1088 | (tdata != NULL) && | 1869 | { |
1089 | (0 != ppos->extractMethod (tdata, | 1870 | DWORD err = GetLastError (); |
1090 | tsize, | 1871 | if (err != ERROR_IO_PENDING) |
1091 | proc, | 1872 | SetEvent (ppos->ov_read.hEvent); |
1092 | proc_cls, | 1873 | } |
1093 | ppos->plugin_options)) ) | 1874 | } |
1094 | { | 1875 | events[i] = ppos->ov_read.hEvent; |
1095 | ppos = NULL; | 1876 | i++; |
1096 | break; | 1877 | break; |
1097 | } | 1878 | case EXTRACTOR_OPTION_IN_PROCESS: |
1098 | } | 1879 | break; |
1099 | else | 1880 | case EXTRACTOR_OPTION_DISABLED: |
1100 | { | 1881 | break; |
1101 | if ( (NULL != ppos->extractMethod) && | ||
1102 | (0 != ppos->extractMethod (data, | ||
1103 | size, | ||
1104 | proc, | ||
1105 | proc_cls, | ||
1106 | ppos->plugin_options)) ) | ||
1107 | { | ||
1108 | ppos = NULL; | ||
1109 | break; | ||
1110 | } | ||
1111 | } | ||
1112 | } | ||
1113 | break; | ||
1114 | case EXTRACTOR_OPTION_DISABLED: | ||
1115 | break; | ||
1116 | } | ||
1117 | if (ppos == NULL) | ||
1118 | break; | ||
1119 | ppos = ppos->next; | ||
1120 | } | 1882 | } |
1121 | if (want_shm) | 1883 | } |
1884 | |||
1885 | ms = 10000; | ||
1886 | first_ready = WaitForMultipleObjects (i, events, FALSE, ms); | ||
1887 | if (first_ready == WAIT_TIMEOUT || first_ready == WAIT_FAILED) | ||
1888 | /* an error or timeout -> something's wrong or all plugins hung up */ | ||
1889 | return -1; | ||
1890 | |||
1891 | i = 0; | ||
1892 | result = 0; | ||
1893 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | ||
1894 | { | ||
1895 | int read_result; | ||
1896 | switch (ppos->flags) | ||
1122 | { | 1897 | { |
1123 | #ifndef WINDOWS | 1898 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
1124 | if (NULL != ptr) | 1899 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
1125 | munmap (ptr, size); | 1900 | if (ppos->seek_request == -1) |
1126 | if (shmid != -1) | 1901 | continue; |
1127 | close (shmid); | 1902 | if (i < first_ready) |
1128 | shm_unlink (fn); | 1903 | { |
1129 | if (NULL != tptr) | 1904 | i += 1; |
1130 | { | 1905 | continue; |
1131 | munmap (tptr, tsize); | 1906 | } |
1132 | shm_unlink (tfn); | 1907 | dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0); |
1133 | if (tshmid != -1) | 1908 | read_result = 0; |
1134 | close (tshmid); | 1909 | if (dwresult == WAIT_OBJECT_0) |
1135 | } | 1910 | { |
1136 | #else | 1911 | read_result = receive_reply (ppos, proc, proc_cls); |
1137 | UnmapViewOfFile (ptr); | 1912 | result += 1; |
1138 | CloseHandle (map); | 1913 | } |
1139 | if (tptr != NULL) | 1914 | if (dwresult == WAIT_FAILED || read_result < 0) |
1140 | { | 1915 | { |
1141 | UnmapViewOfFile (tptr); | 1916 | stop_process (ppos); |
1142 | CloseHandle (tmap); | 1917 | if (dwresult == WAIT_FAILED) |
1143 | } | 1918 | result += 1; |
1919 | } | ||
1920 | i++; | ||
1921 | break; | ||
1922 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
1923 | break; | ||
1924 | case EXTRACTOR_OPTION_DISABLED: | ||
1925 | break; | ||
1926 | } | ||
1927 | } | ||
1928 | return result; | ||
1929 | } | ||
1930 | |||
1144 | #endif | 1931 | #endif |
1932 | |||
1933 | static int64_t | ||
1934 | seek_to_new_position (struct EXTRACTOR_PluginList *plugins, int fd, int64_t fsize, int64_t current_position) | ||
1935 | { | ||
1936 | int64_t min_pos = fsize; | ||
1937 | struct EXTRACTOR_PluginList *ppos; | ||
1938 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | ||
1939 | { | ||
1940 | switch (ppos->flags) | ||
1941 | { | ||
1942 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | ||
1943 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
1944 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
1945 | if (ppos->seek_request > 0 && ppos->seek_request >= current_position && | ||
1946 | ppos->seek_request <= min_pos) | ||
1947 | min_pos = ppos->seek_request; | ||
1948 | break; | ||
1949 | case EXTRACTOR_OPTION_DISABLED: | ||
1950 | break; | ||
1145 | } | 1951 | } |
1952 | } | ||
1953 | if (min_pos >= fsize) | ||
1954 | return -1; | ||
1955 | #if WINDOWS | ||
1956 | _lseeki64 (fd, min_pos, SEEK_SET); | ||
1957 | #elif !HAVE_SEEK64 | ||
1958 | lseek64 (fd, min_pos, SEEK_SET); | ||
1959 | #else | ||
1960 | if (min_pos >= INT_MAX) | ||
1961 | return -1; | ||
1962 | lseek (fd, (ssize_t) min_pos, SEEK_SET); | ||
1963 | #endif | ||
1964 | return min_pos; | ||
1146 | } | 1965 | } |
1147 | 1966 | ||
1967 | static void | ||
1968 | load_in_process_plugin (struct EXTRACTOR_PluginList *plugin) | ||
1969 | { | ||
1970 | switch (plugin->flags) | ||
1971 | { | ||
1972 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | ||
1973 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | ||
1974 | case EXTRACTOR_OPTION_DISABLED: | ||
1975 | break; | ||
1976 | case EXTRACTOR_OPTION_IN_PROCESS: | ||
1977 | plugin_load (plugin); | ||
1978 | break; | ||
1979 | } | ||
1980 | } | ||
1148 | 1981 | ||
1149 | /** | 1982 | /** |
1150 | * If the given data is compressed using gzip or bzip2, decompress | 1983 | * Extract keywords using the given set of plugins. |
1151 | * it. Run 'extract' on the decompressed contents (or the original | ||
1152 | * contents if they were not compressed). | ||
1153 | * | 1984 | * |
1154 | * @param plugins the list of plugins to use | 1985 | * @param plugins the list of plugins to use |
1155 | * @param data data to process, never NULL | 1986 | * @param data data to process, or NULL if fds is not -1 |
1156 | * @param size number of bytes in data | 1987 | * @param fd file to read data from, or -1 if data is not NULL |
1157 | * @param tdata end of file data, or NULL | 1988 | * @param fsize size of data or size of file |
1158 | * @param tsize number of bytes in tdata | 1989 | * @param buffer a buffer with data alteady read from the file (if fd != -1) |
1990 | * @param buffer_size size of buffer | ||
1159 | * @param proc function to call for each meta data item found | 1991 | * @param proc function to call for each meta data item found |
1160 | * @param proc_cls cls argument to proc | 1992 | * @param proc_cls cls argument to proc |
1161 | */ | 1993 | */ |
1162 | static void | 1994 | static void |
1163 | decompress_and_extract (struct EXTRACTOR_PluginList *plugins, | 1995 | do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) |
1164 | const unsigned char * data, | 1996 | { |
1165 | size_t size, | 1997 | int shm_result; |
1166 | const char * tdata, | 1998 | unsigned char *shm_ptr; |
1167 | size_t tsize, | 1999 | #if !WINDOWS |
1168 | EXTRACTOR_MetaDataProcessor proc, | 2000 | int shm_id; |
1169 | void *proc_cls) { | 2001 | #else |
1170 | unsigned char * buf; | 2002 | HANDLE map_handle; |
1171 | unsigned char * rbuf; | ||
1172 | size_t dsize; | ||
1173 | #if HAVE_ZLIB | ||
1174 | z_stream strm; | ||
1175 | int ret; | ||
1176 | size_t pos; | ||
1177 | #endif | 2003 | #endif |
1178 | #if HAVE_LIBBZ2 | 2004 | char shm_name[MAX_SHM_NAME + 1]; |
1179 | bz_stream bstrm; | 2005 | |
1180 | int bret; | 2006 | struct EXTRACTOR_PluginList *ppos; |
1181 | size_t bpos; | 2007 | |
2008 | int64_t position = 0; | ||
2009 | size_t map_size; | ||
2010 | ssize_t read_result; | ||
2011 | int kill_plugins = 0; | ||
2012 | |||
2013 | map_size = (fd == -1) ? fsize : MAX_READ; | ||
2014 | |||
2015 | /* Make a shared memory object. Even if we're running in-process. Simpler that way */ | ||
2016 | #if !WINDOWS | ||
2017 | shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name, MAX_SHM_NAME, | ||
2018 | map_size); | ||
2019 | #else | ||
2020 | shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name, MAX_SHM_NAME, | ||
2021 | map_size); | ||
1182 | #endif | 2022 | #endif |
2023 | if (shm_result != 0) | ||
2024 | return; | ||
1183 | 2025 | ||
1184 | buf = NULL; | 2026 | /* This three-loops-instead-of-one construction is intended to increase parallelism */ |
1185 | dsize = 0; | 2027 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1186 | #if HAVE_ZLIB | 2028 | start_process (ppos); |
1187 | /* try gzip decompression first */ | 2029 | |
1188 | if ( (size >= 12) && | 2030 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1189 | (data[0] == 0x1f) && | 2031 | load_in_process_plugin (ppos); |
1190 | (data[1] == 0x8b) && | 2032 | |
1191 | (data[2] == 0x08) ) | 2033 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
2034 | write_plugin_data (ppos); | ||
2035 | |||
2036 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) | ||
2037 | init_plugin_state (ppos, shm_name, fsize); | ||
2038 | |||
2039 | while (1) | ||
2040 | { | ||
2041 | int plugins_not_ready = 0; | ||
2042 | if (fd != -1) | ||
1192 | { | 2043 | { |
1193 | /* Process gzip header */ | 2044 | /* fill the share buffer with data from the file */ |
1194 | unsigned int gzip_header_length = 10; | 2045 | if (buffer_size > 0) |
1195 | 2046 | memcpy (shm_ptr, buffer, buffer_size); | |
1196 | if (data[3] & 0x4) /* FEXTRA set */ | 2047 | read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size); |
1197 | gzip_header_length += 2 + (unsigned) (data[10] & 0xff) | 2048 | if (read_result <= 0) |
1198 | + (((unsigned) (data[11] & 0xff)) * 256); | 2049 | break; |
1199 | |||
1200 | if (data[3] & 0x8) /* FNAME set */ | ||
1201 | { | ||
1202 | const unsigned char * cptr = data + gzip_header_length; | ||
1203 | /* stored file name is here */ | ||
1204 | while (cptr < data + size) | ||
1205 | { | ||
1206 | if ('\0' == *cptr) | ||
1207 | break; | ||
1208 | cptr++; | ||
1209 | } | ||
1210 | if (0 != proc (proc_cls, | ||
1211 | "<zlib>", | ||
1212 | EXTRACTOR_METATYPE_FILENAME, | ||
1213 | EXTRACTOR_METAFORMAT_C_STRING, | ||
1214 | "text/plain", | ||
1215 | (const char*) (data + gzip_header_length), | ||
1216 | cptr - (data + gzip_header_length))) | ||
1217 | return; /* done */ | ||
1218 | gzip_header_length = (cptr - data) + 1; | ||
1219 | } | ||
1220 | if (data[3] & 0x16) /* FCOMMENT set */ | ||
1221 | { | ||
1222 | const unsigned char * cptr = data + gzip_header_length; | ||
1223 | /* stored comment is here */ | ||
1224 | while (cptr < data + size) | ||
1225 | { | ||
1226 | if('\0' == *cptr) | ||
1227 | break; | ||
1228 | cptr ++; | ||
1229 | } | ||
1230 | if (0 != proc (proc_cls, | ||
1231 | "<zlib>", | ||
1232 | EXTRACTOR_METATYPE_COMMENT, | ||
1233 | EXTRACTOR_METAFORMAT_C_STRING, | ||
1234 | "text/plain", | ||
1235 | (const char*) (data + gzip_header_length), | ||
1236 | cptr - (data + gzip_header_length))) | ||
1237 | return; /* done */ | ||
1238 | gzip_header_length = (cptr - data) + 1; | ||
1239 | } | ||
1240 | if(data[3] & 0x2) /* FCHRC set */ | ||
1241 | gzip_header_length += 2; | ||
1242 | memset(&strm, | ||
1243 | 0, | ||
1244 | sizeof(z_stream)); | ||
1245 | #ifdef ZLIB_VERNUM | ||
1246 | gzip_header_length = 0; | ||
1247 | #endif | ||
1248 | if (size > gzip_header_length) | ||
1249 | { | ||
1250 | strm.next_in = (Bytef*) data + gzip_header_length; | ||
1251 | strm.avail_in = size - gzip_header_length; | ||
1252 | } | ||
1253 | else | 2050 | else |
1254 | { | 2051 | map_size = read_result + buffer_size; |
1255 | strm.next_in = (Bytef*) data; | 2052 | if (buffer_size > 0) |
1256 | strm.avail_in = 0; | 2053 | buffer_size = 0; |
1257 | } | ||
1258 | strm.total_in = 0; | ||
1259 | strm.zalloc = NULL; | ||
1260 | strm.zfree = NULL; | ||
1261 | strm.opaque = NULL; | ||
1262 | |||
1263 | /* | ||
1264 | * note: maybe plain inflateInit(&strm) is adequate, | ||
1265 | * it looks more backward-compatible also ; | ||
1266 | * | ||
1267 | * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; | ||
1268 | * there might be a better check. | ||
1269 | */ | ||
1270 | if (Z_OK == inflateInit2(&strm, | ||
1271 | #ifdef ZLIB_VERNUM | ||
1272 | 15 + 32 | ||
1273 | #else | ||
1274 | -MAX_WBITS | ||
1275 | #endif | ||
1276 | )) { | ||
1277 | dsize = 2 * size; | ||
1278 | if (dsize > MAX_DECOMPRESS) | ||
1279 | dsize = MAX_DECOMPRESS; | ||
1280 | buf = malloc(dsize); | ||
1281 | pos = 0; | ||
1282 | if (buf == NULL) | ||
1283 | { | ||
1284 | inflateEnd(&strm); | ||
1285 | } | ||
1286 | else | ||
1287 | { | ||
1288 | strm.next_out = (Bytef*) buf; | ||
1289 | strm.avail_out = dsize; | ||
1290 | do | ||
1291 | { | ||
1292 | ret = inflate(&strm, | ||
1293 | Z_SYNC_FLUSH); | ||
1294 | if (ret == Z_OK) | ||
1295 | { | ||
1296 | if (dsize == MAX_DECOMPRESS) | ||
1297 | break; | ||
1298 | pos += strm.total_out; | ||
1299 | strm.total_out = 0; | ||
1300 | dsize *= 2; | ||
1301 | if (dsize > MAX_DECOMPRESS) | ||
1302 | dsize = MAX_DECOMPRESS; | ||
1303 | rbuf = realloc(buf, dsize); | ||
1304 | if (rbuf == NULL) | ||
1305 | { | ||
1306 | free (buf); | ||
1307 | buf = NULL; | ||
1308 | break; | ||
1309 | } | ||
1310 | buf = rbuf; | ||
1311 | strm.next_out = (Bytef*) &buf[pos]; | ||
1312 | strm.avail_out = dsize - pos; | ||
1313 | } | ||
1314 | else if (ret != Z_STREAM_END) | ||
1315 | { | ||
1316 | /* error */ | ||
1317 | free(buf); | ||
1318 | buf = NULL; | ||
1319 | } | ||
1320 | } while ( (buf != NULL) && | ||
1321 | (ret != Z_STREAM_END) ); | ||
1322 | dsize = pos + strm.total_out; | ||
1323 | inflateEnd(&strm); | ||
1324 | if ( (dsize == 0) && | ||
1325 | (buf != NULL) ) | ||
1326 | { | ||
1327 | free(buf); | ||
1328 | buf = NULL; | ||
1329 | } | ||
1330 | } | ||
1331 | } | ||
1332 | } | 2054 | } |
1333 | #endif | 2055 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1334 | 2056 | plugins_not_ready += give_shm_to_plugin (ppos, position, map_size); | |
1335 | #if HAVE_LIBBZ2 | 2057 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1336 | if ( (size >= 4) && | 2058 | ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls); |
1337 | (data[0] == 'B') && | 2059 | while (plugins_not_ready > 0 && !kill_plugins) |
1338 | (data[1] == 'Z') && | ||
1339 | (data[2] == 'h') ) | ||
1340 | { | 2060 | { |
1341 | /* now try bz2 decompression */ | 2061 | int ready = wait_for_reply (plugins, proc, proc_cls); |
1342 | memset(&bstrm, | 2062 | if (ready <= 0) |
1343 | 0, | 2063 | kill_plugins = 1; |
1344 | sizeof(bz_stream)); | 2064 | plugins_not_ready -= ready; |
1345 | bstrm.next_in = (char*) data; | ||
1346 | bstrm.avail_in = size; | ||
1347 | bstrm.total_in_lo32 = 0; | ||
1348 | bstrm.total_in_hi32 = 0; | ||
1349 | bstrm.bzalloc = NULL; | ||
1350 | bstrm.bzfree = NULL; | ||
1351 | bstrm.opaque = NULL; | ||
1352 | if ( (buf == NULL) && | ||
1353 | (BZ_OK == BZ2_bzDecompressInit(&bstrm, | ||
1354 | 0, | ||
1355 | 0)) ) | ||
1356 | { | ||
1357 | dsize = 2 * size; | ||
1358 | if (dsize > MAX_DECOMPRESS) | ||
1359 | dsize = MAX_DECOMPRESS; | ||
1360 | buf = malloc(dsize); | ||
1361 | bpos = 0; | ||
1362 | if (buf == NULL) | ||
1363 | { | ||
1364 | BZ2_bzDecompressEnd(&bstrm); | ||
1365 | } | ||
1366 | else | ||
1367 | { | ||
1368 | bstrm.next_out = (char*) buf; | ||
1369 | bstrm.avail_out = dsize; | ||
1370 | do { | ||
1371 | bret = BZ2_bzDecompress(&bstrm); | ||
1372 | if (bret == Z_OK) | ||
1373 | { | ||
1374 | if (dsize == MAX_DECOMPRESS) | ||
1375 | break; | ||
1376 | bpos += bstrm.total_out_lo32; | ||
1377 | bstrm.total_out_lo32 = 0; | ||
1378 | dsize *= 2; | ||
1379 | if (dsize > MAX_DECOMPRESS) | ||
1380 | dsize = MAX_DECOMPRESS; | ||
1381 | rbuf = realloc(buf, dsize); | ||
1382 | if (rbuf == NULL) | ||
1383 | { | ||
1384 | free (buf); | ||
1385 | buf = NULL; | ||
1386 | break; | ||
1387 | } | ||
1388 | buf = rbuf; | ||
1389 | bstrm.next_out = (char*) &buf[bpos]; | ||
1390 | bstrm.avail_out = dsize - bpos; | ||
1391 | } | ||
1392 | else if (bret != BZ_STREAM_END) | ||
1393 | { | ||
1394 | /* error */ | ||
1395 | free(buf); | ||
1396 | buf = NULL; | ||
1397 | } | ||
1398 | } while ( (buf != NULL) && | ||
1399 | (bret != BZ_STREAM_END) ); | ||
1400 | dsize = bpos + bstrm.total_out_lo32; | ||
1401 | BZ2_bzDecompressEnd(&bstrm); | ||
1402 | if ( (dsize == 0) && | ||
1403 | (buf != NULL) ) | ||
1404 | { | ||
1405 | free(buf); | ||
1406 | buf = NULL; | ||
1407 | } | ||
1408 | } | ||
1409 | } | ||
1410 | } | 2065 | } |
1411 | #endif | 2066 | if (kill_plugins) |
1412 | if (buf != NULL) | 2067 | break; |
2068 | if (fd != -1) | ||
1413 | { | 2069 | { |
1414 | data = buf; | 2070 | position += map_size; |
1415 | size = dsize; | 2071 | position = seek_to_new_position (plugins, fd, fsize, position); |
2072 | if (position < 0) | ||
2073 | break; | ||
1416 | } | 2074 | } |
1417 | extract (plugins, | 2075 | else |
1418 | (const char*) data, | 2076 | break; |
1419 | size, | 2077 | } |
1420 | tdata, | ||
1421 | tsize, | ||
1422 | proc, | ||
1423 | proc_cls); | ||
1424 | if (buf != NULL) | ||
1425 | free(buf); | ||
1426 | errno = 0; /* kill transient errors */ | ||
1427 | } | ||
1428 | |||
1429 | 2078 | ||
1430 | /** | 2079 | if (kill_plugins) |
1431 | * Open a file | 2080 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1432 | */ | 2081 | stop_process (ppos); |
1433 | static int file_open(const char *filename, int oflag, ...) | 2082 | for (ppos = plugins; NULL != ppos; ppos = ppos->next) |
1434 | { | 2083 | discard_plugin_state (ppos); |
1435 | int mode; | ||
1436 | const char *fn; | ||
1437 | #ifdef MINGW | ||
1438 | char szFile[_MAX_PATH + 1]; | ||
1439 | long lRet; | ||
1440 | 2084 | ||
1441 | if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS) | 2085 | #if WINDOWS |
1442 | { | 2086 | destroy_shm_w32 (shm_ptr, map_handle); |
1443 | errno = ENOENT; | ||
1444 | SetLastError(lRet); | ||
1445 | return -1; | ||
1446 | } | ||
1447 | fn = szFile; | ||
1448 | #else | 2087 | #else |
1449 | fn = filename; | 2088 | destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name); |
1450 | #endif | ||
1451 | mode = 0; | ||
1452 | #ifdef MINGW | ||
1453 | /* Set binary mode */ | ||
1454 | mode |= O_BINARY; | ||
1455 | #endif | 2089 | #endif |
1456 | return OPEN(fn, oflag, mode); | ||
1457 | } | 2090 | } |
1458 | 2091 | ||
1459 | 2092 | ||
1460 | #ifndef O_LARGEFILE | ||
1461 | #define O_LARGEFILE 0 | ||
1462 | #endif | ||
1463 | |||
1464 | |||
1465 | /** | 2093 | /** |
1466 | * Extract keywords from a file using the given set of plugins. | 2094 | * Extract keywords from a file using the given set of plugins. |
1467 | * If needed, opens the file and loads its data (via mmap). Then | 2095 | * If needed, opens the file and loads its data (via mmap). Then |
@@ -1478,93 +2106,152 @@ static int file_open(const char *filename, int oflag, ...) | |||
1478 | */ | 2106 | */ |
1479 | void | 2107 | void |
1480 | EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, | 2108 | EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, |
1481 | const char *filename, | 2109 | const char *filename, |
1482 | const void *data, | 2110 | const void *data, |
1483 | size_t size, | 2111 | size_t size, |
1484 | EXTRACTOR_MetaDataProcessor proc, | 2112 | EXTRACTOR_MetaDataProcessor proc, |
1485 | void *proc_cls) | 2113 | void *proc_cls) |
1486 | { | 2114 | { |
1487 | int fd; | 2115 | int fd = -1; |
1488 | void * buffer; | 2116 | struct stat64 fstatbuf; |
1489 | void * tbuffer; | 2117 | int64_t fsize = 0; |
1490 | struct stat fstatbuf; | 2118 | int memory_only = 1; |
1491 | size_t fsize; | 2119 | int compression_type = -1; |
1492 | size_t tsize; | 2120 | void *buffer = NULL; |
1493 | int eno; | 2121 | size_t buffer_size; |
1494 | off_t offset; | 2122 | int decompression_result; |
1495 | long pg; | 2123 | |
1496 | #ifdef WINDOWS | 2124 | /* If data is not given, then we need to read it from the file. Try opening it */ |
1497 | SYSTEM_INFO sys; | 2125 | if ((data == NULL) && |
1498 | #endif | 2126 | (filename != NULL) && |
1499 | 2127 | (0 == STAT64(filename, &fstatbuf)) && | |
1500 | fd = -1; | 2128 | (!S_ISDIR(fstatbuf.st_mode)) && |
1501 | buffer = NULL; | 2129 | (-1 != (fd = file_open (filename, |
1502 | if ( (data == NULL) && | 2130 | O_RDONLY | O_LARGEFILE)))) |
1503 | (filename != NULL) && | 2131 | { |
1504 | (0 == STAT(filename, &fstatbuf)) && | 2132 | /* Empty files are of no interest */ |
1505 | (!S_ISDIR(fstatbuf.st_mode)) && | 2133 | fsize = fstatbuf.st_size; |
1506 | (-1 != (fd = file_open (filename, | 2134 | if (fsize == 0) |
1507 | O_RDONLY | O_LARGEFILE))) ) | 2135 | { |
1508 | { | 2136 | close(fd); |
1509 | fsize = (fstatbuf.st_size > 0xFFFFFFFF) ? 0xFFFFFFFF : fstatbuf.st_size; | 2137 | return; |
1510 | if (fsize == 0) | ||
1511 | { | ||
1512 | close(fd); | ||
1513 | return; | ||
1514 | } | ||
1515 | if (fsize > MAX_READ) | ||
1516 | fsize = MAX_READ; | ||
1517 | buffer = MMAP(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0); | ||
1518 | if ( (buffer == NULL) || (buffer == (void *) -1) ) | ||
1519 | { | ||
1520 | eno = errno; | ||
1521 | close(fd); | ||
1522 | errno = eno; | ||
1523 | return; | ||
1524 | } | ||
1525 | } | 2138 | } |
1526 | if ( (buffer == NULL) && | 2139 | /* File is too big -> can't read it into memory */ |
1527 | (data == NULL) ) | 2140 | if (fsize > MAX_READ) |
2141 | memory_only = 0; | ||
2142 | } | ||
2143 | |||
2144 | /* Data is not given, and we've failed to open the file with data -> exit */ | ||
2145 | if ((fsize == 0) && (data == NULL)) | ||
2146 | return; | ||
2147 | /* fsize is now size of the data OR size of the file */ | ||
2148 | if (data != NULL) | ||
2149 | fsize = size; | ||
2150 | |||
2151 | errno = 0; | ||
2152 | /* Peek at first few bytes of the file (or of the data), and see if it's compressed. | ||
2153 | * If data is NULL, buffer is allocated by the function and holds the first few bytes | ||
2154 | * of the file, buffer_size is set too. | ||
2155 | */ | ||
2156 | compression_type = get_compression_type (data, fd, fsize, &buffer, &buffer_size); | ||
2157 | if (compression_type < 0) | ||
2158 | { | ||
2159 | /* errno is set by get_compression_type () */ | ||
2160 | if (fd != -1) | ||
2161 | close (fd); | ||
1528 | return; | 2162 | return; |
1529 | /* for footer extraction */ | 2163 | } |
1530 | tsize = 0; | 2164 | if (compression_type > 0) |
1531 | tbuffer = NULL; | 2165 | { |
1532 | if ( (data == NULL) && | 2166 | /* Don't assume that MAX_DECOMPRESS < MAX_READ */ |
1533 | (fstatbuf.st_size > fsize) && | 2167 | if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ)) |
1534 | (fstatbuf.st_size > MAX_READ) ) | 2168 | { |
2169 | /* File or data is too big to be decompressed in-memory (the only kind of decompression we do) */ | ||
2170 | errno = EFBIG; | ||
2171 | if (fd != -1) | ||
2172 | close (fd); | ||
2173 | if (buffer != NULL) | ||
2174 | free (buffer); | ||
2175 | return; | ||
2176 | } | ||
2177 | /* Decompress data (or file contents + what we've read so far). Either way it writes a new | ||
2178 | * pointer to buffer, sets buffer_size, and frees the old buffer (if it wasn't NULL). | ||
2179 | * In case of failure it cleans up the buffer after itself. | ||
2180 | * Will also report compression-related metadata to the caller. | ||
2181 | */ | ||
2182 | decompression_result = try_to_decompress (data, fd, fsize, compression_type, &buffer, &buffer_size, proc, proc_cls); | ||
2183 | if (decompression_result != 0) | ||
2184 | { | ||
2185 | /* Buffer is taken care of already */ | ||
2186 | close (fd); | ||
2187 | errno = EILSEQ; | ||
2188 | return; | ||
2189 | } | ||
2190 | else | ||
1535 | { | 2191 | { |
1536 | pg = SYSCONF (_SC_PAGE_SIZE); | 2192 | close (fd); |
1537 | if ( (pg > 0) && | 2193 | fd = -1; |
1538 | (pg < MAX_READ) ) | ||
1539 | { | ||
1540 | offset = (1 + (fstatbuf.st_size - MAX_READ) / pg) * pg; | ||
1541 | if (offset < fstatbuf.st_size) | ||
1542 | { | ||
1543 | tsize = fstatbuf.st_size - offset; | ||
1544 | tbuffer = MMAP (NULL, tsize, PROT_READ, MAP_PRIVATE, fd, offset); | ||
1545 | if ( (tbuffer == NULL) || (tbuffer == (void *) -1) ) | ||
1546 | { | ||
1547 | tsize = 0; | ||
1548 | tbuffer = NULL; | ||
1549 | } | ||
1550 | } | ||
1551 | } | ||
1552 | } | 2194 | } |
1553 | decompress_and_extract (plugins, | 2195 | } |
1554 | buffer != NULL ? buffer : data, | 2196 | |
1555 | buffer != NULL ? fsize : size, | 2197 | /* Now we either have a non-NULL data of fsize bytes |
1556 | tbuffer, | 2198 | * OR a valid fd to read from and a small buffer of buffer_size bytes |
1557 | tsize, | 2199 | * OR an invalid fd and a big buffer of buffer_size bytes |
1558 | proc, | 2200 | * Simplify this situation a bit: |
1559 | proc_cls); | 2201 | */ |
2202 | if ((data == NULL) && (fd == -1) && (buffer_size > 0)) | ||
2203 | { | ||
2204 | data = (const void *) buffer; | ||
2205 | fsize = buffer_size; | ||
2206 | } | ||
2207 | |||
2208 | /* Now we either have a non-NULL data of fsize bytes | ||
2209 | * OR a valid fd to read from and a small buffer of buffer_size bytes | ||
2210 | * and we might need to free the buffer later in either case | ||
2211 | */ | ||
2212 | |||
2213 | /* do_extract () might set errno itself, but from our point of view everything is OK */ | ||
2214 | errno = 0; | ||
2215 | |||
2216 | do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls); | ||
2217 | |||
1560 | if (buffer != NULL) | 2218 | if (buffer != NULL) |
1561 | MUNMAP (buffer, fsize); | 2219 | free (buffer); |
1562 | if (tbuffer != NULL) | ||
1563 | MUNMAP (tbuffer, tsize); | ||
1564 | if (-1 != fd) | 2220 | if (-1 != fd) |
1565 | close(fd); | 2221 | close(fd); |
1566 | } | 2222 | } |
1567 | 2223 | ||
2224 | |||
2225 | #if WINDOWS | ||
2226 | void CALLBACK | ||
2227 | RundllEntryPoint (HWND hwnd, | ||
2228 | HINSTANCE hinst, | ||
2229 | LPSTR lpszCmdLine, | ||
2230 | int nCmdShow) | ||
2231 | { | ||
2232 | intptr_t in_h; | ||
2233 | intptr_t out_h; | ||
2234 | int in, out; | ||
2235 | |||
2236 | sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h); | ||
2237 | in = _open_osfhandle (in_h, _O_RDONLY); | ||
2238 | out = _open_osfhandle (out_h, 0); | ||
2239 | setmode (in, _O_BINARY); | ||
2240 | setmode (out, _O_BINARY); | ||
2241 | process_requests (read_plugin_data (in), | ||
2242 | in, out); | ||
2243 | } | ||
2244 | |||
2245 | void CALLBACK | ||
2246 | RundllEntryPointA (HWND hwnd, | ||
2247 | HINSTANCE hinst, | ||
2248 | LPSTR lpszCmdLine, | ||
2249 | int nCmdShow) | ||
2250 | { | ||
2251 | return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow); | ||
2252 | } | ||
2253 | #endif | ||
2254 | |||
1568 | /** | 2255 | /** |
1569 | * Initialize gettext and libltdl (and W32 if needed). | 2256 | * Initialize gettext and libltdl (and W32 if needed). |
1570 | */ | 2257 | */ |
@@ -1579,12 +2266,12 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() { | |||
1579 | if (err > 0) { | 2266 | if (err > 0) { |
1580 | #if DEBUG | 2267 | #if DEBUG |
1581 | fprintf(stderr, | 2268 | fprintf(stderr, |
1582 | _("Initialization of plugin mechanism failed: %s!\n"), | 2269 | _("Initialization of plugin mechanism failed: %s!\n"), |
1583 | lt_dlerror()); | 2270 | lt_dlerror()); |
1584 | #endif | 2271 | #endif |
1585 | return; | 2272 | return; |
1586 | } | 2273 | } |
1587 | #ifdef MINGW | 2274 | #if WINDOWS |
1588 | plibc_init("GNU", PACKAGE); | 2275 | plibc_init("GNU", PACKAGE); |
1589 | #endif | 2276 | #endif |
1590 | } | 2277 | } |
@@ -1594,12 +2281,10 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() { | |||
1594 | * Deinit. | 2281 | * Deinit. |
1595 | */ | 2282 | */ |
1596 | void __attribute__ ((destructor)) EXTRACTOR_ltdl_fini() { | 2283 | void __attribute__ ((destructor)) EXTRACTOR_ltdl_fini() { |
1597 | #ifdef MINGW | 2284 | #if WINDOWS |
1598 | plibc_shutdown(); | 2285 | plibc_shutdown(); |
1599 | #endif | 2286 | #endif |
1600 | lt_dlexit (); | 2287 | lt_dlexit (); |
1601 | } | 2288 | } |
1602 | 2289 | ||
1603 | |||
1604 | |||
1605 | /* end of extractor.c */ | 2290 | /* end of extractor.c */ |
diff --git a/src/main/extractor_plugins.c b/src/main/extractor_plugins.c index 026e86b..f5c38f0 100644 --- a/src/main/extractor_plugins.c +++ b/src/main/extractor_plugins.c | |||
@@ -204,15 +204,24 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) | |||
204 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | 204 | plugin->flags = EXTRACTOR_OPTION_DISABLED; |
205 | return -1; | 205 | return -1; |
206 | } | 206 | } |
207 | plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle, | 207 | plugin->extract_method = get_symbol_with_prefix (plugin->libraryHandle, |
208 | "_EXTRACTOR_%s_extract", | 208 | "_EXTRACTOR_%s_extract_method", |
209 | plugin->libname, | 209 | plugin->libname, |
210 | &plugin->specials); | 210 | &plugin->specials); |
211 | if (plugin->extractMethod == NULL) | 211 | plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle, |
212 | "_EXTRACTOR_%s_init_state_method", | ||
213 | plugin->libname, | ||
214 | &plugin->specials); | ||
215 | plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle, | ||
216 | "_EXTRACTOR_%s_discard_state_method", | ||
217 | plugin->libname, | ||
218 | &plugin->specials); | ||
219 | if (plugin->extract_method == NULL || plugin->init_state_method == NULL || | ||
220 | plugin->discard_state_method == NULL) | ||
212 | { | 221 | { |
213 | #if DEBUG | 222 | #if DEBUG |
214 | fprintf (stderr, | 223 | fprintf (stderr, |
215 | "Resolving `extract' method of plugin `%s' failed: %s\n", | 224 | "Resolving `extract', 'init_state' or 'discard_state' method(s) of plugin `%s' failed: %s\n", |
216 | plugin->short_libname, | 225 | plugin->short_libname, |
217 | lt_dlerror ()); | 226 | lt_dlerror ()); |
218 | #endif | 227 | #endif |
@@ -243,8 +252,15 @@ EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, | |||
243 | enum EXTRACTOR_Options flags) | 252 | enum EXTRACTOR_Options flags) |
244 | { | 253 | { |
245 | struct EXTRACTOR_PluginList *result; | 254 | struct EXTRACTOR_PluginList *result; |
255 | struct EXTRACTOR_PluginList *i; | ||
246 | char *libname; | 256 | char *libname; |
247 | 257 | ||
258 | for (i = prev; i != NULL; i = i->next) | ||
259 | { | ||
260 | if (strcmp (i->short_libname, library) == 0) | ||
261 | return prev; | ||
262 | } | ||
263 | |||
248 | libname = find_plugin (library); | 264 | libname = find_plugin (library); |
249 | if (libname == NULL) | 265 | if (libname == NULL) |
250 | { | 266 | { |
diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h index ea0eabb..bea5c2b 100644 --- a/src/main/extractor_plugins.h +++ b/src/main/extractor_plugins.h | |||
@@ -64,7 +64,9 @@ struct EXTRACTOR_PluginList | |||
64 | /** | 64 | /** |
65 | * Pointer to the function used for meta data extraction. | 65 | * Pointer to the function used for meta data extraction. |
66 | */ | 66 | */ |
67 | EXTRACTOR_ExtractMethod extractMethod; | 67 | EXTRACTOR_extract_method extract_method; |
68 | EXTRACTOR_init_state_method init_state_method; | ||
69 | EXTRACTOR_discard_state_method discard_state_method; | ||
68 | 70 | ||
69 | /** | 71 | /** |
70 | * Options for the plugin. | 72 | * Options for the plugin. |
@@ -84,26 +86,72 @@ struct EXTRACTOR_PluginList | |||
84 | enum EXTRACTOR_Options flags; | 86 | enum EXTRACTOR_Options flags; |
85 | 87 | ||
86 | /** | 88 | /** |
87 | * Process ID of the child process for this plugin. 0 for | 89 | * Process ID of the child process for this plugin. 0 for none. |
88 | * none. | ||
89 | */ | 90 | */ |
90 | #ifndef WINDOWS | 91 | #if !WINDOWS |
91 | int cpid; | 92 | int cpid; |
92 | #else | 93 | #else |
93 | HANDLE hProcess; | 94 | HANDLE hProcess; |
94 | #endif | 95 | #endif |
95 | 96 | ||
96 | /** | 97 | /** |
97 | * Pipe used to send information about shared memory segments to | 98 | * Pipe used to communicate information to the plugin child process. |
98 | * the child process. NULL if not initialized. | 99 | * NULL if not initialized. |
99 | */ | 100 | */ |
101 | #if !WINDOWS | ||
100 | FILE *cpipe_in; | 102 | FILE *cpipe_in; |
103 | #else | ||
104 | HANDLE cpipe_in; | ||
105 | #endif | ||
106 | |||
107 | /** | ||
108 | * A position this plugin wants us to seek to. -1 if it's finished. | ||
109 | * Starts at 0; | ||
110 | */ | ||
111 | int64_t seek_request; | ||
112 | |||
113 | #if !WINDOWS | ||
114 | int shm_id; | ||
115 | #else | ||
116 | HANDLE map_handle; | ||
117 | #endif | ||
118 | |||
119 | void *state; | ||
120 | |||
121 | int64_t fsize; | ||
122 | |||
123 | int64_t position; | ||
124 | |||
125 | unsigned char *shm_ptr; | ||
126 | |||
127 | size_t map_size; | ||
101 | 128 | ||
102 | /** | 129 | /** |
103 | * Pipe used to read information about extracted meta data from | 130 | * Pipe used to read information about extracted meta data from |
104 | * the child process. -1 if not initialized. | 131 | * the plugin child process. -1 if not initialized. |
105 | */ | 132 | */ |
133 | #if !WINDOWS | ||
106 | int cpipe_out; | 134 | int cpipe_out; |
135 | #else | ||
136 | HANDLE cpipe_out; | ||
137 | #endif | ||
138 | |||
139 | #if WINDOWS | ||
140 | /** | ||
141 | * A structure for overlapped reads on W32. | ||
142 | */ | ||
143 | OVERLAPPED ov_read; | ||
144 | |||
145 | /** | ||
146 | * A structure for overlapped writes on W32. | ||
147 | */ | ||
148 | OVERLAPPED ov_write; | ||
149 | |||
150 | /** | ||
151 | * A write buffer for overlapped writes on W32 | ||
152 | */ | ||
153 | unsigned char *ov_write_buffer; | ||
154 | #endif | ||
107 | }; | 155 | }; |
108 | 156 | ||
109 | /** | 157 | /** |
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index c489d19..465db7c 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am | |||
@@ -1,4 +1,4 @@ | |||
1 | INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common | 1 | INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common -I$(top_srcdir)/src/main |
2 | 2 | ||
3 | # install plugins under: | 3 | # install plugins under: |
4 | plugindir = $(libdir)/@RPLUGINDIR@ | 4 | plugindir = $(libdir)/@RPLUGINDIR@ |
@@ -11,183 +11,23 @@ PLUGINFLAGS = $(makesymbolic) $(LE_PLUGIN_LDFLAGS) | |||
11 | 11 | ||
12 | SUBDIRS = . | 12 | SUBDIRS = . |
13 | 13 | ||
14 | if HAVE_FFMPEG | ||
15 | thumbffmpeg=libextractor_thumbnailffmpeg.la | ||
16 | endif | ||
17 | |||
18 | if HAVE_LIBRPM | ||
19 | rpm=libextractor_rpm.la | ||
20 | endif | ||
21 | |||
22 | if HAVE_GLIB | ||
23 | if WITH_GSF | ||
24 | ole2=libextractor_ole2.la | ||
25 | endif | ||
26 | if HAVE_GTK | ||
27 | thumbgtk=libextractor_thumbnailgtk.la | ||
28 | endif | ||
29 | endif | ||
30 | |||
31 | if HAVE_QT | ||
32 | thumbqt=libextractor_thumbnailqt.la | ||
33 | qtflags=-lQtGui -lQtCore -lpthread | ||
34 | else | ||
35 | if HAVE_QT4 | ||
36 | thumbqt=libextractor_thumbnailqt.la | ||
37 | qtflags=-lQtGui4 -lQtCore4 | ||
38 | endif | ||
39 | endif | ||
40 | |||
41 | if HAVE_QT_SVG | ||
42 | svgflags = -lQtSvg | ||
43 | else | ||
44 | if HAVE_QT_SVG4 | ||
45 | svgflags = -lQtSvg4 | ||
46 | endif | ||
47 | endif | ||
48 | |||
49 | if HAVE_CXX | ||
50 | if HAVE_EXIV2 | ||
51 | exiv2=libextractor_exiv2.la | ||
52 | endif | ||
53 | if HAVE_POPPLER | ||
54 | pdf=libextractor_pdf.la | ||
55 | endif | ||
56 | endif | ||
57 | |||
58 | if HAVE_MPEG2 | ||
59 | mpeg = libextractor_mpeg.la | ||
60 | endif | ||
61 | |||
62 | if HAVE_VORBISFILE | ||
63 | ogg = libextractor_ogg.la | ||
64 | endif | ||
65 | |||
66 | if HAVE_FLAC | ||
67 | flac = libextractor_flac.la | ||
68 | endif | ||
69 | |||
70 | if NEED_VORBIS | ||
71 | vorbisflag = -lvorbis | ||
72 | endif | ||
73 | |||
74 | if NEED_OGG | ||
75 | flacoggflag = -logg | ||
76 | endif | ||
77 | |||
78 | plugin_LTLIBRARIES = \ | 14 | plugin_LTLIBRARIES = \ |
79 | libextractor_applefile.la \ | ||
80 | libextractor_asf.la \ | ||
81 | libextractor_deb.la \ | ||
82 | libextractor_dvi.la \ | ||
83 | libextractor_elf.la \ | ||
84 | $(exiv2) \ | ||
85 | $(flac) \ | ||
86 | libextractor_flv.la \ | ||
87 | libextractor_gif.la \ | ||
88 | libextractor_html.la \ | ||
89 | libextractor_id3.la \ | 15 | libextractor_id3.la \ |
90 | libextractor_id3v2.la \ | 16 | libextractor_id3v2.la \ |
91 | libextractor_id3v23.la \ | 17 | libextractor_mp3.la |
92 | libextractor_id3v24.la \ | ||
93 | libextractor_it.la \ | ||
94 | libextractor_jpeg.la \ | ||
95 | libextractor_man.la \ | ||
96 | libextractor_mime.la \ | ||
97 | libextractor_mkv.la \ | ||
98 | libextractor_mp3.la \ | ||
99 | $(mpeg) \ | ||
100 | libextractor_nsf.la \ | ||
101 | libextractor_nsfe.la \ | ||
102 | libextractor_odf.la \ | ||
103 | $(ogg) \ | ||
104 | $(ole2) \ | ||
105 | $(pdf) \ | ||
106 | libextractor_png.la \ | ||
107 | libextractor_ps.la \ | ||
108 | libextractor_qt.la \ | ||
109 | libextractor_real.la \ | ||
110 | libextractor_riff.la \ | ||
111 | $(rpm) \ | ||
112 | libextractor_s3m.la \ | ||
113 | libextractor_sid.la \ | ||
114 | libextractor_tar.la \ | ||
115 | $(thumbgtk) \ | ||
116 | $(thumbqt) \ | ||
117 | $(thumbffmpeg) \ | ||
118 | libextractor_tiff.la \ | ||
119 | libextractor_wav.la \ | ||
120 | libextractor_xm.la \ | ||
121 | libextractor_zip.la | ||
122 | 18 | ||
123 | libextractor_applefile_la_SOURCES = \ | 19 | libextractor_mp3_la_SOURCES = \ |
124 | applefile_extractor.c | 20 | mp3_extractor.c |
125 | libextractor_applefile_la_LDFLAGS = \ | 21 | libextractor_mp3_la_LDFLAGS = \ |
126 | $(PLUGINFLAGS) | 22 | $(PLUGINFLAGS) |
127 | libextractor_applefile_la_LIBADD = \ | 23 | libextractor_mp3_la_LIBADD = \ |
128 | $(top_builddir)/src/common/libextractor_common.la \ | ||
129 | $(LE_LIBINTL) | ||
130 | |||
131 | libextractor_asf_la_SOURCES = \ | ||
132 | asf_extractor.c | ||
133 | libextractor_asf_la_LDFLAGS = \ | ||
134 | $(top_builddir)/src/common/libextractor_common.la \ | 24 | $(top_builddir)/src/common/libextractor_common.la \ |
135 | $(PLUGINFLAGS) | ||
136 | |||
137 | libextractor_deb_la_SOURCES = \ | ||
138 | deb_extractor.c | ||
139 | libextractor_deb_la_LDFLAGS = \ | ||
140 | $(PLUGINFLAGS) | ||
141 | libextractor_deb_la_LIBADD = \ | ||
142 | -lz | ||
143 | |||
144 | libextractor_dvi_la_SOURCES = \ | ||
145 | dvi_extractor.c | ||
146 | libextractor_dvi_la_LDFLAGS = \ | ||
147 | $(PLUGINFLAGS) | ||
148 | |||
149 | libextractor_elf_la_SOURCES = \ | ||
150 | elf_extractor.c | ||
151 | libextractor_elf_la_LDFLAGS = \ | ||
152 | $(PLUGINFLAGS) | ||
153 | libextractor_elf_la_LIBADD = \ | ||
154 | $(top_builddir)/src/common/libextractor_common.la | ||
155 | |||
156 | libextractor_exiv2_la_SOURCES = \ | ||
157 | exiv2_extractor.cc | ||
158 | libextractor_exiv2_la_LDFLAGS = \ | ||
159 | $(XTRA_CPPLIBS) $(PLUGINFLAGS) | ||
160 | libextractor_exiv2_la_LIBADD = \ | ||
161 | -lexiv2 | ||
162 | |||
163 | libextractor_flac_la_SOURCES = \ | ||
164 | flac_extractor.c | ||
165 | libextractor_flac_la_LDFLAGS = \ | ||
166 | $(PLUGINFLAGS) | ||
167 | libextractor_flac_la_LIBADD = \ | ||
168 | -lFLAC $(flacoggflag) \ | ||
169 | $(LE_LIBINTL) | 25 | $(LE_LIBINTL) |
170 | 26 | ||
171 | libextractor_flv_la_SOURCES = \ | 27 | libextractor_ebml_la_SOURCES = \ |
172 | flv_extractor.c | 28 | ebml_extractor.c |
173 | libextractor_flv_la_LDFLAGS = \ | 29 | libextractor_ebml_la_LDFLAGS = \ |
174 | $(PLUGINFLAGS) | 30 | $(PLUGINFLAGS) |
175 | libextractor_flv_la_LIBADD = \ | ||
176 | $(top_builddir)/src/common/libextractor_common.la | ||
177 | |||
178 | libextractor_gif_la_SOURCES = \ | ||
179 | gif_extractor.c | ||
180 | libextractor_gif_la_LDFLAGS = \ | ||
181 | $(PLUGINFLAGS) | ||
182 | libextractor_gif_la_LIBADD = \ | ||
183 | $(top_builddir)/src/common/libextractor_common.la | ||
184 | |||
185 | libextractor_html_la_SOURCES = \ | ||
186 | html_extractor.c | ||
187 | libextractor_html_la_LDFLAGS = \ | ||
188 | $(PLUGINFLAGS) | ||
189 | libextractor_html_la_LIBADD = \ | ||
190 | $(top_builddir)/src/common/libextractor_common.la | ||
191 | 31 | ||
192 | libextractor_id3_la_SOURCES = \ | 32 | libextractor_id3_la_SOURCES = \ |
193 | id3_extractor.c | 33 | id3_extractor.c |
@@ -204,211 +44,4 @@ libextractor_id3v2_la_LDFLAGS = \ | |||
204 | libextractor_id3v2_la_LIBADD = \ | 44 | libextractor_id3v2_la_LIBADD = \ |
205 | $(top_builddir)/src/common/libextractor_common.la | 45 | $(top_builddir)/src/common/libextractor_common.la |
206 | 46 | ||
207 | libextractor_id3v23_la_SOURCES = \ | ||
208 | id3v23_extractor.c | ||
209 | libextractor_id3v23_la_LDFLAGS = \ | ||
210 | $(PLUGINFLAGS) | ||
211 | libextractor_id3v23_la_LIBADD = \ | ||
212 | $(top_builddir)/src/common/libextractor_common.la | ||
213 | |||
214 | libextractor_id3v24_la_SOURCES = \ | ||
215 | id3v24_extractor.c | ||
216 | libextractor_id3v24_la_LDFLAGS = \ | ||
217 | $(PLUGINFLAGS) | ||
218 | libextractor_id3v24_la_LIBADD = \ | ||
219 | $(top_builddir)/src/common/libextractor_common.la | ||
220 | |||
221 | libextractor_it_la_SOURCES = \ | ||
222 | it_extractor.c | ||
223 | libextractor_it_la_LDFLAGS = \ | ||
224 | $(PLUGINFLAGS) | ||
225 | |||
226 | libextractor_jpeg_la_SOURCES = \ | ||
227 | jpeg_extractor.c | ||
228 | libextractor_jpeg_la_LDFLAGS = \ | ||
229 | $(PLUGINFLAGS) | ||
230 | libextractor_jpeg_la_LIBADD = \ | ||
231 | $(LE_LIBINTL) | ||
232 | |||
233 | libextractor_man_la_SOURCES = \ | ||
234 | man_extractor.c | ||
235 | libextractor_man_la_LDFLAGS = \ | ||
236 | $(PLUGINFLAGS) | ||
237 | libextractor_man_la_LIBADD = \ | ||
238 | $(LE_LIBINTL) | ||
239 | |||
240 | libextractor_mime_la_SOURCES = \ | ||
241 | mime_extractor.c | ||
242 | libextractor_mime_la_LDFLAGS = \ | ||
243 | $(PLUGINFLAGS) | ||
244 | |||
245 | libextractor_mkv_la_SOURCES = \ | ||
246 | mkv_extractor.c | ||
247 | libextractor_mkv_la_LDFLAGS = \ | ||
248 | $(PLUGINFLAGS) | ||
249 | |||
250 | libextractor_mp3_la_SOURCES = \ | ||
251 | mp3_extractor.c | ||
252 | libextractor_mp3_la_LDFLAGS = \ | ||
253 | $(PLUGINFLAGS) | ||
254 | libextractor_mp3_la_LIBADD = \ | ||
255 | $(top_builddir)/src/common/libextractor_common.la \ | ||
256 | $(LE_LIBINTL) | ||
257 | |||
258 | libextractor_mpeg_la_SOURCES = \ | ||
259 | mpeg_extractor.c | ||
260 | libextractor_mpeg_la_LDFLAGS = \ | ||
261 | $(PLUGINFLAGS) | ||
262 | libextractor_mpeg_la_LIBADD = \ | ||
263 | -lmpeg2 | ||
264 | |||
265 | libextractor_nsf_la_SOURCES = \ | ||
266 | nsf_extractor.c | ||
267 | libextractor_nsf_la_LDFLAGS = \ | ||
268 | $(PLUGINFLAGS) | ||
269 | |||
270 | libextractor_nsfe_la_SOURCES = \ | ||
271 | nsfe_extractor.c | ||
272 | libextractor_nsfe_la_LDFLAGS = \ | ||
273 | $(PLUGINFLAGS) | ||
274 | |||
275 | libextractor_odf_la_SOURCES = \ | ||
276 | odf_extractor.c | ||
277 | libextractor_odf_la_LDFLAGS = \ | ||
278 | $(PLUGINFLAGS) | ||
279 | libextractor_odf_la_LIBADD = \ | ||
280 | $(top_builddir)/src/common/libextractor_common.la \ | ||
281 | -lz | ||
282 | |||
283 | libextractor_ogg_la_SOURCES = \ | ||
284 | ogg_extractor.c | ||
285 | libextractor_ogg_la_LDFLAGS = \ | ||
286 | $(PLUGINFLAGS) | ||
287 | libextractor_ogg_la_LIBADD = \ | ||
288 | -lvorbisfile $(vorbisflag) -logg | ||
289 | |||
290 | libextractor_ole2_la_SOURCES = \ | ||
291 | ole2_extractor.c | ||
292 | libextractor_ole2_la_CFLAGS = \ | ||
293 | $(GSF_CFLAGS) | ||
294 | libextractor_ole2_la_LIBADD = \ | ||
295 | $(LIBADD) $(GSF_LIBS) \ | ||
296 | $(top_builddir)/src/common/libextractor_common.la | ||
297 | libextractor_ole2_la_LDFLAGS = \ | ||
298 | $(PLUGINFLAGS) | ||
299 | |||
300 | libextractor_pdf_la_SOURCES = \ | ||
301 | pdf_extractor.cc | ||
302 | libextractor_pdf_la_LDFLAGS = \ | ||
303 | $(XTRA_CPPLIBS) $(PLUGINFLAGS) | ||
304 | libextractor_pdf_la_LIBADD = \ | ||
305 | $(top_builddir)/src/common/libextractor_common.la \ | ||
306 | -lpoppler | ||
307 | |||
308 | libextractor_png_la_SOURCES = \ | ||
309 | png_extractor.c | ||
310 | libextractor_png_la_LDFLAGS = \ | ||
311 | $(PLUGINFLAGS) | ||
312 | libextractor_png_la_LIBADD = \ | ||
313 | $(top_builddir)/src/common/libextractor_common.la \ | ||
314 | -lz | ||
315 | |||
316 | libextractor_ps_la_SOURCES = \ | ||
317 | ps_extractor.c | ||
318 | libextractor_ps_la_LDFLAGS = \ | ||
319 | $(PLUGINFLAGS) | ||
320 | |||
321 | libextractor_qt_la_SOURCES = \ | ||
322 | qt_extractor.c | ||
323 | libextractor_qt_la_LDFLAGS = \ | ||
324 | $(PLUGINFLAGS) | ||
325 | libextractor_qt_la_LIBADD = \ | ||
326 | -lz -lm | ||
327 | |||
328 | libextractor_real_la_SOURCES = \ | ||
329 | real_extractor.c | ||
330 | libextractor_real_la_LDFLAGS = \ | ||
331 | $(PLUGINFLAGS) | ||
332 | |||
333 | libextractor_riff_la_SOURCES = \ | ||
334 | riff_extractor.c | ||
335 | libextractor_riff_la_LDFLAGS = \ | ||
336 | $(PLUGINFLAGS) | ||
337 | libextractor_riff_la_LIBADD = \ | ||
338 | $(LE_LIBINTL) \ | ||
339 | -lm | ||
340 | |||
341 | libextractor_rpm_la_SOURCES = \ | ||
342 | rpm_extractor.c | ||
343 | libextractor_rpm_la_LDFLAGS = \ | ||
344 | $(PLUGINFLAGS) | ||
345 | libextractor_rpm_la_LIBADD = \ | ||
346 | -lrpm | ||
347 | |||
348 | libextractor_s3m_la_SOURCES = \ | ||
349 | s3m_extractor.c | ||
350 | libextractor_s3m_la_LDFLAGS = \ | ||
351 | $(PLUGINFLAGS) | ||
352 | |||
353 | libextractor_sid_la_SOURCES = \ | ||
354 | sid_extractor.c | ||
355 | libextractor_sid_la_LDFLAGS = \ | ||
356 | $(PLUGINFLAGS) | ||
357 | |||
358 | libextractor_tar_la_SOURCES = \ | ||
359 | tar_extractor.c | ||
360 | libextractor_tar_la_LDFLAGS = \ | ||
361 | $(PLUGINFLAGS) | ||
362 | |||
363 | libextractor_thumbnailffmpeg_la_SOURCES = \ | ||
364 | thumbnailffmpeg_extractor.c | ||
365 | libextractor_thumbnailffmpeg_la_LIBADD = \ | ||
366 | -lavformat -lavcodec -lswscale -lavutil -lz -lbz2 | ||
367 | libextractor_thumbnailffmpeg_la_LDFLAGS = \ | ||
368 | $(PLUGINFLAGS) | ||
369 | |||
370 | libextractor_thumbnailgtk_la_CFLAGS = \ | ||
371 | $(GLIB_CFLAGS) $(GTK_CFLAGS) | ||
372 | libextractor_thumbnailgtk_la_LIBADD = \ | ||
373 | $(LIBADD) -lgobject-2.0 @GTK_LIBS@ | ||
374 | libextractor_thumbnailgtk_la_LDFLAGS = \ | ||
375 | $(PLUGINFLAGS) | ||
376 | libextractor_thumbnailgtk_la_SOURCES = \ | ||
377 | thumbnailgtk_extractor.c | ||
378 | |||
379 | libextractor_thumbnailqt_la_SOURCES = \ | ||
380 | thumbnailqt_extractor.cc | ||
381 | libextractor_thumbnailqt_la_LDFLAGS = \ | ||
382 | $(QT_LDFLAGS) \ | ||
383 | $(PLUGINFLAGS) | ||
384 | libextractor_thumbnailqt_la_LIBADD = \ | ||
385 | $(qtflags) $(svgflags) | ||
386 | libextractor_thumbnailqt_la_CPPFLAGS = \ | ||
387 | $(QT_CPPFLAGS) \ | ||
388 | $(QT_CFLAGS) $(QT_SVG_CFLAGS) | ||
389 | |||
390 | libextractor_tiff_la_SOURCES = \ | ||
391 | tiff_extractor.c | ||
392 | libextractor_tiff_la_LDFLAGS = \ | ||
393 | $(PLUGINFLAGS) | ||
394 | libextractor_tiff_la_LIBADD = \ | ||
395 | $(top_builddir)/src/common/libextractor_common.la | ||
396 | |||
397 | libextractor_wav_la_SOURCES = \ | ||
398 | wav_extractor.c | ||
399 | libextractor_wav_la_LDFLAGS = \ | ||
400 | $(PLUGINFLAGS) | ||
401 | libextractor_wav_la_LIBADD = \ | ||
402 | $(LE_LIBINTL) | ||
403 | |||
404 | libextractor_xm_la_SOURCES = \ | ||
405 | xm_extractor.c | ||
406 | libextractor_xm_la_LDFLAGS = \ | ||
407 | $(PLUGINFLAGS) | ||
408 | |||
409 | libextractor_zip_la_SOURCES = \ | ||
410 | zip_extractor.c | ||
411 | libextractor_zip_la_LDFLAGS = \ | ||
412 | $(PLUGINFLAGS) | ||
413 | |||
414 | EXTRA_DIST = template_extractor.c | 47 | EXTRA_DIST = template_extractor.c |
diff --git a/src/plugins/id3_extractor.c b/src/plugins/id3_extractor.c index 64d341c..39bd779 100644 --- a/src/plugins/id3_extractor.c +++ b/src/plugins/id3_extractor.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #include <unistd.h> | 29 | #include <unistd.h> |
30 | #include <stdlib.h> | 30 | #include <stdlib.h> |
31 | 31 | ||
32 | #include "extractor_plugins.h" | ||
33 | |||
32 | typedef struct | 34 | typedef struct |
33 | { | 35 | { |
34 | char *title; | 36 | char *title; |
@@ -199,6 +201,46 @@ static const char *const genre_names[] = { | |||
199 | #define OK 0 | 201 | #define OK 0 |
200 | #define INVALID_ID3 1 | 202 | #define INVALID_ID3 1 |
201 | 203 | ||
204 | struct id3_state | ||
205 | { | ||
206 | int state; | ||
207 | id3tag info; | ||
208 | }; | ||
209 | |||
210 | enum ID3State | ||
211 | { | ||
212 | ID3_INVALID = -1, | ||
213 | ID3_SEEKING_TO_TAIL = 0, | ||
214 | ID3_READING_TAIL = 1 | ||
215 | }; | ||
216 | |||
217 | void | ||
218 | EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin) | ||
219 | { | ||
220 | struct id3_state *state; | ||
221 | state = plugin->state = malloc (sizeof (struct id3_state)); | ||
222 | if (state == NULL) | ||
223 | return; | ||
224 | memset (state, 0, sizeof (struct id3_state)); | ||
225 | state->state = ID3_SEEKING_TO_TAIL; | ||
226 | } | ||
227 | |||
228 | void | ||
229 | EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin) | ||
230 | { | ||
231 | struct id3_state *state = plugin->state; | ||
232 | if (state != NULL) | ||
233 | { | ||
234 | if (state->info.title != NULL) free (state->info.title); | ||
235 | if (state->info.year != NULL) free (state->info.year); | ||
236 | if (state->info.album != NULL) free (state->info.album); | ||
237 | if (state->info.artist != NULL) free (state->info.artist); | ||
238 | if (state->info.comment != NULL) free (state->info.comment); | ||
239 | free (state); | ||
240 | } | ||
241 | plugin->state = NULL; | ||
242 | } | ||
243 | |||
202 | static void | 244 | static void |
203 | trim (char *k) | 245 | trim (char *k) |
204 | { | 246 | { |
@@ -209,14 +251,14 @@ trim (char *k) | |||
209 | } | 251 | } |
210 | 252 | ||
211 | static int | 253 | static int |
212 | get_id3 (const char *data, size_t size, id3tag * id3) | 254 | get_id3 (const char *data, int64_t offset, int64_t size, id3tag *id3) |
213 | { | 255 | { |
214 | const char *pos; | 256 | const char *pos; |
215 | 257 | ||
216 | if (size < 128) | 258 | if (size < 128) |
217 | return INVALID_ID3; | 259 | return INVALID_ID3; |
218 | 260 | ||
219 | pos = &data[size - 128]; | 261 | pos = &data[offset]; |
220 | if (0 != strncmp ("TAG", pos, 3)) | 262 | if (0 != strncmp ("TAG", pos, 3)) |
221 | return INVALID_ID3; | 263 | return INVALID_ID3; |
222 | pos += 3; | 264 | pos += 3; |
@@ -253,49 +295,82 @@ get_id3 (const char *data, size_t size, id3tag * id3) | |||
253 | } | 295 | } |
254 | 296 | ||
255 | 297 | ||
256 | #define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != (ret = proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)))) goto FINISH; } while (0) | 298 | #define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) return 1; } while (0) |
257 | 299 | ||
258 | 300 | ||
259 | const char * | 301 | int |
260 | EXTRACTOR_id3_options () | 302 | EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin, |
303 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
261 | { | 304 | { |
262 | return "want-tail"; | 305 | int64_t file_position; |
263 | } | 306 | int64_t file_size; |
307 | int64_t offset = 0; | ||
308 | int64_t size; | ||
309 | struct id3_state *state; | ||
310 | char *data; | ||
311 | |||
312 | char track[16]; | ||
264 | 313 | ||
314 | if (plugin == NULL || plugin->state == NULL) | ||
315 | return 1; | ||
265 | 316 | ||
266 | int | 317 | state = plugin->state; |
267 | EXTRACTOR_id3_extract (const char *data, | 318 | file_position = plugin->position; |
268 | size_t size, | 319 | file_size = plugin->fsize; |
269 | EXTRACTOR_MetaDataProcessor proc, | 320 | size = plugin->map_size; |
270 | void *proc_cls, | 321 | data = (char *) plugin->shm_ptr; |
271 | const char *options) | 322 | |
272 | { | 323 | if (plugin->seek_request < 0) |
273 | id3tag info; | 324 | return 1; |
274 | char track[16]; | 325 | if (file_position - plugin->seek_request > 0) |
275 | int ret; | 326 | { |
327 | plugin->seek_request = -1; | ||
328 | return 1; | ||
329 | } | ||
330 | if (plugin->seek_request - file_position < size) | ||
331 | offset = plugin->seek_request - file_position; | ||
276 | 332 | ||
277 | ret = 0; | 333 | while (1) |
278 | if (OK != get_id3 (data, size, &info)) | 334 | { |
279 | return 0; | 335 | switch (state->state) |
280 | ADD (info.title, EXTRACTOR_METATYPE_TITLE); | ||
281 | ADD (info.artist, EXTRACTOR_METATYPE_ARTIST); | ||
282 | ADD (info.album, EXTRACTOR_METATYPE_ALBUM); | ||
283 | ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR); | ||
284 | ADD (info.genre, EXTRACTOR_METATYPE_GENRE); | ||
285 | ADD (info.comment, EXTRACTOR_METATYPE_COMMENT); | ||
286 | if (info.track_number != 0) | ||
287 | { | 336 | { |
288 | snprintf(track, | 337 | case ID3_INVALID: |
289 | sizeof(track), "%u", info.track_number); | 338 | plugin->seek_request = -1; |
290 | ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); | 339 | return 1; |
340 | case ID3_SEEKING_TO_TAIL: | ||
341 | offset = file_size - 128 - file_position; | ||
342 | if (offset > size) | ||
343 | { | ||
344 | state->state = ID3_READING_TAIL; | ||
345 | plugin->seek_request = file_position + offset; | ||
346 | return 0; | ||
347 | } | ||
348 | else if (offset < 0) | ||
349 | { | ||
350 | state->state = ID3_INVALID; | ||
351 | break; | ||
352 | } | ||
353 | state->state = ID3_READING_TAIL; | ||
354 | break; | ||
355 | case ID3_READING_TAIL: | ||
356 | if (OK != get_id3 (data, offset, size - offset, &state->info)) | ||
357 | return 1; | ||
358 | ADD (state->info.title, EXTRACTOR_METATYPE_TITLE); | ||
359 | ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST); | ||
360 | ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM); | ||
361 | ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR); | ||
362 | ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE); | ||
363 | ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT); | ||
364 | if (state->info.track_number != 0) | ||
365 | { | ||
366 | snprintf(track, | ||
367 | sizeof(track), "%u", state->info.track_number); | ||
368 | ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); | ||
369 | } | ||
370 | state->state = ID3_INVALID; | ||
291 | } | 371 | } |
292 | FINISH: | 372 | } |
293 | if (info.title != NULL) free (info.title); | 373 | return 1; |
294 | if (info.year != NULL) free (info.year); | ||
295 | if (info.album != NULL) free (info.album); | ||
296 | if (info.artist != NULL) free (info.artist); | ||
297 | if (info.comment != NULL) free (info.comment); | ||
298 | return ret; | ||
299 | } | 374 | } |
300 | 375 | ||
301 | /* end of id3_extractor.c */ | 376 | /* end of id3_extractor.c */ |
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c deleted file mode 100644 index c31d63d..0000000 --- a/src/plugins/id3v23_extractor.c +++ /dev/null | |||
@@ -1,420 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | |||
20 | */ | ||
21 | #define DEBUG_EXTRACT_ID3v23 0 | ||
22 | |||
23 | #include "platform.h" | ||
24 | #include "extractor.h" | ||
25 | #include <string.h> | ||
26 | #include <stdio.h> | ||
27 | #include <sys/types.h> | ||
28 | #include <sys/stat.h> | ||
29 | #include <unistd.h> | ||
30 | #include <stdlib.h> | ||
31 | #include <fcntl.h> | ||
32 | #ifndef MINGW | ||
33 | #include <sys/mman.h> | ||
34 | #endif | ||
35 | |||
36 | #include "convert.h" | ||
37 | |||
38 | enum Id3v23Fmt | ||
39 | { | ||
40 | T, /* simple, 0-terminated string, prefixed by encoding */ | ||
41 | U, /* 0-terminated ASCII string, no encoding */ | ||
42 | UL, /* unsync'ed lyrics */ | ||
43 | SL, /* sync'ed lyrics */ | ||
44 | L, /* string with language prefix */ | ||
45 | I /* image */ | ||
46 | }; | ||
47 | |||
48 | typedef struct | ||
49 | { | ||
50 | const char *text; | ||
51 | enum EXTRACTOR_MetaType type; | ||
52 | enum Id3v23Fmt fmt; | ||
53 | } Matches; | ||
54 | |||
55 | static Matches tmap[] = { | ||
56 | {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, | ||
57 | {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
58 | {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
59 | {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
60 | {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
61 | /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */ | ||
62 | /* TDLY */ | ||
63 | {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
64 | {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, | ||
65 | {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
66 | /* TIME */ | ||
67 | {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, | ||
68 | {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, | ||
69 | {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
70 | /* TKEY */ | ||
71 | {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
72 | {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
73 | {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, | ||
74 | {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
75 | {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
76 | {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
77 | {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, | ||
78 | {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | ||
79 | {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, | ||
80 | {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, | ||
81 | {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
82 | {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
83 | {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
84 | {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
85 | {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
86 | {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
87 | /* TRDA */ | ||
88 | {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, | ||
89 | /* TRSO */ | ||
90 | {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | ||
91 | {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, | ||
92 | /* TSSE */ | ||
93 | {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, | ||
94 | {"WCOM", EXTRACTOR_METATYPE_URL, U}, | ||
95 | {"WCOP", EXTRACTOR_METATYPE_URL, U}, | ||
96 | {"WOAF", EXTRACTOR_METATYPE_URL, U}, | ||
97 | {"WOAS", EXTRACTOR_METATYPE_URL, U}, | ||
98 | {"WORS", EXTRACTOR_METATYPE_URL, U}, | ||
99 | {"WPAY", EXTRACTOR_METATYPE_URL, U}, | ||
100 | {"WPUB", EXTRACTOR_METATYPE_URL, U}, | ||
101 | {"WXXX", EXTRACTOR_METATYPE_URL, T}, | ||
102 | {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
103 | /* ... */ | ||
104 | {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, | ||
105 | {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, | ||
106 | {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
107 | /* ... */ | ||
108 | {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
109 | /* ... */ | ||
110 | {"LINK", EXTRACTOR_METATYPE_URL, U}, | ||
111 | /* ... */ | ||
112 | {"USER", EXTRACTOR_METATYPE_LICENSE, T}, | ||
113 | /* ... */ | ||
114 | {NULL, 0, T} | ||
115 | }; | ||
116 | |||
117 | |||
118 | /* mimetype = audio/mpeg */ | ||
119 | int | ||
120 | EXTRACTOR_id3v23_extract (const unsigned char *data, | ||
121 | size_t size, | ||
122 | EXTRACTOR_MetaDataProcessor proc, | ||
123 | void *proc_cls, | ||
124 | const char *options) | ||
125 | { | ||
126 | int unsync; | ||
127 | int extendedHdr; | ||
128 | int experimental; | ||
129 | uint32_t tsize; | ||
130 | uint32_t pos; | ||
131 | uint32_t ehdrSize; | ||
132 | uint32_t padding; | ||
133 | uint32_t csize; | ||
134 | int i; | ||
135 | uint16_t flags; | ||
136 | char *mime; | ||
137 | enum EXTRACTOR_MetaType type; | ||
138 | size_t off; | ||
139 | int obo; | ||
140 | |||
141 | if ((size < 16) || | ||
142 | (data[0] != 0x49) || | ||
143 | (data[1] != 0x44) || | ||
144 | (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00)) | ||
145 | return 0; | ||
146 | unsync = (data[5] & 0x80) > 0; | ||
147 | if (unsync) | ||
148 | return 0; /* not supported */ | ||
149 | extendedHdr = (data[5] & 0x40) > 0; | ||
150 | experimental = (data[5] & 0x20) > 0; | ||
151 | if (experimental) | ||
152 | return 0; | ||
153 | tsize = (((data[6] & 0x7F) << 21) | | ||
154 | ((data[7] & 0x7F) << 14) | | ||
155 | ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); | ||
156 | if (tsize + 10 > size) | ||
157 | return 0; | ||
158 | pos = 10; | ||
159 | padding = 0; | ||
160 | if (extendedHdr) | ||
161 | { | ||
162 | ehdrSize = (((data[10]) << 24) | | ||
163 | ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0)); | ||
164 | |||
165 | padding = (((data[15]) << 24) | | ||
166 | ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0)); | ||
167 | pos += 4 + ehdrSize; | ||
168 | if (padding < tsize) | ||
169 | tsize -= padding; | ||
170 | else | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | |||
175 | while (pos < tsize) | ||
176 | { | ||
177 | if (pos + 10 > tsize) | ||
178 | return 0; | ||
179 | csize = | ||
180 | (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) + | ||
181 | data[pos + 7]; | ||
182 | if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) || | ||
183 | (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos)) | ||
184 | break; | ||
185 | flags = (data[pos + 8] << 8) + data[pos + 9]; | ||
186 | if (((flags & 0x80) > 0) /* compressed, not yet supported */ || | ||
187 | ((flags & 0x40) > 0) /* encrypted, not supported */ ) | ||
188 | { | ||
189 | pos += 10 + csize; | ||
190 | continue; | ||
191 | } | ||
192 | i = 0; | ||
193 | while (tmap[i].text != NULL) | ||
194 | { | ||
195 | if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4)) | ||
196 | { | ||
197 | char *word; | ||
198 | if ((flags & 0x20) > 0) | ||
199 | { | ||
200 | /* "group" identifier, skip a byte */ | ||
201 | pos++; | ||
202 | csize--; | ||
203 | } | ||
204 | switch (tmap[i].fmt) | ||
205 | { | ||
206 | case T: | ||
207 | /* this byte describes the encoding | ||
208 | try to convert strings to UTF-8 | ||
209 | if it fails, then forget it */ | ||
210 | switch (data[pos + 10]) | ||
211 | { | ||
212 | case 0x00: | ||
213 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
214 | csize - 1, "ISO-8859-1"); | ||
215 | break; | ||
216 | case 0x01: | ||
217 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
218 | csize - 1, "UCS-2"); | ||
219 | break; | ||
220 | default: | ||
221 | /* bad encoding byte, | ||
222 | try to convert from iso-8859-1 */ | ||
223 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
224 | csize - 1, "ISO-8859-1"); | ||
225 | break; | ||
226 | } | ||
227 | break; | ||
228 | case U: | ||
229 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
230 | csize, "ISO-8859-1"); | ||
231 | break; | ||
232 | case UL: | ||
233 | if (csize < 6) | ||
234 | return 0; /* malformed */ | ||
235 | /* find end of description */ | ||
236 | off = 14; | ||
237 | while ( (off < size) && | ||
238 | (off - pos < csize) && | ||
239 | (data[pos + off] == '\0') ) | ||
240 | off++; | ||
241 | if ( (off >= csize) || | ||
242 | (data[pos+off] != '\0') ) | ||
243 | return 0; /* malformed */ | ||
244 | off++; | ||
245 | switch (data[pos + 10]) | ||
246 | { | ||
247 | case 0x00: | ||
248 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
249 | csize - off, "ISO-8859-1"); | ||
250 | break; | ||
251 | case 0x01: | ||
252 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
253 | csize - off, "UCS-2"); | ||
254 | break; | ||
255 | default: | ||
256 | /* bad encoding byte, | ||
257 | try to convert from iso-8859-1 */ | ||
258 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
259 | csize - off, "ISO-8859-1"); | ||
260 | break; | ||
261 | } | ||
262 | break; | ||
263 | case SL: | ||
264 | if (csize < 7) | ||
265 | return 0; /* malformed */ | ||
266 | /* find end of description */ | ||
267 | switch (data[pos + 10]) | ||
268 | { | ||
269 | case 0x00: | ||
270 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
271 | csize - 6, "ISO-8859-1"); | ||
272 | break; | ||
273 | case 0x01: | ||
274 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
275 | csize - 6, "UCS-2"); | ||
276 | break; | ||
277 | default: | ||
278 | /* bad encoding byte, | ||
279 | try to convert from iso-8859-1 */ | ||
280 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
281 | csize - 6, "ISO-8859-1"); | ||
282 | break; | ||
283 | } | ||
284 | break; | ||
285 | case L: | ||
286 | if (csize < 5) | ||
287 | return 0; /* malformed */ | ||
288 | /* find end of description */ | ||
289 | obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... */ | ||
290 | if (csize < 6) | ||
291 | obo = 0; | ||
292 | switch (data[pos + 10]) | ||
293 | { | ||
294 | case 0x00: | ||
295 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], | ||
296 | csize - 4 - obo, "ISO-8859-1"); | ||
297 | break; | ||
298 | case 0x01: | ||
299 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], | ||
300 | csize - 4 - obo, "UCS-2"); | ||
301 | break; | ||
302 | default: | ||
303 | /* bad encoding byte, | ||
304 | try to convert from iso-8859-1 */ | ||
305 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo], | ||
306 | csize - 4 - obo, "ISO-8859-1"); | ||
307 | break; | ||
308 | } | ||
309 | break; | ||
310 | case I: | ||
311 | if (csize < 2) | ||
312 | return 0; /* malformed */ | ||
313 | /* find end of mime type */ | ||
314 | off = 11; | ||
315 | while ( (off < size) && | ||
316 | (off - pos < csize) && | ||
317 | (data[pos + off] == '\0') ) | ||
318 | off++; | ||
319 | if ( (off >= csize) || | ||
320 | (data[pos+off] != '\0') ) | ||
321 | return 0; /* malformed */ | ||
322 | off++; | ||
323 | mime = strdup ((const char*) &data[pos + 11]); | ||
324 | |||
325 | switch (data[pos+off]) | ||
326 | { | ||
327 | case 0x03: | ||
328 | case 0x04: | ||
329 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
330 | break; | ||
331 | case 0x07: | ||
332 | case 0x08: | ||
333 | case 0x09: | ||
334 | case 0x0A: | ||
335 | case 0x0B: | ||
336 | case 0x0C: | ||
337 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
338 | break; | ||
339 | case 0x0D: | ||
340 | case 0x0E: | ||
341 | case 0x0F: | ||
342 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
343 | break; | ||
344 | case 0x14: | ||
345 | type = EXTRACTOR_METATYPE_LOGO; | ||
346 | type = EXTRACTOR_METATYPE_LOGO; | ||
347 | break; | ||
348 | default: | ||
349 | type = EXTRACTOR_METATYPE_PICTURE; | ||
350 | break; | ||
351 | } | ||
352 | off++; | ||
353 | |||
354 | /* find end of description */ | ||
355 | while ( (off < size) && | ||
356 | (off - pos < csize) && | ||
357 | (data[pos + off] == '\0') ) | ||
358 | off++; | ||
359 | if ( (off >= csize) || | ||
360 | (data[pos+off] != '\0') ) | ||
361 | { | ||
362 | if (mime != NULL) | ||
363 | free (mime); | ||
364 | return 0; /* malformed */ | ||
365 | } | ||
366 | off++; | ||
367 | if ( (mime != NULL) && | ||
368 | (0 == strcasecmp ("-->", | ||
369 | mime)) ) | ||
370 | { | ||
371 | /* not supported */ | ||
372 | } | ||
373 | else | ||
374 | { | ||
375 | if (0 != proc (proc_cls, | ||
376 | "id3v23", | ||
377 | type, | ||
378 | EXTRACTOR_METAFORMAT_BINARY, | ||
379 | mime, | ||
380 | (const char*) &data[pos + off], | ||
381 | csize + 6 - off)) | ||
382 | { | ||
383 | if (mime != NULL) | ||
384 | free (mime); | ||
385 | return 1; | ||
386 | } | ||
387 | } | ||
388 | if (mime != NULL) | ||
389 | free (mime); | ||
390 | word = NULL; | ||
391 | break; | ||
392 | default: | ||
393 | return 0; | ||
394 | } | ||
395 | if ((word != NULL) && (strlen (word) > 0)) | ||
396 | { | ||
397 | if (0 != proc (proc_cls, | ||
398 | "id3v23", | ||
399 | tmap[i].type, | ||
400 | EXTRACTOR_METAFORMAT_UTF8, | ||
401 | "text/plain", | ||
402 | word, | ||
403 | strlen(word)+1)) | ||
404 | { | ||
405 | free (word); | ||
406 | return 1; | ||
407 | } | ||
408 | } | ||
409 | if (word != NULL) | ||
410 | free (word); | ||
411 | break; | ||
412 | } | ||
413 | i++; | ||
414 | } | ||
415 | pos += 10 + csize; | ||
416 | } | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | /* end of id3v23_extractor.c */ | ||
diff --git a/src/plugins/id3v24_extractor.c b/src/plugins/id3v24_extractor.c deleted file mode 100644 index 301020c..0000000 --- a/src/plugins/id3v24_extractor.c +++ /dev/null | |||
@@ -1,455 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | |||
20 | */ | ||
21 | #define DEBUG_EXTRACT_ID3v24 0 | ||
22 | |||
23 | #include "platform.h" | ||
24 | #include "extractor.h" | ||
25 | #include <string.h> | ||
26 | #include <stdio.h> | ||
27 | #include <sys/types.h> | ||
28 | #include <sys/stat.h> | ||
29 | #include <unistd.h> | ||
30 | #include <stdlib.h> | ||
31 | #include <fcntl.h> | ||
32 | #ifndef MINGW | ||
33 | #include <sys/mman.h> | ||
34 | #endif | ||
35 | |||
36 | #include "convert.h" | ||
37 | |||
38 | enum Id3v24Fmt | ||
39 | { | ||
40 | T, /* simple, 0-terminated string, prefixed by encoding */ | ||
41 | U, /* 0-terminated ASCII string, no encoding */ | ||
42 | UL, /* unsync'ed lyrics */ | ||
43 | SL, /* sync'ed lyrics */ | ||
44 | L, /* string with language prefix */ | ||
45 | I /* image */ | ||
46 | }; | ||
47 | |||
48 | typedef struct | ||
49 | { | ||
50 | const char *text; | ||
51 | enum EXTRACTOR_MetaType type; | ||
52 | enum Id3v24Fmt fmt; | ||
53 | } Matches; | ||
54 | |||
55 | static Matches tmap[] = { | ||
56 | {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, | ||
57 | {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
58 | {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
59 | {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
60 | {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
61 | /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */ | ||
62 | /* TDLY */ | ||
63 | {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
64 | {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, | ||
65 | {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
66 | /* TIME, deprecated in 24 */ | ||
67 | {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, | ||
68 | {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, | ||
69 | {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
70 | /* TKEY */ | ||
71 | {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
72 | {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
73 | {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, | ||
74 | {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
75 | {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
76 | {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
77 | {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, | ||
78 | /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */ | ||
79 | {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, | ||
80 | {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, | ||
81 | {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
82 | {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
83 | {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
84 | {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
85 | {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
86 | {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
87 | /* TRDA, deprecated in 24 */ | ||
88 | {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, | ||
89 | /* TRSO */ | ||
90 | /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */ | ||
91 | {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, | ||
92 | /* TSSE */ | ||
93 | /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */ | ||
94 | {"WCOM", EXTRACTOR_METATYPE_URL, U}, | ||
95 | {"WCOP", EXTRACTOR_METATYPE_URL, U}, | ||
96 | {"WOAF", EXTRACTOR_METATYPE_URL, U}, | ||
97 | {"WOAS", EXTRACTOR_METATYPE_URL, U}, | ||
98 | {"WORS", EXTRACTOR_METATYPE_URL, U}, | ||
99 | {"WPAY", EXTRACTOR_METATYPE_URL, U}, | ||
100 | {"WPUB", EXTRACTOR_METATYPE_URL, U}, | ||
101 | {"WXXX", EXTRACTOR_METATYPE_URL, T}, | ||
102 | /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */ | ||
103 | /* ... */ | ||
104 | {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, | ||
105 | {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, | ||
106 | {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
107 | /* ... */ | ||
108 | {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
109 | /* ... */ | ||
110 | {"LINK", EXTRACTOR_METATYPE_URL, U}, | ||
111 | /* ... */ | ||
112 | {"USER", EXTRACTOR_METATYPE_LICENSE, T}, | ||
113 | /* ... */ | ||
114 | /* new frames in 24 */ | ||
115 | /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */ | ||
116 | {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T}, | ||
117 | /* TDRC, TDRL, TDTG */ | ||
118 | {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
119 | {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T}, | ||
120 | {"TMOO", EXTRACTOR_METATYPE_MOOD, T}, | ||
121 | {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
122 | {"TSOA", EXTRACTOR_METATYPE_ALBUM, T}, | ||
123 | {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
124 | {"TSOT", EXTRACTOR_METATYPE_TITLE, T}, | ||
125 | {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T}, | ||
126 | {NULL, 0, T} | ||
127 | }; | ||
128 | |||
129 | |||
130 | /* mimetype = audio/mpeg */ | ||
131 | int | ||
132 | EXTRACTOR_id3v24_extract (const unsigned char *data, | ||
133 | size_t size, | ||
134 | EXTRACTOR_MetaDataProcessor proc, | ||
135 | void *proc_cls, | ||
136 | const char *options) | ||
137 | { | ||
138 | int unsync; | ||
139 | int extendedHdr; | ||
140 | int experimental; | ||
141 | uint32_t tsize; | ||
142 | uint32_t pos; | ||
143 | uint32_t ehdrSize; | ||
144 | uint32_t csize; | ||
145 | int i; | ||
146 | uint16_t flags; | ||
147 | char *mime; | ||
148 | enum EXTRACTOR_MetaType type; | ||
149 | size_t off; | ||
150 | |||
151 | if ((size < 16) || | ||
152 | (data[0] != 0x49) || | ||
153 | (data[1] != 0x44) || | ||
154 | (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00)) | ||
155 | return 0; | ||
156 | unsync = (data[5] & 0x80) > 0; | ||
157 | if (unsync) | ||
158 | return 0; /* not supported */ | ||
159 | extendedHdr = (data[5] & 0x40) > 0; | ||
160 | experimental = (data[5] & 0x20) > 0; | ||
161 | if (experimental) | ||
162 | return 0; | ||
163 | /* footer = (data[5] & 0x10) > 0; */ | ||
164 | tsize = (((data[6] & 0x7F) << 21) | | ||
165 | ((data[7] & 0x7F) << 14) | | ||
166 | ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0)); | ||
167 | if (tsize + 10 > size) | ||
168 | return 0; | ||
169 | pos = 10; | ||
170 | if (extendedHdr) | ||
171 | { | ||
172 | ehdrSize = (((data[10] & 0x7F) << 21) | | ||
173 | ((data[11] & 0x7F) << 14) | | ||
174 | ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0)); | ||
175 | pos += 4 + ehdrSize; | ||
176 | if (ehdrSize > tsize) | ||
177 | return 0; | ||
178 | } | ||
179 | while (pos < tsize) | ||
180 | { | ||
181 | if (pos + 10 > tsize) | ||
182 | return 0; | ||
183 | csize = | ||
184 | (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) + | ||
185 | data[pos + 7]; | ||
186 | if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) || | ||
187 | (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos)) | ||
188 | break; | ||
189 | flags = (data[pos + 8] << 8) + data[pos + 9]; | ||
190 | if (((flags & 0x08) > 0) /* compressed, not yet supported */ || | ||
191 | ((flags & 0x04) > 0) /* encrypted, not supported */ || | ||
192 | ((flags & 0x02) > 0) /* unsynchronized, not supported */ ) | ||
193 | { | ||
194 | pos += 10 + csize; | ||
195 | continue; | ||
196 | } | ||
197 | i = 0; | ||
198 | while (tmap[i].text != NULL) | ||
199 | { | ||
200 | if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4)) | ||
201 | { | ||
202 | char *word; | ||
203 | if ((flags & 0x40) > 0) | ||
204 | { | ||
205 | /* "group" identifier, skip a byte */ | ||
206 | pos++; | ||
207 | csize--; | ||
208 | } | ||
209 | |||
210 | switch (tmap[i].fmt) | ||
211 | { | ||
212 | case T: | ||
213 | /* this byte describes the encoding | ||
214 | try to convert strings to UTF-8 | ||
215 | if it fails, then forget it */ | ||
216 | switch (data[pos + 10]) | ||
217 | { | ||
218 | case 0x00: | ||
219 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
220 | csize - 1, "ISO-8859-1"); | ||
221 | break; | ||
222 | case 0x01: | ||
223 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
224 | csize - 1, "UTF-16"); | ||
225 | break; | ||
226 | case 0x02: | ||
227 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
228 | csize - 1, "UTF-16BE"); | ||
229 | break; | ||
230 | case 0x03: | ||
231 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
232 | csize - 1, "UTF-8"); | ||
233 | break; | ||
234 | default: | ||
235 | /* bad encoding byte, | ||
236 | try to convert from iso-8859-1 */ | ||
237 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11], | ||
238 | csize - 1, "ISO-8859-1"); | ||
239 | break; | ||
240 | } | ||
241 | break; | ||
242 | case U: | ||
243 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
244 | csize, "ISO-8859-1"); | ||
245 | break; | ||
246 | case UL: | ||
247 | if (csize < 6) | ||
248 | return 0; /* malformed */ | ||
249 | /* find end of description */ | ||
250 | off = 14; | ||
251 | while ( (off < size) && | ||
252 | (off - pos < csize) && | ||
253 | (data[pos + off] == '\0') ) | ||
254 | off++; | ||
255 | if ( (off >= csize) || | ||
256 | (data[pos+off] != '\0') ) | ||
257 | return 0; /* malformed */ | ||
258 | off++; | ||
259 | switch (data[pos + 10]) | ||
260 | { | ||
261 | case 0x00: | ||
262 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
263 | csize - off, "ISO-8859-1"); | ||
264 | break; | ||
265 | case 0x01: | ||
266 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
267 | csize - off, "UTF-16"); | ||
268 | break; | ||
269 | case 0x02: | ||
270 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
271 | csize - off, "UTF-16BE"); | ||
272 | break; | ||
273 | case 0x03: | ||
274 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
275 | csize - off, "UTF-8"); | ||
276 | break; | ||
277 | default: | ||
278 | /* bad encoding byte, | ||
279 | try to convert from iso-8859-1 */ | ||
280 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
281 | csize - off, "ISO-8859-1"); | ||
282 | break; | ||
283 | } | ||
284 | break; | ||
285 | case SL: | ||
286 | if (csize < 7) | ||
287 | return 0; /* malformed */ | ||
288 | /* find end of description */ | ||
289 | switch (data[pos + 10]) | ||
290 | { | ||
291 | case 0x00: | ||
292 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
293 | csize - 6, "ISO-8859-1"); | ||
294 | break; | ||
295 | case 0x01: | ||
296 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
297 | csize - 6, "UTF-16"); | ||
298 | break; | ||
299 | case 0x02: | ||
300 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
301 | csize - 6, "UTF-16BE"); | ||
302 | break; | ||
303 | case 0x03: | ||
304 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
305 | csize - 6, "UTF-8"); | ||
306 | break; | ||
307 | default: | ||
308 | /* bad encoding byte, | ||
309 | try to convert from iso-8859-1 */ | ||
310 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16], | ||
311 | csize - 6, "ISO-8859-1"); | ||
312 | break; | ||
313 | } | ||
314 | break; | ||
315 | case L: | ||
316 | if (csize < 5) | ||
317 | return 0; /* malformed */ | ||
318 | /* find end of description */ | ||
319 | switch (data[pos + 10]) | ||
320 | { | ||
321 | case 0x00: | ||
322 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], | ||
323 | csize - 4, "ISO-8859-1"); | ||
324 | break; | ||
325 | case 0x01: | ||
326 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], | ||
327 | csize - 4, "UTF-16"); | ||
328 | break; | ||
329 | case 0x02: | ||
330 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], | ||
331 | csize - 4, "UTF-16BE"); | ||
332 | break; | ||
333 | case 0x03: | ||
334 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], | ||
335 | csize - 4, "UTF-8"); | ||
336 | break; | ||
337 | default: | ||
338 | /* bad encoding byte, | ||
339 | try to convert from iso-8859-1 */ | ||
340 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14], | ||
341 | csize - 4, "ISO-8859-1"); | ||
342 | break; | ||
343 | } | ||
344 | break; | ||
345 | case I: | ||
346 | if (csize < 2) | ||
347 | return 0; /* malformed */ | ||
348 | /* find end of mime type */ | ||
349 | off = 11; | ||
350 | while ( (off < size) && | ||
351 | (off - pos < csize) && | ||
352 | (data[pos + off] == '\0') ) | ||
353 | off++; | ||
354 | if ( (off >= csize) || | ||
355 | (data[pos+off] != '\0') ) | ||
356 | return 0; /* malformed */ | ||
357 | off++; | ||
358 | mime = strdup ((const char*) &data[pos + 11]); | ||
359 | |||
360 | switch (data[pos+off]) | ||
361 | { | ||
362 | case 0x03: | ||
363 | case 0x04: | ||
364 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
365 | break; | ||
366 | case 0x07: | ||
367 | case 0x08: | ||
368 | case 0x09: | ||
369 | case 0x0A: | ||
370 | case 0x0B: | ||
371 | case 0x0C: | ||
372 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
373 | break; | ||
374 | case 0x0D: | ||
375 | case 0x0E: | ||
376 | case 0x0F: | ||
377 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
378 | break; | ||
379 | case 0x14: | ||
380 | type = EXTRACTOR_METATYPE_LOGO; | ||
381 | type = EXTRACTOR_METATYPE_LOGO; | ||
382 | break; | ||
383 | default: | ||
384 | type = EXTRACTOR_METATYPE_PICTURE; | ||
385 | break; | ||
386 | } | ||
387 | off++; | ||
388 | |||
389 | /* find end of description */ | ||
390 | while ( (off < size) && | ||
391 | (off - pos < csize) && | ||
392 | (data[pos + off] == '\0') ) | ||
393 | off++; | ||
394 | if ( (off >= csize) || | ||
395 | (data[pos+off] != '\0') ) | ||
396 | { | ||
397 | if (mime != NULL) | ||
398 | free (mime); | ||
399 | return 0; /* malformed */ | ||
400 | } | ||
401 | off++; | ||
402 | if ( (mime != NULL) && | ||
403 | (0 == strcasecmp ("-->", | ||
404 | mime)) ) | ||
405 | { | ||
406 | /* not supported */ | ||
407 | } | ||
408 | else | ||
409 | { | ||
410 | if (0 != proc (proc_cls, | ||
411 | "id3v24", | ||
412 | type, | ||
413 | EXTRACTOR_METAFORMAT_BINARY, | ||
414 | mime, | ||
415 | (const char*) &data[pos + off], | ||
416 | csize + 6 - off)) | ||
417 | { | ||
418 | if (mime != NULL) | ||
419 | free (mime); | ||
420 | return 1; | ||
421 | } | ||
422 | } | ||
423 | if (mime != NULL) | ||
424 | free (mime); | ||
425 | word = NULL; | ||
426 | break; | ||
427 | default: | ||
428 | return 0; | ||
429 | } | ||
430 | if ((word != NULL) && (strlen (word) > 0)) | ||
431 | { | ||
432 | if (0 != proc (proc_cls, | ||
433 | "id3v24", | ||
434 | tmap[i].type, | ||
435 | EXTRACTOR_METAFORMAT_UTF8, | ||
436 | "text/plain", | ||
437 | word, | ||
438 | strlen(word)+1)) | ||
439 | { | ||
440 | free (word); | ||
441 | return 1; | ||
442 | } | ||
443 | } | ||
444 | if (word != NULL) | ||
445 | free (word); | ||
446 | break; | ||
447 | } | ||
448 | i++; | ||
449 | } | ||
450 | pos += 10 + csize; | ||
451 | } | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | /* end of id3v24_extractor.c */ | ||
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c index 4f50d05..0302dc6 100644 --- a/src/plugins/id3v2_extractor.c +++ b/src/plugins/id3v2_extractor.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #endif | 26 | #endif |
27 | #include "convert.h" | 27 | #include "convert.h" |
28 | 28 | ||
29 | #include "extractor_plugins.h" | ||
30 | |||
29 | #define DEBUG_EXTRACT_ID3v2 0 | 31 | #define DEBUG_EXTRACT_ID3v2 0 |
30 | 32 | ||
31 | enum Id3v2Fmt | 33 | enum Id3v2Fmt |
@@ -47,314 +49,723 @@ typedef struct | |||
47 | 49 | ||
48 | static Matches tmap[] = { | 50 | static Matches tmap[] = { |
49 | /* skipping UFI */ | 51 | /* skipping UFI */ |
50 | {"TT1", EXTRACTOR_METATYPE_SECTION, T}, | 52 | {"TT1 ", EXTRACTOR_METATYPE_SECTION, T}, |
51 | {"TT2", EXTRACTOR_METATYPE_TITLE, T}, | 53 | {"TT2 ", EXTRACTOR_METATYPE_TITLE, T}, |
52 | {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | 54 | {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T}, |
53 | {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, | 55 | {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T}, |
54 | {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, | 56 | {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T}, |
55 | {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | 57 | {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T}, |
56 | {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | 58 | {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T}, |
57 | {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, | 59 | {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T}, |
58 | {"TXT", EXTRACTOR_METATYPE_WRITER, T}, | 60 | {"TXT ", EXTRACTOR_METATYPE_WRITER, T}, |
59 | {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, | 61 | {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T}, |
60 | {"TCO", EXTRACTOR_METATYPE_GENRE, T}, | 62 | {"TCO ", EXTRACTOR_METATYPE_GENRE, T}, |
61 | {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, | 63 | {"TAL ", EXTRACTOR_METATYPE_ALBUM, T}, |
62 | {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | 64 | {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T}, |
63 | {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | 65 | {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, |
64 | {"TRC", EXTRACTOR_METATYPE_ISRC, T}, | 66 | {"TRC ", EXTRACTOR_METATYPE_ISRC, T}, |
65 | {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, | 67 | {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, |
66 | /* | 68 | /* |
67 | FIXME: these two and TYE should be combined into | 69 | FIXME: these two and TYE should be combined into |
68 | the actual publication date (if TRD is missing) | 70 | the actual publication date (if TRD is missing) |
69 | {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, | 71 | {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
70 | {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, | 72 | {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE}, |
71 | */ | 73 | */ |
72 | {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, | 74 | {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T}, |
73 | {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, | 75 | {"TMT ", EXTRACTOR_METATYPE_SOURCE, T}, |
74 | {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | 76 | {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, |
75 | {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | 77 | {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, |
76 | {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, | 78 | {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T}, |
77 | {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, | 79 | {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T}, |
78 | {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, | 80 | {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T}, |
79 | {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, | 81 | {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, |
80 | {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, | 82 | {"TOF ", EXTRACTOR_METATYPE_FILENAME, T}, |
81 | {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | 83 | {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ |
82 | {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, | 84 | {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, |
83 | /* skipping TDY, TKE */ | 85 | /* skipping TDY, TKE */ |
84 | {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | 86 | {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, |
85 | {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | 87 | {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, |
86 | {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | 88 | {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, |
87 | {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, | 89 | {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, |
88 | /* skipping TXX */ | 90 | /* skipping TXX */ |
89 | 91 | ||
90 | {"WAF", EXTRACTOR_METATYPE_URL, U}, | 92 | {"WAF ", EXTRACTOR_METATYPE_URL, U}, |
91 | {"WAR", EXTRACTOR_METATYPE_URL, U}, | 93 | {"WAR ", EXTRACTOR_METATYPE_URL, U}, |
92 | {"WAS", EXTRACTOR_METATYPE_URL, U}, | 94 | {"WAS ", EXTRACTOR_METATYPE_URL, U}, |
93 | {"WCM", EXTRACTOR_METATYPE_URL, U}, | 95 | {"WCM ", EXTRACTOR_METATYPE_URL, U}, |
94 | {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, | 96 | {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U}, |
95 | {"WCB", EXTRACTOR_METATYPE_URL, U}, | 97 | {"WCB ", EXTRACTOR_METATYPE_URL, U}, |
96 | /* skipping WXX */ | 98 | /* skipping WXX */ |
97 | {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | 99 | {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, |
98 | /* skipping MCI */ | 100 | /* skipping MCI */ |
99 | /* skipping ETC */ | 101 | /* skipping ETC */ |
100 | /* skipping MLL */ | 102 | /* skipping MLL */ |
101 | /* skipping STC */ | 103 | /* skipping STC */ |
102 | {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, | 104 | {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL}, |
103 | {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, | 105 | {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL}, |
104 | {"COM", EXTRACTOR_METATYPE_COMMENT, L}, | 106 | {"COM ", EXTRACTOR_METATYPE_COMMENT, L}, |
105 | /* skipping RVA */ | 107 | /* skipping RVA */ |
106 | /* skipping EQU */ | 108 | /* skipping EQU */ |
107 | /* skipping REV */ | 109 | /* skipping REV */ |
108 | {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, | 110 | {"PIC ", EXTRACTOR_METATYPE_PICTURE, I}, |
109 | /* skipping GEN */ | 111 | /* skipping GEN */ |
110 | /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ | 112 | /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ |
111 | /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ | 113 | /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ |
112 | /* skipping BUF */ | 114 | /* skipping BUF */ |
113 | /* skipping CRM */ | 115 | /* skipping CRM */ |
114 | /* skipping CRA */ | 116 | /* skipping CRA */ |
115 | /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ | 117 | /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */ |
118 | |||
119 | |||
120 | {"TALB", EXTRACTOR_METATYPE_ALBUM, T}, | ||
121 | {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, | ||
122 | {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T}, | ||
123 | {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
124 | {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
125 | {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */ | ||
126 | /* TDLY */ | ||
127 | {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T}, | ||
128 | {"TEXT", EXTRACTOR_METATYPE_WRITER, T}, | ||
129 | {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, | ||
130 | /* TIME, idv23 only */ | ||
131 | {"TIT1", EXTRACTOR_METATYPE_SECTION, T}, | ||
132 | {"TIT2", EXTRACTOR_METATYPE_TITLE, T}, | ||
133 | {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, | ||
134 | /* TKEY */ | ||
135 | {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T}, | ||
136 | {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ | ||
137 | {"TMED", EXTRACTOR_METATYPE_SOURCE, T}, | ||
138 | {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, | ||
139 | {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, | ||
140 | {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, | ||
141 | {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T}, | ||
142 | {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */ | ||
143 | {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T}, | ||
144 | {"TPE1", EXTRACTOR_METATYPE_ARTIST, T}, | ||
145 | {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
146 | {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T}, | ||
147 | {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T}, | ||
148 | {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T}, | ||
149 | {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T}, | ||
150 | {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, | ||
151 | /* TRDA, idv23 only */ | ||
152 | {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T}, | ||
153 | /* TRSO */ | ||
154 | {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */ | ||
155 | {"TSRC", EXTRACTOR_METATYPE_ISRC, T}, | ||
156 | /* TSSE */ | ||
157 | {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */ | ||
158 | {"WCOM", EXTRACTOR_METATYPE_URL, U}, | ||
159 | {"WCOP", EXTRACTOR_METATYPE_URL, U}, | ||
160 | {"WOAF", EXTRACTOR_METATYPE_URL, U}, | ||
161 | {"WOAS", EXTRACTOR_METATYPE_URL, U}, | ||
162 | {"WORS", EXTRACTOR_METATYPE_URL, U}, | ||
163 | {"WPAY", EXTRACTOR_METATYPE_URL, U}, | ||
164 | {"WPUB", EXTRACTOR_METATYPE_URL, U}, | ||
165 | {"WXXX", EXTRACTOR_METATYPE_URL, T}, | ||
166 | {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */ | ||
167 | /* ... */ | ||
168 | {"USLT", EXTRACTOR_METATYPE_LYRICS, UL }, | ||
169 | {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL }, | ||
170 | {"COMM", EXTRACTOR_METATYPE_COMMENT, L}, | ||
171 | /* ... */ | ||
172 | {"APIC", EXTRACTOR_METATYPE_PICTURE, I}, | ||
173 | /* ... */ | ||
174 | {"LINK", EXTRACTOR_METATYPE_URL, U}, | ||
175 | /* ... */ | ||
176 | {"USER", EXTRACTOR_METATYPE_LICENSE, T}, | ||
177 | /* ... */ | ||
178 | |||
179 | /* new frames in id3v24 */ | ||
180 | /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */ | ||
181 | {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T}, | ||
182 | /* TDRC, TDRL, TDTG */ | ||
183 | {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, | ||
184 | {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T}, | ||
185 | {"TMOO", EXTRACTOR_METATYPE_MOOD, T}, | ||
186 | {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T}, | ||
187 | {"TSOA", EXTRACTOR_METATYPE_ALBUM, T}, | ||
188 | {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T}, | ||
189 | {"TSOT", EXTRACTOR_METATYPE_TITLE, T}, | ||
190 | {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T}, | ||
191 | |||
116 | {NULL, 0, T}, | 192 | {NULL, 0, T}, |
117 | }; | 193 | }; |
118 | 194 | ||
119 | 195 | struct id3v2_state | |
120 | /* mimetype = audio/mpeg */ | ||
121 | int | ||
122 | EXTRACTOR_id3v2_extract (const unsigned char *data, | ||
123 | size_t size, | ||
124 | EXTRACTOR_MetaDataProcessor proc, | ||
125 | void *proc_cls, | ||
126 | const char *options) | ||
127 | { | 196 | { |
197 | int state; | ||
128 | unsigned int tsize; | 198 | unsigned int tsize; |
129 | unsigned int pos; | 199 | size_t csize; |
200 | char id[4]; | ||
201 | int32_t ti; | ||
202 | char ver; | ||
203 | char extended_header; | ||
204 | uint16_t frame_flags; | ||
205 | char *mime; | ||
206 | }; | ||
207 | |||
208 | enum ID3v2State | ||
209 | { | ||
210 | ID3V2_INVALID = -1, | ||
211 | ID3V2_READING_HEADER = 0, | ||
212 | ID3V2_READING_FRAME_HEADER, | ||
213 | ID3V23_READING_EXTENDED_HEADER, | ||
214 | ID3V24_READING_EXTENDED_HEADER, | ||
215 | ID3V2_READING_FRAME | ||
216 | }; | ||
217 | |||
218 | void | ||
219 | EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin) | ||
220 | { | ||
221 | struct id3v2_state *state; | ||
222 | state = plugin->state = malloc (sizeof (struct id3v2_state)); | ||
223 | if (state == NULL) | ||
224 | return; | ||
225 | memset (state, 0, sizeof (struct id3v2_state)); | ||
226 | state->state = ID3V2_READING_HEADER; | ||
227 | state->ti = -1; | ||
228 | state->mime = NULL; | ||
229 | } | ||
230 | |||
231 | void | ||
232 | EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin) | ||
233 | { | ||
234 | struct id3v2_state *state = plugin->state; | ||
235 | if (state != NULL) | ||
236 | { | ||
237 | if (state->mime != NULL) | ||
238 | free (state->mime); | ||
239 | free (state); | ||
240 | } | ||
241 | plugin->state = NULL; | ||
242 | } | ||
243 | |||
244 | static int | ||
245 | find_type (const char *id, size_t len) | ||
246 | { | ||
247 | int i; | ||
248 | for (i = 0; tmap[i].text != NULL; i++) | ||
249 | if (0 == strncmp (tmap[i].text, id, len)) | ||
250 | return i; | ||
251 | return -1; | ||
252 | } | ||
253 | |||
254 | int | ||
255 | EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin, | ||
256 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
257 | { | ||
258 | int64_t file_position; | ||
259 | int64_t file_size; | ||
260 | int64_t offset = 0; | ||
261 | int64_t size; | ||
262 | struct id3v2_state *state; | ||
263 | unsigned char *data; | ||
264 | char *word = NULL; | ||
130 | unsigned int off; | 265 | unsigned int off; |
131 | enum EXTRACTOR_MetaType type; | 266 | enum EXTRACTOR_MetaType type; |
132 | const char *mime; | 267 | unsigned char picture_type; |
133 | 268 | ||
134 | if ((size < 16) || | 269 | if (plugin == NULL || plugin->state == NULL) |
135 | (data[0] != 0x49) || | 270 | return 1; |
136 | (data[1] != 0x44) || | ||
137 | (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00)) | ||
138 | return 0; | ||
139 | /* unsync: (data[5] & 0x80) > 0; */ | ||
140 | tsize = (((data[6] & 0x7F) << 21) | | ||
141 | ((data[7] & 0x7F) << 14) | | ||
142 | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); | ||
143 | 271 | ||
144 | if (tsize + 10 > size) | 272 | state = plugin->state; |
145 | return 0; | 273 | file_position = plugin->position; |
146 | pos = 10; | 274 | file_size = plugin->fsize; |
147 | while (pos < tsize) | 275 | size = plugin->map_size; |
276 | data = plugin->shm_ptr; | ||
277 | |||
278 | if (plugin->seek_request < 0) | ||
279 | return 1; | ||
280 | if (file_position - plugin->seek_request > 0) | ||
281 | { | ||
282 | plugin->seek_request = -1; | ||
283 | return 1; | ||
284 | } | ||
285 | if (plugin->seek_request - file_position < size) | ||
286 | offset = plugin->seek_request - file_position; | ||
287 | |||
288 | while (1) | ||
289 | { | ||
290 | switch (state->state) | ||
148 | { | 291 | { |
149 | size_t csize; | 292 | case ID3V2_INVALID: |
150 | int i; | 293 | plugin->seek_request = -1; |
294 | return 1; | ||
295 | case ID3V2_READING_HEADER: | ||
296 | /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq: | ||
297 | * Q: Where is an ID3v2 tag located in an MP3 file? | ||
298 | * A: It is most likely located at the beginning of the file. Look for the | ||
299 | * marker "ID3" in the first 3 bytes of the file. If it's not there, it | ||
300 | * could be at the end of the file (if the tag is ID3v2.4). Look for the | ||
301 | * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the | ||
302 | * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags | ||
303 | * in the actual MPEG stream, on an MPEG frame boundary. Almost nobody does | ||
304 | * this. | ||
305 | * Parsing of such tags will not be completely correct, because we can't | ||
306 | * seek backwards. We will have to seek to file_size - chunk_size instead | ||
307 | * (by the way, chunk size is theoretically unknown, LE is free to use any chunk | ||
308 | * size, even though plugins often make assumptions about chunk size being large | ||
309 | * enough to make one atomic read without seeking, if offset == 0) and search | ||
310 | * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before | ||
311 | * it (or 10 bytes before the end of file, if id3v1 is not there; not sure | ||
312 | * about APETAGs; we should probably just scan byte-by-byte from the end of file, | ||
313 | * until we hit 3DI, or reach the offset == 0), and use it to set offset to the | ||
314 | * start of ID3v24 header, adjust the following file_position check and data | ||
315 | * indices (use offset), and otherwise proceed as normal (maybe file size checks | ||
316 | * along the way will have to be adjusted by -1, or made ">" instead of ">="; | ||
317 | * these problems do not arise for tags at the beginning of the file, since | ||
318 | * audio itself is usually at least 1-byte long; when the tag is at the end of | ||
319 | * file, these checks will have to be 100% correct). | ||
320 | * If there are two tags (at the beginning and at the end of the file), | ||
321 | * a SEEK in the one at the beginning of the file can be used to seek to the | ||
322 | * one at the end. | ||
323 | */ | ||
324 | /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that | ||
325 | * tells the parser to augment id3v1 values with the values from id3v2 (if this | ||
326 | * flag is not set, id3v2 parser must discard id3v1 data). | ||
327 | * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored. | ||
328 | */ | ||
329 | if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/) | ||
330 | { | ||
331 | state->state = ID3V2_INVALID; | ||
332 | break; | ||
333 | } | ||
334 | state->ver = data[3]; | ||
335 | if (state->ver == 0x02) | ||
336 | { | ||
337 | state->extended_header = 0; | ||
338 | } | ||
339 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
340 | { | ||
341 | if ((data[5] & 0x80) > 0) | ||
342 | { | ||
343 | /* unsync is not supported in id3v23 or id3v24*/ | ||
344 | state->state = ID3V2_INVALID; | ||
345 | break; | ||
346 | } | ||
347 | state->extended_header = (data[5] & 0x40) > 0; | ||
348 | if ((data[5] & 0x20) > 0) | ||
349 | { | ||
350 | /* experimental is not supported in id3v23 or id3v24*/ | ||
351 | state->state = ID3V2_INVALID; | ||
352 | break; | ||
353 | } | ||
354 | } | ||
355 | state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00)); | ||
356 | if (state->tsize + 10 > file_size) | ||
357 | { | ||
358 | state->state = ID3V2_INVALID; | ||
359 | break; | ||
360 | } | ||
361 | offset = 10; | ||
362 | if (state->ver == 0x03 && state->extended_header) | ||
363 | state->state = ID3V23_READING_EXTENDED_HEADER; | ||
364 | else if (state->ver == 0x04 && state->extended_header) | ||
365 | state->state = ID3V24_READING_EXTENDED_HEADER; | ||
366 | else | ||
367 | state->state = ID3V2_READING_FRAME_HEADER; | ||
368 | break; | ||
369 | case ID3V23_READING_EXTENDED_HEADER: | ||
370 | if (offset + 9 >= size) | ||
371 | { | ||
372 | if (offset == 0) | ||
373 | { | ||
374 | state->state = ID3V2_INVALID; | ||
375 | break; | ||
376 | } | ||
377 | plugin->seek_request = file_position + offset; | ||
378 | return 0; | ||
379 | } | ||
380 | if (state->ver == 0x03 && state->extended_header) | ||
381 | { | ||
382 | uint32_t padding, extended_header_size; | ||
383 | extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0)); | ||
384 | padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0)); | ||
385 | if (data[offset + 4] == 0 && data[offset + 5] == 0) | ||
386 | /* Skip the CRC32 byte after extended header */ | ||
387 | offset += 1; | ||
388 | offset += 4 + extended_header_size; | ||
389 | if (padding < state->tsize) | ||
390 | state->tsize -= padding; | ||
391 | else | ||
392 | { | ||
393 | state->state = ID3V2_INVALID; | ||
394 | break; | ||
395 | } | ||
396 | } | ||
397 | break; | ||
398 | case ID3V24_READING_EXTENDED_HEADER: | ||
399 | if (offset + 6 >= size) | ||
400 | { | ||
401 | if (offset == 0) | ||
402 | { | ||
403 | state->state = ID3V2_INVALID; | ||
404 | break; | ||
405 | } | ||
406 | plugin->seek_request = file_position + offset; | ||
407 | return 0; | ||
408 | } | ||
409 | if ( (state->ver == 0x04) && (state->extended_header)) | ||
410 | { | ||
411 | uint32_t extended_header_size; | ||
151 | 412 | ||
152 | if (pos + 7 > tsize) | 413 | extended_header_size = (((data[offset]) << 24) | |
414 | ((data[offset + 1]) << 16) | | ||
415 | ((data[offset + 2]) << 8) | | ||
416 | ((data[offset + 3]) << 0)); | ||
417 | offset += 4 + extended_header_size; | ||
418 | } | ||
419 | break; | ||
420 | case ID3V2_READING_FRAME_HEADER: | ||
421 | if (file_position + offset > state->tsize || | ||
422 | ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) || | ||
423 | (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize)) | ||
424 | { | ||
425 | state->state = ID3V2_INVALID; | ||
426 | break; | ||
427 | } | ||
428 | if (((state->ver == 0x02) && (offset + 6 >= size)) || | ||
429 | (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size))) | ||
430 | { | ||
431 | plugin->seek_request = file_position + offset; | ||
153 | return 0; | 432 | return 0; |
154 | csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; | 433 | } |
155 | if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) | 434 | if (state->ver == 0x02) |
435 | { | ||
436 | memcpy (state->id, &data[offset], 3); | ||
437 | state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5]; | ||
438 | if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0)) | ||
439 | { | ||
440 | state->state = ID3V2_INVALID; | ||
441 | break; | ||
442 | } | ||
443 | offset += 6; | ||
444 | state->frame_flags = 0; | ||
445 | } | ||
446 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
447 | { | ||
448 | memcpy (state->id, &data[offset], 4); | ||
449 | if (state->ver == 0x03) | ||
450 | state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7]; | ||
451 | else if (state->ver == 0x04) | ||
452 | state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00); | ||
453 | if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0)) | ||
454 | { | ||
455 | state->state = ID3V2_INVALID; | ||
456 | break; | ||
457 | } | ||
458 | state->frame_flags = (data[offset + 8] << 8) + data[offset + 9]; | ||
459 | if (state->ver == 0x03) | ||
460 | { | ||
461 | if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ || | ||
462 | ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */) | ||
463 | { | ||
464 | /* Skip to next frame header */ | ||
465 | offset += 10 + state->csize; | ||
466 | break; | ||
467 | } | ||
468 | } | ||
469 | else if (state->ver == 0x04) | ||
470 | { | ||
471 | if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ || | ||
472 | ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ || | ||
473 | ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */) | ||
474 | { | ||
475 | /* Skip to next frame header */ | ||
476 | offset += 10 + state->csize; | ||
477 | break; | ||
478 | } | ||
479 | if ((state->frame_flags & 0x01) > 0) | ||
480 | { | ||
481 | /* Skip data length indicator */ | ||
482 | state->csize -= 4; | ||
483 | offset += 4; | ||
484 | } | ||
485 | } | ||
486 | offset += 10; | ||
487 | } | ||
488 | |||
489 | state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0)); | ||
490 | if (state->ti == -1) | ||
491 | { | ||
492 | offset += state->csize; | ||
493 | break; | ||
494 | } | ||
495 | state->state = ID3V2_READING_FRAME; | ||
496 | break; | ||
497 | case ID3V2_READING_FRAME: | ||
498 | if (offset == 0 && state->csize > size) | ||
499 | { | ||
500 | /* frame size is larger than the size of one data chunk we get at a time */ | ||
501 | offset += state->csize; | ||
502 | state->state = ID3V2_READING_FRAME_HEADER; | ||
503 | break; | ||
504 | } | ||
505 | if (offset + state->csize > size) | ||
506 | { | ||
507 | plugin->seek_request = file_position + offset; | ||
508 | return 0; | ||
509 | } | ||
510 | word = NULL; | ||
511 | if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) || | ||
512 | ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0))) | ||
513 | { | ||
514 | /* "group" identifier, skip a byte */ | ||
515 | offset++; | ||
516 | state->csize--; | ||
517 | } | ||
518 | switch (tmap[state->ti].fmt) | ||
519 | { | ||
520 | case T: | ||
521 | if (data[offset] == 0x00) | ||
522 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
523 | state->csize - 1, "ISO-8859-1"); | ||
524 | else if (data[offset] == 0x01) | ||
525 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
526 | state->csize - 1, "UCS-2"); | ||
527 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
528 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
529 | state->csize - 1, "UTF-16BE"); | ||
530 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
531 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
532 | state->csize - 1, "UTF-8"); | ||
533 | else | ||
534 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
535 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1], | ||
536 | state->csize - 1, "ISO-8859-1"); | ||
537 | break; | ||
538 | case U: | ||
539 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset], | ||
540 | state->csize, "ISO-8859-1"); | ||
541 | break; | ||
542 | case UL: | ||
543 | if (state->csize < 6) | ||
544 | { | ||
545 | /* malformed */ | ||
546 | state->state = ID3V2_INVALID; | ||
547 | break; | ||
548 | } | ||
549 | /* find end of description */ | ||
550 | off = 4; | ||
551 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) | ||
552 | off++; | ||
553 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
554 | { | ||
555 | /* malformed */ | ||
556 | state->state = ID3V2_INVALID; | ||
557 | break; | ||
558 | } | ||
559 | off++; | ||
560 | if (data[offset] == 0x00) | ||
561 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
562 | state->csize - off, "ISO-8859-1"); | ||
563 | else if (data[offset] == 0x01) | ||
564 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
565 | state->csize - off, "UCS-2"); | ||
566 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
567 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
568 | state->csize - off, "UTF-16BE"); | ||
569 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
570 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
571 | state->csize - off, "UTF-8"); | ||
572 | else | ||
573 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
574 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
575 | state->csize - off, "ISO-8859-1"); | ||
576 | break; | ||
577 | case SL: | ||
578 | if (state->csize < 7) | ||
579 | { | ||
580 | /* malformed */ | ||
581 | state->state = ID3V2_INVALID; | ||
582 | break; | ||
583 | } | ||
584 | if (data[offset] == 0x00) | ||
585 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
586 | state->csize - 6, "ISO-8859-1"); | ||
587 | else if (data[offset] == 0x01) | ||
588 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
589 | state->csize - 6, "UCS-2"); | ||
590 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
591 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
592 | state->csize - 6, "UTF-16BE"); | ||
593 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
594 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
595 | state->csize - 6, "UTF-8"); | ||
596 | else | ||
597 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
598 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6], | ||
599 | state->csize - 6, "ISO-8859-1"); | ||
600 | break; | ||
601 | case L: | ||
602 | if (state->csize < 5) | ||
603 | { | ||
604 | /* malformed */ | ||
605 | state->state = ID3V2_INVALID; | ||
606 | break; | ||
607 | } | ||
608 | /* find end of description */ | ||
609 | off = 4; | ||
610 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) | ||
611 | off++; | ||
612 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
613 | { | ||
614 | /* malformed */ | ||
615 | state->state = ID3V2_INVALID; | ||
616 | break; | ||
617 | } | ||
618 | off++; | ||
619 | |||
620 | if (data[offset] == 0x00) | ||
621 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
622 | state->csize - off, "ISO-8859-1"); | ||
623 | else if (data[offset] == 0x01) | ||
624 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
625 | state->csize - off, "UCS-2"); | ||
626 | else if ((state->ver == 0x04) && (data[offset] == 0x02)) | ||
627 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
628 | state->csize - off, "UTF-1offBE"); | ||
629 | else if ((state->ver == 0x04) && (data[offset] == 0x03)) | ||
630 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
631 | state->csize - off, "UTF-8"); | ||
632 | else | ||
633 | /* bad encoding byte, try to convert from iso-8859-1 */ | ||
634 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off], | ||
635 | state->csize - off, "ISO-8859-1"); | ||
636 | break; | ||
637 | case I: | ||
638 | if ( ( (state->ver == 0x02) && | ||
639 | (state->csize < 7) ) || | ||
640 | ( ( (state->ver == 0x03) || | ||
641 | (state->ver == 0x04)) && (state->csize < 5)) ) | ||
642 | { | ||
643 | /* malformed */ | ||
644 | state->state = ID3V2_INVALID; | ||
645 | break; | ||
646 | } | ||
647 | if (state->mime != NULL) | ||
648 | free (state->mime); | ||
649 | state->mime = NULL; | ||
650 | if (state->ver == 0x02) | ||
651 | { | ||
652 | off = 5; | ||
653 | picture_type = data[offset + 5]; | ||
654 | } | ||
655 | else if ((state->ver == 0x03) || (state->ver == 0x04)) | ||
656 | { | ||
657 | off = 1; | ||
658 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') ) | ||
659 | off++; | ||
660 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
661 | { | ||
662 | /* malformed */ | ||
663 | state->state = ID3V2_INVALID; | ||
664 | break; | ||
665 | } | ||
666 | state->mime = malloc (off); | ||
667 | memcpy (state->mime, &data[offset + 1], off - 1); | ||
668 | state->mime[off - 1] = '\0'; | ||
669 | off += 1; | ||
670 | picture_type = data[offset]; | ||
671 | off += 1; | ||
672 | } | ||
673 | /* find end of description */ | ||
674 | while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0')) | ||
675 | off++; | ||
676 | if ((off >= state->csize) || (data[offset + off] != '\0')) | ||
677 | { | ||
678 | free (state->mime); | ||
679 | state->mime = NULL; | ||
680 | /* malformed */ | ||
681 | state->state = ID3V2_INVALID; | ||
682 | break; | ||
683 | } | ||
684 | off++; | ||
685 | switch (picture_type) | ||
686 | { | ||
687 | case 0x03: | ||
688 | case 0x04: | ||
689 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
690 | break; | ||
691 | case 0x07: | ||
692 | case 0x08: | ||
693 | case 0x09: | ||
694 | case 0x0A: | ||
695 | case 0x0B: | ||
696 | case 0x0C: | ||
697 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
698 | break; | ||
699 | case 0x0D: | ||
700 | case 0x0E: | ||
701 | case 0x0F: | ||
702 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
703 | break; | ||
704 | case 0x14: | ||
705 | type = EXTRACTOR_METATYPE_LOGO; | ||
706 | type = EXTRACTOR_METATYPE_LOGO; | ||
707 | break; | ||
708 | default: | ||
709 | type = EXTRACTOR_METATYPE_PICTURE; | ||
710 | break; | ||
711 | } | ||
712 | if (state->ver == 0x02) | ||
713 | { | ||
714 | if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3)) | ||
715 | state->mime = strdup ("image/png"); | ||
716 | else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3)) | ||
717 | state->mime = strdup ("image/jpeg"); | ||
718 | else | ||
719 | state->mime = NULL; | ||
720 | } | ||
721 | else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL)) | ||
722 | { | ||
723 | size_t mime_len = strlen (state->mime); | ||
724 | char *type_mime = malloc (mime_len + 6 + 1); | ||
725 | snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime); | ||
726 | free (state->mime); | ||
727 | state->mime = type_mime; | ||
728 | } | ||
729 | if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->"))) | ||
730 | { | ||
731 | /* not supported */ | ||
732 | free (state->mime); | ||
733 | state->mime = NULL; | ||
734 | } | ||
735 | else | ||
736 | { | ||
737 | if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off)) | ||
738 | { | ||
739 | if (state->mime != NULL) | ||
740 | free (state->mime); | ||
741 | state->mime = NULL; | ||
742 | return 1; | ||
743 | } | ||
744 | if (state->mime != NULL) | ||
745 | free (state->mime); | ||
746 | state->mime = NULL; | ||
747 | } | ||
748 | word = NULL; | ||
156 | break; | 749 | break; |
157 | i = 0; | 750 | default: |
158 | while (tmap[i].text != NULL) | 751 | return 1; |
752 | } | ||
753 | if ((word != NULL) && (strlen (word) > 0)) | ||
754 | { | ||
755 | if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1)) | ||
159 | { | 756 | { |
160 | if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3)) | 757 | free (word); |
161 | { | 758 | return 1; |
162 | char *word; | ||
163 | /* this byte describes the encoding | ||
164 | try to convert strings to UTF-8 | ||
165 | if it fails, then forget it */ | ||
166 | switch (tmap[i].fmt) | ||
167 | { | ||
168 | case T: | ||
169 | switch (data[pos + 6]) | ||
170 | { | ||
171 | case 0x00: | ||
172 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | ||
173 | csize - 1, "ISO-8859-1"); | ||
174 | break; | ||
175 | case 0x01: | ||
176 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | ||
177 | csize - 1, "UCS-2"); | ||
178 | break; | ||
179 | default: | ||
180 | /* bad encoding byte, | ||
181 | try to convert from iso-8859-1 */ | ||
182 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7], | ||
183 | csize - 1, "ISO-8859-1"); | ||
184 | break; | ||
185 | } | ||
186 | break; | ||
187 | case U: | ||
188 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6], | ||
189 | csize, "ISO-8859-1"); | ||
190 | break; | ||
191 | case UL: | ||
192 | if (csize < 6) | ||
193 | return 0; /* malformed */ | ||
194 | /* find end of description */ | ||
195 | off = 10; | ||
196 | while ( (off < size) && | ||
197 | (off - pos < csize) && | ||
198 | (data[pos + off] == '\0') ) | ||
199 | off++; | ||
200 | if ( (off >= csize) || | ||
201 | (data[pos+off] != '\0') ) | ||
202 | return 0; /* malformed */ | ||
203 | off++; | ||
204 | switch (data[pos + 6]) | ||
205 | { | ||
206 | case 0x00: | ||
207 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
208 | csize - off, "ISO-8859-1"); | ||
209 | break; | ||
210 | case 0x01: | ||
211 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
212 | csize - off, "UCS-2"); | ||
213 | break; | ||
214 | default: | ||
215 | /* bad encoding byte, | ||
216 | try to convert from iso-8859-1 */ | ||
217 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off], | ||
218 | csize - off, "ISO-8859-1"); | ||
219 | break; | ||
220 | } | ||
221 | break; | ||
222 | case SL: | ||
223 | if (csize < 7) | ||
224 | return 0; /* malformed */ | ||
225 | /* find end of description */ | ||
226 | switch (data[pos + 6]) | ||
227 | { | ||
228 | case 0x00: | ||
229 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
230 | csize - 6, "ISO-8859-1"); | ||
231 | break; | ||
232 | case 0x01: | ||
233 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
234 | csize - 6, "UCS-2"); | ||
235 | break; | ||
236 | default: | ||
237 | /* bad encoding byte, | ||
238 | try to convert from iso-8859-1 */ | ||
239 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12], | ||
240 | csize - 6, "ISO-8859-1"); | ||
241 | break; | ||
242 | } | ||
243 | break; | ||
244 | case L: | ||
245 | if (csize < 5) | ||
246 | return 0; /* malformed */ | ||
247 | /* find end of description */ | ||
248 | switch (data[pos + 6]) | ||
249 | { | ||
250 | case 0x00: | ||
251 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
252 | csize - 4, "ISO-8859-1"); | ||
253 | break; | ||
254 | case 0x01: | ||
255 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
256 | csize - 4, "UCS-2"); | ||
257 | break; | ||
258 | default: | ||
259 | /* bad encoding byte, | ||
260 | try to convert from iso-8859-1 */ | ||
261 | word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10], | ||
262 | csize - 4, "ISO-8859-1"); | ||
263 | break; | ||
264 | } | ||
265 | break; | ||
266 | case I: | ||
267 | if (csize < 6) | ||
268 | return 0; /* malformed */ | ||
269 | /* find end of description */ | ||
270 | off = 12; | ||
271 | while ( (off < size) && | ||
272 | (off - pos < csize) && | ||
273 | (data[pos + off] == '\0') ) | ||
274 | off++; | ||
275 | if ( (off >= csize) || | ||
276 | (data[pos+off] != '\0') ) | ||
277 | return 0; /* malformed */ | ||
278 | off++; | ||
279 | switch (data[pos+11]) | ||
280 | { | ||
281 | case 0x03: | ||
282 | case 0x04: | ||
283 | type = EXTRACTOR_METATYPE_COVER_PICTURE; | ||
284 | break; | ||
285 | case 0x07: | ||
286 | case 0x08: | ||
287 | case 0x09: | ||
288 | case 0x0A: | ||
289 | case 0x0B: | ||
290 | case 0x0C: | ||
291 | type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE; | ||
292 | break; | ||
293 | case 0x0D: | ||
294 | case 0x0E: | ||
295 | case 0x0F: | ||
296 | type = EXTRACTOR_METATYPE_EVENT_PICTURE; | ||
297 | break; | ||
298 | case 0x14: | ||
299 | type = EXTRACTOR_METATYPE_LOGO; | ||
300 | type = EXTRACTOR_METATYPE_LOGO; | ||
301 | break; | ||
302 | default: | ||
303 | type = EXTRACTOR_METATYPE_PICTURE; | ||
304 | break; | ||
305 | } | ||
306 | if (0 == strncasecmp ("PNG", | ||
307 | (const char*) &data[pos + 7], 3)) | ||
308 | mime = "image/png"; | ||
309 | else if (0 == strncasecmp ("JPG", | ||
310 | (const char*) &data[pos + 7], 3)) | ||
311 | mime = "image/jpeg"; | ||
312 | else | ||
313 | mime = NULL; | ||
314 | if (0 == strncasecmp ("-->", | ||
315 | (const char*) &data[pos + 7], 3)) | ||
316 | { | ||
317 | /* not supported */ | ||
318 | } | ||
319 | else | ||
320 | { | ||
321 | if (0 != proc (proc_cls, | ||
322 | "id3v2", | ||
323 | type, | ||
324 | EXTRACTOR_METAFORMAT_BINARY, | ||
325 | mime, | ||
326 | (const char*) &data[pos + off], | ||
327 | csize + 6 - off)) | ||
328 | return 1; | ||
329 | } | ||
330 | word = NULL; | ||
331 | break; | ||
332 | default: | ||
333 | return 0; | ||
334 | } | ||
335 | if ((word != NULL) && (strlen (word) > 0)) | ||
336 | { | ||
337 | if (0 != proc (proc_cls, | ||
338 | "id3v2", | ||
339 | tmap[i].type, | ||
340 | EXTRACTOR_METAFORMAT_UTF8, | ||
341 | "text/plain", | ||
342 | word, | ||
343 | strlen(word)+1)) | ||
344 | { | ||
345 | free (word); | ||
346 | return 1; | ||
347 | } | ||
348 | } | ||
349 | if (word != NULL) | ||
350 | free (word); | ||
351 | break; | ||
352 | } | ||
353 | i++; | ||
354 | } | 759 | } |
355 | pos += 6 + csize; | 760 | } |
761 | if (word != NULL) | ||
762 | free (word); | ||
763 | offset = offset + state->csize; | ||
764 | state->state = ID3V2_READING_FRAME_HEADER; | ||
765 | break; | ||
356 | } | 766 | } |
357 | return 0; | 767 | } |
768 | return 1; | ||
358 | } | 769 | } |
359 | 770 | ||
360 | /* end of id3v2_extractor.c */ | 771 | /* end of id3v2_extractor.c */ |
diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c index 3d8d48d..68b3a39 100644 --- a/src/plugins/mp3_extractor.c +++ b/src/plugins/mp3_extractor.c | |||
@@ -36,8 +36,41 @@ | |||
36 | #include <unistd.h> | 36 | #include <unistd.h> |
37 | #include <stdlib.h> | 37 | #include <stdlib.h> |
38 | 38 | ||
39 | #define MAX_MP3_SCAN_DEEP 16768 | 39 | #include "extractor_plugins.h" |
40 | const int max_frames_scan = 1024; | 40 | |
41 | #if WINDOWS | ||
42 | #include <sys/param.h> /* #define BYTE_ORDER */ | ||
43 | #endif | ||
44 | #ifndef __BYTE_ORDER | ||
45 | #ifdef _BYTE_ORDER | ||
46 | #define __BYTE_ORDER _BYTE_ORDER | ||
47 | #else | ||
48 | #ifdef BYTE_ORDER | ||
49 | #define __BYTE_ORDER BYTE_ORDER | ||
50 | #endif | ||
51 | #endif | ||
52 | #endif | ||
53 | #ifndef __BIG_ENDIAN | ||
54 | #ifdef _BIG_ENDIAN | ||
55 | #define __BIG_ENDIAN _BIG_ENDIAN | ||
56 | #else | ||
57 | #ifdef BIG_ENDIAN | ||
58 | #define __BIG_ENDIAN BIG_ENDIAN | ||
59 | #endif | ||
60 | #endif | ||
61 | #endif | ||
62 | #ifndef __LITTLE_ENDIAN | ||
63 | #ifdef _LITTLE_ENDIAN | ||
64 | #define __LITTLE_ENDIAN _LITTLE_ENDIAN | ||
65 | #else | ||
66 | #ifdef LITTLE_ENDIAN | ||
67 | #define __LITTLE_ENDIAN LITTLE_ENDIAN | ||
68 | #endif | ||
69 | #endif | ||
70 | #endif | ||
71 | |||
72 | #define LARGEST_FRAME_SIZE 8065 | ||
73 | |||
41 | enum | 74 | enum |
42 | { MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 }; | 75 | { MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 }; |
43 | 76 | ||
@@ -45,6 +78,11 @@ enum | |||
45 | { LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 }; | 78 | { LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 }; |
46 | 79 | ||
47 | #define MPA_SYNC_MASK ((unsigned int) 0xFFE00000) | 80 | #define MPA_SYNC_MASK ((unsigned int) 0xFFE00000) |
81 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
82 | #define MPA_SYNC_MASK_MEM ((unsigned int) 0xFFE00000) | ||
83 | #else | ||
84 | #define MPA_SYNC_MASK_MEM ((unsigned int) 0x0000E0FF) | ||
85 | #endif | ||
48 | #define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000) | 86 | #define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000) |
49 | #define MPA_VERSION_MASK ((unsigned int) 0x00080000) | 87 | #define MPA_VERSION_MASK ((unsigned int) 0x00080000) |
50 | #define MPA_LAYER_MASK ((unsigned int) 0x3) | 88 | #define MPA_LAYER_MASK ((unsigned int) 0x3) |
@@ -106,169 +144,274 @@ static const char * const layer_names[3] = { | |||
106 | 144 | ||
107 | #define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) | 145 | #define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) |
108 | 146 | ||
109 | /* mimetype = audio/mpeg */ | 147 | struct mp3_state |
110 | int | 148 | { |
111 | EXTRACTOR_mp3_extract (const unsigned char *data, | 149 | int state; |
112 | size_t size, | 150 | |
113 | EXTRACTOR_MetaDataProcessor proc, | 151 | uint32_t header; |
114 | void *proc_cls, | 152 | int sample_rate; |
115 | const char *options) | 153 | char mpeg_ver; |
154 | char layer; | ||
155 | char vbr_flag; | ||
156 | int ch; | ||
157 | char copyright_flag; | ||
158 | char original_flag; | ||
159 | int avg_bps; | ||
160 | int bitrate; | ||
161 | |||
162 | int64_t number_of_frames; | ||
163 | int64_t number_of_valid_frames; | ||
164 | }; | ||
165 | |||
166 | enum MP3State | ||
167 | { | ||
168 | MP3_LOOKING_FOR_FRAME = 0, | ||
169 | MP3_READING_FRAME = 1, | ||
170 | }; | ||
171 | |||
172 | void | ||
173 | EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin) | ||
174 | { | ||
175 | struct mp3_state *state; | ||
176 | state = plugin->state = malloc (sizeof (struct mp3_state)); | ||
177 | if (state == NULL) | ||
178 | return; | ||
179 | state->header = 0; | ||
180 | state->sample_rate = 0; | ||
181 | state->number_of_frames = 0; | ||
182 | state->number_of_valid_frames = 0; | ||
183 | state->mpeg_ver = 0; | ||
184 | state->layer = 0; | ||
185 | state->vbr_flag = 0; | ||
186 | state->ch = 0; | ||
187 | state->copyright_flag = 0; | ||
188 | state->original_flag = 0; | ||
189 | state->avg_bps = 0; | ||
190 | state->bitrate = 0; | ||
191 | state->state = 0; | ||
192 | } | ||
193 | |||
194 | void | ||
195 | EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin) | ||
196 | { | ||
197 | if (plugin->state != NULL) | ||
198 | { | ||
199 | free (plugin->state); | ||
200 | } | ||
201 | plugin->state = NULL; | ||
202 | } | ||
203 | |||
204 | static int | ||
205 | calculate_frame_statistics_and_maybe_report_it (struct EXTRACTOR_PluginList *plugin, | ||
206 | struct mp3_state *state, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
207 | { | ||
208 | int length; | ||
209 | char format[512]; | ||
210 | |||
211 | if (((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5 || | ||
212 | state->number_of_valid_frames < 2) | ||
213 | /* Unlikely to be an mp3 file */ | ||
214 | return 0; | ||
215 | ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE); | ||
216 | state->avg_bps = state->avg_bps / state->number_of_valid_frames; | ||
217 | if (state->sample_rate > 0) | ||
218 | length = 1152 * state->number_of_valid_frames / state->sample_rate; | ||
219 | else if (state->avg_bps > 0 || state->bitrate > 0) | ||
220 | length = plugin->fsize / (state->avg_bps ? state->avg_bps : state->bitrate ? state->bitrate : 1) / 125; | ||
221 | else | ||
222 | length = 0; | ||
223 | |||
224 | ADDR (mpeg_versions[state->mpeg_ver - 1], EXTRACTOR_METATYPE_FORMAT_VERSION); | ||
225 | snprintf (format, | ||
226 | sizeof (format), | ||
227 | "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s", | ||
228 | mpeg_versions[state->mpeg_ver - 1], | ||
229 | layer_names[state->layer - 1], | ||
230 | state->avg_bps, | ||
231 | state->vbr_flag ? _("VBR") : _("CBR"), | ||
232 | state->sample_rate, | ||
233 | channel_modes[state->ch], | ||
234 | state->copyright_flag ? _("copyright") : _("no copyright"), | ||
235 | state->original_flag ? _("original") : _("copy") ); | ||
236 | |||
237 | ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); | ||
238 | snprintf (format, | ||
239 | sizeof (format), "%dm%02d", | ||
240 | length / 60, length % 60); | ||
241 | ADDR (format, EXTRACTOR_METATYPE_DURATION); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | int | ||
246 | EXTRACTOR_mp3_extract_method (struct EXTRACTOR_PluginList *plugin, | ||
247 | EXTRACTOR_MetaDataProcessor proc, | ||
248 | void *proc_cls) | ||
116 | { | 249 | { |
117 | unsigned int header; | 250 | int64_t file_position; |
118 | int counter = 0; | 251 | int64_t file_size; |
252 | size_t offset = 0; | ||
253 | size_t size; | ||
254 | unsigned char *data; | ||
255 | struct mp3_state *state; | ||
256 | |||
257 | size_t frames_found_in_this_round = 0; | ||
258 | int start_anew = 0; | ||
259 | |||
119 | char mpeg_ver = 0; | 260 | char mpeg_ver = 0; |
120 | char layer = 0; | 261 | char layer = 0; |
121 | int idx_num = 0; | 262 | int idx_num = 0; |
122 | int bitrate = 0; /*used for each frame */ | 263 | int bitrate = 0; /*used for each frame */ |
123 | int avg_bps = 0; /*average bitrate */ | ||
124 | int vbr_flag = 0; | ||
125 | int copyright_flag = 0; | 264 | int copyright_flag = 0; |
126 | int original_flag = 0; | 265 | int original_flag = 0; |
127 | int length = 0; | ||
128 | int sample_rate = 0; | 266 | int sample_rate = 0; |
129 | int ch = 0; | 267 | int ch = 0; |
130 | int frame_size; | 268 | int frame_size; |
131 | int frames = 0; | ||
132 | size_t pos = 0; | ||
133 | char format[512]; | ||
134 | 269 | ||
135 | do | 270 | if (plugin == NULL || plugin->state == NULL) |
136 | { | 271 | return 1; |
137 | /* seek for frame start */ | ||
138 | if (pos + sizeof (header) > size) | ||
139 | { | ||
140 | return 0; | ||
141 | } /*unable to find header */ | ||
142 | header = (data[pos] << 24) | (data[pos+1] << 16) | | ||
143 | (data[pos+2] << 8) | data[pos+3]; | ||
144 | if ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK) | ||
145 | break; /*found header sync */ | ||
146 | pos++; | ||
147 | counter++; /*next try */ | ||
148 | } | ||
149 | while (counter < MAX_MP3_SCAN_DEEP); | ||
150 | if (counter >= MAX_MP3_SCAN_DEEP) | ||
151 | return 0; | ||
152 | 272 | ||
153 | do | 273 | state = plugin->state; |
154 | { /*ok, now we found a mp3 frame header */ | 274 | file_position = plugin->position; |
155 | frames++; | 275 | file_size = plugin->fsize; |
156 | switch (header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK)) | 276 | size = plugin->map_size; |
157 | { | 277 | data = plugin->shm_ptr; |
158 | case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK): | 278 | |
159 | mpeg_ver = MPEG_V1; | 279 | if (plugin->seek_request < 0) |
160 | break; | 280 | return 1; |
161 | case (MPA_LAST_SYNC_BIT_MASK): | 281 | if (file_position - plugin->seek_request > 0) |
162 | mpeg_ver = MPEG_V2; | 282 | { |
163 | break; | 283 | plugin->seek_request = -1; |
164 | case 0: | 284 | return 1; |
165 | mpeg_ver = MPEG_V25; | 285 | } |
166 | break; | 286 | if (plugin->seek_request - file_position < size) |
167 | case (MPA_VERSION_MASK): | 287 | offset = plugin->seek_request - file_position; |
168 | default: | 288 | |
169 | return 0; | 289 | while (1) |
170 | } | 290 | { |
171 | switch (header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT)) | 291 | switch (state->state) |
292 | { | ||
293 | case MP3_LOOKING_FOR_FRAME: | ||
294 | /* Look for a frame header */ | ||
295 | while (offset + sizeof (state->header) < size && (((*((uint32_t *) &data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM)) | ||
296 | offset += 1; | ||
297 | if (offset + sizeof (state->header) >= size) | ||
298 | { | ||
299 | /* Alternative: (frames_found_in_this_round < (size / LARGEST_FRAME_SIZE / 2)) is to generous */ | ||
300 | if ((file_position == 0 && ((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5) || | ||
301 | file_position + offset + sizeof (state->header) >= file_size) | ||
172 | { | 302 | { |
173 | case (0x1 << MPA_LAYER_SHIFT): | 303 | calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls); |
174 | layer = LAYER_3; | 304 | return 1; |
175 | break; | ||
176 | case (0x2 << MPA_LAYER_SHIFT): | ||
177 | layer = LAYER_2; | ||
178 | break; | ||
179 | case (0x3 << MPA_LAYER_SHIFT): | ||
180 | layer = LAYER_1; | ||
181 | break; | ||
182 | case 0x0: | ||
183 | default: | ||
184 | return 0; | ||
185 | } | 305 | } |
306 | plugin->seek_request = file_position + offset; | ||
307 | return 0; | ||
308 | } | ||
309 | state->header = (data[offset] << 24) | (data[offset + 1] << 16) | | ||
310 | (data[offset + 2] << 8) | data[offset + 3]; | ||
311 | if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK) | ||
312 | { | ||
313 | state->state = MP3_READING_FRAME; | ||
314 | break; | ||
315 | } | ||
316 | break; | ||
317 | case MP3_READING_FRAME: | ||
318 | state->number_of_frames += 1; | ||
319 | start_anew = 0; | ||
320 | switch (state->header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK)) | ||
321 | { | ||
322 | case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK): | ||
323 | mpeg_ver = MPEG_V1; | ||
324 | break; | ||
325 | case (MPA_LAST_SYNC_BIT_MASK): | ||
326 | mpeg_ver = MPEG_V2; | ||
327 | break; | ||
328 | case 0: | ||
329 | mpeg_ver = MPEG_V25; | ||
330 | break; | ||
331 | case (MPA_VERSION_MASK): | ||
332 | default: | ||
333 | state->state = MP3_LOOKING_FOR_FRAME; | ||
334 | offset += 1; | ||
335 | start_anew = 1; | ||
336 | } | ||
337 | if (start_anew) | ||
338 | break; | ||
339 | switch (state->header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT)) | ||
340 | { | ||
341 | case (0x1 << MPA_LAYER_SHIFT): | ||
342 | layer = LAYER_3; | ||
343 | break; | ||
344 | case (0x2 << MPA_LAYER_SHIFT): | ||
345 | layer = LAYER_2; | ||
346 | break; | ||
347 | case (0x3 << MPA_LAYER_SHIFT): | ||
348 | layer = LAYER_1; | ||
349 | break; | ||
350 | case 0x0: | ||
351 | default: | ||
352 | state->state = MP3_LOOKING_FOR_FRAME; | ||
353 | offset += 1; | ||
354 | start_anew = 1; | ||
355 | } | ||
356 | if (start_anew) | ||
357 | break; | ||
186 | if (mpeg_ver < MPEG_V25) | 358 | if (mpeg_ver < MPEG_V25) |
187 | idx_num = (mpeg_ver - 1) * 3 + layer - 1; | 359 | idx_num = (mpeg_ver - 1) * 3 + layer - 1; |
188 | else | 360 | else |
189 | idx_num = 2 + layer; | 361 | idx_num = 2 + layer; |
190 | bitrate = 1000 * bitrate_table[(header >> MPA_BITRATE_SHIFT) & | 362 | bitrate = 1000 * bitrate_table[(state->header >> MPA_BITRATE_SHIFT) & |
191 | MPA_BITRATE_MASK][idx_num]; | 363 | MPA_BITRATE_MASK][idx_num]; |
192 | if (bitrate < 0) | 364 | if (bitrate < 0) |
193 | { | 365 | { |
194 | frames--; | 366 | /*error in header */ |
195 | break; | 367 | state->state = MP3_LOOKING_FOR_FRAME; |
196 | } /*error in header */ | 368 | offset += 1; |
197 | sample_rate = freq_table[(header >> MPA_FREQ_SHIFT) & | 369 | break; |
370 | } | ||
371 | sample_rate = freq_table[(state->header >> MPA_FREQ_SHIFT) & | ||
198 | MPA_FREQ_MASK][mpeg_ver - 1]; | 372 | MPA_FREQ_MASK][mpeg_ver - 1]; |
199 | if (sample_rate < 0) | 373 | if (sample_rate <= 0) |
200 | { | 374 | { |
201 | frames--; | 375 | /*error in header */ |
202 | break; | 376 | state->state = MP3_LOOKING_FOR_FRAME; |
203 | } /*error in header */ | 377 | offset += 1; |
204 | ch = ((header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK); | 378 | break; |
205 | copyright_flag = (header >> MPA_COPYRIGHT_SHIFT) & 0x1; | 379 | } |
206 | original_flag = (header >> MPA_ORIGINAL_SHIFT) & 0x1; | 380 | ch = ((state->header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK); |
207 | frame_size = | 381 | copyright_flag = (state->header >> MPA_COPYRIGHT_SHIFT) & 0x1; |
208 | 144 * bitrate / (sample_rate ? sample_rate : 1) + | 382 | original_flag = (state->header >> MPA_ORIGINAL_SHIFT) & 0x1; |
209 | ((header >> MPA_PADDING_SHIFT) & 0x1); | 383 | if (layer == LAYER_1) |
384 | frame_size = (12 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1)) * 4; | ||
385 | else | ||
386 | frame_size = 144 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1); | ||
210 | if (frame_size <= 0) | 387 | if (frame_size <= 0) |
211 | { | 388 | { |
212 | /* Technically, bitrate can be 0. However, but this particular | 389 | /*error in header */ |
213 | * extractor is incapable of correctly processing 0-bitrate files | 390 | state->state = MP3_LOOKING_FOR_FRAME; |
214 | * anyway. And bitrate == 0 might also mean that this is just a | 391 | offset += 1; |
215 | * random binary sequence, which is far more likely to be true. | 392 | break; |
216 | * | 393 | } |
217 | * amatus suggests to use a different algorithm and parse significant | ||
218 | * part of the file, then count the number of correct mpeg frames. | ||
219 | * If the the percentage of correct frames is below a threshold, | ||
220 | * then this is not an mpeg file at all. | ||
221 | */ | ||
222 | frames -= 1; | ||
223 | break; | ||
224 | } | ||
225 | avg_bps += bitrate / 1000; | ||
226 | |||
227 | pos += frame_size - 4; | ||
228 | if (frames > max_frames_scan) | ||
229 | break; /*optimization */ | ||
230 | if (avg_bps / frames != bitrate / 1000) | ||
231 | vbr_flag = 1; | ||
232 | if (pos + sizeof (header) > size) | ||
233 | break; /* EOF */ | ||
234 | header = (data[pos] << 24) | (data[pos+1] << 16) | | ||
235 | (data[pos+2] << 8) | data[pos+3]; | ||
236 | } | ||
237 | while ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK); | ||
238 | |||
239 | if (frames < 2) | ||
240 | return 0; /*no valid frames */ | ||
241 | ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE); | ||
242 | avg_bps = avg_bps / frames; | ||
243 | if (max_frames_scan) | ||
244 | { /*if not all frames scaned */ | ||
245 | length = | ||
246 | size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125; | ||
247 | } | ||
248 | else | ||
249 | { | ||
250 | length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF); | ||
251 | } | ||
252 | 394 | ||
253 | ADDR (mpeg_versions[mpeg_ver-1], EXTRACTOR_METATYPE_FORMAT_VERSION); | 395 | /* Only save data from valid frames in the state */ |
254 | snprintf (format, | 396 | state->avg_bps += bitrate / 1000; |
255 | sizeof(format), | 397 | state->sample_rate = sample_rate; |
256 | "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s", | 398 | state->mpeg_ver = mpeg_ver; |
257 | mpeg_versions[mpeg_ver-1], | 399 | state->layer = layer; |
258 | layer_names[layer-1], | 400 | state->ch = ch; |
259 | avg_bps, | 401 | state->copyright_flag = copyright_flag; |
260 | vbr_flag ? _("VBR") : _("CBR"), | 402 | state->original_flag = original_flag; |
261 | sample_rate, | 403 | state->bitrate = bitrate; |
262 | channel_modes[ch], | ||
263 | copyright_flag ? _("copyright") : _("no copyright"), | ||
264 | original_flag ? _("original") : _("copy") ); | ||
265 | 404 | ||
266 | ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); | 405 | frames_found_in_this_round += 1; |
267 | snprintf (format, | 406 | state->number_of_valid_frames += 1; |
268 | sizeof (format), "%dm%02d", | 407 | if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000) |
269 | length / 60, length % 60); | 408 | state->vbr_flag = 1; |
270 | ADDR (format, EXTRACTOR_METATYPE_DURATION); | 409 | offset += frame_size; |
271 | return 0; | 410 | state->state = MP3_LOOKING_FOR_FRAME; |
411 | break; | ||
412 | } | ||
413 | } | ||
414 | return 1; | ||
272 | } | 415 | } |
273 | 416 | ||
274 | /* end of mp3_extractor.c */ | 417 | /* end of mp3_extractor.c */ |
diff --git a/src/plugins/template_extractor.c b/src/plugins/template_extractor.c index 63f0393..b6f3371 100644 --- a/src/plugins/template_extractor.c +++ b/src/plugins/template_extractor.c | |||
@@ -21,21 +21,113 @@ | |||
21 | #include "platform.h" | 21 | #include "platform.h" |
22 | #include "extractor.h" | 22 | #include "extractor.h" |
23 | 23 | ||
24 | int | 24 | #include "extractor_plugins.h" |
25 | EXTRACTOR_template_extract (const unsigned char *data, | 25 | |
26 | size_t size, | 26 | struct template_state |
27 | EXTRACTOR_MetaDataProcessor proc, | 27 | { |
28 | void *proc_cls, | 28 | int state; |
29 | const char *options) | 29 | |
30 | /* more state fields here | ||
31 | * all variables that should survive more than one atomic read | ||
32 | * from the "file" are to be placed here. | ||
33 | */ | ||
34 | }; | ||
35 | |||
36 | enum TemplateState | ||
37 | { | ||
38 | TEMPLATE_INVALID = -1, | ||
39 | TEMPLATE_LOOKING_FOR_FOO = 0, | ||
40 | TEMPLATE_READING_FOO, | ||
41 | TEMPLATE_READING_BAR, | ||
42 | TEMPLATE_SEEKING_TO_ZOOL | ||
43 | }; | ||
44 | |||
45 | void | ||
46 | EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin) | ||
30 | { | 47 | { |
31 | if (0 != proc (proc_cls, | 48 | struct template_state *state; |
32 | "template", | 49 | state = plugin->state = malloc (sizeof (struct template_state)); |
33 | EXTRACTOR_METATYPE_RESERVED, | 50 | if (state == NULL) |
34 | EXTRACTOR_METAFORMAT_UTF8, | 51 | return; |
35 | "text/plain", | 52 | state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */ |
36 | "foo", | 53 | /* initialize other fields to their "uninitialized" values or defaults */ |
37 | strlen ("foo")+1)) | 54 | } |
55 | |||
56 | void | ||
57 | EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin) | ||
58 | { | ||
59 | if (plugin->state != NULL) | ||
60 | { | ||
61 | /* free other state fields that are heap-allocated */ | ||
62 | free (plugin->state); | ||
63 | } | ||
64 | plugin->state = NULL; | ||
65 | } | ||
66 | |||
67 | int | ||
68 | EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin, | ||
69 | EXTRACTOR_MetaDataProcessor proc, void *proc_cls) | ||
70 | { | ||
71 | int64_t file_position; | ||
72 | int64_t file_size; | ||
73 | size_t offset = 0; | ||
74 | size_t size; | ||
75 | unsigned char *data; | ||
76 | unsigned char *ff; | ||
77 | struct mp3_state *state; | ||
78 | |||
79 | /* temporary variables are declared here */ | ||
80 | |||
81 | if (plugin == NULL || plugin->state == NULL) | ||
38 | return 1; | 82 | return 1; |
39 | /* insert more here */ | 83 | |
40 | return 0; | 84 | /* for easier access (and conforms better with the old plugins var names) */ |
85 | state = plugin->state; | ||
86 | file_position = plugin->position; | ||
87 | file_size = plugin->fsize; | ||
88 | size = plugin->map_size; | ||
89 | data = plugin->shm_ptr; | ||
90 | |||
91 | /* sanity checks */ | ||
92 | if (plugin->seek_request < 0) | ||
93 | return 1; | ||
94 | if (file_position - plugin->seek_request > 0) | ||
95 | { | ||
96 | plugin->seek_request = -1; | ||
97 | return 1; | ||
98 | } | ||
99 | if (plugin->seek_request - file_position < size) | ||
100 | offset = plugin->seek_request - file_position; | ||
101 | |||
102 | while (1) | ||
103 | { | ||
104 | switch (state->state) | ||
105 | { | ||
106 | case TEMPLATE_INVALID: | ||
107 | plugin->seek_request = -1; | ||
108 | return 1; | ||
109 | case TEMPLATE_LOOKING_FOR_FOO: | ||
110 | /* Find FOO in data buffer. | ||
111 | * If found, set offset to its position and set state to TEMPLATE_READING_FOO | ||
112 | * If not found, set seek_request to file_position + offset and return 1 | ||
113 | * (but it's better to give up as early as possible, to avoid reading the whole | ||
114 | * file byte-by-byte). | ||
115 | */ | ||
116 | break; | ||
117 | case TEMPLATE_READING_FOO: | ||
118 | /* See if offset + sizeof(foo) < size, otherwise set seek_request to file_position + offset and return 1; | ||
119 | * If file_position is 0, and size is still too small, give up. | ||
120 | * Read FOO, maybe increase offset to reflect that (depends on the parser logic). | ||
121 | * Either process FOO right here, or jump to another state (see ebml plugin for an example of complex | ||
122 | * state-jumps). | ||
123 | * If FOO says you need to seek somewhere - set offset to seek_target - file_position and set the | ||
124 | * next state (next state will check that offset < size; all states that do reading should do that, | ||
125 | * and also check for EOF). | ||
126 | */ | ||
127 | /* ... */ | ||
128 | break; | ||
129 | } | ||
130 | } | ||
131 | /* Should not reach this */ | ||
132 | return 1; | ||
41 | } | 133 | } |