aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/include/extractor.h13
-rw-r--r--src/main/extractor.c3071
-rw-r--r--src/main/extractor_plugins.c24
-rw-r--r--src/main/extractor_plugins.h62
-rw-r--r--src/plugins/Makefile.am385
-rw-r--r--src/plugins/id3_extractor.c149
-rw-r--r--src/plugins/id3v23_extractor.c420
-rw-r--r--src/plugins/id3v24_extractor.c455
-rw-r--r--src/plugins/id3v2_extractor.c957
-rw-r--r--src/plugins/mp3_extractor.c425
-rw-r--r--src/plugins/template_extractor.c122
11 files changed, 3156 insertions, 2927 deletions
diff --git a/src/include/extractor.h b/src/include/extractor.h
index 522463b..c2fec5b 100644
--- a/src/include/extractor.h
+++ b/src/include/extractor.h
@@ -392,12 +392,6 @@ typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls,
392 * @param options options for this plugin; can be NULL 392 * @param options options for this plugin; can be NULL
393 * @return 0 if all calls to proc returned 0, otherwise 1 393 * @return 0 if all calls to proc returned 0, otherwise 1
394 */ 394 */
395typedef int (*EXTRACTOR_ExtractMethod)(const char *data,
396 size_t datasize,
397 EXTRACTOR_MetaDataProcessor proc,
398 void *proc_cls,
399 const char *options);
400
401 395
402/** 396/**
403 * Linked list of extractor plugins. An application builds this list 397 * Linked list of extractor plugins. An application builds this list
@@ -407,6 +401,13 @@ typedef int (*EXTRACTOR_ExtractMethod)(const char *data,
407 */ 401 */
408struct EXTRACTOR_PluginList; 402struct EXTRACTOR_PluginList;
409 403
404typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin,
405 EXTRACTOR_MetaDataProcessor proc, void *proc_cls);
406
407typedef void (*EXTRACTOR_discard_state_method) (struct EXTRACTOR_PluginList *plugin);
408typedef void (*EXTRACTOR_init_state_method) (struct EXTRACTOR_PluginList *plugin);
409
410
410 411
411/** 412/**
412 * Load the default set of plugins. The default can be changed 413 * Load the default set of plugins. The default can be changed
diff --git a/src/main/extractor.c b/src/main/extractor.c
index 17ba1d2..a4ccfa1 100644
--- a/src/main/extractor.c
+++ b/src/main/extractor.c
@@ -23,7 +23,7 @@
23#include "extractor.h" 23#include "extractor.h"
24#include <dirent.h> 24#include <dirent.h>
25#include <sys/types.h> 25#include <sys/types.h>
26#ifndef WINDOWS 26#if !WINDOWS
27#include <sys/wait.h> 27#include <sys/wait.h>
28#include <sys/shm.h> 28#include <sys/shm.h>
29#endif 29#endif
@@ -59,117 +59,53 @@
59 */ 59 */
60#define MAX_MIME_LEN 256 60#define MAX_MIME_LEN 256
61 61
62#define MAX_SHM_NAME 255
63
62/** 64/**
63 * Set to 1 to get failure info, 65 * Set to 1 to get failure info,
64 * 2 for actual debug info. 66 * 2 for actual debug info.
65 */ 67 */
66#define DEBUG 1 68#define DEBUG 1
67 69
70#define MESSAGE_INIT_STATE 0x01
71#define MESSAGE_UPDATED_SHM 0x02
72#define MESSAGE_DONE 0x03
73#define MESSAGE_SEEK 0x04
74#define MESSAGE_META 0x05
75#define MESSAGE_DISCARD_STATE 0x06
68 76
69/** 77/**
70 * Stop the child process of this plugin. 78 * Header used for our IPC replies. A header
79 * with all fields being zero is used to indicate
80 * the end of the stream.
71 */ 81 */
72static void 82struct IpcHeader
73stop_process (struct EXTRACTOR_PluginList *plugin)
74{ 83{
75 int status; 84 enum EXTRACTOR_MetaType meta_type;
76#ifdef WINDOWS 85 enum EXTRACTOR_MetaFormat meta_format;
77 HANDLE process; 86 size_t data_len;
78#endif 87 size_t mime_len;
79 88};
80#if DEBUG
81#ifndef WINDOWS
82 if (plugin->cpid == -1)
83#else
84 if (plugin->hProcess == INVALID_HANDLE_VALUE)
85#endif
86 fprintf (stderr,
87 "Plugin `%s' choked on this input\n",
88 plugin->short_libname);
89#endif
90#ifndef WINDOWS
91 if ( (plugin->cpid == -1) ||
92 (plugin->cpid == 0) )
93 return;
94 kill (plugin->cpid, SIGKILL);
95 waitpid (plugin->cpid, &status, 0);
96 plugin->cpid = -1;
97 close (plugin->cpipe_out);
98 fclose (plugin->cpipe_in);
99#else
100 if (plugin->hProcess == INVALID_HANDLE_VALUE ||
101 plugin->hProcess == NULL)
102 return;
103 TerminateProcess (plugin->hProcess, 0);
104 CloseHandle (plugin->hProcess);
105 plugin->hProcess = INVALID_HANDLE_VALUE;
106 close (plugin->cpipe_out);
107 fclose (plugin->cpipe_in);
108#endif
109 plugin->cpipe_out = -1;
110 plugin->cpipe_in = NULL;
111}
112
113 89
114/** 90#if !WINDOWS
115 * Remove a plugin from a list. 91int
116 * 92plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
117 * @param prev the current list of plugins
118 * @param library the name of the plugin to remove
119 * @return the reduced list, unchanged if the plugin was not loaded
120 */
121struct EXTRACTOR_PluginList *
122EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
123 const char * library)
124{ 93{
125 struct EXTRACTOR_PluginList *pos; 94 if (plugin->shm_id != -1)
126 struct EXTRACTOR_PluginList *first; 95 close (plugin->shm_id);
127 96 plugin->shm_id = shm_open (shm_name, O_RDONLY, 0);
128 pos = prev; 97 return plugin->shm_id;
129 first = prev;
130 while ((pos != NULL) && (0 != strcmp (pos->short_libname, library)))
131 {
132 prev = pos;
133 pos = pos->next;
134 }
135 if (pos != NULL)
136 {
137 /* found, close library */
138 if (first == pos)
139 first = pos->next;
140 else
141 prev->next = pos->next;
142 /* found */
143 stop_process (pos);
144 free (pos->short_libname);
145 free (pos->libname);
146 free (pos->plugin_options);
147 if (NULL != pos->libraryHandle)
148 lt_dlclose (pos->libraryHandle);
149 free (pos);
150 }
151#if DEBUG
152 else
153 fprintf(stderr,
154 "Unloading plugin `%s' failed!\n",
155 library);
156#endif
157 return first;
158} 98}
159 99#else
160 100HANDLE
161/** 101plugin_open_shm (struct EXTRACTOR_PluginList *plugin, char *shm_name)
162 * Remove all plugins from the given list (destroys the list).
163 *
164 * @param plugin the list of plugins
165 */
166void
167EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
168{ 102{
169 while (plugins != NULL) 103 if (plugin->map_handle != 0)
170 plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); 104 CloseHandle (plugin->map_handle);
105 plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name);
106 return plugin->map_handle;
171} 107}
172 108#endif
173 109
174static int 110static int
175write_all (int fd, 111write_all (int fd,
@@ -187,44 +123,9 @@ write_all (int fd,
187 return -1; 123 return -1;
188 off += ret; 124 off += ret;
189 } 125 }
190 return 0; 126 return size;
191}
192
193
194static int
195read_all (int fd,
196 void *buf,
197 size_t size)
198{
199 char *data = buf;
200 size_t off = 0;
201 ssize_t ret;
202
203 while (off < size)
204 {
205 ret = read (fd, &data[off], size - off);
206 if (ret <= 0)
207 return -1;
208 off += ret;
209 }
210 return 0;
211} 127}
212 128
213
214/**
215 * Header used for our IPC replies. A header
216 * with all fields being zero is used to indicate
217 * the end of the stream.
218 */
219struct IpcHeader
220{
221 enum EXTRACTOR_MetaType type;
222 enum EXTRACTOR_MetaFormat format;
223 size_t data_len;
224 size_t mime_len;
225};
226
227
228/** 129/**
229 * Function called by a plugin in a child process. Transmits 130 * Function called by a plugin in a child process. Transmits
230 * the meta data back to the parent process. 131 * the meta data back to the parent process.
@@ -254,6 +155,8 @@ transmit_reply (void *cls,
254 int *cpipe_out = cls; 155 int *cpipe_out = cls;
255 struct IpcHeader hdr; 156 struct IpcHeader hdr;
256 size_t mime_len; 157 size_t mime_len;
158 unsigned char meta_byte = MESSAGE_META;
159 unsigned char zero_byte = 0;
257 160
258 if (data_mime_type == NULL) 161 if (data_mime_type == NULL)
259 mime_len = 0; 162 mime_len = 0;
@@ -261,23 +164,19 @@ transmit_reply (void *cls,
261 mime_len = strlen (data_mime_type) + 1; 164 mime_len = strlen (data_mime_type) + 1;
262 if (mime_len > MAX_MIME_LEN) 165 if (mime_len > MAX_MIME_LEN)
263 mime_len = MAX_MIME_LEN; 166 mime_len = MAX_MIME_LEN;
264 hdr.type = type; 167 hdr.meta_type = type;
265 hdr.format = format; 168 hdr.meta_format = format;
266 hdr.data_len = data_len; 169 hdr.data_len = data_len;
267 hdr.mime_len = mime_len; 170 hdr.mime_len = mime_len;
268 if ( (hdr.type == 0) && 171 if ((1 != write_all (*cpipe_out, &meta_byte, 1)) ||
269 (hdr.format == 0) && 172 (sizeof(hdr) != write_all (*cpipe_out, &hdr, sizeof(hdr))) ||
270 (hdr.data_len == 0) && 173 (mime_len -1 != write_all (*cpipe_out, data_mime_type, mime_len - 1)) ||
271 (hdr.mime_len == 0) ) 174 (1 != write_all (*cpipe_out, &zero_byte, 1)) ||
272 return 0; /* better skip this one, would signal termination... */ 175 (data_len != write_all (*cpipe_out, data, data_len)))
273 if ( (0 != write_all (*cpipe_out, &hdr, sizeof(hdr))) || 176 return 1;
274 (0 != write_all (*cpipe_out, data_mime_type, mime_len)) ||
275 (0 != write_all (*cpipe_out, data, data_len)) )
276 return 1;
277 return 0; 177 return 0;
278} 178}
279 179
280
281/** 180/**
282 * 'main' function of the child process. Reads shm-filenames from 181 * 'main' function of the child process. Reads shm-filenames from
283 * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta 182 * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta
@@ -288,23 +187,20 @@ transmit_reply (void *cls,
288 * @param out stream to write to 187 * @param out stream to write to
289 */ 188 */
290static void 189static void
291process_requests (struct EXTRACTOR_PluginList *plugin, 190process_requests (struct EXTRACTOR_PluginList *plugin, int in, int out)
292 int in,
293 int out)
294{ 191{
295 char hfn[256]; 192 int read_result1, read_result2, read_result3;
296 char tfn[256]; 193 unsigned char code;
297 char sze[256]; 194 int64_t fsize = -1;
298 size_t hfn_len; 195 int64_t position = 0;
299 size_t tfn_len; 196 void *shm_ptr = NULL;
300 size_t sze_len; 197 size_t shm_size = 0;
301 char *fn; 198 char *shm_name = NULL;
302 FILE *fin; 199 size_t shm_name_len;
303 void *ptr; 200
304 int shmid; 201 int extract_reply;
202
305 struct IpcHeader hdr; 203 struct IpcHeader hdr;
306 size_t size;
307 int want_tail;
308 int do_break; 204 int do_break;
309#ifdef WINDOWS 205#ifdef WINDOWS
310 HANDLE map; 206 HANDLE map;
@@ -312,165 +208,554 @@ process_requests (struct EXTRACTOR_PluginList *plugin,
312#endif 208#endif
313 209
314 if (plugin == NULL) 210 if (plugin == NULL)
315 { 211 {
316 close (in); 212 close (in);
317 close (out); 213 close (out);
318 return; 214 return;
319 } 215 }
320 if (0 != plugin_load (plugin)) 216 if (0 != plugin_load (plugin))
321 { 217 {
322 close (in); 218 close (in);
323 close (out); 219 close (out);
324#if DEBUG 220#if DEBUG
325 fprintf (stderr, 221 fprintf (stderr, "Plugin `%s' failed to load!\n", plugin->short_libname);
326 "Plugin `%s' failed to load!\n",
327 plugin->short_libname);
328#endif 222#endif
329 return; 223 return;
330 } 224 }
331 want_tail = 0; 225 if ((plugin->specials != NULL) &&
332 if ( (plugin->specials != NULL) && 226 (NULL != strstr (plugin->specials, "close-stderr")))
333 (NULL != strstr (plugin->specials, 227 close (2);
334 "want-tail")) ) 228 if ((plugin->specials != NULL) &&
335 { 229 (NULL != strstr (plugin->specials, "close-stdout")))
336 want_tail = 1; 230 close (1);
337 }
338 if ( (plugin->specials != NULL) &&
339 (NULL != strstr (plugin->specials,
340 "close-stderr")) )
341 {
342 close (2);
343 }
344 if ( (plugin->specials != NULL) &&
345 (NULL != strstr (plugin->specials,
346 "close-stdout")) )
347 {
348 close (1);
349 }
350 231
351 memset (&hdr, 0, sizeof (hdr)); 232 memset (&hdr, 0, sizeof (hdr));
352 fin = fdopen (in, "r"); 233 do_break = 0;
353 if (fin == NULL) 234 while (!do_break)
354 { 235 {
355 close (in); 236 read_result1 = read (in, &code, 1);
356 close (out); 237 if (read_result1 <= 0)
357 return; 238 break;
358 } 239 switch (code)
359 while (NULL != fgets (hfn, sizeof(hfn), fin))
360 { 240 {
361 hfn_len = strlen (hfn); 241 case MESSAGE_INIT_STATE:
362 if (hfn_len <= 1) 242 read_result2 = read (in, &fsize, sizeof (int64_t));
363 break; 243 read_result3 = read (in, &shm_name_len, sizeof (size_t));
364 ptr = NULL; 244 if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
365 hfn[--hfn_len] = '\0'; /* kill newline */ 245 shm_name_len > MAX_SHM_NAME || fsize <= 0)
366 if (NULL == fgets (tfn, sizeof(tfn), fin)) 246 {
367 break; 247 do_break = 1;
368 if ('!' != tfn[0]) 248 break;
369 break; 249 }
370 tfn_len = strlen (tfn); 250 if (shm_name != NULL)
371 tfn[--tfn_len] = '\0'; /* kill newline */ 251 free (shm_name);
372 if ( (want_tail) && 252 shm_name = malloc (shm_name_len);
373 (tfn_len > 1) ) 253 if (shm_name == NULL)
374 { 254 {
375 fn = &tfn[1]; 255 do_break = 1;
376 }
377 else
378 {
379 fn = hfn;
380 }
381 if (NULL == fgets (sze, sizeof(sze), fin))
382 break;
383 if ('s' != sze[0])
384 break;
385 sze_len = strlen (sze);
386 sze[--sze_len] = '\0'; /* kill newline */
387 size = strtol (&sze[1], NULL, 10);
388 if (size == LONG_MIN || size == LONG_MAX || size == 0)
389 break; 256 break;
390 do_break = 0; 257 }
391#ifndef WINDOWS 258 read_result2 = read (in, shm_name, shm_name_len);
392 if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) && 259 if (read_result2 < shm_name_len)
393 (SIZE_MAX != (size = lseek (shmid, 0, SEEK_END))) && 260 {
394 (NULL != (ptr = mmap (NULL, size, PROT_READ, MAP_SHARED, shmid, 0))) && 261 do_break = 1;
395 (ptr != (void*) -1) ) 262 break;
263 }
264 shm_name[shm_name_len - 1] = '\0';
265#if !WINDOWS
266 if (shm_ptr != NULL)
267 munmap (shm_ptr, shm_size);
268 if (-1 == plugin_open_shm (plugin, shm_name))
269 {
270 do_break = 1;
271 break;
272 }
396#else 273#else
397 /* Despite the obvious, this must be READWRITE, not READONLY */ 274 if (shm_ptr != NULL)
398 map = OpenFileMapping (PAGE_READWRITE, FALSE, fn); 275 UnmapViewOfFile (shm_ptr);
399 ptr = MapViewOfFile (map, FILE_MAP_READ, 0, 0, 0); 276 if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name))
400 if (ptr != NULL)
401 { 277 {
402 if (0 == VirtualQuery (ptr, &mi, sizeof (mi)) || mi.RegionSize < size) 278 do_break = 1;
403 { 279 break;
404 UnmapViewOfFile (ptr); 280 }
405 ptr = NULL; 281#endif
406 } 282 plugin->fsize = fsize;
283 plugin->init_state_method (plugin);
284 break;
285 case MESSAGE_DISCARD_STATE:
286 plugin->discard_state_method (plugin);
287#if !WINDOWS
288 if (shm_ptr != NULL && shm_size > 0)
289 munmap (shm_ptr, shm_size);
290 if (plugin->shm_id != -1)
291 close (plugin->shm_id);
292 plugin->shm_id = -1;
293 shm_size = 0;
294#else
295 if (shm_ptr != NULL)
296 UnmapViewOfFile (shm_ptr);
297 if (plugin->map_handle != 0)
298 CloseHandle (plugin->map_handle);
299 plugin->map_handle = 0;
300#endif
301 shm_ptr = NULL;
302 break;
303 case MESSAGE_UPDATED_SHM:
304 read_result2 = read (in, &position, sizeof (int64_t));
305 read_result3 = read (in, &shm_size, sizeof (size_t));
306 if ((read_result2 < sizeof (int64_t)) || (read_result3 < sizeof (size_t)) ||
307 position < 0 || fsize <= 0 || position >= fsize)
308 {
309 do_break = 1;
310 break;
311 }
312 /* FIXME: also check mapped region size (lseek for *nix, VirtualQuery for W32) */
313#if !WINDOWS
314 if ((-1 == plugin->shm_id) ||
315 (NULL == (shm_ptr = mmap (NULL, shm_size, PROT_READ, MAP_SHARED, plugin->shm_id, 0))) ||
316 (shm_ptr == (void *) -1))
317 {
318 do_break = 1;
319 break;
320 }
321#else
322 if ((plugin->map_handle == 0) ||
323 (NULL == (shm_ptr = MapViewOfFile (plugin->map_handle, FILE_MAP_READ, 0, 0, 0))))
324 {
325 do_break = 1;
326 break;
407 } 327 }
408 if (ptr != NULL)
409#endif 328#endif
410 { 329 plugin->position = position;
411 if ( ( (plugin->extractMethod != NULL) && 330 plugin->shm_ptr = shm_ptr;
412 (0 != plugin->extractMethod (ptr, 331 plugin->map_size = shm_size;
413 size, 332 /* Now, ideally a plugin would do reads and seeks on a virtual "plugin" object
414 &transmit_reply, 333 * completely transparently, and the underlying code would return bytes from
415 &out, 334 * the memory map, or would block and wait for a seek to happen.
416 plugin->plugin_options)) ) || 335 * That, however, requires somewhat different architecture, and even more wrapping
417 (0 != write_all (out, &hdr, sizeof(hdr))) ) 336 * and hand-helding. It's easier to make plugins aware of the fact that they work
418 do_break = 1; 337 * with discrete in-memory buffers with expensive seeking, not continuous files.
419 } 338 */
420#ifndef WINDOWS 339 extract_reply = plugin->extract_method (plugin, transmit_reply, &out);
421 if ( (ptr != NULL) && 340#if !WINDOWS
422 (ptr != (void*) -1) ) 341 if ((shm_ptr != NULL) &&
423 munmap (ptr, size); 342 (shm_ptr != (void*) -1) )
424 if (-1 != shmid) 343 munmap (shm_ptr, shm_size);
425 close (shmid);
426#else 344#else
427 if (ptr != NULL && ptr != (void*) -1) 345 if (shm_ptr != NULL)
428 UnmapViewOfFile (ptr); 346 UnmapViewOfFile (shm_ptr);
429 if (map != NULL)
430 CloseHandle (map);
431#endif 347#endif
432 if (do_break) 348 if (extract_reply == 1)
433 break; 349 {
434 if ( (plugin->specials != NULL) && 350 unsigned char done_byte = MESSAGE_DONE;
435 (NULL != strstr (plugin->specials, 351 if (write (out, &done_byte, 1) != 1)
436 "force-kill")) ) 352 {
437 { 353 do_break = 1;
438 /* we're required to die after each file since this 354 break;
439 plugin only supports a single file at a time */ 355 }
440 _exit (0); 356 if ((plugin->specials != NULL) &&
441 } 357 (NULL != strstr (plugin->specials, "force-kill")))
358 {
359 /* we're required to die after each file since this
360 plugin only supports a single file at a time */
361#if !WINDOWS
362 fsync (out);
363#else
364 _commit (out);
365#endif
366 _exit (0);
367 }
368 }
369 else
370 {
371 unsigned char seek_byte = MESSAGE_SEEK;
372 if (write (out, &seek_byte, 1) != 1)
373 {
374 do_break = 1;
375 break;
376 }
377 if (write (out, &plugin->seek_request, sizeof (int64_t)) != sizeof (int64_t))
378 {
379 do_break = 1;
380 break;
381 }
382 }
383 break;
442 } 384 }
443 fclose (fin); 385 }
386 close (in);
444 close (out); 387 close (out);
445} 388}
446 389
390#if !WINDOWS
447 391
448#ifdef WINDOWS 392/**
393 * Start the process for the given plugin.
394 */
449static void 395static void
450write_plugin_data (int fd, const struct EXTRACTOR_PluginList *plugin) 396start_process (struct EXTRACTOR_PluginList *plugin)
451{ 397{
452 size_t i; 398 int p1[2];
453 DWORD len; 399 int p2[2];
454 char *str; 400 pid_t pid;
401 int status;
455 402
456 i = strlen (plugin->libname) + 1; 403 switch (plugin->flags)
457 write (fd, &i, sizeof (size_t)); 404 {
458 write (fd, plugin->libname, i); 405 case EXTRACTOR_OPTION_DEFAULT_POLICY:
459 i = strlen (plugin->short_libname) + 1; 406 if (-1 != plugin->cpid && 0 != plugin->cpid)
460 write (fd, &i, sizeof (size_t)); 407 return;
461 write (fd, plugin->short_libname, i); 408 break;
462 if (plugin->plugin_options != NULL) 409 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
410 if (0 != plugin->cpid)
411 return;
412 break;
413 case EXTRACTOR_OPTION_IN_PROCESS:
414 return;
415 break;
416 case EXTRACTOR_OPTION_DISABLED:
417 return;
418 break;
419 }
420
421 plugin->cpid = -1;
422 if (0 != pipe (p1))
423 {
424 plugin->flags = EXTRACTOR_OPTION_DISABLED;
425 return;
426 }
427 if (0 != pipe (p2))
428 {
429 close (p1[0]);
430 close (p1[1]);
431 plugin->flags = EXTRACTOR_OPTION_DISABLED;
432 return;
433 }
434 pid = fork ();
435 plugin->cpid = pid;
436 if (pid == -1)
437 {
438 close (p1[0]);
439 close (p1[1]);
440 close (p2[0]);
441 close (p2[1]);
442 plugin->flags = EXTRACTOR_OPTION_DISABLED;
443 return;
444 }
445 if (pid == 0)
446 {
447 close (p1[1]);
448 close (p2[0]);
449 process_requests (plugin, p1[0], p2[1]);
450 _exit (0);
451 }
452 close (p1[0]);
453 close (p2[1]);
454 plugin->cpipe_in = fdopen (p1[1], "w");
455 if (plugin->cpipe_in == NULL)
456 {
457 perror ("fdopen");
458 (void) kill (plugin->cpid, SIGKILL);
459 waitpid (plugin->cpid, &status, 0);
460 close (p1[1]);
461 close (p2[0]);
462 plugin->cpid = -1;
463 plugin->flags = EXTRACTOR_OPTION_DISABLED;
464 return;
465 }
466 plugin->cpipe_out = p2[0];
467}
468
469/**
470 * Stop the child process of this plugin.
471 */
472static void
473stop_process (struct EXTRACTOR_PluginList *plugin)
474{
475 int status;
476
477#if DEBUG
478 if (plugin->cpid == -1)
479 fprintf (stderr,
480 "Plugin `%s' choked on this input\n",
481 plugin->short_libname);
482#endif
483 if ( (plugin->cpid == -1) ||
484 (plugin->cpid == 0) )
485 return;
486 kill (plugin->cpid, SIGKILL);
487 waitpid (plugin->cpid, &status, 0);
488 plugin->cpid = -1;
489 close (plugin->cpipe_out);
490 fclose (plugin->cpipe_in);
491 plugin->cpipe_out = -1;
492 plugin->cpipe_in = NULL;
493
494 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
495 plugin->flags = EXTRACTOR_OPTION_DISABLED;
496
497 plugin->seek_request = -1;
498}
499
/**
 * Counterpart of the Windows routine that sends the plugin description
 * to the child process.  This variant has nothing to send and always
 * reports success.
 *
 * @param plugin plugin description (unused here)
 * @return always 0
 */
static int
write_plugin_data (const struct EXTRACTOR_PluginList *plugin)
{
  (void) plugin; /* only does anything on Windows */
  return 0;
}
506
/* Formatted write to the child's input stream.  Takes the same arguments
 * as fprintf () after the stream: a format string followed by optional
 * values.  The format is part of the variadic list so that calls without
 * extra values do not leave a trailing comma in the expansion. */
#define plugin_print(plug, ...) fprintf ((plug)->cpipe_in, __VA_ARGS__)
/* Raw write of `size' bytes from `buf' through write_all () on the
 * descriptor underlying cpipe_in (not through the stdio stream itself). */
#define plugin_write(plug, buf, size) write_all (fileno ((plug)->cpipe_in), (buf), (size))
509
510#else /* WINDOWS */
511
512#ifndef PIPE_BUF
513#define PIPE_BUF 512
514#endif
515
516/* Copyright Bob Byrnes <byrnes <at> curl.com>
517 http://permalink.gmane.org/gmane.os.cygwin.patches/2121
518*/
519/* Create a pipe, and return handles to the read and write ends,
520 just like CreatePipe, but ensure that the write end permits
521 FILE_READ_ATTRIBUTES access, on later versions of win32 where
522 this is supported. This access is needed by NtQueryInformationFile,
523 which is used to implement select and nonblocking writes.
524 Note that the return value is either NO_ERROR or GetLastError,
525 unlike CreatePipe, which returns a bool for success or failure. */
526static int
527create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr,
528 LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize,
529 DWORD dwReadMode, DWORD dwWriteMode)
530{
531 /* Default to error. */
532 *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE;
533
534 HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE;
535
536 /* Ensure that there is enough pipe buffer space for atomic writes. */
537 if (psize < PIPE_BUF)
538 psize = PIPE_BUF;
539
540 char pipename[MAX_PATH];
541
542 /* Retry CreateNamedPipe as long as the pipe name is in use.
543 * Retrying will probably never be necessary, but we want
544 * to be as robust as possible. */
545 while (1)
546 {
547 static volatile LONG pipe_unique_id;
548
549 snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld",
550 getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id));
551 /* Use CreateNamedPipe instead of CreatePipe, because the latter
552 * returns a write handle that does not permit FILE_READ_ATTRIBUTES
553 * access, on versions of win32 earlier than WinXP SP2.
554 * CreatePipe also stupidly creates a full duplex pipe, which is
555 * a waste, since only a single direction is actually used.
556 * It's important to only allow a single instance, to ensure that
557 * the pipe was not created earlier by some other process, even if
558 * the pid has been reused. We avoid FILE_FLAG_FIRST_PIPE_INSTANCE
559 * because that is only available for Win2k SP2 and WinXP. */
560 read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1, /* max instances */
561 psize, /* output buffer size */
562 psize, /* input buffer size */
563 NMPWAIT_USE_DEFAULT_WAIT, sa_ptr);
564
565 if (read_pipe != INVALID_HANDLE_VALUE)
463 { 566 {
464 i = strlen (plugin->plugin_options) + 1; 567 break;
465 str = plugin->plugin_options;
466 } 568 }
467 else 569
570 DWORD err = GetLastError ();
571
572 switch (err)
468 { 573 {
469 i = 0; 574 case ERROR_PIPE_BUSY:
575 /* The pipe is already open with compatible parameters.
576 * Pick a new name and retry. */
577 continue;
578 case ERROR_ACCESS_DENIED:
579 /* The pipe is already open with incompatible parameters.
580 * Pick a new name and retry. */
581 continue;
582 case ERROR_CALL_NOT_IMPLEMENTED:
583 /* We are on an older Win9x platform without named pipes.
584 * Return an anonymous pipe as the best approximation. */
585 if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize))
586 {
587 return 0;
588 }
589 err = GetLastError ();
590 return err;
591 default:
592 return err;
470 } 593 }
471 write (fd, &i, sizeof (size_t)); 594 /* NOTREACHED */
472 if (i > 0) 595 }
473 write (fd, str, i); 596
597 /* Open the named pipe for writing.
598 * Be sure to permit FILE_READ_ATTRIBUTES access. */
599 write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0, /* share mode */
600 sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and attributes */
601 0); /* handle to template file */
602
603 if (write_pipe == INVALID_HANDLE_VALUE)
604 {
605 /* Failure. */
606 DWORD err = GetLastError ();
607
608 CloseHandle (read_pipe);
609 return err;
610 }
611
612 /* Success. */
613 *read_pipe_ptr = read_pipe;
614 *write_pipe_ptr = write_pipe;
615 return 0;
616}
617
618static int
619write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf)
620{
621 DWORD written;
622 BOOL bresult;
623 DWORD err;
624
625 if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE))
626 return -1;
627
628 ResetEvent (ov->hEvent);
629
630 if (*old_buf != NULL)
631 free (*old_buf);
632
633 *old_buf = malloc (size);
634 if (*old_buf == NULL)
635 return -1;
636 memcpy (*old_buf, buf, size);
637 written = 0;
638 ov->Offset = 0;
639 ov->OffsetHigh = 0;
640 ov->Pointer = 0;
641 ov->Internal = 0;
642 ov->InternalHigh = 0;
643 bresult = WriteFile (h, *old_buf, size, &written, ov);
644
645 if (bresult == TRUE)
646 {
647 SetEvent (ov->hEvent);
648 free (*old_buf);
649 *old_buf = NULL;
650 return written;
651 }
652
653 err = GetLastError ();
654 if (err == ERROR_IO_PENDING)
655 return size;
656 SetEvent (ov->hEvent);
657 *old_buf = NULL;
658 SetLastError (err);
659 return -1;
660}
661
662static int
663print_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char **buf, const char *fmt, ...)
664{
665 va_list va;
666 va_list vacp;
667 size_t size;
668 char *print_buf;
669 int result;
670
671 va_start (va, fmt);
672 va_copy (vacp, va);
673 size = VSNPRINTF (NULL, 0, fmt, vacp) + 1;
674 va_end (vacp);
675 if (size <= 0)
676 {
677 va_end (va);
678 return size;
679 }
680
681 print_buf = malloc (size);
682 if (print_buf == NULL)
683 return -1;
684 VSNPRINTF (print_buf, size, fmt, va);
685 va_end (va);
686
687 result = write_to_pipe (h, ov, print_buf, size, buf);
688 free (buf);
689 return result;
690}
691
/* Formatted write to the child's input pipe.  Takes a format string
 * followed by optional values; the format is part of the variadic list
 * so that calls without extra values expand cleanly. */
#define plugin_print(plug, ...) print_to_pipe ((plug)->cpipe_in, &(plug)->ov_write, &(plug)->ov_write_buffer, __VA_ARGS__)
/* Raw write of `size' bytes from `buf' to the child's input pipe. */
#define plugin_write(plug, buf, size) write_to_pipe ((plug)->cpipe_in, &(plug)->ov_write, (buf), (size), &(plug)->ov_write_buffer)
694
695static int
696write_plugin_data (struct EXTRACTOR_PluginList *plugin)
697{
698 size_t libname_len, shortname_len, opts_len;
699 DWORD len;
700 char *str;
701 size_t total_len = 0;
702 unsigned char *buf, *ptr;
703
704 switch (plugin->flags)
705 {
706 case EXTRACTOR_OPTION_DEFAULT_POLICY:
707 break;
708 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
709 break;
710 case EXTRACTOR_OPTION_IN_PROCESS:
711 return 0;
712 break;
713 case EXTRACTOR_OPTION_DISABLED:
714 return 0;
715 break;
716 }
717
718 libname_len = strlen (plugin->libname) + 1;
719 total_len += sizeof (size_t) + libname_len;
720 shortname_len = strlen (plugin->short_libname) + 1;
721 total_len += sizeof (size_t) + shortname_len;
722 if (plugin->plugin_options != NULL)
723 {
724 opts_len = strlen (plugin->plugin_options) + 1;
725 total_len += opts_len;
726 }
727 else
728 {
729 opts_len = 0;
730 }
731 total_len += sizeof (size_t);
732
733 buf = malloc (total_len);
734 if (buf == NULL)
735 return -1;
736 ptr = buf;
737 memcpy (ptr, &libname_len, sizeof (size_t));
738 ptr += sizeof (size_t);
739 memcpy (ptr, plugin->libname, libname_len);
740 ptr += libname_len;
741 memcpy (ptr, &shortname_len, sizeof (size_t));
742 ptr += sizeof (size_t);
743 memcpy (ptr, plugin->short_libname, shortname_len);
744 ptr += shortname_len;
745 memcpy (ptr, &opts_len, sizeof (size_t));
746 ptr += sizeof (size_t);
747 if (opts_len > 0)
748 {
749 memcpy (ptr, plugin->plugin_options, opts_len);
750 ptr += opts_len;
751 }
752 if (total_len != write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf, total_len, &plugin->ov_write_buffer))
753 {
754 free (buf);
755 return -1;
756 }
757 free (buf);
758 return 0;
474} 759}
475 760
476static struct EXTRACTOR_PluginList * 761static struct EXTRACTOR_PluginList *
@@ -485,183 +770,112 @@ read_plugin_data (int fd)
485 read (fd, &i, sizeof (size_t)); 770 read (fd, &i, sizeof (size_t));
486 ret->libname = malloc (i); 771 ret->libname = malloc (i);
487 if (ret->libname == NULL) 772 if (ret->libname == NULL)
488 { 773 {
489 free (ret); 774 free (ret);
490 return NULL; 775 return NULL;
491 } 776 }
492 read (fd, ret->libname, i); 777 read (fd, ret->libname, i);
778 ret->libname[i - 1] = '\0';
493 779
494 read (fd, &i, sizeof (size_t)); 780 read (fd, &i, sizeof (size_t));
495 ret->short_libname = malloc (i); 781 ret->short_libname = malloc (i);
496 if (ret->short_libname == NULL) 782 if (ret->short_libname == NULL)
497 { 783 {
498 free (ret->libname); 784 free (ret->libname);
499 free (ret); 785 free (ret);
500 return NULL; 786 return NULL;
501 } 787 }
502 read (fd, ret->short_libname, i); 788 read (fd, ret->short_libname, i);
789 ret->short_libname[i - 1] = '\0';
503 790
504 read (fd, &i, sizeof (size_t)); 791 read (fd, &i, sizeof (size_t));
505 if (i == 0) 792 if (i == 0)
506 { 793 {
507 ret->plugin_options = NULL; 794 ret->plugin_options = NULL;
508 } 795 }
509 else 796 else
797 {
798 ret->plugin_options = malloc (i);
799 if (ret->plugin_options == NULL)
510 { 800 {
511 ret->plugin_options = malloc (i); 801 free (ret->short_libname);
512 if (ret->plugin_options == NULL) 802 free (ret->libname);
513 { 803 free (ret);
514 free (ret->short_libname); 804 return NULL;
515 free (ret->libname);
516 free (ret);
517 return NULL;
518 }
519 read (fd, ret->plugin_options, i);
520 } 805 }
806 read (fd, ret->plugin_options, i);
807 ret->plugin_options[i - 1] = '\0';
808 }
521 return ret; 809 return ret;
522} 810}
523 811
524
525void CALLBACK
526RundllEntryPoint (HWND hwnd,
527 HINSTANCE hinst,
528 LPSTR lpszCmdLine,
529 int nCmdShow)
530{
531 intptr_t in_h;
532 intptr_t out_h;
533 int in, out;
534
535 sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h);
536 in = _open_osfhandle (in_h, _O_RDONLY);
537 out = _open_osfhandle (out_h, 0);
538 setmode (in, _O_BINARY);
539 setmode (out, _O_BINARY);
540 process_requests (read_plugin_data (in),
541 in, out);
542}
543
544void CALLBACK
545RundllEntryPointA (HWND hwnd,
546 HINSTANCE hinst,
547 LPSTR lpszCmdLine,
548 int nCmdShow)
549{
550 return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow);
551}
552#endif
553
554
555/** 812/**
556 * Start the process for the given plugin. 813 * Start the process for the given plugin.
557 */ 814 */
558static void 815static void
559start_process (struct EXTRACTOR_PluginList *plugin) 816start_process (struct EXTRACTOR_PluginList *plugin)
560{ 817{
561#if !WINDOWS 818 HANDLE p1[2];
562 int p1[2]; 819 HANDLE p2[2];
563 int p2[2];
564 pid_t pid;
565 int status;
566
567 plugin->cpid = -1;
568 if (0 != pipe (p1))
569 {
570 plugin->flags = EXTRACTOR_OPTION_DISABLED;
571 return;
572 }
573 if (0 != pipe (p2))
574 {
575 close (p1[0]);
576 close (p1[1]);
577 plugin->flags = EXTRACTOR_OPTION_DISABLED;
578 return;
579 }
580 pid = fork ();
581 plugin->cpid = pid;
582 if (pid == -1)
583 {
584 close (p1[0]);
585 close (p1[1]);
586 close (p2[0]);
587 close (p2[1]);
588 plugin->flags = EXTRACTOR_OPTION_DISABLED;
589 return;
590 }
591 if (pid == 0)
592 {
593 close (p1[1]);
594 close (p2[0]);
595 process_requests (plugin, p1[0], p2[1]);
596 _exit (0);
597 }
598 close (p1[0]);
599 close (p2[1]);
600 plugin->cpipe_in = fdopen (p1[1], "w");
601 if (plugin->cpipe_in == NULL)
602 {
603 perror ("fdopen");
604 (void) kill (plugin->cpid, SIGKILL);
605 waitpid (plugin->cpid, &status, 0);
606 close (p1[1]);
607 close (p2[0]);
608 plugin->cpid = -1;
609 plugin->flags = EXTRACTOR_OPTION_DISABLED;
610 return;
611 }
612 plugin->cpipe_out = p2[0];
613#else
614 int p1[2];
615 int p2[2];
616 STARTUPINFO startup; 820 STARTUPINFO startup;
617 PROCESS_INFORMATION proc; 821 PROCESS_INFORMATION proc;
618 char cmd[MAX_PATH + 1]; 822 char cmd[MAX_PATH + 1];
619 char arg1[10], arg2[10]; 823 char arg1[10], arg2[10];
620 HANDLE p10_os = INVALID_HANDLE_VALUE, p21_os = INVALID_HANDLE_VALUE;
621 HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; 824 HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE;
825 SECURITY_ATTRIBUTES sa;
622 826
623 plugin->hProcess = NULL; 827 switch (plugin->flags)
624 if (0 != _pipe (p1, 0, _O_BINARY | _O_NOINHERIT)) 828 {
625 { 829 case EXTRACTOR_OPTION_DEFAULT_POLICY:
626 plugin->flags = EXTRACTOR_OPTION_DISABLED; 830 if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0)
627 return; 831 return;
628 } 832 break;
629 if (0 != _pipe (p2, 0, _O_BINARY | _O_NOINHERIT)) 833 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
630 { 834 if (plugin->hProcess != 0)
631 close (p1[0]);
632 close (p1[1]);
633 plugin->flags = EXTRACTOR_OPTION_DISABLED;
634 return; 835 return;
635 } 836 break;
837 case EXTRACTOR_OPTION_IN_PROCESS:
838 return;
839 break;
840 case EXTRACTOR_OPTION_DISABLED:
841 return;
842 break;
843 }
636 844
637 memset (&startup, 0, sizeof (STARTUPINFO)); 845 sa.nLength = sizeof (sa);
846 sa.lpSecurityDescriptor = NULL;
847 sa.bInheritHandle = FALSE;
638 848
639 p10_os = (HANDLE) _get_osfhandle (p1[0]); 849 plugin->hProcess = NULL;
640 p21_os = (HANDLE) _get_osfhandle (p2[1]);
641 850
642 if (p10_os == INVALID_HANDLE_VALUE || p21_os == INVALID_HANDLE_VALUE) 851 if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED))
643 { 852 {
644 close (p1[0]);
645 close (p1[1]);
646 close (p2[0]);
647 close (p2[1]);
648 plugin->flags = EXTRACTOR_OPTION_DISABLED; 853 plugin->flags = EXTRACTOR_OPTION_DISABLED;
649 return; 854 return;
650 } 855 }
856 if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED))
857 {
858 CloseHandle (p1[0]);
859 CloseHandle (p1[1]);
860 plugin->flags = EXTRACTOR_OPTION_DISABLED;
861 return;
862 }
863
864 memset (&startup, 0, sizeof (STARTUPINFO));
651 865
652 if (!DuplicateHandle (GetCurrentProcess (), p10_os, GetCurrentProcess (), 866 if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (),
653 &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS) 867 &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)
654 || !DuplicateHandle (GetCurrentProcess (), p21_os, GetCurrentProcess (), 868 || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (),
655 &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) 869 &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS))
656 { 870 {
657 if (p10_os_inh != INVALID_HANDLE_VALUE) 871 if (p10_os_inh != INVALID_HANDLE_VALUE)
658 CloseHandle (p10_os_inh); 872 CloseHandle (p10_os_inh);
659 if (p21_os_inh != INVALID_HANDLE_VALUE) 873 if (p21_os_inh != INVALID_HANDLE_VALUE)
660 CloseHandle (p21_os_inh); 874 CloseHandle (p21_os_inh);
661 close (p1[0]); 875 CloseHandle (p1[0]);
662 close (p1[1]); 876 CloseHandle (p1[1]);
663 close (p2[0]); 877 CloseHandle (p2[0]);
664 close (p2[1]); 878 CloseHandle (p2[1]);
665 plugin->flags = EXTRACTOR_OPTION_DISABLED; 879 plugin->flags = EXTRACTOR_OPTION_DISABLED;
666 return; 880 return;
667 } 881 }
@@ -675,186 +889,584 @@ start_process (struct EXTRACTOR_PluginList *plugin)
675 CloseHandle (proc.hThread); 889 CloseHandle (proc.hThread);
676 } 890 }
677 else 891 else
678 { 892 {
679 close (p1[0]); 893 CloseHandle (p1[0]);
680 close (p1[1]); 894 CloseHandle (p1[1]);
681 close (p2[0]); 895 CloseHandle (p2[0]);
682 close (p2[1]); 896 CloseHandle (p2[1]);
683 plugin->flags = EXTRACTOR_OPTION_DISABLED; 897 plugin->flags = EXTRACTOR_OPTION_DISABLED;
684 return; 898 return;
685 } 899 }
686 close (p1[0]); 900 CloseHandle (p1[0]);
687 close (p2[1]); 901 CloseHandle (p2[1]);
688 CloseHandle (p10_os_inh); 902 CloseHandle (p10_os_inh);
689 CloseHandle (p21_os_inh); 903 CloseHandle (p21_os_inh);
690 904
691 write_plugin_data (p1[1], plugin); 905 plugin->cpipe_in = p1[1];
906 plugin->cpipe_out = p2[0];
692 907
693 plugin->cpipe_in = fdopen (p1[1], "w"); 908 memset (&plugin->ov_read, 0, sizeof (OVERLAPPED));
694 if (plugin->cpipe_in == NULL) 909 memset (&plugin->ov_write, 0, sizeof (OVERLAPPED));
910
911 plugin->ov_write_buffer = NULL;
912
913 plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL);
914 plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL);
915}
916
917/**
918 * Stop the child process of this plugin.
919 */
920static void
921stop_process (struct EXTRACTOR_PluginList *plugin)
922{
923 int status;
924 HANDLE process;
925
926#if DEBUG
927 if (plugin->hProcess == INVALID_HANDLE_VALUE)
928 fprintf (stderr,
929 "Plugin `%s' choked on this input\n",
930 plugin->short_libname);
931#endif
932 if (plugin->hProcess == INVALID_HANDLE_VALUE ||
933 plugin->hProcess == NULL)
934 return;
935 TerminateProcess (plugin->hProcess, 0);
936 CloseHandle (plugin->hProcess);
937 plugin->hProcess = INVALID_HANDLE_VALUE;
938 CloseHandle (plugin->cpipe_out);
939 CloseHandle (plugin->cpipe_in);
940 plugin->cpipe_out = INVALID_HANDLE_VALUE;
941 plugin->cpipe_in = INVALID_HANDLE_VALUE;
942 CloseHandle (plugin->ov_read.hEvent);
943 CloseHandle (plugin->ov_write.hEvent);
944 if (plugin->ov_write_buffer != NULL)
945 {
946 free (plugin->ov_write_buffer);
947 plugin->ov_write_buffer = NULL;
948 }
949
950 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY)
951 plugin->flags = EXTRACTOR_OPTION_DISABLED;
952
953 plugin->seek_request = -1;
954}
955
956#endif /* WINDOWS */
957
958/**
959 * Remove a plugin from a list.
960 *
961 * @param prev the current list of plugins
962 * @param library the name of the plugin to remove
963 * @return the reduced list, unchanged if the plugin was not loaded
964 */
965struct EXTRACTOR_PluginList *
966EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
967 const char * library)
968{
969 struct EXTRACTOR_PluginList *pos;
970 struct EXTRACTOR_PluginList *first;
971
972 pos = prev;
973 first = prev;
974 while ((pos != NULL) && (0 != strcmp (pos->short_libname, library)))
695 { 975 {
696 perror ("fdopen"); 976 prev = pos;
697 TerminateProcess (plugin->hProcess, 0); 977 pos = pos->next;
698 WaitForSingleObject (plugin->hProcess, INFINITE);
699 CloseHandle (plugin->hProcess);
700 close (p1[1]);
701 close (p2[0]);
702 plugin->hProcess = INVALID_HANDLE_VALUE;
703 plugin->flags = EXTRACTOR_OPTION_DISABLED;
704 return;
705 } 978 }
706 plugin->cpipe_out = p2[0]; 979 if (pos != NULL)
980 {
981 /* found, close library */
982 if (first == pos)
983 first = pos->next;
984 else
985 prev->next = pos->next;
986 /* found */
987 stop_process (pos);
988 free (pos->short_libname);
989 free (pos->libname);
990 free (pos->plugin_options);
991 if (NULL != pos->libraryHandle)
992 lt_dlclose (pos->libraryHandle);
993 free (pos);
994 }
995#if DEBUG
996 else
997 fprintf(stderr,
998 "Unloading plugin `%s' failed!\n",
999 library);
707#endif 1000#endif
1001 return first;
708} 1002}
709 1003
710 1004
711/** 1005/**
712 * Extract meta data using the given plugin, running the 1006 * Remove all plugins from the given list (destroys the list).
713 * actual code of the plugin out-of-process.
714 * 1007 *
715 * @param plugin which plugin to call 1008 * @param plugin the list of plugins
716 * @param size size of the file mapped by shmfn or tshmfn 1009 */
717 * @param shmfn file name of the shared memory segment 1010void
718 * @param tshmfn file name of the shared memory segment for the end of the data 1011EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
719 * @param proc function to call on the meta data 1012{
1013 while (plugins != NULL)
1014 plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname);
1015}
1016
1017
1018
1019/**
1020 * Open a file
1021 */
1022static int file_open(const char *filename, int oflag, ...)
1023{
1024 int mode;
1025 const char *fn;
1026#ifdef MINGW
1027 char szFile[_MAX_PATH + 1];
1028 long lRet;
1029
1030 if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS)
1031 {
1032 errno = ENOENT;
1033 SetLastError(lRet);
1034 return -1;
1035 }
1036 fn = szFile;
1037#else
1038 fn = filename;
1039#endif
1040 mode = 0;
1041#ifdef MINGW
1042 /* Set binary mode */
1043 mode |= O_BINARY;
1044#endif
1045 return OPEN(fn, oflag, mode);
1046}
1047
1048#ifndef O_LARGEFILE
1049#define O_LARGEFILE 0
1050#endif
1051
1052#if HAVE_ZLIB
1053#define MIN_ZLIB_HEADER 12
1054#endif
1055#if HAVE_LIBBZ2
1056#define MIN_BZ2_HEADER 4
1057#endif
1058#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB
1059#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER
1060#endif
1061#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2
1062#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER
1063#endif
1064#if !defined (MIN_COMPRESSED_HEADER)
1065#define MIN_COMPRESSED_HEADER -1
1066#endif
1067
1068#define COMPRESSED_DATA_PROBE_SIZE 3
1069
1070/**
1071 * Try to decompress compressed data
1072 *
1073 * @param data data to decompress, or NULL (if fd is not -1)
1074 * @param fd file to read data from, or -1 (if data is not NULL)
1075 * @param fsize size of data (if data is not NULL) or size of fd file (if fd is not -1)
1076 * @param compression_type type of compression, as returned by get_compression_type ()
1077 * @param buffer a pointer to a buffer pointer, buffer pointer is NEVER a NULL and already has some data (usually - COMPRESSED_DATA_PROBE_SIZE bytes) in it.
1078 * @param buffer_size a pointer to buffer size
1079 * @param proc callback for metadata
720 * @param proc_cls cls for proc 1080 * @param proc_cls cls for proc
721 * @return 0 if proc did not return non-zero 1081 * @return 0 on success, anything else on error
722 */ 1082 */
723static int 1083static int
724extract_oop (struct EXTRACTOR_PluginList *plugin, 1084try_to_decompress (const unsigned char *data, int fd, int64_t fsize, int compression_type, void **buffer, size_t *buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
725 size_t size,
726 const char *shmfn,
727 const char *tshmfn,
728 EXTRACTOR_MetaDataProcessor proc,
729 void *proc_cls)
730{ 1085{
731 struct IpcHeader hdr; 1086 unsigned char *new_buffer;
732 char mimetype[MAX_MIME_LEN + 1]; 1087 ssize_t read_result;
733 char *data;
734 1088
735#ifndef WINDOWS 1089 unsigned char *buf;
736 if (plugin->cpid == -1) 1090 unsigned char *rbuf;
737#else 1091 size_t dsize;
738 if (plugin->hProcess == INVALID_HANDLE_VALUE) 1092#if HAVE_ZLIB
1093 z_stream strm;
1094 int ret;
1095 size_t pos;
739#endif 1096#endif
740 return 0; 1097#if HAVE_LIBBZ2
741 if (0 >= fprintf (plugin->cpipe_in, 1098 bz_stream bstrm;
742 "%s\n", 1099 int bret;
743 shmfn)) 1100 size_t bpos;
744 {
745 stop_process (plugin);
746#ifndef WINDOWS
747 plugin->cpid = -1;
748#else
749 plugin->hProcess = INVALID_HANDLE_VALUE;
750#endif 1101#endif
751 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) 1102
752 plugin->flags = EXTRACTOR_OPTION_DISABLED; 1103 if (fd != -1)
753 return 0; 1104 {
1105 if (fsize > *buffer_size)
1106 {
1107 /* Read the rest of the file. Can't de-compress it partially anyway */
1108 /* Memory mapping is not useful here, because memory mapping ALSO takes up
1109 * memory (even more than a buffer, since it might be aligned), and
1110 * because we need to read every byte anyway (lazy on-demand reads into
1111 * memory provided by memory mapping won't help).
1112 */
1113 new_buffer = realloc (*buffer, fsize);
1114 if (new_buffer == NULL)
1115 {
1116 free (*buffer);
1117 return -1;
1118 }
1119 read_result = READ (fd, &new_buffer[*buffer_size], fsize - *buffer_size);
1120 if (read_result != fsize - *buffer_size)
1121 {
1122 free (*buffer);
1123 return -1;
1124 }
1125 *buffer = new_buffer;
1126 *buffer_size = fsize;
754 } 1127 }
755 if (0 >= fprintf (plugin->cpipe_in, 1128 data = (const unsigned char *) new_buffer;
756 "!%s\n", 1129 }
757 (tshmfn != NULL) ? tshmfn : "")) 1130
1131#if HAVE_ZLIB
1132 if (compression_type == 1)
1133 {
1134 /* Process gzip header */
1135 unsigned int gzip_header_length = 10;
1136
1137 if (data[3] & 0x4) /* FEXTRA set */
1138 gzip_header_length += 2 + (unsigned) (data[10] & 0xff) +
1139 (((unsigned) (data[11] & 0xff)) * 256);
1140
1141 if (data[3] & 0x8) /* FNAME set */
758 { 1142 {
759 stop_process (plugin); 1143 const unsigned char *cptr = data + gzip_header_length;
760#ifndef WINDOWS 1144
761 plugin->cpid = -1; 1145 /* stored file name is here */
762#else 1146 while ((cptr - data) < fsize)
763 plugin->hProcess = INVALID_HANDLE_VALUE; 1147 {
764#endif 1148 if ('\0' == *cptr)
765 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) 1149 break;
766 plugin->flags = EXTRACTOR_OPTION_DISABLED; 1150 cptr++;
767 return 0; 1151 }
1152
1153 if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME,
1154 EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
1155 (const char *) (data + gzip_header_length),
1156 cptr - (data + gzip_header_length)))
1157 return 0; /* done */
1158
1159 gzip_header_length = (cptr - data) + 1;
768 } 1160 }
769 if (0 >= fprintf (plugin->cpipe_in, 1161
770 "s%lu\n", 1162 if (data[3] & 0x16) /* FCOMMENT set */
771 size))
772 { 1163 {
773 stop_process (plugin); 1164 const unsigned char * cptr = data + gzip_header_length;
774#ifndef WINDOWS 1165
775 plugin->cpid = -1; 1166 /* stored comment is here */
776#else 1167 while (cptr < data + fsize)
777 plugin->hProcess = INVALID_HANDLE_VALUE; 1168 {
1169 if ('\0' == *cptr)
1170 break;
1171 cptr ++;
1172 }
1173
1174 if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT,
1175 EXTRACTOR_METAFORMAT_C_STRING, "text/plain",
1176 (const char *) (data + gzip_header_length),
1177 cptr - (data + gzip_header_length)))
1178 return 0; /* done */
1179
1180 gzip_header_length = (cptr - data) + 1;
1181 }
1182
1183 if (data[3] & 0x2) /* FCHRC set */
1184 gzip_header_length += 2;
1185
1186 memset (&strm, 0, sizeof (z_stream));
1187
1188#ifdef ZLIB_VERNUM
1189 gzip_header_length = 0;
778#endif 1190#endif
779 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) 1191
780 plugin->flags = EXTRACTOR_OPTION_DISABLED; 1192 if (fsize > gzip_header_length)
781 return 0; 1193 {
1194 strm.next_in = (Bytef *) data + gzip_header_length;
1195 strm.avail_in = fsize - gzip_header_length;
782 } 1196 }
783 fflush (plugin->cpipe_in); 1197 else
784 while (1)
785 { 1198 {
786 if (0 != read_all (plugin->cpipe_out, 1199 strm.next_in = (Bytef *) data;
787 &hdr, 1200 strm.avail_in = 0;
788 sizeof(hdr))) 1201 }
789 { 1202 strm.total_in = 0;
790 stop_process (plugin); 1203 strm.zalloc = NULL;
791#ifndef WINDOWS 1204 strm.zfree = NULL;
792 plugin->cpid = -1; 1205 strm.opaque = NULL;
1206
1207 /*
1208 * note: maybe plain inflateInit(&strm) is adequate,
1209 * it looks more backward-compatible also ;
1210 *
1211 * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
1212 * there might be a better check.
1213 */
1214 if (Z_OK == inflateInit2 (&strm,
1215#ifdef ZLIB_VERNUM
1216 15 + 32
793#else 1217#else
794 plugin->hProcess = INVALID_HANDLE_VALUE; 1218 -MAX_WBITS
795#endif 1219#endif
796 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) 1220 ))
797 plugin->flags = EXTRACTOR_OPTION_DISABLED; 1221 {
798 return 0; 1222 pos = 0;
799 } 1223 dsize = 2 * fsize;
800 if ( (hdr.type == 0) && 1224 if ( (dsize > MAX_DECOMPRESS) ||
801 (hdr.format == 0) && 1225 (dsize < fsize) )
802 (hdr.data_len == 0) && 1226 dsize = MAX_DECOMPRESS;
803 (hdr.mime_len == 0) ) 1227 buf = malloc (dsize);
804 break; 1228
805 if (hdr.mime_len > MAX_MIME_LEN) 1229 if (buf != NULL)
806 { 1230 {
807 stop_process (plugin); 1231 strm.next_out = (Bytef *) buf;
808#ifndef WINDOWS 1232 strm.avail_out = dsize;
809 plugin->cpid = -1; 1233
810#else 1234 do
811 plugin->hProcess = INVALID_HANDLE_VALUE; 1235 {
1236 ret = inflate (&strm, Z_SYNC_FLUSH);
1237 if (ret == Z_OK)
1238 {
1239 if (dsize == MAX_DECOMPRESS)
1240 break;
1241
1242 pos += strm.total_out;
1243 strm.total_out = 0;
1244 dsize *= 2;
1245
1246 if (dsize > MAX_DECOMPRESS)
1247 dsize = MAX_DECOMPRESS;
1248
1249 rbuf = realloc (buf, dsize);
1250 if (rbuf == NULL)
1251 {
1252 free (buf);
1253 buf = NULL;
1254 break;
1255 }
1256
1257 buf = rbuf;
1258 strm.next_out = (Bytef *) &buf[pos];
1259 strm.avail_out = dsize - pos;
1260 }
1261 else if (ret != Z_STREAM_END)
1262 {
1263 /* error */
1264 free (buf);
1265 buf = NULL;
1266 }
1267 } while ((buf != NULL) && (ret != Z_STREAM_END));
1268
1269 dsize = pos + strm.total_out;
1270 if ((dsize == 0) && (buf != NULL))
1271 {
1272 free (buf);
1273 buf = NULL;
1274 }
1275 }
1276
1277 inflateEnd (&strm);
1278
1279 if (fd != -1)
1280 if (*buffer != NULL)
1281 free (*buffer);
1282
1283 if (buf == NULL)
1284 {
1285 return -1;
1286 }
1287 else
1288 {
1289 *buffer = buf;
1290 *buffer_size = dsize;
1291 return 0;
1292 }
1293 }
1294 }
812#endif 1295#endif
813 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) 1296
814 plugin->flags = EXTRACTOR_OPTION_DISABLED; 1297#if HAVE_LIBBZ2
815 return 0; 1298 if (compression_type == 2)
816 } 1299 {
817 data = malloc (hdr.data_len); 1300 memset(&bstrm, 0, sizeof (bz_stream));
818 if (data == NULL) 1301 bstrm.next_in = (char *) data;
819 { 1302 bstrm.avail_in = fsize;
820 stop_process (plugin); 1303 bstrm.total_in_lo32 = 0;
821 return 1; 1304 bstrm.total_in_hi32 = 0;
822 } 1305 bstrm.bzalloc = NULL;
823 if ( (0 != (read_all (plugin->cpipe_out, 1306 bstrm.bzfree = NULL;
824 mimetype, 1307 bstrm.opaque = NULL;
825 hdr.mime_len))) || 1308 if (BZ_OK == BZ2_bzDecompressInit(&bstrm, 0,0))
826 (0 != (read_all (plugin->cpipe_out, 1309 {
827 data, 1310 bpos = 0;
828 hdr.data_len))) ) 1311 dsize = 2 * fsize;
829 { 1312 if ( (dsize > MAX_DECOMPRESS) || (dsize < fsize) )
830 stop_process (plugin); 1313 dsize = MAX_DECOMPRESS;
831#ifndef WINDOWS 1314 buf = malloc (dsize);
832 plugin->cpid = -1; 1315
833#else 1316 if (buf != NULL)
834 plugin->hProcess = INVALID_HANDLE_VALUE; 1317 {
1318 bstrm.next_out = (char *) buf;
1319 bstrm.avail_out = dsize;
1320
1321 do
1322 {
1323 bret = BZ2_bzDecompress (&bstrm);
1324 if (bret == Z_OK)
1325 {
1326 if (dsize == MAX_DECOMPRESS)
1327 break;
1328 bpos += bstrm.total_out_lo32;
1329 bstrm.total_out_lo32 = 0;
1330
1331 dsize *= 2;
1332 if (dsize > MAX_DECOMPRESS)
1333 dsize = MAX_DECOMPRESS;
1334
1335 rbuf = realloc(buf, dsize);
1336 if (rbuf == NULL)
1337 {
1338 free (buf);
1339 buf = NULL;
1340 break;
1341 }
1342
1343 buf = rbuf;
1344 bstrm.next_out = (char*) &buf[bpos];
1345 bstrm.avail_out = dsize - bpos;
1346 }
1347 else if (bret != BZ_STREAM_END)
1348 {
1349 /* error */
1350 free (buf);
1351 buf = NULL;
1352 }
1353 } while ((buf != NULL) && (bret != BZ_STREAM_END));
1354
1355 dsize = bpos + bstrm.total_out_lo32;
1356 if ((dsize == 0) && (buf != NULL))
1357 {
1358 free (buf);
1359 buf = NULL;
1360 }
1361 }
1362
1363 BZ2_bzDecompressEnd (&bstrm);
1364
1365 if (fd != -1)
1366 if (*buffer != NULL)
1367 free (*buffer);
1368
1369 if (buf == NULL)
1370 {
1371 return -1;
1372 }
1373 else
1374 {
1375 *buffer = buf;
1376 *buffer_size = dsize;
1377 return 0;
1378 }
1379 }
1380 }
835#endif 1381#endif
836 free (data); 1382 return -1;
837 if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) 1383}
838 plugin->flags = EXTRACTOR_OPTION_DISABLED; 1384
839 return 0; 1385/**
840 } 1386 * Detect if we have compressed data on our hands.
841 mimetype[hdr.mime_len] = '\0'; 1387 *
842 if ( (proc != NULL) && 1388 * @param data pointer to a data buffer or NULL (in case fd is not -1)
843 (0 != proc (proc_cls, 1389 * @param fd a file to read data from, or -1 (if data is not NULL)
844 plugin->short_libname, 1390 * @param fsize size of data (if data is not NULL) or of file (if fd is not -1)
845 hdr.type, 1391 * @param buffer will receive a pointer to the data that this function read
846 hdr.format, 1392 * @param buffer_size will receive size of the buffer
847 mimetype, 1393 * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression
848 data, 1394 */
849 hdr.data_len)) ) 1395static int
850 proc = NULL; 1396get_compression_type (const unsigned char *data, int fd, int64_t fsize, void **buffer, size_t *buffer_size)
851 free (data); 1397{
1398 void *read_data = NULL;
1399 size_t read_data_size = 0;
1400 ssize_t read_result;
1401
1402 if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER))
1403 {
1404 *buffer = NULL;
1405 return 0;
1406 }
1407 if (data == NULL)
1408 {
1409 read_data_size = COMPRESSED_DATA_PROBE_SIZE;
1410 read_data = malloc (read_data_size);
1411 if (read_data == NULL)
1412 return -1;
1413 read_result = READ (fd, read_data, read_data_size);
1414 if (read_result != read_data_size)
1415 {
1416 free (read_data);
1417 return -1;
852 } 1418 }
853 if (NULL == proc) 1419 *buffer = read_data;
1420 *buffer_size = read_data_size;
1421 data = (const void *) read_data;
1422 }
1423#if HAVE_ZLIB
1424 if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08))
1425 return 1;
1426#endif
1427#if HAVE_LIBBZ2
1428 if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h'))
1429 return 2;
1430#endif
1431 return 0;
1432}
1433
1434#if WINDOWS
1435
1436/**
1437 * Setup a shared memory segment.
1438 *
1439 * @param ptr set to the location of the map segment
1440 * @param map where to store the map handle
1441 * @param fn name of the mapping
1442 * @param fn_size size available in fn
1443 * @param size number of bytes to allocated for the mapping
1444 * @return 0 on success
1445 */
1446static int
1447make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size)
1448{
1449 const char *tpath = "Local\\";
1450 snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
1451 (unsigned int) RANDOM());
1452 *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn);
1453 *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size);
1454 if (*ptr == NULL)
1455 {
1456 CloseHandle (*map);
854 return 1; 1457 return 1;
1458 }
855 return 0; 1459 return 0;
856} 1460}
1461
1462static void
1463destroy_shm_w32 (void *ptr, HANDLE map)
1464{
1465 UnmapViewOfFile (ptr);
1466 CloseHandle (map);
1467}
857 1468
1469#else
858 1470
859/** 1471/**
860 * Setup a shared memory segment. 1472 * Setup a shared memory segment.
@@ -867,22 +1479,10 @@ extract_oop (struct EXTRACTOR_PluginList *plugin,
867 * @return 0 on success 1479 * @return 0 on success
868 */ 1480 */
869static int 1481static int
870make_shm (int is_tail, 1482make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size)
871 void **ptr,
872#ifndef WINDOWS
873 int *shmid,
874#else
875 HANDLE *map,
876#endif
877 char *fn,
878 size_t fn_size,
879 size_t size)
880{ 1483{
881 const char *tpath; 1484 const char *tpath;
882#ifdef WINDOWS 1485#if SOMEBSD
883 tpath = "Local\\";
884#elif SOMEBSD
885 const char *tpath;
886 /* this works on FreeBSD, not sure about others... */ 1486 /* this works on FreeBSD, not sure about others... */
887 tpath = getenv ("TMPDIR"); 1487 tpath = getenv ("TMPDIR");
888 if (tpath == NULL) 1488 if (tpath == NULL)
@@ -890,578 +1490,606 @@ make_shm (int is_tail,
890#else 1490#else
891 tpath = "/"; /* Linux */ 1491 tpath = "/"; /* Linux */
892#endif 1492#endif
893 snprintf (fn, 1493 snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(),
894 fn_size, 1494 (unsigned int) RANDOM());
895 "%slibextractor-%sshm-%u-%u",
896 tpath,
897 (is_tail) ? "t" : "",
898 getpid(),
899 (unsigned int) RANDOM());
900#ifndef WINDOWS
901 *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 1495 *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
902 *ptr = NULL; 1496 *ptr = NULL;
903 if (-1 == (*shmid)) 1497 if (-1 == *shmid)
904 return 1; 1498 return 1;
905 if ( (0 != ftruncate (*shmid, size)) || 1499 if ((0 != ftruncate (*shmid, size)) ||
906 (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || 1500 (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) ||
907 (*ptr == (void*) -1) ) 1501 (*ptr == (void*) -1) )
1502 {
1503 close (*shmid);
1504 *shmid = -1;
1505 shm_unlink (fn);
1506 return 1;
1507 }
1508 return 0;
1509}
1510
1511static void
1512destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name)
1513{
1514 if (NULL != ptr)
1515 munmap (ptr, size);
1516 if (shm_id != -1)
1517 close (shm_id);
1518 shm_unlink (shm_name);
1519}
1520#endif
1521
1522
1523static void
1524init_plugin_state (struct EXTRACTOR_PluginList *plugin, char *shm_name, int64_t fsize)
1525{
1526 int write_result;
1527 int init_state_size;
1528 unsigned char *init_state;
1529 int t;
1530 size_t shm_name_len = strlen (shm_name) + 1;
1531 init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (int64_t);
1532 switch (plugin->flags)
1533 {
1534 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1535 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1536 init_state = malloc (init_state_size);
1537 if (init_state == NULL)
908 { 1538 {
909 close (*shmid); 1539 stop_process (plugin);
910 *shmid = -1; 1540 return;
911 shm_unlink (fn);
912 return 1;
913 } 1541 }
914 return 0; 1542 t = 0;
915#else 1543 init_state[t] = MESSAGE_INIT_STATE;
916 *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); 1544 t += 1;
917 *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size); 1545 memcpy (&init_state[t], &fsize, sizeof (int64_t));
918 if (*ptr == NULL) 1546 t += sizeof (int64_t);
1547 memcpy (&init_state[t], &shm_name_len, sizeof (size_t));
1548 t += sizeof (size_t);
1549 memcpy (&init_state[t], shm_name, shm_name_len);
1550 t += shm_name_len;
1551 write_result = plugin_write (plugin, init_state, init_state_size);
1552 free (init_state);
1553 if (write_result < init_state_size)
919 { 1554 {
920 CloseHandle (*map); 1555 stop_process (plugin);
921 return 1; 1556 return;
922 } 1557 }
923 return 0; 1558 plugin->seek_request = 0;
924#endif 1559 break;
1560 case EXTRACTOR_OPTION_IN_PROCESS:
1561 plugin_open_shm (plugin, shm_name);
1562 plugin->fsize = fsize;
1563 plugin->init_state_method (plugin);
1564 plugin->seek_request = 0;
1565 return;
1566 break;
1567 case EXTRACTOR_OPTION_DISABLED:
1568 return;
1569 break;
1570 }
1571}
1572
1573static void
1574discard_plugin_state (struct EXTRACTOR_PluginList *plugin)
1575{
1576 int write_result;
1577 unsigned char discard_state = MESSAGE_DISCARD_STATE;
1578 switch (plugin->flags)
1579 {
1580 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1581 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1582 /* This is somewhat clumsy, but it's the only stop-indicating
1583 * non-W32/POSIX-specific field i could think of...
1584 */
1585 if (plugin->cpipe_out != -1)
1586 {
1587 write_result = plugin_write (plugin, &discard_state, 1);
1588 if (write_result < 1)
1589 {
1590 stop_process (plugin);
1591 return;
1592 }
1593 }
1594 break;
1595 case EXTRACTOR_OPTION_IN_PROCESS:
1596 plugin->discard_state_method (plugin);
1597 return;
1598 break;
1599 case EXTRACTOR_OPTION_DISABLED:
1600 return;
1601 break;
1602 }
925} 1603}
926 1604
1605static int
1606give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size)
1607{
1608 int write_result;
1609 int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t);
1610 unsigned char updated_shm[updated_shm_size];
1611 int t = 0;
1612 updated_shm[t] = MESSAGE_UPDATED_SHM;
1613 t += 1;
1614 memcpy (&updated_shm[t], &position, sizeof (int64_t));
1615 t += sizeof (int64_t);
1616 memcpy (&updated_shm[t], &map_size, sizeof (size_t));
1617 t += sizeof (size_t);
1618 switch (plugin->flags)
1619 {
1620 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1621 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1622 if (plugin->seek_request < 0)
1623 return 0;
1624 write_result = plugin_write (plugin, updated_shm, updated_shm_size);
1625 if (write_result < updated_shm_size)
1626 {
1627 stop_process (plugin);
1628 return 0;
1629 }
1630 return 1;
1631 case EXTRACTOR_OPTION_IN_PROCESS:
1632 plugin->position = position;
1633 plugin->map_size = map_size;
1634 return 0;
1635 case EXTRACTOR_OPTION_DISABLED:
1636 return 0;
1637 default:
1638 return 1;
1639 }
1640}
927 1641
928/**
929 * Extract keywords using the given set of plugins.
930 *
931 * @param plugins the list of plugins to use
932 * @param data data to process, never NULL
933 * @param size number of bytes in data, ignored if data is NULL
934 * @param tdata end of file data, or NULL
935 * @param tsize number of bytes in tdata
936 * @param proc function to call for each meta data item found
937 * @param proc_cls cls argument to proc
938 */
939static void 1642static void
940extract (struct EXTRACTOR_PluginList *plugins, 1643ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
941 const char * data,
942 size_t size,
943 const char * tdata,
944 size_t tsize,
945 EXTRACTOR_MetaDataProcessor proc,
946 void *proc_cls)
947{ 1644{
948 struct EXTRACTOR_PluginList *ppos; 1645 int extract_reply;
949 enum EXTRACTOR_Options flags; 1646 switch (plugin->flags)
950 void *ptr; 1647 {
951 void *tptr; 1648 case EXTRACTOR_OPTION_DEFAULT_POLICY:
952 char fn[255]; 1649 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
953 char tfn[255]; 1650 return;
954 int want_shm; 1651 case EXTRACTOR_OPTION_IN_PROCESS:
955 int want_tail; 1652 if (plugin->seek_request >= 0)
956#ifndef WINDOWS 1653 {
957 int shmid; 1654 plugin->shm_ptr = shm_ptr;
958 int tshmid; 1655 extract_reply = plugin->extract_method (plugin, proc, proc_cls);
959#else 1656 if (extract_reply == 1)
960 HANDLE map; 1657 plugin->seek_request = -1;
961 HANDLE tmap; 1658 }
962#endif 1659 break;
1660 case EXTRACTOR_OPTION_DISABLED:
1661 return;
1662 break;
1663 }
1664}
963 1665
964 want_shm = 0; 1666#if !WINDOWS
965 ppos = plugins; 1667int
966 while (NULL != ppos) 1668plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size)
967 { 1669{
968 switch (ppos->flags) 1670 ssize_t read_result;
969 { 1671 size_t read_count = 0;
970 case EXTRACTOR_OPTION_DEFAULT_POLICY: 1672 while (read_count < size)
971#ifndef WINDOWS 1673 {
972 if ( (0 == ppos->cpid) || 1674 read_result = read (plugin->cpipe_out, &buf[read_count], size - read_count);
973 (-1 == ppos->cpid) ) 1675 if (read_result <= 0)
974#else 1676 return read_result;
975 if (ppos->hProcess == NULL || ppos->hProcess == INVALID_HANDLE_VALUE) 1677 read_count += read_result;
976#endif 1678 }
977 start_process (ppos); 1679 return read_count;
978 want_shm = 1; 1680}
979 break;
980 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
981#ifndef WINDOWS
982 if (0 == ppos->cpid)
983#else 1681#else
984 if (ppos->hProcess == NULL) 1682int
1683plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size)
1684{
1685 DWORD bytes_read;
1686 BOOL bresult;
1687 size_t read_count = 0;
1688 while (read_count < size)
1689 {
1690 bresult = ReadFile (plugin->cpipe_out, &buf[read_count], size - read_count, &bytes_read, NULL);
1691 if (!bresult)
1692 return -1;
1693 read_count += bytes_read;
1694 }
1695 return read_count;
1696}
985#endif 1697#endif
986 start_process (ppos); 1698
987 want_shm = 1; 1699static int
988 break; 1700receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
989 case EXTRACTOR_OPTION_IN_PROCESS: 1701{
990 break; 1702 int read_result;
991 case EXTRACTOR_OPTION_DISABLED: 1703 unsigned char code;
992 break; 1704 int must_read = 1;
993 } 1705
994 ppos = ppos->next; 1706 int64_t seek_position;
1707 struct IpcHeader hdr;
1708 char *mime_type;
1709 char *data;
1710
1711 while (must_read)
1712 {
1713 read_result = plugin_read (plugin, &code, 1);
1714 if (read_result < 1)
1715 return -1;
1716 switch (code)
1717 {
1718 case MESSAGE_DONE: /* Done */
1719 plugin->seek_request = -1;
1720 must_read = 0;
1721 break;
1722 case MESSAGE_SEEK: /* Seek */
1723 read_result = plugin_read (plugin, (unsigned char *) &seek_position, sizeof (int64_t));
1724 if (read_result < sizeof (int64_t))
1725 return -1;
1726 plugin->seek_request = seek_position;
1727 must_read = 0;
1728 break;
1729 case MESSAGE_META: /* Meta */
1730 read_result = plugin_read (plugin, (unsigned char *) &hdr, sizeof (hdr));
1731 if (read_result < sizeof (hdr)) /* FIXME: check hdr for sanity */
1732 return -1;
1733 mime_type = malloc (hdr.mime_len + 1);
1734 if (mime_type == NULL)
1735 return -1;
1736 read_result = plugin_read (plugin, (unsigned char *) mime_type, hdr.mime_len);
1737 if (read_result < hdr.mime_len)
1738 return -1;
1739 mime_type[hdr.mime_len] = '\0';
1740 data = malloc (hdr.data_len);
1741 if (data == NULL)
1742 {
1743 free (mime_type);
1744 return -1;
1745 }
1746 read_result = plugin_read (plugin, (unsigned char *) data, hdr.data_len);
1747 if (read_result < hdr.data_len)
1748 {
1749 free (mime_type);
1750 free (data);
1751 return -1;
1752 }
1753 read_result = proc (proc_cls, plugin->short_libname, hdr.meta_type, hdr.meta_format, mime_type, data, hdr.data_len);
1754 free (mime_type);
1755 free (data);
1756 if (read_result != 0)
1757 return 1;
1758 break;
1759 default:
1760 return -1;
995 } 1761 }
996 ptr = NULL; 1762 }
997 tptr = NULL; 1763 return 0;
998 if (want_shm) 1764}
1765
1766#if !WINDOWS
1767static int
1768wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
1769{
1770 int ready;
1771 int result;
1772 struct timeval tv;
1773 fd_set to_check;
1774 int highest = 0;
1775 int read_result;
1776 struct EXTRACTOR_PluginList *ppos;
1777
1778 FD_ZERO (&to_check);
1779
1780 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1781 {
1782 switch (ppos->flags)
999 { 1783 {
1000 if (size > MAX_READ) 1784 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1001 size = MAX_READ; 1785 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1002 if (0 == make_shm (0, 1786 if (ppos->seek_request == -1)
1003 &ptr, 1787 continue;
1004#ifndef WINDOWS 1788 FD_SET (ppos->cpipe_out, &to_check);
1005 &shmid, 1789 if (highest < ppos->cpipe_out)
1006#else 1790 highest = ppos->cpipe_out;
1007 &map, 1791 break;
1008#endif 1792 case EXTRACTOR_OPTION_IN_PROCESS:
1009 fn, sizeof(fn), size)) 1793 break;
1010 { 1794 case EXTRACTOR_OPTION_DISABLED:
1011 memcpy (ptr, data, size); 1795 break;
1012 if ( (tdata != NULL) &&
1013 (0 == make_shm (1,
1014 &tptr,
1015#ifndef WINDOWS
1016 &tshmid,
1017#else
1018 &tmap,
1019#endif
1020 tfn, sizeof(tfn), tsize)) )
1021 {
1022 memcpy (tptr, tdata, tsize);
1023 }
1024 else
1025 {
1026 tptr = NULL;
1027 }
1028 }
1029 else
1030 {
1031 want_shm = 0;
1032 }
1033 } 1796 }
1034 ppos = plugins; 1797 }
1035 while (NULL != ppos) 1798
1799 tv.tv_sec = 10;
1800 tv.tv_usec = 0;
1801 ready = select (highest + 1, &to_check, NULL, NULL, &tv);
1802 if (ready <= 0)
1803 /* an error or timeout -> something's wrong or all plugins hung up */
1804 return -1;
1805
1806 result = 0;
1807 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1808 {
1809 switch (ppos->flags)
1036 { 1810 {
1037 flags = ppos->flags; 1811 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1038 if (! want_shm) 1812 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1039 flags = EXTRACTOR_OPTION_IN_PROCESS; 1813 if (ppos->seek_request == -1)
1040 switch (flags) 1814 continue;
1041 { 1815 if (FD_ISSET (ppos->cpipe_out, &to_check))
1042 case EXTRACTOR_OPTION_DEFAULT_POLICY: 1816 {
1043 if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, 1817 read_result = receive_reply (ppos, proc, proc_cls);
1044 (tptr != NULL) ? tfn : NULL, 1818 if (read_result < 0)
1045 proc, proc_cls)) 1819 {
1046 { 1820 stop_process (ppos);
1047 ppos = NULL; 1821 }
1048 break; 1822 result += 1;
1049 } 1823 }
1050#ifndef WINDOWS 1824 break;
1051 if (ppos->cpid == -1) 1825 case EXTRACTOR_OPTION_IN_PROCESS:
1826 break;
1827 case EXTRACTOR_OPTION_DISABLED:
1828 break;
1829 }
1830 }
1831 return result;
1832}
1052#else 1833#else
1053 if (ppos->hProcess == INVALID_HANDLE_VALUE) 1834static int
1054#endif 1835wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
1055 { 1836{
1056 start_process (ppos); 1837 int result;
1057 if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, 1838 DWORD ms;
1058 (tptr != NULL) ? tfn : NULL, 1839 DWORD first_ready;
1059 proc, proc_cls)) 1840 DWORD dwresult;
1060 { 1841 DWORD bytes_read;
1061 ppos = NULL; 1842 BOOL bresult;
1062 break; 1843 int i;
1063 } 1844 HANDLE events[MAXIMUM_WAIT_OBJECTS];
1064 } 1845
1065 break; 1846
1066 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: 1847 struct EXTRACTOR_PluginList *ppos;
1067 if (0 != extract_oop (ppos, (tptr != NULL) ? tsize : size, fn, 1848
1068 (tptr != NULL) ? tfn : NULL, 1849 i = 0;
1069 proc, proc_cls)) 1850 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1070 { 1851 {
1071 ppos = NULL; 1852 if (i == MAXIMUM_WAIT_OBJECTS)
1072 break; 1853 return -1;
1073 } 1854 if (ppos->seek_request == -1)
1074 break; 1855 continue;
1075 case EXTRACTOR_OPTION_IN_PROCESS: 1856 switch (ppos->flags)
1076 want_tail = ( (ppos->specials != NULL) && 1857 {
1077 (NULL != strstr (ppos->specials, 1858 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1078 "want-tail"))); 1859 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1079 if (NULL == ppos->extractMethod) 1860 if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0)
1080 plugin_load (ppos); 1861 {
1081 if ( ( (ppos->specials == NULL) || 1862 ResetEvent (ppos->ov_read.hEvent);
1082 (NULL == strstr (ppos->specials, 1863 bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read, &ppos->ov_read);
1083 "oop-only")) ) ) 1864 if (bresult == TRUE)
1084 { 1865 {
1085 if (want_tail) 1866 SetEvent (ppos->ov_read.hEvent);
1086 { 1867 }
1087 if ( (NULL != ppos->extractMethod) && 1868 else
1088 (tdata != NULL) && 1869 {
1089 (0 != ppos->extractMethod (tdata, 1870 DWORD err = GetLastError ();
1090 tsize, 1871 if (err != ERROR_IO_PENDING)
1091 proc, 1872 SetEvent (ppos->ov_read.hEvent);
1092 proc_cls, 1873 }
1093 ppos->plugin_options)) ) 1874 }
1094 { 1875 events[i] = ppos->ov_read.hEvent;
1095 ppos = NULL; 1876 i++;
1096 break; 1877 break;
1097 } 1878 case EXTRACTOR_OPTION_IN_PROCESS:
1098 } 1879 break;
1099 else 1880 case EXTRACTOR_OPTION_DISABLED:
1100 { 1881 break;
1101 if ( (NULL != ppos->extractMethod) &&
1102 (0 != ppos->extractMethod (data,
1103 size,
1104 proc,
1105 proc_cls,
1106 ppos->plugin_options)) )
1107 {
1108 ppos = NULL;
1109 break;
1110 }
1111 }
1112 }
1113 break;
1114 case EXTRACTOR_OPTION_DISABLED:
1115 break;
1116 }
1117 if (ppos == NULL)
1118 break;
1119 ppos = ppos->next;
1120 } 1882 }
1121 if (want_shm) 1883 }
1884
1885 ms = 10000;
1886 first_ready = WaitForMultipleObjects (i, events, FALSE, ms);
1887 if (first_ready == WAIT_TIMEOUT || first_ready == WAIT_FAILED)
1888 /* an error or timeout -> something's wrong or all plugins hung up */
1889 return -1;
1890
1891 i = 0;
1892 result = 0;
1893 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1894 {
1895 int read_result;
1896 switch (ppos->flags)
1122 { 1897 {
1123#ifndef WINDOWS 1898 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1124 if (NULL != ptr) 1899 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1125 munmap (ptr, size); 1900 if (ppos->seek_request == -1)
1126 if (shmid != -1) 1901 continue;
1127 close (shmid); 1902 if (i < first_ready)
1128 shm_unlink (fn); 1903 {
1129 if (NULL != tptr) 1904 i += 1;
1130 { 1905 continue;
1131 munmap (tptr, tsize); 1906 }
1132 shm_unlink (tfn); 1907 dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0);
1133 if (tshmid != -1) 1908 read_result = 0;
1134 close (tshmid); 1909 if (dwresult == WAIT_OBJECT_0)
1135 } 1910 {
1136#else 1911 read_result = receive_reply (ppos, proc, proc_cls);
1137 UnmapViewOfFile (ptr); 1912 result += 1;
1138 CloseHandle (map); 1913 }
1139 if (tptr != NULL) 1914 if (dwresult == WAIT_FAILED || read_result < 0)
1140 { 1915 {
1141 UnmapViewOfFile (tptr); 1916 stop_process (ppos);
1142 CloseHandle (tmap); 1917 if (dwresult == WAIT_FAILED)
1143 } 1918 result += 1;
1919 }
1920 i++;
1921 break;
1922 case EXTRACTOR_OPTION_IN_PROCESS:
1923 break;
1924 case EXTRACTOR_OPTION_DISABLED:
1925 break;
1926 }
1927 }
1928 return result;
1929}
1930
1144#endif 1931#endif
1932
1933static int64_t
1934seek_to_new_position (struct EXTRACTOR_PluginList *plugins, int fd, int64_t fsize, int64_t current_position)
1935{
1936 int64_t min_pos = fsize;
1937 struct EXTRACTOR_PluginList *ppos;
1938 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1939 {
1940 switch (ppos->flags)
1941 {
1942 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1943 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1944 case EXTRACTOR_OPTION_IN_PROCESS:
1945 if (ppos->seek_request > 0 && ppos->seek_request >= current_position &&
1946 ppos->seek_request <= min_pos)
1947 min_pos = ppos->seek_request;
1948 break;
1949 case EXTRACTOR_OPTION_DISABLED:
1950 break;
1145 } 1951 }
1952 }
1953 if (min_pos >= fsize)
1954 return -1;
1955#if WINDOWS
1956 _lseeki64 (fd, min_pos, SEEK_SET);
1957#elif !HAVE_SEEK64
1958 lseek64 (fd, min_pos, SEEK_SET);
1959#else
1960 if (min_pos >= INT_MAX)
1961 return -1;
1962 lseek (fd, (ssize_t) min_pos, SEEK_SET);
1963#endif
1964 return min_pos;
1146} 1965}
1147 1966
1967static void
1968load_in_process_plugin (struct EXTRACTOR_PluginList *plugin)
1969{
1970 switch (plugin->flags)
1971 {
1972 case EXTRACTOR_OPTION_DEFAULT_POLICY:
1973 case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART:
1974 case EXTRACTOR_OPTION_DISABLED:
1975 break;
1976 case EXTRACTOR_OPTION_IN_PROCESS:
1977 plugin_load (plugin);
1978 break;
1979 }
1980}
1148 1981
1149/** 1982/**
1150 * If the given data is compressed using gzip or bzip2, decompress 1983 * Extract keywords using the given set of plugins.
1151 * it. Run 'extract' on the decompressed contents (or the original
1152 * contents if they were not compressed).
1153 * 1984 *
1154 * @param plugins the list of plugins to use 1985 * @param plugins the list of plugins to use
1155 * @param data data to process, never NULL 1986 * @param data data to process, or NULL if fds is not -1
1156 * @param size number of bytes in data 1987 * @param fd file to read data from, or -1 if data is not NULL
1157 * @param tdata end of file data, or NULL 1988 * @param fsize size of data or size of file
1158 * @param tsize number of bytes in tdata 1989 * @param buffer a buffer with data alteady read from the file (if fd != -1)
1990 * @param buffer_size size of buffer
1159 * @param proc function to call for each meta data item found 1991 * @param proc function to call for each meta data item found
1160 * @param proc_cls cls argument to proc 1992 * @param proc_cls cls argument to proc
1161 */ 1993 */
1162static void 1994static void
1163decompress_and_extract (struct EXTRACTOR_PluginList *plugins, 1995do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, int64_t fsize, void *buffer, size_t buffer_size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
1164 const unsigned char * data, 1996{
1165 size_t size, 1997 int shm_result;
1166 const char * tdata, 1998 unsigned char *shm_ptr;
1167 size_t tsize, 1999#if !WINDOWS
1168 EXTRACTOR_MetaDataProcessor proc, 2000 int shm_id;
1169 void *proc_cls) { 2001#else
1170 unsigned char * buf; 2002 HANDLE map_handle;
1171 unsigned char * rbuf;
1172 size_t dsize;
1173#if HAVE_ZLIB
1174 z_stream strm;
1175 int ret;
1176 size_t pos;
1177#endif 2003#endif
1178#if HAVE_LIBBZ2 2004 char shm_name[MAX_SHM_NAME + 1];
1179 bz_stream bstrm; 2005
1180 int bret; 2006 struct EXTRACTOR_PluginList *ppos;
1181 size_t bpos; 2007
2008 int64_t position = 0;
2009 size_t map_size;
2010 ssize_t read_result;
2011 int kill_plugins = 0;
2012
2013 map_size = (fd == -1) ? fsize : MAX_READ;
2014
2015 /* Make a shared memory object. Even if we're running in-process. Simpler that way */
2016#if !WINDOWS
2017 shm_result = make_shm_posix ((void **) &shm_ptr, &shm_id, shm_name, MAX_SHM_NAME,
2018 map_size);
2019#else
2020 shm_result = make_shm_w32 ((void **) &shm_ptr, &map_handle, shm_name, MAX_SHM_NAME,
2021 map_size);
1182#endif 2022#endif
2023 if (shm_result != 0)
2024 return;
1183 2025
1184 buf = NULL; 2026 /* This three-loops-instead-of-one construction is intended to increase parallelism */
1185 dsize = 0; 2027 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1186#if HAVE_ZLIB 2028 start_process (ppos);
1187 /* try gzip decompression first */ 2029
1188 if ( (size >= 12) && 2030 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1189 (data[0] == 0x1f) && 2031 load_in_process_plugin (ppos);
1190 (data[1] == 0x8b) && 2032
1191 (data[2] == 0x08) ) 2033 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
2034 write_plugin_data (ppos);
2035
2036 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
2037 init_plugin_state (ppos, shm_name, fsize);
2038
2039 while (1)
2040 {
2041 int plugins_not_ready = 0;
2042 if (fd != -1)
1192 { 2043 {
1193 /* Process gzip header */ 2044 /* fill the share buffer with data from the file */
1194 unsigned int gzip_header_length = 10; 2045 if (buffer_size > 0)
1195 2046 memcpy (shm_ptr, buffer, buffer_size);
1196 if (data[3] & 0x4) /* FEXTRA set */ 2047 read_result = READ (fd, &shm_ptr[buffer_size], MAX_READ - buffer_size);
1197 gzip_header_length += 2 + (unsigned) (data[10] & 0xff) 2048 if (read_result <= 0)
1198 + (((unsigned) (data[11] & 0xff)) * 256); 2049 break;
1199
1200 if (data[3] & 0x8) /* FNAME set */
1201 {
1202 const unsigned char * cptr = data + gzip_header_length;
1203 /* stored file name is here */
1204 while (cptr < data + size)
1205 {
1206 if ('\0' == *cptr)
1207 break;
1208 cptr++;
1209 }
1210 if (0 != proc (proc_cls,
1211 "<zlib>",
1212 EXTRACTOR_METATYPE_FILENAME,
1213 EXTRACTOR_METAFORMAT_C_STRING,
1214 "text/plain",
1215 (const char*) (data + gzip_header_length),
1216 cptr - (data + gzip_header_length)))
1217 return; /* done */
1218 gzip_header_length = (cptr - data) + 1;
1219 }
1220 if (data[3] & 0x16) /* FCOMMENT set */
1221 {
1222 const unsigned char * cptr = data + gzip_header_length;
1223 /* stored comment is here */
1224 while (cptr < data + size)
1225 {
1226 if('\0' == *cptr)
1227 break;
1228 cptr ++;
1229 }
1230 if (0 != proc (proc_cls,
1231 "<zlib>",
1232 EXTRACTOR_METATYPE_COMMENT,
1233 EXTRACTOR_METAFORMAT_C_STRING,
1234 "text/plain",
1235 (const char*) (data + gzip_header_length),
1236 cptr - (data + gzip_header_length)))
1237 return; /* done */
1238 gzip_header_length = (cptr - data) + 1;
1239 }
1240 if(data[3] & 0x2) /* FCHRC set */
1241 gzip_header_length += 2;
1242 memset(&strm,
1243 0,
1244 sizeof(z_stream));
1245#ifdef ZLIB_VERNUM
1246 gzip_header_length = 0;
1247#endif
1248 if (size > gzip_header_length)
1249 {
1250 strm.next_in = (Bytef*) data + gzip_header_length;
1251 strm.avail_in = size - gzip_header_length;
1252 }
1253 else 2050 else
1254 { 2051 map_size = read_result + buffer_size;
1255 strm.next_in = (Bytef*) data; 2052 if (buffer_size > 0)
1256 strm.avail_in = 0; 2053 buffer_size = 0;
1257 }
1258 strm.total_in = 0;
1259 strm.zalloc = NULL;
1260 strm.zfree = NULL;
1261 strm.opaque = NULL;
1262
1263 /*
1264 * note: maybe plain inflateInit(&strm) is adequate,
1265 * it looks more backward-compatible also ;
1266 *
1267 * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ;
1268 * there might be a better check.
1269 */
1270 if (Z_OK == inflateInit2(&strm,
1271#ifdef ZLIB_VERNUM
1272 15 + 32
1273#else
1274 -MAX_WBITS
1275#endif
1276 )) {
1277 dsize = 2 * size;
1278 if (dsize > MAX_DECOMPRESS)
1279 dsize = MAX_DECOMPRESS;
1280 buf = malloc(dsize);
1281 pos = 0;
1282 if (buf == NULL)
1283 {
1284 inflateEnd(&strm);
1285 }
1286 else
1287 {
1288 strm.next_out = (Bytef*) buf;
1289 strm.avail_out = dsize;
1290 do
1291 {
1292 ret = inflate(&strm,
1293 Z_SYNC_FLUSH);
1294 if (ret == Z_OK)
1295 {
1296 if (dsize == MAX_DECOMPRESS)
1297 break;
1298 pos += strm.total_out;
1299 strm.total_out = 0;
1300 dsize *= 2;
1301 if (dsize > MAX_DECOMPRESS)
1302 dsize = MAX_DECOMPRESS;
1303 rbuf = realloc(buf, dsize);
1304 if (rbuf == NULL)
1305 {
1306 free (buf);
1307 buf = NULL;
1308 break;
1309 }
1310 buf = rbuf;
1311 strm.next_out = (Bytef*) &buf[pos];
1312 strm.avail_out = dsize - pos;
1313 }
1314 else if (ret != Z_STREAM_END)
1315 {
1316 /* error */
1317 free(buf);
1318 buf = NULL;
1319 }
1320 } while ( (buf != NULL) &&
1321 (ret != Z_STREAM_END) );
1322 dsize = pos + strm.total_out;
1323 inflateEnd(&strm);
1324 if ( (dsize == 0) &&
1325 (buf != NULL) )
1326 {
1327 free(buf);
1328 buf = NULL;
1329 }
1330 }
1331 }
1332 } 2054 }
1333#endif 2055 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1334 2056 plugins_not_ready += give_shm_to_plugin (ppos, position, map_size);
1335#if HAVE_LIBBZ2 2057 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1336 if ( (size >= 4) && 2058 ask_in_process_plugin (ppos, position, shm_ptr, proc, proc_cls);
1337 (data[0] == 'B') && 2059 while (plugins_not_ready > 0 && !kill_plugins)
1338 (data[1] == 'Z') &&
1339 (data[2] == 'h') )
1340 { 2060 {
1341 /* now try bz2 decompression */ 2061 int ready = wait_for_reply (plugins, proc, proc_cls);
1342 memset(&bstrm, 2062 if (ready <= 0)
1343 0, 2063 kill_plugins = 1;
1344 sizeof(bz_stream)); 2064 plugins_not_ready -= ready;
1345 bstrm.next_in = (char*) data;
1346 bstrm.avail_in = size;
1347 bstrm.total_in_lo32 = 0;
1348 bstrm.total_in_hi32 = 0;
1349 bstrm.bzalloc = NULL;
1350 bstrm.bzfree = NULL;
1351 bstrm.opaque = NULL;
1352 if ( (buf == NULL) &&
1353 (BZ_OK == BZ2_bzDecompressInit(&bstrm,
1354 0,
1355 0)) )
1356 {
1357 dsize = 2 * size;
1358 if (dsize > MAX_DECOMPRESS)
1359 dsize = MAX_DECOMPRESS;
1360 buf = malloc(dsize);
1361 bpos = 0;
1362 if (buf == NULL)
1363 {
1364 BZ2_bzDecompressEnd(&bstrm);
1365 }
1366 else
1367 {
1368 bstrm.next_out = (char*) buf;
1369 bstrm.avail_out = dsize;
1370 do {
1371 bret = BZ2_bzDecompress(&bstrm);
1372 if (bret == Z_OK)
1373 {
1374 if (dsize == MAX_DECOMPRESS)
1375 break;
1376 bpos += bstrm.total_out_lo32;
1377 bstrm.total_out_lo32 = 0;
1378 dsize *= 2;
1379 if (dsize > MAX_DECOMPRESS)
1380 dsize = MAX_DECOMPRESS;
1381 rbuf = realloc(buf, dsize);
1382 if (rbuf == NULL)
1383 {
1384 free (buf);
1385 buf = NULL;
1386 break;
1387 }
1388 buf = rbuf;
1389 bstrm.next_out = (char*) &buf[bpos];
1390 bstrm.avail_out = dsize - bpos;
1391 }
1392 else if (bret != BZ_STREAM_END)
1393 {
1394 /* error */
1395 free(buf);
1396 buf = NULL;
1397 }
1398 } while ( (buf != NULL) &&
1399 (bret != BZ_STREAM_END) );
1400 dsize = bpos + bstrm.total_out_lo32;
1401 BZ2_bzDecompressEnd(&bstrm);
1402 if ( (dsize == 0) &&
1403 (buf != NULL) )
1404 {
1405 free(buf);
1406 buf = NULL;
1407 }
1408 }
1409 }
1410 } 2065 }
1411#endif 2066 if (kill_plugins)
1412 if (buf != NULL) 2067 break;
2068 if (fd != -1)
1413 { 2069 {
1414 data = buf; 2070 position += map_size;
1415 size = dsize; 2071 position = seek_to_new_position (plugins, fd, fsize, position);
2072 if (position < 0)
2073 break;
1416 } 2074 }
1417 extract (plugins, 2075 else
1418 (const char*) data, 2076 break;
1419 size, 2077 }
1420 tdata,
1421 tsize,
1422 proc,
1423 proc_cls);
1424 if (buf != NULL)
1425 free(buf);
1426 errno = 0; /* kill transient errors */
1427}
1428
1429 2078
1430/** 2079 if (kill_plugins)
1431 * Open a file 2080 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1432 */ 2081 stop_process (ppos);
1433static int file_open(const char *filename, int oflag, ...) 2082 for (ppos = plugins; NULL != ppos; ppos = ppos->next)
1434{ 2083 discard_plugin_state (ppos);
1435 int mode;
1436 const char *fn;
1437#ifdef MINGW
1438 char szFile[_MAX_PATH + 1];
1439 long lRet;
1440 2084
1441 if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS) 2085#if WINDOWS
1442 { 2086 destroy_shm_w32 (shm_ptr, map_handle);
1443 errno = ENOENT;
1444 SetLastError(lRet);
1445 return -1;
1446 }
1447 fn = szFile;
1448#else 2087#else
1449 fn = filename; 2088 destroy_shm_posix (shm_ptr, shm_id, (fd == -1) ? fsize : MAX_READ, shm_name);
1450#endif
1451 mode = 0;
1452#ifdef MINGW
1453 /* Set binary mode */
1454 mode |= O_BINARY;
1455#endif 2089#endif
1456 return OPEN(fn, oflag, mode);
1457} 2090}
1458 2091
1459 2092
1460#ifndef O_LARGEFILE
1461#define O_LARGEFILE 0
1462#endif
1463
1464
1465/** 2093/**
1466 * Extract keywords from a file using the given set of plugins. 2094 * Extract keywords from a file using the given set of plugins.
1467 * If needed, opens the file and loads its data (via mmap). Then 2095 * If needed, opens the file and loads its data (via mmap). Then
@@ -1478,93 +2106,152 @@ static int file_open(const char *filename, int oflag, ...)
1478 */ 2106 */
1479void 2107void
1480EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, 2108EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
1481 const char *filename, 2109 const char *filename,
1482 const void *data, 2110 const void *data,
1483 size_t size, 2111 size_t size,
1484 EXTRACTOR_MetaDataProcessor proc, 2112 EXTRACTOR_MetaDataProcessor proc,
1485 void *proc_cls) 2113 void *proc_cls)
1486{ 2114{
1487 int fd; 2115 int fd = -1;
1488 void * buffer; 2116 struct stat64 fstatbuf;
1489 void * tbuffer; 2117 int64_t fsize = 0;
1490 struct stat fstatbuf; 2118 int memory_only = 1;
1491 size_t fsize; 2119 int compression_type = -1;
1492 size_t tsize; 2120 void *buffer = NULL;
1493 int eno; 2121 size_t buffer_size;
1494 off_t offset; 2122 int decompression_result;
1495 long pg; 2123
1496#ifdef WINDOWS 2124 /* If data is not given, then we need to read it from the file. Try opening it */
1497 SYSTEM_INFO sys; 2125 if ((data == NULL) &&
1498#endif 2126 (filename != NULL) &&
1499 2127 (0 == STAT64(filename, &fstatbuf)) &&
1500 fd = -1; 2128 (!S_ISDIR(fstatbuf.st_mode)) &&
1501 buffer = NULL; 2129 (-1 != (fd = file_open (filename,
1502 if ( (data == NULL) && 2130 O_RDONLY | O_LARGEFILE))))
1503 (filename != NULL) && 2131 {
1504 (0 == STAT(filename, &fstatbuf)) && 2132 /* Empty files are of no interest */
1505 (!S_ISDIR(fstatbuf.st_mode)) && 2133 fsize = fstatbuf.st_size;
1506 (-1 != (fd = file_open (filename, 2134 if (fsize == 0)
1507 O_RDONLY | O_LARGEFILE))) ) 2135 {
1508 { 2136 close(fd);
1509 fsize = (fstatbuf.st_size > 0xFFFFFFFF) ? 0xFFFFFFFF : fstatbuf.st_size; 2137 return;
1510 if (fsize == 0)
1511 {
1512 close(fd);
1513 return;
1514 }
1515 if (fsize > MAX_READ)
1516 fsize = MAX_READ;
1517 buffer = MMAP(NULL, fsize, PROT_READ, MAP_PRIVATE, fd, 0);
1518 if ( (buffer == NULL) || (buffer == (void *) -1) )
1519 {
1520 eno = errno;
1521 close(fd);
1522 errno = eno;
1523 return;
1524 }
1525 } 2138 }
1526 if ( (buffer == NULL) && 2139 /* File is too big -> can't read it into memory */
1527 (data == NULL) ) 2140 if (fsize > MAX_READ)
2141 memory_only = 0;
2142 }
2143
2144 /* Data is not given, and we've failed to open the file with data -> exit */
2145 if ((fsize == 0) && (data == NULL))
2146 return;
2147 /* fsize is now size of the data OR size of the file */
2148 if (data != NULL)
2149 fsize = size;
2150
2151 errno = 0;
2152 /* Peek at first few bytes of the file (or of the data), and see if it's compressed.
2153 * If data is NULL, buffer is allocated by the function and holds the first few bytes
2154 * of the file, buffer_size is set too.
2155 */
2156 compression_type = get_compression_type (data, fd, fsize, &buffer, &buffer_size);
2157 if (compression_type < 0)
2158 {
2159 /* errno is set by get_compression_type () */
2160 if (fd != -1)
2161 close (fd);
1528 return; 2162 return;
1529 /* for footer extraction */ 2163 }
1530 tsize = 0; 2164 if (compression_type > 0)
1531 tbuffer = NULL; 2165 {
1532 if ( (data == NULL) && 2166 /* Don't assume that MAX_DECOMPRESS < MAX_READ */
1533 (fstatbuf.st_size > fsize) && 2167 if ((fsize > MAX_DECOMPRESS) || (fsize > MAX_READ))
1534 (fstatbuf.st_size > MAX_READ) ) 2168 {
2169 /* File or data is to big to be decompressed in-memory (the only kind of decompression we do) */
2170 errno = EFBIG;
2171 if (fd != -1)
2172 close (fd);
2173 if (buffer != NULL)
2174 free (buffer);
2175 return;
2176 }
2177 /* Decompress data (or file contents + what we've read so far. Either way it writes a new
2178 * pointer to buffer, sets buffer_size, and frees the old buffer (if it wasn't NULL).
2179 * In case of failure it cleans up the buffer after itself.
2180 * Will also report compression-related metadata to the caller.
2181 */
2182 decompression_result = try_to_decompress (data, fd, fsize, compression_type, &buffer, &buffer_size, proc, proc_cls);
2183 if (decompression_result != 0)
2184 {
2185 /* Buffer is taken care of already */
2186 close (fd);
2187 errno = EILSEQ;
2188 return;
2189 }
2190 else
1535 { 2191 {
1536 pg = SYSCONF (_SC_PAGE_SIZE); 2192 close (fd);
1537 if ( (pg > 0) && 2193 fd = -1;
1538 (pg < MAX_READ) )
1539 {
1540 offset = (1 + (fstatbuf.st_size - MAX_READ) / pg) * pg;
1541 if (offset < fstatbuf.st_size)
1542 {
1543 tsize = fstatbuf.st_size - offset;
1544 tbuffer = MMAP (NULL, tsize, PROT_READ, MAP_PRIVATE, fd, offset);
1545 if ( (tbuffer == NULL) || (tbuffer == (void *) -1) )
1546 {
1547 tsize = 0;
1548 tbuffer = NULL;
1549 }
1550 }
1551 }
1552 } 2194 }
1553 decompress_and_extract (plugins, 2195 }
1554 buffer != NULL ? buffer : data, 2196
1555 buffer != NULL ? fsize : size, 2197 /* Now we either have a non-NULL data of fsize bytes
1556 tbuffer, 2198 * OR a valid fd to read from and a small buffer of buffer_size bytes
1557 tsize, 2199 * OR an invalid fd and a big buffer of buffer_size bytes
1558 proc, 2200 * Simplify this situation a bit:
1559 proc_cls); 2201 */
2202 if ((data == NULL) && (fd == -1) && (buffer_size > 0))
2203 {
2204 data = (const void *) buffer;
2205 fsize = buffer_size;
2206 }
2207
2208 /* Now we either have a non-NULL data of fsize bytes
2209 * OR a valid fd to read from and a small buffer of buffer_size bytes
2210 * and we might need to free the buffer later in either case
2211 */
2212
2213 /* do_extract () might set errno itself, but from our point of view everything is OK */
2214 errno = 0;
2215
2216 do_extract (plugins, data, fd, fsize, buffer, buffer_size, proc, proc_cls);
2217
1560 if (buffer != NULL) 2218 if (buffer != NULL)
1561 MUNMAP (buffer, fsize); 2219 free (buffer);
1562 if (tbuffer != NULL)
1563 MUNMAP (tbuffer, tsize);
1564 if (-1 != fd) 2220 if (-1 != fd)
1565 close(fd); 2221 close(fd);
1566} 2222}
1567 2223
2224
2225#if WINDOWS
2226void CALLBACK
2227RundllEntryPoint (HWND hwnd,
2228 HINSTANCE hinst,
2229 LPSTR lpszCmdLine,
2230 int nCmdShow)
2231{
2232 intptr_t in_h;
2233 intptr_t out_h;
2234 int in, out;
2235
2236 sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h);
2237 in = _open_osfhandle (in_h, _O_RDONLY);
2238 out = _open_osfhandle (out_h, 0);
2239 setmode (in, _O_BINARY);
2240 setmode (out, _O_BINARY);
2241 process_requests (read_plugin_data (in),
2242 in, out);
2243}
2244
2245void CALLBACK
2246RundllEntryPointA (HWND hwnd,
2247 HINSTANCE hinst,
2248 LPSTR lpszCmdLine,
2249 int nCmdShow)
2250{
2251 return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow);
2252}
2253#endif
2254
1568/** 2255/**
1569 * Initialize gettext and libltdl (and W32 if needed). 2256 * Initialize gettext and libltdl (and W32 if needed).
1570 */ 2257 */
@@ -1579,12 +2266,12 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() {
1579 if (err > 0) { 2266 if (err > 0) {
1580#if DEBUG 2267#if DEBUG
1581 fprintf(stderr, 2268 fprintf(stderr,
1582 _("Initialization of plugin mechanism failed: %s!\n"), 2269 _("Initialization of plugin mechanism failed: %s!\n"),
1583 lt_dlerror()); 2270 lt_dlerror());
1584#endif 2271#endif
1585 return; 2272 return;
1586 } 2273 }
1587#ifdef MINGW 2274#if WINDOWS
1588 plibc_init("GNU", PACKAGE); 2275 plibc_init("GNU", PACKAGE);
1589#endif 2276#endif
1590} 2277}
@@ -1594,12 +2281,10 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() {
1594 * Deinit. 2281 * Deinit.
1595 */ 2282 */
void __attribute__ ((destructor))
EXTRACTOR_ltdl_fini ()
{
  /* on W32 builds plibc is shut down first, then libltdl everywhere */
#if WINDOWS
  plibc_shutdown ();
#endif
  lt_dlexit ();
}
1602 2289
1603
1604
1605/* end of extractor.c */ 2290/* end of extractor.c */
diff --git a/src/main/extractor_plugins.c b/src/main/extractor_plugins.c
index 026e86b..f5c38f0 100644
--- a/src/main/extractor_plugins.c
+++ b/src/main/extractor_plugins.c
@@ -204,15 +204,24 @@ plugin_load (struct EXTRACTOR_PluginList *plugin)
204 plugin->flags = EXTRACTOR_OPTION_DISABLED; 204 plugin->flags = EXTRACTOR_OPTION_DISABLED;
205 return -1; 205 return -1;
206 } 206 }
207 plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle, 207 plugin->extract_method = get_symbol_with_prefix (plugin->libraryHandle,
208 "_EXTRACTOR_%s_extract", 208 "_EXTRACTOR_%s_extract_method",
209 plugin->libname, 209 plugin->libname,
210 &plugin->specials); 210 &plugin->specials);
211 if (plugin->extractMethod == NULL) 211 plugin->init_state_method = get_symbol_with_prefix (plugin->libraryHandle,
212 "_EXTRACTOR_%s_init_state_method",
213 plugin->libname,
214 &plugin->specials);
215 plugin->discard_state_method = get_symbol_with_prefix (plugin->libraryHandle,
216 "_EXTRACTOR_%s_discard_state_method",
217 plugin->libname,
218 &plugin->specials);
219 if (plugin->extract_method == NULL || plugin->init_state_method == NULL ||
220 plugin->discard_state_method == NULL)
212 { 221 {
213#if DEBUG 222#if DEBUG
214 fprintf (stderr, 223 fprintf (stderr,
215 "Resolving `extract' method of plugin `%s' failed: %s\n", 224 "Resolving `extract', 'init_state' or 'discard_state' method(s) of plugin `%s' failed: %s\n",
216 plugin->short_libname, 225 plugin->short_libname,
217 lt_dlerror ()); 226 lt_dlerror ());
218#endif 227#endif
@@ -243,8 +252,15 @@ EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev,
243 enum EXTRACTOR_Options flags) 252 enum EXTRACTOR_Options flags)
244{ 253{
245 struct EXTRACTOR_PluginList *result; 254 struct EXTRACTOR_PluginList *result;
255 struct EXTRACTOR_PluginList *i;
246 char *libname; 256 char *libname;
247 257
258 for (i = prev; i != NULL; i = i->next)
259 {
260 if (strcmp (i->short_libname, library) == 0)
261 return prev;
262 }
263
248 libname = find_plugin (library); 264 libname = find_plugin (library);
249 if (libname == NULL) 265 if (libname == NULL)
250 { 266 {
diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h
index ea0eabb..bea5c2b 100644
--- a/src/main/extractor_plugins.h
+++ b/src/main/extractor_plugins.h
@@ -64,7 +64,9 @@ struct EXTRACTOR_PluginList
64 /** 64 /**
65 * Pointer to the function used for meta data extraction. 65 * Pointer to the function used for meta data extraction.
66 */ 66 */
67 EXTRACTOR_ExtractMethod extractMethod; 67 EXTRACTOR_extract_method extract_method;
68 EXTRACTOR_init_state_method init_state_method;
69 EXTRACTOR_discard_state_method discard_state_method;
68 70
69 /** 71 /**
70 * Options for the plugin. 72 * Options for the plugin.
@@ -84,26 +86,72 @@ struct EXTRACTOR_PluginList
84 enum EXTRACTOR_Options flags; 86 enum EXTRACTOR_Options flags;
85 87
86 /** 88 /**
87 * Process ID of the child process for this plugin. 0 for 89 * Process ID of the child process for this plugin. 0 for none.
88 * none.
89 */ 90 */
90#ifndef WINDOWS 91#if !WINDOWS
91 int cpid; 92 int cpid;
92#else 93#else
93 HANDLE hProcess; 94 HANDLE hProcess;
94#endif 95#endif
95 96
96 /** 97 /**
97 * Pipe used to send information about shared memory segments to 98 * Pipe used to communicate information to the plugin child process.
98 * the child process. NULL if not initialized. 99 * NULL if not initialized.
99 */ 100 */
101#if !WINDOWS
100 FILE *cpipe_in; 102 FILE *cpipe_in;
103#else
104 HANDLE cpipe_in;
105#endif
106
107 /**
108 * A position this plugin wants us to seek to. -1 if it's finished.
109 * Starts at 0.
110 */
111 int64_t seek_request;
112
113#if !WINDOWS
114 int shm_id;
115#else
116 HANDLE map_handle;
117#endif
118
119 void *state;
120
121 int64_t fsize;
122
123 int64_t position;
124
125 unsigned char *shm_ptr;
126
127 size_t map_size;
101 128
102 /** 129 /**
103 * Pipe used to read information about extracted meta data from 130 * Pipe used to read information about extracted meta data from
104 * the child process. -1 if not initialized. 131 * the plugin child process. -1 if not initialized.
105 */ 132 */
133#if !WINDOWS
106 int cpipe_out; 134 int cpipe_out;
135#else
136 HANDLE cpipe_out;
137#endif
138
139#if WINDOWS
140 /**
141 * A structure for overlapped reads on W32.
142 */
143 OVERLAPPED ov_read;
144
145 /**
146 * A structure for overlapped writes on W32.
147 */
148 OVERLAPPED ov_write;
149
150 /**
151 * A write buffer for overlapped writes on W32
152 */
153 unsigned char *ov_write_buffer;
154#endif
107}; 155};
108 156
109/** 157/**
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index c489d19..465db7c 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -1,4 +1,4 @@
1INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common 1INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common -I$(top_srcdir)/src/main
2 2
3# install plugins under: 3# install plugins under:
4plugindir = $(libdir)/@RPLUGINDIR@ 4plugindir = $(libdir)/@RPLUGINDIR@
@@ -11,183 +11,23 @@ PLUGINFLAGS = $(makesymbolic) $(LE_PLUGIN_LDFLAGS)
11 11
12SUBDIRS = . 12SUBDIRS = .
13 13
14if HAVE_FFMPEG
15 thumbffmpeg=libextractor_thumbnailffmpeg.la
16endif
17
18if HAVE_LIBRPM
19 rpm=libextractor_rpm.la
20endif
21
22if HAVE_GLIB
23if WITH_GSF
24 ole2=libextractor_ole2.la
25endif
26if HAVE_GTK
27 thumbgtk=libextractor_thumbnailgtk.la
28endif
29endif
30
31if HAVE_QT
32 thumbqt=libextractor_thumbnailqt.la
33 qtflags=-lQtGui -lQtCore -lpthread
34else
35if HAVE_QT4
36 thumbqt=libextractor_thumbnailqt.la
37 qtflags=-lQtGui4 -lQtCore4
38endif
39endif
40
41if HAVE_QT_SVG
42 svgflags = -lQtSvg
43else
44if HAVE_QT_SVG4
45 svgflags = -lQtSvg4
46endif
47endif
48
49if HAVE_CXX
50if HAVE_EXIV2
51 exiv2=libextractor_exiv2.la
52endif
53if HAVE_POPPLER
54 pdf=libextractor_pdf.la
55endif
56endif
57
58if HAVE_MPEG2
59 mpeg = libextractor_mpeg.la
60endif
61
62if HAVE_VORBISFILE
63 ogg = libextractor_ogg.la
64endif
65
66if HAVE_FLAC
67 flac = libextractor_flac.la
68endif
69
70if NEED_VORBIS
71 vorbisflag = -lvorbis
72endif
73
74if NEED_OGG
75 flacoggflag = -logg
76endif
77
78plugin_LTLIBRARIES = \ 14plugin_LTLIBRARIES = \
79 libextractor_applefile.la \
80 libextractor_asf.la \
81 libextractor_deb.la \
82 libextractor_dvi.la \
83 libextractor_elf.la \
84 $(exiv2) \
85 $(flac) \
86 libextractor_flv.la \
87 libextractor_gif.la \
88 libextractor_html.la \
89 libextractor_id3.la \ 15 libextractor_id3.la \
90 libextractor_id3v2.la \ 16 libextractor_id3v2.la \
91 libextractor_id3v23.la \ 17 libextractor_mp3.la
92 libextractor_id3v24.la \
93 libextractor_it.la \
94 libextractor_jpeg.la \
95 libextractor_man.la \
96 libextractor_mime.la \
97 libextractor_mkv.la \
98 libextractor_mp3.la \
99 $(mpeg) \
100 libextractor_nsf.la \
101 libextractor_nsfe.la \
102 libextractor_odf.la \
103 $(ogg) \
104 $(ole2) \
105 $(pdf) \
106 libextractor_png.la \
107 libextractor_ps.la \
108 libextractor_qt.la \
109 libextractor_real.la \
110 libextractor_riff.la \
111 $(rpm) \
112 libextractor_s3m.la \
113 libextractor_sid.la \
114 libextractor_tar.la \
115 $(thumbgtk) \
116 $(thumbqt) \
117 $(thumbffmpeg) \
118 libextractor_tiff.la \
119 libextractor_wav.la \
120 libextractor_xm.la \
121 libextractor_zip.la
122 18
123libextractor_applefile_la_SOURCES = \ 19libextractor_mp3_la_SOURCES = \
124 applefile_extractor.c 20 mp3_extractor.c
125libextractor_applefile_la_LDFLAGS = \ 21libextractor_mp3_la_LDFLAGS = \
126 $(PLUGINFLAGS) 22 $(PLUGINFLAGS)
127libextractor_applefile_la_LIBADD = \ 23libextractor_mp3_la_LIBADD = \
128 $(top_builddir)/src/common/libextractor_common.la \
129 $(LE_LIBINTL)
130
131libextractor_asf_la_SOURCES = \
132 asf_extractor.c
133libextractor_asf_la_LDFLAGS = \
134 $(top_builddir)/src/common/libextractor_common.la \ 24 $(top_builddir)/src/common/libextractor_common.la \
135 $(PLUGINFLAGS)
136
137libextractor_deb_la_SOURCES = \
138 deb_extractor.c
139libextractor_deb_la_LDFLAGS = \
140 $(PLUGINFLAGS)
141libextractor_deb_la_LIBADD = \
142 -lz
143
144libextractor_dvi_la_SOURCES = \
145 dvi_extractor.c
146libextractor_dvi_la_LDFLAGS = \
147 $(PLUGINFLAGS)
148
149libextractor_elf_la_SOURCES = \
150 elf_extractor.c
151libextractor_elf_la_LDFLAGS = \
152 $(PLUGINFLAGS)
153libextractor_elf_la_LIBADD = \
154 $(top_builddir)/src/common/libextractor_common.la
155
156libextractor_exiv2_la_SOURCES = \
157 exiv2_extractor.cc
158libextractor_exiv2_la_LDFLAGS = \
159 $(XTRA_CPPLIBS) $(PLUGINFLAGS)
160libextractor_exiv2_la_LIBADD = \
161 -lexiv2
162
163libextractor_flac_la_SOURCES = \
164 flac_extractor.c
165libextractor_flac_la_LDFLAGS = \
166 $(PLUGINFLAGS)
167libextractor_flac_la_LIBADD = \
168 -lFLAC $(flacoggflag) \
169 $(LE_LIBINTL) 25 $(LE_LIBINTL)
170 26
171libextractor_flv_la_SOURCES = \ 27libextractor_ebml_la_SOURCES = \
172 flv_extractor.c 28 ebml_extractor.c
173libextractor_flv_la_LDFLAGS = \ 29libextractor_ebml_la_LDFLAGS = \
174 $(PLUGINFLAGS) 30 $(PLUGINFLAGS)
175libextractor_flv_la_LIBADD = \
176 $(top_builddir)/src/common/libextractor_common.la
177
178libextractor_gif_la_SOURCES = \
179 gif_extractor.c
180libextractor_gif_la_LDFLAGS = \
181 $(PLUGINFLAGS)
182libextractor_gif_la_LIBADD = \
183 $(top_builddir)/src/common/libextractor_common.la
184
185libextractor_html_la_SOURCES = \
186 html_extractor.c
187libextractor_html_la_LDFLAGS = \
188 $(PLUGINFLAGS)
189libextractor_html_la_LIBADD = \
190 $(top_builddir)/src/common/libextractor_common.la
191 31
192libextractor_id3_la_SOURCES = \ 32libextractor_id3_la_SOURCES = \
193 id3_extractor.c 33 id3_extractor.c
@@ -204,211 +44,4 @@ libextractor_id3v2_la_LDFLAGS = \
204libextractor_id3v2_la_LIBADD = \ 44libextractor_id3v2_la_LIBADD = \
205 $(top_builddir)/src/common/libextractor_common.la 45 $(top_builddir)/src/common/libextractor_common.la
206 46
207libextractor_id3v23_la_SOURCES = \
208 id3v23_extractor.c
209libextractor_id3v23_la_LDFLAGS = \
210 $(PLUGINFLAGS)
211libextractor_id3v23_la_LIBADD = \
212 $(top_builddir)/src/common/libextractor_common.la
213
214libextractor_id3v24_la_SOURCES = \
215 id3v24_extractor.c
216libextractor_id3v24_la_LDFLAGS = \
217 $(PLUGINFLAGS)
218libextractor_id3v24_la_LIBADD = \
219 $(top_builddir)/src/common/libextractor_common.la
220
221libextractor_it_la_SOURCES = \
222 it_extractor.c
223libextractor_it_la_LDFLAGS = \
224 $(PLUGINFLAGS)
225
226libextractor_jpeg_la_SOURCES = \
227 jpeg_extractor.c
228libextractor_jpeg_la_LDFLAGS = \
229 $(PLUGINFLAGS)
230libextractor_jpeg_la_LIBADD = \
231 $(LE_LIBINTL)
232
233libextractor_man_la_SOURCES = \
234 man_extractor.c
235libextractor_man_la_LDFLAGS = \
236 $(PLUGINFLAGS)
237libextractor_man_la_LIBADD = \
238 $(LE_LIBINTL)
239
240libextractor_mime_la_SOURCES = \
241 mime_extractor.c
242libextractor_mime_la_LDFLAGS = \
243 $(PLUGINFLAGS)
244
245libextractor_mkv_la_SOURCES = \
246 mkv_extractor.c
247libextractor_mkv_la_LDFLAGS = \
248 $(PLUGINFLAGS)
249
250libextractor_mp3_la_SOURCES = \
251 mp3_extractor.c
252libextractor_mp3_la_LDFLAGS = \
253 $(PLUGINFLAGS)
254libextractor_mp3_la_LIBADD = \
255 $(top_builddir)/src/common/libextractor_common.la \
256 $(LE_LIBINTL)
257
258libextractor_mpeg_la_SOURCES = \
259 mpeg_extractor.c
260libextractor_mpeg_la_LDFLAGS = \
261 $(PLUGINFLAGS)
262libextractor_mpeg_la_LIBADD = \
263 -lmpeg2
264
265libextractor_nsf_la_SOURCES = \
266 nsf_extractor.c
267libextractor_nsf_la_LDFLAGS = \
268 $(PLUGINFLAGS)
269
270libextractor_nsfe_la_SOURCES = \
271 nsfe_extractor.c
272libextractor_nsfe_la_LDFLAGS = \
273 $(PLUGINFLAGS)
274
275libextractor_odf_la_SOURCES = \
276 odf_extractor.c
277libextractor_odf_la_LDFLAGS = \
278 $(PLUGINFLAGS)
279libextractor_odf_la_LIBADD = \
280 $(top_builddir)/src/common/libextractor_common.la \
281 -lz
282
283libextractor_ogg_la_SOURCES = \
284 ogg_extractor.c
285libextractor_ogg_la_LDFLAGS = \
286 $(PLUGINFLAGS)
287libextractor_ogg_la_LIBADD = \
288 -lvorbisfile $(vorbisflag) -logg
289
290libextractor_ole2_la_SOURCES = \
291 ole2_extractor.c
292libextractor_ole2_la_CFLAGS = \
293 $(GSF_CFLAGS)
294libextractor_ole2_la_LIBADD = \
295 $(LIBADD) $(GSF_LIBS) \
296 $(top_builddir)/src/common/libextractor_common.la
297libextractor_ole2_la_LDFLAGS = \
298 $(PLUGINFLAGS)
299
300libextractor_pdf_la_SOURCES = \
301 pdf_extractor.cc
302libextractor_pdf_la_LDFLAGS = \
303 $(XTRA_CPPLIBS) $(PLUGINFLAGS)
304libextractor_pdf_la_LIBADD = \
305 $(top_builddir)/src/common/libextractor_common.la \
306 -lpoppler
307
308libextractor_png_la_SOURCES = \
309 png_extractor.c
310libextractor_png_la_LDFLAGS = \
311 $(PLUGINFLAGS)
312libextractor_png_la_LIBADD = \
313 $(top_builddir)/src/common/libextractor_common.la \
314 -lz
315
316libextractor_ps_la_SOURCES = \
317 ps_extractor.c
318libextractor_ps_la_LDFLAGS = \
319 $(PLUGINFLAGS)
320
321libextractor_qt_la_SOURCES = \
322 qt_extractor.c
323libextractor_qt_la_LDFLAGS = \
324 $(PLUGINFLAGS)
325libextractor_qt_la_LIBADD = \
326 -lz -lm
327
328libextractor_real_la_SOURCES = \
329 real_extractor.c
330libextractor_real_la_LDFLAGS = \
331 $(PLUGINFLAGS)
332
333libextractor_riff_la_SOURCES = \
334 riff_extractor.c
335libextractor_riff_la_LDFLAGS = \
336 $(PLUGINFLAGS)
337libextractor_riff_la_LIBADD = \
338 $(LE_LIBINTL) \
339 -lm
340
341libextractor_rpm_la_SOURCES = \
342 rpm_extractor.c
343libextractor_rpm_la_LDFLAGS = \
344 $(PLUGINFLAGS)
345libextractor_rpm_la_LIBADD = \
346 -lrpm
347
348libextractor_s3m_la_SOURCES = \
349 s3m_extractor.c
350libextractor_s3m_la_LDFLAGS = \
351 $(PLUGINFLAGS)
352
353libextractor_sid_la_SOURCES = \
354 sid_extractor.c
355libextractor_sid_la_LDFLAGS = \
356 $(PLUGINFLAGS)
357
358libextractor_tar_la_SOURCES = \
359 tar_extractor.c
360libextractor_tar_la_LDFLAGS = \
361 $(PLUGINFLAGS)
362
363libextractor_thumbnailffmpeg_la_SOURCES = \
364 thumbnailffmpeg_extractor.c
365libextractor_thumbnailffmpeg_la_LIBADD = \
366 -lavformat -lavcodec -lswscale -lavutil -lz -lbz2
367libextractor_thumbnailffmpeg_la_LDFLAGS = \
368 $(PLUGINFLAGS)
369
370libextractor_thumbnailgtk_la_CFLAGS = \
371 $(GLIB_CFLAGS) $(GTK_CFLAGS)
372libextractor_thumbnailgtk_la_LIBADD = \
373 $(LIBADD) -lgobject-2.0 @GTK_LIBS@
374libextractor_thumbnailgtk_la_LDFLAGS = \
375 $(PLUGINFLAGS)
376libextractor_thumbnailgtk_la_SOURCES = \
377 thumbnailgtk_extractor.c
378
379libextractor_thumbnailqt_la_SOURCES = \
380 thumbnailqt_extractor.cc
381libextractor_thumbnailqt_la_LDFLAGS = \
382 $(QT_LDFLAGS) \
383 $(PLUGINFLAGS)
384libextractor_thumbnailqt_la_LIBADD = \
385 $(qtflags) $(svgflags)
386libextractor_thumbnailqt_la_CPPFLAGS = \
387 $(QT_CPPFLAGS) \
388 $(QT_CFLAGS) $(QT_SVG_CFLAGS)
389
390libextractor_tiff_la_SOURCES = \
391 tiff_extractor.c
392libextractor_tiff_la_LDFLAGS = \
393 $(PLUGINFLAGS)
394libextractor_tiff_la_LIBADD = \
395 $(top_builddir)/src/common/libextractor_common.la
396
397libextractor_wav_la_SOURCES = \
398 wav_extractor.c
399libextractor_wav_la_LDFLAGS = \
400 $(PLUGINFLAGS)
401libextractor_wav_la_LIBADD = \
402 $(LE_LIBINTL)
403
404libextractor_xm_la_SOURCES = \
405 xm_extractor.c
406libextractor_xm_la_LDFLAGS = \
407 $(PLUGINFLAGS)
408
409libextractor_zip_la_SOURCES = \
410 zip_extractor.c
411libextractor_zip_la_LDFLAGS = \
412 $(PLUGINFLAGS)
413
414EXTRA_DIST = template_extractor.c 47EXTRA_DIST = template_extractor.c
diff --git a/src/plugins/id3_extractor.c b/src/plugins/id3_extractor.c
index 64d341c..39bd779 100644
--- a/src/plugins/id3_extractor.c
+++ b/src/plugins/id3_extractor.c
@@ -29,6 +29,8 @@
29#include <unistd.h> 29#include <unistd.h>
30#include <stdlib.h> 30#include <stdlib.h>
31 31
32#include "extractor_plugins.h"
33
32typedef struct 34typedef struct
33{ 35{
34 char *title; 36 char *title;
@@ -199,6 +201,46 @@ static const char *const genre_names[] = {
199#define OK 0 201#define OK 0
200#define INVALID_ID3 1 202#define INVALID_ID3 1
201 203
204struct id3_state
205{
206 int state;
207 id3tag info;
208};
209
210enum ID3State
211{
212 ID3_INVALID = -1,
213 ID3_SEEKING_TO_TAIL = 0,
214 ID3_READING_TAIL = 1
215};
216
217void
218EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin)
219{
220 struct id3_state *state;
221 state = plugin->state = malloc (sizeof (struct id3_state));
222 if (state == NULL)
223 return;
224 memset (state, 0, sizeof (struct id3_state));
225 state->state = ID3_SEEKING_TO_TAIL;
226}
227
228void
229EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
230{
231 struct id3_state *state = plugin->state;
232 if (state != NULL)
233 {
234 if (state->info.title != NULL) free (state->info.title);
235 if (state->info.year != NULL) free (state->info.year);
236 if (state->info.album != NULL) free (state->info.album);
237 if (state->info.artist != NULL) free (state->info.artist);
238 if (state->info.comment != NULL) free (state->info.comment);
239 free (state);
240 }
241 plugin->state = NULL;
242}
243
202static void 244static void
203trim (char *k) 245trim (char *k)
204{ 246{
@@ -209,14 +251,14 @@ trim (char *k)
209} 251}
210 252
211static int 253static int
212get_id3 (const char *data, size_t size, id3tag * id3) 254get_id3 (const char *data, int64_t offset, int64_t size, id3tag *id3)
213{ 255{
214 const char *pos; 256 const char *pos;
215 257
216 if (size < 128) 258 if (size < 128)
217 return INVALID_ID3; 259 return INVALID_ID3;
218 260
219 pos = &data[size - 128]; 261 pos = &data[offset];
220 if (0 != strncmp ("TAG", pos, 3)) 262 if (0 != strncmp ("TAG", pos, 3))
221 return INVALID_ID3; 263 return INVALID_ID3;
222 pos += 3; 264 pos += 3;
@@ -253,49 +295,82 @@ get_id3 (const char *data, size_t size, id3tag * id3)
253} 295}
254 296
255 297
256#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != (ret = proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)))) goto FINISH; } while (0) 298#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) return 1; } while (0)
257 299
258 300
259const char * 301int
260EXTRACTOR_id3_options () 302EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin,
303 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
261{ 304{
262 return "want-tail"; 305 int64_t file_position;
263} 306 int64_t file_size;
307 int64_t offset = 0;
308 int64_t size;
309 struct id3_state *state;
310 char *data;
311
312 char track[16];
264 313
314 if (plugin == NULL || plugin->state == NULL)
315 return 1;
265 316
266int 317 state = plugin->state;
267EXTRACTOR_id3_extract (const char *data, 318 file_position = plugin->position;
268 size_t size, 319 file_size = plugin->fsize;
269 EXTRACTOR_MetaDataProcessor proc, 320 size = plugin->map_size;
270 void *proc_cls, 321 data = (char *) plugin->shm_ptr;
271 const char *options) 322
272{ 323 if (plugin->seek_request < 0)
273 id3tag info; 324 return 1;
274 char track[16]; 325 if (file_position - plugin->seek_request > 0)
275 int ret; 326 {
327 plugin->seek_request = -1;
328 return 1;
329 }
330 if (plugin->seek_request - file_position < size)
331 offset = plugin->seek_request - file_position;
276 332
277 ret = 0; 333 while (1)
278 if (OK != get_id3 (data, size, &info)) 334 {
279 return 0; 335 switch (state->state)
280 ADD (info.title, EXTRACTOR_METATYPE_TITLE);
281 ADD (info.artist, EXTRACTOR_METATYPE_ARTIST);
282 ADD (info.album, EXTRACTOR_METATYPE_ALBUM);
283 ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
284 ADD (info.genre, EXTRACTOR_METATYPE_GENRE);
285 ADD (info.comment, EXTRACTOR_METATYPE_COMMENT);
286 if (info.track_number != 0)
287 { 336 {
288 snprintf(track, 337 case ID3_INVALID:
289 sizeof(track), "%u", info.track_number); 338 plugin->seek_request = -1;
290 ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); 339 return 1;
340 case ID3_SEEKING_TO_TAIL:
341 offset = file_size - 128 - file_position;
342 if (offset > size)
343 {
344 state->state = ID3_READING_TAIL;
345 plugin->seek_request = file_position + offset;
346 return 0;
347 }
348 else if (offset < 0)
349 {
350 state->state = ID3_INVALID;
351 break;
352 }
353 state->state = ID3_READING_TAIL;
354 break;
355 case ID3_READING_TAIL:
356 if (OK != get_id3 (data, offset, size - offset, &state->info))
357 return 1;
358 ADD (state->info.title, EXTRACTOR_METATYPE_TITLE);
359 ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST);
360 ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM);
361 ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
362 ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE);
363 ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT);
364 if (state->info.track_number != 0)
365 {
366 snprintf(track,
367 sizeof(track), "%u", state->info.track_number);
368 ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
369 }
370 state->state = ID3_INVALID;
291 } 371 }
292FINISH: 372 }
293 if (info.title != NULL) free (info.title); 373 return 1;
294 if (info.year != NULL) free (info.year);
295 if (info.album != NULL) free (info.album);
296 if (info.artist != NULL) free (info.artist);
297 if (info.comment != NULL) free (info.comment);
298 return ret;
299} 374}
300 375
301/* end of id3_extractor.c */ 376/* end of id3_extractor.c */
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c
deleted file mode 100644
index c31d63d..0000000
--- a/src/plugins/id3v23_extractor.c
+++ /dev/null
@@ -1,420 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21#define DEBUG_EXTRACT_ID3v23 0
22
23#include "platform.h"
24#include "extractor.h"
25#include <string.h>
26#include <stdio.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <fcntl.h>
32#ifndef MINGW
33#include <sys/mman.h>
34#endif
35
36#include "convert.h"
37
38enum Id3v23Fmt
39 {
40 T, /* simple, 0-terminated string, prefixed by encoding */
41 U, /* 0-terminated ASCII string, no encoding */
42 UL, /* unsync'ed lyrics */
43 SL, /* sync'ed lyrics */
44 L, /* string with language prefix */
45 I /* image */
46 };
47
48typedef struct
49{
50 const char *text;
51 enum EXTRACTOR_MetaType type;
52 enum Id3v23Fmt fmt;
53} Matches;
54
55static Matches tmap[] = {
56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
62 /* TDLY */
63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
66 /* TIME */
67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
70 /* TKEY */
71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
78 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
87 /* TRDA */
88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
89 /* TRSO */
90 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
92 /* TSSE */
93 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 {NULL, 0, T}
115};
116
117
118/* mimetype = audio/mpeg */
119int
120EXTRACTOR_id3v23_extract (const unsigned char *data,
121 size_t size,
122 EXTRACTOR_MetaDataProcessor proc,
123 void *proc_cls,
124 const char *options)
125{
126 int unsync;
127 int extendedHdr;
128 int experimental;
129 uint32_t tsize;
130 uint32_t pos;
131 uint32_t ehdrSize;
132 uint32_t padding;
133 uint32_t csize;
134 int i;
135 uint16_t flags;
136 char *mime;
137 enum EXTRACTOR_MetaType type;
138 size_t off;
139 int obo;
140
141 if ((size < 16) ||
142 (data[0] != 0x49) ||
143 (data[1] != 0x44) ||
144 (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
145 return 0;
146 unsync = (data[5] & 0x80) > 0;
147 if (unsync)
148 return 0; /* not supported */
149 extendedHdr = (data[5] & 0x40) > 0;
150 experimental = (data[5] & 0x20) > 0;
151 if (experimental)
152 return 0;
153 tsize = (((data[6] & 0x7F) << 21) |
154 ((data[7] & 0x7F) << 14) |
155 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
156 if (tsize + 10 > size)
157 return 0;
158 pos = 10;
159 padding = 0;
160 if (extendedHdr)
161 {
162 ehdrSize = (((data[10]) << 24) |
163 ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0));
164
165 padding = (((data[15]) << 24) |
166 ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0));
167 pos += 4 + ehdrSize;
168 if (padding < tsize)
169 tsize -= padding;
170 else
171 return 0;
172 }
173
174
175 while (pos < tsize)
176 {
177 if (pos + 10 > tsize)
178 return 0;
179 csize =
180 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
181 data[pos + 7];
182 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
183 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
184 break;
185 flags = (data[pos + 8] << 8) + data[pos + 9];
186 if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
187 ((flags & 0x40) > 0) /* encrypted, not supported */ )
188 {
189 pos += 10 + csize;
190 continue;
191 }
192 i = 0;
193 while (tmap[i].text != NULL)
194 {
195 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
196 {
197 char *word;
198 if ((flags & 0x20) > 0)
199 {
200 /* "group" identifier, skip a byte */
201 pos++;
202 csize--;
203 }
204 switch (tmap[i].fmt)
205 {
206 case T:
207 /* this byte describes the encoding
208 try to convert strings to UTF-8
209 if it fails, then forget it */
210 switch (data[pos + 10])
211 {
212 case 0x00:
213 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
214 csize - 1, "ISO-8859-1");
215 break;
216 case 0x01:
217 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
218 csize - 1, "UCS-2");
219 break;
220 default:
221 /* bad encoding byte,
222 try to convert from iso-8859-1 */
223 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
224 csize - 1, "ISO-8859-1");
225 break;
226 }
227 break;
228 case U:
229 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
230 csize, "ISO-8859-1");
231 break;
232 case UL:
233 if (csize < 6)
234 return 0; /* malformed */
235 /* find end of description */
236 off = 14;
237 while ( (off < size) &&
238 (off - pos < csize) &&
239 (data[pos + off] == '\0') )
240 off++;
241 if ( (off >= csize) ||
242 (data[pos+off] != '\0') )
243 return 0; /* malformed */
244 off++;
245 switch (data[pos + 10])
246 {
247 case 0x00:
248 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
249 csize - off, "ISO-8859-1");
250 break;
251 case 0x01:
252 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
253 csize - off, "UCS-2");
254 break;
255 default:
256 /* bad encoding byte,
257 try to convert from iso-8859-1 */
258 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
259 csize - off, "ISO-8859-1");
260 break;
261 }
262 break;
263 case SL:
264 if (csize < 7)
265 return 0; /* malformed */
266 /* find end of description */
267 switch (data[pos + 10])
268 {
269 case 0x00:
270 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
271 csize - 6, "ISO-8859-1");
272 break;
273 case 0x01:
274 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
275 csize - 6, "UCS-2");
276 break;
277 default:
278 /* bad encoding byte,
279 try to convert from iso-8859-1 */
280 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
281 csize - 6, "ISO-8859-1");
282 break;
283 }
284 break;
285 case L:
286 if (csize < 5)
287 return 0; /* malformed */
288 /* find end of description */
289 obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... */
290 if (csize < 6)
291 obo = 0;
292 switch (data[pos + 10])
293 {
294 case 0x00:
295 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
296 csize - 4 - obo, "ISO-8859-1");
297 break;
298 case 0x01:
299 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
300 csize - 4 - obo, "UCS-2");
301 break;
302 default:
303 /* bad encoding byte,
304 try to convert from iso-8859-1 */
305 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
306 csize - 4 - obo, "ISO-8859-1");
307 break;
308 }
309 break;
310 case I:
311 if (csize < 2)
312 return 0; /* malformed */
313 /* find end of mime type */
314 off = 11;
315 while ( (off < size) &&
316 (off - pos < csize) &&
317 (data[pos + off] == '\0') )
318 off++;
319 if ( (off >= csize) ||
320 (data[pos+off] != '\0') )
321 return 0; /* malformed */
322 off++;
323 mime = strdup ((const char*) &data[pos + 11]);
324
325 switch (data[pos+off])
326 {
327 case 0x03:
328 case 0x04:
329 type = EXTRACTOR_METATYPE_COVER_PICTURE;
330 break;
331 case 0x07:
332 case 0x08:
333 case 0x09:
334 case 0x0A:
335 case 0x0B:
336 case 0x0C:
337 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
338 break;
339 case 0x0D:
340 case 0x0E:
341 case 0x0F:
342 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
343 break;
344 case 0x14:
345 type = EXTRACTOR_METATYPE_LOGO;
346 type = EXTRACTOR_METATYPE_LOGO;
347 break;
348 default:
349 type = EXTRACTOR_METATYPE_PICTURE;
350 break;
351 }
352 off++;
353
354 /* find end of description */
355 while ( (off < size) &&
356 (off - pos < csize) &&
357 (data[pos + off] == '\0') )
358 off++;
359 if ( (off >= csize) ||
360 (data[pos+off] != '\0') )
361 {
362 if (mime != NULL)
363 free (mime);
364 return 0; /* malformed */
365 }
366 off++;
367 if ( (mime != NULL) &&
368 (0 == strcasecmp ("-->",
369 mime)) )
370 {
371 /* not supported */
372 }
373 else
374 {
375 if (0 != proc (proc_cls,
376 "id3v23",
377 type,
378 EXTRACTOR_METAFORMAT_BINARY,
379 mime,
380 (const char*) &data[pos + off],
381 csize + 6 - off))
382 {
383 if (mime != NULL)
384 free (mime);
385 return 1;
386 }
387 }
388 if (mime != NULL)
389 free (mime);
390 word = NULL;
391 break;
392 default:
393 return 0;
394 }
395 if ((word != NULL) && (strlen (word) > 0))
396 {
397 if (0 != proc (proc_cls,
398 "id3v23",
399 tmap[i].type,
400 EXTRACTOR_METAFORMAT_UTF8,
401 "text/plain",
402 word,
403 strlen(word)+1))
404 {
405 free (word);
406 return 1;
407 }
408 }
409 if (word != NULL)
410 free (word);
411 break;
412 }
413 i++;
414 }
415 pos += 10 + csize;
416 }
417 return 0;
418}
419
420/* end of id3v23_extractor.c */
diff --git a/src/plugins/id3v24_extractor.c b/src/plugins/id3v24_extractor.c
deleted file mode 100644
index 301020c..0000000
--- a/src/plugins/id3v24_extractor.c
+++ /dev/null
@@ -1,455 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21#define DEBUG_EXTRACT_ID3v24 0
22
23#include "platform.h"
24#include "extractor.h"
25#include <string.h>
26#include <stdio.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <fcntl.h>
32#ifndef MINGW
33#include <sys/mman.h>
34#endif
35
36#include "convert.h"
37
38enum Id3v24Fmt
39 {
40 T, /* simple, 0-terminated string, prefixed by encoding */
41 U, /* 0-terminated ASCII string, no encoding */
42 UL, /* unsync'ed lyrics */
43 SL, /* sync'ed lyrics */
44 L, /* string with language prefix */
45 I /* image */
46 };
47
48typedef struct
49{
50 const char *text;
51 enum EXTRACTOR_MetaType type;
52 enum Id3v24Fmt fmt;
53} Matches;
54
55static Matches tmap[] = {
56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */
62 /* TDLY */
63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
66 /* TIME, deprecated in 24 */
67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
70 /* TKEY */
71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
78 /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */
79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
87 /* TRDA, deprecated in 24 */
88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
89 /* TRSO */
90 /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */
91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
92 /* TSSE */
93 /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */
94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 /* new frames in 24 */
115 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
116 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
117 /* TDRC, TDRL, TDTG */
118 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
119 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
120 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
121 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
122 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
123 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
124 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
125 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
126 {NULL, 0, T}
127};
128
129
130/* mimetype = audio/mpeg */
131int
132EXTRACTOR_id3v24_extract (const unsigned char *data,
133 size_t size,
134 EXTRACTOR_MetaDataProcessor proc,
135 void *proc_cls,
136 const char *options)
137{
138 int unsync;
139 int extendedHdr;
140 int experimental;
141 uint32_t tsize;
142 uint32_t pos;
143 uint32_t ehdrSize;
144 uint32_t csize;
145 int i;
146 uint16_t flags;
147 char *mime;
148 enum EXTRACTOR_MetaType type;
149 size_t off;
150
151 if ((size < 16) ||
152 (data[0] != 0x49) ||
153 (data[1] != 0x44) ||
154 (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
155 return 0;
156 unsync = (data[5] & 0x80) > 0;
157 if (unsync)
158 return 0; /* not supported */
159 extendedHdr = (data[5] & 0x40) > 0;
160 experimental = (data[5] & 0x20) > 0;
161 if (experimental)
162 return 0;
163 /* footer = (data[5] & 0x10) > 0; */
164 tsize = (((data[6] & 0x7F) << 21) |
165 ((data[7] & 0x7F) << 14) |
166 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
167 if (tsize + 10 > size)
168 return 0;
169 pos = 10;
170 if (extendedHdr)
171 {
172 ehdrSize = (((data[10] & 0x7F) << 21) |
173 ((data[11] & 0x7F) << 14) |
174 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
175 pos += 4 + ehdrSize;
176 if (ehdrSize > tsize)
177 return 0;
178 }
179 while (pos < tsize)
180 {
181 if (pos + 10 > tsize)
182 return 0;
183 csize =
184 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
185 data[pos + 7];
186 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
187 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
188 break;
189 flags = (data[pos + 8] << 8) + data[pos + 9];
190 if (((flags & 0x08) > 0) /* compressed, not yet supported */ ||
191 ((flags & 0x04) > 0) /* encrypted, not supported */ ||
192 ((flags & 0x02) > 0) /* unsynchronized, not supported */ )
193 {
194 pos += 10 + csize;
195 continue;
196 }
197 i = 0;
198 while (tmap[i].text != NULL)
199 {
200 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
201 {
202 char *word;
203 if ((flags & 0x40) > 0)
204 {
205 /* "group" identifier, skip a byte */
206 pos++;
207 csize--;
208 }
209
210 switch (tmap[i].fmt)
211 {
212 case T:
213 /* this byte describes the encoding
214 try to convert strings to UTF-8
215 if it fails, then forget it */
216 switch (data[pos + 10])
217 {
218 case 0x00:
219 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
220 csize - 1, "ISO-8859-1");
221 break;
222 case 0x01:
223 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
224 csize - 1, "UTF-16");
225 break;
226 case 0x02:
227 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
228 csize - 1, "UTF-16BE");
229 break;
230 case 0x03:
231 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
232 csize - 1, "UTF-8");
233 break;
234 default:
235 /* bad encoding byte,
236 try to convert from iso-8859-1 */
237 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
238 csize - 1, "ISO-8859-1");
239 break;
240 }
241 break;
242 case U:
243 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
244 csize, "ISO-8859-1");
245 break;
246 case UL:
247 if (csize < 6)
248 return 0; /* malformed */
249 /* find end of description */
250 off = 14;
251 while ( (off < size) &&
252 (off - pos < csize) &&
253 (data[pos + off] == '\0') )
254 off++;
255 if ( (off >= csize) ||
256 (data[pos+off] != '\0') )
257 return 0; /* malformed */
258 off++;
259 switch (data[pos + 10])
260 {
261 case 0x00:
262 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
263 csize - off, "ISO-8859-1");
264 break;
265 case 0x01:
266 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
267 csize - off, "UTF-16");
268 break;
269 case 0x02:
270 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
271 csize - off, "UTF-16BE");
272 break;
273 case 0x03:
274 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
275 csize - off, "UTF-8");
276 break;
277 default:
278 /* bad encoding byte,
279 try to convert from iso-8859-1 */
280 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
281 csize - off, "ISO-8859-1");
282 break;
283 }
284 break;
285 case SL:
286 if (csize < 7)
287 return 0; /* malformed */
288 /* find end of description */
289 switch (data[pos + 10])
290 {
291 case 0x00:
292 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
293 csize - 6, "ISO-8859-1");
294 break;
295 case 0x01:
296 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
297 csize - 6, "UTF-16");
298 break;
299 case 0x02:
300 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
301 csize - 6, "UTF-16BE");
302 break;
303 case 0x03:
304 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
305 csize - 6, "UTF-8");
306 break;
307 default:
308 /* bad encoding byte,
309 try to convert from iso-8859-1 */
310 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
311 csize - 6, "ISO-8859-1");
312 break;
313 }
314 break;
315 case L:
316 if (csize < 5)
317 return 0; /* malformed */
318 /* find end of description */
319 switch (data[pos + 10])
320 {
321 case 0x00:
322 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
323 csize - 4, "ISO-8859-1");
324 break;
325 case 0x01:
326 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
327 csize - 4, "UTF-16");
328 break;
329 case 0x02:
330 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
331 csize - 4, "UTF-16BE");
332 break;
333 case 0x03:
334 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
335 csize - 4, "UTF-8");
336 break;
337 default:
338 /* bad encoding byte,
339 try to convert from iso-8859-1 */
340 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
341 csize - 4, "ISO-8859-1");
342 break;
343 }
344 break;
345 case I:
346 if (csize < 2)
347 return 0; /* malformed */
348 /* find end of mime type */
349 off = 11;
350 while ( (off < size) &&
351 (off - pos < csize) &&
352 (data[pos + off] == '\0') )
353 off++;
354 if ( (off >= csize) ||
355 (data[pos+off] != '\0') )
356 return 0; /* malformed */
357 off++;
358 mime = strdup ((const char*) &data[pos + 11]);
359
360 switch (data[pos+off])
361 {
362 case 0x03:
363 case 0x04:
364 type = EXTRACTOR_METATYPE_COVER_PICTURE;
365 break;
366 case 0x07:
367 case 0x08:
368 case 0x09:
369 case 0x0A:
370 case 0x0B:
371 case 0x0C:
372 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
373 break;
374 case 0x0D:
375 case 0x0E:
376 case 0x0F:
377 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
378 break;
379 case 0x14:
380 type = EXTRACTOR_METATYPE_LOGO;
381 type = EXTRACTOR_METATYPE_LOGO;
382 break;
383 default:
384 type = EXTRACTOR_METATYPE_PICTURE;
385 break;
386 }
387 off++;
388
389 /* find end of description */
390 while ( (off < size) &&
391 (off - pos < csize) &&
392 (data[pos + off] == '\0') )
393 off++;
394 if ( (off >= csize) ||
395 (data[pos+off] != '\0') )
396 {
397 if (mime != NULL)
398 free (mime);
399 return 0; /* malformed */
400 }
401 off++;
402 if ( (mime != NULL) &&
403 (0 == strcasecmp ("-->",
404 mime)) )
405 {
406 /* not supported */
407 }
408 else
409 {
410 if (0 != proc (proc_cls,
411 "id3v24",
412 type,
413 EXTRACTOR_METAFORMAT_BINARY,
414 mime,
415 (const char*) &data[pos + off],
416 csize + 6 - off))
417 {
418 if (mime != NULL)
419 free (mime);
420 return 1;
421 }
422 }
423 if (mime != NULL)
424 free (mime);
425 word = NULL;
426 break;
427 default:
428 return 0;
429 }
430 if ((word != NULL) && (strlen (word) > 0))
431 {
432 if (0 != proc (proc_cls,
433 "id3v24",
434 tmap[i].type,
435 EXTRACTOR_METAFORMAT_UTF8,
436 "text/plain",
437 word,
438 strlen(word)+1))
439 {
440 free (word);
441 return 1;
442 }
443 }
444 if (word != NULL)
445 free (word);
446 break;
447 }
448 i++;
449 }
450 pos += 10 + csize;
451 }
452 return 0;
453}
454
455/* end of id3v24_extractor.c */
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c
index 4f50d05..0302dc6 100644
--- a/src/plugins/id3v2_extractor.c
+++ b/src/plugins/id3v2_extractor.c
@@ -26,6 +26,8 @@
26#endif 26#endif
27#include "convert.h" 27#include "convert.h"
28 28
29#include "extractor_plugins.h"
30
29#define DEBUG_EXTRACT_ID3v2 0 31#define DEBUG_EXTRACT_ID3v2 0
30 32
31enum Id3v2Fmt 33enum Id3v2Fmt
@@ -47,314 +49,723 @@ typedef struct
47 49
48static Matches tmap[] = { 50static Matches tmap[] = {
49 /* skipping UFI */ 51 /* skipping UFI */
50 {"TT1", EXTRACTOR_METATYPE_SECTION, T}, 52 {"TT1 ", EXTRACTOR_METATYPE_SECTION, T},
51 {"TT2", EXTRACTOR_METATYPE_TITLE, T}, 53 {"TT2 ", EXTRACTOR_METATYPE_TITLE, T},
52 {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, 54 {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T},
53 {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, 55 {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T},
54 {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, 56 {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T},
55 {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, 57 {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T},
56 {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 58 {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T},
57 {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, 59 {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T},
58 {"TXT", EXTRACTOR_METATYPE_WRITER, T}, 60 {"TXT ", EXTRACTOR_METATYPE_WRITER, T},
59 {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, 61 {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T},
60 {"TCO", EXTRACTOR_METATYPE_GENRE, T}, 62 {"TCO ", EXTRACTOR_METATYPE_GENRE, T},
61 {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, 63 {"TAL ", EXTRACTOR_METATYPE_ALBUM, T},
62 {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, 64 {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T},
63 {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, 65 {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
64 {"TRC", EXTRACTOR_METATYPE_ISRC, T}, 66 {"TRC ", EXTRACTOR_METATYPE_ISRC, T},
65 {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, 67 {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
66 /* 68 /*
67 FIXME: these two and TYE should be combined into 69 FIXME: these two and TYE should be combined into
68 the actual publication date (if TRD is missing) 70 the actual publication date (if TRD is missing)
69 {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, 71 {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
70 {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, 72 {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
71 */ 73 */
72 {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, 74 {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T},
73 {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, 75 {"TMT ", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, 76 {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
75 {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, 77 {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
76 {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, 78 {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T},
77 {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, 79 {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T},
78 {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, 80 {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T},
79 {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, 81 {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
80 {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, 82 {"TOF ", EXTRACTOR_METATYPE_FILENAME, T},
81 {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ 83 {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
82 {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, 84 {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
83 /* skipping TDY, TKE */ 85 /* skipping TDY, TKE */
84 {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, 86 {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
85 {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, 87 {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
86 {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, 88 {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
87 {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, 89 {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
88 /* skipping TXX */ 90 /* skipping TXX */
89 91
90 {"WAF", EXTRACTOR_METATYPE_URL, U}, 92 {"WAF ", EXTRACTOR_METATYPE_URL, U},
91 {"WAR", EXTRACTOR_METATYPE_URL, U}, 93 {"WAR ", EXTRACTOR_METATYPE_URL, U},
92 {"WAS", EXTRACTOR_METATYPE_URL, U}, 94 {"WAS ", EXTRACTOR_METATYPE_URL, U},
93 {"WCM", EXTRACTOR_METATYPE_URL, U}, 95 {"WCM ", EXTRACTOR_METATYPE_URL, U},
94 {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, 96 {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U},
95 {"WCB", EXTRACTOR_METATYPE_URL, U}, 97 {"WCB ", EXTRACTOR_METATYPE_URL, U},
96 /* skipping WXX */ 98 /* skipping WXX */
97 {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, 99 {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
98 /* skipping MCI */ 100 /* skipping MCI */
99 /* skipping ETC */ 101 /* skipping ETC */
100 /* skipping MLL */ 102 /* skipping MLL */
101 /* skipping STC */ 103 /* skipping STC */
102 {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, 104 {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL},
103 {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, 105 {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL},
104 {"COM", EXTRACTOR_METATYPE_COMMENT, L}, 106 {"COM ", EXTRACTOR_METATYPE_COMMENT, L},
105 /* skipping RVA */ 107 /* skipping RVA */
106 /* skipping EQU */ 108 /* skipping EQU */
107 /* skipping REV */ 109 /* skipping REV */
108 {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, 110 {"PIC ", EXTRACTOR_METATYPE_PICTURE, I},
109 /* skipping GEN */ 111 /* skipping GEN */
110 /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ 112 /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
111 /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ 113 /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
112 /* skipping BUF */ 114 /* skipping BUF */
113 /* skipping CRM */ 115 /* skipping CRM */
114 /* skipping CRA */ 116 /* skipping CRA */
115 /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ 117 /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */
118
119
120 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
121 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
122 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
123 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
124 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
125 {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */
126 /* TDLY */
127 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
128 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
129 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
130 /* TIME, idv23 only */
131 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
132 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
133 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
134 /* TKEY */
135 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
136 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
137 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
138 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
139 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
140 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
141 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
142 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */
143 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
144 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
145 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
146 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
147 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
148 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
149 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
150 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
151 /* TRDA, idv23 only */
152 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
153 /* TRSO */
154 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */
155 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
156 /* TSSE */
157 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */
158 {"WCOM", EXTRACTOR_METATYPE_URL, U},
159 {"WCOP", EXTRACTOR_METATYPE_URL, U},
160 {"WOAF", EXTRACTOR_METATYPE_URL, U},
161 {"WOAS", EXTRACTOR_METATYPE_URL, U},
162 {"WORS", EXTRACTOR_METATYPE_URL, U},
163 {"WPAY", EXTRACTOR_METATYPE_URL, U},
164 {"WPUB", EXTRACTOR_METATYPE_URL, U},
165 {"WXXX", EXTRACTOR_METATYPE_URL, T},
166 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */
167 /* ... */
168 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
169 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
170 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
171 /* ... */
172 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
173 /* ... */
174 {"LINK", EXTRACTOR_METATYPE_URL, U},
175 /* ... */
176 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
177 /* ... */
178
179 /* new frames in id3v24 */
180 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
181 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
182 /* TDRC, TDRL, TDTG */
183 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
184 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
185 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
186 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
187 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
188 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
189 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
190 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
191
116 {NULL, 0, T}, 192 {NULL, 0, T},
117}; 193};
118 194
119 195struct id3v2_state
120/* mimetype = audio/mpeg */
121int
122EXTRACTOR_id3v2_extract (const unsigned char *data,
123 size_t size,
124 EXTRACTOR_MetaDataProcessor proc,
125 void *proc_cls,
126 const char *options)
127{ 196{
197 int state;
128 unsigned int tsize; 198 unsigned int tsize;
129 unsigned int pos; 199 size_t csize;
200 char id[4];
201 int32_t ti;
202 char ver;
203 char extended_header;
204 uint16_t frame_flags;
205 char *mime;
206};
207
208enum ID3v2State
209{
210 ID3V2_INVALID = -1,
211 ID3V2_READING_HEADER = 0,
212 ID3V2_READING_FRAME_HEADER,
213 ID3V23_READING_EXTENDED_HEADER,
214 ID3V24_READING_EXTENDED_HEADER,
215 ID3V2_READING_FRAME
216};
217
218void
219EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
220{
221 struct id3v2_state *state;
222 state = plugin->state = malloc (sizeof (struct id3v2_state));
223 if (state == NULL)
224 return;
225 memset (state, 0, sizeof (struct id3v2_state));
226 state->state = ID3V2_READING_HEADER;
227 state->ti = -1;
228 state->mime = NULL;
229}
230
231void
232EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
233{
234 struct id3v2_state *state = plugin->state;
235 if (state != NULL)
236 {
237 if (state->mime != NULL)
238 free (state->mime);
239 free (state);
240 }
241 plugin->state = NULL;
242}
243
244static int
245find_type (const char *id, size_t len)
246{
247 int i;
248 for (i = 0; tmap[i].text != NULL; i++)
249 if (0 == strncmp (tmap[i].text, id, len))
250 return i;
251 return -1;
252}
253
254int
255EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin,
256 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
257{
258 int64_t file_position;
259 int64_t file_size;
260 int64_t offset = 0;
261 int64_t size;
262 struct id3v2_state *state;
263 unsigned char *data;
264 char *word = NULL;
130 unsigned int off; 265 unsigned int off;
131 enum EXTRACTOR_MetaType type; 266 enum EXTRACTOR_MetaType type;
132 const char *mime; 267 unsigned char picture_type;
133 268
134 if ((size < 16) || 269 if (plugin == NULL || plugin->state == NULL)
135 (data[0] != 0x49) || 270 return 1;
136 (data[1] != 0x44) ||
137 (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
138 return 0;
139 /* unsync: (data[5] & 0x80) > 0; */
140 tsize = (((data[6] & 0x7F) << 21) |
141 ((data[7] & 0x7F) << 14) |
142 ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
143 271
144 if (tsize + 10 > size) 272 state = plugin->state;
145 return 0; 273 file_position = plugin->position;
146 pos = 10; 274 file_size = plugin->fsize;
147 while (pos < tsize) 275 size = plugin->map_size;
276 data = plugin->shm_ptr;
277
278 if (plugin->seek_request < 0)
279 return 1;
280 if (file_position - plugin->seek_request > 0)
281 {
282 plugin->seek_request = -1;
283 return 1;
284 }
285 if (plugin->seek_request - file_position < size)
286 offset = plugin->seek_request - file_position;
287
288 while (1)
289 {
290 switch (state->state)
148 { 291 {
149 size_t csize; 292 case ID3V2_INVALID:
150 int i; 293 plugin->seek_request = -1;
294 return 1;
295 case ID3V2_READING_HEADER:
296 /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq:
297 * Q: Where is an ID3v2 tag located in an MP3 file?
298 * A: It is most likely located at the beginning of the file. Look for the
299 * marker "ID3" in the first 3 bytes of the file. If it's not there, it
300 * could be at the end of the file (if the tag is ID3v2.4). Look for the
301 * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the
302 * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags
303 * in the actual MPEG stream, on an MPEG frame boundry. Almost nobody does
304 * this.
305 * Parsing of such tags will not be completely correct, because we can't
306 * seek backwards. We will have to seek to file_size - chunk_size instead
307 * (by the way, chunk size is theoretically unknown, LE is free to use any chunk
308 * size, even though plugins often make assumptions about chunk size being large
309 * enough to make one atomic read without seeking, if offset == 0) and search
310 * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before
311 * it (or 10 bytes before the end of file, if id3v1 is not there; not sure
312 * about APETAGs; we should probably just scan byte-by-byte from the end of file,
313 * until we hit 3DI, or reach the offset == 0), and use it to set offset to the
314 * start of ID3v24 header, adjust the following file_position check and data
315 * indices (use offset), and otherwise proceed as normal (maybe file size checks
316 * along the way will have to be adjusted by -1, or made ">" instead of ">=";
317 * these problems do not arise for tags at the beginning of the file, since
318 * audio itself is usually at least 1-byte long; when the tag is at the end of
319 * file, these checks will have to be 100% correct).
320 * If there are two tags (at the beginning and at the end of the file),
321 * a SEEK in the one at the beginning of the file can be used to seek to the
322 * one at the end.
323 */
324 /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that
325 * tells the parser to augment id3v1 values with the values from id3v2 (if this
326 * flag is not set, id3v2 parser must discard id3v1 data).
327 * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored.
328 */
329 if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/)
330 {
331 state->state = ID3V2_INVALID;
332 break;
333 }
334 state->ver = data[3];
335 if (state->ver == 0x02)
336 {
337 state->extended_header = 0;
338 }
339 else if ((state->ver == 0x03) || (state->ver == 0x04))
340 {
341 if ((data[5] & 0x80) > 0)
342 {
343 /* unsync is not supported in id3v23 or id3v24*/
344 state->state = ID3V2_INVALID;
345 break;
346 }
347 state->extended_header = (data[5] & 0x40) > 0;
348 if ((data[5] & 0x20) > 0)
349 {
350 /* experimental is not supported in id3v23 or id3v24*/
351 state->state = ID3V2_INVALID;
352 break;
353 }
354 }
355 state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
356 if (state->tsize + 10 > file_size)
357 {
358 state->state = ID3V2_INVALID;
359 break;
360 }
361 offset = 10;
362 if (state->ver == 0x03 && state->extended_header)
363 state->state = ID3V23_READING_EXTENDED_HEADER;
364 else if (state->ver == 0x04 && state->extended_header)
365 state->state = ID3V24_READING_EXTENDED_HEADER;
366 else
367 state->state = ID3V2_READING_FRAME_HEADER;
368 break;
369 case ID3V23_READING_EXTENDED_HEADER:
370 if (offset + 9 >= size)
371 {
372 if (offset == 0)
373 {
374 state->state = ID3V2_INVALID;
375 break;
376 }
377 plugin->seek_request = file_position + offset;
378 return 0;
379 }
380 if (state->ver == 0x03 && state->extended_header)
381 {
382 uint32_t padding, extended_header_size;
383 extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0));
384 padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0));
385 if (data[offset + 4] == 0 && data[offset + 5] == 0)
386 /* Skip the CRC32 byte after extended header */
387 offset += 1;
388 offset += 4 + extended_header_size;
389 if (padding < state->tsize)
390 state->tsize -= padding;
391 else
392 {
393 state->state = ID3V2_INVALID;
394 break;
395 }
396 }
397 break;
398 case ID3V24_READING_EXTENDED_HEADER:
399 if (offset + 6 >= size)
400 {
401 if (offset == 0)
402 {
403 state->state = ID3V2_INVALID;
404 break;
405 }
406 plugin->seek_request = file_position + offset;
407 return 0;
408 }
409 if ( (state->ver == 0x04) && (state->extended_header))
410 {
411 uint32_t extended_header_size;
151 412
152 if (pos + 7 > tsize) 413 extended_header_size = (((data[offset]) << 24) |
414 ((data[offset + 1]) << 16) |
415 ((data[offset + 2]) << 8) |
416 ((data[offset + 3]) << 0));
417 offset += 4 + extended_header_size;
418 }
419 break;
420 case ID3V2_READING_FRAME_HEADER:
421 if (file_position + offset > state->tsize ||
422 ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) ||
423 (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize))
424 {
425 state->state = ID3V2_INVALID;
426 break;
427 }
428 if (((state->ver == 0x02) && (offset + 6 >= size)) ||
429 (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size)))
430 {
431 plugin->seek_request = file_position + offset;
153 return 0; 432 return 0;
154 csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; 433 }
155 if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) 434 if (state->ver == 0x02)
435 {
436 memcpy (state->id, &data[offset], 3);
437 state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5];
438 if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
439 {
440 state->state = ID3V2_INVALID;
441 break;
442 }
443 offset += 6;
444 state->frame_flags = 0;
445 }
446 else if ((state->ver == 0x03) || (state->ver == 0x04))
447 {
448 memcpy (state->id, &data[offset], 4);
449 if (state->ver == 0x03)
450 state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7];
451 else if (state->ver == 0x04)
452 state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00);
453 if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
454 {
455 state->state = ID3V2_INVALID;
456 break;
457 }
458 state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
459 if (state->ver == 0x03)
460 {
461 if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ ||
462 ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
463 {
464 /* Skip to next frame header */
465 offset += 10 + state->csize;
466 break;
467 }
468 }
469 else if (state->ver == 0x04)
470 {
471 if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ ||
472 ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ ||
473 ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */)
474 {
475 /* Skip to next frame header */
476 offset += 10 + state->csize;
477 break;
478 }
479 if ((state->frame_flags & 0x01) > 0)
480 {
481 /* Skip data length indicator */
482 state->csize -= 4;
483 offset += 4;
484 }
485 }
486 offset += 10;
487 }
488
489 state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
490 if (state->ti == -1)
491 {
492 offset += state->csize;
493 break;
494 }
495 state->state = ID3V2_READING_FRAME;
496 break;
497 case ID3V2_READING_FRAME:
498 if (offset == 0 && state->csize > size)
499 {
500 /* frame size is larger than the size of one data chunk we get at a time */
501 offset += state->csize;
502 state->state = ID3V2_READING_FRAME_HEADER;
503 break;
504 }
505 if (offset + state->csize > size)
506 {
507 plugin->seek_request = file_position + offset;
508 return 0;
509 }
510 word = NULL;
511 if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
512 ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
513 {
514 /* "group" identifier, skip a byte */
515 offset++;
516 state->csize--;
517 }
518 switch (tmap[state->ti].fmt)
519 {
520 case T:
521 if (data[offset] == 0x00)
522 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
523 state->csize - 1, "ISO-8859-1");
524 else if (data[offset] == 0x01)
525 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
526 state->csize - 1, "UCS-2");
527 else if ((state->ver == 0x04) && (data[offset] == 0x02))
528 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
529 state->csize - 1, "UTF-16BE");
530 else if ((state->ver == 0x04) && (data[offset] == 0x03))
531 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
532 state->csize - 1, "UTF-8");
533 else
534 /* bad encoding byte, try to convert from iso-8859-1 */
535 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
536 state->csize - 1, "ISO-8859-1");
537 break;
538 case U:
539 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
540 state->csize, "ISO-8859-1");
541 break;
542 case UL:
543 if (state->csize < 6)
544 {
545 /* malformed */
546 state->state = ID3V2_INVALID;
547 break;
548 }
549 /* find end of description */
550 off = 4;
551 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
552 off++;
553 if ((off >= state->csize) || (data[offset + off] != '\0'))
554 {
555 /* malformed */
556 state->state = ID3V2_INVALID;
557 break;
558 }
559 off++;
560 if (data[offset] == 0x00)
561 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
562 state->csize - off, "ISO-8859-1");
563 else if (data[offset] == 0x01)
564 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
565 state->csize - off, "UCS-2");
566 else if ((state->ver == 0x04) && (data[offset] == 0x02))
567 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
568 state->csize - off, "UTF-16BE");
569 else if ((state->ver == 0x04) && (data[offset] == 0x03))
570 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
571 state->csize - off, "UTF-8");
572 else
573 /* bad encoding byte, try to convert from iso-8859-1 */
574 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
575 state->csize - off, "ISO-8859-1");
576 break;
577 case SL:
578 if (state->csize < 7)
579 {
580 /* malformed */
581 state->state = ID3V2_INVALID;
582 break;
583 }
584 if (data[offset] == 0x00)
585 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
586 state->csize - 6, "ISO-8859-1");
587 else if (data[offset] == 0x01)
588 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
589 state->csize - 6, "UCS-2");
590 else if ((state->ver == 0x04) && (data[offset] == 0x02))
591 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
592 state->csize - 6, "UTF-16BE");
593 else if ((state->ver == 0x04) && (data[offset] == 0x03))
594 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
595 state->csize - 6, "UTF-8");
596 else
597 /* bad encoding byte, try to convert from iso-8859-1 */
598 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
599 state->csize - 6, "ISO-8859-1");
600 break;
601 case L:
602 if (state->csize < 5)
603 {
604 /* malformed */
605 state->state = ID3V2_INVALID;
606 break;
607 }
608 /* find end of description */
609 off = 4;
610 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
611 off++;
612 if ((off >= state->csize) || (data[offset + off] != '\0'))
613 {
614 /* malformed */
615 state->state = ID3V2_INVALID;
616 break;
617 }
618 off++;
619
620 if (data[offset] == 0x00)
621 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
622 state->csize - off, "ISO-8859-1");
623 else if (data[offset] == 0x01)
624 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
625 state->csize - off, "UCS-2");
626 else if ((state->ver == 0x04) && (data[offset] == 0x02))
627 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
628 state->csize - off, "UTF-1offBE");
629 else if ((state->ver == 0x04) && (data[offset] == 0x03))
630 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
631 state->csize - off, "UTF-8");
632 else
633 /* bad encoding byte, try to convert from iso-8859-1 */
634 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
635 state->csize - off, "ISO-8859-1");
636 break;
637 case I:
638 if ( ( (state->ver == 0x02) &&
639 (state->csize < 7) ) ||
640 ( ( (state->ver == 0x03) ||
641 (state->ver == 0x04)) && (state->csize < 5)) )
642 {
643 /* malformed */
644 state->state = ID3V2_INVALID;
645 break;
646 }
647 if (state->mime != NULL)
648 free (state->mime);
649 state->mime = NULL;
650 if (state->ver == 0x02)
651 {
652 off = 5;
653 picture_type = data[offset + 5];
654 }
655 else if ((state->ver == 0x03) || (state->ver == 0x04))
656 {
657 off = 1;
658 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') )
659 off++;
660 if ((off >= state->csize) || (data[offset + off] != '\0'))
661 {
662 /* malformed */
663 state->state = ID3V2_INVALID;
664 break;
665 }
666 state->mime = malloc (off);
667 memcpy (state->mime, &data[offset + 1], off - 1);
668 state->mime[off - 1] = '\0';
669 off += 1;
670 picture_type = data[offset];
671 off += 1;
672 }
673 /* find end of description */
674 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
675 off++;
676 if ((off >= state->csize) || (data[offset + off] != '\0'))
677 {
678 free (state->mime);
679 state->mime = NULL;
680 /* malformed */
681 state->state = ID3V2_INVALID;
682 break;
683 }
684 off++;
685 switch (picture_type)
686 {
687 case 0x03:
688 case 0x04:
689 type = EXTRACTOR_METATYPE_COVER_PICTURE;
690 break;
691 case 0x07:
692 case 0x08:
693 case 0x09:
694 case 0x0A:
695 case 0x0B:
696 case 0x0C:
697 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
698 break;
699 case 0x0D:
700 case 0x0E:
701 case 0x0F:
702 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
703 break;
704 case 0x14:
705 type = EXTRACTOR_METATYPE_LOGO;
706 type = EXTRACTOR_METATYPE_LOGO;
707 break;
708 default:
709 type = EXTRACTOR_METATYPE_PICTURE;
710 break;
711 }
712 if (state->ver == 0x02)
713 {
714 if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
715 state->mime = strdup ("image/png");
716 else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3))
717 state->mime = strdup ("image/jpeg");
718 else
719 state->mime = NULL;
720 }
721 else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL))
722 {
723 size_t mime_len = strlen (state->mime);
724 char *type_mime = malloc (mime_len + 6 + 1);
725 snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime);
726 free (state->mime);
727 state->mime = type_mime;
728 }
729 if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->")))
730 {
731 /* not supported */
732 free (state->mime);
733 state->mime = NULL;
734 }
735 else
736 {
737 if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off))
738 {
739 if (state->mime != NULL)
740 free (state->mime);
741 state->mime = NULL;
742 return 1;
743 }
744 if (state->mime != NULL)
745 free (state->mime);
746 state->mime = NULL;
747 }
748 word = NULL;
156 break; 749 break;
157 i = 0; 750 default:
158 while (tmap[i].text != NULL) 751 return 1;
752 }
753 if ((word != NULL) && (strlen (word) > 0))
754 {
755 if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1))
159 { 756 {
160 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3)) 757 free (word);
161 { 758 return 1;
162 char *word;
163 /* this byte describes the encoding
164 try to convert strings to UTF-8
165 if it fails, then forget it */
166 switch (tmap[i].fmt)
167 {
168 case T:
169 switch (data[pos + 6])
170 {
171 case 0x00:
172 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
173 csize - 1, "ISO-8859-1");
174 break;
175 case 0x01:
176 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
177 csize - 1, "UCS-2");
178 break;
179 default:
180 /* bad encoding byte,
181 try to convert from iso-8859-1 */
182 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
183 csize - 1, "ISO-8859-1");
184 break;
185 }
186 break;
187 case U:
188 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6],
189 csize, "ISO-8859-1");
190 break;
191 case UL:
192 if (csize < 6)
193 return 0; /* malformed */
194 /* find end of description */
195 off = 10;
196 while ( (off < size) &&
197 (off - pos < csize) &&
198 (data[pos + off] == '\0') )
199 off++;
200 if ( (off >= csize) ||
201 (data[pos+off] != '\0') )
202 return 0; /* malformed */
203 off++;
204 switch (data[pos + 6])
205 {
206 case 0x00:
207 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
208 csize - off, "ISO-8859-1");
209 break;
210 case 0x01:
211 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
212 csize - off, "UCS-2");
213 break;
214 default:
215 /* bad encoding byte,
216 try to convert from iso-8859-1 */
217 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
218 csize - off, "ISO-8859-1");
219 break;
220 }
221 break;
222 case SL:
223 if (csize < 7)
224 return 0; /* malformed */
225 /* find end of description */
226 switch (data[pos + 6])
227 {
228 case 0x00:
229 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
230 csize - 6, "ISO-8859-1");
231 break;
232 case 0x01:
233 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
234 csize - 6, "UCS-2");
235 break;
236 default:
237 /* bad encoding byte,
238 try to convert from iso-8859-1 */
239 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
240 csize - 6, "ISO-8859-1");
241 break;
242 }
243 break;
244 case L:
245 if (csize < 5)
246 return 0; /* malformed */
247 /* find end of description */
248 switch (data[pos + 6])
249 {
250 case 0x00:
251 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
252 csize - 4, "ISO-8859-1");
253 break;
254 case 0x01:
255 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
256 csize - 4, "UCS-2");
257 break;
258 default:
259 /* bad encoding byte,
260 try to convert from iso-8859-1 */
261 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
262 csize - 4, "ISO-8859-1");
263 break;
264 }
265 break;
266 case I:
267 if (csize < 6)
268 return 0; /* malformed */
269 /* find end of description */
270 off = 12;
271 while ( (off < size) &&
272 (off - pos < csize) &&
273 (data[pos + off] == '\0') )
274 off++;
275 if ( (off >= csize) ||
276 (data[pos+off] != '\0') )
277 return 0; /* malformed */
278 off++;
279 switch (data[pos+11])
280 {
281 case 0x03:
282 case 0x04:
283 type = EXTRACTOR_METATYPE_COVER_PICTURE;
284 break;
285 case 0x07:
286 case 0x08:
287 case 0x09:
288 case 0x0A:
289 case 0x0B:
290 case 0x0C:
291 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
292 break;
293 case 0x0D:
294 case 0x0E:
295 case 0x0F:
296 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
297 break;
298 case 0x14:
299 type = EXTRACTOR_METATYPE_LOGO;
300 type = EXTRACTOR_METATYPE_LOGO;
301 break;
302 default:
303 type = EXTRACTOR_METATYPE_PICTURE;
304 break;
305 }
306 if (0 == strncasecmp ("PNG",
307 (const char*) &data[pos + 7], 3))
308 mime = "image/png";
309 else if (0 == strncasecmp ("JPG",
310 (const char*) &data[pos + 7], 3))
311 mime = "image/jpeg";
312 else
313 mime = NULL;
314 if (0 == strncasecmp ("-->",
315 (const char*) &data[pos + 7], 3))
316 {
317 /* not supported */
318 }
319 else
320 {
321 if (0 != proc (proc_cls,
322 "id3v2",
323 type,
324 EXTRACTOR_METAFORMAT_BINARY,
325 mime,
326 (const char*) &data[pos + off],
327 csize + 6 - off))
328 return 1;
329 }
330 word = NULL;
331 break;
332 default:
333 return 0;
334 }
335 if ((word != NULL) && (strlen (word) > 0))
336 {
337 if (0 != proc (proc_cls,
338 "id3v2",
339 tmap[i].type,
340 EXTRACTOR_METAFORMAT_UTF8,
341 "text/plain",
342 word,
343 strlen(word)+1))
344 {
345 free (word);
346 return 1;
347 }
348 }
349 if (word != NULL)
350 free (word);
351 break;
352 }
353 i++;
354 } 759 }
355 pos += 6 + csize; 760 }
761 if (word != NULL)
762 free (word);
763 offset = offset + state->csize;
764 state->state = ID3V2_READING_FRAME_HEADER;
765 break;
356 } 766 }
357 return 0; 767 }
768 return 1;
358} 769}
359 770
360/* end of id3v2_extractor.c */ 771/* end of id3v2_extractor.c */
diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c
index 3d8d48d..68b3a39 100644
--- a/src/plugins/mp3_extractor.c
+++ b/src/plugins/mp3_extractor.c
@@ -36,8 +36,41 @@
36#include <unistd.h> 36#include <unistd.h>
37#include <stdlib.h> 37#include <stdlib.h>
38 38
39#define MAX_MP3_SCAN_DEEP 16768 39#include "extractor_plugins.h"
40const int max_frames_scan = 1024; 40
41#if WINDOWS
42#include <sys/param.h> /* #define BYTE_ORDER */
43#endif
44#ifndef __BYTE_ORDER
45#ifdef _BYTE_ORDER
46#define __BYTE_ORDER _BYTE_ORDER
47#else
48#ifdef BYTE_ORDER
49#define __BYTE_ORDER BYTE_ORDER
50#endif
51#endif
52#endif
53#ifndef __BIG_ENDIAN
54#ifdef _BIG_ENDIAN
55#define __BIG_ENDIAN _BIG_ENDIAN
56#else
57#ifdef BIG_ENDIAN
58#define __BIG_ENDIAN BIG_ENDIAN
59#endif
60#endif
61#endif
62#ifndef __LITTLE_ENDIAN
63#ifdef _LITTLE_ENDIAN
64#define __LITTLE_ENDIAN _LITTLE_ENDIAN
65#else
66#ifdef LITTLE_ENDIAN
67#define __LITTLE_ENDIAN LITTLE_ENDIAN
68#endif
69#endif
70#endif
71
72#define LARGEST_FRAME_SIZE 8065
73
41enum 74enum
42{ MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 }; 75{ MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 };
43 76
@@ -45,6 +78,11 @@ enum
45{ LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 }; 78{ LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 };
46 79
47#define MPA_SYNC_MASK ((unsigned int) 0xFFE00000) 80#define MPA_SYNC_MASK ((unsigned int) 0xFFE00000)
81#if __BYTE_ORDER == __BIG_ENDIAN
82#define MPA_SYNC_MASK_MEM ((unsigned int) 0xFFE00000)
83#else
84#define MPA_SYNC_MASK_MEM ((unsigned int) 0x0000E0FF)
85#endif
48#define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000) 86#define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000)
49#define MPA_VERSION_MASK ((unsigned int) 0x00080000) 87#define MPA_VERSION_MASK ((unsigned int) 0x00080000)
50#define MPA_LAYER_MASK ((unsigned int) 0x3) 88#define MPA_LAYER_MASK ((unsigned int) 0x3)
@@ -106,169 +144,274 @@ static const char * const layer_names[3] = {
106 144
107#define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) 145#define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
108 146
109/* mimetype = audio/mpeg */ 147struct mp3_state
110int 148{
111EXTRACTOR_mp3_extract (const unsigned char *data, 149 int state;
112 size_t size, 150
113 EXTRACTOR_MetaDataProcessor proc, 151 uint32_t header;
114 void *proc_cls, 152 int sample_rate;
115 const char *options) 153 char mpeg_ver;
154 char layer;
155 char vbr_flag;
156 int ch;
157 char copyright_flag;
158 char original_flag;
159 int avg_bps;
160 int bitrate;
161
162 int64_t number_of_frames;
163 int64_t number_of_valid_frames;
164};
165
166enum MP3State
167{
168 MP3_LOOKING_FOR_FRAME = 0,
169 MP3_READING_FRAME = 1,
170};
171
172void
173EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin)
174{
175 struct mp3_state *state;
176 state = plugin->state = malloc (sizeof (struct mp3_state));
177 if (state == NULL)
178 return;
179 state->header = 0;
180 state->sample_rate = 0;
181 state->number_of_frames = 0;
182 state->number_of_valid_frames = 0;
183 state->mpeg_ver = 0;
184 state->layer = 0;
185 state->vbr_flag = 0;
186 state->ch = 0;
187 state->copyright_flag = 0;
188 state->original_flag = 0;
189 state->avg_bps = 0;
190 state->bitrate = 0;
191 state->state = 0;
192}
193
194void
195EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
196{
197 if (plugin->state != NULL)
198 {
199 free (plugin->state);
200 }
201 plugin->state = NULL;
202}
203
204static int
205calculate_frame_statistics_and_maybe_report_it (struct EXTRACTOR_PluginList *plugin,
206 struct mp3_state *state, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
207{
208 int length;
209 char format[512];
210
211 if (((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5 ||
212 state->number_of_valid_frames < 2)
213 /* Unlikely to be an mp3 file */
214 return 0;
215 ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
216 state->avg_bps = state->avg_bps / state->number_of_valid_frames;
217 if (state->sample_rate > 0)
218 length = 1152 * state->number_of_valid_frames / state->sample_rate;
219 else if (state->avg_bps > 0 || state->bitrate > 0)
220 length = plugin->fsize / (state->avg_bps ? state->avg_bps : state->bitrate ? state->bitrate : 1) / 125;
221 else
222 length = 0;
223
224 ADDR (mpeg_versions[state->mpeg_ver - 1], EXTRACTOR_METATYPE_FORMAT_VERSION);
225 snprintf (format,
226 sizeof (format),
227 "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s",
228 mpeg_versions[state->mpeg_ver - 1],
229 layer_names[state->layer - 1],
230 state->avg_bps,
231 state->vbr_flag ? _("VBR") : _("CBR"),
232 state->sample_rate,
233 channel_modes[state->ch],
234 state->copyright_flag ? _("copyright") : _("no copyright"),
235 state->original_flag ? _("original") : _("copy") );
236
237 ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
238 snprintf (format,
239 sizeof (format), "%dm%02d",
240 length / 60, length % 60);
241 ADDR (format, EXTRACTOR_METATYPE_DURATION);
242 return 0;
243}
244
245int
246EXTRACTOR_mp3_extract_method (struct EXTRACTOR_PluginList *plugin,
247 EXTRACTOR_MetaDataProcessor proc,
248 void *proc_cls)
116{ 249{
117 unsigned int header; 250 int64_t file_position;
118 int counter = 0; 251 int64_t file_size;
252 size_t offset = 0;
253 size_t size;
254 unsigned char *data;
255 struct mp3_state *state;
256
257 size_t frames_found_in_this_round = 0;
258 int start_anew = 0;
259
119 char mpeg_ver = 0; 260 char mpeg_ver = 0;
120 char layer = 0; 261 char layer = 0;
121 int idx_num = 0; 262 int idx_num = 0;
122 int bitrate = 0; /*used for each frame */ 263 int bitrate = 0; /*used for each frame */
123 int avg_bps = 0; /*average bitrate */
124 int vbr_flag = 0;
125 int copyright_flag = 0; 264 int copyright_flag = 0;
126 int original_flag = 0; 265 int original_flag = 0;
127 int length = 0;
128 int sample_rate = 0; 266 int sample_rate = 0;
129 int ch = 0; 267 int ch = 0;
130 int frame_size; 268 int frame_size;
131 int frames = 0;
132 size_t pos = 0;
133 char format[512];
134 269
135 do 270 if (plugin == NULL || plugin->state == NULL)
136 { 271 return 1;
137 /* seek for frame start */
138 if (pos + sizeof (header) > size)
139 {
140 return 0;
141 } /*unable to find header */
142 header = (data[pos] << 24) | (data[pos+1] << 16) |
143 (data[pos+2] << 8) | data[pos+3];
144 if ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
145 break; /*found header sync */
146 pos++;
147 counter++; /*next try */
148 }
149 while (counter < MAX_MP3_SCAN_DEEP);
150 if (counter >= MAX_MP3_SCAN_DEEP)
151 return 0;
152 272
153 do 273 state = plugin->state;
154 { /*ok, now we found a mp3 frame header */ 274 file_position = plugin->position;
155 frames++; 275 file_size = plugin->fsize;
156 switch (header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK)) 276 size = plugin->map_size;
157 { 277 data = plugin->shm_ptr;
158 case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK): 278
159 mpeg_ver = MPEG_V1; 279 if (plugin->seek_request < 0)
160 break; 280 return 1;
161 case (MPA_LAST_SYNC_BIT_MASK): 281 if (file_position - plugin->seek_request > 0)
162 mpeg_ver = MPEG_V2; 282 {
163 break; 283 plugin->seek_request = -1;
164 case 0: 284 return 1;
165 mpeg_ver = MPEG_V25; 285 }
166 break; 286 if (plugin->seek_request - file_position < size)
167 case (MPA_VERSION_MASK): 287 offset = plugin->seek_request - file_position;
168 default: 288
169 return 0; 289 while (1)
170 } 290 {
171 switch (header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT)) 291 switch (state->state)
292 {
293 case MP3_LOOKING_FOR_FRAME:
294 /* Look for a frame header */
295 while (offset + sizeof (state->header) < size && (((*((uint32_t *) &data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM))
296 offset += 1;
297 if (offset + sizeof (state->header) >= size)
298 {
299 /* Alternative: (frames_found_in_this_round < (size / LARGEST_FRAME_SIZE / 2)) is to generous */
300 if ((file_position == 0 && ((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5) ||
301 file_position + offset + sizeof (state->header) >= file_size)
172 { 302 {
173 case (0x1 << MPA_LAYER_SHIFT): 303 calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls);
174 layer = LAYER_3; 304 return 1;
175 break;
176 case (0x2 << MPA_LAYER_SHIFT):
177 layer = LAYER_2;
178 break;
179 case (0x3 << MPA_LAYER_SHIFT):
180 layer = LAYER_1;
181 break;
182 case 0x0:
183 default:
184 return 0;
185 } 305 }
306 plugin->seek_request = file_position + offset;
307 return 0;
308 }
309 state->header = (data[offset] << 24) | (data[offset + 1] << 16) |
310 (data[offset + 2] << 8) | data[offset + 3];
311 if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
312 {
313 state->state = MP3_READING_FRAME;
314 break;
315 }
316 break;
317 case MP3_READING_FRAME:
318 state->number_of_frames += 1;
319 start_anew = 0;
320 switch (state->header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK))
321 {
322 case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK):
323 mpeg_ver = MPEG_V1;
324 break;
325 case (MPA_LAST_SYNC_BIT_MASK):
326 mpeg_ver = MPEG_V2;
327 break;
328 case 0:
329 mpeg_ver = MPEG_V25;
330 break;
331 case (MPA_VERSION_MASK):
332 default:
333 state->state = MP3_LOOKING_FOR_FRAME;
334 offset += 1;
335 start_anew = 1;
336 }
337 if (start_anew)
338 break;
339 switch (state->header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT))
340 {
341 case (0x1 << MPA_LAYER_SHIFT):
342 layer = LAYER_3;
343 break;
344 case (0x2 << MPA_LAYER_SHIFT):
345 layer = LAYER_2;
346 break;
347 case (0x3 << MPA_LAYER_SHIFT):
348 layer = LAYER_1;
349 break;
350 case 0x0:
351 default:
352 state->state = MP3_LOOKING_FOR_FRAME;
353 offset += 1;
354 start_anew = 1;
355 }
356 if (start_anew)
357 break;
186 if (mpeg_ver < MPEG_V25) 358 if (mpeg_ver < MPEG_V25)
187 idx_num = (mpeg_ver - 1) * 3 + layer - 1; 359 idx_num = (mpeg_ver - 1) * 3 + layer - 1;
188 else 360 else
189 idx_num = 2 + layer; 361 idx_num = 2 + layer;
190 bitrate = 1000 * bitrate_table[(header >> MPA_BITRATE_SHIFT) & 362 bitrate = 1000 * bitrate_table[(state->header >> MPA_BITRATE_SHIFT) &
191 MPA_BITRATE_MASK][idx_num]; 363 MPA_BITRATE_MASK][idx_num];
192 if (bitrate < 0) 364 if (bitrate < 0)
193 { 365 {
194 frames--; 366 /*error in header */
195 break; 367 state->state = MP3_LOOKING_FOR_FRAME;
196 } /*error in header */ 368 offset += 1;
197 sample_rate = freq_table[(header >> MPA_FREQ_SHIFT) & 369 break;
370 }
371 sample_rate = freq_table[(state->header >> MPA_FREQ_SHIFT) &
198 MPA_FREQ_MASK][mpeg_ver - 1]; 372 MPA_FREQ_MASK][mpeg_ver - 1];
199 if (sample_rate < 0) 373 if (sample_rate <= 0)
200 { 374 {
201 frames--; 375 /*error in header */
202 break; 376 state->state = MP3_LOOKING_FOR_FRAME;
203 } /*error in header */ 377 offset += 1;
204 ch = ((header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK); 378 break;
205 copyright_flag = (header >> MPA_COPYRIGHT_SHIFT) & 0x1; 379 }
206 original_flag = (header >> MPA_ORIGINAL_SHIFT) & 0x1; 380 ch = ((state->header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK);
207 frame_size = 381 copyright_flag = (state->header >> MPA_COPYRIGHT_SHIFT) & 0x1;
208 144 * bitrate / (sample_rate ? sample_rate : 1) + 382 original_flag = (state->header >> MPA_ORIGINAL_SHIFT) & 0x1;
209 ((header >> MPA_PADDING_SHIFT) & 0x1); 383 if (layer == LAYER_1)
384 frame_size = (12 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1)) * 4;
385 else
386 frame_size = 144 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1);
210 if (frame_size <= 0) 387 if (frame_size <= 0)
211 { 388 {
212 /* Technically, bitrate can be 0. However, but this particular 389 /*error in header */
213 * extractor is incapable of correctly processing 0-bitrate files 390 state->state = MP3_LOOKING_FOR_FRAME;
214 * anyway. And bitrate == 0 might also mean that this is just a 391 offset += 1;
215 * random binary sequence, which is far more likely to be true. 392 break;
216 * 393 }
217 * amatus suggests to use a different algorithm and parse significant
218 * part of the file, then count the number of correct mpeg frames.
219 * If the the percentage of correct frames is below a threshold,
220 * then this is not an mpeg file at all.
221 */
222 frames -= 1;
223 break;
224 }
225 avg_bps += bitrate / 1000;
226
227 pos += frame_size - 4;
228 if (frames > max_frames_scan)
229 break; /*optimization */
230 if (avg_bps / frames != bitrate / 1000)
231 vbr_flag = 1;
232 if (pos + sizeof (header) > size)
233 break; /* EOF */
234 header = (data[pos] << 24) | (data[pos+1] << 16) |
235 (data[pos+2] << 8) | data[pos+3];
236 }
237 while ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK);
238
239 if (frames < 2)
240 return 0; /*no valid frames */
241 ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
242 avg_bps = avg_bps / frames;
243 if (max_frames_scan)
244 { /*if not all frames scaned */
245 length =
246 size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
247 }
248 else
249 {
250 length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
251 }
252 394
253 ADDR (mpeg_versions[mpeg_ver-1], EXTRACTOR_METATYPE_FORMAT_VERSION); 395 /* Only save data from valid frames in the state */
254 snprintf (format, 396 state->avg_bps += bitrate / 1000;
255 sizeof(format), 397 state->sample_rate = sample_rate;
256 "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s", 398 state->mpeg_ver = mpeg_ver;
257 mpeg_versions[mpeg_ver-1], 399 state->layer = layer;
258 layer_names[layer-1], 400 state->ch = ch;
259 avg_bps, 401 state->copyright_flag = copyright_flag;
260 vbr_flag ? _("VBR") : _("CBR"), 402 state->original_flag = original_flag;
261 sample_rate, 403 state->bitrate = bitrate;
262 channel_modes[ch],
263 copyright_flag ? _("copyright") : _("no copyright"),
264 original_flag ? _("original") : _("copy") );
265 404
266 ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); 405 frames_found_in_this_round += 1;
267 snprintf (format, 406 state->number_of_valid_frames += 1;
268 sizeof (format), "%dm%02d", 407 if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000)
269 length / 60, length % 60); 408 state->vbr_flag = 1;
270 ADDR (format, EXTRACTOR_METATYPE_DURATION); 409 offset += frame_size;
271 return 0; 410 state->state = MP3_LOOKING_FOR_FRAME;
411 break;
412 }
413 }
414 return 1;
272} 415}
273 416
274/* end of mp3_extractor.c */ 417/* end of mp3_extractor.c */
diff --git a/src/plugins/template_extractor.c b/src/plugins/template_extractor.c
index 63f0393..b6f3371 100644
--- a/src/plugins/template_extractor.c
+++ b/src/plugins/template_extractor.c
@@ -21,21 +21,113 @@
21#include "platform.h" 21#include "platform.h"
22#include "extractor.h" 22#include "extractor.h"
23 23
24int 24#include "extractor_plugins.h"
25EXTRACTOR_template_extract (const unsigned char *data, 25
26 size_t size, 26struct template_state
27 EXTRACTOR_MetaDataProcessor proc, 27{
28 void *proc_cls, 28 int state;
29 const char *options) 29
30 /* more state fields here
31 * all variables that should survive more than one atomic read
32 * from the "file" are to be placed here.
33 */
34};
35
36enum TemplateState
37{
38 TEMPLATE_INVALID = -1,
39 TEMPLATE_LOOKING_FOR_FOO = 0,
40 TEMPLATE_READING_FOO,
41 TEMPLATE_READING_BAR,
42 TEMPLATE_SEEKING_TO_ZOOL
43};
44
45void
46EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin)
30{ 47{
31 if (0 != proc (proc_cls, 48 struct template_state *state;
32 "template", 49 state = plugin->state = malloc (sizeof (struct template_state));
33 EXTRACTOR_METATYPE_RESERVED, 50 if (state == NULL)
34 EXTRACTOR_METAFORMAT_UTF8, 51 return;
35 "text/plain", 52 state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */
36 "foo", 53 /* initialize other fields to their "uninitialized" values or defaults */
37 strlen ("foo")+1)) 54}
55
56void
57EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin)
58{
59 if (plugin->state != NULL)
60 {
61 /* free other state fields that are heap-allocated */
62 free (plugin->state);
63 }
64 plugin->state = NULL;
65}
66
67int
68EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin,
69 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
70{
71 int64_t file_position;
72 int64_t file_size;
73 size_t offset = 0;
74 size_t size;
75 unsigned char *data;
76 unsigned char *ff;
77 struct mp3_state *state;
78
79 /* temporary variables are declared here */
80
81 if (plugin == NULL || plugin->state == NULL)
38 return 1; 82 return 1;
39 /* insert more here */ 83
40 return 0; 84 /* for easier access (and conforms better with the old plugins var names) */
85 state = plugin->state;
86 file_position = plugin->position;
87 file_size = plugin->fsize;
88 size = plugin->map_size;
89 data = plugin->shm_ptr;
90
91 /* sanity checks */
92 if (plugin->seek_request < 0)
93 return 1;
94 if (file_position - plugin->seek_request > 0)
95 {
96 plugin->seek_request = -1;
97 return 1;
98 }
99 if (plugin->seek_request - file_position < size)
100 offset = plugin->seek_request - file_position;
101
102 while (1)
103 {
104 switch (state->state)
105 {
106 case TEMPLATE_INVALID:
107 plugin->seek_request = -1;
108 return 1;
109 case TEMPLATE_LOOKING_FOR_FOO:
110 /* Find FOO in data buffer.
111 * If found, set offset to its position and set state to TEMPLATE_READING_FOO
112 * If not found, set seek_request to file_position + offset and return 1
113 * (but it's better to give up as early as possible, to avoid reading the whole
114 * file byte-by-byte).
115 */
116 break;
117 case TEMPLATE_READING_FOO:
118 /* See if offset + sizeof(foo) < size, otherwise set seek_request to offset and return 1;
119 * If file_position is 0, and size is still to small, give up.
120 * Read FOO, maybe increase offset to reflect that (depends on the parser logic).
121 * Either process FOO right here, or jump to another state (see ebml plugin for an example of complex
122 * state-jumps).
123 * If FOO says you need to seek somewhere - set offset to seek_target - file_position and set the
124 * next state (next state will check that offset < size; all states that do reading should do that,
125 * and also check for EOF).
126 */
127 /* ... */
128 break;
129 }
130 }
131 /* Should not reach this */
132 return 1;
41} 133}