diff options
Diffstat (limited to 'src/main/extractor.c')
-rw-r--r-- | src/main/extractor.c | 175 |
1 files changed, 172 insertions, 3 deletions
diff --git a/src/main/extractor.c b/src/main/extractor.c index 9d25cff..a409fbe 100644 --- a/src/main/extractor.c +++ b/src/main/extractor.c | |||
@@ -194,6 +194,162 @@ process_plugin_reply (void *cls, | |||
194 | 194 | ||
195 | 195 | ||
196 | /** | 196 | /** |
197 | * Closure for the in-process callbacks. | ||
198 | */ | ||
199 | struct InProcessContext | ||
200 | { | ||
201 | /** | ||
202 | * Current plugin. | ||
203 | */ | ||
204 | struct EXTRACTOR_PluginList *plugin; | ||
205 | |||
206 | /** | ||
207 | * Data source to use. | ||
208 | */ | ||
209 | struct EXTRACTOR_Datasource *ds; | ||
210 | |||
211 | /** | ||
212 | * Function to call with meta data. | ||
213 | */ | ||
214 | EXTRACTOR_MetaDataProcessor proc; | ||
215 | |||
216 | /** | ||
217 | * Closure for 'proc'. | ||
218 | */ | ||
219 | void *proc_cls; | ||
220 | |||
221 | /** | ||
222 | * IO buffer. | ||
223 | */ | ||
224 | char buf[DEFAULT_SHM_SIZE]; | ||
225 | |||
226 | /** | ||
227 | * 0 to continue extracting, 1 if we are finished | ||
228 | */ | ||
229 | int finished; | ||
230 | }; | ||
231 | |||
232 | |||
233 | /** | ||
234 | * Obtain a pointer to up to 'size' bytes of data from the file to process. | ||
235 | * Callback used for in-process plugins. | ||
236 | * | ||
237 | * @param cls a 'struct InProcessContext' | ||
238 | * @param data pointer to set to the file data, set to NULL on error | ||
239 | * @param size maximum number of bytes requested | ||
240 | * @return number of bytes now available in data (can be smaller than 'size'), | ||
241 | * -1 on error | ||
242 | * | ||
243 | */ | ||
244 | static ssize_t | ||
245 | in_process_read (void *cls, | ||
246 | void **data, | ||
247 | size_t size) | ||
248 | { | ||
249 | struct InProcessContext *ctx = cls; | ||
250 | ssize_t ret; | ||
251 | size_t bsize; | ||
252 | |||
253 | bsize = sizeof (ctx->buf); | ||
254 | if (size < bsize) | ||
255 | bsize = size; | ||
256 | ret = EXTRACTOR_datasource_read_ (ctx->ds, | ||
257 | ctx->buf, | ||
258 | bsize); | ||
259 | if (-1 == ret) | ||
260 | *data = NULL; | ||
261 | else | ||
262 | *data = ctx->buf; | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | |||
267 | /** | ||
268 | * Seek in the file. Use 'SEEK_CUR' for whence and 'pos' of 0 to | ||
269 | * obtain the current position in the file. | ||
270 | * Callback used for in-process plugins. | ||
271 | * | ||
272 | * @param cls a 'struct InProcessContext' | ||
273 | * @param pos position to seek (see 'man lseek') | ||
274 | * @param whence how to see (absolute to start, relative, absolute to end) | ||
275 | * @return new absolute position, -1 on error (i.e. desired position | ||
276 | * does not exist) | ||
277 | */ | ||
278 | static int64_t | ||
279 | in_process_seek (void *cls, | ||
280 | int64_t pos, | ||
281 | int whence) | ||
282 | { | ||
283 | struct InProcessContext *ctx = cls; | ||
284 | |||
285 | return EXTRACTOR_datasource_seek_ (ctx->ds, | ||
286 | pos, | ||
287 | whence); | ||
288 | } | ||
289 | |||
290 | |||
291 | /** | ||
292 | * Determine the overall size of the file. | ||
293 | * Callback used for in-process plugins. | ||
294 | * | ||
295 | * @param cls a 'struct InProcessContext' | ||
296 | * @return overall file size, UINT64_MAX on error (i.e. IPC failure) | ||
297 | */ | ||
298 | static uint64_t | ||
299 | in_process_get_size (void *cls) | ||
300 | { | ||
301 | struct InProcessContext *ctx = cls; | ||
302 | |||
303 | return (uint64_t) EXTRACTOR_datasource_get_size_ (ctx->ds); | ||
304 | } | ||
305 | |||
306 | |||
307 | /** | ||
308 | * Type of a function that libextractor calls for each | ||
309 | * meta data item found. | ||
310 | * Callback used for in-process plugins. | ||
311 | * | ||
312 | * @param cls a 'struct InProcessContext' | ||
313 | * @param plugin_name name of the plugin that produced this value; | ||
314 | * special values can be used (i.e. '<zlib>' for zlib being | ||
315 | * used in the main libextractor library and yielding | ||
316 | * meta data). | ||
317 | * @param type libextractor-type describing the meta data | ||
318 | * @param format basic format information about data | ||
319 | * @param data_mime_type mime-type of data (not of the original file); | ||
320 | * can be NULL (if mime-type is not known) | ||
321 | * @param data actual meta-data found | ||
322 | * @param data_len number of bytes in data | ||
323 | * @return 0 to continue extracting, 1 to abort | ||
324 | */ | ||
325 | static int | ||
326 | in_process_proc (void *cls, | ||
327 | const char *plugin_name, | ||
328 | enum EXTRACTOR_MetaType type, | ||
329 | enum EXTRACTOR_MetaFormat format, | ||
330 | const char *data_mime_type, | ||
331 | const char *data, | ||
332 | size_t data_len) | ||
333 | { | ||
334 | struct InProcessContext *ctx = cls; | ||
335 | int ret; | ||
336 | |||
337 | if (0 != ctx->finished) | ||
338 | return 1; | ||
339 | ret = ctx->proc (ctx->proc_cls, | ||
340 | plugin_name, | ||
341 | type, | ||
342 | format, | ||
343 | data_mime_type, | ||
344 | data, | ||
345 | data_len); | ||
346 | if (0 != ret) | ||
347 | ctx->finished = 1; | ||
348 | return ret; | ||
349 | } | ||
350 | |||
351 | |||
352 | /** | ||
197 | * Extract keywords using the given set of plugins. | 353 | * Extract keywords using the given set of plugins. |
198 | * | 354 | * |
199 | * @param plugins the list of plugins to use | 355 | * @param plugins the list of plugins to use |
@@ -214,6 +370,8 @@ do_extract (struct EXTRACTOR_PluginList *plugins, | |||
214 | struct StartMessage start; | 370 | struct StartMessage start; |
215 | struct EXTRACTOR_Channel *channel; | 371 | struct EXTRACTOR_Channel *channel; |
216 | struct PluginReplyProcessor prp; | 372 | struct PluginReplyProcessor prp; |
373 | struct InProcessContext ctx; | ||
374 | struct EXTRACTOR_ExtractContext ec; | ||
217 | int64_t min_seek; | 375 | int64_t min_seek; |
218 | ssize_t data_available; | 376 | ssize_t data_available; |
219 | uint64_t last_position; | 377 | uint64_t last_position; |
@@ -350,13 +508,24 @@ do_extract (struct EXTRACTOR_PluginList *plugins, | |||
350 | } | 508 | } |
351 | 509 | ||
352 | /* run in-process plugins */ | 510 | /* run in-process plugins */ |
511 | ctx.finished = 0; | ||
512 | ctx.ds = ds; | ||
513 | ctx.proc = proc; | ||
514 | ctx.proc_cls = proc_cls; | ||
515 | ec.cls = &ctx; | ||
516 | ec.read = &in_process_read; | ||
517 | ec.seek = &in_process_seek; | ||
518 | ec.get_size = &in_process_get_size; | ||
519 | ec.proc = &in_process_proc; | ||
353 | for (pos = plugins; NULL != pos; pos = pos->next) | 520 | for (pos = plugins; NULL != pos; pos = pos->next) |
354 | { | 521 | { |
355 | if (EXTRACTOR_OPTION_IN_PROCESS != pos->flags) | 522 | if (EXTRACTOR_OPTION_IN_PROCESS != pos->flags) |
356 | continue; | 523 | continue; |
357 | LOG ("In-process plugins not implemented\n"); | 524 | ctx.plugin = pos; |
358 | // FIXME: initialize read/seek context... | 525 | ec.config = pos->plugin_options; |
359 | // pos->extract_method (FIXME); | 526 | pos->extract_method (&ec); |
527 | if (1 == ctx.finished) | ||
528 | break; | ||
360 | } | 529 | } |
361 | } | 530 | } |
362 | 531 | ||