aboutsummaryrefslogtreecommitdiff
path: root/src/main/extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/extractor.c')
-rw-r--r--src/main/extractor.c175
1 files changed, 172 insertions, 3 deletions
diff --git a/src/main/extractor.c b/src/main/extractor.c
index 9d25cff..a409fbe 100644
--- a/src/main/extractor.c
+++ b/src/main/extractor.c
@@ -194,6 +194,162 @@ process_plugin_reply (void *cls,
194 194
195 195
196/** 196/**
197 * Closure for the in-process callbacks.
198 */
199struct InProcessContext
200{
201 /**
202 * Current plugin.
203 */
204 struct EXTRACTOR_PluginList *plugin;
205
206 /**
207 * Data source to use.
208 */
209 struct EXTRACTOR_Datasource *ds;
210
211 /**
212 * Function to call with meta data.
213 */
214 EXTRACTOR_MetaDataProcessor proc;
215
216 /**
217 * Closure for 'proc'.
218 */
219 void *proc_cls;
220
221 /**
222 * IO buffer.
223 */
224 char buf[DEFAULT_SHM_SIZE];
225
226 /**
227 * 0 to continue extracting, 1 if we are finished
228 */
229 int finished;
230};
231
232
233/**
234 * Obtain a pointer to up to 'size' bytes of data from the file to process.
235 * Callback used for in-process plugins.
236 *
237 * @param cls a 'struct InProcessContext'
238 * @param data pointer to set to the file data, set to NULL on error
239 * @param size maximum number of bytes requested
240 * @return number of bytes now available in data (can be smaller than 'size'),
241 * -1 on error
242 *
243 */
244static ssize_t
245in_process_read (void *cls,
246 void **data,
247 size_t size)
248{
249 struct InProcessContext *ctx = cls;
250 ssize_t ret;
251 size_t bsize;
252
253 bsize = sizeof (ctx->buf);
254 if (size < bsize)
255 bsize = size;
256 ret = EXTRACTOR_datasource_read_ (ctx->ds,
257 ctx->buf,
258 bsize);
259 if (-1 == ret)
260 *data = NULL;
261 else
262 *data = ctx->buf;
263 return ret;
264}
265
266
267/**
268 * Seek in the file. Use 'SEEK_CUR' for whence and 'pos' of 0 to
269 * obtain the current position in the file.
270 * Callback used for in-process plugins.
271 *
272 * @param cls a 'struct InProcessContext'
273 * @param pos position to seek (see 'man lseek')
274 * @param whence how to see (absolute to start, relative, absolute to end)
275 * @return new absolute position, -1 on error (i.e. desired position
276 * does not exist)
277 */
278static int64_t
279in_process_seek (void *cls,
280 int64_t pos,
281 int whence)
282{
283 struct InProcessContext *ctx = cls;
284
285 return EXTRACTOR_datasource_seek_ (ctx->ds,
286 pos,
287 whence);
288}
289
290
291/**
292 * Determine the overall size of the file.
293 * Callback used for in-process plugins.
294 *
295 * @param cls a 'struct InProcessContext'
296 * @return overall file size, UINT64_MAX on error (i.e. IPC failure)
297 */
298static uint64_t
299in_process_get_size (void *cls)
300{
301 struct InProcessContext *ctx = cls;
302
303 return (uint64_t) EXTRACTOR_datasource_get_size_ (ctx->ds);
304}
305
306
307/**
308 * Type of a function that libextractor calls for each
309 * meta data item found.
310 * Callback used for in-process plugins.
311 *
312 * @param cls a 'struct InProcessContext'
313 * @param plugin_name name of the plugin that produced this value;
314 * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
315 * used in the main libextractor library and yielding
316 * meta data).
317 * @param type libextractor-type describing the meta data
318 * @param format basic format information about data
319 * @param data_mime_type mime-type of data (not of the original file);
320 * can be NULL (if mime-type is not known)
321 * @param data actual meta-data found
322 * @param data_len number of bytes in data
323 * @return 0 to continue extracting, 1 to abort
324 */
325static int
326in_process_proc (void *cls,
327 const char *plugin_name,
328 enum EXTRACTOR_MetaType type,
329 enum EXTRACTOR_MetaFormat format,
330 const char *data_mime_type,
331 const char *data,
332 size_t data_len)
333{
334 struct InProcessContext *ctx = cls;
335 int ret;
336
337 if (0 != ctx->finished)
338 return 1;
339 ret = ctx->proc (ctx->proc_cls,
340 plugin_name,
341 type,
342 format,
343 data_mime_type,
344 data,
345 data_len);
346 if (0 != ret)
347 ctx->finished = 1;
348 return ret;
349}
350
351
352/**
197 * Extract keywords using the given set of plugins. 353 * Extract keywords using the given set of plugins.
198 * 354 *
199 * @param plugins the list of plugins to use 355 * @param plugins the list of plugins to use
@@ -214,6 +370,8 @@ do_extract (struct EXTRACTOR_PluginList *plugins,
214 struct StartMessage start; 370 struct StartMessage start;
215 struct EXTRACTOR_Channel *channel; 371 struct EXTRACTOR_Channel *channel;
216 struct PluginReplyProcessor prp; 372 struct PluginReplyProcessor prp;
373 struct InProcessContext ctx;
374 struct EXTRACTOR_ExtractContext ec;
217 int64_t min_seek; 375 int64_t min_seek;
218 ssize_t data_available; 376 ssize_t data_available;
219 uint64_t last_position; 377 uint64_t last_position;
@@ -350,13 +508,24 @@ do_extract (struct EXTRACTOR_PluginList *plugins,
350 } 508 }
351 509
352 /* run in-process plugins */ 510 /* run in-process plugins */
511 ctx.finished = 0;
512 ctx.ds = ds;
513 ctx.proc = proc;
514 ctx.proc_cls = proc_cls;
515 ec.cls = &ctx;
516 ec.read = &in_process_read;
517 ec.seek = &in_process_seek;
518 ec.get_size = &in_process_get_size;
519 ec.proc = &in_process_proc;
353 for (pos = plugins; NULL != pos; pos = pos->next) 520 for (pos = plugins; NULL != pos; pos = pos->next)
354 { 521 {
355 if (EXTRACTOR_OPTION_IN_PROCESS != pos->flags) 522 if (EXTRACTOR_OPTION_IN_PROCESS != pos->flags)
356 continue; 523 continue;
357 LOG ("In-process plugins not implemented\n"); 524 ctx.plugin = pos;
358 // FIXME: initialize read/seek context... 525 ec.config = pos->plugin_options;
359 // pos->extract_method (FIXME); 526 pos->extract_method (&ec);
527 if (1 == ctx.finished)
528 break;
360 } 529 }
361} 530}
362 531