diff options
Diffstat (limited to 'doc/extractor.texi')
-rw-r--r-- | doc/extractor.texi | 194 |
1 files changed, 189 insertions, 5 deletions
diff --git a/doc/extractor.texi b/doc/extractor.texi index e6b4c13..588538e 100644 --- a/doc/extractor.texi +++ b/doc/extractor.texi | |||
@@ -171,10 +171,15 @@ always been licensed under GPLv2 @emph{or any later version}.} | |||
171 | @node Preparation | 171 | @node Preparation |
172 | @chapter Preparation | 172 | @chapter Preparation |
173 | 173 | ||
174 | Compiling @gnule{} follows the standard GNU autotools | 174 | This chapter first describes the general build instructions that |
175 | build process using @command{configure} and @command{make}. For | 175 | should apply to all systems. Specific instructions for known problems |
176 | details, read the @file{INSTALL} file and query | 176 | for particular platforms are then described in individual sections |
177 | @verb{|./configure --help|} for additional options. | 177 | afterwards. |
178 | |||
179 | Compiling @gnule{} follows the standard GNU autotools build process | ||
180 | using @command{configure} and @command{make}. For details on the GNU | ||
181 | autotools build process, read the @file{INSTALL} file and query | ||
182 | @verb{|./configure --help|} for additional options. | ||
178 | 183 | ||
179 | @gnule{} has various dependencies, some of which are optional. | 184 | @gnule{} has various dependencies, some of which are optional. |
180 | Instead of specifying the names of the software packages, we | 185 | Instead of specifying the names of the software packages, we |
@@ -271,6 +276,135 @@ environment variable @verb{|LIBEXTRACTOR_PREFIX|}. If | |||
271 | @gnule{} cannot locate a plugin, it will look in | 276 | @gnule{} cannot locate a plugin, it will look in |
272 | @verb{|LIBEXTRACTOR_PREFIX/lib/libextractor/|}. | 277 | @verb{|LIBEXTRACTOR_PREFIX/lib/libextractor/|}. |
273 | 278 | ||
279 | |||
280 | @section Installation on GNU/Linux | ||
281 | |||
282 | Should work using the standard instructions without problems. | ||
283 | |||
284 | |||
285 | @section Installation on FreeBSD | ||
286 | |||
287 | Should work using the standard instructions without problems. | ||
288 | |||
289 | |||
290 | @section Installation on OpenBSD | ||
291 | |||
292 | OpenBSD 3.8 does not have CODESET in @file{langinfo.h}. CODESET | ||
293 | is used in @gnule{} in about three places. This causes problems | ||
294 | during compilation. | ||
295 | |||
296 | |||
297 | @section Installation on NetBSD | ||
298 | |||
299 | No reports so far. | ||
300 | |||
301 | |||
302 | @section Installation using MinGW | ||
303 | |||
304 | Linking -lstdc++ with the provided libtool fails on Cygwin. This | ||
305 | is a problem with libtool: there is unfortunately no flag to tell | ||
306 | libtool how to do its job on Cygwin, and it seems that it cannot be the | ||
307 | default to set the library check to 'pass_all'. Patching libtool may | ||
308 | help. | ||
309 | |||
310 | Note: this is a rather dated report and may no longer apply. | ||
311 | |||
312 | |||
313 | @section Installation on OS X | ||
314 | |||
315 | libextractor has two installation methods on Mac OS X: it can be | ||
316 | installed as a Mac OS X framework or with the standard | ||
317 | @command{./configure; make; make install} shell commands. The | ||
318 | framework package is self-contained, but currently omits some of the | ||
319 | extractor plugins that can be compiled in if libextractor is installed | ||
320 | with @command{./configure; make; make install} (provided that the | ||
321 | required dependencies exist). | ||
322 | |||
323 | @subsection Installing and uninstalling the framework | ||
324 | |||
325 | The binary framework is distributed as a disk image (@file{Extractor-x.x.xx.dmg}). | ||
326 | Installation is done by opening the disk image and clicking @file{Extractor.pkg} | ||
327 | inside it. The Mac OS X installer application will then run. The framework | ||
328 | is installed to the root volume's @file{/Library/Frameworks} folder and installing | ||
329 | will require admin privileges. | ||
330 | |||
331 | The framework can be uninstalled by dragging | ||
332 | @file{/Library/Frameworks/Extractor.framework} to the @file{Trash}. | ||
333 | |||
334 | |||
335 | @subsection Using the framework | ||
336 | |||
337 | In the framework, the @command{extract} command line tool can be found at | ||
338 | @file{/Library/Frameworks/Extractor.framework/Versions/Current/bin/extract}. | ||
339 | |||
340 | The framework can be used in software projects as a framework or as a dynamic | ||
341 | library. | ||
342 | |||
343 | When using the framework as a dynamic library in projects using autotools, | ||
344 | one would most likely want to add | ||
345 | "-I/Library/Frameworks/Extractor.framework/Versions/Current/include" | ||
346 | to CPPFLAGS and | ||
347 | "-L/Library/Frameworks/Extractor.framework/Versions/Current/lib" | ||
348 | to LDFLAGS. | ||
349 | |||
350 | |||
351 | @subsection Example for using the framework | ||
352 | |||
353 | @example | ||
354 | @verbatim | ||
355 | // hello.c | ||
356 | #include <Extractor/extractor.h> | ||
357 | |||
358 | int main() | ||
359 | { | ||
360 | struct EXTRACTOR_PluginList *el; | ||
361 | el = EXTRACTOR_plugin_load_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
362 | // ... | ||
363 | EXTRACTOR_plugin_remove_all (el); | ||
364 | return 0; | ||
365 | } | ||
366 | @end verbatim | ||
367 | @end example | ||
368 | |||
369 | You can then compile the example using | ||
370 | |||
371 | @verbatim | ||
372 | $ gcc -o hello hello.c -framework Extractor | ||
373 | @end verbatim | ||
374 | |||
375 | @subsection Example for using the dynamic library | ||
376 | |||
377 | @example | ||
378 | @verbatim | ||
379 | // hello.c | ||
380 | #include <extractor.h> | ||
381 | int main() | ||
382 | { | ||
383 | struct EXTRACTOR_PluginList *el; | ||
384 | el = EXTRACTOR_plugin_load_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
385 | // ... | ||
386 | EXTRACTOR_plugin_remove_all (el); | ||
387 | return 0; | ||
388 | } | ||
389 | @end verbatim | ||
390 | @end example | ||
391 | |||
392 | You can then compile the example using | ||
393 | |||
394 | @verbatim | ||
395 | $ gcc -I/Library/Frameworks/Extractor.framework/Versions/Current/include \ | ||
396 | -o hello hello.c \ | ||
397 | -L/Library/Frameworks/Extractor.framework/Versions/Current/lib \ | ||
398 | -lextractor | ||
399 | @end verbatim | ||
400 | |||
401 | Notice the difference in the @code{#include} line. | ||
402 | |||
403 | |||
404 | |||
405 | |||
406 | |||
407 | |||
274 | @section Note to package maintainers | 408 | @section Note to package maintainers |
275 | 409 | ||
276 | The suggested way to package GNU libextractor is to split it into | 410 | The suggested way to package GNU libextractor is to split it into |
@@ -304,6 +438,52 @@ resources. | |||
304 | @node Generalities | 438 | @node Generalities |
305 | @chapter Generalities | 439 | @chapter Generalities |
306 | 440 | ||
441 | @section Introduction to the ``extract'' command | ||
442 | |||
443 | The @command{extract} command takes a list of file names as arguments, | ||
444 | extracts meta data from each of those files and prints the result to | ||
445 | the console. By default, @command{extract} will use all available | ||
446 | plugins and print all (non-binary) meta data that is found. | ||
447 | |||
448 | The set of plugins used by @command{extract} can be controlled using | ||
449 | the ``-l'' and ``-n'' options. Use ``-n'' to not load all of the | ||
450 | default plugins. Use ``-l NAME'' to specifically load a certain | ||
451 | plugin. For example, specify ``-n -l mime'' to only use the MIME | ||
452 | plugin. | ||
453 | |||
454 | Using the ``-p'' option the output of @command{extract} can be limited | ||
455 | to only certain keyword types. Similarly, using the ``-x'' option, | ||
456 | certain keyword types can be excluded. A list of all known keyword | ||
457 | types can be obtained using the ``-L'' option. | ||
458 | |||
459 | The output format of @command{extract} can be influenced with the | ||
460 | ``-V'' (more verbose, lists filenames), ``-g'' (grep-friendly, all | ||
461 | meta data on a single line per file) and ``-b'' (BibTeX style) | ||
462 | options. | ||
463 | |||
464 | @section Common usage examples for ``extract'' | ||
465 | |||
466 | @example | ||
467 | $ extract test/test.jpg | ||
468 | comment - (C) 2001 by Christian Grothoff, using gimp 1.2 1 | ||
469 | mimetype - image/jpeg | ||
470 | |||
471 | $ extract -V -x comment test/test.jpg | ||
472 | Keywords for file test/test.jpg: | ||
473 | mimetype - image/jpeg | ||
474 | |||
475 | $ extract -p comment test/test.jpg | ||
476 | comment - (C) 2001 by Christian Grothoff, using gimp 1.2 1 | ||
477 | |||
478 | $ extract -nV -l png.so -p comment test/test.jpg test/test.png | ||
479 | Keywords for file test/test.jpg: | ||
480 | Keywords for file test/test.png: | ||
481 | comment - Testing keyword extraction | ||
482 | @end example | ||
483 | |||
484 | |||
485 | @section Introduction to the libextractor library | ||
486 | |||
307 | Each public symbol exported by @gnule{} has the prefix | 487 | Each public symbol exported by @gnule{} has the prefix |
308 | @verb{|EXTRACTOR_|}. All-caps names are used for constants. For the | 488 | @verb{|EXTRACTOR_|}. All-caps names are used for constants. For the |
309 | impatient, the minimal C code for using @gnule{} (on the | 489 | impatient, the minimal C code for using @gnule{} (on the |
@@ -322,6 +502,11 @@ int main(int argc, char ** argv) { | |||
322 | } | 502 | } |
323 | @end verbatim | 503 | @end verbatim |
324 | 504 | ||
505 | The minimal API illustrated by this example is actually sufficient for | ||
506 | many applications. The full external C API of @gnule{} is described | ||
507 | in chapter @ref{Extracting meta data}. Bindings for other languages | ||
508 | are described in chapter @ref{Language bindings}. The API for | ||
509 | writing new plugins is described in chapter @ref{Writing new Plugins}. | ||
325 | 510 | ||
326 | @node Extracting meta data | 511 | @node Extracting meta data |
327 | @chapter Extracting meta data | 512 | @chapter Extracting meta data |
@@ -508,7 +693,6 @@ Meta data extraction should never really fail --- at worst, @gnule{} should not | |||
508 | 693 | ||
509 | @node Language bindings | 694 | @node Language bindings |
510 | @chapter Language bindings | 695 | @chapter Language bindings |
511 | |||
512 | @cindex Java | 696 | @cindex Java |
513 | @cindex Mono | 697 | @cindex Mono |
514 | @cindex Perl | 698 | @cindex Perl |