libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

extractor.h (22752B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002-2017 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 
     21 #ifndef EXTRACTOR_H
     22 #define EXTRACTOR_H
     23 
     24 #ifdef __cplusplus
     25 extern "C" {
     26 #if 0 /* keep Emacsens' auto-indent happy */
     27 }
     28 #endif
     29 #endif
     30 
     31 
     32 #include <stdint.h>
     33 
     34 /**
     35  * Version, one byte per component: 0.2.6-1 => 0x00020601,
     36  * 4.5.2-0 => 0x04050200 (so the value below reads as 1.11.0-0).
     37  */
     38 #define EXTRACTOR_VERSION 0x010B0000
     39 
     40 #include <stdio.h>
     41 
     42 #ifndef _EXTRACTOR_EXTERN /* linkage prefix for the public declarations below; may be pre-defined by the includer */
     43 #if defined(_WIN32) && defined(MHD_W32LIB)
     44 #define _EXTRACTOR_EXTERN extern
     45 #elif defined (_WIN32) && defined(MHD_W32DLL)
     46 /* Define MHD_W32DLL when using this library as a W32 .DLL to speed up the linker a little.  NOTE(review): the MHD_W32LIB/MHD_W32DLL names lack the EXTRACTOR prefix; confirm they are the intended switches. */
     47 #define _EXTRACTOR_EXTERN __declspec(dllimport)
     48 #else
     49 #define _EXTRACTOR_EXTERN extern
     50 #endif
     51 #endif
     52 
     53 /**
     54  * Options for how plugin execution should be done (the `flags` argument of the EXTRACTOR_plugin_add* functions).
     55  */
     56 enum EXTRACTOR_Options
     57 {
     58 
     59   /**
     60    * Run plugin out-of-process, starting the process once the plugin
     61    * is to be run.  If a plugin crashes, automatically restart the
     62    * respective process for the same file and try once more
     63    * (since the crash may be caused by the previous file).  If
     64    * the process crashes immediately again, it is not restarted
     65    * until the next file.
     66    */
     67   EXTRACTOR_OPTION_DEFAULT_POLICY = 0,
     68 
     69   /**
     70    * Deprecated option.  Ignored.
     71    */
     72   EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART = 1,
     73 
     74   /**
     75    * Run plugins in-process.  Unsafe and not recommended,
     76    * but can be nice for debugging.
     77    */
     78   EXTRACTOR_OPTION_IN_PROCESS = 2,
     79 
     80   /**
     81    * Internal value for plugins that have been disabled.
     82    */
     83   EXTRACTOR_OPTION_DISABLED = 3
     84 
     85 };
     86 
     87 
     88 /**
     89  * Format in which the extracted meta data is presented (the `format` argument of #EXTRACTOR_MetaDataProcessor).
     90  */
     91 enum EXTRACTOR_MetaFormat
     92 {
     93   /**
     94    * Format is unknown.
     95    */
     96   EXTRACTOR_METAFORMAT_UNKNOWN = 0,
     97 
     98   /**
     99    * 0-terminated, UTF-8 encoded string.  "data_len"
    100    * is strlen (data)+1, i.e. it counts the 0-terminator.
    101    */
    102   EXTRACTOR_METAFORMAT_UTF8 = 1,
    103 
    104   /**
    105    * Some kind of binary format, see the given MIME type.
    106    */
    107   EXTRACTOR_METAFORMAT_BINARY = 2,
    108 
    109   /**
    110    * 0-terminated string.  The specific encoding is unknown.
    111    * "data_len" is strlen (data)+1, i.e. it counts the 0-terminator.
    112    */
    113   EXTRACTOR_METAFORMAT_C_STRING = 3
    114 
    115 };
    116 
    117 
    118 /**
    119  * Enumeration defining various sources of keywords.  See also
    120  * http://dublincore.org/documents/1998/09/dces/
    121  *
    122  * @defgroup types meta data types
    123  * @{
    124  */
    125 enum EXTRACTOR_MetaType
    126 {
    127   /* Available to applications for marking an `enum EXTRACTOR_MetaType` as not
    128      carrying any meaningful value; never used by libextractor itself. */
    129   EXTRACTOR_METATYPE_NONE = -1,
    130 
    131   /* RESERVED should be used as a terminator (like the '\0'-terminator for strings);
    132      it is never used directly by libextractor. */
    133   EXTRACTOR_METATYPE_RESERVED = 0,
    134 
    135   EXTRACTOR_METATYPE_MIMETYPE = 1,
    136   EXTRACTOR_METATYPE_FILENAME = 2,
    137   EXTRACTOR_METATYPE_COMMENT = 3,
    138 
    139   /* Standard types from bibtex */
    140   EXTRACTOR_METATYPE_TITLE = 4,
    141   EXTRACTOR_METATYPE_BOOK_TITLE = 5,
    142   EXTRACTOR_METATYPE_BOOK_EDITION = 6,
    143   EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER = 7,
    144   EXTRACTOR_METATYPE_JOURNAL_NAME = 8,
    145   EXTRACTOR_METATYPE_JOURNAL_VOLUME = 9,
    146   EXTRACTOR_METATYPE_JOURNAL_NUMBER = 10,
    147   EXTRACTOR_METATYPE_PAGE_COUNT = 11,
    148   EXTRACTOR_METATYPE_PAGE_RANGE = 12,
    149   EXTRACTOR_METATYPE_AUTHOR_NAME = 13,
    150   EXTRACTOR_METATYPE_AUTHOR_EMAIL = 14,
    151   EXTRACTOR_METATYPE_AUTHOR_INSTITUTION = 15,
    152   EXTRACTOR_METATYPE_PUBLISHER = 16,
    153   EXTRACTOR_METATYPE_PUBLISHER_ADDRESS = 17,
    154   EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION = 18,
    155   EXTRACTOR_METATYPE_PUBLISHER_SERIES = 19,
    156   EXTRACTOR_METATYPE_PUBLICATION_TYPE = 20,
    157   EXTRACTOR_METATYPE_PUBLICATION_YEAR = 21,
    158   EXTRACTOR_METATYPE_PUBLICATION_MONTH = 22,
    159   EXTRACTOR_METATYPE_PUBLICATION_DAY = 23,
    160   EXTRACTOR_METATYPE_PUBLICATION_DATE = 24,
    161   EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25,
    162   EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26,
    163   EXTRACTOR_METATYPE_LANGUAGE = 27,
    164   EXTRACTOR_METATYPE_CREATION_TIME = 28,
    165   EXTRACTOR_METATYPE_URL = 29,
    166 
    167   /* "unique" document identifiers */
    168   EXTRACTOR_METATYPE_URI = 30,
    169   EXTRACTOR_METATYPE_ISRC = 31,
    170   EXTRACTOR_METATYPE_HASH_MD4 = 32,
    171   EXTRACTOR_METATYPE_HASH_MD5 = 33,
    172   EXTRACTOR_METATYPE_HASH_SHA0 = 34,
    173   EXTRACTOR_METATYPE_HASH_SHA1 = 35,
    174   EXTRACTOR_METATYPE_HASH_RMD160 = 36,
    175 
    176   /* identifiers of a location */
    177   EXTRACTOR_METATYPE_GPS_LATITUDE_REF = 37,
    178   EXTRACTOR_METATYPE_GPS_LATITUDE = 38,
    179   EXTRACTOR_METATYPE_GPS_LONGITUDE_REF = 39,
    180   EXTRACTOR_METATYPE_GPS_LONGITUDE = 40,
    181   EXTRACTOR_METATYPE_LOCATION_CITY = 41,
    182   EXTRACTOR_METATYPE_LOCATION_SUBLOCATION = 42,
    183   EXTRACTOR_METATYPE_LOCATION_COUNTRY = 43,
    184   EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE = 44,
    185 
    186   /* generic attributes */
    187   EXTRACTOR_METATYPE_UNKNOWN = 45,
    188   EXTRACTOR_METATYPE_DESCRIPTION = 46,
    189   EXTRACTOR_METATYPE_COPYRIGHT = 47,
    190   EXTRACTOR_METATYPE_RIGHTS = 48,
    191   EXTRACTOR_METATYPE_KEYWORDS = 49,
    192   EXTRACTOR_METATYPE_ABSTRACT = 50,
    193   EXTRACTOR_METATYPE_SUMMARY = 51,
    194   EXTRACTOR_METATYPE_SUBJECT = 52,
    195   EXTRACTOR_METATYPE_CREATOR = 53,
    196   EXTRACTOR_METATYPE_FORMAT = 54,
    197   EXTRACTOR_METATYPE_FORMAT_VERSION = 55,
    198 
    199   /* processing history */
    200   EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE = 56,
    201   EXTRACTOR_METATYPE_UNKNOWN_DATE = 57,
    202   EXTRACTOR_METATYPE_CREATION_DATE = 58,
    203   EXTRACTOR_METATYPE_MODIFICATION_DATE = 59,
    204   EXTRACTOR_METATYPE_LAST_PRINTED = 60,
    205   EXTRACTOR_METATYPE_LAST_SAVED_BY = 61,
    206   EXTRACTOR_METATYPE_TOTAL_EDITING_TIME = 62,
    207   EXTRACTOR_METATYPE_EDITING_CYCLES = 63,
    208   EXTRACTOR_METATYPE_MODIFIED_BY_SOFTWARE = 64,
    209   EXTRACTOR_METATYPE_REVISION_HISTORY = 65,
    210 
    211   EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE = 66,
    212   EXTRACTOR_METATYPE_FINDER_FILE_TYPE = 67,
    213   EXTRACTOR_METATYPE_FINDER_FILE_CREATOR = 68,
    214 
    215   /* software package specifics (deb, rpm, tgz, elf) */
    216   EXTRACTOR_METATYPE_PACKAGE_NAME = 69,
    217   EXTRACTOR_METATYPE_PACKAGE_VERSION = 70,
    218   EXTRACTOR_METATYPE_SECTION = 71,
    219   EXTRACTOR_METATYPE_UPLOAD_PRIORITY = 72,
    220   EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY = 73,
    221   EXTRACTOR_METATYPE_PACKAGE_CONFLICTS = 74,
    222   EXTRACTOR_METATYPE_PACKAGE_REPLACES = 75,
    223   EXTRACTOR_METATYPE_PACKAGE_PROVIDES = 76,
    224   EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS = 77,
    225   EXTRACTOR_METATYPE_PACKAGE_SUGGESTS = 78,
    226   EXTRACTOR_METATYPE_PACKAGE_MAINTAINER = 79,
    227   EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE = 80,
    228   EXTRACTOR_METATYPE_PACKAGE_SOURCE = 81,
    229   EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL = 82,
    230   EXTRACTOR_METATYPE_TARGET_ARCHITECTURE = 83,
    231   EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY = 84,
    232   EXTRACTOR_METATYPE_LICENSE = 85,
    233   EXTRACTOR_METATYPE_PACKAGE_DISTRIBUTION = 86,
    234   EXTRACTOR_METATYPE_BUILDHOST = 87,
    235   EXTRACTOR_METATYPE_VENDOR = 88,
    236   EXTRACTOR_METATYPE_TARGET_OS = 89,
    237   EXTRACTOR_METATYPE_SOFTWARE_VERSION = 90,
    238   EXTRACTOR_METATYPE_TARGET_PLATFORM = 91,
    239   EXTRACTOR_METATYPE_RESOURCE_TYPE = 92,
    240   EXTRACTOR_METATYPE_LIBRARY_SEARCH_PATH = 93,
    241   EXTRACTOR_METATYPE_LIBRARY_DEPENDENCY = 94,
    242 
    243   /* photography specifics */
    244   EXTRACTOR_METATYPE_CAMERA_MAKE = 95,
    245   EXTRACTOR_METATYPE_CAMERA_MODEL = 96,
    246   EXTRACTOR_METATYPE_EXPOSURE = 97,
    247   EXTRACTOR_METATYPE_APERTURE = 98,
    248   EXTRACTOR_METATYPE_EXPOSURE_BIAS = 99,
    249   EXTRACTOR_METATYPE_FLASH = 100,
    250   EXTRACTOR_METATYPE_FLASH_BIAS = 101,
    251   EXTRACTOR_METATYPE_FOCAL_LENGTH = 102,
    252   EXTRACTOR_METATYPE_FOCAL_LENGTH_35MM = 103,
    253   EXTRACTOR_METATYPE_ISO_SPEED = 104,
    254   EXTRACTOR_METATYPE_EXPOSURE_MODE = 105,
    255   EXTRACTOR_METATYPE_METERING_MODE = 106,
    256   EXTRACTOR_METATYPE_MACRO_MODE = 107,
    257   EXTRACTOR_METATYPE_IMAGE_QUALITY = 108,
    258   EXTRACTOR_METATYPE_WHITE_BALANCE = 109,
    259   EXTRACTOR_METATYPE_ORIENTATION = 110,
    260   EXTRACTOR_METATYPE_MAGNIFICATION = 111,
    261 
    262   /* image specifics */
    263   EXTRACTOR_METATYPE_IMAGE_DIMENSIONS = 112,
    264   EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE = 113,
    265   EXTRACTOR_METATYPE_THUMBNAIL = 114,
    266   EXTRACTOR_METATYPE_IMAGE_RESOLUTION = 115,
    267   EXTRACTOR_METATYPE_SOURCE = 116,
    268 
    269   /* (text) document processing specifics */
    270   EXTRACTOR_METATYPE_CHARACTER_SET = 117,
    271   EXTRACTOR_METATYPE_LINE_COUNT = 118,
    272   EXTRACTOR_METATYPE_PARAGRAPH_COUNT = 119,
    273   EXTRACTOR_METATYPE_WORD_COUNT = 120,
    274   EXTRACTOR_METATYPE_CHARACTER_COUNT = 121,
    275   EXTRACTOR_METATYPE_PAGE_ORIENTATION = 122,
    276   EXTRACTOR_METATYPE_PAPER_SIZE = 123,
    277   EXTRACTOR_METATYPE_TEMPLATE = 124,
    278   EXTRACTOR_METATYPE_COMPANY = 125,
    279   EXTRACTOR_METATYPE_MANAGER = 126,
    280   EXTRACTOR_METATYPE_REVISION_NUMBER = 127,
    281 
    282   /* music / video specifics */
    283   EXTRACTOR_METATYPE_DURATION = 128,
    284   EXTRACTOR_METATYPE_ALBUM = 129,
    285   EXTRACTOR_METATYPE_ARTIST = 130,
    286   EXTRACTOR_METATYPE_GENRE = 131,
    287   EXTRACTOR_METATYPE_TRACK_NUMBER = 132,
    288   EXTRACTOR_METATYPE_DISC_NUMBER = 133,
    289   EXTRACTOR_METATYPE_PERFORMER = 134,
    290   EXTRACTOR_METATYPE_CONTACT_INFORMATION = 135,
    291   EXTRACTOR_METATYPE_SONG_VERSION = 136,
    292   EXTRACTOR_METATYPE_PICTURE = 137,
    293   EXTRACTOR_METATYPE_COVER_PICTURE = 138,
    294   EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE = 139,
    295   EXTRACTOR_METATYPE_EVENT_PICTURE = 140,
    296   EXTRACTOR_METATYPE_LOGO = 141,
    297   EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM = 142,
    298   EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
    299   EXTRACTOR_METATYPE_DISCLAIMER = 144,
    300   EXTRACTOR_METATYPE_WARNING = 145,
    301   EXTRACTOR_METATYPE_PAGE_ORDER = 146,
    302   EXTRACTOR_METATYPE_WRITER = 147,
    303   EXTRACTOR_METATYPE_PRODUCT_VERSION = 148,
    304   EXTRACTOR_METATYPE_CONTRIBUTOR_NAME = 149,
    305   EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 150,
    306   EXTRACTOR_METATYPE_NETWORK_NAME = 151,
    307   EXTRACTOR_METATYPE_SHOW_NAME = 152,
    308   EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
    309   EXTRACTOR_METATYPE_SONG_COUNT = 154,
    310   EXTRACTOR_METATYPE_STARTING_SONG = 155,
    311   EXTRACTOR_METATYPE_PLAY_COUNTER = 156,
    312   EXTRACTOR_METATYPE_CONDUCTOR = 157,
    313   EXTRACTOR_METATYPE_INTERPRETATION = 158,
    314   EXTRACTOR_METATYPE_COMPOSER = 159,
    315   EXTRACTOR_METATYPE_BEATS_PER_MINUTE = 160,
    316   EXTRACTOR_METATYPE_ENCODED_BY = 161,
    317   EXTRACTOR_METATYPE_ORIGINAL_TITLE = 162,
    318   EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
    319   EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
    320   EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
    321   EXTRACTOR_METATYPE_ORIGINAL_PERFORMER = 166,
    322   EXTRACTOR_METATYPE_LYRICS = 167,
    323   EXTRACTOR_METATYPE_POPULARITY_METER = 168,
    324   EXTRACTOR_METATYPE_LICENSEE = 169,
    325   EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 170,
    326   EXTRACTOR_METATYPE_MOOD = 171,
    327   EXTRACTOR_METATYPE_SUBTITLE = 172,
    328 
    329   /* GNUnet specific values (never extracted).  NOTE(review): only the GNUNET_* names below look GNUnet-specific; confirm how far this heading applies. */
    330   EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 173,
    331   EXTRACTOR_METATYPE_GNUNET_FULL_DATA = 174,
    332   EXTRACTOR_METATYPE_RATING = 175,
    333   EXTRACTOR_METATYPE_ORGANIZATION = 176,
    334   EXTRACTOR_METATYPE_RIPPER = 177,
    335   EXTRACTOR_METATYPE_PRODUCER = 178,
    336   EXTRACTOR_METATYPE_GROUP = 179,
    337   EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME = 180,
    338 
    339   EXTRACTOR_METATYPE_DISC_COUNT = 181,
    340 
    341   EXTRACTOR_METATYPE_CODEC = 182,
    342   EXTRACTOR_METATYPE_VIDEO_CODEC = 183,
    343   EXTRACTOR_METATYPE_AUDIO_CODEC = 184,
    344   EXTRACTOR_METATYPE_SUBTITLE_CODEC = 185,
    345 
    346   EXTRACTOR_METATYPE_CONTAINER_FORMAT = 186,
    347 
    348   EXTRACTOR_METATYPE_BITRATE = 187,
    349   EXTRACTOR_METATYPE_NOMINAL_BITRATE = 188,
    350   EXTRACTOR_METATYPE_MINIMUM_BITRATE = 189,
    351   EXTRACTOR_METATYPE_MAXIMUM_BITRATE = 190,
    352 
    353   EXTRACTOR_METATYPE_SERIAL = 191,
    354 
    355   EXTRACTOR_METATYPE_ENCODER = 192,
    356   EXTRACTOR_METATYPE_ENCODER_VERSION = 193,
    357 
    358   EXTRACTOR_METATYPE_TRACK_GAIN = 194,
    359   EXTRACTOR_METATYPE_TRACK_PEAK = 195,
    360   EXTRACTOR_METATYPE_ALBUM_GAIN = 196,
    361   EXTRACTOR_METATYPE_ALBUM_PEAK = 197,
    362   EXTRACTOR_METATYPE_REFERENCE_LEVEL = 198,
    363 
    364   EXTRACTOR_METATYPE_LOCATION_NAME = 199,
    365   EXTRACTOR_METATYPE_LOCATION_ELEVATION = 200,
    366   EXTRACTOR_METATYPE_LOCATION_HORIZONTAL_ERROR = 201,
    367   EXTRACTOR_METATYPE_LOCATION_MOVEMENT_SPEED = 202,
    368   EXTRACTOR_METATYPE_LOCATION_MOVEMENT_DIRECTION = 203,
    369   EXTRACTOR_METATYPE_LOCATION_CAPTURE_DIRECTION = 204,
    370 
    371   EXTRACTOR_METATYPE_SHOW_EPISODE_NUMBER = 205,
    372   EXTRACTOR_METATYPE_SHOW_SEASON_NUMBER = 206,
    373 
    374   EXTRACTOR_METATYPE_GROUPING = 207,
    375 
    376   EXTRACTOR_METATYPE_DEVICE_MANUFACTURER = 208,
    377   EXTRACTOR_METATYPE_DEVICE_MODEL = 209,
    378 
    379   EXTRACTOR_METATYPE_AUDIO_LANGUAGE = 210,
    380   EXTRACTOR_METATYPE_CHANNELS = 211,
    381   EXTRACTOR_METATYPE_SAMPLE_RATE = 212,
    382   EXTRACTOR_METATYPE_AUDIO_DEPTH = 213,
    383   EXTRACTOR_METATYPE_AUDIO_BITRATE = 214,
    384   EXTRACTOR_METATYPE_MAXIMUM_AUDIO_BITRATE = 215,
    385 
    386   EXTRACTOR_METATYPE_VIDEO_DIMENSIONS = 216,
    387   EXTRACTOR_METATYPE_VIDEO_DEPTH = 217,
    388   EXTRACTOR_METATYPE_FRAME_RATE = 218,
    389   EXTRACTOR_METATYPE_PIXEL_ASPECT_RATIO = 219,
    390   EXTRACTOR_METATYPE_VIDEO_BITRATE = 220,
    391   EXTRACTOR_METATYPE_MAXIMUM_VIDEO_BITRATE = 221,
    392 
    393   EXTRACTOR_METATYPE_SUBTITLE_LANGUAGE = 222,
    394   EXTRACTOR_METATYPE_VIDEO_LANGUAGE = 223,
    395 
    396   EXTRACTOR_METATYPE_TOC = 224,
    397 
    398   EXTRACTOR_METATYPE_VIDEO_DURATION = 225,
    399   EXTRACTOR_METATYPE_AUDIO_DURATION = 226,
    400   EXTRACTOR_METATYPE_SUBTITLE_DURATION = 227,
    401 
    402   EXTRACTOR_METATYPE_AUDIO_PREVIEW = 228,
    403 
    404   EXTRACTOR_METATYPE_NARINFO = 229,
    405   EXTRACTOR_METATYPE_NAR = 230,
    406 
    407   EXTRACTOR_METATYPE_LAST = 231 /* one past the highest value assigned above */
    408 };
    409 
    410 /** @} */ /* end of meta data types */
    411 
    412 /**
    413  * Get the textual name of a meta data type.
    414  *
    415  * @param type meta type to get a UTF-8 string for
    416  * @return NULL if the type is not known, otherwise
    417  *         an English (locale: C) string naming the type;
    418  *         translate using `dgettext ("libextractor", rval)`
    419  * @ingroup types
    420  */
    421 _EXTRACTOR_EXTERN const char *
    422 EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type);
    423 
    424 
    425 /**
    426  * Get a long description of a meta data type.
    427  *
    428  * @param type meta type to get a UTF-8 description for
    429  * @return NULL if the type is not known, otherwise
    430  *         an English (locale: C) string describing the type;
    431  *         translate using `dgettext ("libextractor", rval)`
    432  * @ingroup types
    433  */
    434 _EXTRACTOR_EXTERN const char *
    435 EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type);
    436 
    437 
    438 /**
    439  * Return the upper bound for type numbers, exclusive as in [0,max).
    440  *
    441  * @return exclusive upper bound of the legal metatype numbers for this version of libextractor
    442  * @ingroup types
    443  */
    444 _EXTRACTOR_EXTERN enum EXTRACTOR_MetaType
    445 EXTRACTOR_metatype_get_max (void);
    446 
    447 
    448 /**
    449  * Type of a function that libextractor calls for each
    450  * meta data item found.
    451  *
    452  * @param cls closure (user-defined)
    453  * @param plugin_name name of the plugin that produced this value;
    454  *        special values can be used (e.g. '&lt;zlib&gt;' for zlib being
    455  *        used in the main libextractor library and yielding
    456  *        meta data).
    457  * @param type libextractor-type describing the meta data
    458  * @param format basic format information about @a data
    459  * @param data_mime_type mime-type of @a data (not of the original file);
    460  *        can be NULL (if mime-type is not known)
    461  * @param data actual meta-data found, to be interpreted according to @a format
    462  * @param data_len number of bytes in @a data
    463  * @return 0 to continue extracting, 1 to abort
    464  */
    465 typedef int
    466 (*EXTRACTOR_MetaDataProcessor) (void *cls,
    467                                 const char *plugin_name,
    468                                 enum EXTRACTOR_MetaType type,
    469                                 enum EXTRACTOR_MetaFormat format,
    470                                 const char *data_mime_type,
    471                                 const char *data,
    472                                 size_t data_len);
    473 
    474 
    475 /**
    476  * Context provided for plugins that perform meta data extraction (the argument of #EXTRACTOR_extract_method).
    477  */
    478 struct EXTRACTOR_ExtractContext
    479 {
    480 
    481   /**
    482    * Closure argument to pass to all callbacks.
    483    */
    484   void *cls;
    485 
    486   /**
    487    * Configuration string for the plugin.
    488    */
    489   const char *config;
    490 
    491   /**
    492    * Obtain a pointer to up to @a size bytes of data from the file to process.
    493    *
    494    * @param cls the @e cls member of this struct
    495    * @param data pointer to set to the file data, set to NULL on error
    496    * @param size maximum number of bytes requested
    497    * @return number of bytes now available in @a data (can be smaller than @a size),
    498    *         -1 on error.  NOTE(review): ssize_t is a POSIX type (<sys/types.h>), which this header does not include itself; confirm it is declared on all targets.
    499    */
    500   ssize_t (*read) (void *cls,
    501                    void **data,
    502                    size_t size);
    503 
    504 
    505   /**
    506    * Seek in the file.  Use `SEEK_CUR` for @a whence and @a pos of 0 to
    507    * obtain the current position in the file.
    508    *
    509    * @param cls the @e cls member of this struct
    510    * @param pos position to seek (see 'man lseek')
    511    * @param whence how to seek (absolute to start, relative, absolute to end)
    512    * @return new absolute position, -1 on error (e.g. desired position
    513    *         does not exist)
    514    */
    515   int64_t (*seek) (void *cls,
    516                    int64_t pos,
    517                    int whence);
    518 
    519 
    520   /**
    521    * Determine the overall size of the file.
    522    *
    523    * @param cls the @e cls member of this struct
    524    * @return overall file size, `UINT64_MAX` on error (e.g. IPC failure)
    525    */
    526   uint64_t (*get_size) (void *cls);
    527 
    528   /**
    529    * Function to call on each extracted meta data item.
    530    */
    531   EXTRACTOR_MetaDataProcessor proc;
    532 
    533 };
    534 
    535 
    536 /**
    537  * Signature of the extract method that each plugin
    538  * must provide.
    539  *
    540  * @param ec extraction context provided to the plugin (file access callbacks, configuration and result processor)
    541  */
    542 typedef void
    543 (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec);
    544 
    545 
    546 /**
    547  * Linked list of extractor plugins (only forward-declared here).  An application builds this list
    548  * by telling libextractor to load various keyword-extraction
    549  * plugins.  Libraries can also be unloaded (removed from this list,
    550  * see #EXTRACTOR_plugin_remove).
    551  */
    552 struct EXTRACTOR_PluginList;
    553 
    554 
    555 /**
    556  * Load the default set of plugins.  The default can be changed
    557  * by setting the LIBEXTRACTOR_LIBRARIES environment variable:
    558  * if it is set to some value "env", then this function will return
    559  * #EXTRACTOR_plugin_add_config (NULL, env, flags).
    560  *
    561  * If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt
    562  * to locate the installed plugins and load all of them.
    563  * The directory where the code will search for plugins is typically
    564  * automatically determined; it can be specified explicitly using the
    565  * "LIBEXTRACTOR_PREFIX" environment variable.
    566  *
    567  * That environment variable (LIBEXTRACTOR_PREFIX) must be set to the precise directory with
    568  * the plugins (e.g. "/usr/lib/libextractor", not "/usr").  Note that
    569  * setting the environment variable will disable all of the methods
    570  * that are typically used to determine the location of plugins.
    571  * Multiple paths can be specified using ':' to separate them.
    572  *
    573  * @param flags options for all of the plugins loaded
    574  * @return the default set of plugins, NULL if no plugins were found
    575  */
    576 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
    577 EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags);
    578 
    579 
    580 /**
    581  * Add a library for keyword extraction.
    582  *
    583  * @param prev the previous list of libraries, may be NULL
    584  * @param library the name of the library (short handle, e.g. "mime")
    585  * @param options options to give to the library
    586  * @param flags options to use
    587  * @return the new list of libraries, equal to @a prev iff an error occurred
    588  */
    589 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
    590 EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList *prev,
    591                       const char *library,
    592                       const char *options,
    593                       enum EXTRACTOR_Options flags);
    594 
    595 
    596 /**
    597  * Load multiple libraries as specified by the user.
    598  *
    599  * @param config a string given by the user that defines which
    600  *        libraries should be loaded. Has the format
    601  *        "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
    602  *        For example, 'mp3:ogg' loads the
    603  *        mp3 and the ogg plugins. The '-' before the LIBRARYNAME
    604  *        indicates that the library should be removed from
    605  *        the library list.
    606  * @param prev the previous list of libraries, may be NULL
    607  * @param flags options to use
    608  * @return the new list of libraries, equal to @a prev iff an error occurred
    609  *         or if @a config was empty (or NULL).
    610  */
    611 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
    612 EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev,
    613                              const char *config,
    614                              enum EXTRACTOR_Options flags);
    615 
    616 
    617 /**
    618  * Remove a single plugin from a list.
    619  *
    620  * @param prev the current list of plugins
    621  * @param library the name of the plugin to remove (short handle)
    622  * @return the reduced list, unchanged if the plugin was not loaded
    623  */
    624 _EXTRACTOR_EXTERN struct EXTRACTOR_PluginList *
    625 EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList *prev,
    626                          const char *library);
    627 
    628 
    629 /**
    630  * Remove all plugins from the given list (destroys the list).
    631  *
    632  * @param plugins the list of plugins
    633  */
    634 _EXTRACTOR_EXTERN void
    635 EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins);
    636 
    637 
    638 /**
    639  * Extract keywords from a file using the given set of plugins.
    640  *
    641  * @param plugins the list of plugins to use
    642  * @param filename the name of the file, can be NULL if @a data is not NULL
    643  * @param data data of the file in memory, can be NULL (in which
    644  *        case libextractor will open the file) if @a filename is not NULL
    645  * @param size number of bytes in @a data, ignored if @a data is NULL
    646  * @param proc function to call for each meta data item found
    647  * @param proc_cls cls argument to @a proc
    648  */
    649 _EXTRACTOR_EXTERN void
    650 EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins,
    651                    const char *filename,
    652                    const void *data,
    653                    size_t size,
    654                    EXTRACTOR_MetaDataProcessor proc,
    655                    void *proc_cls);
    656 
    657 
    658 /**
    659  * Simple #EXTRACTOR_MetaDataProcessor implementation that
    660  * prints the extracted meta data to the given file.  Only prints
    661  * those keywords that are in UTF-8 format.
    662  *
    663  * @param handle the file to write to (`stdout`, `stderr`), must NOT be NULL,
    664  *               must be of type `FILE *`.
    665  * @param plugin_name name of the plugin that produced this value
    666  * @param type libextractor-type describing the meta data
    667  * @param format basic format information about @a data
    668  * @param data_mime_type mime-type of @a data (not of the original file);
    669  *        can be NULL (if mime-type is not known)
    670  * @param data actual meta-data found
    671  * @param data_len number of bytes in @a data
    672  * @return non-zero if printing failed, otherwise 0.
    673  */
    674 _EXTRACTOR_EXTERN int
    675 EXTRACTOR_meta_data_print (void *handle,
    676                            const char *plugin_name,
    677                            enum EXTRACTOR_MetaType type,
    678                            enum EXTRACTOR_MetaFormat format,
    679                            const char *data_mime_type,
    680                            const char *data,
    681                            size_t data_len);
    682 
    683 
    684 #if 0 /* keep Emacsens' auto-indent happy */
    685 {
    686 #endif
    687 #ifdef __cplusplus
    688 }
    689 #endif
    690 
    691 #endif