aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
author Christian Grothoff <christian@grothoff.org> 2009-12-13 23:02:19 +0000
committer Christian Grothoff <christian@grothoff.org> 2009-12-13 23:02:19 +0000
commit 304c2d93317f614b247d0d7d5cfcbea458e3e0d8 (patch)
tree bc13f3ec9a127ef429d6f3700fecfd50cf3cabff /src/include
parent bff37ddc5da45ef43c94ea86820b25599a84806b (diff)
download libextractor-304c2d93317f614b247d0d7d5cfcbea458e3e0d8.tar.gz
libextractor-304c2d93317f614b247d0d7d5cfcbea458e3e0d8.zip
new API for GNU libextractor, converted first 3 plugins as well
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/Makefile.am 1
-rw-r--r-- src/include/extractor.h 789
-rw-r--r-- src/include/winproc.h 44
3 files changed, 430 insertions, 404 deletions
diff --git a/src/include/Makefile.am b/src/include/Makefile.am
index 8a5aaa7..daa42c4 100644
--- a/src/include/Makefile.am
+++ b/src/include/Makefile.am
@@ -3,6 +3,5 @@ include_HEADERS = \
3 extractor.h 3 extractor.h
4EXTRA_DIST = \ 4EXTRA_DIST = \
5 plibc.h \ 5 plibc.h \
6 winproc.h \
7 platform.h \ 6 platform.h \
8 gettext.h 7 gettext.h
diff --git a/src/include/extractor.h b/src/include/extractor.h
index 0778530..716750f 100644
--- a/src/include/extractor.h
+++ b/src/include/extractor.h
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff 3 (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
@@ -32,432 +32,503 @@ extern "C" {
32 * 0.2.6-1 => 0x00020601 32 * 0.2.6-1 => 0x00020601
33 * 4.5.2-0 => 0x04050200 33 * 4.5.2-0 => 0x04050200
34 */ 34 */
35#define EXTRACTOR_VERSION 0x00052301 35#define EXTRACTOR_VERSION 0x00060000
36 36
37#include <stdio.h> 37#include <stdio.h>
38 38
39/* ignore the 'type' of the keyword when eliminating duplicates */
40#define EXTRACTOR_DUPLICATES_TYPELESS 1
41/* remove type 'UNKNOWN' if there is a duplicate keyword of
42 known type, even if usually different types should be
43 preserved */
44#define EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN 2
45
46#define EXTRACTOR_DEFAULT_LIBRARIES EXTRACTOR_getDefaultLibraries()
47
48const char * EXTRACTOR_getDefaultLibraries(void);
49 39
50/** 40/**
51 * Enumeration defining various sources of keywords. 41 * Options for how plugin execution should be done.
52 * See also
53 * http://dublincore.org/documents/1998/09/dces/
54 */ 42 */
55typedef enum { 43enum EXTRACTOR_Options
56 EXTRACTOR_UNKNOWN = 0, 44 {
57 EXTRACTOR_FILENAME = 1, 45 /**
58 EXTRACTOR_MIMETYPE = 2, 46 * Run plugins in-process.
59 EXTRACTOR_TITLE = 3, 47 */
60 EXTRACTOR_AUTHOR = 4, 48 EXTRACTOR_OPTION_NONE = 0,
61 EXTRACTOR_ARTIST = 5, 49
62 EXTRACTOR_DESCRIPTION = 6, 50 /**
63 EXTRACTOR_COMMENT = 7, 51 * Run plugins out-of-process, starting the process
64 EXTRACTOR_DATE = 8, 52 * once at the time the plugin is loaded. This will
65 EXTRACTOR_PUBLISHER = 9, 53 * prevent the main process crashing if a plugin dies.
66 EXTRACTOR_LANGUAGE = 10, 54 * Ignored on platforms where out-of-process starts
67 EXTRACTOR_ALBUM = 11, 55 * are not supported.
68 EXTRACTOR_GENRE = 12, 56 */
69 EXTRACTOR_LOCATION = 13, 57 EXTRACTOR_OPTION_OUT_OF_PROCESS = 1,
70 EXTRACTOR_VERSIONNUMBER = 14, 58
71 EXTRACTOR_ORGANIZATION = 15, 59 /**
72 EXTRACTOR_COPYRIGHT = 16, 60 * If a plugin crashes, automatically restart the respective
73 EXTRACTOR_SUBJECT = 17, 61 * process for the next file. Implies
74 EXTRACTOR_KEYWORDS = 18, 62 * EXTRACTOR_OPTION_OUT_OF_PROCESS.
75 EXTRACTOR_CONTRIBUTOR = 19, 63 */
76 EXTRACTOR_RESOURCE_TYPE = 20, 64 EXTRACTOR_OPTION_AUTO_RESTART = 2
77 EXTRACTOR_FORMAT = 21, 65
78 EXTRACTOR_RESOURCE_IDENTIFIER = 22, 66 };
79 EXTRACTOR_SOURCE = 23,
80 EXTRACTOR_RELATION = 24,
81 EXTRACTOR_COVERAGE = 25,
82 EXTRACTOR_SOFTWARE = 26,
83 EXTRACTOR_DISCLAIMER = 27,
84 EXTRACTOR_WARNING = 28,
85 EXTRACTOR_TRANSLATED = 29,
86 EXTRACTOR_CREATION_DATE = 30,
87 EXTRACTOR_MODIFICATION_DATE = 31,
88 EXTRACTOR_CREATOR = 32,
89 EXTRACTOR_PRODUCER = 33,
90 EXTRACTOR_PAGE_COUNT = 34,
91 EXTRACTOR_PAGE_ORIENTATION = 35,
92 EXTRACTOR_PAPER_SIZE = 36,
93 EXTRACTOR_USED_FONTS = 37,
94 EXTRACTOR_PAGE_ORDER = 38,
95 EXTRACTOR_CREATED_FOR = 39,
96 EXTRACTOR_MAGNIFICATION = 40,
97 EXTRACTOR_RELEASE = 41,
98 EXTRACTOR_GROUP = 42,
99 EXTRACTOR_SIZE = 43,
100 EXTRACTOR_SUMMARY = 44,
101 EXTRACTOR_PACKAGER = 45,
102 EXTRACTOR_VENDOR = 46,
103 EXTRACTOR_LICENSE = 47,
104 EXTRACTOR_DISTRIBUTION = 48,
105 EXTRACTOR_BUILDHOST = 49,
106 EXTRACTOR_OS = 50,
107 EXTRACTOR_DEPENDENCY = 51,
108 EXTRACTOR_HASH_MD4 = 52,
109 EXTRACTOR_HASH_MD5 = 53,
110 EXTRACTOR_HASH_SHA0 = 54,
111 EXTRACTOR_HASH_SHA1 = 55,
112 EXTRACTOR_HASH_RMD160 = 56,
113 EXTRACTOR_RESOLUTION = 57,
114 EXTRACTOR_CATEGORY = 58,
115 EXTRACTOR_BOOKTITLE = 59,
116 EXTRACTOR_PRIORITY = 60,
117 EXTRACTOR_CONFLICTS = 61,
118 EXTRACTOR_REPLACES = 62,
119 EXTRACTOR_PROVIDES = 63,
120 EXTRACTOR_CONDUCTOR = 64,
121 EXTRACTOR_INTERPRET = 65,
122 EXTRACTOR_OWNER = 66,
123 EXTRACTOR_LYRICS = 67,
124 EXTRACTOR_MEDIA_TYPE = 68,
125 EXTRACTOR_CONTACT = 69,
126 EXTRACTOR_THUMBNAIL_DATA = 70,
127 EXTRACTOR_PUBLICATION_DATE = 71,
128 EXTRACTOR_CAMERA_MAKE = 72,
129 EXTRACTOR_CAMERA_MODEL = 73,
130 EXTRACTOR_EXPOSURE = 74,
131 EXTRACTOR_APERTURE = 75,
132 EXTRACTOR_EXPOSURE_BIAS = 76,
133 EXTRACTOR_FLASH = 77,
134 EXTRACTOR_FLASH_BIAS = 78,
135 EXTRACTOR_FOCAL_LENGTH = 79,
136 EXTRACTOR_FOCAL_LENGTH_35MM = 80,
137 EXTRACTOR_ISO_SPEED = 81,
138 EXTRACTOR_EXPOSURE_MODE = 82,
139 EXTRACTOR_METERING_MODE = 83,
140 EXTRACTOR_MACRO_MODE = 84,
141 EXTRACTOR_IMAGE_QUALITY = 85,
142 EXTRACTOR_WHITE_BALANCE = 86,
143 EXTRACTOR_ORIENTATION = 87,
144 EXTRACTOR_TEMPLATE = 88,
145 EXTRACTOR_SPLIT = 89,
146 EXTRACTOR_PRODUCTVERSION = 90,
147 EXTRACTOR_LAST_SAVED_BY = 91,
148 EXTRACTOR_LAST_PRINTED = 92,
149 EXTRACTOR_WORD_COUNT = 93,
150 EXTRACTOR_CHARACTER_COUNT = 94,
151 EXTRACTOR_TOTAL_EDITING_TIME = 95,
152 EXTRACTOR_THUMBNAILS = 96,
153 EXTRACTOR_SECURITY = 97,
154 EXTRACTOR_CREATED_BY_SOFTWARE = 98,
155 EXTRACTOR_MODIFIED_BY_SOFTWARE = 99,
156 EXTRACTOR_REVISION_HISTORY = 100,
157 EXTRACTOR_LOWERCASE = 101,
158 EXTRACTOR_COMPANY = 102,
159 EXTRACTOR_GENERATOR = 103,
160 EXTRACTOR_CHARACTER_SET = 104,
161 EXTRACTOR_LINE_COUNT = 105,
162 EXTRACTOR_PARAGRAPH_COUNT = 106,
163 EXTRACTOR_EDITING_CYCLES = 107,
164 EXTRACTOR_SCALE = 108,
165 EXTRACTOR_MANAGER = 109,
166 EXTRACTOR_MOVIE_DIRECTOR = 110,
167 EXTRACTOR_DURATION = 111,
168 EXTRACTOR_INFORMATION = 112,
169 EXTRACTOR_FULL_NAME = 113,
170 EXTRACTOR_CHAPTER = 114,
171 EXTRACTOR_YEAR = 115,
172 EXTRACTOR_LINK = 116,
173 EXTRACTOR_MUSIC_CD_IDENTIFIER = 117,
174 EXTRACTOR_PLAY_COUNTER = 118,
175 EXTRACTOR_POPULARITY_METER = 119,
176 EXTRACTOR_CONTENT_TYPE = 120,
177 EXTRACTOR_ENCODED_BY = 121,
178 EXTRACTOR_TIME = 122,
179 EXTRACTOR_MUSICIAN_CREDITS_LIST = 123,
180 EXTRACTOR_MOOD = 124,
181 EXTRACTOR_FORMAT_VERSION = 125,
182 EXTRACTOR_TELEVISION_SYSTEM = 126,
183 EXTRACTOR_SONG_COUNT = 127,
184 EXTRACTOR_STARTING_SONG = 128,
185 EXTRACTOR_HARDWARE_DEPENDENCY = 129,
186 EXTRACTOR_RIPPER = 130,
187 EXTRACTOR_FILE_SIZE = 131,
188 EXTRACTOR_TRACK_NUMBER = 132,
189 EXTRACTOR_ISRC = 133,
190 EXTRACTOR_DISC_NUMBER = 134,
191 EXTRACTOR_GNUNET_DISPLAY_TYPE = 135,
192 EXTRACTOR_GNUNET_ECBC_URI = 136,
193 EXTRACTOR_GNUNET_FULL_DATA = 137,
194 EXTRACTOR_LOCATION_CITY = 138,
195 EXTRACTOR_LOCATION_COUNTRY = 139,
196 EXTRACTOR_LOCATION_SUBLOCATION = 140,
197 EXTRACTOR_GPS_LATITUDE_REF = 141,
198 EXTRACTOR_GPS_LATITUDE = 142,
199 EXTRACTOR_GPS_LONGITUDE_REF = 143,
200 EXTRACTOR_GPS_LONGITUDE = 144,
201 EXTRACTOR_RATING = 145,
202 EXTRACTOR_COUNTRY_CODE = 146
203} EXTRACTOR_KeywordType;
204 67
205/**
206 * Test if a given LE type contains binary data.
207 */
208#define EXTRACTOR_isBinaryType(type) (type == EXTRACTOR_THUMBNAIL_DATA)
209 68
210/** 69/**
211 * A linked list of keywords. This structure is passed around 70 * Format in which the extracted meta data is presented.
212 * in libExtractor and is typically the result of any keyword
213 * extraction operation.
214 * <p>
215 * Each entry in the keyword list consists of a string (the
216 * keyword) and the keyword type (of type KeywordType)
217 * describing how/from where the keyword was obtained.
218 */ 71 */
219typedef struct EXTRACTOR_Keywords { 72enum EXTRACTOR_MetaFormat
220 /* the keyword that was found */ 73 {
221 char * keyword; 74 /**
222 /* the type of the keyword (classification) */ 75 * Format is unknown.
223 EXTRACTOR_KeywordType keywordType; 76 */
224 /* the next entry in the list */ 77 EXTRACTOR_METAFORMAT_UNKNOWN = 0,
225 struct EXTRACTOR_Keywords * next; 78
226} EXTRACTOR_KeywordList; 79 /**
80 * 0-terminated, UTF-8 encoded string. "data_len"
81 * is strlen(data)+1.
82 */
83 EXTRACTOR_METAFORMAT_UTF8 = 1,
84
85 /**
86 * Some kind of binary format, see given Mime type.
87 */
88 EXTRACTOR_METAFORMAT_BINARY = 2,
89
90 /**
91 * 0-terminated string. The specific encoding is unknown.
92 * "data_len" is strlen(data)+1.
93 */
94 EXTRACTOR_METAFORMAT_C_STRING = 3
95 };
227 96
228/**
229 * Signature of the extract method that each plugin
230 * must provide.
231 *
232 * @param filename MAYBE NULL (!)
233 * @param data must not be modified (!)
234 */
235typedef EXTRACTOR_KeywordList *
236(*ExtractMethod)(const char * filename,
237 char * data,
238 size_t filesize,
239 EXTRACTOR_KeywordList * next,
240 const char * options);
241 97
242/** 98/**
243 * Linked list of extractor helper-libraries. An application 99 * Enumeration defining various sources of keywords. See also
244 * builds this list by telling libextractor to load various 100 * http://dublincore.org/documents/1998/09/dces/
245 * keyword-extraction libraries. Libraries can also be unloaded
246 * (removed from this list, see removeLibrary).
247 * <p>
248 * Client code should never be concerned with the internals of
249 * this struct.
250 */ 101 */
251typedef struct EXTRACTOR_Extractor { 102enum EXTRACTOR_MetaType
252 void * libraryHandle; 103 {
253 char * libname; 104 /* fundamental types */
254 ExtractMethod extractMethod; 105 EXTRACTOR_METATYPE_RESERVED = 0,
255 struct EXTRACTOR_Extractor * next; 106 EXTRACTOR_METATYPE_MIMETYPE = 1,
256 char * options; 107 EXTRACTOR_METATYPE_FILENAME = 2,
257} EXTRACTOR_ExtractorList; 108 EXTRACTOR_METATYPE_COMMENT = 3,
109
110 /* Standard types from bibtex */
111 EXTRACTOR_METATYPE_TITLE = 4,
112 EXTRACTOR_METATYPE_BOOK_TITLE = 5,
113 EXTRACTOR_METATYPE_BOOK_EDITION = 6,
114 EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER = 7,
115 EXTRACTOR_METATYPE_JOURNAL_NAME = 8,
116 EXTRACTOR_METATYPE_JOURNAL_VOLUME = 9,
117 EXTRACTOR_METATYPE_JOURNAL_NUMBER = 10,
118 EXTRACTOR_METATYPE_PAGE_COUNT = 11,
119 EXTRACTOR_METATYPE_PAGE_RANGE = 12,
120 EXTRACTOR_METATYPE_AUTHOR_NAME = 13,
121 EXTRACTOR_METATYPE_AUTHOR_EMAIL = 14,
122 EXTRACTOR_METATYPE_AUTHOR_INSTITUTION = 15,
123 EXTRACTOR_METATYPE_PUBLISHER = 16,
124 EXTRACTOR_METATYPE_PUBLISHER_ADDRESS = 17,
125 EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION = 18,
126 EXTRACTOR_METATYPE_PUBLISHER_SERIES = 19,
127 EXTRACTOR_METATYPE_PUBLICATION_TYPE = 20,
128 EXTRACTOR_METATYPE_PUBLICATION_YEAR = 21,
129 EXTRACTOR_METATYPE_PUBLICATION_MONTH = 22,
130 EXTRACTOR_METATYPE_PUBLICATION_DAY = 23,
131 EXTRACTOR_METATYPE_PUBLICATION_DATE = 24,
132 EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25,
133 EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26,
134 EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE = 27,
135 EXTRACTOR_METATYPE_CREATION_TIME = 28,
136 EXTRACTOR_METATYPE_URL = 29,
137
138 /* "unique" document identifiers */
139 EXTRACTOR_METATYPE_URI = 30,
140 EXTRACTOR_METATYPE_ISRC = 31,
141 EXTRACTOR_METATYPE_HASH_MD4 = 32,
142 EXTRACTOR_METATYPE_HASH_MD5 = 33,
143 EXTRACTOR_METATYPE_HASH_SHA0 = 34,
144 EXTRACTOR_METATYPE_HASH_SHA1 = 35,
145 EXTRACTOR_METATYPE_HASH_RMD160 = 36,
146
147 /* identifiers of a location */
148 EXTRACTOR_METATYPE_GPS_LATITUDE_REF = 37,
149 EXTRACTOR_METATYPE_GPS_LATITUDE = 38,
150 EXTRACTOR_METATYPE_GPS_LONGITUDE_REF = 39,
151 EXTRACTOR_METATYPE_GPS_LONGITUDE = 40,
152 EXTRACTOR_METATYPE_LOCATION_CITY = 41,
153 EXTRACTOR_METATYPE_LOCATION_SUBLOCATION = 42,
154 EXTRACTOR_METATYPE_LOCATION_COUNTRY = 43,
155 EXTRACTOR_METATYPE_LOCATION_COUNTRY_CODE = 44,
156
157 /* generic attributes */
158 EXTRACTOR_METATYPE_UNKNOWN = 45,
159 EXTRACTOR_METATYPE_DESCRIPTION = 46,
160 EXTRACTOR_METATYPE_COPYRIGHT = 47,
161 EXTRACTOR_METATYPE_RIGHTS = 48,
162 EXTRACTOR_METATYPE_KEYWORDS = 49,
163 EXTRACTOR_METATYPE_ABSTRACT = 50,
164 EXTRACTOR_METATYPE_SUMMARY = 51,
165 EXTRACTOR_METATYPE_SUBJECT = 52,
166 EXTRACTOR_METATYPE_CREATOR = 53,
167 EXTRACTOR_METATYPE_FORMAT = 54,
168 EXTRACTOR_METATYPE_FORMAT_VERSION = 55,
169
170 /* processing history */
171 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE = 56,
172 EXTRACTOR_METATYPE_UNKNOWN_DATE = 57,
173 EXTRACTOR_METATYPE_CREATION_DATE = 58,
174 EXTRACTOR_METATYPE_MODIFICATION_DATE = 59,
175 EXTRACTOR_METATYPE_LAST_PRINTED = 60,
176 EXTRACTOR_METATYPE_LAST_SAVED_BY = 61,
177 EXTRACTOR_METATYPE_TOTAL_EDITING_TIME = 62,
178 EXTRACTOR_METATYPE_EDITING_CYCLES = 63,
179 EXTRACTOR_METATYPE_MODIFIED_BY_SOFTWARE = 64,
180 EXTRACTOR_METATYPE_REVISION_HISTORY = 65,
181
182 /* FIXME... */
183
184 /* software package specifics (deb, rpm, tgz) */
185 EXTRACTOR_METATYPE_PACKAGER = 45,
186 EXTRACTOR_METATYPE_VENDOR = 46,
187 EXTRACTOR_METATYPE_LICENSE = 47,
188 EXTRACTOR_METATYPE_DISTRIBUTION = 48,
189 EXTRACTOR_METATYPE_BUILDHOST = 49,
190 EXTRACTOR_METATYPE_TARGET_OS = 50,
191 EXTRACTOR_METATYPE_DEPENDENCY = 51,
192 EXTRACTOR_METATYPE_CONFLICTS = 61,
193 EXTRACTOR_METATYPE_REPLACES = 62,
194 EXTRACTOR_METATYPE_PROVIDES = 63,
195
196 /* (text) document processing specifics */
197 EXTRACTOR_METATYPE_CHARACTER_SET = 104,
198 EXTRACTOR_METATYPE_LINE_COUNT = 105,
199 EXTRACTOR_METATYPE_PARAGRAPH_COUNT = 106,
200 EXTRACTOR_METATYPE_WORD_COUNT = 93,
201 EXTRACTOR_METATYPE_CHARACTER_COUNT = 94,
202 EXTRACTOR_METATYPE_PAGE_ORIENTATION = 35,
203 EXTRACTOR_METATYPE_PAPER_SIZE = 36,
204 EXTRACTOR_METATYPE_USED_FONTS = 37,
205 EXTRACTOR_METATYPE_PAGE_ORDER = 38,
206
207 /* music / video specifics */
208 EXTRACTOR_METATYPE_LYRICS = 67,
209 EXTRACTOR_METATYPE_CONDUCTOR = 64,
210 EXTRACTOR_METATYPE_INTERPRET = 65,
211 EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117,
212 EXTRACTOR_METATYPE_PLAY_COUNTER = 118,
213 EXTRACTOR_METATYPE_DURATION = 111,
214 EXTRACTOR_METATYPE_MOVIE_DIRECTOR = 110,
215 EXTRACTOR_METATYPE_SONG_COUNT = 127,
216 EXTRACTOR_METATYPE_STARTING_SONG = 128,
217 EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 123,
218 EXTRACTOR_METATYPE_TRACK_NUMBER = 132,
219 EXTRACTOR_METATYPE_DISC_NUMBER = 134,
220 EXTRACTOR_METATYPE_ALBUM = 11,
221 EXTRACTOR_METATYPE_ARTIST = 5,
222 EXTRACTOR_METATYPE_GENRE = 12,
223
224 /* image specifics */
225 EXTRACTOR_METATYPE_THUMBNAIL_DATA = 70,
226 EXTRACTOR_METATYPE_RESOLUTION = 57,
227 EXTRACTOR_METATYPE_IMAGE_DIMENSIONS = 43,
228 EXTRACTOR_METATYPE_SCALE = 108,
229
230 /* photography specifics */
231 EXTRACTOR_METATYPE_CAMERA_MAKE = 72,
232 EXTRACTOR_METATYPE_CAMERA_MODEL = 73,
233 EXTRACTOR_METATYPE_EXPOSURE = 74,
234 EXTRACTOR_METATYPE_APERTURE = 75,
235 EXTRACTOR_METATYPE_EXPOSURE_BIAS = 76,
236 EXTRACTOR_METATYPE_FLASH = 77,
237 EXTRACTOR_METATYPE_FLASH_BIAS = 78,
238 EXTRACTOR_METATYPE_FOCAL_LENGTH = 79,
239 EXTRACTOR_METATYPE_FOCAL_LENGTH_35MM = 80,
240 EXTRACTOR_METATYPE_ISO_SPEED = 81,
241 EXTRACTOR_METATYPE_EXPOSURE_MODE = 82,
242 EXTRACTOR_METATYPE_METERING_MODE = 83,
243 EXTRACTOR_METATYPE_MACRO_MODE = 84,
244 EXTRACTOR_METATYPE_IMAGE_QUALITY = 85,
245 EXTRACTOR_METATYPE_WHITE_BALANCE = 86,
246 EXTRACTOR_METATYPE_ORIENTATION = 87,
247 EXTRACTOR_METATYPE_MAGNIFICATION = 40,
248
249 /* numeric metrics */
250 EXTRACTOR_METATYPE_POPULARITY_METER = 119,
251 EXTRACTOR_METATYPE_RATING = 145,
252 EXTRACTOR_METATYPE_PRIORITY = 60,
253
254 /* gnunet specific attributes */
255 EXTRACTOR_METATYPE_GNUNET_DISPLAY_TYPE = 135,
256 EXTRACTOR_METATYPE_GNUNET_ECBC_URI = 136,
257
258
259 /* misc (see if these are still needed...) */
260
261 EXTRACTOR_METATYPE_GENERATOR = 103,
262 EXTRACTOR_METATYPE_ENCODED_BY = 121,
263 EXTRACTOR_METATYPE_PRODUCTVERSION = 90,
264
265 EXTRACTOR_METATYPE_DISCLAIMER = 27,
266 EXTRACTOR_METATYPE_FILE_SIZE = 131,
267 EXTRACTOR_METATYPE_FULL_DATA = 137,
268 EXTRACTOR_METATYPE_VERSIONNUMBER = 14,
269
270 EXTRACTOR_METATYPE_ORGANIZATION = 15,
271 EXTRACTOR_METATYPE_CONTRIBUTOR = 19,
272 EXTRACTOR_METATYPE_RESOURCE_TYPE = 20,
273 EXTRACTOR_METATYPE_SOURCE = 23,
274 EXTRACTOR_METATYPE_RELATION = 24,
275 EXTRACTOR_METATYPE_COVERAGE = 25,
276 EXTRACTOR_METATYPE_SOFTWARE = 26,
277 EXTRACTOR_METATYPE_WARNING = 28,
278 EXTRACTOR_METATYPE_TRANSLATED = 29,
279 EXTRACTOR_METATYPE_PRODUCER = 33,
280 EXTRACTOR_METATYPE_CREATED_FOR = 39,
281 EXTRACTOR_METATYPE_RELEASE = 41,
282 EXTRACTOR_METATYPE_GROUP = 42,
283 EXTRACTOR_METATYPE_CATEGORY = 58,
284 EXTRACTOR_METATYPE_OWNER = 66,
285 EXTRACTOR_METATYPE_MEDIA_TYPE = 68,
286 EXTRACTOR_METATYPE_CONTACT = 69,
287 EXTRACTOR_METATYPE_TEMPLATE = 88,
288 EXTRACTOR_METATYPE_SECURITY = 97,
289 EXTRACTOR_METATYPE_COMPANY = 102,
290 EXTRACTOR_METATYPE_MANAGER = 109,
291 EXTRACTOR_METATYPE_INFORMATION = 112,
292 EXTRACTOR_METATYPE_FULL_NAME = 113,
293 EXTRACTOR_METATYPE_LINK = 116,
294 EXTRACTOR_METATYPE_TIME = 122,
295 EXTRACTOR_METATYPE_MOOD = 124,
296 EXTRACTOR_METATYPE_TELEVISION_SYSTEM = 126,
297 EXTRACTOR_METATYPE_HARDWARE_DEPENDENCY = 129,
298 EXTRACTOR_METATYPE_RIPPER = 130,
299 };
258 300
259/**
260 * Load the default set of libraries.
261 * @return the default set of libraries.
262 */
263EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries(void);
264 301
265/** 302/**
266 * Get the textual name of the keyword. 303 * Get the textual name of the keyword.
267 * @return NULL if the type is not known 304 *
305 * @param type meta type to get a UTF-8 string for
306 * @return NULL if the type is not known, otherwise
307 * an English (locale: C) string describing the type;
308 * translate using 'dgettext ("libextractor", rval)'
268 */ 309 */
269const char * 310const char *
270EXTRACTOR_getKeywordTypeAsString(EXTRACTOR_KeywordType type); 311EXTRACTOR_metatype_to_string(enum EXTRACTOR_MetaType type);
271 312
272/**
273 * Return the highest type number, exclusive as in [0,highest).
274 */
275EXTRACTOR_KeywordType
276EXTRACTOR_getHighestKeywordTypeNumber(void);
277 313
278/** 314/**
279 * Load multiple libraries as specified by the user. 315 * Get a long description for the meta type.
280 * @param config a string given by the user that defines which 316 *
281 * libraries should be loaded. Has the format 317 * @param type meta type to get a UTF-8 description for
282 * "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*". 318 * @return NULL if the type is not known, otherwise
283 * For example, 319 * an English (locale: C) string describing the type;
284 * libextractor_mp3.so:libextractor_ogg.so loads the 320 * translate using 'dgettext ("libextractor", rval)'
285 * mp3 and the ogg library. The '-' before the LIBRARYNAME
286 * indicates that the library should be added to the end
287 * of the library list (addLibraryLast).
288 * @param prev the previous list of libraries, may be NULL
289 * @return the new list of libraries, equal to prev iff an error occured
290 * or if config was empty (or NULL).
291 */ 321 */
292EXTRACTOR_ExtractorList * 322const char *
293EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev, 323EXTRACTOR_metatype_to_description(enum EXTRACTOR_MetaType type);
294 const char * config); 324
295 325
296/** 326/**
297 * Add a library for keyword extraction. 327 * Return the highest type number, exclusive as in [0,max).
298 * @param prev the previous list of libraries, may be NULL 328 *
299 * @param library the name of the library 329 * @return highest legal metatype number for this version of libextractor
300 * @return the new list of libraries, equal to prev iff an error occured
301 */ 330 */
302EXTRACTOR_ExtractorList * 331enum EXTRACTOR_MetaType
303EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev, 332EXTRACTOR_metatype_get_max (void);
304 const char * library); 333
305 334
306/** 335/**
307 * Add a library for keyword extraction at the END of the list. 336 * Type of a function that libextractor calls for each
308 * @param prev the previous list of libraries, may be NULL 337 * meta data item found.
309 * @param library the name of the library 338 *
310 * @return the new list of libraries, always equal to prev 339 * @param cls closure (user-defined)
311 * except if prev was NULL and no error occurs 340 * @param plugin_name name of the plugin that produced this value;
312 */ 341 * special values can be used (e.g. '<zlib>' for zlib being
313EXTRACTOR_ExtractorList * 342 * used in the main libextractor library and yielding
314EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev, 343 * meta data).
315 const char * library); 344 * @param type libextractor-type describing the meta data
316 345 * @param format basic format information about data
346 * @param data_mime_type mime-type of data (not of the original file);
347 * can be NULL (if mime-type is not known)
348 * @param data actual meta-data found
349 * @param data_len number of bytes in data
350 * @return 0 to continue extracting, 1 to abort
351 */
352typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls,
353 const char *plugin_name,
354 enum EXTRACTOR_MetaType type,
355 enum EXTRACTOR_MetaFormat format,
356 const char *data_mime_type,
357 const char *data,
358 size_t data_len);
359
360
317/** 361/**
318 * Remove a library for keyword extraction. 362 * Signature of the extract method that each plugin
319 * @param prev the current list of libraries 363 * must provide.
320 * @param library the name of the library to remove 364 *
321 * @return the reduced list, unchanged if the library was not loaded 365 * @param data data to process
366 * @param datasize number of bytes available in data
367 * @param proc function to call for meta data found
368 * @param proc_cls cls argument to proc
369 * @param options options for this plugin; can be NULL
370 * @return 0 if all calls to proc returned 0, otherwise 1
322 */ 371 */
323EXTRACTOR_ExtractorList * 372typedef int (*EXTRACTOR_ExtractMethod)(const char *data,
324EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev, 373 size_t datasize,
325 const char * library); 374 EXTRACTOR_MetaDataProcessor proc,
375 void *proc_cls,
376 const char *options);
326 377
327/**
328 * Remove all extractors.
329 * @param libraries the list of extractors
330 */
331void EXTRACTOR_removeAll(EXTRACTOR_ExtractorList * libraries);
332 378
333/** 379/**
334 * Extract keywords from a file using the available extractors. 380 * Linked list of extractor plugins. An application builds this list
335 * @param extractor the list of extractor libraries 381 * by telling libextractor to load various keyword-extraction
336 * @param filename the name of the file 382 * plugins. Libraries can also be unloaded (removed from this list,
337 * @return the list of keywords found in the file, NULL if none 383 * see EXTRACTOR_plugin_remove).
338 * were found (or other errors)
339 */ 384 */
340EXTRACTOR_KeywordList * 385struct EXTRACTOR_PluginList;
341EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor,
342 const char * filename);
343 386
344 387
345/** 388/**
346 * Extract keywords from a buffer in memory 389 * Load the default set of plugins. The default can be changed
347 * using the available extractors. 390 * by setting the LIBEXTRACTOR_LIBRARIES environment variable;
391 * If it is set to "env", then this function will return
392 * EXTRACTOR_plugin_add_config (NULL, env, flags).
393 *
394 * If LIBEXTRACTOR_LIBRARIES is not set, the function will attempt
395 * to locate the installed plugins and load all of them.
396 * The directory where the code will search for plugins is typically
397 * automatically determined; it can be specified explicitly using the
398 * "LIBEXTRACTOR_PREFIX" environment variable.
399 *
400 * This environment variable must be set to the precise directory with
401 * the plugins (i.e. "/usr/lib/libextractor", not "/usr"). Note that
402 * setting the environment variable will disable all of the methods
403 * that are typically used to determine the location of plugins.
404 * Multiple paths can be specified using ':' to separate them.
348 * 405 *
349 * @param extractor the list of extractor libraries 406 * @param flags options for all of the plugins loaded
350 * @param data the data of the file 407 * @return the default set of plugins, NULL if no plugins were found
351 * @param size the number of bytes in data
352 * @return the list of keywords found in the file, NULL if none
353 * were found (or other errors)
354 */ 408 */
355EXTRACTOR_KeywordList * 409struct EXTRACTOR_PluginList *
356EXTRACTOR_getKeywords2(EXTRACTOR_ExtractorList * extractor, 410EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags);
357 const void * data,
358 size_t size);
359 411
360 412
361/** 413/**
362 * Remove duplicate keywords from the list. 414 * Add a library for keyword extraction.
363 * @param list the original keyword list (destroyed in the process!) 415 *
364 * @param options a set of options (DUPLICATES_XXXX) 416 * @param prev the previous list of libraries, may be NULL
365 * @return a list of keywords without duplicates 417 * @param library the name of the library (full path)
418 * @param options options to give to the library
419 * @param flags options to use
420 * @return the new list of libraries, equal to prev iff an error occurred
366 */ 421 */
367EXTRACTOR_KeywordList * 422struct EXTRACTOR_PluginList *
368EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list, 423EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev,
369 unsigned int options); 424 const char * library,
425 const char *options,
426 enum EXTRACTOR_Options flags);
370 427
371 428
372/** 429/**
373 * Remove empty (all-whitespace) keywords from the list. 430 * Add a library for keyword extraction at the END of the list.
374 * @param list the original keyword list (destroyed in the process!) 431 * @param prev the previous list of libraries, may be NULL
375 * @return a list of keywords without duplicates 432 * @param library the name of the library (full path)
433 * @param options options to give to the library
434 * @param flags options to use
435 * @return the new list of libraries, always equal to prev
436 * except if prev was NULL and no error occurs
376 */ 437 */
377EXTRACTOR_KeywordList * 438struct EXTRACTOR_PluginList *
378EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list); 439EXTRACTOR_plugin_add_last(struct EXTRACTOR_PluginList *prev,
440 const char *library,
441 const char *options,
442 enum EXTRACTOR_Options flags);
379 443
380/**
381 * Remove keywords of a particular type from the list.
382 * @param list the original keyword list (altered in the process!)
383 * @param type the type to remove
384 * @return a list of keywords without entries of given type
385 */
386EXTRACTOR_KeywordList *
387EXTRACTOR_removeKeywordsOfType(EXTRACTOR_KeywordList * list,
388 EXTRACTOR_KeywordType type);
389 444
390/** 445/**
391 * Print a keyword list to a file. 446 * Load multiple libraries as specified by the user.
392 * For debugging. 447 *
393 * @param handle the file to write to (stdout, stderr), must NOT be NULL 448 * @param config a string given by the user that defines which
394 * @param keywords the list of keywords to print, may be NULL 449 * libraries should be loaded. Has the format
395 */ 450 * "[[-]LIBRARYNAME[(options)][:[-]LIBRARYNAME[(options)]]]*".
396void EXTRACTOR_printKeywords(FILE * handle, 451 * For example,
397 EXTRACTOR_KeywordList * keywords); 452 * /usr/lib/libextractor/libextractor_mp3.so:/usr/lib/libextractor/libextractor_ogg.so loads the
398 453 * mp3 and the ogg library. The '-' before the LIBRARYNAME
399/** 454 * indicates that the library should be added to the end
400 * Free the memory occupied by the keyword list (and the 455 * of the library list (EXTRACTOR_plugin_add_last).
401 * keyword strings in it!) 456 * @param prev the previous list of libraries, may be NULL
402 * @param keywords the list to free 457 * @param flags options to use
458 * @return the new list of libraries, equal to prev iff an error occurred
459 * or if config was empty (or NULL).
403 */ 460 */
404void EXTRACTOR_freeKeywords(EXTRACTOR_KeywordList * keywords); 461struct EXTRACTOR_PluginList *
462EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev,
463 const char *config,
464 enum EXTRACTOR_Options flags);
405 465
466
406/** 467/**
407 * Extract the last keyword that of the given type from the keyword list. 468 * Remove a plugin from a list.
408 * @param type the type of the keyword 469 *
409 * @param keywords the keyword list 470 * @param prev the current list of plugins
410 * @return the last matching keyword, or NULL if none matches; 471 * @param library the name of the plugin to remove (full path)
411 * the string returned is aliased in the keywords list and must 472 * @return the reduced list, unchanged if the plugin was not loaded
412 * not be freed or manipulated by the client. It will become
413 * invalid once the keyword list is freed.
414 */ 473 */
415const char * EXTRACTOR_extractLast(EXTRACTOR_KeywordType type, 474struct EXTRACTOR_PluginList *
416 EXTRACTOR_KeywordList * keywords); 475EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev,
476 const char * library);
417 477
418/**
419 * Extract the last keyword of the given string from the keyword list.
420 * @param type the string describing the type of the keyword
421 * @param keywords the keyword list
422 * @return the last matching keyword, or NULL if none matches;
423 * the string returned is aliased in the keywords list and must
424 * not be freed or manipulated by the client. It will become
425 * invalid once the keyword list is freed.
426 */
427const char * EXTRACTOR_extractLastByString(const char * type,
428 EXTRACTOR_KeywordList * keywords);
429 478
430/** 479/**
431 * Count the number of keywords in the keyword list. 480 * Remove all plugins from the given list (destroys the list).
432 * @param keywords the keyword list 481 *
433 * @return the number of keywords in the list 482 * @param plugins the list of plugins
434 */ 483 */
435unsigned int EXTRACTOR_countKeywords(EXTRACTOR_KeywordList * keywords); 484void
485EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins);
436 486
437 487
438/** 488/**
439 * This function can be used to decode the binary data 489 * Extract keywords from a file using the given set of plugins.
440 * encoded in the libextractor metadata (i.e. for
441 * the thumbnails).
442 * 490 *
443 * @param in 0-terminated string from the meta-data 491 * @param plugins the list of plugins to use
444 * @return 1 on error, 0 on success 492 * @param filename the name of the file, can be NULL if data is not NULL
493 * @param data data of the file in memory, can be NULL (in which
494 * case libextractor will open file) if filename is not NULL
495 * @param size number of bytes in data, ignored if data is NULL
496 * @param proc function to call for each meta data item found
497 * @param proc_cls cls argument to proc
445 */ 498 */
446int EXTRACTOR_binaryDecode(const char * in, 499void
447 unsigned char ** out, 500EXTRACTOR_extract(struct EXTRACTOR_PluginList *plugins,
448 size_t * outSize); 501 const char *filename,
502 const void *data,
503 size_t size,
504 EXTRACTOR_MetaDataProcessor proc,
505 void *proc_cls);
449 506
450 507
451/** 508/**
452 * Encode the given binary data object 509 * Simple EXTRACTOR_MetaDataProcessor implementation that simply
453 * as a 0-terminated C-string according 510 * prints the extracted meta data to the given file. Only prints
454 * to the LE binary data encoding standard. 511 * those keywords that are in UTF-8 format.
455 * 512 *
456 * @return NULL on error, the 0-terminated 513 * @param handle the file to write to (stdout, stderr), must NOT be NULL,
457 * encoding otherwise 514 * must be of type "FILE *".
515 * @param plugin_name name of the plugin that produced this value
516 * @param type libextractor-type describing the meta data
517 * @param format basic format information about data
518 * @param data_mime_type mime-type of data (not of the original file);
519 * can be NULL (if mime-type is not known)
520 * @param data actual meta-data found
521 * @param data_len number of bytes in data
522 * @return non-zero if printing failed, otherwise 0.
458 */ 523 */
459char * EXTRACTOR_binaryEncode(const unsigned char * data, 524int
460 size_t size); 525EXTRACTOR_meta_data_print(void * handle,
526 const char *plugin_name,
527 enum EXTRACTOR_MetaType type,
528 enum EXTRACTOR_MetaFormat format,
529 const char *data_mime_type,
530 const char *data,
531 size_t data_len);
461 532
462 533
463#if 0 /* keep Emacsens' auto-indent happy */ 534#if 0 /* keep Emacsens' auto-indent happy */
diff --git a/src/include/winproc.h b/src/include/winproc.h
deleted file mode 100644
index d1cd41b..0000000
--- a/src/include/winproc.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2001, 2002, 2003, 2003, 2005 Christian Grothoff (and other contributing authors)
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20
21/**
22 * @file include/winproc.h
23 * @brief Definitions for MS Windows
24 * @author Nils Durner
25 * @note This file differs from GNUnet's winproc.h
26 */
27
28#ifndef WINPROC_H
29#define WINPROC_H
30
31#include "platform.h"
32
33#ifdef __cplusplus
34extern "C" {
35#endif
36
37void InitWinEnv();
38void ShutdownWinEnv();
39
40#endif
41
42#ifdef __cplusplus
43} /* extern "C" */
44#endif