diff options
author | Christian Grothoff <christian@grothoff.org> | 2010-01-13 13:42:34 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2010-01-13 13:42:34 +0000 |
commit | 40b9c39604e1d2d9db792940500aa48f933d5588 (patch) | |
tree | 62875e81b544532bcfddcf7333ec330c31b0ff25 | |
parent | 8372891411f4e97914386b4626f1dcdb5ec167e8 (diff) | |
download | libextractor-40b9c39604e1d2d9db792940500aa48f933d5588.tar.gz libextractor-40b9c39604e1d2d9db792940500aa48f933d5588.zip |
adding support for tail extraction, documenting, using it for ID3v1
-rw-r--r-- | doc/extractor.texi | 212 | ||||
-rw-r--r-- | doc/version.texi | 2 | ||||
-rw-r--r-- | src/main/extractor.c | 361 | ||||
-rw-r--r-- | src/plugins/Makefile.am | 8 | ||||
-rw-r--r-- | src/plugins/id3_extractor.c | 305 | ||||
-rw-r--r-- | src/plugins/mp3_extractor.c | 275 |
6 files changed, 753 insertions, 410 deletions
diff --git a/doc/extractor.texi b/doc/extractor.texi index d382aed..4bf6743 100644 --- a/doc/extractor.texi +++ b/doc/extractor.texi | |||
@@ -10,8 +10,10 @@ | |||
10 | @c %**end of header | 10 | @c %**end of header |
11 | @copying | 11 | @copying |
12 | This manual is for GNU libextractor | 12 | This manual is for GNU libextractor |
13 | (version @value{VERSION}, @value{UPDATED}), | 13 | (version @value{VERSION}, @value{UPDATED}). |
14 | which is GNU's library for meta data extraction. | 14 | |
15 | GNU libextractor is a GNU package. | ||
16 | |||
15 | 17 | ||
16 | Copyright @copyright{} 2007, 2010 Christian Grothoff | 18 | Copyright @copyright{} 2007, 2010 Christian Grothoff |
17 | 19 | ||
@@ -73,7 +75,7 @@ Free Documentation License". | |||
73 | @code{NULL} | 75 | @code{NULL} |
74 | @end macro | 76 | @end macro |
75 | 77 | ||
76 | @macro le{} | 78 | @macro gnule{} |
77 | @acronym{GNU libextractor} | 79 | @acronym{GNU libextractor} |
78 | @end macro | 80 | @end macro |
79 | 81 | ||
@@ -84,24 +86,22 @@ Free Documentation License". | |||
84 | @insertcopying | 86 | @insertcopying |
85 | @end ifnottex | 87 | @end ifnottex |
86 | 88 | ||
87 | GNU libextractor is a GNU package. | ||
88 | |||
89 | @menu | 89 | @menu |
90 | * Introduction:: What is @le{}. | 90 | * Introduction:: What is @gnule{}. |
91 | * Preparation:: What you should do before using the library. | 91 | * Preparation:: What you should do before using the library. |
92 | * Generalities:: General library functions and data types. | 92 | * Generalities:: General library functions and data types. |
93 | * Extracting meta data:: How to use @le{} to obtain meta data. | 93 | * Extracting meta data:: How to use @gnule{} to obtain meta data. |
94 | * Language bindings:: How to use @le{} from languages other than C. | 94 | * Language bindings:: How to use @gnule{} from languages other than C. |
95 | * Utility functions:: Utility functions of @le{}. | 95 | * Utility functions:: Utility functions of @gnule{}. |
96 | * Existing Plugins:: What plugins are available. | 96 | * Existing Plugins:: What plugins are available. |
97 | * Writing new Plugins:: How to write new plugins for @le{}. | 97 | * Writing new Plugins:: How to write new plugins for @gnule{}. |
98 | * Internal utility functions:: Utility functions of @le{} for writing plugins. | 98 | * Internal utility functions:: Utility functions of @gnule{} for writing plugins. |
99 | * Reporting bugs:: How to report bugs or request new features. | 99 | * Reporting bugs:: How to report bugs or request new features. |
100 | 100 | ||
101 | Appendices | 101 | Appendices |
102 | 102 | ||
103 | * Copying:: The GNU General Public License says how you | 103 | * Copying:: The GNU General Public License says how you |
104 | can copy and share some parts of @le{}. | 104 | can copy and share some parts of @gnule{}. |
105 | 105 | ||
106 | Indices | 106 | Indices |
107 | 107 | ||
@@ -120,7 +120,7 @@ Indices | |||
120 | @chapter Introduction | 120 | @chapter Introduction |
121 | 121 | ||
122 | @cindex error handling | 122 | @cindex error handling |
123 | @le{} is GNU's library for extracting meta data from | 123 | @gnule{} is GNU's library for extracting meta data from |
124 | files. Meta data includes format information (such as mime type, | 124 | files. Meta data includes format information (such as mime type, |
125 | image dimensions, color depth, recording frequency), content | 125 | image dimensions, color depth, recording frequency), content |
126 | descriptions (such as document title or document description) and | 126 | descriptions (such as document title or document description) and |
@@ -128,55 +128,55 @@ copyright information (such as license, author and contributors). | |||
128 | Meta data extraction is an inherently uncertain business --- a parse | 128 | Meta data extraction is an inherently uncertain business --- a parse |
129 | error can be a corrupt file, an incompatibility in the file format | 129 | error can be a corrupt file, an incompatibility in the file format |
130 | version, an entirely different file format or a bug in the parser. As | 130 | version, an entirely different file format or a bug in the parser. As |
131 | a result of this uncertainty, @le{} deliberately | 131 | a result of this uncertainty, @gnule{} deliberately |
132 | avoids ever reporting any errors. Unexpected file contents simply | 132 | avoids ever reporting any errors. Unexpected file contents simply |
133 | result in less or possibly no meta data being extracted. | 133 | result in less or possibly no meta data being extracted. |
134 | 134 | ||
135 | @cindex plugin | 135 | @cindex plugin |
136 | @le{} uses plugins to handle various file formats. | 136 | @gnule{} uses plugins to handle various file formats. |
137 | Technically a plugin can support multiple file formats; however, most | 137 | Technically a plugin can support multiple file formats; however, most |
138 | plugins only support one particular format. By default, | 138 | plugins only support one particular format. By default, |
139 | @le{} will use all plugins that are available and found | 139 | @gnule{} will use all plugins that are available and found |
140 | in the plugin installation directory. Applications can | 140 | in the plugin installation directory. Applications can |
141 | request the use of only specific plugins or the exclusion of | 141 | request the use of only specific plugins or the exclusion of |
142 | certain plugins. | 142 | certain plugins. |
143 | 143 | ||
144 | @le{} is distributed with the @command{extract} | 144 | @gnule{} is distributed with the @command{extract} |
145 | command@footnote{Some distributions ship @command{extract} in a | 145 | command@footnote{Some distributions ship @command{extract} in a |
146 | separate package.} which is a command-line tool for extracting | 146 | separate package.} which is a command-line tool for extracting |
147 | meta data. @command{extract} is given a list of filenames and | 147 | meta data. @command{extract} is given a list of filenames and |
148 | prints the resulting meta data to the console. The @command{extract} | 148 | prints the resulting meta data to the console. The @command{extract} |
149 | source code also serves as an advanced example for how to use | 149 | source code also serves as an advanced example for how to use |
150 | @le{}. | 150 | @gnule{}. |
151 | 151 | ||
152 | This manual focuses on providing documentation for writing software | 152 | This manual focuses on providing documentation for writing software |
153 | with @le{}. The only relevant parts for end-users | 153 | with @gnule{}. The only relevant parts for end-users |
154 | are the chapter on compiling and installing @le{} | 154 | are the chapter on compiling and installing @gnule{} |
155 | (@xref{Preparation}.). Also, the chapter on existing plugins may be of | 155 | (@xref{Preparation}.). Also, the chapter on existing plugins may be of |
156 | interest (@xref{Existing Plugins}.). Additional documentation for | 156 | interest (@xref{Existing Plugins}.). Additional documentation for |
157 | end-users can be found in the man page on @command{extract} (using | 157 | end-users can be found in the man page on @command{extract} (using |
158 | @verb{|man extract|}). | 158 | @verb{|man extract|}). |
159 | 159 | ||
160 | @cindex license | 160 | @cindex license |
161 | @le{} is licensed under the GNU General Public License. The | 161 | @gnule{} is licensed under the GNU General Public License. The |
162 | developers have frequently received requests to license GNU | 162 | developers have frequently received requests to license GNU |
163 | libextractor under alternative terms. However, @le{} | 163 | libextractor under alternative terms. However, @gnule{} |
164 | borrows plenty of GPL-licensed code from various other projects. | 164 | borrows plenty of GPL-licensed code from various other projects. |
165 | Hence we cannot change the license (even if we wanted to).@footnote{It | 165 | Hence we cannot change the license (even if we wanted to).@footnote{It |
166 | may be possible to switch to GPLv3 in the future. For this, an audit | 166 | may be possible to switch to GPLv3 in the future. For this, an audit |
167 | of the license status of our dependencies would be required. The new | 167 | of the license status of our dependencies would be required. The new |
168 | code that was developed specifically for @le{} has | 168 | code that was developed specifically for @gnule{} has |
169 | always been licensed under GPLv2 @emph{or any later version}.} | 169 | always been licensed under GPLv2 @emph{or any later version}.} |
170 | 170 | ||
171 | @node Preparation | 171 | @node Preparation |
172 | @chapter Preparation | 172 | @chapter Preparation |
173 | 173 | ||
174 | Compiling @le{} follows the standard GNU autotools | 174 | Compiling @gnule{} follows the standard GNU autotools |
175 | build process using @command{configure} and @command{make}. For | 175 | build process using @command{configure} and @command{make}. For |
176 | details, read the @file{INSTALL} file and query | 176 | details, read the @file{INSTALL} file and query |
177 | @verb{|./configure --help|} for additional options. | 177 | @verb{|./configure --help|} for additional options. |
178 | 178 | ||
179 | @le{} has various dependencies, some of which are optional. | 179 | @gnule{} has various dependencies, some of which are optional. |
180 | Instead of specifying the names of the software packages, we | 180 | Instead of specifying the names of the software packages, we |
181 | will give the list in terms of the names of the respective | 181 | will give the list in terms of the names of the respective |
182 | Debian (unstable) packages that should be installed. | 182 | Debian (unstable) packages that should be installed. |
@@ -246,29 +246,29 @@ Please notify us if we missed some dependencies (note that the list is | |||
246 | supposed to only list direct dependencies, not transitive | 246 | supposed to only list direct dependencies, not transitive |
247 | dependencies). | 247 | dependencies). |
248 | 248 | ||
249 | Once you have compiled and installed @le{}, you should have a file | 249 | Once you have compiled and installed @gnule{}, you should have a file |
250 | @file{extractor.h} installed in your @file{include/} directory. This | 250 | @file{extractor.h} installed in your @file{include/} directory. This |
251 | file should be the starting point for your C and C++ development with | 251 | file should be the starting point for your C and C++ development with |
252 | @le{}. The build process also installs the @file{extract} binary and | 252 | @gnule{}. The build process also installs the @file{extract} binary and |
253 | man pages for @file{extract} and @le{}. The @file{extract} man page | 253 | man pages for @file{extract} and @gnule{}. The @file{extract} man page |
254 | documents the @file{extract} tool. The @le{} man page gives a brief | 254 | documents the @file{extract} tool. The @gnule{} man page gives a brief |
255 | summary of the C API for @le{}. | 255 | summary of the C API for @gnule{}. |
256 | 256 | ||
257 | @cindex packaging | 257 | @cindex packaging |
258 | @cindex directory structure | 258 | @cindex directory structure |
259 | @cindex plugin | 259 | @cindex plugin |
260 | @cindex environment variables | 260 | @cindex environment variables |
261 | @vindex LIBEXTRACTOR_PREFIX | 261 | @vindex LIBEXTRACTOR_PREFIX |
262 | When you install @le{}, various plugins will be | 262 | When you install @gnule{}, various plugins will be |
263 | installed in the @file{lib/libextractor/} directory. The main library | 263 | installed in the @file{lib/libextractor/} directory. The main library |
264 | will be installed as @file{lib/libextractor.so}. Note that | 264 | will be installed as @file{lib/libextractor.so}. Note that |
265 | @le{} will attempt to find the plugins relative to the | 265 | @gnule{} will attempt to find the plugins relative to the |
266 | path of the main library. Consequently, a package manager can move | 266 | path of the main library. Consequently, a package manager can move |
267 | the library and its plugins to a different location later --- as long | 267 | the library and its plugins to a different location later --- as long |
268 | as the relative path between the main library and the plugins is | 268 | as the relative path between the main library and the plugins is |
269 | preserved. As a method of last resort, the user can specify an | 269 | preserved. As a method of last resort, the user can specify an |
270 | environment variable @verb{|LIBEXTRACTOR_PREFIX|}. If | 270 | environment variable @verb{|LIBEXTRACTOR_PREFIX|}. If |
271 | @le{} cannot locate a plugin, it will look in | 271 | @gnule{} cannot locate a plugin, it will look in |
272 | @verb{|LIBEXTRACTOR_PREFIX/lib/libextractor/|}. | 272 | @verb{|LIBEXTRACTOR_PREFIX/lib/libextractor/|}. |
273 | 273 | ||
274 | @section Note to package maintainers | 274 | @section Note to package maintainers |
@@ -304,9 +304,9 @@ resources. | |||
304 | @node Generalities | 304 | @node Generalities |
305 | @chapter Generalities | 305 | @chapter Generalities |
306 | 306 | ||
307 | Each public symbol exported by @le{} has the prefix | 307 | Each public symbol exported by @gnule{} has the prefix |
308 | @verb{|EXTRACTOR_|}. All-caps names are used for constants. For the | 308 | @verb{|EXTRACTOR_|}. All-caps names are used for constants. For the |
309 | impatient, the minimal C code for using @le{} (on the | 309 | impatient, the minimal C code for using @gnule{} (on the |
310 | executing binary itself) looks like this: | 310 | executing binary itself) looks like this: |
311 | 311 | ||
312 | @verbatim | 312 | @verbatim |
@@ -326,6 +326,13 @@ int main(int argc, char ** argv) { | |||
326 | @node Extracting meta data | 326 | @node Extracting meta data |
327 | @chapter Extracting meta data | 327 | @chapter Extracting meta data |
328 | 328 | ||
329 | In order to extract meta data with @gnule{} you first need to | ||
330 | load the respective plugins and then call the extraction API | ||
331 | with the plugins and the data to process. This section | ||
332 | documents how to load and unload plugins, the various types | ||
333 | and formats in which meta data is returned to the application | ||
334 | and finally the extraction API itself. | ||
335 | |||
329 | @menu | 336 | @menu |
330 | * Plugin management:: How to load and unload plugins | 337 | * Plugin management:: How to load and unload plugins |
331 | * Meta types:: About meta types | 338 | * Meta types:: About meta types |
@@ -350,7 +357,7 @@ from multiple threads at the same time is not safe. Creating multiple | |||
350 | plugin lists and using them concurrently is supported as long as | 357 | plugin lists and using them concurrently is supported as long as |
351 | the @code{EXTRACTOR_OPTION_IN_PROCESS} option is not used. | 358 | the @code{EXTRACTOR_OPTION_IN_PROCESS} option is not used. |
352 | 359 | ||
353 | Generally, @le{} is fully thread-safe and mostly reentrant. | 360 | Generally, @gnule{} is fully thread-safe and mostly reentrant. |
354 | All plugin code is expected to be reentrant and state-less, | 361 | All plugin code is expected to be reentrant and state-less, |
355 | but due to the extensive use of 3rd party libraries this cannot | 362 | but due to the extensive use of 3rd party libraries this cannot |
356 | be guaranteed. Hence plugins are executed (by default) out of | 363 | be guaranteed. Hence plugins are executed (by default) out of |
@@ -402,7 +409,7 @@ Loads and unloads plugins based on a configuration string, modifying the existin | |||
402 | @deftypefun {struct EXTRACTOR_PluginList *} EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags) | 409 | @deftypefun {struct EXTRACTOR_PluginList *} EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags) |
403 | @findex EXTRACTOR_plugin_add_defaults | 410 | @findex EXTRACTOR_plugin_add_defaults |
404 | 411 | ||
405 | Loads all of the plugins in the plugin directory. This function is what most @le{} applications should use to setup the plugins. | 412 | Loads all of the plugins in the plugin directory. This function is what most @gnule{} applications should use to setup the plugins. |
406 | @end deftypefun | 413 | @end deftypefun |
407 | 414 | ||
408 | 415 | ||
@@ -414,14 +421,14 @@ Loads all of the plugins in the plugin directory. This function is what most @l | |||
414 | @tindex enum EXTRACTOR_MetaType | 421 | @tindex enum EXTRACTOR_MetaType |
415 | @findex EXTRACTOR_metatype_get_max | 422 | @findex EXTRACTOR_metatype_get_max |
416 | 423 | ||
417 | @verb{|enum EXTRACTOR_MetaType|} is a C enum which defines a list of over 100 different types of meta data. The total number can differ between different @le{} releases; the maximum value for the current release can be obtained using the @verb{|EXTRACTOR_metatype_get_max|} function. All values in this enumeration are of the form @verb{|EXTRACTOR_METATYPE_XXX|}. | 424 | @verb{|enum EXTRACTOR_MetaType|} is a C enum which defines a list of over 100 different types of meta data. The total number can differ between different @gnule{} releases; the maximum value for the current release can be obtained using the @verb{|EXTRACTOR_metatype_get_max|} function. All values in this enumeration are of the form @verb{|EXTRACTOR_METATYPE_XXX|}. |
418 | 425 | ||
419 | @deftypefun {const char *} EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type) | 426 | @deftypefun {const char *} EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type) |
420 | @findex EXTRACTOR_metatype_to_string | 427 | @findex EXTRACTOR_metatype_to_string |
421 | @cindex gettext | 428 | @cindex gettext |
422 | @cindex internationalization | 429 | @cindex internationalization |
423 | 430 | ||
424 | The function @verb{|EXTRACTOR_metatype_to_string|} can be used to obtain a short English string @samp{s} describing the meta data type. The string can be translated into other languages using GNU gettext with the domain set to @le{} (@verb{|dgettext("libextractor", s)|}). | 431 | The function @verb{|EXTRACTOR_metatype_to_string|} can be used to obtain a short English string @samp{s} describing the meta data type. The string can be translated into other languages using GNU gettext with the domain set to @gnule{} (@verb{|dgettext("libextractor", s)|}). |
425 | @end deftypefun | 432 | @end deftypefun |
426 | 433 | ||
427 | @deftypefun {const char *} EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type) | 434 | @deftypefun {const char *} EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type) |
@@ -429,7 +436,7 @@ The function @verb{|EXTRACTOR_metatype_to_string|} can be used to obtain a short | |||
429 | @cindex gettext | 436 | @cindex gettext |
430 | @cindex internationalization | 437 | @cindex internationalization |
431 | 438 | ||
432 | The function @verb{|EXTRACTOR_metatype_to_description|} can be used to obtain a longer English string @samp{s} describing the meta data type. The description may be empty if the short description returned by @code{EXTRACTOR_metatype_to_string} is already comprehensive. The string can be translated into other languages using GNU gettext with the domain set to @le{} (@verb{|dgettext("libextractor", s)|}). | 439 | The function @verb{|EXTRACTOR_metatype_to_description|} can be used to obtain a longer English string @samp{s} describing the meta data type. The description may be empty if the short description returned by @code{EXTRACTOR_metatype_to_string} is already comprehensive. The string can be translated into other languages using GNU gettext with the domain set to @gnule{} (@verb{|dgettext("libextractor", s)|}). |
433 | @end deftypefun | 440 | @end deftypefun |
434 | 441 | ||
435 | 442 | ||
@@ -490,11 +497,11 @@ Return 0 to continue extracting, 1 to abort. | |||
490 | @cindex threads | 497 | @cindex threads |
491 | @cindex thread-safety | 498 | @cindex thread-safety |
492 | 499 | ||
493 | This is the main function for extracting keywords with @le{}. The first argument is a plugin list which specifies the set of plugins that should be used for extracting meta data. The @samp{filename} argument is optional and can be used to specify the name of a file to process. If @samp{filename} is NULL, then the @samp{data} argument must point to the in-memory data to extract meta data from. If @samp{filename} is non-NULL, @samp{data} can be NULL. If @samp{data} is non-null, then @samp{size} is the size of @samp{data} in bytes. Otherwise @samp{size} should be zero. For each meta data item found, GNU libextractor will call the @samp{proc} function, passing @samp{proc_cls} as the first argument to @samp{proc}. The other arguments to @samp{proc} depend on the specific meta data found. | 500 | This is the main function for extracting keywords with @gnule{}. The first argument is a plugin list which specifies the set of plugins that should be used for extracting meta data. The @samp{filename} argument is optional and can be used to specify the name of a file to process. If @samp{filename} is NULL, then the @samp{data} argument must point to the in-memory data to extract meta data from. If @samp{filename} is non-NULL, @samp{data} can be NULL. If @samp{data} is non-null, then @samp{size} is the size of @samp{data} in bytes. Otherwise @samp{size} should be zero. For each meta data item found, GNU libextractor will call the @samp{proc} function, passing @samp{proc_cls} as the first argument to @samp{proc}. The other arguments to @samp{proc} depend on the specific meta data found. |
494 | 501 | ||
495 | @cindex SIGBUS | 502 | @cindex SIGBUS |
496 | @cindex bus error | 503 | @cindex bus error |
497 | Meta data extraction should never really fail --- at worst, @le{} should not call @samp{proc} with any meta data. By design, @le{} should never crash or leak memory, even given corrupt files as input. Note however, that running @le{} on a corrupt file system (or incorrectly @verb{|mmap|}ed files) can result in the operating system sending a SIGBUS (bus error) to the process. While @le{} runs plugins out-of-process, it first maps the file into memory and then attempts to decompress it. During decompression it is possible to encounter a SIGBUS. @le{} will @emph{not} attempt to catch this signal and your application is likely to crash. Note again that this should only happen if the file @emph{system} is corrupt (not if individual files are corrupt). If this is not acceptable, you might want to consider running @le{} itself also out-of-process (as done, for example, by @url{http://grothoff.org/christian/doodle/,doodle}). | 504 | Meta data extraction should never really fail --- at worst, @gnule{} should not call @samp{proc} with any meta data. By design, @gnule{} should never crash or leak memory, even given corrupt files as input. Note however, that running @gnule{} on a corrupt file system (or incorrectly @verb{|mmap|}ed files) can result in the operating system sending a SIGBUS (bus error) to the process. While @gnule{} runs plugins out-of-process, it first maps the file into memory and then attempts to decompress it. During decompression it is possible to encounter a SIGBUS. @gnule{} will @emph{not} attempt to catch this signal and your application is likely to crash. Note again that this should only happen if the file @emph{system} is corrupt (not if individual files are corrupt). If this is not acceptable, you might want to consider running @gnule{} itself also out-of-process (as done, for example, by @url{http://grothoff.org/christian/doodle/,doodle}). |
498 | 505 | ||
499 | @end deftypefun | 506 | @end deftypefun |
500 | 507 | ||
@@ -509,7 +516,7 @@ Meta data extraction should never really fail --- at worst, @le{} should not cal | |||
509 | @cindex PHP | 516 | @cindex PHP |
510 | @cindex Ruby | 517 | @cindex Ruby |
511 | 518 | ||
512 | @le{} works immediately with C and C++ code. Bindings for Java, Mono, Ruby, Perl, PHP and Python are available for download from the main @le{} website. Documentation for these bindings (if available) is part of the downloads for the respective binding. In all cases, a full installation of the C library is required before the binding can be installed. | 519 | @gnule{} works immediately with C and C++ code. Bindings for Java, Mono, Ruby, Perl, PHP and Python are available for download from the main @gnule{} website. Documentation for these bindings (if available) is part of the downloads for the respective binding. In all cases, a full installation of the C library is required before the binding can be installed. |
513 | 520 | ||
514 | @section Java | 521 | @section Java |
515 | 522 | ||
@@ -571,7 +578,7 @@ This binding is undocumented at this point. | |||
571 | @cindex concurrency | 578 | @cindex concurrency |
572 | @cindex threads | 579 | @cindex threads |
573 | @cindex thread-safety | 580 | @cindex thread-safety |
574 | This chapter describes various utility functions for @le{} usage. All of the functions are reentrant. | 581 | This chapter describes various utility functions for @gnule{} usage. All of the functions are reentrant. |
575 | 582 | ||
576 | @menu | 583 | @menu |
577 | * Utility Constants:: | 584 | * Utility Constants:: |
@@ -724,6 +731,115 @@ in-process (making it easier to debug) and without any of the other | |||
724 | plugins. | 731 | plugins. |
725 | 732 | ||
726 | 733 | ||
734 | @section Example for a minimal extract method | ||
735 | |||
736 | The following example shows how a plugin can return the mime type of | ||
737 | a file. | ||
738 | @example | ||
739 | |||
740 | int | ||
741 | EXTRACTOR_mymime_extract | ||
742 | (const char *data, | ||
743 | size_t data_size, | ||
744 | EXTRACTOR_MetaDataProcessor proc, | ||
745 | void *proc_cls, | ||
746 | const char * options) | ||
747 | { | ||
748 | if (data_size < 4) | ||
749 | return 0; | ||
750 | if (0 != memcmp (data, "\177ELF", 4)) | ||
751 | return 0; | ||
752 | if (0 != proc (proc_cls, | ||
753 | "mymime", | ||
754 | EXTRACTOR_METATYPE_MIMETYPE, | ||
755 | EXTRACTOR_METAFORMAT_UTF8, | ||
756 | "text/plain", | ||
757 | "application/x-executable", | ||
758 | 1 + strlen("application/x-executable"))) | ||
759 | return 1; | ||
760 | /* more calls to 'proc' here as needed */ | ||
761 | return 0; | ||
762 | } | ||
763 | |||
764 | @end example | ||
765 | |||
766 | @section Plugin execution options | ||
767 | |||
768 | Plugins can request that their execution be done in a particular way. | ||
769 | For this, the plugin defines a function with the following signature: | ||
770 | |||
771 | @verbatim | ||
772 | const char * | ||
773 | EXTRACTOR_XXX_options (void); | ||
774 | @end verbatim | ||
775 | |||
776 | The function should return a string with the execution options. | ||
777 | Individual options in this string should be separated by semicolons. | ||
778 | Options that are included in the string but not known to the library | ||
779 | are ignored. The following options are supported: | ||
780 | |||
781 | @itemize @bullet | ||
782 | @item | ||
783 | @code{oop-only} ensures that the plugin is only run out-of-process; if | ||
784 | this is not possible, the plugin will not be executed at all if this | ||
785 | option is set. | ||
786 | |||
787 | @item | ||
788 | @code{close-stderr} ensures that @code{stderr} is closed during the | ||
789 | execution of the plugin. This is useful if the plugin uses libraries | ||
790 | that write (error) messages to @code{stderr} and where this behavior cannot be | ||
791 | turned off. This option only works if the plugin is executed out-of-process. | ||
792 | |||
793 | @item | ||
794 | @code{close-stdout} ensures that @code{stdout} is closed during the | ||
795 | execution of the plugin. This is useful if the plugin uses libraries | ||
796 | that write messages to @code{stdout} and where this behavior cannot be | ||
797 | turned off. This option only works if the plugin is executed out-of-process. | ||
798 | |||
799 | @item | ||
800 | @code{force-kill} kills and restarts the plugin process for each | ||
801 | file that is being analyzed. This is useful if the plugin uses | ||
802 | libraries that keep global state between runs that is problematic or | ||
803 | if the plugin uses libraries that are known to have serious resource | ||
804 | leaks (such as memory leaks). | ||
805 | |||
806 | @item | ||
807 | @code{want-tail} | ||
808 | In order to limit memory consumption, limit the amount of reading from | |
809 | disk and to keep the API simple, the @samp{data} argument passed to | |
810 | the @code{EXTRACTOR_XXX_extract} method is bounded (to 32 MB of normal | |
811 | data; for compressed data, a limit of 16 MB is imposed).@footnote{If | ||
812 | @gnule{} was given a pointer to an existing, uncompressed block of | ||
813 | data in memory, no bound is imposed for plugins executing in-process; | ||
814 | for out-of-process plugins, a 32 MB limit is still imposed.} Since | ||
815 | some file formats contain meta data at the end of the file, this option | ||
816 | provides a way for plugins to access not the first 16--32 MB of a file | ||
817 | but instead the last (roughly) 32 MB. | ||
818 | |||
819 | Note that even for files larger than 32 MB, @samp{size} is not | ||
820 | guaranteed to be 32 MB since @samp{data} will be aligned to the page | ||
821 | size of the operating system. However, the last byte of @samp{data} | ||
822 | is guaranteed to be the last byte of the file. Furthermore, if the | ||
823 | file was large and compressed, unlike in the case of meta data | ||
824 | extraction from the header, the end of the file will not be | ||
825 | automatically decompressed by @gnule{}. | ||
826 | |||
827 | @end itemize | ||
828 | |||
829 | Note that using options other than @code{want-tail} is pretty much | ||
830 | always a kludge and should thus be avoided. | ||
831 | |||
832 | @section Example for an options method | ||
833 | |||
834 | The following example shows how a plugin can set some of the options listed above: | ||
835 | @example | ||
836 | const char * | ||
837 | EXTRACTOR_id3_options () | ||
838 | { | ||
839 | return "close-stderr;want-tail"; | ||
840 | } | ||
841 | @end example | ||
842 | |||
727 | @node Internal utility functions | 843 | @node Internal utility functions |
728 | @chapter Internal utility functions | 844 | @chapter Internal utility functions |
729 | 845 | ||
@@ -752,12 +868,12 @@ below. | |||
752 | @cindex UTF-8 | 868 | @cindex UTF-8 |
753 | @cindex character set | 869 | @cindex character set |
754 | @findex EXTRACTOR_common_convert_to_utf8 | 870 | @findex EXTRACTOR_common_convert_to_utf8 |
755 | Various @le{} plugins make use of the internal | 871 | Various @gnule{} plugins make use of the internal |
756 | @file{convert.h} header which defines a function | 872 | @file{convert.h} header which defines a function |
757 | 873 | ||
758 | @verb{|EXTRACTOR_common_convert_to_utf8|} which can be used to easily convert text from | 874 | @verb{|EXTRACTOR_common_convert_to_utf8|} which can be used to easily convert text from |
759 | any character set to UTF-8. This conversion is important since the | 875 | any character set to UTF-8. This conversion is important since the |
760 | linked list of keywords that is returned by @le{} is | 876 | linked list of keywords that is returned by @gnule{} is |
761 | expected to contain only UTF-8 strings. Naturally, proper conversion | 877 | expected to contain only UTF-8 strings. Naturally, proper conversion |
762 | may not always be possible since some file formats fail to specify the | 878 | may not always be possible since some file formats fail to specify the |
763 | character set. In that case, it is often better to not convert at | 879 | character set. In that case, it is often better to not convert at |
@@ -781,9 +897,9 @@ caller, so storing the string in the keyword list is acceptable. | |||
781 | @chapter Reporting bugs | 897 | @chapter Reporting bugs |
782 | 898 | ||
783 | @cindex bug | 899 | @cindex bug |
784 | @le{} uses the @url{http://gnunet.org/bugs/,Mantis bugtracking | 900 | @gnule{} uses the @url{http://gnunet.org/bugs/,Mantis bugtracking |
785 | system}. If possible, please report bugs there. You can also e-mail | 901 | system}. If possible, please report bugs there. You can also e-mail |
786 | the @le{} mailinglist at @url{libextractor@@gnu.org}. | 902 | the @gnule{} mailinglist at @url{libextractor@@gnu.org}. |
787 | 903 | ||
788 | 904 | ||
789 | 905 | ||
diff --git a/doc/version.texi b/doc/version.texi index 0715790..6358b99 100644 --- a/doc/version.texi +++ b/doc/version.texi | |||
@@ -1,4 +1,4 @@ | |||
1 | @set UPDATED 1 January 2010 | 1 | @set UPDATED 13 January 2010 |
2 | @set UPDATED-MONTH January 2010 | 2 | @set UPDATED-MONTH January 2010 |
3 | @set EDITION 0.6.0 | 3 | @set EDITION 0.6.0 |
4 | @set VERSION 0.6.0 | 4 | @set VERSION 0.6.0 |
diff --git a/src/main/extractor.c b/src/main/extractor.c index b29676b..09d402b 100644 --- a/src/main/extractor.c +++ b/src/main/extractor.c | |||
@@ -630,6 +630,7 @@ EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags) | |||
630 | */ | 630 | */ |
631 | static void * | 631 | static void * |
632 | get_symbol_with_prefix(void *lib_handle, | 632 | get_symbol_with_prefix(void *lib_handle, |
633 | const char *template, | ||
633 | const char *prefix, | 634 | const char *prefix, |
634 | const char **options) | 635 | const char **options) |
635 | { | 636 | { |
@@ -649,9 +650,9 @@ get_symbol_with_prefix(void *lib_handle, | |||
649 | dot = strstr (sym, "."); | 650 | dot = strstr (sym, "."); |
650 | if (dot != NULL) | 651 | if (dot != NULL) |
651 | *dot = '\0'; | 652 | *dot = '\0'; |
652 | name = malloc(strlen(sym) + 32); | 653 | name = malloc(strlen(sym) + strlen(template) + 1); |
653 | sprintf(name, | 654 | sprintf(name, |
654 | "_EXTRACTOR_%s_extract", | 655 | template, |
655 | sym); | 656 | sym); |
656 | /* try without '_' first */ | 657 | /* try without '_' first */ |
657 | symbol = lt_dlsym(lib_handle, name + 1); | 658 | symbol = lt_dlsym(lib_handle, name + 1); |
@@ -678,7 +679,8 @@ get_symbol_with_prefix(void *lib_handle, | |||
678 | #endif | 679 | #endif |
679 | } | 680 | } |
680 | 681 | ||
681 | if (symbol != NULL) | 682 | if ( (symbol != NULL) && |
683 | (NULL != options) ) | ||
682 | { | 684 | { |
683 | /* get special options */ | 685 | /* get special options */ |
684 | sprintf(name, | 686 | sprintf(name, |
@@ -741,6 +743,7 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) | |||
741 | return -1; | 743 | return -1; |
742 | } | 744 | } |
743 | plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle, | 745 | plugin->extractMethod = get_symbol_with_prefix (plugin->libraryHandle, |
746 | "_EXTRACTOR_%s_extract", | ||
744 | plugin->libname, | 747 | plugin->libname, |
745 | &plugin->specials); | 748 | &plugin->specials); |
746 | if (plugin->extractMethod == NULL) | 749 | if (plugin->extractMethod == NULL) |
@@ -1094,10 +1097,9 @@ transmit_reply (void *cls, | |||
1094 | 1097 | ||
1095 | 1098 | ||
1096 | /** | 1099 | /** |
1097 | * 'main' function of the child process. | 1100 | * 'main' function of the child process. Reads shm-filenames from |
1098 | * Reads shm-filenames from 'in' (line-by-line) and | 1101 | * 'in' (line-by-line) and writes meta data blocks to 'out'. The meta |
1099 | * writes meta data blocks to 'out'. The meta data | 1102 | * data stream is terminated by an empty entry. |
1100 | * stream is terminated by an empty entry. | ||
1101 | * | 1103 | * |
1102 | * @param plugin extractor plugin to use | 1104 | * @param plugin extractor plugin to use |
1103 | * @param in stream to read from | 1105 | * @param in stream to read from |
@@ -1108,12 +1110,15 @@ process_requests (struct EXTRACTOR_PluginList *plugin, | |||
1108 | int in, | 1110 | int in, |
1109 | int out) | 1111 | int out) |
1110 | { | 1112 | { |
1111 | char fn[256]; | 1113 | char hfn[256]; |
1114 | char tfn[256]; | ||
1115 | char *fn; | ||
1112 | FILE *fin; | 1116 | FILE *fin; |
1113 | void *ptr; | 1117 | void *ptr; |
1114 | int shmid; | 1118 | int shmid; |
1115 | struct IpcHeader hdr; | 1119 | struct IpcHeader hdr; |
1116 | size_t size; | 1120 | size_t size; |
1121 | int want_tail; | ||
1117 | #ifdef WINDOWS | 1122 | #ifdef WINDOWS |
1118 | HANDLE map; | 1123 | HANDLE map; |
1119 | #endif | 1124 | #endif |
@@ -1129,6 +1134,13 @@ process_requests (struct EXTRACTOR_PluginList *plugin, | |||
1129 | #endif | 1134 | #endif |
1130 | return; | 1135 | return; |
1131 | } | 1136 | } |
1137 | want_tail = 0; | ||
1138 | if ( (plugin->specials != NULL) && | ||
1139 | (NULL != strstr (plugin->specials, | ||
1140 | "want-tail")) ) | ||
1141 | { | ||
1142 | want_tail = 1; | ||
1143 | } | ||
1132 | if ( (plugin->specials != NULL) && | 1144 | if ( (plugin->specials != NULL) && |
1133 | (NULL != strstr (plugin->specials, | 1145 | (NULL != strstr (plugin->specials, |
1134 | "close-stderr")) ) | 1146 | "close-stderr")) ) |
@@ -1144,12 +1156,27 @@ process_requests (struct EXTRACTOR_PluginList *plugin, | |||
1144 | 1156 | ||
1145 | memset (&hdr, 0, sizeof (hdr)); | 1157 | memset (&hdr, 0, sizeof (hdr)); |
1146 | fin = fdopen (in, "r"); | 1158 | fin = fdopen (in, "r"); |
1147 | while (NULL != fgets (fn, sizeof(fn), fin)) | 1159 | while (NULL != fgets (hfn, sizeof(hfn), fin)) |
1148 | { | 1160 | { |
1149 | if (strlen (fn) == 0) | 1161 | if (strlen (hfn) <= 1) |
1150 | break; | 1162 | break; |
1151 | ptr = NULL; | 1163 | ptr = NULL; |
1152 | fn[strlen(fn)-1] = '\0'; /* kill newline */ | 1164 | hfn[strlen(hfn)-1] = '\0'; /* kill newline */ |
1165 | if (NULL == fgets (tfn, sizeof(tfn), fin)) | ||
1166 | break; | ||
1167 | if ('!' != tfn[0]) | ||
1168 | break; | ||
1169 | tfn[strlen(tfn)-1] = '\0'; /* kill newline */ | ||
1170 | if ( (want_tail) && | ||
1171 | (strlen (tfn) > 1) ) | ||
1172 | { | ||
1173 | fn = &tfn[1]; | ||
1174 | } | ||
1175 | else | ||
1176 | { | ||
1177 | fn = hfn; | ||
1178 | } | ||
1179 | |||
1153 | #ifndef WINDOWS | 1180 | #ifndef WINDOWS |
1154 | if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) && | 1181 | if ( (-1 != (shmid = shm_open (fn, O_RDONLY, 0))) && |
1155 | (((off_t)-1) != (size = lseek (shmid, 0, SEEK_END))) && | 1182 | (((off_t)-1) != (size = lseek (shmid, 0, SEEK_END))) && |
@@ -1161,12 +1188,13 @@ process_requests (struct EXTRACTOR_PluginList *plugin, | |||
1161 | if (ptr != NULL) | 1188 | if (ptr != NULL) |
1162 | #endif | 1189 | #endif |
1163 | { | 1190 | { |
1164 | if (0 != plugin->extractMethod (ptr, | 1191 | if ( (plugin->extractMethod != NULL) && |
1165 | size, | 1192 | (0 != plugin->extractMethod (ptr, |
1166 | &transmit_reply, | 1193 | size, |
1167 | &out, | 1194 | &transmit_reply, |
1168 | plugin->plugin_options)) | 1195 | &out, |
1169 | break; | 1196 | plugin->plugin_options)) ) |
1197 | break; | ||
1170 | if (0 != write_all (out, &hdr, sizeof(hdr))) | 1198 | if (0 != write_all (out, &hdr, sizeof(hdr))) |
1171 | break; | 1199 | break; |
1172 | } | 1200 | } |
@@ -1195,8 +1223,10 @@ process_requests (struct EXTRACTOR_PluginList *plugin, | |||
1195 | close (out); | 1223 | close (out); |
1196 | } | 1224 | } |
1197 | 1225 | ||
1226 | |||
1198 | #ifdef WINDOWS | 1227 | #ifdef WINDOWS |
1199 | static void write_plugin_data (HANDLE h, const struct EXTRACTOR_PluginList *plugin) | 1228 | static void |
1229 | write_plugin_data (HANDLE h, const struct EXTRACTOR_PluginList *plugin) | ||
1200 | { | 1230 | { |
1201 | size_t i; | 1231 | size_t i; |
1202 | DWORD len; | 1232 | DWORD len; |
@@ -1217,7 +1247,9 @@ static void write_plugin_data (HANDLE h, const struct EXTRACTOR_PluginList *plug | |||
1217 | WriteFile (h, plugin->plugin_options, i, &len, NULL); | 1247 | WriteFile (h, plugin->plugin_options, i, &len, NULL); |
1218 | } | 1248 | } |
1219 | 1249 | ||
1220 | static struct EXTRACTOR_PluginList *read_plugin_data (FILE *f) | 1250 | |
1251 | static struct EXTRACTOR_PluginList * | ||
1252 | read_plugin_data (FILE *f) | ||
1221 | { | 1253 | { |
1222 | struct EXTRACTOR_PluginList *ret; | 1254 | struct EXTRACTOR_PluginList *ret; |
1223 | size_t i; | 1255 | size_t i; |
@@ -1239,7 +1271,9 @@ static struct EXTRACTOR_PluginList *read_plugin_data (FILE *f) | |||
1239 | return ret; | 1271 | return ret; |
1240 | } | 1272 | } |
1241 | 1273 | ||
1242 | void CALLBACK RundllEntryPoint(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) | 1274 | |
1275 | void CALLBACK | ||
1276 | RundllEntryPoint(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, int nCmdShow) | ||
1243 | { | 1277 | { |
1244 | int in, out; | 1278 | int in, out; |
1245 | 1279 | ||
@@ -1253,6 +1287,7 @@ void CALLBACK RundllEntryPoint(HWND hwnd, HINSTANCE hinst, LPSTR lpszCmdLine, in | |||
1253 | } | 1287 | } |
1254 | #endif | 1288 | #endif |
1255 | 1289 | ||
1290 | |||
1256 | /** | 1291 | /** |
1257 | * Start the process for the given plugin. | 1292 | * Start the process for the given plugin. |
1258 | */ | 1293 | */ |
@@ -1331,6 +1366,7 @@ start_process (struct EXTRACTOR_PluginList *plugin) | |||
1331 | * | 1366 | * |
1332 | * @param plugin which plugin to call | 1367 | * @param plugin which plugin to call |
1333 | * @param shmfn file name of the shared memory segment | 1368 | * @param shmfn file name of the shared memory segment |
1369 | * @param tshmfn file name of the shared memory segment for the end of the data | ||
1334 | * @param proc function to call on the meta data | 1370 | * @param proc function to call on the meta data |
1335 | * @param proc_cls cls for proc | 1371 | * @param proc_cls cls for proc |
1336 | * @return 0 if proc did not return non-zero | 1372 | * @return 0 if proc did not return non-zero |
@@ -1338,6 +1374,7 @@ start_process (struct EXTRACTOR_PluginList *plugin) | |||
1338 | static int | 1374 | static int |
1339 | extract_oop (struct EXTRACTOR_PluginList *plugin, | 1375 | extract_oop (struct EXTRACTOR_PluginList *plugin, |
1340 | const char *shmfn, | 1376 | const char *shmfn, |
1377 | const char *tshmfn, | ||
1341 | EXTRACTOR_MetaDataProcessor proc, | 1378 | EXTRACTOR_MetaDataProcessor proc, |
1342 | void *proc_cls) | 1379 | void *proc_cls) |
1343 | { | 1380 | { |
@@ -1347,7 +1384,19 @@ extract_oop (struct EXTRACTOR_PluginList *plugin, | |||
1347 | 1384 | ||
1348 | if (plugin->cpid == -1) | 1385 | if (plugin->cpid == -1) |
1349 | return 0; | 1386 | return 0; |
1350 | if (0 >= fprintf (plugin->cpipe_in, "%s\n", shmfn)) | 1387 | if (0 >= fprintf (plugin->cpipe_in, |
1388 | "%s\n", | ||
1389 | shmfn)) | ||
1390 | { | ||
1391 | stop_process (plugin); | ||
1392 | plugin->cpid = -1; | ||
1393 | if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) | ||
1394 | plugin->flags = EXTRACTOR_OPTION_DISABLED; | ||
1395 | return 0; | ||
1396 | } | ||
1397 | if (0 >= fprintf (plugin->cpipe_in, | ||
1398 | "!%s\n", | ||
1399 | (tshmfn != NULL) ? tshmfn : "")) | ||
1351 | { | 1400 | { |
1352 | stop_process (plugin); | 1401 | stop_process (plugin); |
1353 | plugin->cpid = -1; | 1402 | plugin->cpid = -1; |
@@ -1420,33 +1469,108 @@ extract_oop (struct EXTRACTOR_PluginList *plugin, | |||
1420 | 1469 | ||
1421 | 1470 | ||
1422 | /** | 1471 | /** |
1423 | * Extract keywords from a file using the given set of plugins. | 1472 | * Setup a shared memory segment. |
1473 | * | ||
1474 | * @param ptr set to the location of the shm segment | ||
1475 | * @param shmid where to store the shm ID | ||
1476 | * @param fn name of the shared segment | ||
1477 | * @param fn_size size available in fn | ||
1478 | * @param size number of bytes to allocate for the segment | ||
1479 | * @return 0 on success | ||
1480 | */ | ||
1481 | static int | ||
1482 | make_shm (int is_tail, | ||
1483 | void **ptr, | ||
1484 | #ifndef WINDOWS | ||
1485 | int *shmid, | ||
1486 | #else | ||
1487 | HANDLE *mappedFile, | ||
1488 | HANDLE *map, | ||
1489 | #endif | ||
1490 | char *fn, | ||
1491 | size_t fn_size, | ||
1492 | size_t size) | ||
1493 | { | ||
1494 | snprintf (fn, | ||
1495 | fn_size, | ||
1496 | #ifdef WINDOWS | ||
1497 | "%TEMP%\\" | ||
1498 | #else | ||
1499 | "/" | ||
1500 | #endif | ||
1501 | "libextractor-%sshm-%u-%u", | ||
1502 | (is_tail) ? "t" : "", | ||
1503 | getpid(), | ||
1504 | (unsigned int) RANDOM()); | ||
1505 | #ifndef WINDOWS | ||
1506 | *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); | ||
1507 | *ptr = NULL; | ||
1508 | if (-1 == (*shmid)) | ||
1509 | return 1; | ||
1510 | if ( (0 != ftruncate (*shmid, size)) || | ||
1511 | (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || | ||
1512 | (*ptr == (void*) -1) ) | ||
1513 | { | ||
1514 | close (*shmid); | ||
1515 | *shmid = -1; | ||
1516 | return 1; | ||
1517 | } | ||
1518 | return 0; | ||
1519 | #else | ||
1520 | *mappedFile = CreateFile (fn, | ||
1521 | GENERIC_READ | GENERIC_WRITE, | ||
1522 | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, | ||
1523 | FILE_FLAG_DELETE_ON_CLOSE, NULL); | ||
1524 | *map = CreateFileMapping (*mappedFile, NULL, PAGE_READWRITE, 1, 0, NULL); | ||
1525 | ptr = MapViewOfFile (*map, FILE_MAP_READ, 0, 0, 0); | ||
1526 | if (ptr == NULL) | ||
1527 | { | ||
1528 | CloseHandle (*map); | ||
1529 | CloseHandle (*mappedFile); | ||
1530 | return 1; | ||
1531 | } | ||
1532 | #endif | ||
1533 | return 0; | ||
1534 | } | ||
1535 | |||
1536 | |||
1537 | /** | ||
1538 | * Extract keywords using the given set of plugins. | ||
1424 | * | 1539 | * |
1425 | * @param plugins the list of plugins to use | 1540 | * @param plugins the list of plugins to use |
1426 | * @param filename the name of the file, can be NULL | ||
1427 | * @param data data to process, never NULL | 1541 | * @param data data to process, never NULL |
1428 | * @param size number of bytes in data, ignored if data is NULL | 1542 | * @param size number of bytes in data, ignored if data is NULL |
1543 | * @param tdata end of file data, or NULL | ||
1544 | * @param tsize number of bytes in tdata | ||
1429 | * @param proc function to call for each meta data item found | 1545 | * @param proc function to call for each meta data item found |
1430 | * @param proc_cls cls argument to proc | 1546 | * @param proc_cls cls argument to proc |
1431 | */ | 1547 | */ |
1432 | static void | 1548 | static void |
1433 | extract (struct EXTRACTOR_PluginList *plugins, | 1549 | extract (struct EXTRACTOR_PluginList *plugins, |
1434 | const char * filename, | ||
1435 | const char * data, | 1550 | const char * data, |
1436 | size_t size, | 1551 | size_t size, |
1552 | const char * tdata, | ||
1553 | size_t tsize, | ||
1437 | EXTRACTOR_MetaDataProcessor proc, | 1554 | EXTRACTOR_MetaDataProcessor proc, |
1438 | void *proc_cls) | 1555 | void *proc_cls) |
1439 | { | 1556 | { |
1440 | struct EXTRACTOR_PluginList *ppos; | 1557 | struct EXTRACTOR_PluginList *ppos; |
1441 | #ifndef WINDOWS | ||
1442 | int shmid; | ||
1443 | #else | ||
1444 | HANDLE map, mappedFile; | ||
1445 | #endif | ||
1446 | enum EXTRACTOR_Options flags; | 1558 | enum EXTRACTOR_Options flags; |
1447 | void *ptr; | 1559 | void *ptr; |
1560 | void *tptr; | ||
1448 | char fn[255]; | 1561 | char fn[255]; |
1562 | char tfn[255]; | ||
1449 | int want_shm; | 1563 | int want_shm; |
1564 | int want_tail; | ||
1565 | #ifndef WINDOWS | ||
1566 | int shmid; | ||
1567 | int tshmid; | ||
1568 | #else | ||
1569 | HANDLE map; | ||
1570 | HANDLE mappedFile; | ||
1571 | HANDLE tmap; | ||
1572 | HANDLE tmappedFile; | ||
1573 | #endif | ||
1450 | 1574 | ||
1451 | want_shm = 0; | 1575 | want_shm = 0; |
1452 | ppos = plugins; | 1576 | ppos = plugins; |
@@ -1472,100 +1596,106 @@ extract (struct EXTRACTOR_PluginList *plugins, | |||
1472 | } | 1596 | } |
1473 | ppos = ppos->next; | 1597 | ppos = ppos->next; |
1474 | } | 1598 | } |
1599 | ptr = NULL; | ||
1600 | tptr = NULL; | ||
1475 | if (want_shm) | 1601 | if (want_shm) |
1476 | { | 1602 | { |
1477 | snprintf (fn, | 1603 | if (size > MAX_READ) |
1478 | sizeof(fn), | 1604 | size = MAX_READ; |
1479 | #ifdef WINDOWS | 1605 | if (0 == make_shm (0, |
1480 | "%TEMP%\\" | 1606 | &ptr, |
1607 | #ifndef WINDOWS | ||
1608 | &shmid, | ||
1481 | #else | 1609 | #else |
1482 | "/" | 1610 | &mappedFile, |
1611 | &map, | ||
1483 | #endif | 1612 | #endif |
1484 | "libextractor-shm-%u-%u", | 1613 | fn, sizeof(fn), size)) |
1485 | getpid(), | ||
1486 | (unsigned int) RANDOM()); | ||
1487 | #ifndef WINDOWS | ||
1488 | shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); | ||
1489 | ptr = NULL; | ||
1490 | if (shmid != -1) | ||
1491 | { | 1614 | { |
1492 | if ( (0 != ftruncate (shmid, size)) || | 1615 | memcpy (ptr, data, size); |
1493 | (NULL == (ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, shmid, 0))) || | 1616 | if ( (tdata != NULL) && |
1494 | (ptr == (void*) -1) ) | 1617 | (0 == make_shm (1, |
1618 | &tptr, | ||
1619 | #ifndef WINDOWS | ||
1620 | &tshmid, | ||
1621 | #else | ||
1622 | &tmappedFile, | ||
1623 | &tmap, | ||
1624 | #endif | ||
1625 | tfn, sizeof(tfn), tsize)) ) | ||
1495 | { | 1626 | { |
1496 | close (shmid); | 1627 | memcpy (tptr, tdata, tsize); |
1497 | shmid = -1; | ||
1498 | } | 1628 | } |
1499 | else | 1629 | else |
1500 | { | 1630 | { |
1501 | memcpy (ptr, data, size); | 1631 | tptr = NULL; |
1502 | } | 1632 | } |
1503 | } | 1633 | } |
1504 | #else | ||
1505 | mappedFile = CreateFile (fn, GENERIC_READ | GENERIC_WRITE, | ||
1506 | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, | ||
1507 | FILE_FLAG_DELETE_ON_CLOSE, NULL); | ||
1508 | map = CreateFileMapping (mappedFile, NULL, PAGE_READWRITE, 1, 0, NULL); | ||
1509 | ptr = MapViewOfFile (map, FILE_MAP_READ, 0, 0, 0); | ||
1510 | if (ptr == NULL) | ||
1511 | { | ||
1512 | CloseHandle (map); | ||
1513 | CloseHandle (mappedFile); | ||
1514 | map = NULL; | ||
1515 | } | ||
1516 | else | 1634 | else |
1517 | memcpy (ptr, data, size); | 1635 | { |
1518 | #endif | 1636 | want_shm = 0; |
1637 | } | ||
1519 | } | 1638 | } |
1520 | else | ||
1521 | #ifndef WINDOWS | ||
1522 | shmid = -1; | ||
1523 | if (want_shm && (shmid == -1)) | ||
1524 | _exit(1); | ||
1525 | #else | ||
1526 | map = NULL; | ||
1527 | if (want_shm && map == NULL) | ||
1528 | _exit(1); | ||
1529 | #endif | ||
1530 | ppos = plugins; | 1639 | ppos = plugins; |
1531 | while (NULL != ppos) | 1640 | while (NULL != ppos) |
1532 | { | 1641 | { |
1533 | flags = ppos->flags; | 1642 | flags = ppos->flags; |
1534 | #ifndef WINDOWS | 1643 | if (! want_shm) |
1535 | if (shmid == -1) | ||
1536 | #else | ||
1537 | if (map == NULL) | ||
1538 | #endif | ||
1539 | flags = EXTRACTOR_OPTION_IN_PROCESS; | 1644 | flags = EXTRACTOR_OPTION_IN_PROCESS; |
1540 | switch (flags) | 1645 | switch (flags) |
1541 | { | 1646 | { |
1542 | case EXTRACTOR_OPTION_DEFAULT_POLICY: | 1647 | case EXTRACTOR_OPTION_DEFAULT_POLICY: |
1543 | if (0 != extract_oop (ppos, fn, proc, proc_cls)) | 1648 | if (0 != extract_oop (ppos, fn, |
1649 | (tptr != NULL) ? tfn : NULL, | ||
1650 | proc, proc_cls)) | ||
1544 | return; | 1651 | return; |
1545 | if (ppos->cpid == -1) | 1652 | if (ppos->cpid == -1) |
1546 | { | 1653 | { |
1547 | start_process (ppos); | 1654 | start_process (ppos); |
1548 | if (0 != extract_oop (ppos, fn, proc, proc_cls)) | 1655 | if (0 != extract_oop (ppos, fn, |
1656 | (tptr != NULL) ? tfn : NULL, | ||
1657 | proc, proc_cls)) | ||
1549 | return; | 1658 | return; |
1550 | } | 1659 | } |
1551 | break; | 1660 | break; |
1552 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: | 1661 | case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: |
1553 | if (0 != extract_oop (ppos, fn, proc, proc_cls)) | 1662 | if (0 != extract_oop (ppos, fn, |
1663 | (tptr != NULL) ? tfn : NULL, | ||
1664 | proc, proc_cls)) | ||
1554 | return; | 1665 | return; |
1555 | break; | 1666 | break; |
1556 | case EXTRACTOR_OPTION_IN_PROCESS: | 1667 | case EXTRACTOR_OPTION_IN_PROCESS: |
1557 | if (NULL == ppos->extractMethod) | 1668 | want_tail = ( (ppos->specials != NULL) && |
1669 | (NULL != strstr (ppos->specials, | ||
1670 | "want-tail"))); | ||
1671 | if (NULL == ppos->extractMethod) | ||
1558 | plugin_load (ppos); | 1672 | plugin_load (ppos); |
1559 | if ( ( (ppos->specials == NULL) || | 1673 | if ( ( (ppos->specials == NULL) || |
1560 | (NULL == strstr (ppos->specials, | 1674 | (NULL == strstr (ppos->specials, |
1561 | "oop-only")) ) && | 1675 | "oop-only")) ) ) |
1562 | (NULL != ppos->extractMethod) && | 1676 | { |
1563 | (0 != ppos->extractMethod (data, | 1677 | if (want_tail) |
1564 | size, | 1678 | { |
1565 | proc, | 1679 | if ( (NULL != ppos->extractMethod) && |
1566 | proc_cls, | 1680 | (tdata != NULL) && |
1567 | ppos->plugin_options)) ) | 1681 | (0 != ppos->extractMethod (tdata, |
1568 | return; | 1682 | tsize, |
1683 | proc, | ||
1684 | proc_cls, | ||
1685 | ppos->plugin_options)) ) | ||
1686 | return; | ||
1687 | } | ||
1688 | else | ||
1689 | { | ||
1690 | if ( (NULL != ppos->extractMethod) && | ||
1691 | (0 != ppos->extractMethod (data, | ||
1692 | size, | ||
1693 | proc, | ||
1694 | proc_cls, | ||
1695 | ppos->plugin_options)) ) | ||
1696 | return; | ||
1697 | } | ||
1698 | } | ||
1569 | break; | 1699 | break; |
1570 | case EXTRACTOR_OPTION_DISABLED: | 1700 | case EXTRACTOR_OPTION_DISABLED: |
1571 | break; | 1701 | break; |
@@ -1580,10 +1710,21 @@ extract (struct EXTRACTOR_PluginList *plugins, | |||
1580 | if (shmid != -1) | 1710 | if (shmid != -1) |
1581 | close (shmid); | 1711 | close (shmid); |
1582 | shm_unlink (fn); | 1712 | shm_unlink (fn); |
1713 | if (NULL != tptr) | ||
1714 | munmap (tptr, tsize); | ||
1715 | if (tshmid != -1) | ||
1716 | close (tshmid); | ||
1717 | shm_unlink (tfn); | ||
1583 | #else | 1718 | #else |
1584 | UnmapViewOfFile (ptr); | 1719 | UnmapViewOfFile (ptr); |
1585 | CloseHandle (map); | 1720 | CloseHandle (map); |
1586 | CloseHandle (mappedFile); | 1721 | CloseHandle (mappedFile); |
1722 | if (tptr != NULL) | ||
1723 | { | ||
1724 | UnmapViewOfFile (tptr); | ||
1725 | CloseHandle (tmap); | ||
1726 | CloseHandle (tmappedFile); | ||
1727 | } | ||
1587 | #endif | 1728 | #endif |
1588 | } | 1729 | } |
1589 | } | 1730 | } |
@@ -1595,17 +1736,19 @@ extract (struct EXTRACTOR_PluginList *plugins, | |||
1595 | * contents if they were not compressed). | 1736 | * contents if they were not compressed). |
1596 | * | 1737 | * |
1597 | * @param plugins the list of plugins to use | 1738 | * @param plugins the list of plugins to use |
1598 | * @param filename the name of the file, can be NULL | ||
1599 | * @param data data to process, never NULL | 1739 | * @param data data to process, never NULL |
1600 | * @param size number of bytes in data, ignored if data is NULL | 1740 | * @param size number of bytes in data |
1741 | * @param tdata end of file data, or NULL | ||
1742 | * @param tsize number of bytes in tdata | ||
1601 | * @param proc function to call for each meta data item found | 1743 | * @param proc function to call for each meta data item found |
1602 | * @param proc_cls cls argument to proc | 1744 | * @param proc_cls cls argument to proc |
1603 | */ | 1745 | */ |
1604 | static void | 1746 | static void |
1605 | decompress_and_extract (struct EXTRACTOR_PluginList *plugins, | 1747 | decompress_and_extract (struct EXTRACTOR_PluginList *plugins, |
1606 | const char * filename, | ||
1607 | const unsigned char * data, | 1748 | const unsigned char * data, |
1608 | size_t size, | 1749 | size_t size, |
1750 | const char * tdata, | ||
1751 | size_t tsize, | ||
1609 | EXTRACTOR_MetaDataProcessor proc, | 1752 | EXTRACTOR_MetaDataProcessor proc, |
1610 | void *proc_cls) { | 1753 | void *proc_cls) { |
1611 | unsigned char * buf; | 1754 | unsigned char * buf; |
@@ -1838,9 +1981,10 @@ decompress_and_extract (struct EXTRACTOR_PluginList *plugins, | |||
1838 | size = dsize; | 1981 | size = dsize; |
1839 | } | 1982 | } |
1840 | extract (plugins, | 1983 | extract (plugins, |
1841 | filename, | ||
1842 | (const char*) data, | 1984 | (const char*) data, |
1843 | size, | 1985 | size, |
1986 | tdata, | ||
1987 | tsize, | ||
1844 | proc, | 1988 | proc, |
1845 | proc_cls); | 1989 | proc_cls); |
1846 | if (buf != NULL) | 1990 | if (buf != NULL) |
@@ -1908,9 +2052,13 @@ EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, | |||
1908 | { | 2052 | { |
1909 | int fd; | 2053 | int fd; |
1910 | void * buffer; | 2054 | void * buffer; |
2055 | void * tbuffer; | ||
1911 | struct stat fstatbuf; | 2056 | struct stat fstatbuf; |
1912 | size_t fsize; | 2057 | size_t fsize; |
2058 | size_t tsize; | ||
1913 | int eno; | 2059 | int eno; |
2060 | off_t offset; | ||
2061 | long pg; | ||
1914 | 2062 | ||
1915 | fd = -1; | 2063 | fd = -1; |
1916 | buffer = NULL; | 2064 | buffer = NULL; |
@@ -1941,14 +2089,41 @@ EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, | |||
1941 | if ( (buffer == NULL) && | 2089 | if ( (buffer == NULL) && |
1942 | (data == NULL) ) | 2090 | (data == NULL) ) |
1943 | return; | 2091 | return; |
2092 | /* for footer extraction */ | ||
2093 | tsize = 0; | ||
2094 | tbuffer = NULL; | ||
2095 | if ( (data == NULL) && | ||
2096 | (fstatbuf.st_size > fsize) && | ||
2097 | (fstatbuf.st_size > MAX_READ) ) | ||
2098 | { | ||
2099 | pg = sysconf (_SC_PAGE_SIZE); | ||
2100 | if ( (pg > 0) && | ||
2101 | (pg < MAX_READ) ) | ||
2102 | { | ||
2103 | offset = (1 + (fstatbuf.st_size - MAX_READ) / pg) * pg; | ||
2104 | if (offset < fstatbuf.st_size) | ||
2105 | { | ||
2106 | tsize = fstatbuf.st_size - offset; | ||
2107 | tbuffer = MMAP (NULL, tsize, PROT_READ, MAP_PRIVATE, fd, offset); | ||
2108 | if ( (tbuffer == NULL) || (tbuffer == (void *) -1) ) | ||
2109 | { | ||
2110 | tsize = 0; | ||
2111 | tbuffer = NULL; | ||
2112 | } | ||
2113 | } | ||
2114 | } | ||
2115 | } | ||
1944 | decompress_and_extract (plugins, | 2116 | decompress_and_extract (plugins, |
1945 | filename, | ||
1946 | buffer != NULL ? buffer : data, | 2117 | buffer != NULL ? buffer : data, |
1947 | buffer != NULL ? fsize : size, | 2118 | buffer != NULL ? fsize : size, |
2119 | tbuffer, | ||
2120 | tsize, | ||
1948 | proc, | 2121 | proc, |
1949 | proc_cls); | 2122 | proc_cls); |
1950 | if (buffer != NULL) | 2123 | if (buffer != NULL) |
1951 | MUNMAP (buffer, fsize); | 2124 | MUNMAP (buffer, fsize); |
2125 | if (tbuffer != NULL) | ||
2126 | MUNMAP (tbuffer, tsize); | ||
1952 | if (-1 != fd) | 2127 | if (-1 != fd) |
1953 | close(fd); | 2128 | close(fd); |
1954 | } | 2129 | } |
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index 0868ebd..07ecb63 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am | |||
@@ -86,6 +86,7 @@ plugin_LTLIBRARIES = \ | |||
86 | libextractor_flv.la \ | 86 | libextractor_flv.la \ |
87 | libextractor_gif.la \ | 87 | libextractor_gif.la \ |
88 | libextractor_html.la \ | 88 | libextractor_html.la \ |
89 | libextractor_id3.la \ | ||
89 | libextractor_id3v2.la \ | 90 | libextractor_id3v2.la \ |
90 | libextractor_id3v23.la \ | 91 | libextractor_id3v23.la \ |
91 | libextractor_id3v24.la \ | 92 | libextractor_id3v24.la \ |
@@ -186,6 +187,13 @@ libextractor_html_la_LDFLAGS = \ | |||
186 | libextractor_html_la_LIBADD = \ | 187 | libextractor_html_la_LIBADD = \ |
187 | $(top_builddir)/src/common/libextractor_common.la | 188 | $(top_builddir)/src/common/libextractor_common.la |
188 | 189 | ||
190 | libextractor_id3_la_SOURCES = \ | ||
191 | id3_extractor.c | ||
192 | libextractor_id3_la_LDFLAGS = \ | ||
193 | $(PLUGINFLAGS) | ||
194 | libextractor_id3_la_LIBADD = \ | ||
195 | $(top_builddir)/src/common/libextractor_common.la | ||
196 | |||
189 | libextractor_id3v2_la_SOURCES = \ | 197 | libextractor_id3v2_la_SOURCES = \ |
190 | id3v2_extractor.c | 198 | id3v2_extractor.c |
191 | libextractor_id3v2_la_LDFLAGS = \ | 199 | libextractor_id3v2_la_LDFLAGS = \ |
diff --git a/src/plugins/id3_extractor.c b/src/plugins/id3_extractor.c new file mode 100644 index 0000000..be399e0 --- /dev/null +++ b/src/plugins/id3_extractor.c | |||
@@ -0,0 +1,305 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004, 2006, 2009, 2010 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | |||
20 | */ | ||
21 | |||
22 | #include "platform.h" | ||
23 | #include "extractor.h" | ||
24 | #include "convert.h" | ||
25 | #include <string.h> | ||
26 | #include <stdio.h> | ||
27 | #include <sys/types.h> | ||
28 | #include <sys/stat.h> | ||
29 | #include <unistd.h> | ||
30 | #include <stdlib.h> | ||
31 | |||
32 | typedef struct | ||
33 | { | ||
34 | char *title; | ||
35 | char *artist; | ||
36 | char *album; | ||
37 | char *year; | ||
38 | char *comment; | ||
39 | const char *genre; | ||
40 | unsigned int track_number; | ||
41 | } id3tag; | ||
42 | |||
43 | static const char *const genre_names[] = { | ||
44 | gettext_noop ("Blues"), | ||
45 | gettext_noop ("Classic Rock"), | ||
46 | gettext_noop ("Country"), | ||
47 | gettext_noop ("Dance"), | ||
48 | gettext_noop ("Disco"), | ||
49 | gettext_noop ("Funk"), | ||
50 | gettext_noop ("Grunge"), | ||
51 | gettext_noop ("Hip-Hop"), | ||
52 | gettext_noop ("Jazz"), | ||
53 | gettext_noop ("Metal"), | ||
54 | gettext_noop ("New Age"), | ||
55 | gettext_noop ("Oldies"), | ||
56 | gettext_noop ("Other"), | ||
57 | gettext_noop ("Pop"), | ||
58 | gettext_noop ("R&B"), | ||
59 | gettext_noop ("Rap"), | ||
60 | gettext_noop ("Reggae"), | ||
61 | gettext_noop ("Rock"), | ||
62 | gettext_noop ("Techno"), | ||
63 | gettext_noop ("Industrial"), | ||
64 | gettext_noop ("Alternative"), | ||
65 | gettext_noop ("Ska"), | ||
66 | gettext_noop ("Death Metal"), | ||
67 | gettext_noop ("Pranks"), | ||
68 | gettext_noop ("Soundtrack"), | ||
69 | gettext_noop ("Euro-Techno"), | ||
70 | gettext_noop ("Ambient"), | ||
71 | gettext_noop ("Trip-Hop"), | ||
72 | gettext_noop ("Vocal"), | ||
73 | gettext_noop ("Jazz+Funk"), | ||
74 | gettext_noop ("Fusion"), | ||
75 | gettext_noop ("Trance"), | ||
76 | gettext_noop ("Classical"), | ||
77 | gettext_noop ("Instrumental"), | ||
78 | gettext_noop ("Acid"), | ||
79 | gettext_noop ("House"), | ||
80 | gettext_noop ("Game"), | ||
81 | gettext_noop ("Sound Clip"), | ||
82 | gettext_noop ("Gospel"), | ||
83 | gettext_noop ("Noise"), | ||
84 | gettext_noop ("Alt. Rock"), | ||
85 | gettext_noop ("Bass"), | ||
86 | gettext_noop ("Soul"), | ||
87 | gettext_noop ("Punk"), | ||
88 | gettext_noop ("Space"), | ||
89 | gettext_noop ("Meditative"), | ||
90 | gettext_noop ("Instrumental Pop"), | ||
91 | gettext_noop ("Instrumental Rock"), | ||
92 | gettext_noop ("Ethnic"), | ||
93 | gettext_noop ("Gothic"), | ||
94 | gettext_noop ("Darkwave"), | ||
95 | gettext_noop ("Techno-Industrial"), | ||
96 | gettext_noop ("Electronic"), | ||
97 | gettext_noop ("Pop-Folk"), | ||
98 | gettext_noop ("Eurodance"), | ||
99 | gettext_noop ("Dream"), | ||
100 | gettext_noop ("Southern Rock"), | ||
101 | gettext_noop ("Comedy"), | ||
102 | gettext_noop ("Cult"), | ||
103 | gettext_noop ("Gangsta Rap"), | ||
104 | gettext_noop ("Top 40"), | ||
105 | gettext_noop ("Christian Rap"), | ||
106 | gettext_noop ("Pop/Funk"), | ||
107 | gettext_noop ("Jungle"), | ||
108 | gettext_noop ("Native American"), | ||
109 | gettext_noop ("Cabaret"), | ||
110 | gettext_noop ("New Wave"), | ||
111 | gettext_noop ("Psychedelic"), | ||
112 | gettext_noop ("Rave"), | ||
113 | gettext_noop ("Showtunes"), | ||
114 | gettext_noop ("Trailer"), | ||
115 | gettext_noop ("Lo-Fi"), | ||
116 | gettext_noop ("Tribal"), | ||
117 | gettext_noop ("Acid Punk"), | ||
118 | gettext_noop ("Acid Jazz"), | ||
119 | gettext_noop ("Polka"), | ||
120 | gettext_noop ("Retro"), | ||
121 | gettext_noop ("Musical"), | ||
122 | gettext_noop ("Rock & Roll"), | ||
123 | gettext_noop ("Hard Rock"), | ||
124 | gettext_noop ("Folk"), | ||
125 | gettext_noop ("Folk/Rock"), | ||
126 | gettext_noop ("National Folk"), | ||
127 | gettext_noop ("Swing"), | ||
128 | gettext_noop ("Fast-Fusion"), | ||
129 | gettext_noop ("Bebob"), | ||
130 | gettext_noop ("Latin"), | ||
131 | gettext_noop ("Revival"), | ||
132 | gettext_noop ("Celtic"), | ||
133 | gettext_noop ("Bluegrass"), | ||
134 | gettext_noop ("Avantgarde"), | ||
135 | gettext_noop ("Gothic Rock"), | ||
136 | gettext_noop ("Progressive Rock"), | ||
137 | gettext_noop ("Psychedelic Rock"), | ||
138 | gettext_noop ("Symphonic Rock"), | ||
139 | gettext_noop ("Slow Rock"), | ||
140 | gettext_noop ("Big Band"), | ||
141 | gettext_noop ("Chorus"), | ||
142 | gettext_noop ("Easy Listening"), | ||
143 | gettext_noop ("Acoustic"), | ||
144 | gettext_noop ("Humour"), | ||
145 | gettext_noop ("Speech"), | ||
146 | gettext_noop ("Chanson"), | ||
147 | gettext_noop ("Opera"), | ||
148 | gettext_noop ("Chamber Music"), | ||
149 | gettext_noop ("Sonata"), | ||
150 | gettext_noop ("Symphony"), | ||
151 | gettext_noop ("Booty Bass"), | ||
152 | gettext_noop ("Primus"), | ||
153 | gettext_noop ("Porn Groove"), | ||
154 | gettext_noop ("Satire"), | ||
155 | gettext_noop ("Slow Jam"), | ||
156 | gettext_noop ("Club"), | ||
157 | gettext_noop ("Tango"), | ||
158 | gettext_noop ("Samba"), | ||
159 | gettext_noop ("Folklore"), | ||
160 | gettext_noop ("Ballad"), | ||
161 | gettext_noop ("Power Ballad"), | ||
162 | gettext_noop ("Rhythmic Soul"), | ||
163 | gettext_noop ("Freestyle"), | ||
164 | gettext_noop ("Duet"), | ||
165 | gettext_noop ("Punk Rock"), | ||
166 | gettext_noop ("Drum Solo"), | ||
167 | gettext_noop ("A Cappella"), | ||
168 | gettext_noop ("Euro-House"), | ||
169 | gettext_noop ("Dance Hall"), | ||
170 | gettext_noop ("Goa"), | ||
171 | gettext_noop ("Drum & Bass"), | ||
172 | gettext_noop ("Club-House"), | ||
173 | gettext_noop ("Hardcore"), | ||
174 | gettext_noop ("Terror"), | ||
175 | gettext_noop ("Indie"), | ||
176 | gettext_noop ("BritPop"), | ||
177 | gettext_noop ("Negerpunk"), | ||
178 | gettext_noop ("Polsk Punk"), | ||
179 | gettext_noop ("Beat"), | ||
180 | gettext_noop ("Christian Gangsta Rap"), | ||
181 | gettext_noop ("Heavy Metal"), | ||
182 | gettext_noop ("Black Metal"), | ||
183 | gettext_noop ("Crossover"), | ||
184 | gettext_noop ("Contemporary Christian"), | ||
185 | gettext_noop ("Christian Rock"), | ||
186 | gettext_noop ("Merengue"), | ||
187 | gettext_noop ("Salsa"), | ||
188 | gettext_noop ("Thrash Metal"), | ||
189 | gettext_noop ("Anime"), | ||
190 | gettext_noop ("JPop"), | ||
191 | gettext_noop ("Synthpop"), | ||
192 | }; | ||
193 | |||
194 | #define GENRE_NAME_COUNT \ | ||
195 | ((unsigned int)(sizeof genre_names / sizeof (const char *const))) | ||
196 | |||
197 | |||
198 | |||
199 | #define OK 0 | ||
200 | #define INVALID_ID3 1 | ||
201 | |||
202 | static void | ||
203 | trim (char *k) | ||
204 | { | ||
205 | while ((strlen (k) > 0) && (isspace (k[strlen (k) - 1]))) | ||
206 | k[strlen (k) - 1] = '\0'; | ||
207 | } | ||
208 | |||
209 | static int | ||
210 | get_id3 (const char *data, size_t size, id3tag * id3) | ||
211 | { | ||
212 | const char *pos; | ||
213 | |||
214 | if (size < 128) | ||
215 | return INVALID_ID3; | ||
216 | |||
217 | pos = &data[size - 128]; | ||
218 | if (0 != strncmp ("TAG", pos, 3)) | ||
219 | return INVALID_ID3; | ||
220 | pos += 3; | ||
221 | |||
222 | id3->title = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
223 | trim (id3->title); | ||
224 | pos += 30; | ||
225 | id3->artist = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
226 | trim (id3->artist); | ||
227 | pos += 30; | ||
228 | id3->album = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
229 | trim (id3->album); | ||
230 | pos += 30; | ||
231 | id3->year = EXTRACTOR_common_convert_to_utf8 (pos, 4, "ISO-8859-1"); | ||
232 | trim (id3->year); | ||
233 | pos += 4; | ||
234 | id3->comment = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
235 | trim (id3->comment); | ||
236 | if ( (pos[28] == '\0') && | ||
237 | (pos[29] != '\0') ) | ||
238 | { | ||
239 | /* ID3v1.1 */ | ||
240 | id3->track_number = pos[29]; | ||
241 | } | ||
242 | else | ||
243 | { | ||
244 | id3->track_number = 0; | ||
245 | } | ||
246 | pos += 30; | ||
247 | id3->genre = ""; | ||
248 | if (pos[0] < GENRE_NAME_COUNT) | ||
249 | id3->genre = dgettext (PACKAGE, genre_names[(unsigned) pos[0]]); | ||
250 | return OK; | ||
251 | } | ||
252 | |||
253 | |||
254 | #define ADD(s,t) do { if (0 != (ret = proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) goto FINISH; } while (0) | ||
255 | |||
256 | |||
257 | const char * | ||
258 | EXTRACTOR_id3_options () | ||
259 | { | ||
260 | return "want-tail"; | ||
261 | } | ||
262 | |||
263 | |||
264 | int | ||
265 | EXTRACTOR_id3_extract (const char *data, | ||
266 | size_t size, | ||
267 | EXTRACTOR_MetaDataProcessor proc, | ||
268 | void *proc_cls, | ||
269 | const char *options) | ||
270 | { | ||
271 | id3tag info; | ||
272 | char track[16]; | ||
273 | int ret; | ||
274 | |||
275 | fprintf (stderr, "called with %llu bytes\n", (unsigned long long) size); | ||
276 | if (OK != get_id3 (data, size, &info)) | ||
277 | return 0; | ||
278 | if (strlen (info.title) > 0) | ||
279 | ADD (info.title, EXTRACTOR_METATYPE_TITLE); | ||
280 | if (strlen (info.artist) > 0) | ||
281 | ADD (info.artist, EXTRACTOR_METATYPE_ARTIST); | ||
282 | if (strlen (info.album) > 0) | ||
283 | ADD (info.album, EXTRACTOR_METATYPE_ALBUM); | ||
284 | if (strlen (info.year) > 0) | ||
285 | ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR); | ||
286 | if (strlen (info.genre) > 0) | ||
287 | ADD (info.genre, EXTRACTOR_METATYPE_GENRE); | ||
288 | if (strlen (info.comment) > 0) | ||
289 | ADD (info.comment, EXTRACTOR_METATYPE_COMMENT); | ||
290 | if (info.track_number != 0) | ||
291 | { | ||
292 | snprintf(track, | ||
293 | sizeof(track), "%u", info.track_number); | ||
294 | ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); | ||
295 | } | ||
296 | FINISH: | ||
297 | free (info.title); | ||
298 | free (info.year); | ||
299 | free (info.album); | ||
300 | free (info.artist); | ||
301 | free (info.comment); | ||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | /* end of id3_extractor.c */ | ||
diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c index 2696431..a60754a 100644 --- a/src/plugins/mp3_extractor.c +++ b/src/plugins/mp3_extractor.c | |||
@@ -36,172 +36,6 @@ | |||
36 | #include <unistd.h> | 36 | #include <unistd.h> |
37 | #include <stdlib.h> | 37 | #include <stdlib.h> |
38 | 38 | ||
39 | typedef struct | ||
40 | { | ||
41 | char *title; | ||
42 | char *artist; | ||
43 | char *album; | ||
44 | char *year; | ||
45 | char *comment; | ||
46 | const char *genre; | ||
47 | unsigned int track_number; | ||
48 | } id3tag; | ||
49 | |||
50 | static const char *const genre_names[] = { | ||
51 | gettext_noop ("Blues"), | ||
52 | gettext_noop ("Classic Rock"), | ||
53 | gettext_noop ("Country"), | ||
54 | gettext_noop ("Dance"), | ||
55 | gettext_noop ("Disco"), | ||
56 | gettext_noop ("Funk"), | ||
57 | gettext_noop ("Grunge"), | ||
58 | gettext_noop ("Hip-Hop"), | ||
59 | gettext_noop ("Jazz"), | ||
60 | gettext_noop ("Metal"), | ||
61 | gettext_noop ("New Age"), | ||
62 | gettext_noop ("Oldies"), | ||
63 | gettext_noop ("Other"), | ||
64 | gettext_noop ("Pop"), | ||
65 | gettext_noop ("R&B"), | ||
66 | gettext_noop ("Rap"), | ||
67 | gettext_noop ("Reggae"), | ||
68 | gettext_noop ("Rock"), | ||
69 | gettext_noop ("Techno"), | ||
70 | gettext_noop ("Industrial"), | ||
71 | gettext_noop ("Alternative"), | ||
72 | gettext_noop ("Ska"), | ||
73 | gettext_noop ("Death Metal"), | ||
74 | gettext_noop ("Pranks"), | ||
75 | gettext_noop ("Soundtrack"), | ||
76 | gettext_noop ("Euro-Techno"), | ||
77 | gettext_noop ("Ambient"), | ||
78 | gettext_noop ("Trip-Hop"), | ||
79 | gettext_noop ("Vocal"), | ||
80 | gettext_noop ("Jazz+Funk"), | ||
81 | gettext_noop ("Fusion"), | ||
82 | gettext_noop ("Trance"), | ||
83 | gettext_noop ("Classical"), | ||
84 | gettext_noop ("Instrumental"), | ||
85 | gettext_noop ("Acid"), | ||
86 | gettext_noop ("House"), | ||
87 | gettext_noop ("Game"), | ||
88 | gettext_noop ("Sound Clip"), | ||
89 | gettext_noop ("Gospel"), | ||
90 | gettext_noop ("Noise"), | ||
91 | gettext_noop ("Alt. Rock"), | ||
92 | gettext_noop ("Bass"), | ||
93 | gettext_noop ("Soul"), | ||
94 | gettext_noop ("Punk"), | ||
95 | gettext_noop ("Space"), | ||
96 | gettext_noop ("Meditative"), | ||
97 | gettext_noop ("Instrumental Pop"), | ||
98 | gettext_noop ("Instrumental Rock"), | ||
99 | gettext_noop ("Ethnic"), | ||
100 | gettext_noop ("Gothic"), | ||
101 | gettext_noop ("Darkwave"), | ||
102 | gettext_noop ("Techno-Industrial"), | ||
103 | gettext_noop ("Electronic"), | ||
104 | gettext_noop ("Pop-Folk"), | ||
105 | gettext_noop ("Eurodance"), | ||
106 | gettext_noop ("Dream"), | ||
107 | gettext_noop ("Southern Rock"), | ||
108 | gettext_noop ("Comedy"), | ||
109 | gettext_noop ("Cult"), | ||
110 | gettext_noop ("Gangsta Rap"), | ||
111 | gettext_noop ("Top 40"), | ||
112 | gettext_noop ("Christian Rap"), | ||
113 | gettext_noop ("Pop/Funk"), | ||
114 | gettext_noop ("Jungle"), | ||
115 | gettext_noop ("Native American"), | ||
116 | gettext_noop ("Cabaret"), | ||
117 | gettext_noop ("New Wave"), | ||
118 | gettext_noop ("Psychedelic"), | ||
119 | gettext_noop ("Rave"), | ||
120 | gettext_noop ("Showtunes"), | ||
121 | gettext_noop ("Trailer"), | ||
122 | gettext_noop ("Lo-Fi"), | ||
123 | gettext_noop ("Tribal"), | ||
124 | gettext_noop ("Acid Punk"), | ||
125 | gettext_noop ("Acid Jazz"), | ||
126 | gettext_noop ("Polka"), | ||
127 | gettext_noop ("Retro"), | ||
128 | gettext_noop ("Musical"), | ||
129 | gettext_noop ("Rock & Roll"), | ||
130 | gettext_noop ("Hard Rock"), | ||
131 | gettext_noop ("Folk"), | ||
132 | gettext_noop ("Folk/Rock"), | ||
133 | gettext_noop ("National Folk"), | ||
134 | gettext_noop ("Swing"), | ||
135 | gettext_noop ("Fast-Fusion"), | ||
136 | gettext_noop ("Bebob"), | ||
137 | gettext_noop ("Latin"), | ||
138 | gettext_noop ("Revival"), | ||
139 | gettext_noop ("Celtic"), | ||
140 | gettext_noop ("Bluegrass"), | ||
141 | gettext_noop ("Avantgarde"), | ||
142 | gettext_noop ("Gothic Rock"), | ||
143 | gettext_noop ("Progressive Rock"), | ||
144 | gettext_noop ("Psychedelic Rock"), | ||
145 | gettext_noop ("Symphonic Rock"), | ||
146 | gettext_noop ("Slow Rock"), | ||
147 | gettext_noop ("Big Band"), | ||
148 | gettext_noop ("Chorus"), | ||
149 | gettext_noop ("Easy Listening"), | ||
150 | gettext_noop ("Acoustic"), | ||
151 | gettext_noop ("Humour"), | ||
152 | gettext_noop ("Speech"), | ||
153 | gettext_noop ("Chanson"), | ||
154 | gettext_noop ("Opera"), | ||
155 | gettext_noop ("Chamber Music"), | ||
156 | gettext_noop ("Sonata"), | ||
157 | gettext_noop ("Symphony"), | ||
158 | gettext_noop ("Booty Bass"), | ||
159 | gettext_noop ("Primus"), | ||
160 | gettext_noop ("Porn Groove"), | ||
161 | gettext_noop ("Satire"), | ||
162 | gettext_noop ("Slow Jam"), | ||
163 | gettext_noop ("Club"), | ||
164 | gettext_noop ("Tango"), | ||
165 | gettext_noop ("Samba"), | ||
166 | gettext_noop ("Folklore"), | ||
167 | gettext_noop ("Ballad"), | ||
168 | gettext_noop ("Power Ballad"), | ||
169 | gettext_noop ("Rhythmic Soul"), | ||
170 | gettext_noop ("Freestyle"), | ||
171 | gettext_noop ("Duet"), | ||
172 | gettext_noop ("Punk Rock"), | ||
173 | gettext_noop ("Drum Solo"), | ||
174 | gettext_noop ("A Cappella"), | ||
175 | gettext_noop ("Euro-House"), | ||
176 | gettext_noop ("Dance Hall"), | ||
177 | gettext_noop ("Goa"), | ||
178 | gettext_noop ("Drum & Bass"), | ||
179 | gettext_noop ("Club-House"), | ||
180 | gettext_noop ("Hardcore"), | ||
181 | gettext_noop ("Terror"), | ||
182 | gettext_noop ("Indie"), | ||
183 | gettext_noop ("BritPop"), | ||
184 | gettext_noop ("Negerpunk"), | ||
185 | gettext_noop ("Polsk Punk"), | ||
186 | gettext_noop ("Beat"), | ||
187 | gettext_noop ("Christian Gangsta Rap"), | ||
188 | gettext_noop ("Heavy Metal"), | ||
189 | gettext_noop ("Black Metal"), | ||
190 | gettext_noop ("Crossover"), | ||
191 | gettext_noop ("Contemporary Christian"), | ||
192 | gettext_noop ("Christian Rock"), | ||
193 | gettext_noop ("Merengue"), | ||
194 | gettext_noop ("Salsa"), | ||
195 | gettext_noop ("Thrash Metal"), | ||
196 | gettext_noop ("Anime"), | ||
197 | gettext_noop ("JPop"), | ||
198 | gettext_noop ("Synthpop"), | ||
199 | }; | ||
200 | |||
201 | #define GENRE_NAME_COUNT \ | ||
202 | ((unsigned int)(sizeof genre_names / sizeof (const char *const))) | ||
203 | |||
204 | |||
205 | #define MAX_MP3_SCAN_DEEP 16768 | 39 | #define MAX_MP3_SCAN_DEEP 16768 |
206 | const int max_frames_scan = 1024; | 40 | const int max_frames_scan = 1024; |
207 | enum | 41 | enum |
@@ -270,64 +104,15 @@ static const char * const layer_names[3] = { | |||
270 | #define SYSERR 1 | 104 | #define SYSERR 1 |
271 | #define INVALID_ID3 2 | 105 | #define INVALID_ID3 2 |
272 | 106 | ||
273 | static void | ||
274 | trim (char *k) | ||
275 | { | ||
276 | while ((strlen (k) > 0) && (isspace (k[strlen (k) - 1]))) | ||
277 | k[strlen (k) - 1] = '\0'; | ||
278 | } | ||
279 | |||
280 | static int | ||
281 | get_id3 (const char *data, size_t size, id3tag * id3) | ||
282 | { | ||
283 | const char *pos; | ||
284 | |||
285 | if (size < 128) | ||
286 | return INVALID_ID3; | ||
287 | |||
288 | pos = &data[size - 128]; | ||
289 | if (0 != strncmp ("TAG", pos, 3)) | ||
290 | return INVALID_ID3; | ||
291 | pos += 3; | ||
292 | |||
293 | id3->title = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
294 | trim (id3->title); | ||
295 | pos += 30; | ||
296 | id3->artist = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
297 | trim (id3->artist); | ||
298 | pos += 30; | ||
299 | id3->album = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
300 | trim (id3->album); | ||
301 | pos += 30; | ||
302 | id3->year = EXTRACTOR_common_convert_to_utf8 (pos, 4, "ISO-8859-1"); | ||
303 | trim (id3->year); | ||
304 | pos += 4; | ||
305 | id3->comment = EXTRACTOR_common_convert_to_utf8 (pos, 30, "ISO-8859-1"); | ||
306 | trim (id3->comment); | ||
307 | if ( (pos[28] == '\0') && | ||
308 | (pos[29] != '\0') ) | ||
309 | { | ||
310 | /* ID3v1.1 */ | ||
311 | id3->track_number = pos[29]; | ||
312 | } | ||
313 | else | ||
314 | { | ||
315 | id3->track_number = 0; | ||
316 | } | ||
317 | pos += 30; | ||
318 | id3->genre = ""; | ||
319 | if (pos[0] < GENRE_NAME_COUNT) | ||
320 | id3->genre = dgettext (PACKAGE, genre_names[(unsigned) pos[0]]); | ||
321 | return OK; | ||
322 | } | ||
323 | |||
324 | |||
325 | #define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) | 107 | #define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) |
326 | 108 | ||
327 | static int | 109 | /* mimetype = audio/mpeg */ |
328 | mp3parse (const unsigned char *data, size_t size, | 110 | int |
329 | EXTRACTOR_MetaDataProcessor proc, | 111 | EXTRACTOR_mp3_extract (const unsigned char *data, |
330 | void *proc_cls) | 112 | size_t size, |
113 | EXTRACTOR_MetaDataProcessor proc, | ||
114 | void *proc_cls, | ||
115 | const char *options) | ||
331 | { | 116 | { |
332 | unsigned int header; | 117 | unsigned int header; |
333 | int counter = 0; | 118 | int counter = 0; |
@@ -474,50 +259,4 @@ mp3parse (const unsigned char *data, size_t size, | |||
474 | return 0; | 259 | return 0; |
475 | } | 260 | } |
476 | 261 | ||
477 | |||
478 | #define ADD(s,t) do { if (0 != (ret = proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) goto FINISH; } while (0) | ||
479 | |||
480 | |||
481 | /* mimetype = audio/mpeg */ | ||
482 | int | ||
483 | EXTRACTOR_mp3_extract (const char *data, | ||
484 | size_t size, | ||
485 | EXTRACTOR_MetaDataProcessor proc, | ||
486 | void *proc_cls, | ||
487 | const char *options) | ||
488 | { | ||
489 | id3tag info; | ||
490 | char track[16]; | ||
491 | int ret; | ||
492 | |||
493 | if (0 != get_id3 (data, size, &info)) | ||
494 | return 0; | ||
495 | if (strlen (info.title) > 0) | ||
496 | ADD (info.title, EXTRACTOR_METATYPE_TITLE); | ||
497 | if (strlen (info.artist) > 0) | ||
498 | ADD (info.artist, EXTRACTOR_METATYPE_ARTIST); | ||
499 | if (strlen (info.album) > 0) | ||
500 | ADD (info.album, EXTRACTOR_METATYPE_ALBUM); | ||
501 | if (strlen (info.year) > 0) | ||
502 | ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR); | ||
503 | if (strlen (info.genre) > 0) | ||
504 | ADD (info.genre, EXTRACTOR_METATYPE_GENRE); | ||
505 | if (strlen (info.comment) > 0) | ||
506 | ADD (info.comment, EXTRACTOR_METATYPE_COMMENT); | ||
507 | if (info.track_number != 0) | ||
508 | { | ||
509 | snprintf(track, | ||
510 | sizeof(track), "%u", info.track_number); | ||
511 | ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); | ||
512 | } | ||
513 | ret = mp3parse ((const unsigned char *) data, size, proc, proc_cls); | ||
514 | FINISH: | ||
515 | free (info.title); | ||
516 | free (info.year); | ||
517 | free (info.album); | ||
518 | free (info.artist); | ||
519 | free (info.comment); | ||
520 | return ret; | ||
521 | } | ||
522 | |||
523 | /* end of mp3_extractor.c */ | 262 | /* end of mp3_extractor.c */ |