libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 2fd943df51173e8a19f84e4efc8ec3df83c5ef23
parent 99ea22f7293b5b324b3a715f0a7a4eb243c709b0
Author: Christian Grothoff <christian@grothoff.org>
Date:   Fri,  6 May 2005 11:17:48 +0000

load-unload

Diffstat:
M src/main/Extractor.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M src/main/libextractor_python.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 115 insertions(+), 8 deletions(-)

diff --git a/src/main/Extractor.py b/src/main/Extractor.py @@ -1,6 +1,31 @@ """Extractor.py -Modul docstring... + This file is part of libextractor. + (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + +libextractor is a simple library for keyword extraction. libextractor +does not support all formats but supports a simple plugging mechanism +such that you can quickly add extractors for additional formats, even +without recompiling libextractor. libextractor typically ships with a +dozen helper-libraries that can be used to obtain keywords from common +file-types. + +libextractor is a part of the GNU project (http://www.gnu.org/). """ import _extractor @@ -13,20 +38,58 @@ __date__ = "5/5/2005" class Extractor(object): """ + Main class for extracting meta-data with GNU libextractor. + + You may create multiple instances of Extractor to use + different sets of plugins. Initially each Extractor + will start with the default set of plugins. + + Use the extract method to obtain keywords from a file. + + Use the load and unload methods to change the list of + plugins that should be used. 
""" def __init__(self): self.__plugins = _extractor.loadDefaultLibraries() def __del__(self): - _extractor.removeAll(self.__plugins) -# def load(plugs): -# self.__plugins = _extractor.load(self.__plugins, plugs) -# return None -# def unload(plugs): -# self.__plugins = _extractor.unload(self.__plugins, plugs) -# return None + _extractor.removeAll(self.__plugins) + def load(self,plugs): + """ + Load certain plugins. Invoke with a string with the names + of the plugins that should be loaded. For example, + + 'libextractor_filename:-libextractor_split' + + will prepend the extractor that just adds the filename as a + keyword and append (runs last) the extractor that splits + keywords at whitespaces and punctuations. + + No errors are reported if any of the listed plugins are not + found. + """ + self.__plugins = _extractor.load(self.__plugins, plugs) + return None + def unload(self,plugs): + """ + Unload a plugin. Pass the name of the plugin that is to + be unloaded. Only one plugin can be unloaded at a time. + For example, + + 'libextractor_pdf' + + unloads the PDF extractor (if loaded). No errors are + reported if no matching plugin is found. + """ + self.__plugins = _extractor.unload(self.__plugins, plugs) + return None def extract(self,filename): """Pass a filename to extract keywords. + + This function returns a list of Keyword objects. + If the file cannot be opened or cannot be found, + the list will be empty. The list can also be empty + if no metadata was found for the file. 
""" return _extractor.extract(self.__plugins, filename, Keyword) diff --git a/src/main/libextractor_python.c b/src/main/libextractor_python.c @@ -37,6 +37,42 @@ static PyObject * EXTRACTOR_PY_removeAll(PyObject * self, return Py_None; } +static PyObject * EXTRACTOR_PY_load(PyObject * self, + PyObject * args) { + PyObject * py_exts; + char * name; + EXTRACTOR_ExtractorList * plugins; + + PyArg_ParseTuple(args, + "Os", + &py_exts, + &name); + + plugins = + EXTRACTOR_loadConfigLibraries((EXTRACTOR_ExtractorList*) PyCObject_AsVoidPtr(py_exts), + name); + return PyCObject_FromVoidPtr(plugins, NULL); +} + + +static PyObject * EXTRACTOR_PY_unload(PyObject * self, + PyObject * args) { + PyObject * py_exts; + char * name; + EXTRACTOR_ExtractorList * plugins; + + PyArg_ParseTuple(args, + "Os", + &py_exts, + &name); + + plugins = + EXTRACTOR_removeLibrary((EXTRACTOR_ExtractorList*) PyCObject_AsVoidPtr(py_exts), + name); + return PyCObject_FromVoidPtr(plugins, NULL); +} + + static PyObject * EXTRACTOR_PY_getKeywordTypeAsString(PyObject * self, PyObject * args) { unsigned int type; @@ -101,6 +137,14 @@ static PyMethodDef ExtractorMethods[] = { EXTRACTOR_PY_removeAll, METH_VARARGS, "unload the given set of libextractor plugins (pass plugins as argument)" }, + { "load", + EXTRACTOR_PY_load, + METH_VARARGS, + "load the given set of libextractor plugins (pass plugins names as argument)" }, + { "unload", + EXTRACTOR_PY_unload, + METH_VARARGS, + "unload the given libextractor plugin (pass plugin name as argument)" }, { "extract", EXTRACTOR_PY_extract, METH_VARARGS,