libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 2cc3ea0265d275d8be5c5d7d76c49e948591b9ec
parent 8827e0b9239b23f96e7446cc21d99a146c99a11e
Author: Christian Grothoff <christian@grothoff.org>
Date:   Fri,  6 May 2005 09:47:28 +0000

draft for py api

Diffstat:
A src/main/Extractor.py | 28 ++++++++++++++++++++++++++++
M src/main/Makefile.am | 8 ++++++++
M src/main/extract.py | 20 ++++++++------------
D src/main/extractor.i | 58 ----------------------------------------------------------
M src/main/libextractor_python.c | 42 ++++++++++++++++++++++++++++++------------
A src/main/libextractor_python_setup.py | 11 +++++++++++
6 files changed, 85 insertions(+), 82 deletions(-)

diff --git a/src/main/Extractor.py b/src/main/Extractor.py @@ -0,0 +1,28 @@ +import _extractor + +class Extractor(object): + def __init__(self): + self.plugins = _extractor.EXTRACTOR_PY_loadDefaultLibraries() + def __del__(self): + extractor.EXTRACTOR_PY_removeAll(self.plugins) +# def load(plugs): +# self.plugins = _extractor.EXTRACTOR_PY_load(self.plugins, plugs) +# return None +# def unload(plugs): +# self.plugins = _extractor.EXTRACTOR_PY_unload(self.plugins, plugs) +# return None + def extract(self,filename): + return _extractor.EXTRACTOR_PY_extract(self.plugins, filename, Keyword) + +class Keyword(object): + def __init__(self,type,value): + self.type = type + self.value = value.decode("utf-8") + def __repr__(self): + return u"%s(%i,%s)" % (self.__class__.__name__,self.type,self.value) + def __str__(self): + return u"%s: %s" % (self.getType(), self.getValue()) + def getType(self): + return _extractor.EXTRACTOR_PY_getKeywordTypeAsStringType(self.type).decode("utf-8") + def getValue(self): + return self.value diff --git a/src/main/Makefile.am b/src/main/Makefile.am @@ -34,6 +34,8 @@ libextractor_la_DEPENDENCIES = \ EXTRA_DIST = \ winproc.c \ libextractor_python.c \ + Extractor.py \ + libextractor_python_setup.py \ extract.py \ iconv.c @@ -51,3 +53,9 @@ extract_SOURCES = \ getopt.c \ getopt.h \ getopt1.c + + +all-local: + python libextractor_python_setup.py build +install-exec-local: + python libextractor_python_setup.py --prefix=$(DESTDIR) install diff --git a/src/main/extract.py b/src/main/extract.py @@ -1,13 +1,9 @@ -import extractor - -def getKeywordTypeAsString(t): - return extractor.EXTRACTOR_PY_getKeywordTypeAsString(t) - -class Extractor: - def __init__(self): - self.plugins = extractor.EXTRACTOR_PY_loadDefaultLibraries(self) - def __del__(self): - extractor.EXTRACTOR_PY_removeAll(self, self.plugins) - def extract(filename): - extractor.EXTRACTOR_PY_extract(self, self.plugins, filename) +import Extractor +import sys +xtract = Extractor.Extractor() +for arg 
in sys.argv: + print "Keywords from " + arg + keys = xtract.extract(arg); + for i in keys: + print i diff --git a/src/main/extractor.i b/src/main/extractor.i @@ -1,58 +0,0 @@ -/* libextractor interface for SWIG */ -/* extractor.i */ -%module extractor -%{ -%} - -typedef struct EXTRACTOR_Keywords { - char * keyword; - EXTRACTOR_KeywordType keywordType; - struct EXTRACTOR_Keywords * next; -} EXTRACTOR_KeywordList; - -EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries(); - -const char * EXTRACTOR_getKeywordTypeAsString(const EXTRACTOR_KeywordType type); - -EXTRACTOR_ExtractorList * -EXTRACTOR_loadConfigLibraries(EXTRACTOR_ExtractorList * prev, - const char * config); - -EXTRACTOR_ExtractorList * -EXTRACTOR_addLibrary(EXTRACTOR_ExtractorList * prev, - const char * library); - -EXTRACTOR_ExtractorList * -EXTRACTOR_addLibraryLast(EXTRACTOR_ExtractorList * prev, - const char * library); - -EXTRACTOR_ExtractorList * -EXTRACTOR_removeLibrary(EXTRACTOR_ExtractorList * prev, - const char * library); - -void EXTRACTOR_removeAll(EXTRACTOR_ExtractorList * libraries); - -EXTRACTOR_KeywordList * -EXTRACTOR_getKeywords(EXTRACTOR_ExtractorList * extractor, - const char * filename); - -EXTRACTOR_KeywordList * -EXTRACTOR_removeDuplicateKeywords(EXTRACTOR_KeywordList * list, - const unsigned int options); - - -EXTRACTOR_KeywordList * -EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list); - - -void EXTRACTOR_freeKeywords(EXTRACTOR_KeywordList * keywords); - -const char * EXTRACTOR_extractLast(const EXTRACTOR_KeywordType type, - EXTRACTOR_KeywordList * keywords); - -const char * EXTRACTOR_extractLastByString (const char * type, - EXTRACTOR_KeywordList * keywords); - -unsigned int EXTRACTOR_countKeywords(EXTRACTOR_KeywordList * keywords); - - diff --git a/src/main/libextractor_python.c b/src/main/libextractor_python.c @@ -18,8 +18,8 @@ Boston, MA 02111-1307, USA. 
*/ - -#include <python/Python.h> +#include "extractor.h" +#include <Python.h> static PyObject * EXTRACTOR_PY_loadDefaultLibraries(PyObject * self, PyObject * args) { @@ -51,23 +51,33 @@ static PyObject * EXTRACTOR_PY_getKeywordTypeAsString(PyObject * self, static PyObject * EXTRACTOR_PY_extract(PyObject * self, PyObject * args) { PyObject * py_exts; + PyObject * py_clzz; + PyObject * py_elem; char * filename; EXTRACTOR_ExtractorList * ex; EXTRACTOR_KeywordList * keys; EXTRACTOR_KeywordList * pos; PyObject * ret; - PyArg_ParseTuple(args, "Os", &py_exts, &filename); + PyArg_ParseTuple(args, + "OsO", + &py_exts, + &filename, + &py_clzz); ex = PyCObject_AsVoidPtr(py_exts); keys = EXTRACTOR_getKeywords(ex, filename); ret = PyList_New(0); pos = keys; while (pos != NULL) { + py_elem = PyObject_Call(py_clzz, + Py_BuildValue("(OO)", + PyInt_FromLong((long)pos->keywordType), + PyString_FromString(pos->keyword)), + NULL); PyList_Append(ret, - Py_BuildValue("(OO)", - PyInt_FromLong((long)pos->keywordType), - PyString_FromString(pos->keyword))); + py_elem); + Py_DECREF(py_elem); pos = pos->next; } EXTRACTOR_freeKeywords(keys); @@ -75,19 +85,27 @@ static PyObject * EXTRACTOR_PY_extract(PyObject * self, } static PyMethodDef ExtractorMethods[] = { - { "getKeywordTypeAsString", EXTRACTOR_PY_getKeywordTypeAsString, METH_VARARGS, + { "getKeywordTypeAsString", + EXTRACTOR_PY_getKeywordTypeAsString, + METH_VARARGS, "convert a keyword type (int) to the string describing the type" }, - { "loadDefaultLibraries", EXTRACTOR_PY_loadDefaultLibraries, METH_VARARGS, + { "loadDefaultLibraries", + EXTRACTOR_PY_loadDefaultLibraries, + METH_VARARGS, "load the default set of libextractor plugins (returns the plugins)" }, - { "removeAll", EXTRACTOR_PY_removeAll, METH_VARARGS, + { "removeAll", + EXTRACTOR_PY_removeAll, + METH_VARARGS, "unload the given set of libextractor plugins (pass plugins as argument)" }, - { "extract", EXTRACTOR_PY_extract, METH_VARARGS, + { "extract", + EXTRACTOR_PY_extract, + 
METH_VARARGS, "extract meta data from a file (pass plugins and filename as arguments, returns vector of meta-data)" }, { NULL, NULL, 0, NULL } }; PyMODINIT_FUNC -initextractor() { - Py_InitModule("extractor", ExtractorMethods); +init_extractor() { + Py_InitModule("_extractor", ExtractorMethods); } diff --git a/src/main/libextractor_python_setup.py b/src/main/libextractor_python_setup.py @@ -0,0 +1,11 @@ +from distutils.core import Extension, setup + +cmod = Extension(sources=["libextractor_python.c"], + module="_extractor") + +setup(name="Extractor", + version="0.1", + extension=[cmod] + sources=["Extractor.py"], + author="Christian Grothoff, Heiko Wundram") +