libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 6abdf53b9880cb5992400697c2176ef11ebdbe24
parent 7293a111119ea36d2d67be2a811c1ac4dad050f9
Author: Heiko Wundram <modelnine@ceosg.de>
Date:   Sat,  7 May 2005 23:36:07 +0000

Really add libextractor_python2.c and libextractor_python3.c, both of 
which were bad because of a conflict in 
org_gnunet_libextractor_Extractor.h.


Diffstat:
Asrc/main/libextractor_python2.c | 569+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main/libextractor_python3.c | 498+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/main/libextractor_python_setup.py | 6+++---
3 files changed, 1070 insertions(+), 3 deletions(-)

diff --git a/src/main/libextractor_python2.c b/src/main/libextractor_python2.c @@ -0,0 +1,569 @@ +/* libextractor_python.c + +libextractor-Binding to Python. */ + +#include <Python.h> +#include "extractor.h" + +/* Typedefs. */ + +typedef struct { + PyObject_HEAD + PyObject *moduleList; +} Extractor; + +typedef struct { + PyObject_HEAD + EXTRACTOR_ExtractorList *module; +} Module; + +typedef struct { + PyObject_HEAD + PyObject *keywordList; +} KeywordList; + +typedef struct { + PyObject_HEAD + EXTRACTOR_KeywordList *keyword; +} Keyword; + +/* Types. */ + +static PyTypeObject ExtractorType; +static PyTypeObject ModuleType; +static PyTypeObject KeywordListType; +static PyTypeObject KeywordType; + +/* Extractor type declarations. */ + +static PyObject *Extractor_new(PyTypeObject *type, PyObject *args, + PyObject *kwargs) +{ + Extractor *self = NULL; + + if( !( self = (Extractor*)type->tp_alloc(type,0) ) ) + goto error; + if( !( self->moduleList = PyList_New(0) ) ) + goto error; + + goto finish; + + error: + Py_XDECREF(self); + self = NULL; + + finish: + return (PyObject*)self; +} + +static int Extractor_init(Extractor *self, PyObject *args, PyObject *kwargs) +{ + PyObject *conf = NULL, *conf_iter = NULL, *conf_item = NULL; + EXTRACTOR_ExtractorList *elist = NULL, *ecur = NULL; + Module *cur_mod = NULL; + char *conf_str = NULL; + char *kwargs_list[] = {"config",NULL}; + int rv = 0; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"|s:__init__",kwargs_list, + &conf_str) ) { + PyErr_Clear(); + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"O:__init__",kwargs_list, + &conf) ) + goto error; + } + + if( !conf ) { + if( conf_str ) + elist = EXTRACTOR_loadConfigLibraries(NULL,conf_str); + else + elist = EXTRACTOR_loadDefaultLibraries(); + + ecur = elist; + while( ecur ) { + elist = ecur->next; + + if( !( cur_mod = PyObject_NEW(Module,&ModuleType) ) ) + goto error; + cur_mod->module = ecur; + + if( PyList_Append(self->moduleList,(PyObject*)cur_mod) ) + goto error; + + ecur->next 
= NULL; + ecur = elist; + + Py_DECREF(cur_mod); + cur_mod = NULL; + } + } else { + if( !( conf_iter = PyObject_GetIter(conf) ) ) { + PyErr_Clear(); + + if( !PyObject_IsInstance(conf,(PyObject*)&ModuleType) ) + goto error; + + if( PyList_Append(self->moduleList,conf) ) + goto error; + } else { + while( ( conf_item = PyIter_Next(conf_iter) ) ) { + if( !( conf_str = PyString_AsString(conf_item) ) ) { + if( !PyObject_IsInstance(conf_item,(PyObject*)&ModuleType) ) + goto error; + + if( PyList_Append(self->moduleList,conf_item) ) + goto error; + } else { + elist = EXTRACTOR_addLibrary(NULL,conf_str); + if( elist ) { + if( !( cur_mod = PyObject_NEW(Module,&ModuleType) ) ) + goto error; + cur_mod->module = elist; + + if( PyList_Append(self->moduleList,(PyObject*)cur_mod) ) + goto error; + + Py_DECREF(cur_mod); + cur_mod = NULL; + } + } + + Py_DECREF(conf_item); + conf_item = NULL; + } + + Py_DECREF(conf_iter); + conf_iter = NULL; + } + } + + goto finish; + + error: + if( ecur ) + EXTRACTOR_removeAll(ecur); + Py_XDECREF(cur_mod); + Py_XDECREF(conf_item); + rv = -1; + + finish: + return rv; +} + +static PyObject *Extractor_iter(Extractor *self) +{ + return PyObject_GetIter(self->moduleList); +} + +static void Extractor_dealloc(Extractor *self) +{ + Py_DECREF(self->moduleList); + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject *Extractor_extract(Extractor *self, PyObject *args, + PyObject *kwargs) +{ + PyObject *mlist = NULL; + int i = 0, mlist_len = 0; + Module *mlist_curitem = NULL; + EXTRACTOR_ExtractorList *efirst = NULL, *elist = NULL; + EXTRACTOR_KeywordList *kwlist = NULL; + KeywordList *rv = NULL; + Keyword *kw = NULL; + char *filename = NULL; + char *kwargs_list[] = {"filename",NULL}; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"s:extract",kwargs_list, + &filename) ) + goto error; + + mlist_len = PyList_Size(self->moduleList); + if( !( mlist = PyList_New(mlist_len) ) ) + goto error; + + for( i = 0; i < mlist_len; i++ ) { + mlist_curitem = 
(Module*)PyList_GET_ITEM(self->moduleList,i); + Py_INCREF(mlist_curitem); + PyList_SET_ITEM(mlist,i,(PyObject*)mlist_curitem); + if( !efirst ) + efirst = elist = malloc(sizeof(EXTRACTOR_ExtractorList)); + else { + elist->next = malloc(sizeof(EXTRACTOR_ExtractorList)); + elist = elist->next; + } + memcpy(elist,mlist_curitem->module,sizeof(EXTRACTOR_ExtractorList)); + } + + Py_BEGIN_ALLOW_THREADS; + kwlist = EXTRACTOR_getKeywords(efirst,filename); + Py_END_ALLOW_THREADS; + + if( !( rv = PyObject_NEW(KeywordList,&KeywordListType) ) ) + goto error; + rv->keywordList = NULL; + if( !( rv->keywordList = PyList_New(0) ) ) + goto error; + + while( kwlist ) { + if( !( kw = PyObject_NEW(Keyword,&KeywordType) ) ) + goto error; + kw->keyword = kwlist; + kwlist = kwlist->next; + kw->keyword->next = NULL; + + if( PyList_Append(rv->keywordList,(PyObject*)kw) ) + goto error; + + Py_DECREF(kw); + kw = NULL; + } + + goto finish; + + error: + Py_XDECREF(kw); + if( kwlist ) + EXTRACTOR_freeKeywords(kwlist); + Py_XDECREF(rv); + rv = NULL; + + finish: + Py_XDECREF(mlist); + return (PyObject*)rv; +} + +static PyMethodDef Extractor_methods[] = { + {"extract",(PyCFunction)Extractor_extract,METH_VARARGS|METH_KEYWORDS, + "Extract data from file given as filename."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject ExtractorType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "extractor.Extractor", /*tp_name*/ + sizeof(Extractor), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)Extractor_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "Extractor objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 
(getiterfunc)Extractor_iter, /* tp_iter */ + 0, /* tp_iternext */ + Extractor_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Extractor_init, /* tp_init */ + 0, /* tp_alloc */ + Extractor_new, /* tp_new */ +}; + +/* Module type. */ + +/* KeywordList type. */ + +static PyObject *KeywordList_new(PyTypeObject *type, PyObject *args, + PyObject *kwargs) +{ + KeywordList *self = NULL; + + if( !( self = (KeywordList*)type->tp_alloc(type,0) ) ) + goto error; + if( !( self->keywordList = PyList_New(0) ) ) + goto error; + + goto finish; + + error: + Py_XDECREF(self); + self = NULL; + + finish: + return (PyObject*)self; +} + +static int KeywordList_init(KeywordList *self, PyObject *args, + PyObject *kwargs) +{ + PyObject *kw = NULL, *kw_iter = NULL, *kw_item = NULL; + Keyword *cur_kw = NULL; + int curtype = 0; + char *curvalue = NULL; + char *kwargs_list[] = {"keywords",NULL}; + int rv = 0; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"|O:__init__",kwargs_list, + &kw) ) + goto error; + + if( kw ) + if( PyObject_IsInstance(kw,(PyObject*)&KeywordType) ) { + if( PyList_Append(self->keywordList,kw) ) + goto error; + } else if( PyArg_ParseTuple(kw,"(is)",&curtype,&curvalue) ) { + if( !( cur_kw = PyObject_NEW(Keyword,&KeywordType) ) ) + goto error; + cur_kw->keyword = NULL; + + cur_kw->keyword = malloc(sizeof(EXTRACTOR_KeywordList)); + cur_kw->keyword->keyword = strdup(curvalue); + cur_kw->keyword->keywordType = curtype; + cur_kw->keyword->next = NULL; + + if( PyList_Append(self->keywordList,(PyObject*)cur_kw) ) + goto error; + + Py_DECREF(cur_kw); + cur_kw = NULL; + } else { + PyErr_Clear(); + if( !( kw_iter = PyObject_GetIter(kw) ) ) + goto error; + + while( ( kw_item = PyIter_Next(kw_iter) ) ) { + if( PyObject_IsInstance(kw_item,(PyObject*)&KeywordType) ) { + if( PyList_Append(self->keywordList,kw_item) ) + goto error; + } else { + 
if( !PyArg_ParseTuple(kw_item,"(is)",&curtype,&curvalue) ) + goto error; + + if( !( cur_kw = PyObject_NEW(Keyword,&KeywordType) ) ) + goto error; + cur_kw->keyword = NULL; + + cur_kw->keyword = malloc(sizeof(EXTRACTOR_KeywordList)); + cur_kw->keyword->keyword = strdup(curvalue); + cur_kw->keyword->keywordType = curtype; + cur_kw->keyword->next = NULL; + + if( PyList_Append(self->keywordList,(PyObject*)cur_kw) ) + goto error; + + Py_DECREF(cur_kw); + cur_kw = NULL; + } + + Py_DECREF(kw_item); + kw_item = NULL; + } + + Py_DECREF(kw_iter); + kw_iter = NULL; + } + + goto finish; + + error: + Py_XDECREF(cur_kw); + Py_XDECREF(kw_item); + rv = -1; + + finish: + return rv; +} + +static PyObject *KeywordList_iter(KeywordList *self) +{ + return PyObject_GetIter(self->keywordList); +} + +static void KeywordList_dealloc(KeywordList *self) +{ + Py_XDECREF(self->keywordList); + self->ob_type->tp_free((PyObject*)self); +} + +static PyMethodDef KeywordList_methods[] = { + {NULL} /* Sentinel */ +}; + +static PyTypeObject KeywordListType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "extractor.KeywordList", /*tp_name*/ + sizeof(KeywordList), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)KeywordList_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "KeywordList objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + (getiterfunc)KeywordList_iter, /* tp_iter */ + 0, /* tp_iternext */ + KeywordList_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)KeywordList_init, 
/* tp_init */ + 0, /* tp_alloc */ + KeywordList_new, /* tp_new */ +}; + +/* Keyword type. */ + +static PyObject *Keyword_new(PyTypeObject *type, PyObject *args, + PyObject *kwargs) +{ + Keyword *self = NULL; + char *name = NULL; + char *kwargs_list[] = {"name",NULL}; + + if( !( self = (Keyword*)type->tp_alloc(type,0) ) ) + goto error; + if( !( self->keyword = malloc(sizeof(EXTRACTOR_KeywordList)) ) ) + goto error; + + self->keyword->keyword = strdup(""); + self->keyword->keywordType = 0; + self->keyword->next = NULL; + + goto finish; + + error: + Py_XDECREF(self); + self = NULL; + + finish: + return (PyObject*)self; +} + +static int Keyword_init(Keyword *self, PyObject *args, PyObject *kwargs) +{ + int type = 0; + char *value = NULL; + char *kwargs_list[] = {"type","value",NULL}; + int rv = 0; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"is:__init__",kwargs_list, + &type,&value) ) { + PyErr_Clear(); + if( !PyArg_ParseTupleAndKeywords(args,kwargs,":__init__",kwargs_list) ) + goto error; + + goto finish; + } + + free(self->keyword->keyword); + self->keyword->keyword = strdup(value); + self->keyword->keywordType = type; + + goto finish; + + error: + rv = -1; + + finish: + return rv; +} + +static void Keyword_dealloc(Keyword *self) +{ + EXTRACTOR_freeKeywords(self->keyword); + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject *Keyword_getType(Keyword *self, PyObject *args) +{ + return PyInt_FromLong(self->keyword->keywordType); +} + +static PyObject *Keyword_getValue(Keyword *self, PyObject *args) +{ + return PyString_FromString(self->keyword->keyword); +} + +static PyMethodDef Keyword_methods[] = { + {"getType",(PyCFunction)Keyword_getType,METH_NOARGS, + "Retrieve type of keyword."}, + {"getValue",(PyCFunction)Keyword_getValue,METH_NOARGS, + "Retrieve value of keyword."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject KeywordType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "extractor.Keyword", /*tp_name*/ + sizeof(Keyword), /*tp_basicsize*/ 
+ 0, /*tp_itemsize*/ + (destructor)Keyword_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "Keyword objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Keyword_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Keyword_init, /* tp_init */ + 0, /* tp_alloc */ + Keyword_new, /* tp_new */ +}; + +/* Module. */ diff --git a/src/main/libextractor_python3.c b/src/main/libextractor_python3.c @@ -0,0 +1,498 @@ +/* libextractor_python.c + --------------------- + + Implements the Python wrapper for libextractor. The wrapper builds on the + Python type module, which wraps a single module, over extractor, which + implements the extractor from modules, up to keyword(list), which implements + keyword handling. */ + +/* Includes. */ + +#include <Python.h> +#include "extractor.h" + +/* Typedefs. */ + +typedef struct { + PyObject_HEAD + PyObject *mlist; + int locks; +} ModuleList; + +typedef struct { + PyObject_HEAD + EXTRACTOR_ExtractorList *module; + ModuleList *mlist; +} Module; + +/* Type objects. */ + +static PyTypeObject ModuleListType; +static PyTypeObject ModuleType; + +/* Module list type. 
*/ + +static inline int ModuleList_checkModule(Module *arg) +{ + if( !PyObject_IsInstance((PyObject*)arg,(PyObject*)&ModuleType) ) { + PyErr_SetString(PyExc_TypeError,"append only accepts a Module."); + return -1; + } + + if( arg->mlist ) { + PyErr_SetString(PyExc_TypeError,"cannot take ownership of module."); + return -1; + } + + return 0; +} + +static PyObject *ModuleList_prepend(ModuleList *self, Module *arg) +{ + PyObject *rv = NULL; + Module *first = NULL; + int mlistlen = 0; + + if( ModuleList_checkModule(arg) ) + goto error; + + mlistlen = PyList_GET_SIZE(self->mlist); + if( mlistlen ) { + first = (Module*)PyList_GET_ITEM(self->mlist,0); + arg->module->next = first->module; + } + + if( PyList_Insert(self->mlist,0,(PyObject*)arg) ) + goto error; + arg->mlist = self; + Py_INCREF(self); + + rv = (PyObject*)arg; + Py_INCREF(rv); + + goto finish; + + error: + Py_XDECREF(rv); + rv = NULL; + + finish: + return (PyObject*)rv; +} + +static PyObject *ModuleList_append(ModuleList *self, Module *arg) +{ + PyObject *rv = NULL; + Module *last = NULL; + int mlistlen = 0; + + if( ModuleList_checkModule(arg) ) + goto error; + + mlistlen = PyList_GET_SIZE(self->mlist); + if( mlistlen ) { + last = (Module*)PyList_GET_ITEM(self->mlist,mlistlen-1); + last->module->next = arg->module; + } + + if( PyList_Append(self->mlist,(PyObject*)arg) ) + goto error; + arg->mlist = self; + Py_INCREF(self); + + rv = (PyObject*)arg; + Py_INCREF(rv); + + goto finish; + + error: + Py_XDECREF(rv); + rv = NULL; + + finish: + return (PyObject*)rv; +} + +static PyObject *ModuleList_new(PyTypeObject *type, PyObject *args, + PyObject *kwargs) +{ + ModuleList *self = NULL; + + if( !( self = (ModuleList*)type->tp_alloc(type,0) ) ) + goto error; + self->locks = 0; + + if( !( self->mlist = PyList_New(0) ) ) + goto error; + + goto finish; + + error: + Py_XDECREF(self); + self = NULL; + + finish: + return (PyObject*)self; +} + +static int ModuleList_init(ModuleList *self, PyObject *args, PyObject *kwargs) +{ 
+ PyObject *mod = NULL, *mod_iter = NULL, *mod_item = NULL; + EXTRACTOR_ExtractorList *elist = NULL, *ecur = NULL; + char *kwargs_list[] = {"modules",NULL}; + int rv = 0; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"|O:__init__",kwargs_list, + &mod) ) + goto error; + + if( !mod || mod == Py_None || PyString_Check(mod) ) { + if( !mod || mod == Py_None ) + elist = EXTRACTOR_loadDefaultLibraries(); + else + elist = EXTRACTOR_loadConfigLibraries(NULL,PyString_AsString(mod)); + + ecur = elist; + while( ecur ) { + if( !( mod_item = (PyObject*)PyObject_GC_New(Module,&ModuleType) ) ) + goto error; + + elist = ecur; + ecur = elist->next; + elist->next = NULL; + + ((Module*)mod_item)->module = elist; + ((Module*)mod_item)->mlist = NULL; + + if( !ModuleList_append(self,(Module*)mod_item) ) + goto error; + Py_DECREF(mod_item); + mod_item = NULL; + } + } else if( PyObject_IsInstance(mod,(PyObject*)&ModuleType) ) { + if( !ModuleList_append(self,(Module*)mod) ) + goto error; + } else { + if( !( mod_iter = PyObject_GetIter(mod) ) ) + goto error; + + while( ( mod_item = PyIter_Next(mod_iter) ) ) { + if( !ModuleList_append(self,(Module*)mod_item) ) + goto error; + Py_DECREF(mod_item); + mod_item = NULL; + } + } + + goto finish; + + error: + EXTRACTOR_removeAll(ecur); + Py_XDECREF(mod_item); + rv = -1; + + finish: + Py_XDECREF(mod_iter); + return rv; +} + +static PyObject *ModuleList_repr(ModuleList *self) +{ + return PyString_FromFormat("<ModuleList: %i modules>", + PyList_GET_SIZE(self->mlist)); +} + +static int ModuleList_traverse(ModuleList *self, visitproc visit, void *arg) +{ + Py_VISIT(self->mlist); + return 0; +} + +static int ModuleList_clear(ModuleList *self) +{ + Py_CLEAR(self->mlist); + return 0; +} + +static void ModuleList_dealloc(ModuleList *self) +{ + ModuleList_clear(self); + self->ob_type->tp_free((PyObject*)self); +} + +static PyMethodDef ModuleList_methods[] = { + {"prepend",(PyCFunction)ModuleList_prepend,METH_O, + "Prepend a single module to the 
structure."}, + {"append",(PyCFunction)ModuleList_append,METH_O, + "Append a single module to the structure."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject ModuleListType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "extractor.ModuleList", /*tp_name*/ + sizeof(ModuleList), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)ModuleList_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + (reprfunc)ModuleList_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + "ModuleList objects", /* tp_doc */ + (traverseproc)ModuleList_traverse, /* tp_traverse */ + (inquiry)ModuleList_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + ModuleList_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)ModuleList_init, /* tp_init */ + 0, /* tp_alloc */ + ModuleList_new, /* tp_new */ +}; + +/* Module type. */ + +static EXTRACTOR_KeywordList *Module_extractMethod(const char *filename, + char *data, size_t filesize, + EXTRACTOR_KeywordList *next, + const char *options) +{ + Module *self = NULL; + + self = (Module*)atoi(options); /* convert back from string repr of self. 
*/ + + printf("In the extractor with object %i.",(int)self); + return next; +} + +static PyObject *Module_new(PyTypeObject *type, PyObject *args, + PyObject *kwargs) +{ + Module *self = NULL; + char *name = NULL, *options = NULL; + char *kwargs_list[] = {"name","options",NULL}; + int namelen = 0, i; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"s#|z:__new__",kwargs_list, + &name,&namelen,&options) ) + goto error; + + i = 0; + while( name[i] ) + if( name[i++] == '(' ) { + PyErr_SetString(PyExc_ValueError,"name may not contain (."); + goto error; + } + + if( !( self = (Module*)type->tp_alloc(type,0) ) ) + goto error; + + /* Somewhat a HACK, creates a module structure from scratch. */ + self->module = malloc(sizeof(EXTRACTOR_ExtractorList)); + self->module->libraryHandle = NULL; + self->module->extractMethod = (ExtractMethod)&Module_extractMethod; + self->module->libname = strdup(name); + self->module->options = malloc(12); /* store self as string in options. */ + sprintf(self->module->options,"%i",(int)self); + self->module->next = NULL; + + goto finish; + + error: + Py_XDECREF(self); + self = NULL; + + finish: + return (PyObject*)self; +} + +static int Module_init(Module *self, PyObject *args, PyObject *kwargs) +{ + char *name = NULL, *options = NULL, *optstring = NULL; + char *kwargs_list[] = {"name","options",NULL}; + int namelen = 0, optionslen = 0, i, rv = 0; + + if( !PyArg_ParseTupleAndKeywords(args,kwargs,"s#|z#:__init__",kwargs_list, + &name,&namelen,&options,&optionslen) ) + goto error; + + i = 0; + while( options && options[i] ) + if( options[i++] == ')' ) { + PyErr_SetString(PyExc_ValueError,"option may not contain )."); + goto error; + } + + EXTRACTOR_removeAll(self->module); /* slight crutch, was allocated in */ + self->module = NULL; /* __new__, so that programmer can create subtype. 
*/ + + optstring = malloc(namelen+optionslen+3); + if( options ) + sprintf(optstring,"%s(%s)",name,options); + else + sprintf(optstring,"%s",name); + if( !( self->module = EXTRACTOR_loadConfigLibraries(NULL,optstring) ) ) { + PyErr_SetString(PyExc_ValueError,"could not load module."); + goto error; + } + + goto finish; + + error: + rv = -1; + + finish: + if( optstring ) + free(optstring); + return rv; +} + +static PyObject *Module_getattr(Module *self, char *name) +{ + if( !strcmp(name,"libname") ) + return PyString_FromString(self->module->libname); + else if( !strcmp(name,"options") ) + return PyString_FromString(self->module->options); + else if( !strcmp(name,"mlist") ) + return (PyObject*)self->mlist; + PyErr_SetString(PyExc_AttributeError,name); + return NULL; +} + +static int Module_setattr(Module *self, char *name, PyObject *value) +{ + if( !strcmp(name,"libname") || !strcmp(name,"options") || + !strcmp(name,"mlist") ) + PyErr_Format(PyExc_AttributeError,"cannot set %s.",name); + else + PyErr_SetString(PyExc_AttributeError,name); + return -1; +} + +static PyObject *Module_repr(Module *self) +{ + if( self->module->options ) + return PyString_FromFormat("%s(\"%s\",\"%s\")",self->ob_type->tp_name, + self->module->libname,self->module->options); + else + return PyString_FromFormat("%s(\"%s\")",self->ob_type->tp_name, + self->module->libname); +} + +static long Module_hash(Module *self) +{ + return (int)self->module; +} + +static int Module_traverse(Module *self, visitproc visit, void *arg) +{ + Py_VISIT((PyObject*)self->mlist); + return 0; +} + +static int Module_clear(Module *self) +{ + printf("Removing module in clear: %s.\n",self->module->libname); + Py_CLEAR(self->mlist); + return 0; +} + +static void Module_dealloc(Module *self) +{ + Module_clear(self); + printf("Removing module: %s.\n",self->module->libname); + self->module->next = NULL; + EXTRACTOR_removeAll(self->module); + self->ob_type->tp_free((PyObject*)self); +} + +static PyMethodDef 
Module_methods[] = { + {NULL} /* Sentinel */ +}; + +static PyTypeObject ModuleType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "extractor.Module", /*tp_name*/ + sizeof(Module), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)Module_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + (getattrfunc)Module_getattr, /*tp_getattr*/ + (setattrfunc)Module_setattr, /*tp_setattr*/ + 0, /*tp_compare*/ + (reprfunc)Module_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + (hashfunc)Module_hash, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + "Module objects", /* tp_doc */ + (traverseproc)Module_traverse, /* tp_traverse */ + (inquiry)Module_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Module_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Module_init, /* tp_init */ + 0, /* tp_alloc */ + Module_new, /* tp_new */ +}; + +/* Module level. 
*/ + +static PyMethodDef Extractor_Module_methods[] = { + {NULL} /* Sentinel */ +}; + +PyMODINIT_FUNC initextractor() +{ + PyObject *m; + + if( PyType_Ready(&ModuleListType) ) + return; + if( PyType_Ready(&ModuleType) ) + return; + + m = Py_InitModule3("extractor",Extractor_Module_methods,"Extractor module."); + if (m == NULL) + return; + + Py_INCREF(&ModuleListType); + Py_INCREF(&ModuleType); + PyModule_AddObject(m,"ModuleList",(PyObject*)&ModuleListType); + PyModule_AddObject(m,"Module",(PyObject*)&ModuleType); +} diff --git a/src/main/libextractor_python_setup.py b/src/main/libextractor_python_setup.py @@ -1,13 +1,13 @@ from distutils.core import Extension, setup -cmod = Extension("_extractor",["libextractor_python.c"], +cmod = Extension("extractor",["libextractor_python3.c"], libraries=["extractor"], - include_dirs=["../include"]) + include_dirs=["../include"], + library_dirs=["/home/heiko/usr/lib"]) setup(name="Extractor", version="0.5.0", ext_modules=[cmod], - py_modules=["Extractor"], author="Christian Grothoff, Heiko Wundram", author_email="libextractor@gnu.org")