libextractor-php

GNU libextractor
Log | Files | Refs | README

commit 7527afd686533968c3504361a00b0508c731e50b
parent 764f6d675c085bb87cf8178db65af67b5be3cd03
Author: Christian Grothoff <christian@grothoff.org>
Date:   Mon,  4 Jul 2005 14:37:35 +0000

php

Diffstat:
ACREDITS | 4++++
AEXPERIMENTAL | 5+++++
AMakefile | 120+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AREADME | 8++++++++
Aconfig.m4 | 42++++++++++++++++++++++++++++++++++++++++++
Aexamples/extractor_getkeywords.php | 12++++++++++++
Aextractor.c | 189+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apackage.xml | 31+++++++++++++++++++++++++++++++
Aphp_extractor.h | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 492 insertions(+), 0 deletions(-)

diff --git a/CREDITS b/CREDITS @@ -0,0 +1,3 @@ +extractor +Manfred Weber +Johann Peter Hartmann +\ No newline at end of file diff --git a/EXPERIMENTAL b/EXPERIMENTAL @@ -0,0 +1,5 @@ +this extension is experimental, +its functions may change their names +or move to extension all together +so do not rely to much on them +you have been warned! diff --git a/Makefile b/Makefile @@ -0,0 +1,120 @@ +srcdir = /tmp/extractor +builddir = /tmp/extractor +top_srcdir = /tmp/extractor +top_builddir = /tmp/extractor +SHLIB_SUFFIX_NAME = so +EXTRACTOR_SHARED_LIBADD = -Wl,-rpath,/lib -L/lib -lextractor +shared_objects_extractor = extractor.lo +PHP_MODULES = $(phplibdir)/extractor.la +all_targets = $(PHP_MODULES) +install_targets = install-modules +prefix = /usr/local/php5 +exec_prefix = $(prefix) +libdir = ${exec_prefix}/lib +prefix = /usr/local/php5 +phplibdir = /tmp/extractor/modules +PHP_COMPILE = +CC = gcc +CFLAGS = -g -O2 +CFLAGS_CLEAN = $(CFLAGS) +CPP = gcc -E +CPPFLAGS = -DHAVE_CONFIG_H +CXX = g++ +DEFS = +EXTENSION_DIR = /usr/local/php5/lib/php/extensions/no-debug-non-zts-20040412 +EXTRA_LDFLAGS = +EXTRA_LIBS = +INCLUDES = -I/usr/local/php5/include/php -I/usr/local/php5/include/php/main -I/usr/local/php5/include/php/Zend -I/usr/local/php5/include/php/TSRM -I/usr/local/include +LEX = +LEX_OUTPUT_ROOT = +LFLAGS = +LDFLAGS = +SHARED_LIBTOOL = +LIBTOOL = $(SHELL) $(top_builddir)/libtool +SHELL = /bin/sh +AWK = gawk +RE2C = exit 0; +mkinstalldirs = $(top_srcdir)/build/shtool mkdir -p +INSTALL = $(top_srcdir)/build/shtool install -c +INSTALL_DATA = $(INSTALL) -m 644 + +DEFS = -DPHP_ATOM_INC -I$(top_builddir)/include -I$(top_builddir)/main -I$(top_srcdir) +COMMON_FLAGS = $(DEFS) $(INCLUDES) $(EXTRA_INCLUDES) $(CPPFLAGS) $(PHP_FRAMEWORKPATH) + + +all: $(all_targets) + @echo + @echo "Build complete." + @echo "(It is safe to ignore warnings about tempnam and tmpnam)." 
+ @echo + +build-modules: $(PHP_MODULES) + +libphp5.la: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) + $(LIBTOOL) --mode=link $(CC) $(CFLAGS) $(EXTRA_CFLAGS) -rpath $(phptempdir) $(EXTRA_LDFLAGS) $(LDFLAGS) $(PHP_RPATHS) $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(EXTRA_LIBS) $(ZEND_EXTRA_LIBS) -o $@ + -@$(LIBTOOL) --silent --mode=install cp libphp5.la $(phptempdir)/libphp5.la >/dev/null 2>&1 + +libs/libphp5.bundle: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) + $(CC) $(MH_BUNDLE_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(LDFLAGS) $(EXTRA_LDFLAGS) $(PHP_GLOBAL_OBJS:.lo=.o) $(PHP_SAPI_OBJS:.lo=.o) $(PHP_FRAMEWORKS) $(EXTRA_LIBS) $(ZEND_EXTRA_LIBS) -o $@ && cp $@ libs/libphp5.so + +install: $(all_targets) $(install_targets) + +install-sapi: $(OVERALL_TARGET) + @echo "Installing PHP SAPI module: $(PHP_SAPI)" + -@$(mkinstalldirs) $(INSTALL_ROOT)$(bindir) + -@if test ! -r $(phptempdir)/libphp5.$(SHLIB_SUFFIX_NAME); then \ + for i in 0.0.0 0.0 0; do \ + if test -r $(phptempdir)/libphp5.$(SHLIB_SUFFIX_NAME).$$i; then \ + $(LN_S) $(phptempdir)/libphp5.$(SHLIB_SUFFIX_NAME).$$i $(phptempdir)/libphp5.$(SHLIB_SUFFIX_NAME); \ + break; \ + fi; \ + done; \ + fi + @$(INSTALL_IT) + +install-modules: build-modules + @test -d modules && \ + $(mkinstalldirs) $(INSTALL_ROOT)$(EXTENSION_DIR) + @echo "Installing shared extensions: $(INSTALL_ROOT)$(EXTENSION_DIR)/" + @rm -f modules/*.la >/dev/null 2>&1 + @$(INSTALL) modules/* $(INSTALL_ROOT)$(EXTENSION_DIR) + +install-tester: + @echo "Installing regression tester: $(INSTALL_ROOT)$(PEAR_INSTALLDIR)/" + @$(mkinstalldirs) $(INSTALL_ROOT)$(PEAR_INSTALLDIR) + @$(INSTALL) -m 755 $(top_srcdir)/run-tests.php $(INSTALL_ROOT)$(PEAR_INSTALLDIR) + +install-su: install-pear install-tester + +test: + -@if test -x $(SAPI_CLI_PATH) && test ! 
-z $(SAPI_CLI_PATH); then \ + TEST_PHP_EXECUTABLE=$(top_builddir)/$(SAPI_CLI_PATH) \ + TEST_PHP_SRCDIR=$(top_srcdir) \ + CC="$(CC)" \ + $(top_builddir)/$(SAPI_CLI_PATH) -d 'open_basedir=' -d 'safe_mode=0' -d 'output_buffering=0' $(top_srcdir)/run-tests.php $(TESTS); \ + else \ + echo "ERROR: Cannot run tests without CLI sapi."; \ + fi + +clean: + find . -name \*.lo -o -name \*.o | xargs rm -f + find . -name \*.la -o -name \*.a | xargs rm -f + find . -name \*.so | xargs rm -f + find . -name .libs -a -type d|xargs rm -rf + rm -f libphp5.la $(SAPI_CLI_PATH) $(OVERALL_TARGET) modules/* libs/* + +distclean: clean + rm -f config.cache config.log config.status Makefile.objects Makefile.fragments libtool main/php_config.h stamp-h php5.spec sapi/apache/libphp5.module buildmk.stamp + egrep define'.*include/php' configure|sed 's/.*>//'|xargs rm -f + find . -name Makefile | xargs rm -f + +.PHONY: all clean install distclean test +.NOEXPORT: +extractor.lo: /tmp/extractor/extractor.c + $(LIBTOOL) --mode=compile $(CC) -I. -I/tmp/extractor $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) -prefer-pic -c /tmp/extractor/extractor.c -o extractor.lo +$(phplibdir)/extractor.la: ./extractor.la + $(LIBTOOL) --mode=install cp ./extractor.la $(phplibdir) + +./extractor.la: $(shared_objects_extractor) $(EXTRACTOR_SHARED_DEPENDENCIES) + $(LIBTOOL) --mode=link $(CC) $(COMMON_FLAGS) $(CFLAGS_CLEAN) $(EXTRA_CFLAGS) $(LDFLAGS) -o $@ -export-dynamic -avoid-version -prefer-pic -module -rpath $(phplibdir) $(EXTRA_LDFLAGS) $(shared_objects_extractor) $(EXTRACTOR_SHARED_LIBADD) + diff --git a/README b/README @@ -0,0 +1,8 @@ +I found this at: + +http://blog.thinkphp.de/archives/12-My-first-PHP-Extension..html + +Note that there may be a licensing issue here. + +libextractor is released under the GNU GPL, which according to the FSF (IANAL, but they have some) is incompatible with the PHP License.
Now, before you even ask if I could make an exception for you guys, I must tell you that I cannot, simply because libextractor uses plenty of GPL'ed code that I did not write -- and the authors of some of that code have already declined to LGPL it. +So while I personally will certainly not go after any _free_ software using my code, you're on rather shaky ground and other people (namely the author of xpdf) could theoretically (see also: FUD) give you trouble here. You might want to put your code under the GPL to avoid this -- and of course you might want to consider to lobby within the PHP community to make their licenses GPL-compatible (again), for example by putting the code under two licenses... :-). diff --git a/config.m4 b/config.m4 @@ -0,0 +1,42 @@ +dnl $Id: config.m4,v 1.1 2004/12/23 06:20:22 manfred Exp $ +dnl config.m4 for extension extractor + +PHP_ARG_WITH(extractor, for extractor support, +[ --with-extractor Include extractor support]) + +if test "$PHP_EXTRACTOR" != "no"; then + + SEARCH_PATH="/usr/local /usr" # you might want to change this + SEARCH_FOR="/include/extractor.h" # you most likely want to change this + if test -r $PHP_EXTRACTOR/$SEARCH_FOR; then # path given as parameter + EXTRACTOR_DIR=$PHP_EXTRACTOR + else # search default path list + AC_MSG_CHECKING([for extractor files in default path]) + for i in $SEARCH_PATH ; do + if test -r $i/$SEARCH_FOR; then + EXTRACTOR_DIR=$i + AC_MSG_RESULT(found in $i) + fi + done + fi + + if test -z "$EXTRACTOR_DIR"; then + AC_MSG_RESULT([not found]) + AC_MSG_ERROR([Please reinstall the extractor distribution]) + fi + + PHP_ADD_INCLUDE($EXTRACTOR_DIR/include) + dnl Link against the lib directory of the EXTRACTOR_DIR prefix located above. + PHP_ADD_LIBRARY_WITH_PATH(extractor, $EXTRACTOR_DIR/lib, EXTRACTOR_SHARED_LIBADD) + + AC_CHECK_LIB(extractor, EXTRACTOR_loadDefaultLibraries, + [ + AC_DEFINE(HAVE_EXTRACTORLIB,1,[ ]) + ], [ + AC_MSG_ERROR(extractor library not found or wrong version) + ],) + + PHP_SUBST(EXTRACTOR_SHARED_LIBADD) + + PHP_NEW_EXTENSION(extractor, extractor.c,
$ext_shared) +fi diff --git a/examples/extractor_getkeywords.php b/examples/extractor_getkeywords.php @@ -0,0 +1,12 @@ +#!/usr/local/php5/bin/php + +<?php + +ini_set('display_errors', 1); +error_reporting(E_ALL); + +print_r(extractor_getkeywords("/usr/local/apache2/htdocs/index.html.de")); +print_r(extractor_getkeywords("/usr/local/apache2/htdocs/apache_pb.gif")); + +?> + diff --git a/extractor.c b/extractor.c @@ -0,0 +1,189 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2004 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Manfred Weber | + +----------------------------------------------------------------------+ +*/ + +/* $Id: extractor.c,v 1.1 2004/12/23 06:20:22 manfred Exp $ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_ini.h" +#include "ext/standard/info.h" +#include "php_extractor.h" +#include <extractor.h> + +/* If you declare any globals in php_extractor.h uncomment this: +ZEND_DECLARE_MODULE_GLOBALS(extractor) +*/ + +/* True global resources - no need for thread safety here */ +static int le_extractor; + +/* {{{ extractor_functions[] + * + * Every user visible function must have an entry in extractor_functions[]. 
+ */ +function_entry extractor_functions[] = { + PHP_FE(extractor_getkeywords, NULL) + {NULL, NULL, NULL} /* Must be the last line in extractor_functions[] */ +}; +/* }}} */ + +/* {{{ extractor_module_entry + */ +zend_module_entry extractor_module_entry = { +#if ZEND_MODULE_API_NO >= 20010901 + STANDARD_MODULE_HEADER, +#endif + "extractor", + extractor_functions, + PHP_MINIT(extractor), + PHP_MSHUTDOWN(extractor), + PHP_RINIT(extractor), /* Replace with NULL if there's nothing to do at request start */ + PHP_RSHUTDOWN(extractor), /* Replace with NULL if there's nothing to do at request end */ + PHP_MINFO(extractor), +#if ZEND_MODULE_API_NO >= 20010901 + "0.1", /* Replace with version number for your extension */ +#endif + STANDARD_MODULE_PROPERTIES +}; +/* }}} */ + +#ifdef COMPILE_DL_EXTRACTOR +ZEND_GET_MODULE(extractor) +#endif + +/* {{{ PHP_INI + */ +/* Remove comments and fill if you need to have entries in php.ini +PHP_INI_BEGIN() + STD_PHP_INI_ENTRY("extractor.global_value", "42", PHP_INI_ALL, OnUpdateLong, global_value, zend_extractor_globals, extractor_globals) + STD_PHP_INI_ENTRY("extractor.global_string", "foobar", PHP_INI_ALL, OnUpdateString, global_string, zend_extractor_globals, extractor_globals) +PHP_INI_END() +*/ +/* }}} */ + +/* {{{ php_extractor_init_globals + */ +/* Uncomment this function if you have INI entries +static void php_extractor_init_globals(zend_extractor_globals *extractor_globals) +{ + extractor_globals->global_value = 0; + extractor_globals->global_string = NULL; +} +*/ +/* }}} */ + +/* {{{ PHP_MINIT_FUNCTION + */ +PHP_MINIT_FUNCTION(extractor) +{ + /* If you have INI entries, uncomment these lines + ZEND_INIT_MODULE_GLOBALS(extractor, php_extractor_init_globals, NULL); + REGISTER_INI_ENTRIES(); + */ + return SUCCESS; +} +/* }}} */ + +/* {{{ PHP_MSHUTDOWN_FUNCTION + */ +PHP_MSHUTDOWN_FUNCTION(extractor) +{ + /* uncomment this line if you have INI entries + UNREGISTER_INI_ENTRIES(); + */ + return SUCCESS; +} +/* }}} */ + +/* Remove 
if there's nothing to do at request start */ +/* {{{ PHP_RINIT_FUNCTION + */ +PHP_RINIT_FUNCTION(extractor) +{ + return SUCCESS; +} +/* }}} */ + +/* Remove if there's nothing to do at request end */ +/* {{{ PHP_RSHUTDOWN_FUNCTION + */ +PHP_RSHUTDOWN_FUNCTION(extractor) +{ + return SUCCESS; +} +/* }}} */ + +/* {{{ PHP_MINFO_FUNCTION + */ +PHP_MINFO_FUNCTION(extractor) +{ + php_info_print_table_start(); + php_info_print_table_header(2, "extractor support", "enabled"); + php_info_print_table_end(); + + /* Remove comments if you have entries in php.ini + DISPLAY_INI_ENTRIES(); + */ +} +/* }}} */ + +/* The previous line is meant for vim and emacs, so it can correctly fold and + unfold functions in source code. See the corresponding marks just before + function definition, where the functions purpose is also documented. Please + follow this convention for the convenience of others editing your code. +*/ + +/* {{{ proto array extractor_getkeywords(string filename) + Returns an array of the keywords libextractor finds in filename */ +PHP_FUNCTION(extractor_getkeywords) +{ + char *filename = NULL; + int argc = ZEND_NUM_ARGS(); + int filename_len; + EXTRACTOR_KeywordList *keywords, *pos; + EXTRACTOR_ExtractorList *extractors; + + if (zend_parse_parameters(argc TSRMLS_CC, "s", &filename, &filename_len) == FAILURE) + return; + + extractors = EXTRACTOR_loadDefaultLibraries (); + keywords = EXTRACTOR_getKeywords (extractors, filename); + array_init(return_value); + for (pos = keywords; pos != NULL; pos = pos->next) + { + /* walk with a cursor so that keywords keeps pointing at the list head */ + add_next_index_string(return_value,pos->keyword,1); + } + EXTRACTOR_freeKeywords (keywords); /* release the whole list, starting at its head */ + EXTRACTOR_removeAll (extractors); + return; +} +/* }}} */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/package.xml b/package.xml @@ -0,0 +1,31 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> +<!DOCTYPE package SYSTEM "http://pear.php.net/dtd/package-1.0"> +<package version="1.0"> +
<name>extractor</name> + <summary>extract meta-date from files</summary> + <description>A PHP extension that interfaces the libextractor library. libextractor is a library used to extract meta-data from files of arbitrary type.</description> + <maintainers> + <maintainer> + <user>manfred</user> + <name>Manfred Weber</name> + <email>weber@mayflower.de</email> + <role>lead</role> + </maintainer> + </maintainers> + <release> + <version>0.1</version> + <date>2004-12-22</date> + <license>PHP License</license> + <state>beta</state> + <notes>- Added Function: extractor_getkeywords( string filename )</notes> + <filelist> + <file role="src" md5sum="7291f1b05a4608bc9223993fd2bea6c2" name="EXPERIMENTAL"/> + <file role="src" md5sum="4bd173b649dfe0931845e873361ceeb6" name="CREDITS"/> + <file role="src" md5sum="bedeb14e023df654cfacdc393b9c5024" name="Makefile"/> + <file role="src" md5sum="35dab6504c38bcec058e889362976ed1" name="config.m4"/> + <file role="src" md5sum="bf4db7a65c5f4c0801fcb73c80577c76" name="extractor.c"/> + <file role="src" md5sum="ae3ef44acaeae71d3b48b07164a5e124" name="php_extractor.h"/> + <file role="doc" baseinstalldir="/" md5sum="d50e0fe59205a0c834e02ec2d1748638" name="examples/extractor_getkeywords.php"/> + </filelist> + </release> +</package> diff --git a/php_extractor.h b/php_extractor.h @@ -0,0 +1,81 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2004 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. 
| + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: | + +----------------------------------------------------------------------+ +*/ + +/* $Id: php_extractor.h,v 1.1 2004/12/23 06:20:22 manfred Exp $ */ + +#ifndef PHP_EXTRACTOR_H +#define PHP_EXTRACTOR_H + +extern zend_module_entry extractor_module_entry; +#define phpext_extractor_ptr &extractor_module_entry + +#ifdef PHP_WIN32 +#define PHP_EXTRACTOR_API __declspec(dllexport) +#else +#define PHP_EXTRACTOR_API +#endif + +#ifdef ZTS +#include "TSRM.h" +#endif + +PHP_MINIT_FUNCTION(extractor); +PHP_MSHUTDOWN_FUNCTION(extractor); +PHP_RINIT_FUNCTION(extractor); +PHP_RSHUTDOWN_FUNCTION(extractor); +PHP_MINFO_FUNCTION(extractor); + +PHP_FUNCTION(extractor_getkeywords); + +/* + Declare any global variables you may need between the BEGIN + and END macros here: + +ZEND_BEGIN_MODULE_GLOBALS(extractor) + long global_value; + char *global_string; +ZEND_END_MODULE_GLOBALS(extractor) +*/ + +/* In every utility function you add that needs to use variables + in php_extractor_globals, call TSRMLS_FETCH(); after declaring other + variables used by that function, or better yet, pass in TSRMLS_CC + after the last function argument and declare your utility function + with TSRMLS_DC after the last declared argument. Always refer to + the globals in your function as EXTRACTOR_G(variable). You are + encouraged to rename these macros something shorter, see + examples in any other php module directory. 
+*/ + +#ifdef ZTS +#define EXTRACTOR_G(v) TSRMG(extractor_globals_id, zend_extractor_globals *, v) +#else +#define EXTRACTOR_G(v) (extractor_globals.v) +#endif + +#endif /* PHP_EXTRACTOR_H */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */