libextractor-java

GNU libextractor
Log | Files | Refs | README | LICENSE

Extractor.java (6165B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002, 2003, 2004, 2007, 2010, 2012 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19  */
     20 package org.gnu.libextractor;
     21 
     22 import java.util.ArrayList;
     23 import java.io.File;
     24 import java.io.FileInputStream;
     25 
     26 /**
     27  * Java Binding for libextractor.  Each Extractor instance
     28  * represents a set of meta data extraction plugins.
     29  *
     30  * @see Xtract
     31  * @see MetaData
     32  * @author Christian Grothoff
     33  */ 
     34 public final class Extractor {
     35 	
     36 
     37     private static final boolean warn_;
     38 
     39     /**
     40      * LE version.  0 if LE was compiled without JNI/Java support, in which
     41      * case we better not call any native methods...
     42      */
     43     private static final int version_;
     44 
     45     static {	
     46 	// first, initialize warn_
     47 	boolean warn = false;
     48 	try {
     49 	    if (System.getProperty("libextractor.warn") != null)
     50 		warn = true;
     51 	} catch (SecurityException se) {
     52 	    // ignore
     53 	} finally {
     54 	    warn_ = true; // warn;
     55 	}
     56 
     57 	// next, load library and determine version_
     58 	int ver = 0;
     59 	try {
     60 	    System.loadLibrary("extractor_java");
     61 	} catch (UnsatisfiedLinkError ule) {
     62 	    ver = -1;
     63 	    warn("Did not find libextractor_java library: " + ule);
     64 	}
     65 	if (ver == 0) {
     66 	    try {
     67 		ver = getVersionInternal();
     68 	    } catch (UnsatisfiedLinkError ule) {
     69 		// warn: libextractor compiled without Java support
     70 		warn("libextractor library compiled without Java support: " + ule);
     71 	    }
     72 	}
     73 	version_ = ver;
     74     }    
     75 
     76 
     77     private static void warn(String warning) {
     78 	if (warn_)
     79 	    System.err.println("WARNING: " + warning);
     80     }
     81 
     82 
     83     /**
     84      * @return -1 if LE library was not found, 0 if LE library
     85      *  was found but compiled without JNI support, otherwise
     86      *  the LE version number
     87      */
     88     public static int getVersion() {
     89 	return version_;
     90     }
     91 
     92 
     93     /**
     94      * Get the 'default' extractor, that is an extractor that loads
     95      * the default set of extractor plugins.
     96      */
     97     public static Extractor getDefault() {
     98 	if (version_ > 0)
     99 	    return new Extractor(loadDefaultInternal());
    100 	return new Extractor(0);
    101     }
    102 
    103 
    104     /**
    105      * Get the 'empty' extractor, that is an extractor that does not
    106      * have any plugins loaded.  This is useful to manually construct
    107      * an Extractor from scratch.
    108      */
    109     public static Extractor getEmpty() {
    110 	return new Extractor(0L);
    111     }
    112 
    113 
    114     /**
    115      * Handle to the list of plugins (a C pointer, long to support
    116      * 64-bit architectures!).
    117      */
    118     private long pluginHandle_;
    119 
    120 
    121     /**
    122      * Creates an extractor.
    123      *
    124      * @param pluginHandle the internal handle (C pointer!) refering
    125      *   to the list of plugins.  0 means no plugins.
    126      */
    127     private Extractor(long pluginHandle) {
    128 	pluginHandle_ = pluginHandle;
    129     }
    130 
    131 
    132     /**
    133      * Unloads all loaded plugins on "exit".
    134      */
    135     protected void finalize() {
    136 	if (pluginHandle_ != 0)
    137 	    unloadAllInternal(pluginHandle_);
    138     }
    139 
    140 
    141     /**
    142      * Remove a plugin from the list of plugins.
    143      *
    144      * @param pluginName name of the plugin to unload
    145      */
    146     public void unloadPlugin(String pluginName) {
    147 	if (pluginHandle_ != 0) 
    148 	    pluginHandle_ = unloadPluginInternal(pluginHandle_,
    149 						 pluginName);	
    150     }
    151 
    152 
    153     /**
    154      * Add an additional plugin to the list of plugins
    155      * used.
    156      *
    157      * @param pluginName name of the plugin to load
    158      */
    159     public void loadPlugin(String pluginName) {
    160 	if (version_ <= 0)
    161 	    return; 
    162 	pluginHandle_ = loadPluginInternal(pluginHandle_,
    163 					   pluginName);
    164     }
    165 
    166 
    167     /**
    168      * Extract keywords (meta-data) from the given file.
    169      *
    170      * @param f the file to extract meta-data from
    171      * @return extracted meta data (ArrayList<MetaData>)
    172      */
    173     public ArrayList extract(File f) {
    174 	return extract(f.getAbsolutePath());
    175     }
    176 
    177 
    178     /**
    179      * Extract keywords (meta-data) from the given file.
    180      *
    181      * @param file the name of the file
    182      * @return extracted meta data (ArrayList<MetaData>)
    183      */
    184     public ArrayList extract(String filename) {
    185 	ArrayList ret = new ArrayList(0);
    186 	if (pluginHandle_ == 0)
    187 	    return ret; // fast way out
    188 	extractInternal(pluginHandle_,
    189 			filename,
    190 			null,
    191 			ret);
    192 	return ret;
    193     }
    194 
    195     
    196     /**
    197      * Extract keywords (meta-data) from the given block
    198      * of data.
    199      *
    200      * @param data the file data
    201      * @return extracted meta data (ArrayList<MetaData>)
    202      */
    203     public ArrayList extract(byte[] data) {
    204 	ArrayList ret = new ArrayList(0);
    205 	if (pluginHandle_ == 0)
    206 	    return ret; // fast way out
    207 	extractInternal(pluginHandle_,
    208 			null,
    209 			data,
    210 			ret);	
    211 	return ret;
    212     }
    213 
    214     
    215     /* ********************* native calls ******************** */
    216 
    217     private static native long unloadPluginInternal(long handle,
    218 						    String pluginName);
    219     
    220     private static native long loadPluginInternal(long handle,
    221 						  String pluginName);
    222 
    223     private static native long loadDefaultInternal();
    224 
    225     private static native void unloadAllInternal(long handle);
    226     
    227     private static native void extractInternal(long handle,
    228 					       String filename,
    229 					       byte[] data,
    230 					       ArrayList result);
    231 
    232     private static native int getVersionInternal();
    233 
    234     /**
    235      * Not private since we use this from "MetaData".
    236      */
    237     static native String getTypeAsStringInternal(int type);
    238 
    239     /**
    240      * Not private since we use this from "MetaData".
    241      */
    242     static native int getMaxTypeInternal();
    243 
    244 } // end of Extractor