libextractor-python

GNU libextractor
Log | Files | Refs | README | LICENSE

__main__.py (2326B)


      1 """
      2 extract.py
      3 
      4      This file is part of libextractor.
      5      (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff
      6      (C) 2017, 2018 Nikita Gillmann <nikita@n0.is>
      7 
      8      libextractor is free software; you can redistribute it and/or modify
      9      it under the terms of the GNU General Public License as published
     10      by the Free Software Foundation; either version 3, or (at your
     11      option) any later version.
     12 
     13      libextractor is distributed in the hope that it will be useful, but
     14      WITHOUT ANY WARRANTY; without even the implied warranty of
     15      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16      General Public License for more details.
     17 
     18      You should have received a copy of the GNU General Public License
     19      along with libextractor; see the file COPYING.  If not, write to the
     20      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     21      Boston, MA 02111-1307, USA.
     22 
     23 A small demo showing how to use the libextractor Python binding.
     24 
     25 """
     26 from __future__ import print_function
     27 from libextractor import extractor
     28 import sys
     29 from ctypes import *
     30 import struct
     31 import logging
     32 import faulthandler
     33 
     34 faulthandler.enable()
     35 
     36 xtract = extractor.Extractor()
     37 
     38 logger = logging.getLogger()
     39 logger.setLevel(logging.DEBUG)
     40 
     41 formatter = logging.Formatter('%(asctime)s %(levelname)s - %(message)s')
     42 
     43 fh = logging.FileHandler('log.txt')
     44 fh.setLevel(logging.DEBUG)
     45 fh.setFormatter(formatter)
     46 logger.addHandler(fh)
     47 
     48 ch = logging.StreamHandler()
     49 ch.setLevel(logging.DEBUG)
     50 ch.setFormatter(formatter)
     51 logger.addHandler(ch)
     52 
     53 def print_k(xt, plugin, type, format, mime, data, datalen):
     54     mstr = cast(data, c_char_p)
     55     # FIXME: this ignores 'datalen', not that great...
     56     # (in general, depending on the mime type and format, only
     57     # the first 'datalen' bytes in 'data' should be used).
     58     if (format == extractor.EXTRACTOR_METAFORMAT_UTF8):
     59         print("%s - %s" % (xtract.keywordTypes()[type], mstr.value))
     60         # DEBUGGING OUTPUT + LOG:
     61         logger.debug("%s - %s" % (xtract.keywordTypes()[type], mstr.value))
     62     return 0
     63 
     64 def main():
     65     try:
     66         # stuff
     67         for arg in sys.argv[1:]:
     68             logger.debug("Keywords from %s:" % arg)
     69             xtract.extract(print_k, None, arg)
     70     except Exception as e:
     71         logger.debug(e)
     72 
     73 if __name__ == "__main__":
     74     main()