libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

extract.c (26396B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19 */
     20 /**
     21  * @file main/extract.c
     22  * @brief command-line tool to run GNU libextractor
     23  * @author Christian Grothoff
     24  */
     25 #include "platform.h"
     26 #include "extractor.h"
     27 #include "getopt.h"
     28 #include <signal.h>
     29 
     30 #define YES 1
     31 #define NO 0
     32 
     33 
     34 /**
     35  * Which keyword types should we print?
     36  */
     37 static int *print;
     38 
     39 /**
     40  * How verbose are we supposed to be?
     41  */
     42 static int verbose;
     43 
     44 /**
     45  * Run plugins in-process.
     46  */
     47 static int in_process;
     48 
     49 /**
     50  * Read file contents into memory, then feed them to extractor.
     51  */
     52 static int from_memory;
     53 
     54 #ifndef WINDOWS
     55 /**
     56  * Install a signal handler to ignore SIGPIPE.
     57  */
     58 static void
     59 ignore_sigpipe ()
     60 {
     61   struct sigaction oldsig;
     62   struct sigaction sig;
     63 
     64   memset (&sig, 0, sizeof (struct sigaction));
     65   sig.sa_handler = SIG_IGN;
     66   sigemptyset (&sig.sa_mask);
     67 #ifdef SA_INTERRUPT
     68   sig.sa_flags = SA_INTERRUPT;  /* SunOS */
     69 #else
     70   sig.sa_flags = SA_RESTART;
     71 #endif
     72   if (0 != sigaction (SIGPIPE, &sig, &oldsig))
     73     fprintf (stderr,
     74              "Failed to install SIGPIPE handler: %s\n", strerror (errno));
     75 }
     76 
     77 
     78 #endif
     79 
     80 
     81 /**
     82  * Information about command-line options.
     83  */
     84 struct Help
     85 {
     86   /**
     87    * Single-character option name, '\0' for none.
     88    */
     89   char shortArg;
     90 
     91   /**
     92    * Long name of the option.
     93    */
     94   const char *longArg;
     95 
     96   /**
     97    * Name of the mandatory argument, NULL for no argument.
     98    */
     99   const char *mandatoryArg;
    100 
    101   /**
    102    * Help text for the option.
    103    */
    104   const char *description;
    105 };
    106 
    107 
    108 /**
    109  * Indentation for descriptions.
    110  */
    111 #define BORDER 29
    112 
    113 
    114 /**
    115  * Display help text (--help).
    116  *
    117  * @param general binary name
    118  * @param description program description
    119  * @param opt program options (NULL-terminated array)
    120  */
    121 static void
    122 format_help (const char *general,
    123              const char *description,
    124              const struct Help *opt)
    125 {
    126   size_t slen;
    127   unsigned int i;
    128   ssize_t j;
    129   size_t ml;
    130   size_t p;
    131   char scp[80];
    132   const char *trans;
    133 
    134   printf (_ ("Usage: %s\n%s\n\n"),
    135           gettext (general),
    136           gettext (description));
    137   printf (_ (
    138             "Arguments mandatory for long options are also mandatory for short options.\n"));
    139   slen = 0;
    140   i = 0;
    141   while (NULL != opt[i].description)
    142   {
    143     if (0 == opt[i].shortArg)
    144       printf ("      ");
    145     else
    146       printf ("  -%c, ",
    147               opt[i].shortArg);
    148     printf ("--%s",
    149             opt[i].longArg);
    150     slen = 8 + strlen (opt[i].longArg);
    151     if (NULL != opt[i].mandatoryArg)
    152     {
    153       printf ("=%s",
    154               opt[i].mandatoryArg);
    155       slen += 1 + strlen (opt[i].mandatoryArg);
    156     }
    157     if (slen > BORDER)
    158     {
    159       printf ("\n%*s", BORDER, "");
    160       slen = BORDER;
    161     }
    162     if (slen < BORDER)
    163     {
    164       printf ("%*s", (int) (BORDER - slen), "");
    165       slen = BORDER;
    166     }
    167     trans = gettext (opt[i].description);
    168     ml = strlen (trans);
    169     p = 0;
    170 OUTER:
    171     while (ml - p > 78 - slen)
    172     {
    173       for (j = p + 78 - slen; j>p; j--)
    174       {
    175         if (isspace ( (unsigned char) trans[j]))
    176         {
    177           memcpy (scp,
    178                   &trans[p],
    179                   j - p);
    180           scp[j - p] = '\0';
    181           printf ("%s\n%*s",
    182                   scp,
    183                   BORDER + 2,
    184                   "");
    185           p = j + 1;
    186           slen = BORDER + 2;
    187           goto OUTER;
    188         }
    189       }
    190       /* could not find space to break line */
    191       memcpy (scp,
    192               &trans[p],
    193               78 - slen);
    194       scp[78 - slen] = '\0';
    195       printf ("%s\n%*s",
    196               scp,
    197               BORDER + 2,
    198               "");
    199       slen = BORDER + 2;
    200       p = p + 78 - slen;
    201     }
    202     /* print rest */
    203     if (p < ml)
    204       printf ("%s\n",
    205               &trans[p]);
    206     i++;
    207   }
    208 }
    209 
    210 
    211 /**
    212  * Run --help.
    213  */
    214 static void
    215 print_help ()
    216 {
    217   static struct Help help[] = {
    218     { 'b', "bibtex", NULL,
    219       gettext_noop ("print output in bibtex format") },
    220     { 'g', "grep-friendly", NULL,
    221       gettext_noop (
    222         "produce grep-friendly output (all results on one line per file)") },
    223     { 'h', "help", NULL,
    224       gettext_noop ("print this help") },
    225     { 'i', "in-process", NULL,
    226       gettext_noop ("run plugins in-process (simplifies debugging)") },
    227     { 'm', "from-memory", NULL,
    228       gettext_noop (
    229         "read data from file into memory and extract from memory") },
    230     { 'l', "library", "LIBRARY",
    231       gettext_noop ("load an extractor plugin named LIBRARY") },
    232     { 'L', "list", NULL,
    233       gettext_noop ("list all keyword types") },
    234     { 'n', "nodefault", NULL,
    235       gettext_noop ("do not use the default set of extractor plugins") },
    236     { 'p', "print", "TYPE",
    237       gettext_noop (
    238         "print only keywords of the given TYPE (use -L to get a list)") },
    239     { 'v', "version", NULL,
    240       gettext_noop ("print the version number") },
    241     { 'V', "verbose", NULL,
    242       gettext_noop ("be verbose") },
    243     { 'x', "exclude", "TYPE",
    244       gettext_noop ("do not print keywords of the given TYPE") },
    245     { 0, NULL, NULL, NULL },
    246   };
    247   format_help (_ ("extract [OPTIONS] [FILENAME]*"),
    248                _ ("Extract metadata from files."),
    249                help);
    250 
    251 }
    252 
    253 
    254 #if HAVE_ICONV
    255 #include "iconv.c"
    256 #endif
    257 
    258 /**
    259  * Print a keyword list to a file.
    260  *
    261  * @param cls closure, not used
    262  * @param plugin_name name of the plugin that produced this value;
    263  *        special values can be used (i.e. '<zlib>' for zlib being
    264  *        used in the main libextractor library and yielding
    265  *        meta data).
    266  * @param type libextractor-type describing the meta data
    267  * @param format basic format information about data
    268  * @param data_mime_type mime-type of data (not of the original file);
    269  *        can be NULL (if mime-type is not known)
    270  * @param data actual meta-data found
    271  * @param data_len number of bytes in data
    272  * @return 0 to continue extracting, 1 to abort
    273  */
    274 static int
    275 print_selected_keywords (void *cls,
    276                          const char *plugin_name,
    277                          enum EXTRACTOR_MetaType type,
    278                          enum EXTRACTOR_MetaFormat format,
    279                          const char *data_mime_type,
    280                          const char *data,
    281                          size_t data_len)
    282 {
    283   char *keyword;
    284 #if HAVE_ICONV
    285   iconv_t cd;
    286 #endif
    287   const char *stype;
    288   const char *mt;
    289 
    290   if (YES != print[type])
    291     return 0;
    292   if (verbose > 3)
    293     fprintf (stdout,
    294              _ ("Found by `%s' plugin:\n"),
    295              plugin_name);
    296   mt = EXTRACTOR_metatype_to_string (type);
    297   stype = (NULL == mt) ? _ ("unknown") : gettext (mt);
    298   switch (format)
    299   {
    300   case EXTRACTOR_METAFORMAT_UNKNOWN:
    301     fprintf (stdout,
    302              _ ("%s - (unknown, %u bytes)\n"),
    303              stype,
    304              (unsigned int) data_len);
    305     break;
    306   case EXTRACTOR_METAFORMAT_UTF8:
    307     if (0 == data_len)
    308       break;
    309 #if HAVE_ICONV
    310     cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
    311     if (((iconv_t) -1) != cd)
    312       keyword = iconv_helper (cd,
    313                               data,
    314                               data_len);
    315     else
    316 #endif
    317     keyword = strdup (data);
    318     if (NULL != keyword)
    319     {
    320       fprintf (stdout,
    321                "%s - %s\n",
    322                stype,
    323                keyword);
    324       free (keyword);
    325     }
    326 #if HAVE_ICONV
    327     if (((iconv_t) -1) != cd)
    328       iconv_close (cd);
    329 #endif
    330     break;
    331   case EXTRACTOR_METAFORMAT_BINARY:
    332     fprintf (stdout,
    333              _ ("%s - (binary, %u bytes)\n"),
    334              stype,
    335              (unsigned int) data_len);
    336     break;
    337   case EXTRACTOR_METAFORMAT_C_STRING:
    338     fprintf (stdout,
    339              "%s - %.*s\n",
    340              stype,
    341              (int) data_len,
    342              data);
    343     break;
    344   default:
    345     break;
    346   }
    347   return 0;
    348 }
    349 
    350 
    351 /**
    352  * Print a keyword list to a file without new lines.
    353  *
    354  * @param cls closure, not used
    355  * @param plugin_name name of the plugin that produced this value;
    356  *        special values can be used (i.e. '<zlib>' for zlib being
    357  *        used in the main libextractor library and yielding
    358  *        meta data).
    359  * @param type libextractor-type describing the meta data
    360  * @param format basic format information about data
    361  * @param data_mime_type mime-type of data (not of the original file);
    362  *        can be NULL (if mime-type is not known)
    363  * @param data actual meta-data found
    364  * @param data_len number of bytes in data
    365  * @return 0 to continue extracting, 1 to abort
    366  */
    367 static int
    368 print_selected_keywords_grep_friendly (void *cls,
    369                                        const char *plugin_name,
    370                                        enum EXTRACTOR_MetaType type,
    371                                        enum EXTRACTOR_MetaFormat format,
    372                                        const char *data_mime_type,
    373                                        const char *data,
    374                                        size_t data_len)
    375 {
    376   char *keyword;
    377 #if HAVE_ICONV
    378   iconv_t cd;
    379 #endif
    380   const char *mt;
    381 
    382   if (YES != print[type])
    383     return 0;
    384   mt = EXTRACTOR_metatype_to_string (type);
    385   if (NULL == mt)
    386     mt = gettext_noop ("unknown");
    387   switch (format)
    388   {
    389   case EXTRACTOR_METAFORMAT_UNKNOWN:
    390     break;
    391   case EXTRACTOR_METAFORMAT_UTF8:
    392     if (0 == data_len)
    393       return 0;
    394     if (verbose > 1)
    395       fprintf (stdout,
    396                "%s: ",
    397                gettext (mt));
    398 #if HAVE_ICONV
    399     cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
    400     if (((iconv_t) -1) != cd)
    401       keyword = iconv_helper (cd,
    402                               data,
    403                               data_len);
    404     else
    405 #endif
    406     keyword = strdup (data);
    407     if (NULL != keyword)
    408     {
    409       fprintf (stdout,
    410                "`%s' ",
    411                keyword);
    412       free (keyword);
    413     }
    414 #if HAVE_ICONV
    415     if (((iconv_t) -1) != cd)
    416       iconv_close (cd);
    417 #endif
    418     break;
    419   case EXTRACTOR_METAFORMAT_BINARY:
    420     break;
    421   case EXTRACTOR_METAFORMAT_C_STRING:
    422     if (verbose > 1)
    423       fprintf (stdout,
    424                "%s ",
    425                gettext (mt));
    426     fprintf (stdout,
    427              "`%s'",
    428              data);
    429     break;
    430   default:
    431     break;
    432   }
    433   return 0;
    434 }
    435 
    436 
    437 /**
    438  * Entry in the map we construct for each file.
    439  */
    440 struct BibTexMap
    441 {
    442   /**
    443    * Name in bibTeX
    444    */
    445   const char *bibTexName;
    446 
    447   /**
    448    * Meta type for the value.
    449    */
    450   enum EXTRACTOR_MetaType le_type;
    451 
    452   /**
    453    * The value itself.
    454    */
    455   char *value;
    456 };
    457 
    458 
    459 /**
    460  * Type of the entry for bibtex.
    461  */
    462 static char *entry_type;
    463 
    464 /**
    465  * Mapping between bibTeX strings, libextractor
    466  * meta data types and values for the current document.
    467  */
    468 static struct BibTexMap btm[] = {
    469   { "title", EXTRACTOR_METATYPE_TITLE, NULL},
    470   { "year", EXTRACTOR_METATYPE_PUBLICATION_YEAR, NULL },
    471   { "author", EXTRACTOR_METATYPE_AUTHOR_NAME, NULL },
    472   { "book", EXTRACTOR_METATYPE_BOOK_TITLE, NULL},
    473   { "edition", EXTRACTOR_METATYPE_BOOK_EDITION, NULL},
    474   { "chapter", EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER, NULL},
    475   { "journal", EXTRACTOR_METATYPE_JOURNAL_NAME, NULL},
    476   { "volume", EXTRACTOR_METATYPE_JOURNAL_VOLUME, NULL},
    477   { "number", EXTRACTOR_METATYPE_JOURNAL_NUMBER, NULL},
    478   { "pages", EXTRACTOR_METATYPE_PAGE_COUNT, NULL },
    479   { "pages", EXTRACTOR_METATYPE_PAGE_RANGE, NULL },
    480   { "school", EXTRACTOR_METATYPE_AUTHOR_INSTITUTION, NULL},
    481   { "publisher", EXTRACTOR_METATYPE_PUBLISHER, NULL },
    482   { "address", EXTRACTOR_METATYPE_PUBLISHER_ADDRESS, NULL },
    483   { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL },
    484   { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL},
    485   { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL },
    486   { "url", EXTRACTOR_METATYPE_URL, NULL},
    487   { "note", EXTRACTOR_METATYPE_COMMENT, NULL},
    488   { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL },
    489   { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL },
    490   { NULL, 0, NULL }
    491 };
    492 
    493 
    494 /**
    495  * Clean up the bibtex processor in preparation for the next round.
    496  */
    497 static void
    498 cleanup_bibtex ()
    499 {
    500   unsigned int i;
    501 
    502   for (i = 0; NULL != btm[i].bibTexName; i++)
    503   {
    504     free (btm[i].value);
    505     btm[i].value = NULL;
    506   }
    507   free (entry_type);
    508   entry_type = NULL;
    509 }
    510 
    511 
    512 /**
    513  * Callback function for printing meta data in bibtex format.
    514  *
    515  * @param cls closure, not used
    516  * @param plugin_name name of the plugin that produced this value;
    517  *        special values can be used (i.e. '<zlib>' for zlib being
    518  *        used in the main libextractor library and yielding
    519  *        meta data).
    520  * @param type libextractor-type describing the meta data
    521  * @param format basic format information about data
    522  * @param data_mime_type mime-type of data (not of the original file);
    523  *        can be NULL (if mime-type is not known)
    524  * @param data actual meta-data found
    525  * @param data_len number of bytes in data
    526  * @return 0 to continue extracting (always)
    527  */
    528 static int
    529 print_bibtex (void *cls,
    530               const char *plugin_name,
    531               enum EXTRACTOR_MetaType type,
    532               enum EXTRACTOR_MetaFormat format,
    533               const char *data_mime_type,
    534               const char *data,
    535               size_t data_len)
    536 {
    537   unsigned int i;
    538 
    539   if (0 == data_len)
    540     return 0;
    541   if (YES != print[type])
    542     return 0;
    543   if (EXTRACTOR_METAFORMAT_UTF8 != format)
    544     return 0;
    545   if (EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE == type)
    546   {
    547     entry_type = strdup (data);
    548     return 0;
    549   }
    550   for (i = 0; NULL != btm[i].bibTexName; i++)
    551     if ( (NULL == btm[i].value) &&
    552          (btm[i].le_type == type) )
    553       btm[i].value = strdup (data);
    554   return 0;
    555 }
    556 
    557 
    558 /**
    559  * Print the computed bibTeX entry.
    560  *
    561  * @param fn file for which the entry was created.
    562  */
    563 static void
    564 finish_bibtex (const char *fn)
    565 {
    566   unsigned int i;
    567   ssize_t n;
    568   const char *et;
    569   char temp[20];
    570 
    571   if (NULL != entry_type)
    572     et = entry_type;
    573   else
    574     et = "misc";
    575   if ( (NULL == btm[0].value) ||
    576        (NULL == btm[1].value) ||
    577        (NULL == btm[2].value) )
    578     fprintf (stdout,
    579              "@%s %s { ",
    580              et,
    581              fn);
    582   else
    583   {
    584     snprintf (temp,
    585               sizeof (temp),
    586               "%.5s%.5s%.5s",
    587               btm[2].value,
    588               btm[1].value,
    589               btm[0].value);
    590     for (n = strlen (temp) - 1; n>=0; n--)
    591       if (! isalnum ( (unsigned char) temp[n]) )
    592         temp[n] = '_';
    593       else
    594         temp[n] = tolower ( (unsigned char) temp[n]);
    595     fprintf (stdout,
    596              "@%s %s { ",
    597              et,
    598              temp);
    599   }
    600   for (i = 0; NULL != btm[i].bibTexName; i++)
    601     if (NULL != btm[i].value)
    602       fprintf (stdout,
    603                "\t%s = {%s},\n",
    604                btm[i].bibTexName,
    605                btm[i].value);
    606   fprintf (stdout, "%s", "}\n\n");
    607 }
    608 
    609 
    610 #ifdef WINDOWS
    611 static int
    612 _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
    613 {
    614   char *str;
    615   int len, lenc;
    616   BOOL lossy = FALSE;
    617   DWORD error;
    618 
    619   SetLastError (0);
    620   len = WideCharToMultiByte (cp, 0, wstr, -1, NULL, 0, NULL, (cp == CP_UTF8 ||
    621                                                               cp == CP_UTF7) ?
    622                              NULL : &lossy);
    623   error = GetLastError ();
    624   if (len <= 0)
    625     return -1;
    626 
    627   str = malloc (sizeof (char) * len);
    628 
    629   SetLastError (0);
    630   lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 ||
    631                                                                 cp == CP_UTF7) ?
    632                               NULL : &lossy);
    633   error = GetLastError ();
    634   if (lenc != len)
    635   {
    636     free (str);
    637     return -3;
    638   }
    639   *retstr = str;
    640   if (lossy)
    641     return 1;
    642   return 0;
    643 }
    644 
    645 
    646 #endif
    647 
    648 
    649 /**
    650  * Makes a copy of argv that consists of a single memory chunk that can be
    651  * freed with a single call to free ();
    652  */
    653 static char **
    654 _make_continuous_arg_copy (int argc, char *const *argv)
    655 {
    656   size_t argvsize = 0;
    657   int i;
    658   char **new_argv;
    659   char *p;
    660   for (i = 0; i < argc; i++)
    661     argvsize += strlen (argv[i]) + 1 + sizeof (char *);
    662   new_argv = malloc (argvsize + sizeof (char *));
    663   if (NULL == new_argv)
    664     return NULL;
    665   p = (char *) &new_argv[argc + 1];
    666   for (i = 0; i < argc; i++)
    667   {
    668     new_argv[i] = p;
    669     strcpy (p, argv[i]);
    670     p += strlen (argv[i]) + 1;
    671   }
    672   new_argv[argc] = NULL;
    673   return (char **) new_argv;
    674 }
    675 
    676 
    677 /**
    678  * Returns utf-8 encoded arguments.
    679  * Returned argv has u8argv[u8argc] == NULL.
    680  * Returned argv is a single memory block, and can be freed with a single
    681  *   free () call.
    682  *
    683  * @param argc argc (as given by main())
    684  * @param argv argv (as given by main())
    685  * @param u8argc a location to store new argc in (though it's th same as argc)
    686  * @param u8argv a location to store new argv in
    687  * @return 0 on success, -1 on failure
    688  */
    689 static int
    690 _get_utf8_args (int argc, char *const *argv, int *u8argc, char ***u8argv)
    691 {
    692 #ifdef WINDOWS
    693   wchar_t *wcmd;
    694   wchar_t **wargv;
    695   int wargc;
    696   int i;
    697   char **split_u8argv;
    698 
    699   wcmd = GetCommandLineW ();
    700   if (NULL == wcmd)
    701     return -1;
    702   wargv = CommandLineToArgvW (wcmd, &wargc);
    703   if (NULL == wargv)
    704     return -1;
    705 
    706   split_u8argv = malloc (wargc * sizeof (char *));
    707 
    708   for (i = 0; i < wargc; i++)
    709   {
    710     if (_wchar_to_str (wargv[i], &split_u8argv[i], CP_UTF8) != 0)
    711     {
    712       int j;
    713       int e = errno;
    714       for (j = 0; j < i; j++)
    715         free (split_u8argv[j]);
    716       free (split_u8argv);
    717       LocalFree (wargv);
    718       errno = e;
    719       return -1;
    720     }
    721   }
    722 
    723   *u8argv = _make_continuous_arg_copy (wargc, split_u8argv);
    724   if (NULL == *u8argv)
    725   {
    726     free (split_u8argv);
    727     return -1;
    728   }
    729   *u8argc = wargc;
    730 
    731   for (i = 0; i < wargc; i++)
    732     free (split_u8argv[i]);
    733   free (split_u8argv);
    734 #else
    735   *u8argv = _make_continuous_arg_copy (argc, argv);
    736   if (NULL == *u8argv)
    737     return -1;
    738   *u8argc = argc;
    739 #endif
    740   return 0;
    741 }
    742 
    743 
    744 /**
    745  * Main function for the 'extract' tool.  Invoke with a list of
    746  * filenames to extract keywords from.
    747  *
    748  * @param argc number of arguments in argv
    749  * @param argv command line options and filename to run on
    750  * @return 0 on success
    751  */
    752 int
    753 main (int argc, char *argv[])
    754 {
    755   unsigned int i;
    756   struct EXTRACTOR_PluginList *plugins;
    757   int option_index;
    758   int c;
    759   char *libraries = NULL;
    760   int nodefault = NO;
    761   int defaultAll = YES;
    762   int bibtex = NO;
    763   int grepfriendly = NO;
    764   int ret = 0;
    765   EXTRACTOR_MetaDataProcessor processor = NULL;
    766   char **utf8_argv;
    767   int utf8_argc;
    768 
    769 #if ENABLE_NLS
    770   setlocale (LC_ALL, "");
    771   textdomain (PACKAGE);
    772 #endif
    773 #ifndef WINDOWS
    774   ignore_sigpipe ();
    775 #endif
    776   if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ())))
    777   {
    778     fprintf (stderr,
    779              "malloc failed: %s\n",
    780              strerror (errno));
    781     return 1;
    782   }
    783   for (i = 0; i < EXTRACTOR_metatype_get_max (); i++)
    784     print[i] = YES;   /* default: print everything */
    785 
    786   if (0 != _get_utf8_args (argc, argv, &utf8_argc, &utf8_argv))
    787   {
    788     fprintf (stderr, "Failed to get arguments: %s\n", strerror (errno));
    789     return 1;
    790   }
    791 
    792   while (1)
    793   {
    794     static struct option long_options[] = {
    795       {"bibtex", 0, 0, 'b'},
    796       {"grep-friendly", 0, 0, 'g'},
    797       {"help", 0, 0, 'h'},
    798       {"in-process", 0, 0, 'i'},
    799       {"from-memory", 0, 0, 'm'},
    800       {"list", 0, 0, 'L'},
    801       {"library", 1, 0, 'l'},
    802       {"nodefault", 0, 0, 'n'},
    803       {"print", 1, 0, 'p'},
    804       {"verbose", 0, 0, 'V'},
    805       {"version", 0, 0, 'v'},
    806       {"exclude", 1, 0, 'x'},
    807       {0, 0, 0, 0}
    808     };
    809     option_index = 0;
    810     c = getopt_long (utf8_argc,
    811                      utf8_argv,
    812                      "abghiml:Lnp:vVx:",
    813                      long_options,
    814                      &option_index);
    815 
    816     if (c == -1)
    817       break;  /* No more flags to process */
    818     switch (c)
    819     {
    820     case 'b':
    821       bibtex = YES;
    822       if (NULL != processor)
    823       {
    824         fprintf (stderr,
    825                  "%s",
    826                  _ (
    827                    "Illegal combination of options, cannot combine multiple styles of printing.\n"));
    828         free (utf8_argv);
    829         return 0;
    830       }
    831       processor = &print_bibtex;
    832       break;
    833     case 'g':
    834       grepfriendly = YES;
    835       if (NULL != processor)
    836       {
    837         fprintf (stderr,
    838                  "%s",
    839                  _ (
    840                    "Illegal combination of options, cannot combine multiple styles of printing.\n"));
    841         free (utf8_argv);
    842         return 0;
    843       }
    844       processor = &print_selected_keywords_grep_friendly;
    845       break;
    846     case 'h':
    847       print_help ();
    848       free (utf8_argv);
    849       return 0;
    850     case 'i':
    851       in_process = YES;
    852       break;
    853     case 'm':
    854       from_memory = YES;
    855       break;
    856     case 'l':
    857       libraries = optarg;
    858       break;
    859     case 'L':
    860       i = 0;
    861       while (NULL != EXTRACTOR_metatype_to_string (i))
    862         printf ("%s\n",
    863                 gettext (EXTRACTOR_metatype_to_string (i++)));
    864       free (utf8_argv);
    865       return 0;
    866     case 'n':
    867       nodefault = YES;
    868       break;
    869     case 'p':
    870       if (NULL == optarg)
    871       {
    872         fprintf (stderr,
    873                  _ (
    874                    "You must specify an argument for the `%s' option (option ignored).\n"),
    875                  "-p");
    876         break;
    877       }
    878       if (YES == defaultAll)
    879       {
    880         defaultAll = NO;
    881         i = 0;
    882         while (NULL != EXTRACTOR_metatype_to_string (i))
    883           print[i++] = NO;
    884       }
    885       i = 0;
    886       while (NULL != EXTRACTOR_metatype_to_string (i))
    887       {
    888         if ( (0 == strcmp (optarg,
    889                            EXTRACTOR_metatype_to_string (i))) ||
    890              (0 == strcmp (optarg,
    891                            gettext (EXTRACTOR_metatype_to_string (i)))) )
    892 
    893         {
    894           print[i] = YES;
    895           break;
    896         }
    897         i++;
    898       }
    899       if (NULL == EXTRACTOR_metatype_to_string (i))
    900       {
    901         fprintf (stderr,
    902                  "Unknown keyword type `%s', use option `%s' to get a list.\n",
    903                  optarg,
    904                  "-L");
    905         free (utf8_argv);
    906         return -1;
    907       }
    908       break;
    909     case 'v':
    910       printf ("extract v%s\n", PACKAGE_VERSION);
    911       free (utf8_argv);
    912       return 0;
    913     case 'V':
    914       verbose++;
    915       break;
    916     case 'x':
    917       i = 0;
    918       while (NULL != EXTRACTOR_metatype_to_string (i))
    919       {
    920         if ( (0 == strcmp (optarg,
    921                            EXTRACTOR_metatype_to_string (i))) ||
    922              (0 == strcmp (optarg,
    923                            gettext (EXTRACTOR_metatype_to_string (i)))) )
    924         {
    925           print[i] = NO;
    926           break;
    927         }
    928         i++;
    929       }
    930       if (NULL == EXTRACTOR_metatype_to_string (i))
    931       {
    932         fprintf (stderr,
    933                  "Unknown keyword type `%s', use option `%s' to get a list.\n",
    934                  optarg,
    935                  "-L");
    936         free (utf8_argv);
    937         return -1;
    938       }
    939       break;
    940     default:
    941       fprintf (stderr,
    942                "%s",
    943                _ ("Use --help to get a list of options.\n"));
    944       free (utf8_argv);
    945       return -1;
    946     }   /* end of parsing commandline */
    947   }         /* while (1) */
    948   if (optind < 0)
    949   {
    950     fprintf (stderr,
    951              "%s", "Unknown error parsing options\n");
    952     free (print);
    953     free (utf8_argv);
    954     return -1;
    955   }
    956   if (utf8_argc - optind < 1)
    957   {
    958     fprintf (stderr,
    959              "%s", "Invoke with list of filenames to extract keywords form!\n");
    960     free (print);
    961     free (utf8_argv);
    962     return -1;
    963   }
    964 
    965   /* build list of libraries */
    966   if (NO == nodefault)
    967     plugins = EXTRACTOR_plugin_add_defaults (in_process
    968                                              ? EXTRACTOR_OPTION_IN_PROCESS
    969                                              : EXTRACTOR_OPTION_DEFAULT_POLICY);
    970   else
    971     plugins = NULL;
    972   if (NULL != libraries)
    973     plugins = EXTRACTOR_plugin_add_config (plugins,
    974                                            libraries,
    975                                            in_process
    976                                            ? EXTRACTOR_OPTION_IN_PROCESS
    977                                            : EXTRACTOR_OPTION_DEFAULT_POLICY);
    978   if (NULL == processor)
    979     processor = &print_selected_keywords;
    980 
    981   /* extract keywords */
    982   if (YES == bibtex)
    983     fprintf (stdout,
    984              "%s", _ ("% BiBTeX file\n"));
    985   for (i = optind; i < utf8_argc; i++)
    986   {
    987     errno = 0;
    988     if (YES == grepfriendly)
    989       fprintf (stdout, "%s ", utf8_argv[i]);
    990     else if (NO == bibtex)
    991       fprintf (stdout,
    992                _ ("Keywords for file %s:\n"),
    993                utf8_argv[i]);
    994     else
    995       cleanup_bibtex ();
    996     if (NO == from_memory)
    997       EXTRACTOR_extract (plugins,
    998                          utf8_argv[i],
    999                          NULL, 0,
   1000                          processor,
   1001                          NULL);
   1002     else
   1003     {
   1004       struct stat sb;
   1005       unsigned char *data = NULL;
   1006       int f = open (utf8_argv[i], O_RDONLY
   1007 #if WINDOWS
   1008                     | O_BINARY
   1009 #endif
   1010                     );
   1011       if ( (-1 != f) &&
   1012            (0 == fstat (f, &sb)) &&
   1013            (NULL != (data = malloc ((size_t) sb.st_size))) &&
   1014            (sb.st_size == read (f, data, (size_t) sb.st_size) ) )
   1015       {
   1016         EXTRACTOR_extract (plugins,
   1017                            NULL,
   1018                            data, sb.st_size,
   1019                            processor,
   1020                            NULL);
   1021       }
   1022       else
   1023       {
   1024         if (verbose > 0)
   1025           fprintf (stderr,
   1026                    "%s: %s: %s\n",
   1027                    utf8_argv[0], utf8_argv[i], strerror (errno));
   1028         ret = 1;
   1029       }
   1030       if (NULL != data)
   1031         free (data);
   1032       if (-1 != f)
   1033         (void) close (f);
   1034     }
   1035     if (YES == grepfriendly)
   1036       fprintf (stdout, "%s", "\n");
   1037     continue;
   1038   }
   1039   if (YES == grepfriendly)
   1040     fprintf (stdout, "%s", "\n");
   1041   if (bibtex)
   1042     finish_bibtex (utf8_argv[i]);
   1043   if (verbose > 0)
   1044     fprintf (stdout, "%s", "\n");
   1045   free (print);
   1046   free (utf8_argv);
   1047   EXTRACTOR_plugin_remove_all (plugins);
   1048   plugins = NULL;
   1049   cleanup_bibtex (); /* actually free's stuff */
   1050   return ret;
   1051 }
   1052 
   1053 
   1054 /* end of extract.c */