extract.c (26396B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file main/extract.c 22 * @brief command-line tool to run GNU libextractor 23 * @author Christian Grothoff 24 */ 25 #include "platform.h" 26 #include "extractor.h" 27 #include "getopt.h" 28 #include <signal.h> 29 30 #define YES 1 31 #define NO 0 32 33 34 /** 35 * Which keyword types should we print? 36 */ 37 static int *print; 38 39 /** 40 * How verbose are we supposed to be? 41 */ 42 static int verbose; 43 44 /** 45 * Run plugins in-process. 46 */ 47 static int in_process; 48 49 /** 50 * Read file contents into memory, then feed them to extractor. 51 */ 52 static int from_memory; 53 54 #ifndef WINDOWS 55 /** 56 * Install a signal handler to ignore SIGPIPE. 57 */ 58 static void 59 ignore_sigpipe () 60 { 61 struct sigaction oldsig; 62 struct sigaction sig; 63 64 memset (&sig, 0, sizeof (struct sigaction)); 65 sig.sa_handler = SIG_IGN; 66 sigemptyset (&sig.sa_mask); 67 #ifdef SA_INTERRUPT 68 sig.sa_flags = SA_INTERRUPT; /* SunOS */ 69 #else 70 sig.sa_flags = SA_RESTART; 71 #endif 72 if (0 != sigaction (SIGPIPE, &sig, &oldsig)) 73 fprintf (stderr, 74 "Failed to install SIGPIPE handler: %s\n", strerror (errno)); 75 } 76 77 78 #endif 79 80 81 /** 82 * Information about command-line options. 83 */ 84 struct Help 85 { 86 /** 87 * Single-character option name, '\0' for none. 88 */ 89 char shortArg; 90 91 /** 92 * Long name of the option. 93 */ 94 const char *longArg; 95 96 /** 97 * Name of the mandatory argument, NULL for no argument. 98 */ 99 const char *mandatoryArg; 100 101 /** 102 * Help text for the option. 103 */ 104 const char *description; 105 }; 106 107 108 /** 109 * Indentation for descriptions. 110 */ 111 #define BORDER 29 112 113 114 /** 115 * Display help text (--help). 116 * 117 * @param general binary name 118 * @param description program description 119 * @param opt program options (NULL-terminated array) 120 */ 121 static void 122 format_help (const char *general, 123 const char *description, 124 const struct Help *opt) 125 { 126 size_t slen; 127 unsigned int i; 128 ssize_t j; 129 size_t ml; 130 size_t p; 131 char scp[80]; 132 const char *trans; 133 134 printf (_ ("Usage: %s\n%s\n\n"), 135 gettext (general), 136 gettext (description)); 137 printf (_ ( 138 "Arguments mandatory for long options are also mandatory for short options.\n")); 139 slen = 0; 140 i = 0; 141 while (NULL != opt[i].description) 142 { 143 if (0 == opt[i].shortArg) 144 printf (" "); 145 else 146 printf (" -%c, ", 147 opt[i].shortArg); 148 printf ("--%s", 149 opt[i].longArg); 150 slen = 8 + strlen (opt[i].longArg); 151 if (NULL != opt[i].mandatoryArg) 152 { 153 printf ("=%s", 154 opt[i].mandatoryArg); 155 slen += 1 + strlen (opt[i].mandatoryArg); 156 } 157 if (slen > BORDER) 158 { 159 printf ("\n%*s", BORDER, ""); 160 slen = BORDER; 161 } 162 if (slen < BORDER) 163 { 164 printf ("%*s", (int) (BORDER - slen), ""); 165 slen = BORDER; 166 } 167 trans = gettext (opt[i].description); 168 ml = strlen (trans); 169 p = 0; 170 OUTER: 171 while (ml - p > 78 - slen) 172 { 173 for (j = p + 78 - slen; j>p; j--) 174 { 175 if (isspace ( (unsigned char) trans[j])) 176 { 177 memcpy (scp, 178 &trans[p], 179 j - p); 180 scp[j - p] = '\0'; 181 printf ("%s\n%*s", 182 scp, 183 BORDER + 2, 184 ""); 185 p = j + 1; 186 slen = BORDER + 2; 187 goto OUTER; 188 } 189 } 190 /* could not find space to break line */ 191 memcpy (scp, 192 &trans[p], 193 78 - slen); 194 scp[78 - slen] = '\0'; 195 printf ("%s\n%*s", 196 scp, 197 BORDER + 2, 198 ""); 199 slen = BORDER + 2; 200 p = p + 78 - slen; 201 } 202 /* print rest */ 203 if (p < ml) 204 printf ("%s\n", 205 &trans[p]); 206 i++; 207 } 208 } 209 210 211 /** 212 * Run --help. 213 */ 214 static void 215 print_help () 216 { 217 static struct Help help[] = { 218 { 'b', "bibtex", NULL, 219 gettext_noop ("print output in bibtex format") }, 220 { 'g', "grep-friendly", NULL, 221 gettext_noop ( 222 "produce grep-friendly output (all results on one line per file)") }, 223 { 'h', "help", NULL, 224 gettext_noop ("print this help") }, 225 { 'i', "in-process", NULL, 226 gettext_noop ("run plugins in-process (simplifies debugging)") }, 227 { 'm', "from-memory", NULL, 228 gettext_noop ( 229 "read data from file into memory and extract from memory") }, 230 { 'l', "library", "LIBRARY", 231 gettext_noop ("load an extractor plugin named LIBRARY") }, 232 { 'L', "list", NULL, 233 gettext_noop ("list all keyword types") }, 234 { 'n', "nodefault", NULL, 235 gettext_noop ("do not use the default set of extractor plugins") }, 236 { 'p', "print", "TYPE", 237 gettext_noop ( 238 "print only keywords of the given TYPE (use -L to get a list)") }, 239 { 'v', "version", NULL, 240 gettext_noop ("print the version number") }, 241 { 'V', "verbose", NULL, 242 gettext_noop ("be verbose") }, 243 { 'x', "exclude", "TYPE", 244 gettext_noop ("do not print keywords of the given TYPE") }, 245 { 0, NULL, NULL, NULL }, 246 }; 247 format_help (_ ("extract [OPTIONS] [FILENAME]*"), 248 _ ("Extract metadata from files."), 249 help); 250 251 } 252 253 254 #if HAVE_ICONV 255 #include "iconv.c" 256 #endif 257 258 /** 259 * Print a keyword list to a file. 260 * 261 * @param cls closure, not used 262 * @param plugin_name name of the plugin that produced this value; 263 * special values can be used (i.e. '<zlib>' for zlib being 264 * used in the main libextractor library and yielding 265 * meta data). 266 * @param type libextractor-type describing the meta data 267 * @param format basic format information about data 268 * @param data_mime_type mime-type of data (not of the original file); 269 * can be NULL (if mime-type is not known) 270 * @param data actual meta-data found 271 * @param data_len number of bytes in data 272 * @return 0 to continue extracting, 1 to abort 273 */ 274 static int 275 print_selected_keywords (void *cls, 276 const char *plugin_name, 277 enum EXTRACTOR_MetaType type, 278 enum EXTRACTOR_MetaFormat format, 279 const char *data_mime_type, 280 const char *data, 281 size_t data_len) 282 { 283 char *keyword; 284 #if HAVE_ICONV 285 iconv_t cd; 286 #endif 287 const char *stype; 288 const char *mt; 289 290 if (YES != print[type]) 291 return 0; 292 if (verbose > 3) 293 fprintf (stdout, 294 _ ("Found by `%s' plugin:\n"), 295 plugin_name); 296 mt = EXTRACTOR_metatype_to_string (type); 297 stype = (NULL == mt) ? _ ("unknown") : gettext (mt); 298 switch (format) 299 { 300 case EXTRACTOR_METAFORMAT_UNKNOWN: 301 fprintf (stdout, 302 _ ("%s - (unknown, %u bytes)\n"), 303 stype, 304 (unsigned int) data_len); 305 break; 306 case EXTRACTOR_METAFORMAT_UTF8: 307 if (0 == data_len) 308 break; 309 #if HAVE_ICONV 310 cd = iconv_open (nl_langinfo (CODESET), "UTF-8"); 311 if (((iconv_t) -1) != cd) 312 keyword = iconv_helper (cd, 313 data, 314 data_len); 315 else 316 #endif 317 keyword = strdup (data); 318 if (NULL != keyword) 319 { 320 fprintf (stdout, 321 "%s - %s\n", 322 stype, 323 keyword); 324 free (keyword); 325 } 326 #if HAVE_ICONV 327 if (((iconv_t) -1) != cd) 328 iconv_close (cd); 329 #endif 330 break; 331 case EXTRACTOR_METAFORMAT_BINARY: 332 fprintf (stdout, 333 _ ("%s - (binary, %u bytes)\n"), 334 stype, 335 (unsigned int) data_len); 336 break; 337 case EXTRACTOR_METAFORMAT_C_STRING: 338 fprintf (stdout, 339 "%s - %.*s\n", 340 stype, 341 (int) data_len, 342 data); 343 break; 344 default: 345 break; 346 } 347 return 0; 348 } 349 350 351 /** 352 * Print a keyword list to a file without new lines. 353 * 354 * @param cls closure, not used 355 * @param plugin_name name of the plugin that produced this value; 356 * special values can be used (i.e. '<zlib>' for zlib being 357 * used in the main libextractor library and yielding 358 * meta data). 359 * @param type libextractor-type describing the meta data 360 * @param format basic format information about data 361 * @param data_mime_type mime-type of data (not of the original file); 362 * can be NULL (if mime-type is not known) 363 * @param data actual meta-data found 364 * @param data_len number of bytes in data 365 * @return 0 to continue extracting, 1 to abort 366 */ 367 static int 368 print_selected_keywords_grep_friendly (void *cls, 369 const char *plugin_name, 370 enum EXTRACTOR_MetaType type, 371 enum EXTRACTOR_MetaFormat format, 372 const char *data_mime_type, 373 const char *data, 374 size_t data_len) 375 { 376 char *keyword; 377 #if HAVE_ICONV 378 iconv_t cd; 379 #endif 380 const char *mt; 381 382 if (YES != print[type]) 383 return 0; 384 mt = EXTRACTOR_metatype_to_string (type); 385 if (NULL == mt) 386 mt = gettext_noop ("unknown"); 387 switch (format) 388 { 389 case EXTRACTOR_METAFORMAT_UNKNOWN: 390 break; 391 case EXTRACTOR_METAFORMAT_UTF8: 392 if (0 == data_len) 393 return 0; 394 if (verbose > 1) 395 fprintf (stdout, 396 "%s: ", 397 gettext (mt)); 398 #if HAVE_ICONV 399 cd = iconv_open (nl_langinfo (CODESET), "UTF-8"); 400 if (((iconv_t) -1) != cd) 401 keyword = iconv_helper (cd, 402 data, 403 data_len); 404 else 405 #endif 406 keyword = strdup (data); 407 if (NULL != keyword) 408 { 409 fprintf (stdout, 410 "`%s' ", 411 keyword); 412 free (keyword); 413 } 414 #if HAVE_ICONV 415 if (((iconv_t) -1) != cd) 416 iconv_close (cd); 417 #endif 418 break; 419 case EXTRACTOR_METAFORMAT_BINARY: 420 break; 421 case EXTRACTOR_METAFORMAT_C_STRING: 422 if (verbose > 1) 423 fprintf (stdout, 424 "%s ", 425 gettext (mt)); 426 fprintf (stdout, 427 "`%s'", 428 data); 429 break; 430 default: 431 break; 432 } 433 return 0; 434 } 435 436 437 /** 438 * Entry in the map we construct for each file. 439 */ 440 struct BibTexMap 441 { 442 /** 443 * Name in bibTeX 444 */ 445 const char *bibTexName; 446 447 /** 448 * Meta type for the value. 449 */ 450 enum EXTRACTOR_MetaType le_type; 451 452 /** 453 * The value itself. 454 */ 455 char *value; 456 }; 457 458 459 /** 460 * Type of the entry for bibtex. 461 */ 462 static char *entry_type; 463 464 /** 465 * Mapping between bibTeX strings, libextractor 466 * meta data types and values for the current document. 467 */ 468 static struct BibTexMap btm[] = { 469 { "title", EXTRACTOR_METATYPE_TITLE, NULL}, 470 { "year", EXTRACTOR_METATYPE_PUBLICATION_YEAR, NULL }, 471 { "author", EXTRACTOR_METATYPE_AUTHOR_NAME, NULL }, 472 { "book", EXTRACTOR_METATYPE_BOOK_TITLE, NULL}, 473 { "edition", EXTRACTOR_METATYPE_BOOK_EDITION, NULL}, 474 { "chapter", EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER, NULL}, 475 { "journal", EXTRACTOR_METATYPE_JOURNAL_NAME, NULL}, 476 { "volume", EXTRACTOR_METATYPE_JOURNAL_VOLUME, NULL}, 477 { "number", EXTRACTOR_METATYPE_JOURNAL_NUMBER, NULL}, 478 { "pages", EXTRACTOR_METATYPE_PAGE_COUNT, NULL }, 479 { "pages", EXTRACTOR_METATYPE_PAGE_RANGE, NULL }, 480 { "school", EXTRACTOR_METATYPE_AUTHOR_INSTITUTION, NULL}, 481 { "publisher", EXTRACTOR_METATYPE_PUBLISHER, NULL }, 482 { "address", EXTRACTOR_METATYPE_PUBLISHER_ADDRESS, NULL }, 483 { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL }, 484 { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL}, 485 { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL }, 486 { "url", EXTRACTOR_METATYPE_URL, NULL}, 487 { "note", EXTRACTOR_METATYPE_COMMENT, NULL}, 488 { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL }, 489 { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL }, 490 { NULL, 0, NULL } 491 }; 492 493 494 /** 495 * Clean up the bibtex processor in preparation for the next round. 496 */ 497 static void 498 cleanup_bibtex () 499 { 500 unsigned int i; 501 502 for (i = 0; NULL != btm[i].bibTexName; i++) 503 { 504 free (btm[i].value); 505 btm[i].value = NULL; 506 } 507 free (entry_type); 508 entry_type = NULL; 509 } 510 511 512 /** 513 * Callback function for printing meta data in bibtex format. 514 * 515 * @param cls closure, not used 516 * @param plugin_name name of the plugin that produced this value; 517 * special values can be used (i.e. '<zlib>' for zlib being 518 * used in the main libextractor library and yielding 519 * meta data). 520 * @param type libextractor-type describing the meta data 521 * @param format basic format information about data 522 * @param data_mime_type mime-type of data (not of the original file); 523 * can be NULL (if mime-type is not known) 524 * @param data actual meta-data found 525 * @param data_len number of bytes in data 526 * @return 0 to continue extracting (always) 527 */ 528 static int 529 print_bibtex (void *cls, 530 const char *plugin_name, 531 enum EXTRACTOR_MetaType type, 532 enum EXTRACTOR_MetaFormat format, 533 const char *data_mime_type, 534 const char *data, 535 size_t data_len) 536 { 537 unsigned int i; 538 539 if (0 == data_len) 540 return 0; 541 if (YES != print[type]) 542 return 0; 543 if (EXTRACTOR_METAFORMAT_UTF8 != format) 544 return 0; 545 if (EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE == type) 546 { 547 entry_type = strdup (data); 548 return 0; 549 } 550 for (i = 0; NULL != btm[i].bibTexName; i++) 551 if ( (NULL == btm[i].value) && 552 (btm[i].le_type == type) ) 553 btm[i].value = strdup (data); 554 return 0; 555 } 556 557 558 /** 559 * Print the computed bibTeX entry. 560 * 561 * @param fn file for which the entry was created. 562 */ 563 static void 564 finish_bibtex (const char *fn) 565 { 566 unsigned int i; 567 ssize_t n; 568 const char *et; 569 char temp[20]; 570 571 if (NULL != entry_type) 572 et = entry_type; 573 else 574 et = "misc"; 575 if ( (NULL == btm[0].value) || 576 (NULL == btm[1].value) || 577 (NULL == btm[2].value) ) 578 fprintf (stdout, 579 "@%s %s { ", 580 et, 581 fn); 582 else 583 { 584 snprintf (temp, 585 sizeof (temp), 586 "%.5s%.5s%.5s", 587 btm[2].value, 588 btm[1].value, 589 btm[0].value); 590 for (n = strlen (temp) - 1; n>=0; n--) 591 if (! isalnum ( (unsigned char) temp[n]) ) 592 temp[n] = '_'; 593 else 594 temp[n] = tolower ( (unsigned char) temp[n]); 595 fprintf (stdout, 596 "@%s %s { ", 597 et, 598 temp); 599 } 600 for (i = 0; NULL != btm[i].bibTexName; i++) 601 if (NULL != btm[i].value) 602 fprintf (stdout, 603 "\t%s = {%s},\n", 604 btm[i].bibTexName, 605 btm[i].value); 606 fprintf (stdout, "%s", "}\n\n"); 607 } 608 609 610 #ifdef WINDOWS 611 static int 612 _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp) 613 { 614 char *str; 615 int len, lenc; 616 BOOL lossy = FALSE; 617 DWORD error; 618 619 SetLastError (0); 620 len = WideCharToMultiByte (cp, 0, wstr, -1, NULL, 0, NULL, (cp == CP_UTF8 || 621 cp == CP_UTF7) ? 622 NULL : &lossy); 623 error = GetLastError (); 624 if (len <= 0) 625 return -1; 626 627 str = malloc (sizeof (char) * len); 628 629 SetLastError (0); 630 lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 || 631 cp == CP_UTF7) ? 632 NULL : &lossy); 633 error = GetLastError (); 634 if (lenc != len) 635 { 636 free (str); 637 return -3; 638 } 639 *retstr = str; 640 if (lossy) 641 return 1; 642 return 0; 643 } 644 645 646 #endif 647 648 649 /** 650 * Makes a copy of argv that consists of a single memory chunk that can be 651 * freed with a single call to free (); 652 */ 653 static char ** 654 _make_continuous_arg_copy (int argc, char *const *argv) 655 { 656 size_t argvsize = 0; 657 int i; 658 char **new_argv; 659 char *p; 660 for (i = 0; i < argc; i++) 661 argvsize += strlen (argv[i]) + 1 + sizeof (char *); 662 new_argv = malloc (argvsize + sizeof (char *)); 663 if (NULL == new_argv) 664 return NULL; 665 p = (char *) &new_argv[argc + 1]; 666 for (i = 0; i < argc; i++) 667 { 668 new_argv[i] = p; 669 strcpy (p, argv[i]); 670 p += strlen (argv[i]) + 1; 671 } 672 new_argv[argc] = NULL; 673 return (char **) new_argv; 674 } 675 676 677 /** 678 * Returns utf-8 encoded arguments. 679 * Returned argv has u8argv[u8argc] == NULL. 680 * Returned argv is a single memory block, and can be freed with a single 681 * free () call. 682 * 683 * @param argc argc (as given by main()) 684 * @param argv argv (as given by main()) 685 * @param u8argc a location to store new argc in (though it's th same as argc) 686 * @param u8argv a location to store new argv in 687 * @return 0 on success, -1 on failure 688 */ 689 static int 690 _get_utf8_args (int argc, char *const *argv, int *u8argc, char ***u8argv) 691 { 692 #ifdef WINDOWS 693 wchar_t *wcmd; 694 wchar_t **wargv; 695 int wargc; 696 int i; 697 char **split_u8argv; 698 699 wcmd = GetCommandLineW (); 700 if (NULL == wcmd) 701 return -1; 702 wargv = CommandLineToArgvW (wcmd, &wargc); 703 if (NULL == wargv) 704 return -1; 705 706 split_u8argv = malloc (wargc * sizeof (char *)); 707 708 for (i = 0; i < wargc; i++) 709 { 710 if (_wchar_to_str (wargv[i], &split_u8argv[i], CP_UTF8) != 0) 711 { 712 int j; 713 int e = errno; 714 for (j = 0; j < i; j++) 715 free (split_u8argv[j]); 716 free (split_u8argv); 717 LocalFree (wargv); 718 errno = e; 719 return -1; 720 } 721 } 722 723 *u8argv = _make_continuous_arg_copy (wargc, split_u8argv); 724 if (NULL == *u8argv) 725 { 726 free (split_u8argv); 727 return -1; 728 } 729 *u8argc = wargc; 730 731 for (i = 0; i < wargc; i++) 732 free (split_u8argv[i]); 733 free (split_u8argv); 734 #else 735 *u8argv = _make_continuous_arg_copy (argc, argv); 736 if (NULL == *u8argv) 737 return -1; 738 *u8argc = argc; 739 #endif 740 return 0; 741 } 742 743 744 /** 745 * Main function for the 'extract' tool. Invoke with a list of 746 * filenames to extract keywords from. 747 * 748 * @param argc number of arguments in argv 749 * @param argv command line options and filename to run on 750 * @return 0 on success 751 */ 752 int 753 main (int argc, char *argv[]) 754 { 755 unsigned int i; 756 struct EXTRACTOR_PluginList *plugins; 757 int option_index; 758 int c; 759 char *libraries = NULL; 760 int nodefault = NO; 761 int defaultAll = YES; 762 int bibtex = NO; 763 int grepfriendly = NO; 764 int ret = 0; 765 EXTRACTOR_MetaDataProcessor processor = NULL; 766 char **utf8_argv; 767 int utf8_argc; 768 769 #if ENABLE_NLS 770 setlocale (LC_ALL, ""); 771 textdomain (PACKAGE); 772 #endif 773 #ifndef WINDOWS 774 ignore_sigpipe (); 775 #endif 776 if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ()))) 777 { 778 fprintf (stderr, 779 "malloc failed: %s\n", 780 strerror (errno)); 781 return 1; 782 } 783 for (i = 0; i < EXTRACTOR_metatype_get_max (); i++) 784 print[i] = YES; /* default: print everything */ 785 786 if (0 != _get_utf8_args (argc, argv, &utf8_argc, &utf8_argv)) 787 { 788 fprintf (stderr, "Failed to get arguments: %s\n", strerror (errno)); 789 return 1; 790 } 791 792 while (1) 793 { 794 static struct option long_options[] = { 795 {"bibtex", 0, 0, 'b'}, 796 {"grep-friendly", 0, 0, 'g'}, 797 {"help", 0, 0, 'h'}, 798 {"in-process", 0, 0, 'i'}, 799 {"from-memory", 0, 0, 'm'}, 800 {"list", 0, 0, 'L'}, 801 {"library", 1, 0, 'l'}, 802 {"nodefault", 0, 0, 'n'}, 803 {"print", 1, 0, 'p'}, 804 {"verbose", 0, 0, 'V'}, 805 {"version", 0, 0, 'v'}, 806 {"exclude", 1, 0, 'x'}, 807 {0, 0, 0, 0} 808 }; 809 option_index = 0; 810 c = getopt_long (utf8_argc, 811 utf8_argv, 812 "abghiml:Lnp:vVx:", 813 long_options, 814 &option_index); 815 816 if (c == -1) 817 break; /* No more flags to process */ 818 switch (c) 819 { 820 case 'b': 821 bibtex = YES; 822 if (NULL != processor) 823 { 824 fprintf (stderr, 825 "%s", 826 _ ( 827 "Illegal combination of options, cannot combine multiple styles of printing.\n")); 828 free (utf8_argv); 829 return 0; 830 } 831 processor = &print_bibtex; 832 break; 833 case 'g': 834 grepfriendly = YES; 835 if (NULL != processor) 836 { 837 fprintf (stderr, 838 "%s", 839 _ ( 840 "Illegal combination of options, cannot combine multiple styles of printing.\n")); 841 free (utf8_argv); 842 return 0; 843 } 844 processor = &print_selected_keywords_grep_friendly; 845 break; 846 case 'h': 847 print_help (); 848 free (utf8_argv); 849 return 0; 850 case 'i': 851 in_process = YES; 852 break; 853 case 'm': 854 from_memory = YES; 855 break; 856 case 'l': 857 libraries = optarg; 858 break; 859 case 'L': 860 i = 0; 861 while (NULL != EXTRACTOR_metatype_to_string (i)) 862 printf ("%s\n", 863 gettext (EXTRACTOR_metatype_to_string (i++))); 864 free (utf8_argv); 865 return 0; 866 case 'n': 867 nodefault = YES; 868 break; 869 case 'p': 870 if (NULL == optarg) 871 { 872 fprintf (stderr, 873 _ ( 874 "You must specify an argument for the `%s' option (option ignored).\n"), 875 "-p"); 876 break; 877 } 878 if (YES == defaultAll) 879 { 880 defaultAll = NO; 881 i = 0; 882 while (NULL != EXTRACTOR_metatype_to_string (i)) 883 print[i++] = NO; 884 } 885 i = 0; 886 while (NULL != EXTRACTOR_metatype_to_string (i)) 887 { 888 if ( (0 == strcmp (optarg, 889 EXTRACTOR_metatype_to_string (i))) || 890 (0 == strcmp (optarg, 891 gettext (EXTRACTOR_metatype_to_string (i)))) ) 892 893 { 894 print[i] = YES; 895 break; 896 } 897 i++; 898 } 899 if (NULL == EXTRACTOR_metatype_to_string (i)) 900 { 901 fprintf (stderr, 902 "Unknown keyword type `%s', use option `%s' to get a list.\n", 903 optarg, 904 "-L"); 905 free (utf8_argv); 906 return -1; 907 } 908 break; 909 case 'v': 910 printf ("extract v%s\n", PACKAGE_VERSION); 911 free (utf8_argv); 912 return 0; 913 case 'V': 914 verbose++; 915 break; 916 case 'x': 917 i = 0; 918 while (NULL != EXTRACTOR_metatype_to_string (i)) 919 { 920 if ( (0 == strcmp (optarg, 921 EXTRACTOR_metatype_to_string (i))) || 922 (0 == strcmp (optarg, 923 gettext (EXTRACTOR_metatype_to_string (i)))) ) 924 { 925 print[i] = NO; 926 break; 927 } 928 i++; 929 } 930 if (NULL == EXTRACTOR_metatype_to_string (i)) 931 { 932 fprintf (stderr, 933 "Unknown keyword type `%s', use option `%s' to get a list.\n", 934 optarg, 935 "-L"); 936 free (utf8_argv); 937 return -1; 938 } 939 break; 940 default: 941 fprintf (stderr, 942 "%s", 943 _ ("Use --help to get a list of options.\n")); 944 free (utf8_argv); 945 return -1; 946 } /* end of parsing commandline */ 947 } /* while (1) */ 948 if (optind < 0) 949 { 950 fprintf (stderr, 951 "%s", "Unknown error parsing options\n"); 952 free (print); 953 free (utf8_argv); 954 return -1; 955 } 956 if (utf8_argc - optind < 1) 957 { 958 fprintf (stderr, 959 "%s", "Invoke with list of filenames to extract keywords form!\n"); 960 free (print); 961 free (utf8_argv); 962 return -1; 963 } 964 965 /* build list of libraries */ 966 if (NO == nodefault) 967 plugins = EXTRACTOR_plugin_add_defaults (in_process 968 ? EXTRACTOR_OPTION_IN_PROCESS 969 : EXTRACTOR_OPTION_DEFAULT_POLICY); 970 else 971 plugins = NULL; 972 if (NULL != libraries) 973 plugins = EXTRACTOR_plugin_add_config (plugins, 974 libraries, 975 in_process 976 ? EXTRACTOR_OPTION_IN_PROCESS 977 : EXTRACTOR_OPTION_DEFAULT_POLICY); 978 if (NULL == processor) 979 processor = &print_selected_keywords; 980 981 /* extract keywords */ 982 if (YES == bibtex) 983 fprintf (stdout, 984 "%s", _ ("% BiBTeX file\n")); 985 for (i = optind; i < utf8_argc; i++) 986 { 987 errno = 0; 988 if (YES == grepfriendly) 989 fprintf (stdout, "%s ", utf8_argv[i]); 990 else if (NO == bibtex) 991 fprintf (stdout, 992 _ ("Keywords for file %s:\n"), 993 utf8_argv[i]); 994 else 995 cleanup_bibtex (); 996 if (NO == from_memory) 997 EXTRACTOR_extract (plugins, 998 utf8_argv[i], 999 NULL, 0, 1000 processor, 1001 NULL); 1002 else 1003 { 1004 struct stat sb; 1005 unsigned char *data = NULL; 1006 int f = open (utf8_argv[i], O_RDONLY 1007 #if WINDOWS 1008 | O_BINARY 1009 #endif 1010 ); 1011 if ( (-1 != f) && 1012 (0 == fstat (f, &sb)) && 1013 (NULL != (data = malloc ((size_t) sb.st_size))) && 1014 (sb.st_size == read (f, data, (size_t) sb.st_size) ) ) 1015 { 1016 EXTRACTOR_extract (plugins, 1017 NULL, 1018 data, sb.st_size, 1019 processor, 1020 NULL); 1021 } 1022 else 1023 { 1024 if (verbose > 0) 1025 fprintf (stderr, 1026 "%s: %s: %s\n", 1027 utf8_argv[0], utf8_argv[i], strerror (errno)); 1028 ret = 1; 1029 } 1030 if (NULL != data) 1031 free (data); 1032 if (-1 != f) 1033 (void) close (f); 1034 } 1035 if (YES == grepfriendly) 1036 fprintf (stdout, "%s", "\n"); 1037 continue; 1038 } 1039 if (YES == grepfriendly) 1040 fprintf (stdout, "%s", "\n"); 1041 if (bibtex) 1042 finish_bibtex (utf8_argv[i]); 1043 if (verbose > 0) 1044 fprintf (stdout, "%s", "\n"); 1045 free (print); 1046 free (utf8_argv); 1047 EXTRACTOR_plugin_remove_all (plugins); 1048 plugins = NULL; 1049 cleanup_bibtex (); /* actually free's stuff */ 1050 return ret; 1051 } 1052 1053 1054 /* end of extract.c */