aboutsummaryrefslogtreecommitdiff
path: root/src/main/extract.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/extract.c')
-rw-r--r--src/main/extract.c1015
1 files changed, 515 insertions, 500 deletions
diff --git a/src/main/extract.c b/src/main/extract.c
index 2492fa9..144f2c3 100644
--- a/src/main/extract.c
+++ b/src/main/extract.c
@@ -73,6 +73,8 @@ ignore_sigpipe ()
73 FPRINTF (stderr, 73 FPRINTF (stderr,
74 "Failed to install SIGPIPE handler: %s\n", strerror (errno)); 74 "Failed to install SIGPIPE handler: %s\n", strerror (errno));
75} 75}
76
77
76#endif 78#endif
77 79
78 80
@@ -89,17 +91,17 @@ struct Help
89 /** 91 /**
90 * Long name of the option. 92 * Long name of the option.
91 */ 93 */
92 const char * longArg; 94 const char *longArg;
93 95
94 /** 96 /**
95 * Name of the mandatory argument, NULL for no argument. 97 * Name of the mandatory argument, NULL for no argument.
96 */ 98 */
97 const char * mandatoryArg; 99 const char *mandatoryArg;
98 100
99 /** 101 /**
100 * Help text for the option. 102 * Help text for the option.
101 */ 103 */
102 const char * description; 104 const char *description;
103}; 105};
104 106
105 107
@@ -118,8 +120,8 @@ struct Help
118 */ 120 */
119static void 121static void
120format_help (const char *general, 122format_help (const char *general,
121 const char *description, 123 const char *description,
122 const struct Help *opt) 124 const struct Help *opt)
123{ 125{
124 size_t slen; 126 size_t slen;
125 unsigned int i; 127 unsigned int i;
@@ -129,79 +131,80 @@ format_help (const char *general,
129 char scp[80]; 131 char scp[80];
130 const char *trans; 132 const char *trans;
131 133
132 printf (_("Usage: %s\n%s\n\n"), 134 printf (_ ("Usage: %s\n%s\n\n"),
133 gettext(general), 135 gettext (general),
134 gettext(description)); 136 gettext (description));
135 printf (_("Arguments mandatory for long options are also mandatory for short options.\n")); 137 printf (_ (
138 "Arguments mandatory for long options are also mandatory for short options.\n"));
136 slen = 0; 139 slen = 0;
137 i = 0; 140 i = 0;
138 while (NULL != opt[i].description) 141 while (NULL != opt[i].description)
142 {
143 if (0 == opt[i].shortArg)
144 printf (" ");
145 else
146 printf (" -%c, ",
147 opt[i].shortArg);
148 printf ("--%s",
149 opt[i].longArg);
150 slen = 8 + strlen (opt[i].longArg);
151 if (NULL != opt[i].mandatoryArg)
139 { 152 {
140 if (0 == opt[i].shortArg) 153 printf ("=%s",
141 printf (" "); 154 opt[i].mandatoryArg);
142 else 155 slen += 1 + strlen (opt[i].mandatoryArg);
143 printf (" -%c, ", 156 }
144 opt[i].shortArg); 157 if (slen > BORDER)
145 printf ("--%s", 158 {
146 opt[i].longArg); 159 printf ("\n%*s", BORDER, "");
147 slen = 8 + strlen(opt[i].longArg); 160 slen = BORDER;
148 if (NULL != opt[i].mandatoryArg) 161 }
149 { 162 if (slen < BORDER)
150 printf ("=%s", 163 {
151 opt[i].mandatoryArg); 164 printf ("%*s", (int) (BORDER - slen), "");
152 slen += 1+strlen(opt[i].mandatoryArg); 165 slen = BORDER;
153 } 166 }
154 if (slen > BORDER) 167 trans = gettext (opt[i].description);
155 { 168 ml = strlen (trans);
156 printf ("\n%*s", BORDER, ""); 169 p = 0;
157 slen = BORDER; 170OUTER:
158 } 171 while (ml - p > 78 - slen)
159 if (slen < BORDER) 172 {
160 { 173 for (j = p + 78 - slen; j>p; j--)
161 printf ("%*s", (int) (BORDER - slen), ""); 174 {
162 slen = BORDER; 175 if (isspace ( (unsigned char) trans[j]))
163 } 176 {
164 trans = gettext(opt[i].description); 177 memcpy (scp,
165 ml = strlen(trans); 178 &trans[p],
166 p = 0; 179 j - p);
167 OUTER: 180 scp[j - p] = '\0';
168 while (ml - p > 78 - slen) 181 printf ("%s\n%*s",
169 { 182 scp,
170 for (j=p+78-slen;j>p;j--) 183 BORDER + 2,
171 { 184 "");
172 if (isspace( (unsigned char) trans[j])) 185 p = j + 1;
173 { 186 slen = BORDER + 2;
174 memcpy(scp, 187 goto OUTER;
175 &trans[p], 188 }
176 j-p); 189 }
177 scp[j-p] = '\0'; 190 /* could not find space to break line */
178 printf ("%s\n%*s", 191 memcpy (scp,
179 scp, 192 &trans[p],
180 BORDER + 2, 193 78 - slen);
181 ""); 194 scp[78 - slen] = '\0';
182 p = j+1; 195 printf ("%s\n%*s",
183 slen = BORDER + 2; 196 scp,
184 goto OUTER; 197 BORDER + 2,
185 } 198 "");
186 } 199 slen = BORDER + 2;
187 /* could not find space to break line */ 200 p = p + 78 - slen;
188 memcpy (scp,
189 &trans[p],
190 78 - slen);
191 scp[78 - slen] = '\0';
192 printf ("%s\n%*s",
193 scp,
194 BORDER+2,
195 "");
196 slen = BORDER+2;
197 p = p + 78 - slen;
198 }
199 /* print rest */
200 if (p < ml)
201 printf("%s\n",
202 &trans[p]);
203 i++;
204 } 201 }
202 /* print rest */
203 if (p < ml)
204 printf ("%s\n",
205 &trans[p]);
206 i++;
207 }
205} 208}
206 209
207 210
@@ -211,40 +214,43 @@ format_help (const char *general,
211static void 214static void
212print_help () 215print_help ()
213{ 216{
214 static struct Help help[] = 217 static struct Help help[] = {
215 { 218 { 'b', "bibtex", NULL,
216 { 'b', "bibtex", NULL, 219 gettext_noop ("print output in bibtex format") },
217 gettext_noop("print output in bibtex format") }, 220 { 'g', "grep-friendly", NULL,
218 { 'g', "grep-friendly", NULL, 221 gettext_noop (
219 gettext_noop("produce grep-friendly output (all results on one line per file)") }, 222 "produce grep-friendly output (all results on one line per file)") },
220 { 'h', "help", NULL, 223 { 'h', "help", NULL,
221 gettext_noop("print this help") }, 224 gettext_noop ("print this help") },
222 { 'i', "in-process", NULL, 225 { 'i', "in-process", NULL,
223 gettext_noop("run plugins in-process (simplifies debugging)") }, 226 gettext_noop ("run plugins in-process (simplifies debugging)") },
224 { 'm', "from-memory", NULL, 227 { 'm', "from-memory", NULL,
225 gettext_noop("read data from file into memory and extract from memory") }, 228 gettext_noop (
226 { 'l', "library", "LIBRARY", 229 "read data from file into memory and extract from memory") },
227 gettext_noop("load an extractor plugin named LIBRARY") }, 230 { 'l', "library", "LIBRARY",
228 { 'L', "list", NULL, 231 gettext_noop ("load an extractor plugin named LIBRARY") },
229 gettext_noop("list all keyword types") }, 232 { 'L', "list", NULL,
230 { 'n', "nodefault", NULL, 233 gettext_noop ("list all keyword types") },
231 gettext_noop("do not use the default set of extractor plugins") }, 234 { 'n', "nodefault", NULL,
232 { 'p', "print", "TYPE", 235 gettext_noop ("do not use the default set of extractor plugins") },
233 gettext_noop("print only keywords of the given TYPE (use -L to get a list)") }, 236 { 'p', "print", "TYPE",
234 { 'v', "version", NULL, 237 gettext_noop (
235 gettext_noop("print the version number") }, 238 "print only keywords of the given TYPE (use -L to get a list)") },
236 { 'V', "verbose", NULL, 239 { 'v', "version", NULL,
237 gettext_noop("be verbose") }, 240 gettext_noop ("print the version number") },
238 { 'x', "exclude", "TYPE", 241 { 'V', "verbose", NULL,
239 gettext_noop("do not print keywords of the given TYPE") }, 242 gettext_noop ("be verbose") },
240 { 0, NULL, NULL, NULL }, 243 { 'x', "exclude", "TYPE",
241 }; 244 gettext_noop ("do not print keywords of the given TYPE") },
242 format_help (_("extract [OPTIONS] [FILENAME]*"), 245 { 0, NULL, NULL, NULL },
243 _("Extract metadata from files."), 246 };
244 help); 247 format_help (_ ("extract [OPTIONS] [FILENAME]*"),
248 _ ("Extract metadata from files."),
249 help);
245 250
246} 251}
247 252
253
248#if HAVE_ICONV 254#if HAVE_ICONV
249#include "iconv.c" 255#include "iconv.c"
250#endif 256#endif
@@ -267,12 +273,12 @@ print_help ()
267 */ 273 */
268static int 274static int
269print_selected_keywords (void *cls, 275print_selected_keywords (void *cls,
270 const char *plugin_name, 276 const char *plugin_name,
271 enum EXTRACTOR_MetaType type, 277 enum EXTRACTOR_MetaType type,
272 enum EXTRACTOR_MetaFormat format, 278 enum EXTRACTOR_MetaFormat format,
273 const char *data_mime_type, 279 const char *data_mime_type,
274 const char *data, 280 const char *data,
275 size_t data_len) 281 size_t data_len)
276{ 282{
277 char *keyword; 283 char *keyword;
278#if HAVE_ICONV 284#if HAVE_ICONV
@@ -285,59 +291,59 @@ print_selected_keywords (void *cls,
285 return 0; 291 return 0;
286 if (verbose > 3) 292 if (verbose > 3)
287 FPRINTF (stdout, 293 FPRINTF (stdout,
288 _("Found by `%s' plugin:\n"), 294 _ ("Found by `%s' plugin:\n"),
289 plugin_name); 295 plugin_name);
290 mt = EXTRACTOR_metatype_to_string (type); 296 mt = EXTRACTOR_metatype_to_string (type);
291 stype = (NULL == mt) ? _("unknown") : gettext(mt); 297 stype = (NULL == mt) ? _ ("unknown") : gettext (mt);
292 switch (format) 298 switch (format)
293 { 299 {
294 case EXTRACTOR_METAFORMAT_UNKNOWN: 300 case EXTRACTOR_METAFORMAT_UNKNOWN:
295 FPRINTF (stdout, 301 FPRINTF (stdout,
296 _("%s - (unknown, %u bytes)\n"), 302 _ ("%s - (unknown, %u bytes)\n"),
297 stype, 303 stype,
298 (unsigned int) data_len); 304 (unsigned int) data_len);
305 break;
306 case EXTRACTOR_METAFORMAT_UTF8:
307 if (0 == data_len)
299 break; 308 break;
300 case EXTRACTOR_METAFORMAT_UTF8:
301 if (0 == data_len)
302 break;
303#if HAVE_ICONV
304 cd = iconv_open (nl_langinfo(CODESET), "UTF-8");
305 if (((iconv_t) -1) != cd)
306 keyword = iconv_helper (cd,
307 data,
308 data_len);
309 else
310#endif
311 keyword = strdup (data);
312 if (NULL != keyword)
313 {
314 FPRINTF (stdout,
315 "%s - %s\n",
316 stype,
317 keyword);
318 free (keyword);
319 }
320#if HAVE_ICONV 309#if HAVE_ICONV
321 if (((iconv_t) -1) != cd) 310 cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
322 iconv_close (cd); 311 if (((iconv_t) -1) != cd)
312 keyword = iconv_helper (cd,
313 data,
314 data_len);
315 else
323#endif 316#endif
324 break; 317 keyword = strdup (data);
325 case EXTRACTOR_METAFORMAT_BINARY: 318 if (NULL != keyword)
326 FPRINTF (stdout, 319 {
327 _("%s - (binary, %u bytes)\n"),
328 stype,
329 (unsigned int) data_len);
330 break;
331 case EXTRACTOR_METAFORMAT_C_STRING:
332 FPRINTF (stdout, 320 FPRINTF (stdout,
333 "%s - %.*s\n", 321 "%s - %s\n",
334 stype, 322 stype,
335 (int) data_len, 323 keyword);
336 data); 324 free (keyword);
337 break;
338 default:
339 break;
340 } 325 }
326#if HAVE_ICONV
327 if (((iconv_t) -1) != cd)
328 iconv_close (cd);
329#endif
330 break;
331 case EXTRACTOR_METAFORMAT_BINARY:
332 FPRINTF (stdout,
333 _ ("%s - (binary, %u bytes)\n"),
334 stype,
335 (unsigned int) data_len);
336 break;
337 case EXTRACTOR_METAFORMAT_C_STRING:
338 FPRINTF (stdout,
339 "%s - %.*s\n",
340 stype,
341 (int) data_len,
342 data);
343 break;
344 default:
345 break;
346 }
341 return 0; 347 return 0;
342} 348}
343 349
@@ -360,12 +366,12 @@ print_selected_keywords (void *cls,
360 */ 366 */
361static int 367static int
362print_selected_keywords_grep_friendly (void *cls, 368print_selected_keywords_grep_friendly (void *cls,
363 const char *plugin_name, 369 const char *plugin_name,
364 enum EXTRACTOR_MetaType type, 370 enum EXTRACTOR_MetaType type,
365 enum EXTRACTOR_MetaFormat format, 371 enum EXTRACTOR_MetaFormat format,
366 const char *data_mime_type, 372 const char *data_mime_type,
367 const char *data, 373 const char *data,
368 size_t data_len) 374 size_t data_len)
369{ 375{
370 char *keyword; 376 char *keyword;
371#if HAVE_ICONV 377#if HAVE_ICONV
@@ -379,51 +385,51 @@ print_selected_keywords_grep_friendly (void *cls,
379 if (NULL == mt) 385 if (NULL == mt)
380 mt = gettext_noop ("unknown"); 386 mt = gettext_noop ("unknown");
381 switch (format) 387 switch (format)
382 { 388 {
383 case EXTRACTOR_METAFORMAT_UNKNOWN: 389 case EXTRACTOR_METAFORMAT_UNKNOWN:
384 break; 390 break;
385 case EXTRACTOR_METAFORMAT_UTF8: 391 case EXTRACTOR_METAFORMAT_UTF8:
386 if (0 == data_len) 392 if (0 == data_len)
387 return 0; 393 return 0;
388 if (verbose > 1) 394 if (verbose > 1)
389 FPRINTF (stdout, 395 FPRINTF (stdout,
390 "%s: ", 396 "%s: ",
391 gettext(mt)); 397 gettext (mt));
392#if HAVE_ICONV 398#if HAVE_ICONV
393 cd = iconv_open (nl_langinfo (CODESET), "UTF-8"); 399 cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
394 if (((iconv_t) -1) != cd) 400 if (((iconv_t) -1) != cd)
395 keyword = iconv_helper (cd, 401 keyword = iconv_helper (cd,
396 data, 402 data,
397 data_len); 403 data_len);
398 else 404 else
399#endif 405#endif
400 keyword = strdup (data); 406 keyword = strdup (data);
401 if (NULL != keyword) 407 if (NULL != keyword)
402 { 408 {
403 FPRINTF (stdout, 409 FPRINTF (stdout,
404 "`%s' ", 410 "`%s' ",
405 keyword); 411 keyword);
406 free (keyword); 412 free (keyword);
407 } 413 }
408#if HAVE_ICONV 414#if HAVE_ICONV
409 if (((iconv_t) -1) != cd) 415 if (((iconv_t) -1) != cd)
410 iconv_close (cd); 416 iconv_close (cd);
411#endif 417#endif
412 break; 418 break;
413 case EXTRACTOR_METAFORMAT_BINARY: 419 case EXTRACTOR_METAFORMAT_BINARY:
414 break; 420 break;
415 case EXTRACTOR_METAFORMAT_C_STRING: 421 case EXTRACTOR_METAFORMAT_C_STRING:
416 if (verbose > 1) 422 if (verbose > 1)
417 FPRINTF (stdout,
418 "%s ",
419 gettext(mt));
420 FPRINTF (stdout, 423 FPRINTF (stdout,
421 "`%s'", 424 "%s ",
422 data); 425 gettext (mt));
423 break; 426 FPRINTF (stdout,
424 default: 427 "`%s'",
425 break; 428 data);
426 } 429 break;
430 default:
431 break;
432 }
427 return 0; 433 return 0;
428} 434}
429 435
@@ -459,31 +465,30 @@ static char *entry_type;
459 * Mapping between bibTeX strings, libextractor 465 * Mapping between bibTeX strings, libextractor
460 * meta data types and values for the current document. 466 * meta data types and values for the current document.
461 */ 467 */
462static struct BibTexMap btm[] = 468static struct BibTexMap btm[] = {
463 { 469 { "title", EXTRACTOR_METATYPE_TITLE, NULL},
464 { "title", EXTRACTOR_METATYPE_TITLE, NULL}, 470 { "year", EXTRACTOR_METATYPE_PUBLICATION_YEAR, NULL },
465 { "year", EXTRACTOR_METATYPE_PUBLICATION_YEAR, NULL }, 471 { "author", EXTRACTOR_METATYPE_AUTHOR_NAME, NULL },
466 { "author", EXTRACTOR_METATYPE_AUTHOR_NAME, NULL }, 472 { "book", EXTRACTOR_METATYPE_BOOK_TITLE, NULL},
467 { "book", EXTRACTOR_METATYPE_BOOK_TITLE, NULL}, 473 { "edition", EXTRACTOR_METATYPE_BOOK_EDITION, NULL},
468 { "edition", EXTRACTOR_METATYPE_BOOK_EDITION, NULL}, 474 { "chapter", EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER, NULL},
469 { "chapter", EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER, NULL}, 475 { "journal", EXTRACTOR_METATYPE_JOURNAL_NAME, NULL},
470 { "journal", EXTRACTOR_METATYPE_JOURNAL_NAME, NULL}, 476 { "volume", EXTRACTOR_METATYPE_JOURNAL_VOLUME, NULL},
471 { "volume", EXTRACTOR_METATYPE_JOURNAL_VOLUME, NULL}, 477 { "number", EXTRACTOR_METATYPE_JOURNAL_NUMBER, NULL},
472 { "number", EXTRACTOR_METATYPE_JOURNAL_NUMBER, NULL}, 478 { "pages", EXTRACTOR_METATYPE_PAGE_COUNT, NULL },
473 { "pages", EXTRACTOR_METATYPE_PAGE_COUNT, NULL }, 479 { "pages", EXTRACTOR_METATYPE_PAGE_RANGE, NULL },
474 { "pages", EXTRACTOR_METATYPE_PAGE_RANGE, NULL }, 480 { "school", EXTRACTOR_METATYPE_AUTHOR_INSTITUTION, NULL},
475 { "school", EXTRACTOR_METATYPE_AUTHOR_INSTITUTION, NULL}, 481 { "publisher", EXTRACTOR_METATYPE_PUBLISHER, NULL },
476 { "publisher", EXTRACTOR_METATYPE_PUBLISHER, NULL }, 482 { "address", EXTRACTOR_METATYPE_PUBLISHER_ADDRESS, NULL },
477 { "address", EXTRACTOR_METATYPE_PUBLISHER_ADDRESS, NULL }, 483 { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL },
478 { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL }, 484 { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL},
479 { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL}, 485 { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL },
480 { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL }, 486 { "url", EXTRACTOR_METATYPE_URL, NULL},
481 { "url", EXTRACTOR_METATYPE_URL, NULL}, 487 { "note", EXTRACTOR_METATYPE_COMMENT, NULL},
482 { "note", EXTRACTOR_METATYPE_COMMENT, NULL}, 488 { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL },
483 { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL }, 489 { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL },
484 { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL }, 490 { NULL, 0, NULL }
485 { NULL, 0, NULL } 491};
486 };
487 492
488 493
489/** 494/**
@@ -495,10 +500,10 @@ cleanup_bibtex ()
495 unsigned int i; 500 unsigned int i;
496 501
497 for (i = 0; NULL != btm[i].bibTexName; i++) 502 for (i = 0; NULL != btm[i].bibTexName; i++)
498 { 503 {
499 free (btm[i].value); 504 free (btm[i].value);
500 btm[i].value = NULL; 505 btm[i].value = NULL;
501 } 506 }
502 free (entry_type); 507 free (entry_type);
503 entry_type = NULL; 508 entry_type = NULL;
504} 509}
@@ -522,12 +527,12 @@ cleanup_bibtex ()
522 */ 527 */
523static int 528static int
524print_bibtex (void *cls, 529print_bibtex (void *cls,
525 const char *plugin_name, 530 const char *plugin_name,
526 enum EXTRACTOR_MetaType type, 531 enum EXTRACTOR_MetaType type,
527 enum EXTRACTOR_MetaFormat format, 532 enum EXTRACTOR_MetaFormat format,
528 const char *data_mime_type, 533 const char *data_mime_type,
529 const char *data, 534 const char *data,
530 size_t data_len) 535 size_t data_len)
531{ 536{
532 unsigned int i; 537 unsigned int i;
533 538
@@ -538,13 +543,13 @@ print_bibtex (void *cls,
538 if (EXTRACTOR_METAFORMAT_UTF8 != format) 543 if (EXTRACTOR_METAFORMAT_UTF8 != format)
539 return 0; 544 return 0;
540 if (EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE == type) 545 if (EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE == type)
541 { 546 {
542 entry_type = strdup (data); 547 entry_type = strdup (data);
543 return 0; 548 return 0;
544 } 549 }
545 for (i = 0; NULL != btm[i].bibTexName; i++) 550 for (i = 0; NULL != btm[i].bibTexName; i++)
546 if ( (NULL == btm[i].value) && 551 if ( (NULL == btm[i].value) &&
547 (btm[i].le_type == type) ) 552 (btm[i].le_type == type) )
548 btm[i].value = strdup (data); 553 btm[i].value = strdup (data);
549 return 0; 554 return 0;
550} 555}
@@ -571,33 +576,33 @@ finish_bibtex (const char *fn)
571 (NULL == btm[1].value) || 576 (NULL == btm[1].value) ||
572 (NULL == btm[2].value) ) 577 (NULL == btm[2].value) )
573 FPRINTF (stdout, 578 FPRINTF (stdout,
574 "@%s %s { ", 579 "@%s %s { ",
575 et, 580 et,
576 fn); 581 fn);
577 else 582 else
578 { 583 {
579 snprintf (temp, 584 snprintf (temp,
580 sizeof (temp), 585 sizeof (temp),
581 "%.5s%.5s%.5s", 586 "%.5s%.5s%.5s",
582 btm[2].value, 587 btm[2].value,
583 btm[1].value, 588 btm[1].value,
584 btm[0].value); 589 btm[0].value);
585 for (n=strlen (temp)-1;n>=0;n-- ) 590 for (n = strlen (temp) - 1; n>=0; n--)
586 if (! isalnum ( (unsigned char) temp[n]) ) 591 if (! isalnum ( (unsigned char) temp[n]) )
587 temp[n] = '_'; 592 temp[n] = '_';
588 else 593 else
589 temp[n] = tolower ( (unsigned char) temp[n]); 594 temp[n] = tolower ( (unsigned char) temp[n]);
590 FPRINTF (stdout, 595 FPRINTF (stdout,
591 "@%s %s { ", 596 "@%s %s { ",
592 et, 597 et,
593 temp); 598 temp);
594 } 599 }
595 for (i=0; NULL != btm[i].bibTexName; i++) 600 for (i = 0; NULL != btm[i].bibTexName; i++)
596 if (NULL != btm[i].value) 601 if (NULL != btm[i].value)
597 FPRINTF (stdout, 602 FPRINTF (stdout,
598 "\t%s = {%s},\n", 603 "\t%s = {%s},\n",
599 btm[i].bibTexName, 604 btm[i].bibTexName,
600 btm[i].value); 605 btm[i].value);
601 FPRINTF (stdout, "%s", "}\n\n"); 606 FPRINTF (stdout, "%s", "}\n\n");
602} 607}
603 608
@@ -612,7 +617,9 @@ _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
612 DWORD error; 617 DWORD error;
613 618
614 SetLastError (0); 619 SetLastError (0);
615 len = WideCharToMultiByte (cp, 0, wstr, -1, NULL, 0, NULL, (cp == CP_UTF8 || cp == CP_UTF7) ? NULL : &lossy); 620 len = WideCharToMultiByte (cp, 0, wstr, -1, NULL, 0, NULL, (cp == CP_UTF8 ||
621 cp == CP_UTF7) ?
622 NULL : &lossy);
616 error = GetLastError (); 623 error = GetLastError ();
617 if (len <= 0) 624 if (len <= 0)
618 return -1; 625 return -1;
@@ -620,7 +627,9 @@ _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
620 str = malloc (sizeof (char) * len); 627 str = malloc (sizeof (char) * len);
621 628
622 SetLastError (0); 629 SetLastError (0);
623 lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 || cp == CP_UTF7) ? NULL : &lossy); 630 lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 ||
631 cp == CP_UTF7) ?
632 NULL : &lossy);
624 error = GetLastError (); 633 error = GetLastError ();
625 if (lenc != len) 634 if (lenc != len)
626 { 635 {
@@ -632,6 +641,8 @@ _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
632 return 1; 641 return 1;
633 return 0; 642 return 0;
634} 643}
644
645
635#endif 646#endif
636 647
637 648
@@ -711,10 +722,10 @@ _get_utf8_args (int argc, char *const *argv, int *u8argc, char ***u8argv)
711 722
712 *u8argv = _make_continuous_arg_copy (wargc, split_u8argv); 723 *u8argv = _make_continuous_arg_copy (wargc, split_u8argv);
713 if (NULL == *u8argv) 724 if (NULL == *u8argv)
714 { 725 {
715 free (split_u8argv); 726 free (split_u8argv);
716 return -1; 727 return -1;
717 } 728 }
718 *u8argc = wargc; 729 *u8argc = wargc;
719 730
720 for (i = 0; i < wargc; i++) 731 for (i = 0; i < wargc; i++)
@@ -756,21 +767,21 @@ main (int argc, char *argv[])
756 int utf8_argc; 767 int utf8_argc;
757 768
758#if ENABLE_NLS 769#if ENABLE_NLS
759 setlocale(LC_ALL, ""); 770 setlocale (LC_ALL, "");
760 textdomain(PACKAGE); 771 textdomain (PACKAGE);
761#endif 772#endif
762#ifndef WINDOWS 773#ifndef WINDOWS
763 ignore_sigpipe (); 774 ignore_sigpipe ();
764#endif 775#endif
765 if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ()))) 776 if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ())))
766 { 777 {
767 FPRINTF (stderr, 778 FPRINTF (stderr,
768 "malloc failed: %s\n", 779 "malloc failed: %s\n",
769 strerror (errno)); 780 strerror (errno));
770 return 1; 781 return 1;
771 } 782 }
772 for (i = 0; i < EXTRACTOR_metatype_get_max (); i++) 783 for (i = 0; i < EXTRACTOR_metatype_get_max (); i++)
773 print[i] = YES; /* default: print everything */ 784 print[i] = YES; /* default: print everything */
774 785
775 if (0 != _get_utf8_args (argc, argv, &utf8_argc, &utf8_argv)) 786 if (0 != _get_utf8_args (argc, argv, &utf8_argc, &utf8_argv))
776 { 787 {
@@ -779,249 +790,252 @@ main (int argc, char *argv[])
779 } 790 }
780 791
781 while (1) 792 while (1)
793 {
794 static struct option long_options[] = {
795 {"bibtex", 0, 0, 'b'},
796 {"grep-friendly", 0, 0, 'g'},
797 {"help", 0, 0, 'h'},
798 {"in-process", 0, 0, 'i'},
799 {"from-memory", 0, 0, 'm'},
800 {"list", 0, 0, 'L'},
801 {"library", 1, 0, 'l'},
802 {"nodefault", 0, 0, 'n'},
803 {"print", 1, 0, 'p'},
804 {"verbose", 0, 0, 'V'},
805 {"version", 0, 0, 'v'},
806 {"exclude", 1, 0, 'x'},
807 {0, 0, 0, 0}
808 };
809 option_index = 0;
810 c = getopt_long (utf8_argc,
811 utf8_argv,
812 "abghiml:Lnp:vVx:",
813 long_options,
814 &option_index);
815
816 if (c == -1)
817 break; /* No more flags to process */
818 switch (c)
782 { 819 {
783 static struct option long_options[] = { 820 case 'b':
784 {"bibtex", 0, 0, 'b'}, 821 bibtex = YES;
785 {"grep-friendly", 0, 0, 'g'}, 822 if (NULL != processor)
786 {"help", 0, 0, 'h'}, 823 {
787 {"in-process", 0, 0, 'i'}, 824 FPRINTF (stderr,
788 {"from-memory", 0, 0, 'm'}, 825 "%s",
789 {"list", 0, 0, 'L'}, 826 _ (
790 {"library", 1, 0, 'l'}, 827 "Illegal combination of options, cannot combine multiple styles of printing.\n"));
791 {"nodefault", 0, 0, 'n'}, 828 free (utf8_argv);
792 {"print", 1, 0, 'p'}, 829 return 0;
793 {"verbose", 0, 0, 'V'}, 830 }
794 {"version", 0, 0, 'v'}, 831 processor = &print_bibtex;
795 {"exclude", 1, 0, 'x'}, 832 break;
796 {0, 0, 0, 0} 833 case 'g':
797 }; 834 grepfriendly = YES;
798 option_index = 0; 835 if (NULL != processor)
799 c = getopt_long (utf8_argc, 836 {
800 utf8_argv, 837 FPRINTF (stderr,
801 "abghiml:Lnp:vVx:", 838 "%s",
802 long_options, 839 _ (
803 &option_index); 840 "Illegal combination of options, cannot combine multiple styles of printing.\n"));
804 841 free (utf8_argv);
805 if (c == -1) 842 return 0;
806 break; /* No more flags to process */ 843 }
807 switch (c) 844 processor = &print_selected_keywords_grep_friendly;
808 { 845 break;
809 case 'b': 846 case 'h':
810 bibtex = YES; 847 print_help ();
811 if (NULL != processor) 848 free (utf8_argv);
812 { 849 return 0;
813 FPRINTF (stderr, 850 case 'i':
814 "%s", 851 in_process = YES;
815 _("Illegal combination of options, cannot combine multiple styles of printing.\n")); 852 break;
816 free (utf8_argv); 853 case 'm':
817 return 0; 854 from_memory = YES;
818 } 855 break;
819 processor = &print_bibtex; 856 case 'l':
820 break; 857 libraries = optarg;
821 case 'g': 858 break;
822 grepfriendly = YES; 859 case 'L':
823 if (NULL != processor) 860 i = 0;
824 { 861 while (NULL != EXTRACTOR_metatype_to_string (i))
825 FPRINTF (stderr, 862 printf ("%s\n",
826 "%s", 863 gettext (EXTRACTOR_metatype_to_string (i++)));
827 _("Illegal combination of options, cannot combine multiple styles of printing.\n")); 864 free (utf8_argv);
828 free (utf8_argv); 865 return 0;
829 return 0; 866 case 'n':
830 } 867 nodefault = YES;
831 processor = &print_selected_keywords_grep_friendly; 868 break;
832 break; 869 case 'p':
833 case 'h': 870 if (NULL == optarg)
834 print_help (); 871 {
835 free (utf8_argv); 872 FPRINTF (stderr,
836 return 0; 873 _ (
837 case 'i': 874 "You must specify an argument for the `%s' option (option ignored).\n"),
838 in_process = YES; 875 "-p");
839 break; 876 break;
840 case 'm': 877 }
841 from_memory = YES; 878 if (YES == defaultAll)
879 {
880 defaultAll = NO;
881 i = 0;
882 while (NULL != EXTRACTOR_metatype_to_string (i))
883 print[i++] = NO;
884 }
885 i = 0;
886 while (NULL != EXTRACTOR_metatype_to_string (i))
887 {
888 if ( (0 == strcmp (optarg,
889 EXTRACTOR_metatype_to_string (i))) ||
890 (0 == strcmp (optarg,
891 gettext (EXTRACTOR_metatype_to_string (i)))) )
892
893 {
894 print[i] = YES;
842 break; 895 break;
843 case 'l': 896 }
844 libraries = optarg; 897 i++;
845 break; 898 }
846 case 'L': 899 if (NULL == EXTRACTOR_metatype_to_string (i))
847 i = 0; 900 {
848 while (NULL != EXTRACTOR_metatype_to_string (i)) 901 FPRINTF (stderr,
849 printf ("%s\n", 902 "Unknown keyword type `%s', use option `%s' to get a list.\n",
850 gettext(EXTRACTOR_metatype_to_string (i++))); 903 optarg,
851 free (utf8_argv); 904 "-L");
852 return 0; 905 free (utf8_argv);
853 case 'n': 906 return -1;
854 nodefault = YES; 907 }
855 break; 908 break;
856 case 'p': 909 case 'v':
857 if (NULL == optarg) 910 printf ("extract v%s\n", PACKAGE_VERSION);
858 {
859 FPRINTF(stderr,
860 _("You must specify an argument for the `%s' option (option ignored).\n"),
861 "-p");
862 break;
863 }
864 if (YES == defaultAll)
865 {
866 defaultAll = NO;
867 i = 0;
868 while (NULL != EXTRACTOR_metatype_to_string (i))
869 print[i++] = NO;
870 }
871 i = 0;
872 while (NULL != EXTRACTOR_metatype_to_string (i))
873 {
874 if ( (0 == strcmp (optarg,
875 EXTRACTOR_metatype_to_string (i))) ||
876 (0 == strcmp (optarg,
877 gettext(EXTRACTOR_metatype_to_string (i)))) )
878
879 {
880 print[i] = YES;
881 break;
882 }
883 i++;
884 }
885 if (NULL == EXTRACTOR_metatype_to_string (i))
886 {
887 FPRINTF(stderr,
888 "Unknown keyword type `%s', use option `%s' to get a list.\n",
889 optarg,
890 "-L");
891 free (utf8_argv);
892 return -1;
893 }
894 break;
895 case 'v':
896 printf ("extract v%s\n", PACKAGE_VERSION);
897 free (utf8_argv);
898 return 0;
899 case 'V':
900 verbose++;
901 break;
902 case 'x':
903 i = 0;
904 while (NULL != EXTRACTOR_metatype_to_string (i))
905 {
906 if ( (0 == strcmp (optarg,
907 EXTRACTOR_metatype_to_string (i))) ||
908 (0 == strcmp (optarg,
909 gettext(EXTRACTOR_metatype_to_string (i)))) )
910 {
911 print[i] = NO;
912 break;
913 }
914 i++;
915 }
916 if (NULL == EXTRACTOR_metatype_to_string (i))
917 {
918 FPRINTF (stderr,
919 "Unknown keyword type `%s', use option `%s' to get a list.\n",
920 optarg,
921 "-L");
922 free (utf8_argv);
923 return -1;
924 }
925 break;
926 default:
927 FPRINTF (stderr,
928 "%s",
929 _("Use --help to get a list of options.\n"));
930 free (utf8_argv);
931 return -1;
932 } /* end of parsing commandline */
933 } /* while (1) */
934 if (optind < 0)
935 {
936 FPRINTF (stderr,
937 "%s", "Unknown error parsing options\n");
938 free (print);
939 free (utf8_argv); 911 free (utf8_argv);
940 return -1; 912 return 0;
941 } 913 case 'V':
942 if (utf8_argc - optind < 1) 914 verbose++;
943 { 915 break;
916 case 'x':
917 i = 0;
918 while (NULL != EXTRACTOR_metatype_to_string (i))
919 {
920 if ( (0 == strcmp (optarg,
921 EXTRACTOR_metatype_to_string (i))) ||
922 (0 == strcmp (optarg,
923 gettext (EXTRACTOR_metatype_to_string (i)))) )
924 {
925 print[i] = NO;
926 break;
927 }
928 i++;
929 }
930 if (NULL == EXTRACTOR_metatype_to_string (i))
931 {
932 FPRINTF (stderr,
933 "Unknown keyword type `%s', use option `%s' to get a list.\n",
934 optarg,
935 "-L");
936 free (utf8_argv);
937 return -1;
938 }
939 break;
940 default:
944 FPRINTF (stderr, 941 FPRINTF (stderr,
945 "%s", "Invoke with list of filenames to extract keywords form!\n"); 942 "%s",
946 free (print); 943 _ ("Use --help to get a list of options.\n"));
947 free (utf8_argv); 944 free (utf8_argv);
948 return -1; 945 return -1;
949 } 946 } /* end of parsing commandline */
947 } /* while (1) */
948 if (optind < 0)
949 {
950 FPRINTF (stderr,
951 "%s", "Unknown error parsing options\n");
952 free (print);
953 free (utf8_argv);
954 return -1;
955 }
956 if (utf8_argc - optind < 1)
957 {
958 FPRINTF (stderr,
959 "%s", "Invoke with list of filenames to extract keywords form!\n");
960 free (print);
961 free (utf8_argv);
962 return -1;
963 }
950 964
951 /* build list of libraries */ 965 /* build list of libraries */
952 if (NO == nodefault) 966 if (NO == nodefault)
953 plugins = EXTRACTOR_plugin_add_defaults (in_process 967 plugins = EXTRACTOR_plugin_add_defaults (in_process
954 ? EXTRACTOR_OPTION_IN_PROCESS 968 ? EXTRACTOR_OPTION_IN_PROCESS
955 : EXTRACTOR_OPTION_DEFAULT_POLICY); 969 : EXTRACTOR_OPTION_DEFAULT_POLICY);
956 else 970 else
957 plugins = NULL; 971 plugins = NULL;
958 if (NULL != libraries) 972 if (NULL != libraries)
959 plugins = EXTRACTOR_plugin_add_config (plugins, 973 plugins = EXTRACTOR_plugin_add_config (plugins,
960 libraries, 974 libraries,
961 in_process 975 in_process
962 ? EXTRACTOR_OPTION_IN_PROCESS 976 ? EXTRACTOR_OPTION_IN_PROCESS
963 : EXTRACTOR_OPTION_DEFAULT_POLICY); 977 : EXTRACTOR_OPTION_DEFAULT_POLICY);
964 if (NULL == processor) 978 if (NULL == processor)
965 processor = &print_selected_keywords; 979 processor = &print_selected_keywords;
966 980
967 /* extract keywords */ 981 /* extract keywords */
968 if (YES == bibtex) 982 if (YES == bibtex)
969 FPRINTF(stdout, 983 FPRINTF (stdout,
970 "%s", _("% BiBTeX file\n")); 984 "%s", _ ("% BiBTeX file\n"));
971 for (i = optind; i < utf8_argc; i++) 985 for (i = optind; i < utf8_argc; i++)
986 {
987 errno = 0;
988 if (YES == grepfriendly)
989 FPRINTF (stdout, "%s ", utf8_argv[i]);
990 else if (NO == bibtex)
991 FPRINTF (stdout,
992 _ ("Keywords for file %s:\n"),
993 utf8_argv[i]);
994 else
995 cleanup_bibtex ();
996 if (NO == from_memory)
997 EXTRACTOR_extract (plugins,
998 utf8_argv[i],
999 NULL, 0,
1000 processor,
1001 NULL);
1002 else
972 { 1003 {
973 errno = 0; 1004 struct stat sb;
974 if (YES == grepfriendly) 1005 unsigned char *data = NULL;
975 FPRINTF (stdout, "%s ", utf8_argv[i]); 1006 int f = OPEN (utf8_argv[i], O_RDONLY
976 else if (NO == bibtex)
977 FPRINTF (stdout,
978 _("Keywords for file %s:\n"),
979 utf8_argv[i]);
980 else
981 cleanup_bibtex ();
982 if (NO == from_memory)
983 EXTRACTOR_extract (plugins,
984 utf8_argv[i],
985 NULL, 0,
986 processor,
987 NULL);
988 else
989 {
990 struct stat sb;
991 unsigned char *data = NULL;
992 int f = OPEN (utf8_argv[i], O_RDONLY
993#if WINDOWS 1007#if WINDOWS
994 | O_BINARY 1008 | O_BINARY
995#endif 1009#endif
996 ); 1010 );
997 if ( (-1 != f) && 1011 if ( (-1 != f) &&
998 (0 == FSTAT (f, &sb)) && 1012 (0 == FSTAT (f, &sb)) &&
999 (NULL != (data = malloc ((size_t) sb.st_size))) && 1013 (NULL != (data = malloc ((size_t) sb.st_size))) &&
1000 (sb.st_size == READ (f, data, (size_t) sb.st_size) ) ) 1014 (sb.st_size == READ (f, data, (size_t) sb.st_size) ) )
1001 { 1015 {
1002 EXTRACTOR_extract (plugins, 1016 EXTRACTOR_extract (plugins,
1003 NULL, 1017 NULL,
1004 data, sb.st_size, 1018 data, sb.st_size,
1005 processor, 1019 processor,
1006 NULL); 1020 NULL);
1007 } 1021 }
1008 else 1022 else
1009 { 1023 {
1010 if (verbose > 0) 1024 if (verbose > 0)
1011 FPRINTF(stderr, 1025 FPRINTF (stderr,
1012 "%s: %s: %s\n", 1026 "%s: %s: %s\n",
1013 utf8_argv[0], utf8_argv[i], strerror(errno)); 1027 utf8_argv[0], utf8_argv[i], strerror (errno));
1014 ret = 1; 1028 ret = 1;
1015 } 1029 }
1016 if (NULL != data) 1030 if (NULL != data)
1017 free (data); 1031 free (data);
1018 if (-1 != f) 1032 if (-1 != f)
1019 (void) CLOSE (f); 1033 (void) CLOSE (f);
1020 }
1021 if (YES == grepfriendly)
1022 FPRINTF (stdout, "%s", "\n");
1023 continue;
1024 } 1034 }
1035 if (YES == grepfriendly)
1036 FPRINTF (stdout, "%s", "\n");
1037 continue;
1038 }
1025 if (YES == grepfriendly) 1039 if (YES == grepfriendly)
1026 FPRINTF (stdout, "%s", "\n"); 1040 FPRINTF (stdout, "%s", "\n");
1027 if (bibtex) 1041 if (bibtex)
@@ -1036,4 +1050,5 @@ main (int argc, char *argv[])
1036 return ret; 1050 return ret;
1037} 1051}
1038 1052
1053
1039/* end of extract.c */ 1054/* end of extract.c */