diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-08-17 16:25:36 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-08-17 16:25:36 +0000 |
commit | 9e8a050b21f7f3fa47ba6b46469096c3d9aea8e6 (patch) | |
tree | 1d7c56323938805ee2f2551941e48dbb57b3085c /src/plugins/testdata | |
parent | b142d604d71024a6984f9a63804c41d7272e41d9 (diff) | |
download | libextractor-9e8a050b21f7f3fa47ba6b46469096c3d9aea8e6.tar.gz libextractor-9e8a050b21f7f3fa47ba6b46469096c3d9aea8e6.zip |
work on misc plugins
Diffstat (limited to 'src/plugins/testdata')
-rw-r--r-- | src/plugins/testdata/html_grothoff.html | 44 | ||||
-rw-r--r-- | src/plugins/testdata/man_extract.1 | 109 |
2 files changed, 153 insertions, 0 deletions
diff --git a/src/plugins/testdata/html_grothoff.html b/src/plugins/testdata/html_grothoff.html new file mode 100644 index 0000000..fc7c620 --- /dev/null +++ b/src/plugins/testdata/html_grothoff.html | |||
@@ -0,0 +1,44 @@ | |||
1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN"> | ||
2 | <html lang="en"> | ||
3 | <head> | ||
4 | <title>Christian Grothoff</title> | ||
5 | <meta name="description" content="Homepage of Christian Grothoff"> | ||
6 | <meta name="author" content="Christian Grothoff"> | ||
7 | <meta name="keywords" content="Christian,Grothoff"> | ||
8 | <meta name="robots" content="index,follow"> | ||
9 | <meta name="revisit-after" content="28 days"> | ||
10 | <meta name="title" content="Welcome to Christian Grothoff"> | ||
11 | <meta name="content-language" content="en"> | ||
12 | <meta name="language" content="en"> | ||
13 | <meta name="publisher" content="Christian Grothoff"> | ||
14 | <meta name="date" content="2000-08-20"> | ||
15 | <meta name="rights" content="(C) 2000 by Christian Grothoff"> | ||
16 | <meta http-equiv="expires" content="43200"> | ||
17 | <meta http-equiv="content-type" content="text/html;CHARSET=iso8859-1"> | ||
18 | <meta http-equiv="Content-Style-Type" content="text/css"> | ||
19 | <link rel=stylesheet type="text/css" href="grothoff.css"> | ||
20 | <script language="JavaScript"> | ||
21 | <!-- | ||
22 | if(top.frames.length > 0) | ||
23 | top.location.href=self.location; | ||
24 | //--> | ||
25 | </script> | ||
26 | </head> | ||
27 | <frameset cols="180,*" border=5 frameborder=5 framespacing=5 bordercolor="#000000"> | ||
28 | <frame src="navigation.php3?currenttopic=Welcome" name="navigation"> | ||
29 | <frame src="welcome.php3" name="contentwindow"> | ||
30 | </frameset> | ||
31 | <body> | ||
32 | <h1>Welcome to Christian Grothoff</h1> | ||
33 | <hr class="big"> | ||
34 | <br clear=all> | ||
35 | <ul> | ||
36 | <li><A href="welcome.php3">Welcome</A></li> | ||
37 | <li><A href="cs/">Computer Science</A></li> | ||
38 | <li><A href="linux/">Linux</A></li> | ||
39 | <li><A href="http://www.stud.uni-wuppertal.de/~ma0035/">Willkommen (my german homepage)</A></li> | ||
40 | </ul> | ||
41 | <hr> | ||
42 | <A href="mailto:grothoff@cs.purdue.edu"><em>grothoff@cs.purdue.edu</em></A> | ||
43 | </body> | ||
44 | </html> | ||
diff --git a/src/plugins/testdata/man_extract.1 b/src/plugins/testdata/man_extract.1 new file mode 100644 index 0000000..500c061 --- /dev/null +++ b/src/plugins/testdata/man_extract.1 | |||
@@ -0,0 +1,109 @@ | |||
1 | .TH EXTRACT 1 "Aug 7, 2012" "libextractor 0.7.0" | ||
2 | .\" $Id | ||
3 | .SH NAME | ||
4 | extract | ||
5 | \- determine meta-information about a file | ||
6 | .SH SYNOPSIS | ||
7 | .B extract | ||
8 | [ | ||
9 | .B \-bgihLmnvV | ||
10 | ] | ||
11 | [ | ||
12 | .B \-l | ||
13 | .I library | ||
14 | ] | ||
15 | [ | ||
16 | .B \-p | ||
17 | .I type | ||
18 | ] | ||
19 | [ | ||
20 | .B \-x | ||
21 | .I type | ||
22 | ] | ||
23 | .I file | ||
24 | \&... | ||
25 | .br | ||
26 | .SH DESCRIPTION | ||
27 | This manual page documents version 0.7.0 of the | ||
28 | .B extract | ||
29 | command. | ||
30 | .PP | ||
31 | .B extract | ||
32 | tests each file specified in the argument list in an attempt to infer meta\-information from it. Each file is subjected to the meta\-data extraction libraries from | ||
33 | .IR libextractor . | ||
34 | .PP | ||
35 | libextractor classifies meta\-information (also referred to as keywords) into types. A list of all types can be obtained with the | ||
36 | .B \-L | ||
37 | option. | ||
38 | |||
39 | .SH OPTIONS | ||
40 | .TP 8 | ||
41 | .B \-b | ||
42 | Display the output in BiBTeX format. | ||
43 | .TP 8 | ||
44 | .B \-g | ||
45 | Use grep\-friendly output (all keywords on a single line for each file). Use the verbose option to print the filename first, followed by the keywords. Use the verbose option twice to also display the keyword types. This option will not print keyword types or non\-textual metadata. | ||
46 | .TP 8 | ||
47 | .B \-h | ||
48 | Print a brief summary of the options. | ||
49 | .TP 8 | ||
50 | .B \-i | ||
51 | Run plugins in\-process (for debugging). By default, each plugin is run in its own process. | ||
52 | .TP 8 | ||
53 | .BI \-l " libraries" | ||
54 | Use the specified libraries to extract keywords. The general format of libraries is \fI[[\-]LIBRARYNAME[:[\-]LIBRARYNAME]*]\fP where LIBRARYNAME is a libextractor compatible library and typically of the form \fIjpeg\fP. The minus before the library name indicates that this library should be removed from the existing list. To run only a few selected plugins, use \-l in combination with \-n. | ||
55 | .TP 8 | ||
56 | .B \-L | ||
57 | Print a list of all known keyword types. | ||
58 | .TP 8 | ||
59 | .B \-m | ||
60 | Load the file into memory and perform extraction from memory (for debugging). | ||
61 | .TP 8 | ||
62 | .B \-n | ||
63 | Do not use the default set of extractors (typically all standard extractors, currently mp3, ogg, jpg, gif, png, tiff, real, html, pdf and mime\-types); use only the extractors specified with the \fB\-l\fP option. | ||
64 | .TP | ||
65 | .BI \-p " type" | ||
66 | Print only the keywords matching the specified type. By default, all keywords that are found and not removed as duplicates are printed. | ||
67 | .TP 8 | ||
68 | .B \-v | ||
69 | Print the version number and exit. | ||
70 | .TP 8 | ||
71 | .B \-V | ||
72 | Be verbose. This option can be specified multiple times to increase verbosity further. | ||
73 | .TP 8 | ||
74 | .BI \-x " type" | ||
75 | Exclude keywords of the specified type from the output. By default, all keywords that are found and not removed as duplicates are printed. | ||
76 | .SH SEE ALSO | ||
77 | .BR libextractor (3) | ||
78 | \- description of the libextractor library | ||
79 | .br | ||
80 | .SH EXAMPLES | ||
81 | .nf | ||
82 | $ extract test/test.jpg | ||
83 | comment \- (C) 2001 by Christian Grothoff, using gimp 1.2 1 | ||
84 | mimetype \- image/jpeg | ||
85 | |||
86 | $ extract \-V \-x comment test/test.jpg | ||
87 | Keywords for file test/test.jpg: | ||
88 | mimetype \- image/jpeg | ||
89 | |||
90 | $ extract \-p comment test/test.jpg | ||
91 | comment \- (C) 2001 by Christian Grothoff, using gimp 1.2 1 | ||
92 | |||
93 | $ extract \-nV \-l png.so \-p comment test/test.jpg test/test.png | ||
94 | Keywords for file test/test.jpg: | ||
95 | Keywords for file test/test.png: | ||
96 | comment \- Testing keyword extraction | ||
97 | |||
98 | .SH LEGAL NOTICE | ||
99 | libextractor and the extract tool are released under the GPL. libextractor is a GNU package. | ||
100 | |||
101 | .SH BUGS | ||
102 | A couple of file\-formats (on the order of 10^3) are not recognized... | ||
103 | |||
104 | .SH AUTHORS | ||
105 | .B extract | ||
106 | was originally written by Christian Grothoff <christian@grothoff.org> and Vidyut Samanta <vids@cs.ucla.edu>. Use <libextractor@gnu.org> to contact the current maintainer(s). | ||
107 | |||
108 | .SH AVAILABILITY | ||
109 | You can obtain the original author's latest version from http://www.gnu.org/software/libextractor/ | ||