aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/main/extract.c1
-rw-r--r--src/plugins/Makefile.am26
-rw-r--r--src/plugins/mime_extractor.c326
-rw-r--r--src/plugins/test_mime.c61
4 files changed, 132 insertions, 282 deletions
diff --git a/src/main/extract.c b/src/main/extract.c
index ae11102..389179f 100644
--- a/src/main/extract.c
+++ b/src/main/extract.c
@@ -857,6 +857,7 @@ main (int argc, char *argv[])
857 printf ("\n"); 857 printf ("\n");
858 free (print); 858 free (print);
859 EXTRACTOR_plugin_remove_all (plugins); 859 EXTRACTOR_plugin_remove_all (plugins);
860 plugins = NULL;
860 cleanup_bibtex (); /* actually frees stuff */ 861 cleanup_bibtex (); /* actually frees stuff */
861 return ret; 862 return ret;
862} 863}
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index bc1fc4e..cc971a8 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -19,15 +19,22 @@ PLUGIN_OGG=libextractor_ogg.la
19TEST_OGG=test_ogg 19TEST_OGG=test_ogg
20endif 20endif
21 21
22if HAVE_MAGIC
23PLUGIN_MIME=libextractor_mime.la
24TEST_MIME=test_mime
25endif
26
22plugin_LTLIBRARIES = \ 27plugin_LTLIBRARIES = \
23 $(PLUGIN_OGG) 28 $(PLUGIN_OGG) \
29 $(PLUGIN_MIME)
24 30
25if HAVE_ZZUF 31if HAVE_ZZUF
26 fuzz_tests=fuzz_default.sh 32 fuzz_tests=fuzz_default.sh
27endif 33endif
28 34
29check_PROGRAMS = \ 35check_PROGRAMS = \
30 $(TEST_OGG) 36 $(TEST_OGG) \
37 $(TEST_MIME)
31 38
32TESTS = \ 39TESTS = \
33 $(fuzz_tests) \ 40 $(fuzz_tests) \
@@ -58,4 +65,19 @@ test_ogg_LDADD = \
58 $(top_builddir)/src/plugins/libtest.la 65 $(top_builddir)/src/plugins/libtest.la
59 66
60 67
68libextractor_mime_la_SOURCES = \
69 mime_extractor.c
70libextractor_mime_la_LDFLAGS = \
71 $(PLUGINFLAGS)
72libextractor_mime_la_LIBADD = \
73 $(top_builddir)/src/main/libextractor.la \
74 $(top_builddir)/src/common/libextractor_common.la \
75 -lmagic
76
77test_mime_SOURCES = \
78 test_mime.c
79test_mime_LDADD = \
80 $(top_builddir)/src/plugins/libtest.la
81
82
61 83
diff --git a/src/plugins/mime_extractor.c b/src/plugins/mime_extractor.c
index 1413862..26f4d49 100644
--- a/src/plugins/mime_extractor.c
+++ b/src/plugins/mime_extractor.c
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff 3 (C) 2012 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
@@ -20,301 +20,67 @@
20 20
21#include "platform.h" 21#include "platform.h"
22#include "extractor.h" 22#include "extractor.h"
23#include <magic.h>
23 24
24 25
25/** 26/**
26 * Detect a file-type. 27 * Global handle to MAGIC data.
27 * @param data the contents of the file 28 */
28 * @param len the length of the file 29static magic_t magic;
29 * @param arg closure...
30 * @return 0 if the file does not match, 1 if it does
31 **/
32typedef int (*Detector) (const char *data, size_t len, void *arg);
33
34/**
35 * Detect a file-type.
36 * @param data the contents of the file
37 * @param len the length of the file
38 * @return always 1
39 **/
40static int
41defaultDetector (const char *data, size_t len, void *arg)
42{
43 return 1;
44}
45
46/**
47 * Detect a file-type.
48 * @param data the contents of the file
49 * @param len the length of the file
50 * @return always 0
51 **/
52static int
53disableDetector (const char *data, size_t len, void *arg)
54{
55 return 0;
56}
57
58typedef struct ExtraPattern
59{
60 int pos;
61 int len;
62 const char *pattern;
63} ExtraPattern;
64
65/**
66 * Define special matching rules for complicated formats...
67 **/
68static ExtraPattern xpatterns[] = {
69#define AVI_XPATTERN 0
70 {8, 4, "AVI "},
71 {0, 0, NULL},
72#define WAVE_XPATTERN 2
73 {8, 4, "WAVE"},
74 {0, 0, NULL},
75#define ACE_XPATTERN 4
76 {4, 10, "\x00\x00\x90**ACE**"},
77 {0, 0, NULL},
78#define TAR_XPATTERN 6
79 {257, 6, "ustar\x00"},
80 {0, 0, NULL},
81#define GTAR_XPATTERN 8
82 {257, 8, "ustar\040\040\0"},
83 {0, 0, NULL},
84#define RMID_XPATTERN 10
85 {8, 4, "RMID"},
86 {0, 0, NULL},
87#define ACON_XPATTERN 12
88 {8, 4, "ACON"},
89 {0, 0, NULL},
90#define CR2_PATTERN 14
91 {8, 3, "CR\x02"},
92 {0, 0, NULL},
93};
94 30
31
95/** 32/**
96 * Detect AVI. A pattern matches if all XPatterns until the next {0, 33 * Main entry method for the MIME type extraction plugin.
97 * 0, NULL} slot match. OR-ing patterns can be achieved using multiple 34 *
98 * entries in the main table, so this "AND" (all match) semantics are 35 * @param ec extraction context provided to the plugin
99 * the only reasonable answer. 36 */
100 **/ 37void
101static int 38EXTRACTOR_mime_extract_method (struct EXTRACTOR_ExtractContext *ec)
102xPatternMatcher (const char *data, size_t len, void *cls)
103{ 39{
104 ExtraPattern *arg = cls; 40 void *buf;
105 41 ssize_t ret;
106 while (arg->pattern != NULL) 42 const char *mime;
43
44 ret = ec->read (ec->cls,
45 &buf,
46 16 * 1024);
47 if (-1 == ret)
48 return;
49 mime = magic_buffer (magic, buf, ret);
50 if (NULL == mime)
107 { 51 {
108 if (arg->pos + arg->len > len) 52 magic_close (magic);
109 return 0; 53 return;
110 if (0 != memcmp (&data[arg->pos], arg->pattern, arg->len))
111 return 0;
112 arg++;
113 } 54 }
114 return 1; 55 ec->proc (ec->cls,
56 "mime",
57 EXTRACTOR_METATYPE_MIMETYPE,
58 EXTRACTOR_METAFORMAT_UTF8,
59 "text/plain",
60 mime,
61 strlen (mime) + 1);
115} 62}
116 63
64
117/** 65/**
118 * Detect SVG 66 * Constructor for the library. Loads the magic file.
119 */ 67 */
120static int 68void __attribute__ ((constructor))
121svgMatcher (const char *data, size_t len, void *cls) 69mime_ltdl_init ()
122{ 70{
123 enum 71 magic = magic_open (MAGIC_MIME_TYPE);
124 { XMLSTART, XMLCLOSE, SVGSTART } state; 72 magic_load (magic, "/usr/share/misc/magic");
125 size_t i;
126
127 i = 0;
128 state = XMLSTART;
129
130 while (i < len)
131 {
132 if (!isprint ( (unsigned char) data[i]))
133 return 0;
134 switch (state)
135 {
136 case XMLSTART:
137 if (i + 6 >= len)
138 return 0;
139 else if (memcmp (data + i, "<?xml", 5) == 0
140 && isspace ( (unsigned char) *(data + i + 5)))
141 state = XMLCLOSE;
142 break;
143 case XMLCLOSE:
144 if (i + 2 >= len)
145 return 0;
146 else if (memcmp (data + i, "?>", 2) == 0)
147 state = SVGSTART;
148 break;
149 case SVGSTART:
150 if (i + 5 >= len)
151 return 0;
152 else if (memcmp (data + i, "<svg", 4) == 0
153 && isspace ( (unsigned char) *(data + i + 4)))
154 return 1;
155 break;
156 default:
157 /* do nothing */
158 break;
159 }
160 i++;
161 }
162 return 0;
163} 73}
164 74
165/**
166 * Use this detector, if the simple header-prefix matching is
167 * sufficient.
168 **/
169#define DEFAULT &defaultDetector, NULL
170
171/**
172 * Use this detector, to disable the mime-type (effectively comment it
173 * out).
174 **/
175#define DISABLED &disableDetector, NULL
176 75
177/** 76/**
178 * Select an entry in xpatterns for matching 77 * Destructor for the library, cleans up.
179 **/ 78 */
180#define XPATTERN(a) &xPatternMatcher, &xpatterns[(a)] 79void __attribute__ ((destructor))
181 80mime_ltdl_fini ()
182typedef struct Pattern
183{
184 const char *pattern;
185 int size;
186 const char *mimetype;
187 Detector detector;
188 void *arg;
189} Pattern;
190
191static Pattern patterns[] = {
192 {"\xFF\xD8", 2, "image/jpeg", DEFAULT},
193 {"\211PNG\r\n\032\n", 8, "image/png", DEFAULT},
194 {"/* XPM */", 9, "image/x-xpm", DEFAULT},
195 {"GIF8", 4, "image/gif", DEFAULT},
196 {"P1", 2, "image/x-portable-bitmap", DEFAULT},
197 {"P2", 2, "image/x-portable-graymap", DEFAULT},
198 {"P3", 2, "image/x-portable-pixmap", DEFAULT},
199 {"P4", 2, "image/x-portable-bitmap", DEFAULT},
200 {"P5", 2, "image/x-portable-graymap", DEFAULT},
201 {"P6", 2, "image/x-portable-pixmap", DEFAULT},
202 {"P7", 2, "image/x-portable-anymap", DEFAULT},
203 {"BM", 2, "image/x-bmp", DEFAULT},
204 {"fLaC", 4, "audio/flac", DEFAULT},
205 {"\x89PNG", 4, "image/x-png", DEFAULT},
206 {"id=ImageMagick", 14, "application/x-imagemagick-image", DEFAULT},
207 {"hsi1", 4, "image/x-jpeg-proprietary", DEFAULT},
208 {"FLV", 3, "video/x-flv", DEFAULT},
209 {"FWS", 3, "application/x-shockwave-flash", DEFAULT},
210 {"CWS", 3, "application/x-shockwave-flash", DEFAULT},
211 {"\x2E\x52\x4d\x46", 4, "video/real", DEFAULT},
212 {"\x2e\x72\x61\xfd", 4, "audio/real", DEFAULT},
213 {"\x00\x05\x16\x00", 4, "application/applefile", DEFAULT},
214 {"\x00\x05\x16\x07", 4, "application/applefile", DEFAULT},
215 {"\177ELF", 4, "application/x-executable", DEFAULT},
216 /* FIXME: correct MIME-type for an ELF!? */
217 {"\xca\xfe\xba\xbe", 4, "application/java", DEFAULT},
218 /* FIXME: correct MIME for a class-file? */
219 {"gimp xcf", 8, "image/xcf", DEFAULT},
220 {"II\x2a\x00\x10", 5, "image/x-canon-cr2", XPATTERN (CR2_PATTERN)},
221 {"IIN1", 4, "image/tiff", DEFAULT},
222 {"MM\x00\x2a", 4, "image/tiff", DEFAULT}, /* big-endian */
223 {"II\x2a\x00", 4, "image/tiff", DEFAULT}, /* little-endian */
224 {"%PDF", 4, "application/pdf", DEFAULT},
225 {"%!PS-Adobe-", 11, "application/postscript", DEFAULT},
226 {"\004%!PS-Adobe-", 12, "application/postscript", DEFAULT},
227 {"RIFF", 4, "video/x-msvideo", XPATTERN (AVI_XPATTERN)},
228 {"RIFF", 4, "audio/x-wav", XPATTERN (WAVE_XPATTERN)},
229 {"RIFX", 4, "video/x-msvideo", XPATTERN (AVI_XPATTERN)},
230 {"RIFX", 4, "audio/x-wav", XPATTERN (WAVE_XPATTERN)},
231 {"RIFF", 4, "audio/midi", XPATTERN (RMID_XPATTERN)},
232 {"RIFX", 4, "audio/midi", XPATTERN (RMID_XPATTERN)},
233 {"RIFF", 4, "image/x-animated-cursor", XPATTERN (ACON_XPATTERN)},
234 {"RIFX", 4, "image/x-animated-cursor", XPATTERN (ACON_XPATTERN)},
235 {"\211GND\r\n\032\n", 8, "application/gnunet-directory", DEFAULT},
236 {"{\\rtf", 5, "application/rtf", DEFAULT},
237 {"\xf7\x02", 2, "application/x-dvi", DEFAULT},
238 {"\x1F\x8B\x08\x00", 4, "application/x-gzip", DEFAULT},
239 {"BZh91AY&SY", 10, "application/bz2", DEFAULT},
240 {"\xED\xAB\xEE\xDB", 4, "application/x-rpm", DEFAULT}, /* binary */
241 {"!<arch>\ndebian", 14, "application/x-dpkg", DEFAULT}, /* .deb */
242 {"PK\x03\x04", 4, "application/x-zip", DEFAULT},
243 {"\xea\x60", 2, "application/x-arj", DEFAULT},
244 {"\037\235", 2, "application/x-compress", DEFAULT},
245 {"Rar!", 4, "application/x-rar", DEFAULT},
246 {"", 0, "application/x-ace", XPATTERN (ACE_XPATTERN)},
247 {"", 0, "application/x-tar", XPATTERN (TAR_XPATTERN)},
248 {"", 0, "application/x-gtar", XPATTERN (GTAR_XPATTERN)},
249 {"-lh0-", 5, "application/x-lha", DEFAULT},
250 {"-lh1-", 5, "application/x-lha", DEFAULT},
251 {"-lh2-", 5, "application/x-lha", DEFAULT},
252 {"-lh3-", 5, "application/x-lha", DEFAULT},
253 {"-lh4-", 5, "application/x-lha", DEFAULT},
254 {"-lh5-", 5, "application/x-lha", DEFAULT},
255 {"-lh6-", 5, "application/x-lha", DEFAULT},
256 {"-lh7-", 5, "application/x-lha", DEFAULT},
257 {"-lhd-", 5, "application/x-lha", DEFAULT},
258 {"-lh\40-", 5, "application/x-lha", DEFAULT},
259 {"-lz4-", 5, "application/x-lha", DEFAULT},
260 {"-lz5-", 5, "application/x-lha", DEFAULT},
261 {"-lzs-", 5, "application/x-lha", DEFAULT},
262 {"\xFD\x76", 2, "application/x-lzh", DEFAULT},
263 {"\x00\x00\x01\xb3", 4, "video/mpeg", DEFAULT},
264 {"\x00\x00\x01\xba", 4, "video/mpeg", DEFAULT},
265 {"moov", 4, "video/quicktime", DEFAULT},
266 {"mdat", 4, "video/quicktime", DEFAULT},
267 {"\x8aMNG", 4, "video/x-mng", DEFAULT},
268 {"\x30\x26\xb2\x75\x8e\x66", 6, "video/x-ms-asf", DEFAULT}, /* same as .wmv ? */
269 {"FWS", 3, "application/x-shockwave-flash", DEFAULT},
270 {"MThd", 4, "audio/midi", DEFAULT},
271 {"ID3", 3, "audio/mpeg", DEFAULT},
272 {"\xFF\xFA", 2, "audio/mpeg", DEFAULT},
273 {"\xFF\xFB", 2, "audio/mpeg", DEFAULT},
274 {"\xFF\xFC", 2, "audio/mpeg", DEFAULT},
275 {"\xFF\xFD", 2, "audio/mpeg", DEFAULT},
276 {"\xFF\xFE", 2, "audio/mpeg", DEFAULT},
277 {"\xFF\xFF", 2, "audio/mpeg", DEFAULT},
278 {"OggS", 4, "application/ogg", DEFAULT},
279 {"#!/bin/sh", 9, "application/x-shellscript", DEFAULT},
280 {"#!/bin/bash", 11, "application/x-shellscript", DEFAULT},
281 {"#!/bin/csh", 10, "application/x-shellscript", DEFAULT},
282 {"#!/bin/tcsh", 11, "application/x-shellscript", DEFAULT},
283 {"#!/bin/perl", 11, "application/x-perl", DEFAULT},
284 {"<?xml", 5, "image/svg+xml", svgMatcher, NULL},
285 {NULL, 0, NULL, DISABLED}
286};
287
288
289int
290EXTRACTOR_mime_extract (const char *data,
291 size_t size,
292 EXTRACTOR_MetaDataProcessor proc,
293 void *proc_cls,
294 const char *options)
295{ 81{
296 int i; 82 magic_close (magic);
297 83 magic = NULL;
298 i = 0;
299 while (patterns[i].pattern != NULL)
300 {
301 if (size < patterns[i].size)
302 {
303 i++;
304 continue;
305 }
306 if (0 == memcmp (patterns[i].pattern, data, patterns[i].size))
307 {
308 if (patterns[i].detector (data, size, patterns[i].arg))
309 return proc (proc_cls,
310 "mime",
311 EXTRACTOR_METATYPE_MIMETYPE,
312 EXTRACTOR_METAFORMAT_UTF8,
313 "text/plain",
314 patterns[i].mimetype,
315 strlen(patterns[i].mimetype)+1);
316 }
317 i++;
318 }
319 return 0;
320} 84}
85
86/* end of mime_extractor.c */
diff --git a/src/plugins/test_mime.c b/src/plugins/test_mime.c
new file mode 100644
index 0000000..d3aa786
--- /dev/null
+++ b/src/plugins/test_mime.c
@@ -0,0 +1,61 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file plugins/test_mime.c
22 * @brief testcase for mime plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "test_lib.h"
27
28
29
30/**
31 * Main function for the MIME testcase.
32 *
33 * @param argc number of arguments (ignored)
34 * @param argv arguments (ignored)
35 * @return 0 on success
36 */
37int
38main (int argc, char *argv[])
39{
40 struct SolutionData courseclear_sol[] =
41 {
42 {
43 EXTRACTOR_METATYPE_MIMETYPE,
44 EXTRACTOR_METAFORMAT_UTF8,
45 "text/plain",
46 "application/ogg",
47 strlen ("application/ogg") + 1,
48 0
49 },
50 { 0, 0, NULL, NULL, 0, -1 }
51 };
52 struct ProblemSet ps[] =
53 {
54 { "testdata/ogg_courseclear.ogg",
55 courseclear_sol },
56 { NULL, NULL }
57 };
58 return ET_main ("mime", ps);
59}
60
61/* end of test_mime.c */