aboutsummaryrefslogtreecommitdiff
path: root/src/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/Makefile.am6
-rw-r--r--src/plugins/ole2_extractor.c37
-rw-r--r--src/plugins/test_ole2.c490
-rw-r--r--src/plugins/testdata/ole2_blair.docbin0 -> 65024 bytes
-rw-r--r--src/plugins/testdata/ole2_excel.xlsbin0 -> 128000 bytes
-rw-r--r--src/plugins/testdata/ole2_msword.docbin0 -> 8192 bytes
-rw-r--r--src/plugins/testdata/ole2_starwriter40.sdwbin0 -> 14848 bytes
7 files changed, 517 insertions, 16 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index 6741861..9cc8f90 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -25,7 +25,11 @@ EXTRA_DIST = template_extractor.c \
25 testdata/wav_noise.wav \ 25 testdata/wav_noise.wav \
26 testdata/wav_alert.wav \ 26 testdata/wav_alert.wav \
27 testdata/it_dawn.it \ 27 testdata/it_dawn.it \
28 testdata/s3m_2nd_pm.s3m 28 testdata/s3m_2nd_pm.s3m \
29 testdata/ole2_msword.doc \
30 testdata/ole2_starwriter40.sdw \
31 testdata/ole2_blair.doc \
32 testdata/ole2_excel.xls
29 33
30if HAVE_VORBISFILE 34if HAVE_VORBISFILE
31PLUGIN_OGG=libextractor_ogg.la 35PLUGIN_OGG=libextractor_ogg.la
diff --git a/src/plugins/ole2_extractor.c b/src/plugins/ole2_extractor.c
index ccfc3cb..4b14256 100644
--- a/src/plugins/ole2_extractor.c
+++ b/src/plugins/ole2_extractor.c
@@ -21,8 +21,7 @@
21 -- the Gnome Structured File Library 21 -- the Gnome Structured File Library
22 Copyright (C) 2002-2004 Jody Goldberg (jody@gnome.org) 22 Copyright (C) 2002-2004 Jody Goldberg (jody@gnome.org)
23 23
24 Part of this code was borrowed from wordleaker.cpp. See also 24 Part of this code was adapted from wordleaker.
25 the README file in this directory.
26*/ 25*/
27/** 26/**
28 * @file plugins/ole2_extractor.c 27 * @file plugins/ole2_extractor.c
@@ -67,10 +66,13 @@
67 */ 66 */
68static int 67static int
69add_metadata (EXTRACTOR_MetaDataProcessor proc, 68add_metadata (EXTRACTOR_MetaDataProcessor proc,
70 void *proc_cls, 69 void *proc_cls,
71 const char *phrase, 70 const char *phrase,
72 enum EXTRACTOR_MetaType type) 71 enum EXTRACTOR_MetaType type)
73{ 72{
73 char *tmp;
74 int ret;
75
74 if (0 == strlen (phrase)) 76 if (0 == strlen (phrase))
75 return 0; 77 return 0;
76 if (0 == strcmp (phrase, "\"\"")) 78 if (0 == strcmp (phrase, "\"\""))
@@ -79,13 +81,21 @@ add_metadata (EXTRACTOR_MetaDataProcessor proc,
79 return 0; 81 return 0;
80 if (0 == strcmp (phrase, " ")) 82 if (0 == strcmp (phrase, " "))
81 return 0; 83 return 0;
82 return proc (proc_cls, 84 if (NULL == (tmp = strdup (phrase)))
83 "ole2", 85 return 0;
84 type, 86
85 EXTRACTOR_METAFORMAT_UTF8, 87 while ( (strlen (tmp) > 0) &&
86 "text/plain", 88 (isblank ((unsigned char) tmp [strlen (tmp) - 1])) )
87 phrase, 89 tmp [strlen (tmp) - 1] = '\0';
88 strlen (phrase) +1); 90 ret = proc (proc_cls,
91 "ole2",
92 type,
93 EXTRACTOR_METAFORMAT_UTF8,
94 "text/plain",
95 tmp,
96 strlen (tmp) + 1);
97 free (tmp);
98 return ret;
89} 99}
90 100
91 101
@@ -212,9 +222,6 @@ process_metadata (gpointer key,
212 } 222 }
213 if (NULL == contents) 223 if (NULL == contents)
214 return; 224 return;
215 if ( (strlen (contents) > 0) &&
216 ('\n' == contents[strlen (contents) - 1]) )
217 contents [strlen (contents) - 1] = '\0';
218 if (0 == strcmp (type, "meta:generator")) 225 if (0 == strcmp (type, "meta:generator"))
219 { 226 {
220 const char *mimetype = "application/vnd.ms-files"; 227 const char *mimetype = "application/vnd.ms-files";
diff --git a/src/plugins/test_ole2.c b/src/plugins/test_ole2.c
new file mode 100644
index 0000000..dd4bfa3
--- /dev/null
+++ b/src/plugins/test_ole2.c
@@ -0,0 +1,490 @@
1/*
2 This file is part of libextractor.
3 (C) 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file plugins/test_ole2.c
22 * @brief testcase for ole2 plugin
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "test_lib.h"
27
28
29/**
30 * Main function for the OLE2 testcase.
31 *
32 * @param argc number of arguments (ignored)
33 * @param argv arguments (ignored)
34 * @return 0 on success
35 */
36int
37main (int argc, char *argv[])
38{
39 struct SolutionData ole2_msword_sol[] =
40 {
41 {
42 EXTRACTOR_METATYPE_CREATOR,
43 EXTRACTOR_METAFORMAT_UTF8,
44 "text/plain",
45 "Nils Durner",
46 strlen ("Nils Durner") + 1,
47 0
48 },
49 {
50 EXTRACTOR_METATYPE_UNKNOWN_DATE,
51 EXTRACTOR_METAFORMAT_UTF8,
52 "text/plain",
53 "2005-03-21T06:11:12Z",
54 strlen ("2005-03-21T06:11:12Z") + 1,
55 0
56 },
57 {
58 EXTRACTOR_METATYPE_DESCRIPTION,
59 EXTRACTOR_METAFORMAT_UTF8,
60 "text/plain",
61 "This is a small document to test meta data extraction by GNU libextractor.",
62 strlen ("This is a small document to test meta data extraction by GNU libextractor.") + 1,
63 0
64 },
65 {
66 EXTRACTOR_METATYPE_KEYWORDS,
67 EXTRACTOR_METAFORMAT_UTF8,
68 "text/plain",
69 "ole ole2 eole2extractor",
70 strlen ("ole ole2 eole2extractor") + 1,
71 0
72 },
73 {
74 EXTRACTOR_METATYPE_SUBJECT,
75 EXTRACTOR_METAFORMAT_UTF8,
76 "text/plain",
77 "GNU libextractor",
78 strlen ("GNU libextractor") + 1,
79 0
80 },
81 {
82 EXTRACTOR_METATYPE_TITLE,
83 EXTRACTOR_METAFORMAT_UTF8,
84 "text/plain",
85 "Testcase for the ole2 extractor",
86 strlen ("Testcase for the ole2 extractor") + 1,
87 0
88 },
89 {
90 EXTRACTOR_METATYPE_LAST_SAVED_BY,
91 EXTRACTOR_METAFORMAT_UTF8,
92 "text/plain",
93 "Nils Durner",
94 strlen ("Nils Durner") + 1,
95 0
96 },
97 {
98 EXTRACTOR_METATYPE_CREATION_DATE,
99 EXTRACTOR_METAFORMAT_UTF8,
100 "text/plain",
101 "2005-03-21T06:10:19Z",
102 strlen ("2005-03-21T06:10:19Z") + 1,
103 0
104 },
105 {
106 EXTRACTOR_METATYPE_EDITING_CYCLES,
107 EXTRACTOR_METAFORMAT_UTF8,
108 "text/plain",
109 "2",
110 strlen ("2") + 1,
111 0
112 },
113 { 0, 0, NULL, NULL, 0, -1 }
114 };
115
116 struct SolutionData ole2_starwriter_sol[] =
117 {
118 {
119 EXTRACTOR_METATYPE_CREATOR,
120 EXTRACTOR_METAFORMAT_UTF8,
121 "text/plain",
122 "Christian Grothoff",
123 strlen ("Christian Grothoff") + 1,
124 0
125 },
126 {
127 EXTRACTOR_METATYPE_UNKNOWN_DATE,
128 EXTRACTOR_METAFORMAT_UTF8,
129 "text/plain",
130 "2004-09-24T02:54:31Z",
131 strlen ("2004-09-24T02:54:31Z") + 1,
132 0
133 },
134 {
135 EXTRACTOR_METATYPE_DESCRIPTION,
136 EXTRACTOR_METAFORMAT_UTF8,
137 "text/plain",
138 "The comments",
139 strlen ("The comments") + 1,
140 0
141 },
142 {
143 EXTRACTOR_METATYPE_KEYWORDS,
144 EXTRACTOR_METAFORMAT_UTF8,
145 "text/plain",
146 "The Keywords",
147 strlen ("The Keywords") + 1,
148 0
149 },
150 {
151 EXTRACTOR_METATYPE_SUBJECT,
152 EXTRACTOR_METAFORMAT_UTF8,
153 "text/plain",
154 "The Subject",
155 strlen ("The Subject") + 1,
156 0
157 },
158 {
159 EXTRACTOR_METATYPE_TITLE,
160 EXTRACTOR_METAFORMAT_UTF8,
161 "text/plain",
162 "The Title",
163 strlen ("The Title") + 1,
164 0
165 },
166 {
167 EXTRACTOR_METATYPE_LAST_SAVED_BY,
168 EXTRACTOR_METAFORMAT_UTF8,
169 "text/plain",
170 "Christian Grothoff",
171 strlen ("Christian Grothoff") + 1,
172 0
173 },
174 {
175 EXTRACTOR_METATYPE_CREATION_DATE,
176 EXTRACTOR_METAFORMAT_UTF8,
177 "text/plain",
178 "2004-09-24T02:53:15Z",
179 strlen ("2004-09-24T02:53:15Z") + 1,
180 0
181 },
182 {
183 EXTRACTOR_METATYPE_EDITING_CYCLES,
184 EXTRACTOR_METAFORMAT_UTF8,
185 "text/plain",
186 "4",
187 strlen ("4") + 1,
188 0
189 },
190 {
191 EXTRACTOR_METATYPE_TITLE,
192 EXTRACTOR_METAFORMAT_UTF8,
193 "text/plain",
194 "The Title",
195 strlen ("The Title") + 1,
196 0
197 },
198 {
199 EXTRACTOR_METATYPE_SUBJECT,
200 EXTRACTOR_METAFORMAT_UTF8,
201 "text/plain",
202 "The Subject",
203 strlen ("The Subject") + 1,
204 0
205 },
206 {
207 EXTRACTOR_METATYPE_COMMENT,
208 EXTRACTOR_METAFORMAT_UTF8,
209 "text/plain",
210 "The comments",
211 strlen ("The comments") + 1,
212 0
213 },
214 {
215 EXTRACTOR_METATYPE_KEYWORDS,
216 EXTRACTOR_METAFORMAT_UTF8,
217 "text/plain",
218 "The Keywords",
219 strlen ("The Keywords") + 1,
220 0
221 },
222 { 0, 0, NULL, NULL, 0, -1 }
223 };
224
225 struct SolutionData ole2_blair_sol[] =
226 {
227 {
228 EXTRACTOR_METATYPE_LANGUAGE,
229 EXTRACTOR_METAFORMAT_UTF8,
230 "text/plain",
231 "U.S. English",
232 strlen ("U.S. English") + 1,
233 0
234 },
235 {
236 EXTRACTOR_METATYPE_CREATOR,
237 EXTRACTOR_METAFORMAT_UTF8,
238 "text/plain",
239 "default",
240 strlen ("default") + 1,
241 0
242 },
243 {
244 EXTRACTOR_METATYPE_UNKNOWN_DATE,
245 EXTRACTOR_METAFORMAT_UTF8,
246 "text/plain",
247 "2003-02-03T11:18:00Z",
248 strlen ("2003-02-03T11:18:00Z") + 1,
249 0
250 },
251 {
252 EXTRACTOR_METATYPE_TITLE,
253 EXTRACTOR_METAFORMAT_UTF8,
254 "text/plain",
255 "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION",
256 strlen ("Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION") + 1,
257 0
258 },
259 {
260 EXTRACTOR_METATYPE_CHARACTER_COUNT,
261 EXTRACTOR_METAFORMAT_UTF8,
262 "text/plain",
263 "22090",
264 strlen ("22090") + 1,
265 0
266 },
267 {
268 EXTRACTOR_METATYPE_LAST_SAVED_BY,
269 EXTRACTOR_METAFORMAT_UTF8,
270 "text/plain",
271 "MKhan",
272 strlen ("MKhan") + 1,
273 0
274 },
275 {
276 EXTRACTOR_METATYPE_PAGE_COUNT,
277 EXTRACTOR_METAFORMAT_UTF8,
278 "text/plain",
279 "1",
280 strlen ("1") + 1,
281 0
282 },
283 {
284 EXTRACTOR_METATYPE_WORD_COUNT,
285 EXTRACTOR_METAFORMAT_UTF8,
286 "text/plain",
287 "3875",
288 strlen ("3875") + 1,
289 0
290 },
291 {
292 EXTRACTOR_METATYPE_CREATION_DATE,
293 EXTRACTOR_METAFORMAT_UTF8,
294 "text/plain",
295 "2003-02-03T09:31:00Z",
296 strlen ("2003-02-03T09:31:00Z") + 1,
297 0
298 },
299 {
300 EXTRACTOR_METATYPE_EDITING_CYCLES,
301 EXTRACTOR_METAFORMAT_UTF8,
302 "text/plain",
303 "4",
304 strlen ("4") + 1,
305 0
306 },
307 {
308 EXTRACTOR_METATYPE_MIMETYPE,
309 EXTRACTOR_METAFORMAT_UTF8,
310 "text/plain",
311 "application/vnd.ms-files",
312 strlen ("application/vnd.ms-files") + 1,
313 0
314 },
315 {
316 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
317 EXTRACTOR_METAFORMAT_UTF8,
318 "text/plain",
319 "Microsoft Word 8.0",
320 strlen ("Microsoft Word 8.0") + 1,
321 0
322 },
323 {
324 EXTRACTOR_METATYPE_TEMPLATE,
325 EXTRACTOR_METAFORMAT_UTF8,
326 "text/plain",
327 "Normal.dot",
328 strlen ("Normal.dot") + 1,
329 0
330 },
331 {
332 EXTRACTOR_METATYPE_LINE_COUNT,
333 EXTRACTOR_METAFORMAT_UTF8,
334 "text/plain",
335 "184",
336 strlen ("184") + 1,
337 0
338 },
339 {
340 EXTRACTOR_METATYPE_PARAGRAPH_COUNT,
341 EXTRACTOR_METAFORMAT_UTF8,
342 "text/plain",
343 "44",
344 strlen ("44") + 1,
345 0
346 },
347 {
348 EXTRACTOR_METATYPE_REVISION_HISTORY,
349 EXTRACTOR_METAFORMAT_UTF8,
350 "text/plain",
351 "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
352 strlen ("Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") + 1,
353 0
354 },
355 {
356 EXTRACTOR_METATYPE_REVISION_HISTORY,
357 EXTRACTOR_METAFORMAT_UTF8,
358 "text/plain",
359 "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
360 strlen ("Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") + 1,
361 0
362 },
363 {
364 EXTRACTOR_METATYPE_REVISION_HISTORY,
365 EXTRACTOR_METAFORMAT_UTF8,
366 "text/plain",
367 "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
368 strlen ("Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") + 1,
369 0
370 },
371 {
372 EXTRACTOR_METATYPE_REVISION_HISTORY,
373 EXTRACTOR_METAFORMAT_UTF8,
374 "text/plain",
375 "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'",
376 strlen ("Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'") + 1,
377 0
378 },
379 {
380 EXTRACTOR_METATYPE_REVISION_HISTORY,
381 EXTRACTOR_METAFORMAT_UTF8,
382 "text/plain",
383 "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'",
384 strlen ("Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'") + 1,
385 0
386 },
387 {
388 EXTRACTOR_METATYPE_REVISION_HISTORY,
389 EXTRACTOR_METAFORMAT_UTF8,
390 "text/plain",
391 "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'",
392 strlen ("Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'") + 1,
393 0
394 },
395 {
396 EXTRACTOR_METATYPE_REVISION_HISTORY,
397 EXTRACTOR_METAFORMAT_UTF8,
398 "text/plain",
399 "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'",
400 strlen ("Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'") + 1,
401 0
402 },
403 {
404 EXTRACTOR_METATYPE_REVISION_HISTORY,
405 EXTRACTOR_METAFORMAT_UTF8,
406 "text/plain",
407 "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'",
408 strlen ("Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'") + 1,
409 0
410 },
411 {
412 EXTRACTOR_METATYPE_REVISION_HISTORY,
413 EXTRACTOR_METAFORMAT_UTF8,
414 "text/plain",
415 "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'",
416 strlen ("Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'") + 1,
417 0
418 },
419 {
420 EXTRACTOR_METATYPE_REVISION_HISTORY,
421 EXTRACTOR_METAFORMAT_UTF8,
422 "text/plain",
423 "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'",
424 strlen ("Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'") + 1,
425 0
426 },
427 { 0, 0, NULL, NULL, 0, -1 }
428 };
429
430 struct SolutionData ole2_excel_sol[] =
431 {
432 {
433 EXTRACTOR_METATYPE_CREATOR,
434 EXTRACTOR_METAFORMAT_UTF8,
435 "text/plain",
436 "JV",
437 strlen ("JV") + 1,
438 0
439 },
440 {
441 EXTRACTOR_METATYPE_LAST_SAVED_BY,
442 EXTRACTOR_METAFORMAT_UTF8,
443 "text/plain",
444 "JV",
445 strlen ("JV") + 1,
446 0
447 },
448 {
449 EXTRACTOR_METATYPE_CREATION_DATE,
450 EXTRACTOR_METAFORMAT_UTF8,
451 "text/plain",
452 "2002-03-20T21:26:28Z",
453 strlen ("2002-03-20T21:26:28Z") + 1,
454 0
455 },
456 {
457 EXTRACTOR_METATYPE_MIMETYPE,
458 EXTRACTOR_METAFORMAT_UTF8,
459 "text/plain",
460 "application/vnd.ms-files",
461 strlen ("application/vnd.ms-files") + 1,
462 0
463 },
464 {
465 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
466 EXTRACTOR_METAFORMAT_UTF8,
467 "text/plain",
468 "Microsoft Excel",
469 strlen ("Microsoft Excel") + 1,
470 0
471 },
472 { 0, 0, NULL, NULL, 0, -1 }
473 };
474
475 struct ProblemSet ps[] =
476 {
477 { "testdata/ole2_msword.doc",
478 ole2_msword_sol },
479 { "testdata/ole2_starwriter40.sdw",
480 ole2_starwriter_sol },
481 { "testdata/ole2_blair.doc",
482 ole2_blair_sol },
483 { "testdata/ole2_excel.xls",
484 ole2_excel_sol },
485 { NULL, NULL }
486 };
487 return ET_main ("ole2", ps);
488}
489
490/* end of test_ole2.c */
diff --git a/src/plugins/testdata/ole2_blair.doc b/src/plugins/testdata/ole2_blair.doc
new file mode 100644
index 0000000..c3ea6bc
--- /dev/null
+++ b/src/plugins/testdata/ole2_blair.doc
Binary files differ
diff --git a/src/plugins/testdata/ole2_excel.xls b/src/plugins/testdata/ole2_excel.xls
new file mode 100644
index 0000000..260153b
--- /dev/null
+++ b/src/plugins/testdata/ole2_excel.xls
Binary files differ
diff --git a/src/plugins/testdata/ole2_msword.doc b/src/plugins/testdata/ole2_msword.doc
new file mode 100644
index 0000000..91dbb0f
--- /dev/null
+++ b/src/plugins/testdata/ole2_msword.doc
Binary files differ
diff --git a/src/plugins/testdata/ole2_starwriter40.sdw b/src/plugins/testdata/ole2_starwriter40.sdw
new file mode 100644
index 0000000..410ad1a
--- /dev/null
+++ b/src/plugins/testdata/ole2_starwriter40.sdw
Binary files differ