test_ole2.c (12678B)
1 /* 2 This file is part of libextractor. 3 Copyright (C) 2012 Vidyut Samanta and Christian Grothoff 4 5 libextractor is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3, or (at your 8 option) any later version. 9 10 libextractor is distributed in the hope that it will be useful, but 11 WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with libextractor; see the file COPYING. If not, write to the 17 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 Boston, MA 02110-1301, USA. 19 */ 20 /** 21 * @file plugins/test_ole2.c 22 * @brief testcase for ole2 plugin 23 * @author Christian Grothoff 24 */ 25 #include "platform.h" 26 #include "test_lib.h" 27 28 29 /** 30 * Main function for the OLE2 testcase. 31 * 32 * @param argc number of arguments (ignored) 33 * @param argv arguments (ignored) 34 * @return 0 on success 35 */ 36 int 37 main (int argc, char *argv[]) 38 { 39 struct SolutionData ole2_msword_sol[] = { 40 { 41 EXTRACTOR_METATYPE_CREATOR, 42 EXTRACTOR_METAFORMAT_UTF8, 43 "text/plain", 44 "Nils Durner", 45 strlen ("Nils Durner") + 1, 46 0 47 }, 48 { 49 EXTRACTOR_METATYPE_UNKNOWN_DATE, 50 EXTRACTOR_METAFORMAT_UTF8, 51 "text/plain", 52 "2005-03-21T06:11:12Z", 53 strlen ("2005-03-21T06:11:12Z") + 1, 54 0 55 }, 56 { 57 EXTRACTOR_METATYPE_DESCRIPTION, 58 EXTRACTOR_METAFORMAT_UTF8, 59 "text/plain", 60 "This is a small document to test meta data extraction by GNU libextractor.", 61 strlen ( 62 "This is a small document to test meta data extraction by GNU libextractor.") 63 + 1, 64 0 65 }, 66 { 67 EXTRACTOR_METATYPE_KEYWORDS, 68 EXTRACTOR_METAFORMAT_UTF8, 69 "text/plain", 70 "ole ole2 eole2extractor", 71 strlen ("ole ole2 eole2extractor") + 1, 72 0 73 }, 74 { 75 EXTRACTOR_METATYPE_SUBJECT, 76 EXTRACTOR_METAFORMAT_UTF8, 77 "text/plain", 78 "GNU libextractor", 79 strlen ("GNU libextractor") + 1, 80 0 81 }, 82 { 83 EXTRACTOR_METATYPE_TITLE, 84 EXTRACTOR_METAFORMAT_UTF8, 85 "text/plain", 86 "Testcase for the ole2 extractor", 87 strlen ("Testcase for the ole2 extractor") + 1, 88 0 89 }, 90 { 91 EXTRACTOR_METATYPE_LAST_SAVED_BY, 92 EXTRACTOR_METAFORMAT_UTF8, 93 "text/plain", 94 "Nils Durner", 95 strlen ("Nils Durner") + 1, 96 0 97 }, 98 { 99 EXTRACTOR_METATYPE_CREATION_DATE, 100 EXTRACTOR_METAFORMAT_UTF8, 101 "text/plain", 102 "2005-03-21T06:10:19Z", 103 strlen ("2005-03-21T06:10:19Z") + 1, 104 0 105 }, 106 { 107 EXTRACTOR_METATYPE_EDITING_CYCLES, 108 EXTRACTOR_METAFORMAT_UTF8, 109 "text/plain", 110 "2", 111 strlen ("2") + 1, 112 0 113 }, 114 { 0, 0, NULL, NULL, 0, -1 } 115 }; 116 117 struct SolutionData ole2_starwriter_sol[] = { 118 { 119 EXTRACTOR_METATYPE_CREATOR, 120 EXTRACTOR_METAFORMAT_UTF8, 121 "text/plain", 122 "Christian Grothoff", 123 strlen ("Christian Grothoff") + 1, 124 0 125 }, 126 { 127 EXTRACTOR_METATYPE_UNKNOWN_DATE, 128 EXTRACTOR_METAFORMAT_UTF8, 129 "text/plain", 130 "2004-09-24T02:54:31Z", 131 strlen ("2004-09-24T02:54:31Z") + 1, 132 0 133 }, 134 { 135 EXTRACTOR_METATYPE_DESCRIPTION, 136 EXTRACTOR_METAFORMAT_UTF8, 137 "text/plain", 138 "The comments", 139 strlen ("The comments") + 1, 140 0 141 }, 142 { 143 EXTRACTOR_METATYPE_KEYWORDS, 144 EXTRACTOR_METAFORMAT_UTF8, 145 "text/plain", 146 "The Keywords", 147 strlen ("The Keywords") + 1, 148 0 149 }, 150 { 151 EXTRACTOR_METATYPE_SUBJECT, 152 EXTRACTOR_METAFORMAT_UTF8, 153 "text/plain", 154 "The Subject", 155 strlen ("The Subject") + 1, 156 0 157 }, 158 { 159 EXTRACTOR_METATYPE_TITLE, 160 EXTRACTOR_METAFORMAT_UTF8, 161 "text/plain", 162 "The Title", 163 strlen ("The Title") + 1, 164 0 165 }, 166 { 167 EXTRACTOR_METATYPE_LAST_SAVED_BY, 168 EXTRACTOR_METAFORMAT_UTF8, 169 "text/plain", 170 "Christian Grothoff", 171 strlen ("Christian Grothoff") + 1, 172 0 173 }, 174 { 175 EXTRACTOR_METATYPE_CREATION_DATE, 176 EXTRACTOR_METAFORMAT_UTF8, 177 "text/plain", 178 "2004-09-24T02:53:15Z", 179 strlen ("2004-09-24T02:53:15Z") + 1, 180 0 181 }, 182 { 183 EXTRACTOR_METATYPE_EDITING_CYCLES, 184 EXTRACTOR_METAFORMAT_UTF8, 185 "text/plain", 186 "4", 187 strlen ("4") + 1, 188 0 189 }, 190 { 191 EXTRACTOR_METATYPE_TITLE, 192 EXTRACTOR_METAFORMAT_UTF8, 193 "text/plain", 194 "The Title", 195 strlen ("The Title") + 1, 196 0 197 }, 198 { 199 EXTRACTOR_METATYPE_SUBJECT, 200 EXTRACTOR_METAFORMAT_UTF8, 201 "text/plain", 202 "The Subject", 203 strlen ("The Subject") + 1, 204 0 205 }, 206 { 207 EXTRACTOR_METATYPE_COMMENT, 208 EXTRACTOR_METAFORMAT_UTF8, 209 "text/plain", 210 "The comments", 211 strlen ("The comments") + 1, 212 0 213 }, 214 { 215 EXTRACTOR_METATYPE_KEYWORDS, 216 EXTRACTOR_METAFORMAT_UTF8, 217 "text/plain", 218 "The Keywords", 219 strlen ("The Keywords") + 1, 220 0 221 }, 222 { 0, 0, NULL, NULL, 0, -1 } 223 }; 224 #if HAVE_ICONV 225 struct SolutionData ole2_blair_sol[] = { 226 { 227 EXTRACTOR_METATYPE_LANGUAGE, 228 EXTRACTOR_METAFORMAT_UTF8, 229 "text/plain", 230 "U.S. English", 231 strlen ("U.S. English") + 1, 232 0 233 }, 234 { 235 EXTRACTOR_METATYPE_CREATOR, 236 EXTRACTOR_METAFORMAT_UTF8, 237 "text/plain", 238 "default", 239 strlen ("default") + 1, 240 0 241 }, 242 { 243 EXTRACTOR_METATYPE_UNKNOWN_DATE, 244 EXTRACTOR_METAFORMAT_UTF8, 245 "text/plain", 246 "2003-02-03T11:18:00Z", 247 strlen ("2003-02-03T11:18:00Z") + 1, 248 0 249 }, 250 { 251 EXTRACTOR_METATYPE_TITLE, 252 EXTRACTOR_METAFORMAT_UTF8, 253 "text/plain", 254 "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION", 255 strlen ( 256 "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION") 257 + 1, 258 0 259 }, 260 { 261 EXTRACTOR_METATYPE_CHARACTER_COUNT, 262 EXTRACTOR_METAFORMAT_UTF8, 263 "text/plain", 264 "22090", 265 strlen ("22090") + 1, 266 0 267 }, 268 { 269 EXTRACTOR_METATYPE_LAST_SAVED_BY, 270 EXTRACTOR_METAFORMAT_UTF8, 271 "text/plain", 272 "MKhan", 273 strlen ("MKhan") + 1, 274 0 275 }, 276 { 277 EXTRACTOR_METATYPE_PAGE_COUNT, 278 EXTRACTOR_METAFORMAT_UTF8, 279 "text/plain", 280 "1", 281 strlen ("1") + 1, 282 0 283 }, 284 { 285 EXTRACTOR_METATYPE_WORD_COUNT, 286 EXTRACTOR_METAFORMAT_UTF8, 287 "text/plain", 288 "3875", 289 strlen ("3875") + 1, 290 0 291 }, 292 { 293 EXTRACTOR_METATYPE_CREATION_DATE, 294 EXTRACTOR_METAFORMAT_UTF8, 295 "text/plain", 296 "2003-02-03T09:31:00Z", 297 strlen ("2003-02-03T09:31:00Z") + 1, 298 0 299 }, 300 { 301 EXTRACTOR_METATYPE_EDITING_CYCLES, 302 EXTRACTOR_METAFORMAT_UTF8, 303 "text/plain", 304 "4", 305 strlen ("4") + 1, 306 0 307 }, 308 { 309 EXTRACTOR_METATYPE_MIMETYPE, 310 EXTRACTOR_METAFORMAT_UTF8, 311 "text/plain", 312 "application/vnd.ms-files", 313 strlen ("application/vnd.ms-files") + 1, 314 0 315 }, 316 { 317 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE, 318 EXTRACTOR_METAFORMAT_UTF8, 319 "text/plain", 320 "Microsoft Word 8.0", 321 strlen ("Microsoft Word 8.0") + 1, 322 0 323 }, 324 { 325 EXTRACTOR_METATYPE_TEMPLATE, 326 EXTRACTOR_METAFORMAT_UTF8, 327 "text/plain", 328 "Normal.dot", 329 strlen ("Normal.dot") + 1, 330 0 331 }, 332 { 333 EXTRACTOR_METATYPE_LINE_COUNT, 334 EXTRACTOR_METAFORMAT_UTF8, 335 "text/plain", 336 "184", 337 strlen ("184") + 1, 338 0 339 }, 340 { 341 EXTRACTOR_METATYPE_PARAGRAPH_COUNT, 342 EXTRACTOR_METAFORMAT_UTF8, 343 "text/plain", 344 "44", 345 strlen ("44") + 1, 346 0 347 }, 348 { 349 EXTRACTOR_METATYPE_REVISION_HISTORY, 350 EXTRACTOR_METAFORMAT_UTF8, 351 "text/plain", 352 "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'", 353 strlen ( 354 "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") 355 + 1, 356 0 357 }, 358 { 359 EXTRACTOR_METATYPE_REVISION_HISTORY, 360 EXTRACTOR_METAFORMAT_UTF8, 361 "text/plain", 362 "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'", 363 strlen ( 364 "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") 365 + 1, 366 0 367 }, 368 { 369 EXTRACTOR_METATYPE_REVISION_HISTORY, 370 EXTRACTOR_METAFORMAT_UTF8, 371 "text/plain", 372 "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'", 373 strlen ( 374 "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'") 375 + 1, 376 0 377 }, 378 { 379 EXTRACTOR_METATYPE_REVISION_HISTORY, 380 EXTRACTOR_METAFORMAT_UTF8, 381 "text/plain", 382 "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'", 383 strlen ( 384 "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'") 385 + 1, 386 0 387 }, 388 { 389 EXTRACTOR_METATYPE_REVISION_HISTORY, 390 EXTRACTOR_METAFORMAT_UTF8, 391 "text/plain", 392 "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'", 393 strlen ( 394 "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'") + 1, 395 0 396 }, 397 { 398 EXTRACTOR_METATYPE_REVISION_HISTORY, 399 EXTRACTOR_METAFORMAT_UTF8, 400 "text/plain", 401 "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'", 402 strlen ( 403 "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'") 404 + 1, 405 0 406 }, 407 { 408 EXTRACTOR_METATYPE_REVISION_HISTORY, 409 EXTRACTOR_METAFORMAT_UTF8, 410 "text/plain", 411 "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'", 412 strlen ( 413 "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'") 414 + 1, 415 0 416 }, 417 { 418 EXTRACTOR_METATYPE_REVISION_HISTORY, 419 EXTRACTOR_METAFORMAT_UTF8, 420 "text/plain", 421 "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'", 422 strlen ( 423 "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'") 424 + 1, 425 0 426 }, 427 { 428 EXTRACTOR_METATYPE_REVISION_HISTORY, 429 EXTRACTOR_METAFORMAT_UTF8, 430 "text/plain", 431 "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'", 432 strlen ( 433 "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'") 434 + 1, 435 0 436 }, 437 { 438 EXTRACTOR_METATYPE_REVISION_HISTORY, 439 EXTRACTOR_METAFORMAT_UTF8, 440 "text/plain", 441 "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'", 442 strlen ( 443 "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'") 444 + 1, 445 0 446 }, 447 { 0, 0, NULL, NULL, 0, -1 } 448 }; 449 #endif 450 struct SolutionData ole2_excel_sol[] = { 451 { 452 EXTRACTOR_METATYPE_CREATOR, 453 EXTRACTOR_METAFORMAT_UTF8, 454 "text/plain", 455 "JV", 456 strlen ("JV") + 1, 457 0 458 }, 459 { 460 EXTRACTOR_METATYPE_LAST_SAVED_BY, 461 EXTRACTOR_METAFORMAT_UTF8, 462 "text/plain", 463 "JV", 464 strlen ("JV") + 1, 465 0 466 }, 467 { 468 EXTRACTOR_METATYPE_CREATION_DATE, 469 EXTRACTOR_METAFORMAT_UTF8, 470 "text/plain", 471 "2002-03-20T21:26:28Z", 472 strlen ("2002-03-20T21:26:28Z") + 1, 473 0 474 }, 475 { 476 EXTRACTOR_METATYPE_MIMETYPE, 477 EXTRACTOR_METAFORMAT_UTF8, 478 "text/plain", 479 "application/vnd.ms-files", 480 strlen ("application/vnd.ms-files") + 1, 481 0 482 }, 483 { 484 EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE, 485 EXTRACTOR_METAFORMAT_UTF8, 486 "text/plain", 487 "Microsoft Excel", 488 strlen ("Microsoft Excel") + 1, 489 0 490 }, 491 { 0, 0, NULL, NULL, 0, -1 } 492 }; 493 494 struct ProblemSet ps[] = { 495 { "testdata/ole2_msword.doc", 496 ole2_msword_sol }, 497 { "testdata/ole2_starwriter40.sdw", 498 ole2_starwriter_sol }, 499 #if HAVE_ICONV 500 { "testdata/ole2_blair.doc", 501 ole2_blair_sol }, 502 #endif 503 { "testdata/ole2_excel.xls", 504 ole2_excel_sol }, 505 { NULL, NULL } 506 }; 507 return ET_main ("ole2", ps); 508 } 509 510 511 /* end of test_ole2.c */