libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

test_ole2.c (12678B)


      1 /*
      2      This file is part of libextractor.
      3      Copyright (C) 2012 Vidyut Samanta and Christian Grothoff
      4 
      5      libextractor is free software; you can redistribute it and/or modify
      6      it under the terms of the GNU General Public License as published
      7      by the Free Software Foundation; either version 3, or (at your
      8      option) any later version.
      9 
     10      libextractor is distributed in the hope that it will be useful, but
     11      WITHOUT ANY WARRANTY; without even the implied warranty of
     12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13      General Public License for more details.
     14 
     15      You should have received a copy of the GNU General Public License
     16      along with libextractor; see the file COPYING.  If not, write to the
     17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18      Boston, MA 02110-1301, USA.
     19 */
     20 /**
     21  * @file plugins/test_ole2.c
     22  * @brief testcase for ole2 plugin
     23  * @author Christian Grothoff
     24  */
     25 #include "platform.h"
     26 #include "test_lib.h"
     27 
     28 
     29 /**
     30  * Main function for the OLE2 testcase.
     31  *
     32  * @param argc number of arguments (ignored)
     33  * @param argv arguments (ignored)
     34  * @return 0 on success
     35  */
     36 int
     37 main (int argc, char *argv[])
     38 {
     39   struct SolutionData ole2_msword_sol[] = {
     40     {
     41       EXTRACTOR_METATYPE_CREATOR,
     42       EXTRACTOR_METAFORMAT_UTF8,
     43       "text/plain",
     44       "Nils Durner",
     45       strlen ("Nils Durner") + 1,
     46       0
     47     },
     48     {
     49       EXTRACTOR_METATYPE_UNKNOWN_DATE,
     50       EXTRACTOR_METAFORMAT_UTF8,
     51       "text/plain",
     52       "2005-03-21T06:11:12Z",
     53       strlen ("2005-03-21T06:11:12Z") + 1,
     54       0
     55     },
     56     {
     57       EXTRACTOR_METATYPE_DESCRIPTION,
     58       EXTRACTOR_METAFORMAT_UTF8,
     59       "text/plain",
     60       "This is a small document to test meta data extraction by GNU libextractor.",
     61       strlen (
     62         "This is a small document to test meta data extraction by GNU libextractor.")
     63       + 1,
     64       0
     65     },
     66     {
     67       EXTRACTOR_METATYPE_KEYWORDS,
     68       EXTRACTOR_METAFORMAT_UTF8,
     69       "text/plain",
     70       "ole ole2 eole2extractor",
     71       strlen ("ole ole2 eole2extractor") + 1,
     72       0
     73     },
     74     {
     75       EXTRACTOR_METATYPE_SUBJECT,
     76       EXTRACTOR_METAFORMAT_UTF8,
     77       "text/plain",
     78       "GNU libextractor",
     79       strlen ("GNU libextractor") + 1,
     80       0
     81     },
     82     {
     83       EXTRACTOR_METATYPE_TITLE,
     84       EXTRACTOR_METAFORMAT_UTF8,
     85       "text/plain",
     86       "Testcase for the ole2 extractor",
     87       strlen ("Testcase for the ole2 extractor") + 1,
     88       0
     89     },
     90     {
     91       EXTRACTOR_METATYPE_LAST_SAVED_BY,
     92       EXTRACTOR_METAFORMAT_UTF8,
     93       "text/plain",
     94       "Nils Durner",
     95       strlen ("Nils Durner") + 1,
     96       0
     97     },
     98     {
     99       EXTRACTOR_METATYPE_CREATION_DATE,
    100       EXTRACTOR_METAFORMAT_UTF8,
    101       "text/plain",
    102       "2005-03-21T06:10:19Z",
    103       strlen ("2005-03-21T06:10:19Z") + 1,
    104       0
    105     },
    106     {
    107       EXTRACTOR_METATYPE_EDITING_CYCLES,
    108       EXTRACTOR_METAFORMAT_UTF8,
    109       "text/plain",
    110       "2",
    111       strlen ("2") + 1,
    112       0
    113     },
    114     { 0, 0, NULL, NULL, 0, -1 }
    115   };
    116 
    117   struct SolutionData ole2_starwriter_sol[] = {
    118     {
    119       EXTRACTOR_METATYPE_CREATOR,
    120       EXTRACTOR_METAFORMAT_UTF8,
    121       "text/plain",
    122       "Christian Grothoff",
    123       strlen ("Christian Grothoff") + 1,
    124       0
    125     },
    126     {
    127       EXTRACTOR_METATYPE_UNKNOWN_DATE,
    128       EXTRACTOR_METAFORMAT_UTF8,
    129       "text/plain",
    130       "2004-09-24T02:54:31Z",
    131       strlen ("2004-09-24T02:54:31Z") + 1,
    132       0
    133     },
    134     {
    135       EXTRACTOR_METATYPE_DESCRIPTION,
    136       EXTRACTOR_METAFORMAT_UTF8,
    137       "text/plain",
    138       "The comments",
    139       strlen ("The comments") + 1,
    140       0
    141     },
    142     {
    143       EXTRACTOR_METATYPE_KEYWORDS,
    144       EXTRACTOR_METAFORMAT_UTF8,
    145       "text/plain",
    146       "The Keywords",
    147       strlen ("The Keywords") + 1,
    148       0
    149     },
    150     {
    151       EXTRACTOR_METATYPE_SUBJECT,
    152       EXTRACTOR_METAFORMAT_UTF8,
    153       "text/plain",
    154       "The Subject",
    155       strlen ("The Subject") + 1,
    156       0
    157     },
    158     {
    159       EXTRACTOR_METATYPE_TITLE,
    160       EXTRACTOR_METAFORMAT_UTF8,
    161       "text/plain",
    162       "The Title",
    163       strlen ("The Title") + 1,
    164       0
    165     },
    166     {
    167       EXTRACTOR_METATYPE_LAST_SAVED_BY,
    168       EXTRACTOR_METAFORMAT_UTF8,
    169       "text/plain",
    170       "Christian Grothoff",
    171       strlen ("Christian Grothoff") + 1,
    172       0
    173     },
    174     {
    175       EXTRACTOR_METATYPE_CREATION_DATE,
    176       EXTRACTOR_METAFORMAT_UTF8,
    177       "text/plain",
    178       "2004-09-24T02:53:15Z",
    179       strlen ("2004-09-24T02:53:15Z") + 1,
    180       0
    181     },
    182     {
    183       EXTRACTOR_METATYPE_EDITING_CYCLES,
    184       EXTRACTOR_METAFORMAT_UTF8,
    185       "text/plain",
    186       "4",
    187       strlen ("4") + 1,
    188       0
    189     },
    190     {
    191       EXTRACTOR_METATYPE_TITLE,
    192       EXTRACTOR_METAFORMAT_UTF8,
    193       "text/plain",
    194       "The Title",
    195       strlen ("The Title") + 1,
    196       0
    197     },
    198     {
    199       EXTRACTOR_METATYPE_SUBJECT,
    200       EXTRACTOR_METAFORMAT_UTF8,
    201       "text/plain",
    202       "The Subject",
    203       strlen ("The Subject") + 1,
    204       0
    205     },
    206     {
    207       EXTRACTOR_METATYPE_COMMENT,
    208       EXTRACTOR_METAFORMAT_UTF8,
    209       "text/plain",
    210       "The comments",
    211       strlen ("The comments") + 1,
    212       0
    213     },
    214     {
    215       EXTRACTOR_METATYPE_KEYWORDS,
    216       EXTRACTOR_METAFORMAT_UTF8,
    217       "text/plain",
    218       "The Keywords",
    219       strlen ("The Keywords") + 1,
    220       0
    221     },
    222     { 0, 0, NULL, NULL, 0, -1 }
    223   };
    224 #if HAVE_ICONV
    225   struct SolutionData ole2_blair_sol[] = {
    226     {
    227       EXTRACTOR_METATYPE_LANGUAGE,
    228       EXTRACTOR_METAFORMAT_UTF8,
    229       "text/plain",
    230       "U.S. English",
    231       strlen ("U.S. English") + 1,
    232       0
    233     },
    234     {
    235       EXTRACTOR_METATYPE_CREATOR,
    236       EXTRACTOR_METAFORMAT_UTF8,
    237       "text/plain",
    238       "default",
    239       strlen ("default") + 1,
    240       0
    241     },
    242     {
    243       EXTRACTOR_METATYPE_UNKNOWN_DATE,
    244       EXTRACTOR_METAFORMAT_UTF8,
    245       "text/plain",
    246       "2003-02-03T11:18:00Z",
    247       strlen ("2003-02-03T11:18:00Z") + 1,
    248       0
    249     },
    250     {
    251       EXTRACTOR_METATYPE_TITLE,
    252       EXTRACTOR_METAFORMAT_UTF8,
    253       "text/plain",
    254       "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION",
    255       strlen (
    256         "Iraq- ITS INFRASTRUCTURE OF CONCEALMENT, DECEPTION AND INTIMIDATION")
    257       + 1,
    258       0
    259     },
    260     {
    261       EXTRACTOR_METATYPE_CHARACTER_COUNT,
    262       EXTRACTOR_METAFORMAT_UTF8,
    263       "text/plain",
    264       "22090",
    265       strlen ("22090") + 1,
    266       0
    267     },
    268     {
    269       EXTRACTOR_METATYPE_LAST_SAVED_BY,
    270       EXTRACTOR_METAFORMAT_UTF8,
    271       "text/plain",
    272       "MKhan",
    273       strlen ("MKhan") + 1,
    274       0
    275     },
    276     {
    277       EXTRACTOR_METATYPE_PAGE_COUNT,
    278       EXTRACTOR_METAFORMAT_UTF8,
    279       "text/plain",
    280       "1",
    281       strlen ("1") + 1,
    282       0
    283     },
    284     {
    285       EXTRACTOR_METATYPE_WORD_COUNT,
    286       EXTRACTOR_METAFORMAT_UTF8,
    287       "text/plain",
    288       "3875",
    289       strlen ("3875") + 1,
    290       0
    291     },
    292     {
    293       EXTRACTOR_METATYPE_CREATION_DATE,
    294       EXTRACTOR_METAFORMAT_UTF8,
    295       "text/plain",
    296       "2003-02-03T09:31:00Z",
    297       strlen ("2003-02-03T09:31:00Z") + 1,
    298       0
    299     },
    300     {
    301       EXTRACTOR_METATYPE_EDITING_CYCLES,
    302       EXTRACTOR_METAFORMAT_UTF8,
    303       "text/plain",
    304       "4",
    305       strlen ("4") + 1,
    306       0
    307     },
    308     {
    309       EXTRACTOR_METATYPE_MIMETYPE,
    310       EXTRACTOR_METAFORMAT_UTF8,
    311       "text/plain",
    312       "application/vnd.ms-files",
    313       strlen ("application/vnd.ms-files") + 1,
    314       0
    315     },
    316     {
    317       EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
    318       EXTRACTOR_METAFORMAT_UTF8,
    319       "text/plain",
    320       "Microsoft Word 8.0",
    321       strlen ("Microsoft Word 8.0") + 1,
    322       0
    323     },
    324     {
    325       EXTRACTOR_METATYPE_TEMPLATE,
    326       EXTRACTOR_METAFORMAT_UTF8,
    327       "text/plain",
    328       "Normal.dot",
    329       strlen ("Normal.dot") + 1,
    330       0
    331     },
    332     {
    333       EXTRACTOR_METATYPE_LINE_COUNT,
    334       EXTRACTOR_METAFORMAT_UTF8,
    335       "text/plain",
    336       "184",
    337       strlen ("184") + 1,
    338       0
    339     },
    340     {
    341       EXTRACTOR_METATYPE_PARAGRAPH_COUNT,
    342       EXTRACTOR_METAFORMAT_UTF8,
    343       "text/plain",
    344       "44",
    345       strlen ("44") + 1,
    346       0
    347     },
    348     {
    349       EXTRACTOR_METATYPE_REVISION_HISTORY,
    350       EXTRACTOR_METAFORMAT_UTF8,
    351       "text/plain",
    352       "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
    353       strlen (
    354         "Revision #0: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
    355       + 1,
    356       0
    357     },
    358     {
    359       EXTRACTOR_METATYPE_REVISION_HISTORY,
    360       EXTRACTOR_METAFORMAT_UTF8,
    361       "text/plain",
    362       "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
    363       strlen (
    364         "Revision #1: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
    365       + 1,
    366       0
    367     },
    368     {
    369       EXTRACTOR_METATYPE_REVISION_HISTORY,
    370       EXTRACTOR_METAFORMAT_UTF8,
    371       "text/plain",
    372       "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'",
    373       strlen (
    374         "Revision #2: Author `cic22' worked on `C:\\DOCUME~1\\phamill\\LOCALS~1\\Temp\\AutoRecovery save of Iraq - security.asd'")
    375       + 1,
    376       0
    377     },
    378     {
    379       EXTRACTOR_METATYPE_REVISION_HISTORY,
    380       EXTRACTOR_METAFORMAT_UTF8,
    381       "text/plain",
    382       "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'",
    383       strlen (
    384         "Revision #3: Author `JPratt' worked on `C:\\TEMP\\Iraq - security.doc'")
    385       + 1,
    386       0
    387     },
    388     {
    389       EXTRACTOR_METATYPE_REVISION_HISTORY,
    390       EXTRACTOR_METAFORMAT_UTF8,
    391       "text/plain",
    392       "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'",
    393       strlen (
    394         "Revision #4: Author `JPratt' worked on `A:\\Iraq - security.doc'") + 1,
    395       0
    396     },
    397     {
    398       EXTRACTOR_METATYPE_REVISION_HISTORY,
    399       EXTRACTOR_METAFORMAT_UTF8,
    400       "text/plain",
    401       "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'",
    402       strlen (
    403         "Revision #5: Author `ablackshaw' worked on `C:\\ABlackshaw\\Iraq - security.doc'")
    404       + 1,
    405       0
    406     },
    407     {
    408       EXTRACTOR_METATYPE_REVISION_HISTORY,
    409       EXTRACTOR_METAFORMAT_UTF8,
    410       "text/plain",
    411       "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'",
    412       strlen (
    413         "Revision #6: Author `ablackshaw' worked on `C:\\ABlackshaw\\A;Iraq - security.doc'")
    414       + 1,
    415       0
    416     },
    417     {
    418       EXTRACTOR_METATYPE_REVISION_HISTORY,
    419       EXTRACTOR_METAFORMAT_UTF8,
    420       "text/plain",
    421       "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'",
    422       strlen (
    423         "Revision #7: Author `ablackshaw' worked on `A:\\Iraq - security.doc'")
    424       + 1,
    425       0
    426     },
    427     {
    428       EXTRACTOR_METATYPE_REVISION_HISTORY,
    429       EXTRACTOR_METAFORMAT_UTF8,
    430       "text/plain",
    431       "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'",
    432       strlen (
    433         "Revision #8: Author `MKhan' worked on `C:\\TEMP\\Iraq - security.doc'")
    434       + 1,
    435       0
    436     },
    437     {
    438       EXTRACTOR_METATYPE_REVISION_HISTORY,
    439       EXTRACTOR_METAFORMAT_UTF8,
    440       "text/plain",
    441       "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'",
    442       strlen (
    443         "Revision #9: Author `MKhan' worked on `C:\\WINNT\\Profiles\\mkhan\\Desktop\\Iraq.doc'")
    444       + 1,
    445       0
    446     },
    447     { 0, 0, NULL, NULL, 0, -1 }
    448   };
    449 #endif
    450   struct SolutionData ole2_excel_sol[] = {
    451     {
    452       EXTRACTOR_METATYPE_CREATOR,
    453       EXTRACTOR_METAFORMAT_UTF8,
    454       "text/plain",
    455       "JV",
    456       strlen ("JV") + 1,
    457       0
    458     },
    459     {
    460       EXTRACTOR_METATYPE_LAST_SAVED_BY,
    461       EXTRACTOR_METAFORMAT_UTF8,
    462       "text/plain",
    463       "JV",
    464       strlen ("JV") + 1,
    465       0
    466     },
    467     {
    468       EXTRACTOR_METATYPE_CREATION_DATE,
    469       EXTRACTOR_METAFORMAT_UTF8,
    470       "text/plain",
    471       "2002-03-20T21:26:28Z",
    472       strlen ("2002-03-20T21:26:28Z") + 1,
    473       0
    474     },
    475     {
    476       EXTRACTOR_METATYPE_MIMETYPE,
    477       EXTRACTOR_METAFORMAT_UTF8,
    478       "text/plain",
    479       "application/vnd.ms-files",
    480       strlen ("application/vnd.ms-files") + 1,
    481       0
    482     },
    483     {
    484       EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE,
    485       EXTRACTOR_METAFORMAT_UTF8,
    486       "text/plain",
    487       "Microsoft Excel",
    488       strlen ("Microsoft Excel") + 1,
    489       0
    490     },
    491     { 0, 0, NULL, NULL, 0, -1 }
    492   };
    493 
    494   struct ProblemSet ps[] = {
    495     { "testdata/ole2_msword.doc",
    496       ole2_msword_sol },
    497     { "testdata/ole2_starwriter40.sdw",
    498       ole2_starwriter_sol },
    499 #if HAVE_ICONV
    500     { "testdata/ole2_blair.doc",
    501       ole2_blair_sol },
    502 #endif
    503     { "testdata/ole2_excel.xls",
    504       ole2_excel_sol },
    505     { NULL, NULL }
    506   };
    507   return ET_main ("ole2", ps);
    508 }
    509 
    510 
    511 /* end of test_ole2.c */