wordleaker integration -- draft - libextractor

commit c099ad78d68ca9d0e91ddbde4da3141b82f1a730
parent a4d078ecfb2703ccd4c17d27559a9ec2f346e5c7
Author: Christian Grothoff <christian@grothoff.org>
Date:   Wed,  8 Mar 2006 13:52:16 +0000

wordleaker integration -- draft

Diffstat:
M configure.ac  | 1 +
M src/include/extractor.h  | 2 ++
M src/main/extractor.c  | 3 ++-
A src/plugins/wordleaker/Makefile.am  | 25 +++++++++++++++++++++++++
A src/plugins/wordleaker/SYMBOLS  | 1 +
D src/plugins/wordleaker/WordLeaker.cpp  | 310 -------------------------------------------------------------------------------
D src/plugins/wordleaker/WordLeaker.h  | 287 -------------------------------------------------------------------------------
A src/plugins/wordleaker/wordextractor.cc  | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/plugins/wordleaker/wordleaker.cpp  | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/plugins/wordleaker/wordleaker.h  | 287 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

10 files changed, 850 insertions(+), 598 deletions(-)
diff --git a/configure.ac b/configure.ac
@@ -298,6 +298,7 @@ src/plugins/printable/Makefile
 src/plugins/hash/Makefile
 src/plugins/thumbnail/Makefile
 src/plugins/exiv2/Makefile
+src/plugins/wordleaker/Makefile
 src/test/Makefile
 ])
 
diff --git a/src/include/extractor.h b/src/include/extractor.h
@@ -140,6 +140,8 @@ typedef enum {
   EXTRACTOR_ORIENTATION = 87,
   EXTRACTOR_TEMPLATE = 88,
   EXTRACTOR_SPLIT = 89,
+
+  EXTRACTOR_PRODUCTVERSION = 90,
 } EXTRACTOR_KeywordType;
 
 /**
diff --git a/src/main/extractor.c b/src/main/extractor.c
@@ -131,11 +131,12 @@ static const char *keywordTypes[] = {
   gettext_noop("orientation"),
   gettext_noop("template"),
   gettext_noop("split"),
+  gettext_noop("product version"),
   NULL,
 };
 
 /* the number of keyword types (for bounds-checking) */
-#define HIGHEST_TYPE_NUMBER 90
+#define HIGHEST_TYPE_NUMBER 91
 
 #ifdef HAVE_LIBOGG
 #if HAVE_VORBIS
diff --git a/src/plugins/wordleaker/Makefile.am b/src/plugins/wordleaker/Makefile.am
@@ -0,0 +1,25 @@
+include ../Makefile-plugins.am
+
+plugin_LTLIBRARIES = \
+ libextractor_word.la
+
+libextractor_word_la_LINK = \
+  /bin/sh ../../../libtool --mode=link $(CXXLD) -o libextractor_word.la
+libextractor_word_la_LDFLAGS = \
+  $(PLUGINFLAGS)  $(retaincommand) \
+  $(XTRA_CPPLIBS)
+libextractor_word_la_LIBADD = \
+  $(top_builddir)/src/main/libextractor.la \
+  $(top_builddir)/src/plugins/libconvert.la \
+  -lm 
+
+libextractor_word_la_SOURCES = \
+ pole.h pole.cpp \
+ wordleaker.h \
+ wordextractor.cc 
+
+# gcc 3.3 produces BROKEN code for -O1 and -O2 (PDF extraction
+# would fail silently) hence we MUST override the user flag here
+# which may contain -O1 or -O2!
+# CXXFLAGS = -O0
+
diff --git a/src/plugins/wordleaker/SYMBOLS b/src/plugins/wordleaker/SYMBOLS
@@ -0,0 +1 @@
+libextractor_word_extract
diff --git a/src/plugins/wordleaker/WordLeaker.cpp b/src/plugins/wordleaker/WordLeaker.cpp
@@ -1,310 +0,0 @@
-/* 
-   WordLeaker - Shows information about Word DOC files
-   Copyright (C) 2005 Sacha Fuentes <madelman@iname.com>
-
-   Based on poledump.c
-   Original idea from WordDumper (http://www.computerbytesman.com)
-   Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm
-   Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html
-   
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this library; see the file COPYING.  If not, write to
-   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, US
-*/
-
-// TAKE CARE: there's not a single check for validity of data,
-// so any malformed or malicious Word file will break it
-
-#include <iostream>
-#include <fstream>
-#include <stdlib.h>
-#include <list>
-#include <ctime>
-
-#include "pole.h"
-#include "WordLeaker.h"
-
-unsigned long fcSttbSavedBy;
-unsigned long lcbSttbSavedBy;
-  
-// read the type of the property and displays its value
-void showProperty( POLE::Stream* stream ) {
-  unsigned long read, type;
-  unsigned char buffer[256];
-  unsigned char c;
-  unsigned long i;
-  unsigned long t, t1, t2;
-  char *s;
-    
-  read = stream->read(buffer, 4);
-  type = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-    
-  switch (type) {
-      case 2: // VT_I2
-        read = stream->read(buffer, 2);
-        i = buffer[0] + (buffer[1] << 8);
-        cout << i << endl;
-        break;
-      case 3: // VT_I4
-        read = stream->read(buffer, 4);
-        i = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-        cout << i << endl;
-        break;
-      case 11: // VT_BOOL
-        read = stream->read(buffer, 1);
-        if ((char) buffer[0] == -1)
-            cout << "true" << endl;
-        else        
-            cout << "false" << endl;
-        break;
-      case 30: // VT_LPSTR
-        read = stream->read(buffer, 4);
-        i = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-        while ((c = stream->getch()) != 0)
-            cout << c;
-        cout << endl;
-        break;
-      case 64: // VT_FILETIME
-        read = stream->read(buffer, 8);
-        t1 = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-        t2 = buffer[4]  + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-        t = filetime_to_unixtime(t1, t2);
-        s = ctime((time_t *) &t);
-        cout << s;
-        break;
-      default:
-          cout << "Unknown format " << type << endl;
-  }
-}
-
-// show the revision data (users and files)
-void dumpRevision( POLE::Storage* storage ) {
-  unsigned int nRev;
-  unsigned int where = 0;
-  POLE::Stream* stream;
-    
-  cout << "Revision:" << endl;
-  cout << "---------" << endl << endl;
-
-  // FIXME: should look if using 0Table or 1Table
-  stream = storage->stream( "1Table" );
-  if( !stream ) {
-      cout << "There's no revision information" << endl;
-      return;
-  }
-  
-  unsigned char * buffer = new unsigned char[lcbSttbSavedBy];
-  unsigned char buffer2[1024];
-  unsigned int length;
-  
-  // goto offset of revision
-  stream->seek(fcSttbSavedBy);
-  // read all the revision history
-  stream->read(buffer, lcbSttbSavedBy);
-
-  // there are n strings, so n/2 revisions (author & file)
-  nRev = (buffer[2] + (buffer[3] << 8)) / 2;
-  where = 6;
-  
-  for (unsigned int i=0; i < nRev; i++) {
-    cout << "Rev #" << i << ": Author \"";
-    length = buffer[where++];
-    // it's unicode, for now we only get the low byte
-    for (unsigned int j=0; j < length; j++) {
-        where++;
-        cout << buffer[where];
-        where++;
-    }
-    where++;
-    cout << "\" worked on file \"";
-    length = buffer[where++];
-    // it's unicode, for now we only get the low byte
-    for (unsigned int j=0; j < length; j++) {
-        where++;
-        cout << buffer[where];
-        where++;
-    }
-    where++;
-    cout << "\"" << endl;    
-  }
-  
-  cout << endl;      
-  delete buffer;
-  
-}
-
-// show data from DocumentSummary stream
-void dumpDocumentSummary( POLE::Storage* storage ) {
-  POLE::Stream* stream;
-  unsigned long read, nproperties, propertyID, offsetProp, offsetCur;
-  unsigned long begin;
-    
-  cout << "Document Summary:" << endl;
-  cout << "-----------------" << endl << endl;
-
-  stream = storage->stream( "DocumentSummaryInformation" );
-  if( !stream ) {
-      cout << "There's no document summary information" << endl;
-      return;
-  }
-  
-  unsigned char buffer[256];
-
-  // ClassID & Offset
-  stream->seek(28);
-  stream->read(buffer, 20);
-  // beginning of section
-  begin = stream->tell();
-  // length of section
-  read = stream->read(buffer, 4);
-  // number of properties
-  read = stream->read(buffer, 4);
-  nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-  // properties
-
-  for (unsigned long i = 0; i < nproperties; i++) {
-    read = stream->read(buffer, 8);
-    propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-    offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-      if (propertyID > 1 && propertyID < 16) {
-        cout << DocumentSummaryProperties[propertyID] << ": ";
-        offsetCur = stream->tell();
-        stream->seek(offsetProp + begin);
-        // read and show the property
-        showProperty(stream);  
-        stream->seek(offsetCur);
-    }
-  }
-
-  cout << endl;      
-}
-
-// show data from Summary stream
-void dumpSummary( POLE::Storage* storage ) {
-  POLE::Stream* stream;
-  unsigned long read, nproperties, propertyID, offsetProp, offsetCur;
-  unsigned long begin;
-    
-  cout << "Summary:" << endl;
-  cout << "--------" << endl << endl;
-
-  stream = storage->stream( "SummaryInformation" );
-  if( !stream ) {
-      cout << "There's no summary information" << endl;
-      return;
-  }
-  
-  unsigned char buffer[256];
-
-  // ClassID & Offset
-  stream->seek(28);
-  stream->read(buffer, 20);
-  // beginning of section
-  begin = stream->tell();
-  // length of section
-  read = stream->read(buffer, 4);
-  // number of properties
-  read = stream->read(buffer, 4);
-  nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-  // properties
-  for (unsigned long i = 0; i < nproperties; i++) {
-    read = stream->read(buffer, 8);
-    propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-    offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-    if (propertyID > 1 && propertyID < 20) {
-        cout << SummaryProperties[propertyID] << ": ";
-        offsetCur = stream->tell();
-        stream->seek(offsetProp + begin);
-        // read and show the property
-        showProperty(stream);  
-        stream->seek(offsetCur);
-    }
-  }
-
-  cout << endl;      
-}
-
-// reads the header of the file
-bool readFIB( char* filename ) {
-  fstream file;
-    
-   file.open( filename, std::ios::binary | std::ios::in );
-  if( !file.good() ) {
-    cout << "Can't find the file" << endl;
-    return false;
-  }
-  
-  unsigned char * buffer = new unsigned char[898];
-  file.seekg( 512 ); 
-  file.read( (char*)buffer, 898 );
-  file.close();
-  
-  unsigned int wIdent = buffer[0] + (buffer[1] << 8);
-  unsigned int nProduct = buffer[4] + (buffer[5] << 8);
-  unsigned int lid = buffer[6] + (buffer[7] << 8);
-  unsigned int envr = buffer[18];
-  unsigned int wMagicCreated = buffer[34] + (buffer[35] << 8);
-  unsigned int wMagicRevised = buffer[36] + (buffer[37] << 8);
-  unsigned long lProductCreated = buffer[68] + (buffer[69] << 8) + (buffer[70] << 16) + (buffer[71] << 24);
-  unsigned long lProductRevised = buffer[72] + (buffer[73] << 8) + (buffer[74] << 16) + (buffer[75] << 24);
-  fcSttbSavedBy = buffer[722] + (buffer[723] << 8) + (buffer[724] << 16) + (buffer[725] << 24);
-  lcbSttbSavedBy = buffer[726] + (buffer[727] << 8) + (buffer[728] << 16) + (buffer[729] << 24);
-  delete[] buffer; 
-  
-  cout << "File: " << filename << endl;
-  cout << "Product version: " << nProduct << endl;  
-  cout << "Language: " << lidToLanguage(lid) << endl;
-  cout << "Created by: " << idToProduct(wMagicCreated) << " (Build " << dateToString(lProductCreated) << ")" << endl;
-  cout << "Revised by: " << idToProduct(wMagicRevised) << " (Build " << dateToString(lProductRevised) << ")" << endl;
-  cout << endl;
-  
-  return true; 
-    
-}
-
-int main(int argc, char *argv[]) {
-  cout << endl << "WordLeaker v.0.1" << endl;
-  cout << " by Madelman (http://elligre.tk/madelman/)" << endl << endl;
-  
-    
-  if( argc < 2 ) {
-    cout << "  You must supply a filename" << endl << endl;
-    return 0;
-  }
-  
-  char* filename = argv[1];
-
-  if ( !readFIB(filename) )
-      return 1;
-  
-  POLE::Storage* storage = new POLE::Storage( filename );
-  storage->open();
-  if( storage->result() != POLE::Storage::Ok ) {
-    cout << "The file " << filename << " is not a Word document" << endl;
-    return 1;
-  }
-  
-  dumpSummary( storage );
-  // FIXME: doesn't always work
-  // but there's nothing really interesting in here
-  //dumpDocumentSummary( storage );
-  dumpRevision( storage );
-  // TODO: we don't show the GUID
-  // TODO: we don't show the macros
-  
-  delete storage;
-  
-  return 0;
-}
-
diff --git a/src/plugins/wordleaker/WordLeaker.h b/src/plugins/wordleaker/WordLeaker.h
@@ -1,287 +0,0 @@
-/* 
-   WordLeaker - Shows information about Word DOC files
-   Copyright (C) 2005 Sacha Fuentes <madelman@iname.com>
-
-   Based on poledump.c
-   Original idea from WordDumper (http://www.computerbytesman.com)
-   Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm
-   Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html
-   
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this library; see the file COPYING.  If not, write to
-   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, US
-*/
-
-#include <string>
-
-using namespace std;
-
-static char* SummaryProperties[] = {
-"Unknown", 
-"Unknown",
-"Title",
-"Subject",
-"Author",
-"Keywords",
-"Comments",
-"Template",
-"Last Saved By",
-"Revision Number",
-"Total Editing Time",
-"Last Printed",
-"Create Time/Date",
-"Last Saved Time/Date",
-"Number of Pages",
-"Number of Words",
-"Number of Characters",
-"Thumbnails",
-"Creating Application",
-"Security"
-};
-
-static char* DocumentSummaryProperties[] = {
-"Dictionary",
-"Code page",
-"Category",
-"PresentationTarget",
-"Bytes",
-"Lines",
-"Paragraphs",
-"Slides",
-"Notes",
-"HiddenSlides",
-"MMClips",
-"ScaleCrop",
-"HeadingPairs",
-"TitlesofParts",
-"Manager",
-"Company",
-"LinksUpTo"
-};
-
-string dateToString( unsigned long date ) {
-  char f[9];
-  sprintf(f, "%d/%d/%d", (date / 10000 % 100), (date / 100 % 100), (date % 100));
-  return f;
-}
-
-string idToProduct( unsigned int id ) {
-  // TODO: find the rest of ids
-  switch ( id ) {
-    case  0x6A62:
-        return "Word 97";
-    case 0x626A:
-        return "Word 98 (Mac)";
-    default:
-        return "Unknown";
-  }      
-}
-
-string lidToLanguage( unsigned int lid ) {
-  switch ( lid ) {
-    case 0x0400: 
-        return "No Proofing";
-    case 0x0401: 
-        return "Arabic";
-    case 0x0402:
-        return "Bulgarian";
-    case 0x0403:
-        return "Catalan";
-    case 0x0404:
-        return "Traditional Chinese";
-    case 0x0804:
-        return "Simplified Chinese";
-    case 0x0405:
-        return "Czech";
-    case 0x0406:
-        return "Danish";
-    case 0x0407:
-        return "German";
-    case 0x0807:
-        return "Swiss German";
-    case 0x0408:
-        return "Greek";
-    case 0x0409:
-        return "U.S. English";
-    case 0x0809:
-        return "U.K. English";
-    case 0x0c09:
-        return "Australian English";
-    case 0x040a:
-        return "Castilian Spanish";
-    case 0x080a:
-        return "Mexican Spanish";
-    case 0x040b:
-        return "Finnish";
-    case 0x040c:
-        return "French";
-    case 0x080c:
-        return "Belgian French";
-    case 0x0c0c:
-        return "Canadian French";
-    case 0x100c:
-        return "Swiss French";
-    case 0x040d:
-        return "Hebrew";
-    case 0x040e:
-        return "Hungarian";
-    case 0x040f:
-        return "Icelandic";
-    case 0x0410:
-        return "Italian";
-    case 0x0810:
-        return "Swiss Italian";
-    case 0x0411:
-        return "Japanese";
-    case 0x0412:
-        return "Korean";
-    case 0x0413:
-        return "Dutch";
-    case 0x0813:
-        return "Belgian Dutch";
-    case 0x0414:
-        return "Norwegian - Bokmal";
-    case 0x0814:
-        return "Norwegian - Nynorsk";
-    case 0x0415:
-        return "Polish";
-    case 0x0416:
-        return "Brazilian Portuguese";
-    case 0x0816:
-        return "Portuguese";
-    case 0x0417:
-        return "Rhaeto-Romanic";
-    case 0x0418:
-        return "Romanian";
-    case 0x0419:
-        return "Russian";
-    case 0x041a:
-        return "Croato-Serbian (Latin)";
-    case 0x081a:
-        return "Serbo-Croatian (Cyrillic)";
-    case 0x041b:
-        return "Slovak";
-    case 0x041c:
-        return "Albanian";
-    case 0x041d:
-        return "Swedish";
-    case 0x041e:
-        return "Thai";
-    case 0x041f:
-        return "Turkish";
-    case 0x0420:
-        return "Urdu";
-    case 0x0421:
-        return "Bahasa"; 
-    case 0x0422:
-        return "Ukrainian";
-    case 0x0423:
-        return "Byelorussian";
-    case 0x0424:
-        return "Slovenian";
-    case 0x0425:
-        return "Estonian";
-    case 0x0426:
-        return "Latvian";
-    case 0x0427:
-        return "Lithuanian";
-    case 0x0429:
-        return "Farsi";
-    case 0x042D:
-        return "Basque";
-    case 0x042F:
-        return "Macedonian";
-    case 0x0436:
-        return "Afrikaans";
-    case 0x043E:
-        return "Malaysian";  
-    default:
-        return "Unknown";
-  }
-}
-
-/*
- *  filetime_to_unixtime
- *
- *  Adapted from work in 'wv' by:
- *    Caolan McNamara (Caolan.McNamara@ul.ie)
- */
-#define HIGH32_DELTA 27111902
-#define MID16_DELTA  54590
-#define LOW16_DELTA  32768
-
-unsigned long filetime_to_unixtime (unsigned long low_time, unsigned long high_time) {
-  unsigned long low16;/* 16 bit, low    bits */
-  unsigned long mid16;/* 16 bit, medium bits */
-  unsigned long hi32;/* 32 bit, high   bits */
-  unsigned int carry;/* carry bit for subtraction */
-  int negative;/* whether a represents a negative value */
-
-/* Copy the time values to hi32/mid16/low16 */
-hi32  =  high_time;
-mid16 = low_time >> 16;
-low16 = low_time &  0xffff;
-
-/* Subtract the time difference */
-if (low16 >= LOW16_DELTA           )
-low16 -=             LOW16_DELTA        , carry = 0;
-else
-low16 += (1 << 16) - LOW16_DELTA        , carry = 1;
-
-if (mid16 >= MID16_DELTA    + carry)
-mid16 -=             MID16_DELTA + carry, carry = 0;
-else
-mid16 += (1 << 16) - MID16_DELTA - carry, carry = 1;
-
-hi32 -= HIGH32_DELTA + carry;
-
-/* If a is negative, replace a by (-1-a) */
-negative = (hi32 >= ((unsigned long)1) << 31);
-if (negative) {
-/* Set a to -a - 1 (a is hi32/mid16/low16) */
-low16 = 0xffff - low16;
-mid16 = 0xffff - mid16;
-hi32 = ~hi32;
-}
-
-/*
- *  Divide a by 10000000 (a = hi32/mid16/low16), put the rest into r.
-         * Split the divisor into 10000 * 1000 which are both less than 0xffff.
- */
-mid16 += (hi32 % 10000) << 16;
-hi32  /=       10000;
-low16 += (mid16 % 10000) << 16;
-mid16 /=       10000;
-low16 /=       10000;
-
-mid16 += (hi32 % 1000) << 16;
-hi32  /=       1000;
-low16 += (mid16 % 1000) << 16;
-mid16 /=       1000;
-low16 /=       1000;
-
-/* If a was negative, replace a by (-1-a) and r by (9999999 - r) */
-if (negative) {
-/* Set a to -a - 1 (a is hi32/mid16/low16) */
-low16 = 0xffff - low16;
-mid16 = 0xffff - mid16;
-hi32 = ~hi32;
-}
-
-/*  Do not replace this by << 32, it gives a compiler warning and
- *  it does not work
- */
-return ((((unsigned long)hi32) << 16) << 16) + (mid16 << 16) + low16;
-
-}
diff --git a/src/plugins/wordleaker/wordextractor.cc b/src/plugins/wordleaker/wordextractor.cc
@@ -0,0 +1,221 @@
+/*
+     This file is part of libextractor.
+     (C) 2006 Vidyut Samanta and Christian Grothoff
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+
+     This code depends heavily on the wordleaker code and
+     a lot of code was borrowed from wordleaker.cpp. See also
+     the README file in this directory.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+#include "../convert.h"
+#include <math.h>
+
+#include "wordleaker.h"
+#include "pole.h"
+
+extern "C" {
+
+  /**
+   * Prepend a new entry holding a private copy of keyword to the list.
+   *
+   * @param type keyword type to store in the new entry
+   * @param keyword string to copy; NULL means "nothing to add"
+   * @param next current head of the list (may be NULL)
+   * @return the new head of the list, or next unchanged if keyword is
+   *         NULL or memory could not be allocated
+   */
+  static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordType type,
+                                                const char * keyword,
+                                                struct EXTRACTOR_Keywords * next) {
+    EXTRACTOR_KeywordList * result;
+    char * copy;
+
+    if (keyword == NULL)
+      return next;
+    result = (EXTRACTOR_KeywordList*) malloc(sizeof(EXTRACTOR_KeywordList));
+    copy = strdup(keyword);
+    if ( (result == NULL) || (copy == NULL) ) {
+      // out of memory: drop this keyword instead of dereferencing NULL
+      // or linking an entry without a keyword string into the list
+      free(result);
+      free(copy);
+      return next;
+    }
+    result->next = next;
+    result->keyword = copy;
+    result->keywordType = type;
+    return result;
+  }
+
+ 
+  /**
+   * Read one typed property value at the current position of stream and
+   * render it as a newly allocated, NUL-terminated string.
+   *
+   * The value starts with a 4-byte little-endian type tag.  Handled
+   * tags: 2 (VT_I2) and 3 (VT_I4) are printed as unsigned decimal, 11
+   * (VT_BOOL) becomes "true" or "false", 30 (VT_LPSTR) is copied up to
+   * its NUL terminator or its declared length (whichever comes first),
+   * and 64 (VT_FILETIME) is converted with filetime_to_unixtime() and
+   * formatted by ctime_r().
+   *
+   * @param stream stream positioned at the type tag of the property
+   * @return string the caller must free(), or NULL for an unhandled
+   *         type, a short read, a declared string length above 16 MB
+   *         or a failed allocation
+   */
+  char * getProperty( POLE::Stream* stream ) {
+    unsigned char buffer[8];
+    unsigned long type;
+    unsigned long len;
+    unsigned long pos;
+    unsigned long t1, t2;
+    unsigned int value = 0;
+    time_t when;
+    int c;
+    char *s;
+
+    // every read is checked so that a truncated stream never leaves
+    // stale bytes in buffer to be interpreted as data
+    if (stream->read(buffer, 4) != 4)
+      return NULL;
+    // the top byte is widened before the shift so that it cannot end
+    // up in the sign bit of a promoted int
+    type = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+
+    switch (type) {
+    case 2: // VT_I2
+      if (stream->read(buffer, 2) != 2)
+        return NULL;
+      value = buffer[0] + (buffer[1] << 8);
+      break;
+    case 3: // VT_I4
+      if (stream->read(buffer, 4) != 4)
+        return NULL;
+      value = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned int) buffer[3] << 24);
+      break;
+    case 11: // VT_BOOL
+      if (stream->read(buffer, 1) != 1)
+        return NULL;
+      // compare the raw byte; casting to char and testing for -1 is
+      // never true on platforms where plain char is unsigned
+      return strdup( (buffer[0] == 0xFF) ? "true" : "false" );
+    case 30: // VT_LPSTR
+      if (stream->read(buffer, 4) != 4)
+        return NULL;
+      len = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+      if (len > 16*1024*1024)
+        return NULL;
+      s = (char*) malloc(len + 1);
+      if (s == NULL)
+        return NULL;
+      // The declared length is only an upper bound: the string normally
+      // ends at a NUL byte inside those len bytes, so stopping early is
+      // the expected case and not an error.  The bound is tested before
+      // reading so that no byte beyond the value is consumed.
+      // NOTE(review): a negative getch() result is treated as end of
+      // stream -- confirm against pole.h.
+      pos = 0;
+      while ( (pos < len) && ((c = stream->getch()) > 0) )
+        s[pos++] = (char) c;
+      s[pos] = '\0';
+      return s;
+    case 64: // VT_FILETIME
+      if (stream->read(buffer, 8) != 8)
+        return NULL;
+      t1 = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+      t2 = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + ((unsigned long) buffer[7] << 24);
+      // use a real time_t instead of aliasing an unsigned long
+      when = (time_t) filetime_to_unixtime(t1, t2);
+      s = (char*) malloc(32);
+      if (s == NULL)
+        return NULL;
+      if (ctime_r(&when, s) == NULL) {
+        free(s);
+        return NULL;
+      }
+      return s;
+    default:
+      return NULL;
+    }
+
+    // only VT_I2 and VT_I4 reach this point
+    s = (char*) malloc(16);
+    if (s != NULL)
+      snprintf(s, 16, "%u", value);
+    return s;
+  }
+
+
+  /**
+   * Decode a 32-bit little-endian value from the four bytes at b.
+   */
+  static unsigned int wordReadUInt32(const unsigned char * b) {
+    return (unsigned int) b[0]
+      | ((unsigned int) b[1] << 8)
+      | ((unsigned int) b[2] << 16)
+      | ((unsigned int) b[3] << 24);
+  }
+
+  /**
+   * Walk the property table of the "SummaryInformation" stream.
+   *
+   * Each property with an ID between 2 and 19 is read with
+   * getProperty().  In this draft the value is discarded again; no
+   * keyword is produced from it yet.  The walk stops at the first
+   * short read, so a bogus property count cannot keep the loop
+   * spinning on stale buffer contents.
+   *
+   * @param stream the opened "SummaryInformation" stream
+   */
+  static void wordScanSummary(POLE::Stream * stream) {
+    unsigned char buffer[20];
+
+    // ClassID & Offset
+    stream->seek(28);
+    if (stream->read(buffer, 20) != 20)
+      return;
+    // beginning of section
+    unsigned long begin = stream->tell();
+    // length of section (not used), then number of properties
+    if ( (stream->read(buffer, 4) != 4) ||
+         (stream->read(buffer, 4) != 4) )
+      return;
+    unsigned int nproperties = wordReadUInt32(buffer);
+
+    for (unsigned int i = 0; i < nproperties; i++) {
+      if (stream->read(buffer, 8) != 8)
+        return;
+      unsigned int propertyID = wordReadUInt32(buffer);
+      unsigned int offsetProp = wordReadUInt32(&buffer[4]);
+      if (propertyID > 1 && propertyID < 20) {
+        // remember where the table continues, visit the value, return
+        unsigned long offsetCur = stream->tell();
+        stream->seek(offsetProp + begin);
+        // TODO: map propertyID (see SummaryProperties) to a keyword
+        // type and add the value instead of dropping it
+        char * prop = getProperty(stream);
+        free(prop);
+        stream->seek(offsetCur);
+      }
+    }
+  }
+
+  /**
+   * Walk the revision ("saved by") table stored in the table stream.
+   *
+   * The table holds pairs of strings (author, file name).  Each string
+   * is skipped as one length byte, two bytes per character and one
+   * trailing byte.  In this draft the strings are only stepped over;
+   * nothing is extracted from them yet.
+   *
+   * @param stream the opened table stream
+   * @param offset position of the table inside stream (fcSttbSavedBy)
+   * @param length size of the table in bytes (lcbSttbSavedBy)
+   */
+  static void wordScanRevisions(POLE::Stream * stream,
+                                unsigned long offset,
+                                unsigned long length) {
+    // offset and length come straight from the file: require the six
+    // header bytes and refuse absurd sizes (same 16 MB limit that
+    // getProperty applies to strings)
+    if ( (length < 6) || (length > 16*1024*1024) )
+      return;
+    unsigned char * table = (unsigned char*) malloc(length);
+    if (table == NULL)
+      return;
+
+    // goto offset of revision and read all the revision history
+    stream->seek(offset);
+    if (stream->read(table, length) == length) {
+      // there are n strings, so n/2 revisions (author & file)
+      unsigned int nStrings = 2 * ((table[2] + (table[3] << 8)) / 2);
+      unsigned long where = 6;
+
+      // the length byte is only read while it lies inside the table
+      for (unsigned int i = 0; (i < nStrings) && (where < length); i++) {
+        unsigned int chars = table[where];
+        // TODO: it's unicode; the characters of string i start at
+        // table[where + 2] with a stride of two bytes
+        where += 2 + 2 * (unsigned long) chars;
+      }
+    }
+    free(table);
+  }
+
+  /**
+   * Plugin entry point: extract metadata from a Microsoft Word file.
+   *
+   * The product version and language id are taken from the header
+   * expected at offset 512 of data and added as keywords.  The file is
+   * then opened a second time through POLE to walk the
+   * "SummaryInformation" properties and the revision table in
+   * "1Table"; in this draft neither walk adds keywords yet.
+   *
+   * @param filename name of the file, used to open the OLE2 storage
+   * @param data contents of the file
+   * @param size number of bytes in data
+   * @param prev keyword list to extend
+   * @return prev, possibly with new keywords prepended
+   */
+  struct EXTRACTOR_Keywords * libextractor_word_extract(const char * filename,
+                                                        const char * data,
+                                                        size_t size,
+                                                        struct EXTRACTOR_Keywords * prev) {
+    char ver[16];
+
+    // 512 bytes in front of the header plus the 898 header bytes that
+    // are indexed below
+    if (size < 512 + 898)
+      return prev;
+    const unsigned char * fib = (const unsigned char*) &data[512];
+    unsigned int nProduct = fib[4] + (fib[5] << 8);
+    unsigned int lid = fib[6] + (fib[7] << 8);
+    unsigned long fcSttbSavedBy = wordReadUInt32(&fib[722]);
+    unsigned long lcbSttbSavedBy = wordReadUInt32(&fib[726]);
+    // TODO: not reported yet -- wIdent (fib[0..1]), envr (fib[18]),
+    // wMagicCreated (fib[34..35]) / wMagicRevised (fib[36..37]) for
+    // idToProduct(), and lProductCreated (fib[68..71]) /
+    // lProductRevised (fib[72..75]) for dateToString()
+
+    snprintf(ver, sizeof(ver), "%u", nProduct);
+    prev = addKeyword(EXTRACTOR_PRODUCTVERSION,
+                      ver,
+                      prev);
+    prev = addKeyword(EXTRACTOR_LANGUAGE,
+                      lidToLanguage(lid),
+                      prev);
+
+    POLE::Storage* storage = new POLE::Storage( filename );
+    storage->open();
+    if( storage->result() != POLE::Storage::Ok ) {
+      // not an OLE2 container: release the storage before bailing out
+      delete storage;
+      return prev;
+    }
+
+    // NOTE(review): the streams returned by storage->stream() are not
+    // released here -- confirm against pole.h whether the storage owns
+    // them.
+    POLE::Stream * stream = storage->stream( "SummaryInformation" );
+    if (stream)
+      wordScanSummary(stream);
+
+    // FIXME: should look if using 0Table or 1Table
+    stream = storage->stream( "1Table" );
+    if (stream)
+      wordScanRevisions(stream, fcSttbSavedBy, lcbSttbSavedBy);
+
+    delete storage;
+
+    return prev;
+  }
+
+}
+
diff --git a/src/plugins/wordleaker/wordleaker.cpp b/src/plugins/wordleaker/wordleaker.cpp
@@ -0,0 +1,311 @@
+/* 
+   WordLeaker - Shows information about Word DOC files
+   Copyright (C) 2005 Sacha Fuentes <madelman@iname.com>
+
+   Based on poledump.c
+   Original idea from WordDumper (http://www.computerbytesman.com)
+   Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm
+   Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html
+   
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2 of the License, or (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this library; see the file COPYING.  If not, write to
+   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, US
+*/
+
+// TAKE CARE: there's not a single check for validity of data,
+// so any malformed or malicious Word file will break it
+
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <list>
+#include <ctime>
+
+#include "pole.h"
+#include "WordLeaker.h"
+
+unsigned long fcSttbSavedBy;   // set by readFIB() from FIB bytes 722-725; dumpRevision() seeks to this offset in the "1Table" stream
+unsigned long lcbSttbSavedBy;  // set by readFIB() from FIB bytes 726-729; number of bytes dumpRevision() reads from that offset
+  
+// Reads one typed property value from the stream's current position and
+// prints it on cout, always ending the output line.
+//
+// The value starts with a 32-bit little-endian type tag.  VT_I2, VT_I4,
+// VT_BOOL, VT_LPSTR and VT_FILETIME are decoded; any other tag is reported as
+// "Unknown format <tag>".  The data comes from an untrusted file, so a value
+// that ends before all of its bytes could be read is reported as
+// "(truncated)" instead of being decoded from stale buffer contents.
+void showProperty( POLE::Stream* stream ) {
+  unsigned char buffer[8] = { 0 };
+
+  if (stream->read(buffer, 4) != 4) {
+    cout << "(truncated)" << endl;
+    return;
+  }
+  // the top byte is widened before shifting so bit 31 never lands in a signed int
+  const unsigned long type = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+
+  bool complete = true;
+  switch (type) {
+    case 2: // VT_I2
+      complete = (stream->read(buffer, 2) == 2);
+      if (complete)
+        cout << (unsigned long) (buffer[0] + (buffer[1] << 8)) << endl;
+      break;
+    case 3: // VT_I4
+      complete = (stream->read(buffer, 4) == 4);
+      if (complete)
+        cout << (buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24)) << endl;
+      break;
+    case 11: // VT_BOOL
+      complete = (stream->read(buffer, 1) == 1);
+      if (complete) {
+        // compared as an unsigned byte: where plain char is unsigned,
+        // (char) 0xFF never equals -1 and every value would print "false"
+        cout << (buffer[0] == 0xFF ? "true" : "false") << endl;
+      }
+      break;
+    case 30: // VT_LPSTR
+      // the 32-bit length prefix is skipped; the text is read up to its NUL
+      complete = (stream->read(buffer, 4) == 4);
+      if (complete) {
+        int ch;
+        // getch() is kept in an int: stored in an unsigned char, a negative
+        // end-of-stream result (assumed -- confirm against pole.h) could
+        // never compare equal to 0 and the loop would not terminate
+        while ((ch = stream->getch()) > 0)
+          cout << (unsigned char) ch;
+        cout << endl;
+      }
+      break;
+    case 64: // VT_FILETIME
+      complete = (stream->read(buffer, 8) == 8);
+      if (complete) {
+        const unsigned long low  = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+        const unsigned long high = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + ((unsigned long) buffer[7] << 24);
+        // ctime() needs a genuine time_t object; reading an unsigned long
+        // through a time_t pointer breaks whenever the two sizes differ
+        const time_t when = (time_t) filetime_to_unixtime(low, high);
+        const char* text = ctime(&when);
+        if (text)
+          cout << text;   // ctime() output already ends with a newline
+        else
+          cout << "(invalid date)" << endl;
+      }
+      break;
+    default:
+      cout << "Unknown format " << type << endl;
+  }
+
+  if (!complete)
+    cout << "(truncated)" << endl;
+}
+
+// Prints the low byte of every two-byte character of the length-prefixed
+// string that starts at buf[pos], then advances pos to the next string.
+// Returns false (after printing whatever was available) when the string runs
+// past the `size` bytes that were actually read.
+static bool printSavedByString( const unsigned char* buf, unsigned long size, unsigned long& pos ) {
+  if (pos >= size)
+    return false;
+  // only the first byte of the length field is used, as before
+  const unsigned int length = buf[pos++];
+  for (unsigned int j = 0; j < length; j++) {
+    pos++;                       // step over the second byte of the previous unit
+    if (pos >= size)
+      return false;
+    // it's unicode, for now we only get the low byte
+    cout << buf[pos];
+    pos++;
+  }
+  pos++;                         // step over the second byte of the last unit
+  return true;
+}
+
+// Shows the revision data (users and files) stored in the "1Table" stream.
+//
+// The table is located through the globals fcSttbSavedBy (offset) and
+// lcbSttbSavedBy (byte count).  The number of strings is taken from bytes 2-3
+// of the table and the strings start at byte 6; they are consumed in
+// (author, file) pairs.  Parsing stops at the first string that would run
+// past the bytes actually read, so a malformed table cannot cause reads
+// outside the buffer.
+void dumpRevision( POLE::Storage* storage ) {
+  cout << "Revision:" << endl;
+  cout << "---------" << endl << endl;
+
+  // FIXME: should look if using 0Table or 1Table
+  POLE::Stream* stream = storage->stream( "1Table" );
+  // a table shorter than its 6-byte header cannot hold any revision
+  if( !stream || lcbSttbSavedBy < 6 ) {
+    cout << "There's no revision information" << endl;
+    return;
+  }
+
+  unsigned char * buffer = new unsigned char[lcbSttbSavedBy];
+
+  // goto offset of revision
+  stream->seek(fcSttbSavedBy);
+  // read all the revision history; only `avail` bytes of buffer are valid
+  const unsigned long avail = stream->read(buffer, lcbSttbSavedBy);
+
+  // there are n strings, so n/2 revisions (author & file)
+  const unsigned int nRev = (avail >= 6) ? (buffer[2] + (buffer[3] << 8)) / 2 : 0;
+  unsigned long where = 6;
+
+  bool intact = true;
+  for (unsigned int i = 0; intact && i < nRev; i++) {
+    cout << "Rev #" << i << ": Author \"";
+    intact = printSavedByString(buffer, avail, where);
+    cout << "\" worked on file \"";
+    if (intact)
+      intact = printSavedByString(buffer, avail, where);
+    cout << "\"" << endl;
+  }
+
+  cout << endl;
+  delete[] buffer;               // allocated with new[], so delete[] is required
+}
+
+// Shows data from the "DocumentSummaryInformation" stream.
+//
+// Skips to byte 28, reads the 20 bytes that follow (ClassID & offset), and
+// treats the position after them as the beginning of the section.  The
+// section holds a 32-bit length, a 32-bit property count and then one
+// (property ID, offset) pair of 32-bit little-endian values per property.
+// Properties with an ID from 2 to 15 are printed with their name from
+// DocumentSummaryProperties via showProperty().  The walk stops as soon as a
+// read comes back short, so a truncated stream with a large property count
+// is not replayed from stale buffer contents.
+void dumpDocumentSummary( POLE::Storage* storage ) {
+  cout << "Document Summary:" << endl;
+  cout << "-----------------" << endl << endl;
+
+  POLE::Stream* stream = storage->stream( "DocumentSummaryInformation" );
+  if( !stream ) {
+    cout << "There's no document summary information" << endl;
+    return;
+  }
+
+  unsigned char buffer[32] = { 0 };
+
+  // ClassID & Offset
+  stream->seek(28);
+  stream->read(buffer, 20);
+  // beginning of section
+  const unsigned long begin = stream->tell();
+
+  // length of section (ignored), then number of properties
+  unsigned long nproperties = 0;
+  if (stream->read(buffer, 4) == 4 && stream->read(buffer, 4) == 4)
+    nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+
+  // properties
+  for (unsigned long i = 0; i < nproperties; i++) {
+    if (stream->read(buffer, 8) != 8)
+      break;
+    // top bytes are widened before shifting to keep bit 31 out of a signed int
+    const unsigned long propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+    const unsigned long offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + ((unsigned long) buffer[7] << 24);
+    if (propertyID > 1 && propertyID < 16) {
+      cout << DocumentSummaryProperties[propertyID] << ": ";
+      // remember where the property table continues before jumping to the value
+      const unsigned long offsetCur = stream->tell();
+      stream->seek(offsetProp + begin);
+      // read and show the property
+      showProperty(stream);
+      stream->seek(offsetCur);
+    }
+  }
+
+  cout << endl;
+}
+
+// Shows data from the "SummaryInformation" stream.
+//
+// Skips to byte 28, reads the 20 bytes that follow (ClassID & offset), and
+// treats the position after them as the beginning of the section.  The
+// section holds a 32-bit length, a 32-bit property count and then one
+// (property ID, offset) pair of 32-bit little-endian values per property.
+// Properties with an ID from 2 to 19 are printed with their name from
+// SummaryProperties via showProperty().  The walk stops as soon as a read
+// comes back short, so a truncated stream with a large property count is not
+// replayed from stale buffer contents.
+void dumpSummary( POLE::Storage* storage ) {
+  cout << "Summary:" << endl;
+  cout << "--------" << endl << endl;
+
+  POLE::Stream* stream = storage->stream( "SummaryInformation" );
+  if( !stream ) {
+    cout << "There's no summary information" << endl;
+    return;
+  }
+
+  unsigned char buffer[32] = { 0 };
+
+  // ClassID & Offset
+  stream->seek(28);
+  stream->read(buffer, 20);
+  // beginning of section
+  const unsigned long begin = stream->tell();
+
+  // length of section (ignored), then number of properties
+  unsigned long nproperties = 0;
+  if (stream->read(buffer, 4) == 4 && stream->read(buffer, 4) == 4)
+    nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+
+  // properties
+  for (unsigned long i = 0; i < nproperties; i++) {
+    if (stream->read(buffer, 8) != 8)
+      break;
+    // top bytes are widened before shifting to keep bit 31 out of a signed int
+    const unsigned long propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + ((unsigned long) buffer[3] << 24);
+    const unsigned long offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + ((unsigned long) buffer[7] << 24);
+    if (propertyID > 1 && propertyID < 20) {
+      cout << SummaryProperties[propertyID] << ": ";
+      // remember where the property table continues before jumping to the value
+      const unsigned long offsetCur = stream->tell();
+      stream->seek(offsetProp + begin);
+      // read and show the property
+      showProperty(stream);
+      stream->seek(offsetCur);
+    }
+  }
+
+  cout << endl;
+}
+
+// Reads the header of the file and prints the product version, language and
+// the products that created and last revised the document.
+//
+// The header is read directly from byte 512 of the file.  Besides printing,
+// the function stores the revision table offset and size in the globals
+// fcSttbSavedBy and lcbSttbSavedBy.
+//
+// Returns false when the file cannot be opened or is too short to contain
+// every header field used here (the last one ends at header byte 729);
+// returns true otherwise.
+bool readFIB( char* filename ) {
+  fstream file;
+
+  file.open( filename, std::ios::binary | std::ios::in );
+  if( !file.good() ) {
+    cout << "Can't find the file" << endl;
+    return false;
+  }
+
+  // zero-filled so that nothing is ever decoded from uninitialised memory
+  unsigned char buffer[898] = { 0 };
+  file.seekg( 512 );
+  file.read( (char*) buffer, sizeof buffer );
+  // gcount() is the number of bytes actually delivered, even on a short read
+  const std::streamsize got = file.gcount();
+  file.close();
+
+  if( got < 730 ) {
+    cout << "The file " << filename << " is not a Word document" << endl;
+    return false;
+  }
+
+  // all fields are little-endian; top bytes are widened before shifting so
+  // bit 31 never lands in a signed int
+  const unsigned int nProduct = buffer[4] + (buffer[5] << 8);
+  const unsigned int lid = buffer[6] + (buffer[7] << 8);
+  const unsigned int wMagicCreated = buffer[34] + (buffer[35] << 8);
+  const unsigned int wMagicRevised = buffer[36] + (buffer[37] << 8);
+  const unsigned long lProductCreated = buffer[68] + (buffer[69] << 8) + (buffer[70] << 16) + ((unsigned long) buffer[71] << 24);
+  const unsigned long lProductRevised = buffer[72] + (buffer[73] << 8) + (buffer[74] << 16) + ((unsigned long) buffer[75] << 24);
+  fcSttbSavedBy = buffer[722] + (buffer[723] << 8) + (buffer[724] << 16) + ((unsigned long) buffer[725] << 24);
+  lcbSttbSavedBy = buffer[726] + (buffer[727] << 8) + (buffer[728] << 16) + ((unsigned long) buffer[729] << 24);
+
+  cout << "File: " << filename << endl;
+  cout << "Product version: " << nProduct << endl;
+  cout << "Language: " << lidToLanguage(lid) << endl;
+  cout << "Created by: " << idToProduct(wMagicCreated) << " (Build " << dateToString(lProductCreated) << ")" << endl;
+  cout << "Revised by: " << idToProduct(wMagicRevised) << " (Build " << dateToString(lProductRevised) << ")" << endl;
+  cout << endl;
+
+  return true;
+}
+
+#if HAVE_MAIN
+// Command-line entry point: prints the banner, then the header, the summary
+// and the revision history of the Word document named by argv[1].
+// Returns 0 on success or when no filename was given, 1 when the file cannot
+// be read as a Word document.
+int main(int argc, char *argv[]) {
+  cout << endl << "WordLeaker v.0.1" << endl;
+  cout << " by Madelman (http://elligre.tk/madelman/)" << endl << endl;
+
+  if( argc < 2 ) {
+    cout << "  You must supply a filename" << endl << endl;
+    return 0;
+  }
+
+  char* filename = argv[1];
+
+  if ( !readFIB(filename) )
+    return 1;
+
+  // automatic storage: released on every return path, including the error
+  // return below, which previously leaked the heap-allocated object
+  POLE::Storage storage( filename );
+  storage.open();
+  if( storage.result() != POLE::Storage::Ok ) {
+    cout << "The file " << filename << " is not a Word document" << endl;
+    return 1;
+  }
+
+  dumpSummary( &storage );
+  // FIXME: doesn't always work
+  // but there's nothing really interesting in here
+  //dumpDocumentSummary( &storage );
+  dumpRevision( &storage );
+  // TODO: we don't show the GUID
+  // TODO: we don't show the macros
+
+  return 0;
+}
+#endif
diff --git a/src/plugins/wordleaker/wordleaker.h b/src/plugins/wordleaker/wordleaker.h
@@ -0,0 +1,287 @@
+/* 
+   WordLeaker - Shows information about Word DOC files
+   Copyright (C) 2005 Sacha Fuentes <madelman@iname.com>
+
+   Based on poledump.c
+   Original idea from WordDumper (http://www.computerbytesman.com)
+   Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm
+   Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html
+   
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2 of the License, or (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this library; see the file COPYING.  If not, write to
+   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, US
+*/
+
+#include <string>
+
+using namespace std;
+
+// Display names for the properties of the "SummaryInformation" stream,
+// indexed by property ID (dumpSummary() looks up IDs 2 to 19).
+// The elements point at string literals, hence const char*.
+static const char* SummaryProperties[] = {
+"Unknown",
+"Unknown",
+"Title",
+"Subject",
+"Author",
+"Keywords",
+"Comments",
+"Template",
+"Last Saved By",
+"Revision Number",
+"Total Editing Time",
+"Last Printed",
+"Create Time/Date",
+"Last Saved Time/Date",
+"Number of Pages",
+"Number of Words",
+"Number of Characters",
+"Thumbnails",
+"Creating Application",
+"Security"
+};
+
+// Display names for the properties of the "DocumentSummaryInformation"
+// stream, indexed by property ID (dumpDocumentSummary() looks up IDs 2 to 15).
+// The elements point at string literals, hence const char*.
+static const char* DocumentSummaryProperties[] = {
+"Dictionary",
+"Code page",
+"Category",
+"PresentationTarget",
+"Bytes",
+"Lines",
+"Paragraphs",
+"Slides",
+"Notes",
+"HiddenSlides",
+"MMClips",
+"ScaleCrop",
+"HeadingPairs",
+"TitlesofParts",
+"Manager",
+"Company",
+"LinksUpTo"
+};
+
+// Formats the last six decimal digits of `date` as three '/'-separated
+// numbers: digits 5-6, digits 3-4 and digits 1-2 counted from the right
+// (presumably year, month and day of a build date -- confirm against the
+// Word format notes).  The numbers are not zero-padded, e.g. 20051231 gives
+// "5/12/31".
+string dateToString( unsigned long date ) {
+  // each field is reduced modulo 100, so the text is at most "99/99/99"
+  // (8 characters plus the terminating NUL)
+  char f[9];
+  // %lu matches the unsigned long arguments; %d did not
+  sprintf(f, "%lu/%lu/%lu", (date / 10000 % 100), (date / 100 % 100), (date % 100));
+  return f;
+}
+
+// Maps the magic number found in the file header to the name of the product
+// that wrote the file; any value not listed here yields "Unknown".
+string idToProduct( unsigned int id ) {
+  // TODO: find the rest of ids
+  if (id == 0x6A62)
+    return "Word 97";
+  if (id == 0x626A)
+    return "Word 98 (Mac)";
+  return "Unknown";
+}
+
+// Translates a language identifier from the file header into a language
+// name.  Identifiers missing from the table below yield "Unknown".  The
+// returned pointer refers to a string literal and must not be freed.
+const char * lidToLanguage( unsigned int lid ) {
+  static const struct {
+    unsigned int id;
+    const char * name;
+  } languages[] = {
+    { 0x0400, "No Proofing" },
+    { 0x0401, "Arabic" },
+    { 0x0402, "Bulgarian" },
+    { 0x0403, "Catalan" },
+    { 0x0404, "Traditional Chinese" },
+    { 0x0804, "Simplified Chinese" },
+    { 0x0405, "Czech" },
+    { 0x0406, "Danish" },
+    { 0x0407, "German" },
+    { 0x0807, "Swiss German" },
+    { 0x0408, "Greek" },
+    { 0x0409, "U.S. English" },
+    { 0x0809, "U.K. English" },
+    { 0x0c09, "Australian English" },
+    { 0x040a, "Castilian Spanish" },
+    { 0x080a, "Mexican Spanish" },
+    { 0x040b, "Finnish" },
+    { 0x040c, "French" },
+    { 0x080c, "Belgian French" },
+    { 0x0c0c, "Canadian French" },
+    { 0x100c, "Swiss French" },
+    { 0x040d, "Hebrew" },
+    { 0x040e, "Hungarian" },
+    { 0x040f, "Icelandic" },
+    { 0x0410, "Italian" },
+    { 0x0810, "Swiss Italian" },
+    { 0x0411, "Japanese" },
+    { 0x0412, "Korean" },
+    { 0x0413, "Dutch" },
+    { 0x0813, "Belgian Dutch" },
+    { 0x0414, "Norwegian - Bokmal" },
+    { 0x0814, "Norwegian - Nynorsk" },
+    { 0x0415, "Polish" },
+    { 0x0416, "Brazilian Portuguese" },
+    { 0x0816, "Portuguese" },
+    { 0x0417, "Rhaeto-Romanic" },
+    { 0x0418, "Romanian" },
+    { 0x0419, "Russian" },
+    { 0x041a, "Croato-Serbian (Latin)" },
+    { 0x081a, "Serbo-Croatian (Cyrillic)" },
+    { 0x041b, "Slovak" },
+    { 0x041c, "Albanian" },
+    { 0x041d, "Swedish" },
+    { 0x041e, "Thai" },
+    { 0x041f, "Turkish" },
+    { 0x0420, "Urdu" },
+    { 0x0421, "Bahasa" },
+    { 0x0422, "Ukrainian" },
+    { 0x0423, "Byelorussian" },
+    { 0x0424, "Slovenian" },
+    { 0x0425, "Estonian" },
+    { 0x0426, "Latvian" },
+    { 0x0427, "Lithuanian" },
+    { 0x0429, "Farsi" },
+    { 0x042D, "Basque" },
+    { 0x042F, "Macedonian" },
+    { 0x0436, "Afrikaans" },
+    { 0x043E, "Malaysian" }
+  };
+  const unsigned int count = sizeof(languages) / sizeof(languages[0]);
+
+  // identifiers are unique in the table, so the first match is the only match
+  for (unsigned int k = 0; k < count; k++) {
+    if (languages[k].id == lid)
+      return languages[k].name;
+  }
+  return "Unknown";
+}
+
+/*
+ *  filetime_to_unixtime
+ *
+ *  Adapted from work in 'wv' by:
+ *    Caolan McNamara (Caolan.McNamara@ul.ie)
+ *
+ *  Treats (high_time, low_time) as the upper and lower halves of one wide
+ *  integer, subtracts the constant formed by the three *_DELTA macros below
+ *  and divides the difference by 10000000, returning the quotient.
+ *
+ *  The constant is 27111902 * 2^32 + 54590 * 2^16 + 32768
+ *  = 116444736000000000, i.e. 11644473600 * 10000000 (presumably the
+ *  distance between the FILETIME and Unix epochs in FILETIME ticks --
+ *  confirm against the FILETIME documentation).
+ *
+ *  The arithmetic is carried out on three separate words (hi32, mid16,
+ *  low16) so that no intermediate value needs more than 32 bits.
+ *  NOTE(review): the code appears to have been written for a 32-bit
+ *  unsigned long; behaviour with a wider unsigned long, in particular for
+ *  inputs smaller than the constant, should be confirmed.
+ */
+#define HIGH32_DELTA 27111902
+#define MID16_DELTA  54590
+#define LOW16_DELTA  32768
+
+unsigned long filetime_to_unixtime (unsigned long low_time, unsigned long high_time) {
+  unsigned long low16;/* 16 bit, low    bits */
+  unsigned long mid16;/* 16 bit, medium bits */
+  unsigned long hi32;/* 32 bit, high   bits */
+  unsigned int carry;/* carry bit for subtraction */
+  int negative;/* whether a represents a negative value */
+
+/* Copy the time values to hi32/mid16/low16; "a" in the comments below
+ * names the number formed by these three words together */
+hi32  =  high_time;
+mid16 = low_time >> 16;
+low16 = low_time &  0xffff;
+
+/* Subtract the time difference word by word, lowest word first; carry
+ * records a borrow that the next higher word has to absorb */
+if (low16 >= LOW16_DELTA           )
+low16 -=             LOW16_DELTA        , carry = 0;
+else
+low16 += (1 << 16) - LOW16_DELTA        , carry = 1;
+
+if (mid16 >= MID16_DELTA    + carry)
+mid16 -=             MID16_DELTA + carry, carry = 0;
+else
+mid16 += (1 << 16) - MID16_DELTA - carry, carry = 1;
+
+hi32 -= HIGH32_DELTA + carry; /* unsigned: wraps around when the input is smaller than the delta */
+
+/* If a is negative, replace a by (-1-a); a counts as negative when hi32
+ * is at least 2^31 */
+negative = (hi32 >= ((unsigned long)1) << 31);
+if (negative) {
+/* Set a to -a - 1 (a is hi32/mid16/low16) */
+low16 = 0xffff - low16;
+mid16 = 0xffff - mid16;
+hi32 = ~hi32;
+}
+
+/*
+ *  Divide a by 10000000 (a = hi32/mid16/low16); the remainder is dropped.
+ *  Split the divisor into 10000 * 1000 which are both less than 0xffff.
+ *  Each pass is a long division: the remainder of a word is shifted left
+ *  by 16 and added to the next lower word before that word is divided.
+ */
+mid16 += (hi32 % 10000) << 16;
+hi32  /=       10000;
+low16 += (mid16 % 10000) << 16;
+mid16 /=       10000;
+low16 /=       10000;
+
+mid16 += (hi32 % 1000) << 16;
+hi32  /=       1000;
+low16 += (mid16 % 1000) << 16;
+mid16 /=       1000;
+low16 /=       1000;
+
+/* If a was negative, replace a by (-1-a) again to restore the sign; no
+ * remainder is kept, so there is nothing else to adjust */
+if (negative) {
+/* Set a to -a - 1 (a is hi32/mid16/low16) */
+low16 = 0xffff - low16;
+mid16 = 0xffff - mid16;
+hi32 = ~hi32;
+}
+
+/*  Do not replace this by << 32, it gives a compiler warning and
+ *  it does not work
+ *  (the three words are recombined here into the single return value)
+ */
+return ((((unsigned long)hi32) << 16) << 16) + (mid16 << 16) + low16;
+
+}

	libextractor GNU libextractor
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	configure.ac	\|	1	+
M	src/include/extractor.h	\|	2	++
M	src/main/extractor.c	\|	3	++-
A	src/plugins/wordleaker/Makefile.am	\|	25	+++++++++++++++++++++++++
A	src/plugins/wordleaker/SYMBOLS	\|	1	+
D	src/plugins/wordleaker/WordLeaker.cpp	\|	310	-------------------------------------------------------------------------------
D	src/plugins/wordleaker/WordLeaker.h	\|	287	-------------------------------------------------------------------------------
A	src/plugins/wordleaker/wordextractor.cc	\|	221	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/plugins/wordleaker/wordleaker.cpp	\|	311	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/plugins/wordleaker/wordleaker.h	\|	287	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++