integrating wordleaker into ole2 plugin, switching to libgsf - libextractor

commit 7406fa16dc06ff094b9d2535999d6e695b3c80b3
parent d72714d2581e1d0c54b3bb15b46aa9bd4784768d
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sat, 29 Apr 2006 04:49:06 +0000

integrating wordleaker into ole2 plugin, switching to libgsf

Diffstat:
M ChangeLog  | 4 ++++
M configure.ac  | 5 +++--
A m4/abi-gsf.m4  | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/include/extractor.h  | 8 ++++++++
M src/main/extractor.c  | 53 ++++++++++++++++++++++++++++++-----------------------
M src/plugins/Makefile.am  | 7 +++++--
M src/plugins/hash/rmd160extractor.c  | 9 +++++----
M src/plugins/ole2/Makefile.am  | 7 +++----
R src/plugins/wordleaker/README -> src/plugins/ole2/README  | 0 
M src/plugins/ole2/ole2extractor.c  | 2401 ++++++++++++-------------------------------------------------------------------
D src/plugins/wordleaker/Makefile.am  | 25 -------------------------
D src/plugins/wordleaker/SYMBOLS  | 1 -
D src/plugins/wordleaker/pole.cpp  | 1271 -------------------------------------------------------------------------------
D src/plugins/wordleaker/pole.h  | 149 -------------------------------------------------------------------------------
D src/plugins/wordleaker/wordextractor.cc  | 486 -------------------------------------------------------------------------------
D src/plugins/wordleaker/wordleaker.cpp  | 308 -------------------------------------------------------------------------------
D src/plugins/wordleaker/wordleaker.h  | 124 -------------------------------------------------------------------------------

17 files changed, 494 insertions(+), 4442 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,7 @@
+Fri Apr 28 22:26:43 PDT 2006
+	Integrated wordleaker into OLE2 plugin.  Changed OLE2 plugin to use
+	libgsf (new dependency!).
+
 Fri Apr 28 16:18:26 PDT 2006
 	Fixing some i18n issues.  Specifically, EXTRACTOR_getKeywordTypeAsString
 	will now never return the translated version of the keyword type
diff --git a/configure.ac b/configure.ac
@@ -313,6 +313,7 @@ AC_ARG_ENABLE(exiv2,
 AM_CONDITIONAL(HAVE_EXIV2, test x$exiv2 != x0)
 AC_DEFINE_UNQUOTED([HAVE_EXIV2], $exiv2, [We use EXIV2])
 
+ABI_GSF
 
 AC_SUBST(CPPFLAGS)
 AC_SUBST(LDFLAGS)
@@ -358,9 +359,9 @@ else
  AC_MSG_NOTICE([NOTICE: printable plugins enabled])
 fi
 
-if test "x$without_glib" = "xtrue"
+if test "x$have_gsf" != "xtrue"
 then
- AC_MSG_NOTICE([NOTICE: glib not used, no OLE2 (MS Office) support])
+ AC_MSG_NOTICE([NOTICE: libgsf not found, no OLE2 (MS Office) support])
 fi
 
 if test "x$without_gtk" = "xtrue"
diff --git a/m4/abi-gsf.m4 b/m4/abi-gsf.m4
@@ -0,0 +1,78 @@
+# start: abi/ac-helpers/abi-gsf.m4
+# 
+# Copyright (C) 2005 Christian Neumair
+# 
+# This file is free software; you may copy and/or distribute it with
+# or without modifications, as long as this notice is preserved.
+# This software is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY, to the extent permitted by law; without even
+# the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE.
+#
+# The above license applies to THIS FILE ONLY, the GNUnet code
+# itself may be copied and distributed under the terms of the GNU
+# GPL, see COPYING for more details
+#
+# Usage: ABI_GSF
+
+# Check for gsf
+
+AC_DEFUN([ABI_GSF], [
+
+test_gsf=true
+have_gsf=false
+
+test_gsf_gnome=true
+have_gsf_gnome=false
+
+AC_ARG_ENABLE(gsf,[  --disable-gsf Turn off gsf], [
+	if test "x$enableval" = "xno"; then
+		test_gsf=false
+	fi
+])
+
+AC_ARG_ENABLE(gsf-gnome,[  --disable-gsf-gnome Turn off gsf-gnome], [
+	if test "x$enableval" = "xno"; then
+		test_gsf_gnome=false
+	fi
+])
+
+if test "x$test_gsf" = "xtrue" ; then
+	PKG_CHECK_MODULES(GSF,[libgsf-1 >= 1.10], [
+		have_gsf=true
+		GSF_CFLAGS="$GSF_CFLAGS -DHAVE_GSF"
+	],
+	[
+		have_gsf=false
+	])
+fi
+
+if test "x$have_gsf" = "xtrue" -a "x$test_gsf_gnome" = "xtrue" ; then
+	PKG_CHECK_MODULES(GSF_GNOME, [libgsf-gnome-1 >= 1.10], [
+		have_gsf_gnome=true
+		GSF_GNOME_CFLAGS="$GSF_GNOME_CFLAGS -DHAVE_GSF_GNOME"
+	],
+	[
+		have_gsf_gnome=false
+	])
+fi
+
+AC_SUBST(GSF_CFLAGS)
+AC_SUBST(GSF_LIBS)
+
+AC_SUBST(GSF_GNOME_CFLAGS)
+AC_SUBST(GSF_GNOME_LIBS)
+
+AM_CONDITIONAL(WITH_GSF, test "x$have_gsf" = "xtrue")
+AM_CONDITIONAL(WITH_GSF_GNOME, test "x$have_gsf_gnome" = "xtrue")
+
+if test "x$have_gsf_gnome" = "xtrue" ; then
+	abi_gsf_message="yes, with GNOME support"
+else if test "x$have_gsf" = "xtrue" ; then
+	abi_gsf_message="yes, without GNOME support"
+else
+	abi_gsf_message="no"
+fi
+fi
+
+])
diff --git a/src/include/extractor.h b/src/include/extractor.h
@@ -152,6 +152,14 @@ typedef enum {
   EXTRACTOR_MODIFIED_BY_SOFTWARE = 99,
   EXTRACTOR_REVISION_HISTORY = 100,
   EXTRACTOR_LOWERCASE = 101,
+  EXTRACTOR_COMPANY = 102,
+  EXTRACTOR_GENERATOR = 103,
+  EXTRACTOR_CHARACTER_SET = 104,
+  EXTRACTOR_LINE_COUNT = 105,
+  EXTRACTOR_PARAGRAPH_COUNT = 106,
+  EXTRACTOR_EDITING_CYCLES = 107,
+  EXTRACTOR_SCALE = 108,
+  EXTRACTOR_MANAGER = 109,
 } EXTRACTOR_KeywordType;
 
 /**
diff --git a/src/main/extractor.c b/src/main/extractor.c
@@ -41,113 +41,121 @@
  * The sources of keywords as strings.
  */
 static const char *keywordTypes[] = {
-  gettext_noop("unknown"),
+  gettext_noop("unknown"), /* 0 */
   gettext_noop("filename"),
   gettext_noop("mimetype"),
   gettext_noop("title"),
   gettext_noop("author"),
-  gettext_noop("artist"),
+  gettext_noop("artist"), /* 5 */
   gettext_noop("description"),
   gettext_noop("comment"),
   gettext_noop("date"),
   gettext_noop("publisher"),
-  gettext_noop("language"),
+  gettext_noop("language"), /* 10 */
   gettext_noop("album"),
   gettext_noop("genre"),
   gettext_noop("location"),
   gettext_noop("version"),
-  gettext_noop("organization"),
+  gettext_noop("organization"), /* 15 */
   gettext_noop("copyright"),
   gettext_noop("subject"),
   gettext_noop("keywords"),
   gettext_noop("contributor"),
-  gettext_noop("resource-type"),
+  gettext_noop("resource-type"), /* 20 */
   gettext_noop("format"),
   gettext_noop("resource-identifier"),
   gettext_noop("source"),
   gettext_noop("relation"),
-  gettext_noop("coverage"),
+  gettext_noop("coverage"), /* 25 */
   gettext_noop("software"),
   gettext_noop("disclaimer"),
   gettext_noop("warning"),
   gettext_noop("translated"),
-  gettext_noop("creation date"),
+  gettext_noop("creation date"), /* 30 */
   gettext_noop("modification date"),
   gettext_noop("creator"),
   gettext_noop("producer"),
   gettext_noop("page count"),
-  gettext_noop("page orientation"),
+  gettext_noop("page orientation"), /* 35 */
   gettext_noop("paper size"),
   gettext_noop("used fonts"),
   gettext_noop("page order"),
   gettext_noop("created for"),
-  gettext_noop("magnification"),
+  gettext_noop("magnification"), /* 40 */
   gettext_noop("release"),
   gettext_noop("group"),
   gettext_noop("size"),
   gettext_noop("summary"),
-  gettext_noop("packager"),
+  gettext_noop("packager"), /* 45 */
   gettext_noop("vendor"),
   gettext_noop("license"),
   gettext_noop("distribution"),
   gettext_noop("build-host"),
-  gettext_noop("os"),
+  gettext_noop("operating system"), /* 50 */
   gettext_noop("dependency"),
   gettext_noop("MD4"),
   gettext_noop("MD5"),
   gettext_noop("SHA-0"),
-  gettext_noop("SHA-1"),
+  gettext_noop("SHA-1"), /* 55 */
   gettext_noop("RipeMD160"),
   gettext_noop("resolution"),
   gettext_noop("category"),
   gettext_noop("book title"),
-  gettext_noop("priority"),
+  gettext_noop("priority"), /* 60 */
   gettext_noop("conflicts"),
   gettext_noop("replaces"),
   gettext_noop("provides"),
   gettext_noop("conductor"),
-  gettext_noop("interpreter"),
+  gettext_noop("interpreter"), /* 65 */
   gettext_noop("owner"),
   gettext_noop("lyrics"),
   gettext_noop("media type"),
   gettext_noop("contact"),
-  gettext_noop("binary thumbnail data"),
+  gettext_noop("binary thumbnail data"), /* 70 */
   gettext_noop("publication date"),
   gettext_noop("camera make"),
   gettext_noop("camera model"),
   gettext_noop("exposure"),
-  gettext_noop("aperture"),
+  gettext_noop("aperture"), /* 75 */
   gettext_noop("exposure bias"),
   gettext_noop("flash"),
   gettext_noop("flash bias"),
   gettext_noop("focal length"),
-  gettext_noop("focal length (35mm equivalent)"),
+  gettext_noop("focal length (35mm equivalent)"), /* 80 */
   gettext_noop("iso speed"),
   gettext_noop("exposure mode"),
   gettext_noop("metering mode"),
   gettext_noop("macro mode"),
-  gettext_noop("image quality"),
+  gettext_noop("image quality"), /* 85 */
   gettext_noop("white balance"),
   gettext_noop("orientation"),
   gettext_noop("template"),
   gettext_noop("split"),
-  gettext_noop("product version"),
+  gettext_noop("product version"), /* 90 */
   gettext_noop("last saved by"),
   gettext_noop("last printed"),
   gettext_noop("word count"),
   gettext_noop("character count"),
-  gettext_noop("total editing time"),
+  gettext_noop("total editing time"), /* 95 */
   gettext_noop("thumbnails"),
   gettext_noop("security"),
   gettext_noop("created by software"),
   gettext_noop("modified by software"),
-  gettext_noop("revision history"),
+  gettext_noop("revision history"), /* 100 */
   gettext_noop("lower case conversion"),
+  gettext_noop("company"),
+  gettext_noop("generator"),
+  gettext_noop("character set"),
+  gettext_noop("line count"), /* 105 */
+  gettext_noop("paragraph count"), 
+  gettext_noop("editing cycles"),
+  gettext_noop("scale"),
+  gettext_noop("manager"),
   NULL,
 };
 
 /* the number of keyword types (for bounds-checking) */
-#define HIGHEST_TYPE_NUMBER 102
+#define HIGHEST_TYPE_NUMBER 110
 
 #ifdef HAVE_LIBOGG
 #if HAVE_VORBIS
@@ -211,7 +219,6 @@ libextractor_riff:\
 libextractor_mpeg:\
 libextractor_elf:\
 libextractor_oo:\
-libextractor_word:\
 libextractor_asf"
 
 #define DEFAULT_LIBRARIES EXSO OLESO OGGSO QTSO DEFSO
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
@@ -1,7 +1,9 @@
 include Makefile-plugins.am
 
 if HAVE_GLIB
-oledir=ole2
+if WITH_GSF
+ oledir=ole2
+endif
 if HAVE_GTK
 thumbdir=thumbnail
 endif
@@ -15,6 +17,7 @@ if HAVE_EXIV2
  exiv2dir=exiv2
 endif
 
+
 if HAVE_XPDF
  xpdfdir=pdf
 else
@@ -23,7 +26,7 @@ endif
 
 # toggle for development
 # SUBDIRS = . 
-SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(thumbdir) $(exiv2dir) wordleaker
+SUBDIRS = . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(thumbdir) $(exiv2dir)
 
 
 if HAVE_VORBISFILE
diff --git a/src/plugins/hash/rmd160extractor.c b/src/plugins/hash/rmd160extractor.c
@@ -619,10 +619,11 @@ static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordList *oldhead,
 #define rmd160_new() rmd160_copy(NULL,NULL)
 
 
-struct EXTRACTOR_Keywords * libextractor_hash_rmd160_extract(const char * filename,
-							     char * data,
-							     size_t size,
-							     struct EXTRACTOR_Keywords * prev) {
+struct EXTRACTOR_Keywords * 
+libextractor_hash_rmd160_extract(const char * filename,
+				 const unsigned char * data,
+				 size_t size,
+				 struct EXTRACTOR_Keywords * prev) {
   unsigned char bin_buffer[MAX_DIGEST_BIN_BYTES];
   char hash[8 * MAX_DIGEST_BIN_BYTES];
   char buf[16];
diff --git a/src/plugins/ole2/Makefile.am b/src/plugins/ole2/Makefile.am
@@ -4,12 +4,11 @@ include ../Makefile-plugins.am
 plugin_LTLIBRARIES = \
   libextractor_ole2.la
 
-AM_CFLAGS = $(GLIB_CFLAGS)
-
 libextractor_ole2_la_CFLAGS = \
-  $(GLIB_CFLAGS) 
+  $(GSF_CFLAGS) 
 libextractor_ole2_la_LIBADD = \
-  $(LIBADD) $(GLIB_LIBS) -lgobject-2.0 \
+  $(LIBADD) $(GSF_LIBS) \
+  $(top_builddir)/src/plugins/libconvert.la \
   $(top_builddir)/src/main/libextractor.la
 libextractor_ole2_la_LDFLAGS = \
   $(PLUGINFLAGS) $(retaincommand) 
diff --git a/src/plugins/wordleaker/README b/src/plugins/ole2/README
diff --git a/src/plugins/ole2/ole2extractor.c b/src/plugins/ole2/ole2extractor.c
@@ -1,6 +1,6 @@
 /*
      This file is part of libextractor.
-     (C) 2004,2005 Vidyut Samanta and Christian Grothoff
+     (C) 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff
 
      libextractor is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -17,1217 +17,30 @@
      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
      Boston, MA 02111-1307, USA.
 
-     Most of the code in this directory comes from
-     libgsf 1.10.1 (Licensed under GPL/LGPL).
-
-     libgsf -- The G Structured File Library
+     This code makes extensive use of libgsf
+     -- the GNOME Structured File Library
      Copyright (C) 2002-2004 Jody Goldberg (jody@gnome.org)
 
+     Part of this code was borrowed from wordleaker.cpp. See also
+     the README file in this directory.
 */
 
 #include "platform.h"
 #include "extractor.h"
+#include "../convert.h"
+
 #include <glib-object.h>
 #include <string.h>
 #include <stdio.h>
 #include <ctype.h>
 
-#define DEBUG_OLE2 0
-
-#if DEBUG_OLE2
-#define d(code)	do { code } while (0)
-#define warning printf
-#else
-#define d(code)
- static void warning(const char * format, ...) {}
-#endif
-
-#undef g_return_val_if_fail
-#define g_return_val_if_fail(a,b) if (! (a)) return (b);
- 
-/* *********************** formerly gsf-input.c ************* */
-
-typedef struct GsfInput {
-	off_t size;
-	off_t cur_offset;
-	char * name;
-	const unsigned char * buf;
-	int needs_free;
-} GsfInput;
-
-
-static void
-gsf_input_init (GsfInput * input)
-{
-	input->size = 0;
-	input->cur_offset = 0;
-	input->name = NULL;
-	input->buf = NULL;
-}
-
-/**
- * gsf_input_memory_new:
- * @buf: The input bytes
- * @length: The length of @buf
- * @needs_free: Whether you want this memory to be free'd at object destruction
- *
- * Returns: A new #GsfInputMemory
- */
-static GsfInput *
-gsf_input_new (const unsigned char * buf,
-	       off_t length,
-	       int needs_free)
-{
-	GsfInput *mem = malloc(sizeof(GsfInput));
-	if (mem == NULL)
-		return NULL;
-	gsf_input_init(mem);
-	mem->buf = buf;
-	mem->size = length;
-	mem->needs_free = needs_free;
-	return mem;
-}
-
-static void
-gsf_input_finalize (GsfInput * input)
-{
-	if (input->name != NULL) {
-		free (input->name);
-		input->name = NULL;
-	}
-	if ( (input->buf) && input->needs_free)
-		free((void*) input->buf);
-	free(input);
-}
-
-/**
- * gsf_input_set_name :
- * @input :
- * @name :
- *
- * protected.
- *
- * Returns : TRUE if the assignment was ok.
- **/
-static int
-gsf_input_set_name (GsfInput *input, char const *name)
-{
-	char *buf;
-
-	g_return_val_if_fail (input != NULL, 0);
-
-	buf = strdup (name);
-	if (input->name != NULL)
-		free (input->name);
-	input->name = buf;
-	return 1;
-}
-
-
-
-static GsfInput *
-gsf_input_dup (GsfInput *src)
-{
-	GsfInput * dst = malloc(sizeof(GsfInput));
-	if (dst == NULL)
-		return NULL;
-        gsf_input_init(dst);
-	dst->buf = src->buf;
-	dst->needs_free = 0;
-	dst->size = src->size;
-	if (src->name != NULL)
-		gsf_input_set_name (dst, src->name);
-	dst->cur_offset = src->cur_offset;
-	return dst;
-}
-
-static const unsigned char *
-gsf_input_read (GsfInput * mem, size_t num_bytes, unsigned char * optional_buffer)
-{
-	const unsigned char *src = mem->buf;
-	if (src == NULL)
-		return NULL;
-	if (optional_buffer) {
-		memcpy (optional_buffer, src + mem->cur_offset, num_bytes);
-		mem->cur_offset += num_bytes;
-
-		return optional_buffer;
-	} else {
-		const unsigned char * ret = src + mem->cur_offset;
-		mem->cur_offset += num_bytes;
-		return ret;
-	}
-}
-
-/**
- * gsf_input_size :
- * @input : The input
- *
- * Looks up and caches the number of bytes in the input
- *
- * Returns :  the size or -1 on error
- **/
-static off_t
-gsf_input_size (GsfInput *input)
-{
-	g_return_val_if_fail (input != NULL, -1);
-	return input->size;
-}
-
-/**
- * gsf_input_seek :
- * @input :
- * @offset :
- * @whence :
- *
- * Returns TRUE on error.
- **/
-static int
-gsf_input_seek (GsfInput *input, off_t offset, int whence)
-{
-	off_t pos = offset;
-
-	g_return_val_if_fail (input != NULL, 1);
-
-	switch (whence) {
-	case SEEK_SET : break;
-	case SEEK_CUR : pos += input->cur_offset;	break;
-	case SEEK_END : pos += input->size;		break;
-	default : return 1;
-	}
-
-	if (pos < 0 || pos > input->size)
-		return 1;
-
-	/*
-	 * If we go nowhere, just return.  This in particular handles null
-	 * seeks for streams with no seek method.
-	 */
-	if (pos == input->cur_offset)
-		return 0;
-
-	input->cur_offset = pos;
-	return 0;
-}
-
-
-
-
-/* ******************** formerly gsf-utils.c **************** */
-
-
-/* Do this the ugly way so that we don't have to worry about alignment */
-#define GSF_LE_GET_GUINT8(p) (*(guint8 const *)(p))
-#define GSF_LE_GET_GUINT16(p)				\
-	(guint16)((((guint8 const *)(p))[0] << 0)  |	\
-		  (((guint8 const *)(p))[1] << 8))
-#define GSF_LE_GET_GUINT32(p)				\
-	(guint32)((((guint8 const *)(p))[0] << 0)  |	\
-		  (((guint8 const *)(p))[1] << 8)  |	\
-		  (((guint8 const *)(p))[2] << 16) |	\
-		  (((guint8 const *)(p))[3] << 24))
-
-#define GSF_LE_GET_GUINT64(p) (gsf_le_get_guint64 (p))
-#define GSF_LE_GET_GINT64(p) ((gint64)GSF_LE_GET_GUINT64(p))
-#define GSF_LE_GET_GINT8(p) ((gint8)GSF_LE_GET_GUINT8(p))
-#define GSF_LE_GET_GINT16(p) ((gint16)GSF_LE_GET_GUINT16(p))
-#define GSF_LE_GET_GINT32(p) ((gint32)GSF_LE_GET_GUINT32(p))
-#define GSF_LE_GET_FLOAT(p) (gsf_le_get_float (p))
-#define GSF_LE_GET_DOUBLE(p) (gsf_le_get_double (p))
-#define GSF_LE_SET_GUINT8(p, dat)			\
-	(*((guint8 *)(p))      = ((dat)        & 0xff))
-#define GSF_LE_SET_GUINT16(p, dat)			\
-	((*((guint8 *)(p) + 0) = ((dat)        & 0xff)),\
-	 (*((guint8 *)(p) + 1) = ((dat) >>  8) & 0xff))
-#define GSF_LE_SET_GUINT32(p, dat)				\
-	((*((guint8 *)(p) + 0) = ((dat))       & 0xff),	\
-	 (*((guint8 *)(p) + 1) = ((dat) >>  8) & 0xff),	\
-	 (*((guint8 *)(p) + 2) = ((dat) >> 16) & 0xff),	\
-	 (*((guint8 *)(p) + 3) = ((dat) >> 24) & 0xff))
-#define GSF_LE_SET_GINT8(p,dat) GSF_LE_SET_GUINT8((p),(dat))
-#define GSF_LE_SET_GINT16(p,dat) GSF_LE_SET_GUINT16((p),(dat))
-#define GSF_LE_SET_GINT32(p,dat) GSF_LE_SET_GUINT32((p),(dat))
-
-
-/*
- * Glib gets this wrong, really.  ARM's floating point format is a weird
- * mixture.
- */
-#define G_ARMFLOAT_ENDIAN 56781234
-#if defined(__arm__) && !defined(__vfp__) && (G_BYTE_ORDER == G_LITTLE_ENDIAN)
-#define G_FLOAT_BYTE_ORDER G_ARMFLOAT_ENDIAN
-#else
-#define G_FLOAT_BYTE_ORDER G_BYTE_ORDER
-#endif
-
-static guint64
-gsf_le_get_guint64 (void const *p)
-{
-#if G_BYTE_ORDER == G_BIG_ENDIAN
-	if (sizeof (guint64) == 8) {
-		guint64 li;
-		int     i;
-		guint8 *t  = (guint8 *)&li;
-		guint8 *p2 = (guint8 *)p;
-		int     sd = sizeof (li);
-
-		for (i = 0; i < sd; i++)
-			t[i] = p2[sd - 1 - i];
-
-		return li;
-	} else {
-		g_error ("Big endian machine, but weird size of guint64");
-	}
-#elif G_BYTE_ORDER == G_LITTLE_ENDIAN
-	if (sizeof (guint64) == 8) {
-		/*
-		 * On i86, we could access directly, but Alphas require
-		 * aligned access.
-		 */
-		guint64 data;
-		memcpy (&data, p, sizeof (data));
-		return data;
-	} else {
-		g_error ("Little endian machine, but weird size of guint64");
-	}
-#else
-#error "Byte order not recognised -- out of luck"
-#endif
-}
-
-static float
-gsf_le_get_float (void const *p)
-{
-#if G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
-	if (sizeof (float) == 4) {
-		float   f;
-		int     i;
-		guint8 *t  = (guint8 *)&f;
-		guint8 *p2 = (guint8 *)p;
-		int     sd = sizeof (f);
-
-		for (i = 0; i < sd; i++)
-			t[i] = p2[sd - 1 - i];
-
-		return f;
-	} else {
-		g_error ("Big endian machine, but weird size of floats");
-	}
-#elif (G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN) || (G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN)
-	if (sizeof (float) == 4) {
-		/*
-		 * On i86, we could access directly, but Alphas require
-		 * aligned access.
-		 */
-		float data;
-		memcpy (&data, p, sizeof (data));
-		return data;
-	} else {
-		g_error ("Little endian machine, but weird size of floats");
-	}
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-static double
-gsf_le_get_double (void const *p)
-{
-#if G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN
-	double data;
-	memcpy ((char *)&data + 4, p, 4);
-	memcpy ((char *)&data, (const char *)p + 4, 4);
-	return data;
-#elif G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
-	if (sizeof (double) == 8) {
-		double  d;
-		int     i;
-		guint8 *t  = (guint8 *)&d;
-		guint8 *p2 = (guint8 *)p;
-		int     sd = sizeof (d);
-
-		for (i = 0; i < sd; i++)
-			t[i] = p2[sd - 1 - i];
-
-		return d;
-	} else {
-		g_error ("Big endian machine, but weird size of doubles");
-	}
-#elif G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN
-	if (sizeof (double) == 8) {
-		/*
-		 * On i86, we could access directly, but Alphas require
-		 * aligned access.
-		 */
-		double data;
-		memcpy (&data, p, sizeof (data));
-		return data;
-	} else {
-		g_error ("Little endian machine, but weird size of doubles");
-	}
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-/**
- * gsf_iconv_close : A utility wrapper to safely close an iconv handle
- * @handle :
- **/
-static void
-gsf_iconv_close (GIConv handle)
-{
-	if (handle != NULL && handle != ((GIConv)-1))
-		g_iconv_close (handle);
-}
-
-
-/* ***************************** formerly gsf-infile-msole.c ********************* */
-
-#define OLE_HEADER_SIZE		 0x200	/* independent of big block size size */
-#define OLE_HEADER_SIGNATURE	 0x00
-#define OLE_HEADER_CLSID	 0x08	/* See ReadClassStg */
-#define OLE_HEADER_MINOR_VER	 0x18	/* 0x33 and 0x3e have been seen */
-#define OLE_HEADER_MAJOR_VER	 0x1a	/* 0x3 been seen in wild */
-#define OLE_HEADER_BYTE_ORDER	 0x1c	/* 0xfe 0xff == Intel Little Endian */
-#define OLE_HEADER_BB_SHIFT      0x1e
-#define OLE_HEADER_SB_SHIFT      0x20
-/* 0x22..0x27 reserved == 0 */
-#define OLE_HEADER_CSECTDIR	 0x28
-#define OLE_HEADER_NUM_BAT	 0x2c
-#define OLE_HEADER_DIRENT_START  0x30
-/* 0x34..0x37 transacting signature must be 0 */
-#define OLE_HEADER_THRESHOLD	 0x38
-#define OLE_HEADER_SBAT_START    0x3c
-#define OLE_HEADER_NUM_SBAT      0x40
-#define OLE_HEADER_METABAT_BLOCK 0x44
-#define OLE_HEADER_NUM_METABAT   0x48
-#define OLE_HEADER_START_BAT	 0x4c
-#define BAT_INDEX_SIZE		 4
-#define OLE_HEADER_METABAT_SIZE	 ((OLE_HEADER_SIZE - OLE_HEADER_START_BAT) / BAT_INDEX_SIZE)
-
-#define DIRENT_MAX_NAME_SIZE	0x40
-#define DIRENT_DETAILS_SIZE	0x40
-#define DIRENT_SIZE		(DIRENT_MAX_NAME_SIZE + DIRENT_DETAILS_SIZE)
-#define DIRENT_NAME_LEN		0x40	/* length in bytes incl 0 terminator */
-#define DIRENT_TYPE		0x42
-#define DIRENT_COLOUR		0x43
-#define DIRENT_PREV		0x44
-#define DIRENT_NEXT		0x48
-#define DIRENT_CHILD		0x4c
-#define DIRENT_CLSID		0x50	/* only for dirs */
-#define DIRENT_USERFLAGS	0x60	/* only for dirs */
-#define DIRENT_CREATE_TIME	0x64	/* for files */
-#define DIRENT_MODIFY_TIME	0x6c	/* for files */
-#define DIRENT_FIRSTBLOCK	0x74
-#define DIRENT_FILE_SIZE	0x78
-/* 0x7c..0x7f reserved == 0 */
-
-#define DIRENT_TYPE_INVALID	0
-#define DIRENT_TYPE_DIR		1
-#define DIRENT_TYPE_FILE	2
-#define DIRENT_TYPE_LOCKBYTES	3	/* ? */
-#define DIRENT_TYPE_PROPERTY	4	/* ? */
-#define DIRENT_TYPE_ROOTDIR	5
-#define DIRENT_MAGIC_END	0xffffffff
-
-/* flags in the block allocation list to denote special blocks */
-#define BAT_MAGIC_UNUSED	0xffffffff	/*		   -1 */
-#define BAT_MAGIC_END_OF_CHAIN	0xfffffffe	/*		   -2 */
-#define BAT_MAGIC_BAT		0xfffffffd	/* a bat block,    -3 */
-#define BAT_MAGIC_METABAT	0xfffffffc	/* a metabat block -4 */
-
-
-
-
-typedef struct {
-	guint32 *block;
-	guint32  num_blocks;
-} MSOleBAT;
-
-typedef struct {
-	char	 *name;
-	char	 *collation_name;
-	int	  index;
-	size_t    size;
-	gboolean  use_sb;
-	guint32   first_block;
-	gboolean  is_directory;
-	GList	 *children;
-	unsigned char clsid[16];	/* 16 byte GUID used by some apps */
-} MSOleDirent;
-
-typedef struct {
-	struct {
-		MSOleBAT bat;
-		unsigned shift;
-		unsigned filter;
-		size_t   size;
-	} bb, sb;
-	off_t max_block;
-	guint32 threshold; /* transition between small and big blocks */
-        guint32 sbat_start, num_sbat;
-
-	MSOleDirent *root_dir;
-	struct GsfInput *sb_file;
-
-	int ref_count;
-} MSOleInfo;
-
-typedef struct GsfInfileMSOle {
-	off_t size;
-	off_t cur_offset;
-	struct GsfInput    *input;
-	MSOleInfo   *info;
-	MSOleDirent *dirent;
-	MSOleBAT     bat;
-	off_t    cur_block;
-
-	struct {
-		guint8  *buf;
-		size_t  buf_size;
-	} stream;
-} GsfInfileMSOle;
-
-/* utility macros */
-#define OLE_BIG_BLOCK(index, ole)	((index) >> ole->info->bb.shift)
-
-static struct GsfInput *gsf_infile_msole_new_child (GsfInfileMSOle *parent,
-					     MSOleDirent *dirent);
-
-/**
- * ole_get_block :
- * @ole    : the infile
- * @block  :
- * @buffer : optionally NULL
- *
- * Read a block of data from the underlying input.
- * Be really anal.
- **/
-static const guint8 *
-ole_get_block (const GsfInfileMSOle *ole, guint32 block, guint8 *buffer)
-{
-	g_return_val_if_fail (block < ole->info->max_block, NULL);
-
-	/* OLE_HEADER_SIZE is fixed at 512, but the sector containing the
-	 * header is padded out to bb.size (sector size) when bb.size > 512. */
-	if (gsf_input_seek (ole->input,
-		(off_t)(MAX (OLE_HEADER_SIZE, ole->info->bb.size) + (block << ole->info->bb.shift)),
-		SEEK_SET) < 0)
-		return NULL;
-
-	return gsf_input_read (ole->input, ole->info->bb.size, buffer);
-}
-
-/**
- * ole_make_bat :
- * @metabat	: a meta bat to connect to the raw blocks (small or large)
- * @size_guess	: An optional guess as to how many blocks are in the file
- * @block	: The first block in the list.
- * @res		: where to store the result.
- *
- * Walk the linked list of the supplied block allocation table and build up a
- * table for the list starting in @block.
- *
- * Returns TRUE on error.
- */
-static gboolean
-ole_make_bat (MSOleBAT const *metabat, size_t size_guess, guint32 block,
-	      MSOleBAT *res)
-{
-	/* NOTE : Only use size as a suggestion, sometimes it is wrong */
-	GArray *bat = g_array_sized_new (FALSE, FALSE,
-		sizeof (guint32), size_guess);
-
-	guint8 *used = (guint8*)g_alloca (1 + metabat->num_blocks / 8);
-	memset (used, 0, 1 + metabat->num_blocks / 8);
-
-	if (block < metabat->num_blocks)
-		do {
-			/* Catch cycles in the bat list */
-			g_return_val_if_fail (0 == (used[block/8] & (1 << (block & 0x7))), TRUE);
-			used[block/8] |= 1 << (block & 0x7);
-
-			g_array_append_val (bat, block);
-			block = metabat->block [block];
-		} while (block < metabat->num_blocks);
-
-	res->block = NULL;
-
-	res->num_blocks = bat->len;
-	res->block = (guint32 *) (gpointer) g_array_free (bat, FALSE);
-
-	if (block != BAT_MAGIC_END_OF_CHAIN) {
-#if 0
-		g_warning ("This OLE2 file is invalid.\n"
-			   "The Block Allocation  Table for one of the streams had %x instead of a terminator (%x).\n"
-			   "We might still be able to extract some data, but you'll want to check the file.",
-			   block, BAT_MAGIC_END_OF_CHAIN);
-#endif
-	}
-
-	return FALSE;
-}
-
-static void
-ols_bat_release (MSOleBAT *bat)
-{
-	if (bat->block != NULL) {
-		g_free (bat->block);
-		bat->block = NULL;
-		bat->num_blocks = 0;
-	}
-}
-
-/**
- * ole_info_read_metabat :
- * @ole  :
- * @bats :
- *
- * A small utility routine to read a set of references to bat blocks
- * either from the OLE header, or a meta-bat block.
- *
- * Returns a pointer to the element after the last position filled.
- **/
-static guint32 *
-ole_info_read_metabat (GsfInfileMSOle *ole, guint32 *bats, guint32 max,
-		       guint32 const *metabat, guint32 const *metabat_end)
-{
-	guint8 const *bat, *end;
-
-	for (; metabat < metabat_end; metabat++) {
-		bat = ole_get_block (ole, *metabat, NULL);
-		if (bat == NULL)
-			return NULL;
-		end = bat + ole->info->bb.size;
-		for ( ; bat < end ; bat += BAT_INDEX_SIZE, bats++) {
-			*bats = GSF_LE_GET_GUINT32 (bat);
-			g_return_val_if_fail (*bats < max ||
-					      *bats >= BAT_MAGIC_METABAT, NULL);
-		}
-	}
-	return bats;
-}
-
-/**
- * gsf_ole_get_guint32s :
- * @dst :
- * @src :
- * @num_bytes :
- *
- * Copy some some raw data into an array of guint32.
- **/
-static void
-gsf_ole_get_guint32s (guint32 *dst, guint8 const *src, int num_bytes)
-{
-	for (; (num_bytes -= BAT_INDEX_SIZE) >= 0 ; src += BAT_INDEX_SIZE)
-		*dst++ = GSF_LE_GET_GUINT32 (src);
-}
-
-static struct GsfInput *
-ole_info_get_sb_file (GsfInfileMSOle *parent)
-{
-	MSOleBAT meta_sbat;
-
-	if (parent->info->sb_file != NULL)
-		return parent->info->sb_file;
-
-	parent->info->sb_file = gsf_infile_msole_new_child (parent,
-		parent->info->root_dir);
-
-	if (NULL == parent->info->sb_file)
-		return NULL;
-
-	g_return_val_if_fail (parent->info->sb.bat.block == NULL, NULL);
-
-	if (ole_make_bat (&parent->info->bb.bat,
-			  parent->info->num_sbat,
-                          parent->info->sbat_start,
-                          &meta_sbat)) {
-		return NULL;
-	}
-
-	parent->info->sb.bat.num_blocks = meta_sbat.num_blocks * (parent->info->bb.size / BAT_INDEX_SIZE);
-	parent->info->sb.bat.block	= g_new0 (guint32, parent->info->sb.bat.num_blocks);
-	ole_info_read_metabat (parent, parent->info->sb.bat.block,
-		parent->info->sb.bat.num_blocks,
-		meta_sbat.block, meta_sbat.block + meta_sbat.num_blocks);
-	ols_bat_release (&meta_sbat);
-
-	return parent->info->sb_file;
-}
-
-static gint
-ole_dirent_cmp (const MSOleDirent *a, const MSOleDirent *b)
-{
-	g_return_val_if_fail (a, 0);
-	g_return_val_if_fail (b, 0);
-
-	g_return_val_if_fail (a->collation_name, 0);
-	g_return_val_if_fail (b->collation_name, 0);
-
-	return strcmp (b->collation_name, a->collation_name);
-}
-
-/**
- * ole_dirent_new :
- * @ole    :
- * @entry  :
- * @parent : optional
- *
- * Parse dirent number @entry and recursively handle its siblings and children.
- **/
-static MSOleDirent *
-ole_dirent_new (GsfInfileMSOle *ole, guint32 entry, MSOleDirent *parent)
-{
-	MSOleDirent *dirent;
-	guint32 block, next, prev, child, size;
-	guint8 const *data;
-	guint8 type;
-	guint16 name_len;
-
-	if (entry >= DIRENT_MAGIC_END)
-		return NULL;
-
-	block = OLE_BIG_BLOCK (entry * DIRENT_SIZE, ole);
-
-	g_return_val_if_fail (block < ole->bat.num_blocks, NULL);
-	data = ole_get_block (ole, ole->bat.block [block], NULL);
-	if (data == NULL)
-		return NULL;
-	data += (DIRENT_SIZE * entry) % ole->info->bb.size;
-
-	type = GSF_LE_GET_GUINT8 (data + DIRENT_TYPE);
-	if (type != DIRENT_TYPE_DIR &&
-	    type != DIRENT_TYPE_FILE &&
-	    type != DIRENT_TYPE_ROOTDIR) {
-#if 0
-		g_warning ("Unknown stream type 0x%x", type);
-#endif
-		return NULL;
-	}
-
-	/* It looks like directory (and root directory) sizes are sometimes bogus */
-	size = GSF_LE_GET_GUINT32 (data + DIRENT_FILE_SIZE);
-	g_return_val_if_fail (type == DIRENT_TYPE_DIR || type == DIRENT_TYPE_ROOTDIR ||
-			      size <= (guint32)gsf_input_size(ole->input), NULL);
-
-	dirent = g_new0 (MSOleDirent, 1);
-	dirent->index	     = entry;
-	dirent->size	     = size;
-	/* Store the class id which is 16 byte identifier used by some apps */
-	memcpy(dirent->clsid, data + DIRENT_CLSID, sizeof(dirent->clsid));
-
-	/* root dir is always big block */
-	dirent->use_sb	     = parent && (size < ole->info->threshold);
-	dirent->first_block  = (GSF_LE_GET_GUINT32 (data + DIRENT_FIRSTBLOCK));
-	dirent->is_directory = (type != DIRENT_TYPE_FILE);
-	dirent->children     = NULL;
-	prev  = GSF_LE_GET_GUINT32 (data + DIRENT_PREV);
-	next  = GSF_LE_GET_GUINT32 (data + DIRENT_NEXT);
-	child = GSF_LE_GET_GUINT32 (data + DIRENT_CHILD);
-	name_len = GSF_LE_GET_GUINT16 (data + DIRENT_NAME_LEN);
-	dirent->name = NULL;
-	if (0 < name_len && name_len <= DIRENT_MAX_NAME_SIZE) {
-		gunichar2 uni_name [DIRENT_MAX_NAME_SIZE+1];
-		gchar const *end;
-		int i;
-
-		/* !#%!@$#^
-		 * Sometimes, rarely, people store the stream name as ascii
-		 * rather than utf16.  Do a validation first just in case.
-		 */
-		if (!g_utf8_validate ((const char*) data, -1, &end) ||
-		    ((guint8 const *)end - data + 1) != name_len) {
-			/* be wary about endianness */
-			for (i = 0 ; i < name_len ; i += 2)
-				uni_name [i/2] = GSF_LE_GET_GUINT16 (data + i);
-			uni_name [i/2] = 0;
-
-			dirent->name = g_utf16_to_utf8 (uni_name, -1, NULL, NULL, NULL);
-		} else
-			dirent->name = g_strndup ((gchar *)data, (gsize)((guint8 const *)end - data + 1));
-	}
-	/* be really anal in the face of screwups */
-	if (dirent->name == NULL)
-		dirent->name = g_strdup ("");
-	dirent->collation_name = g_utf8_collate_key (dirent->name, -1);
-
-	if (parent != NULL)
-		parent->children = g_list_insert_sorted (parent->children,
-			dirent, (GCompareFunc)ole_dirent_cmp);
-
-	/* NOTE : These links are a tree, not a linked list */
-	if (prev != entry)
-		ole_dirent_new (ole, prev, parent);
-	if (next != entry)
-		ole_dirent_new (ole, next, parent);
-
-	if (dirent->is_directory)
-		ole_dirent_new (ole, child, dirent);
-	return dirent;
-}
-
-static void
-ole_dirent_free (MSOleDirent *dirent)
-{
-	GList *tmp;
-	g_return_if_fail (dirent != NULL);
-
-	g_free (dirent->name);
-	g_free (dirent->collation_name);
-
-	for (tmp = dirent->children; tmp; tmp = tmp->next)
-		ole_dirent_free ((MSOleDirent *)tmp->data);
-	g_list_free (dirent->children);
-	g_free (dirent);
-}
-
-/*****************************************************************************/
-
-static void
-ole_info_unref (MSOleInfo *info)
-{
-	if (info->ref_count-- != 1)
-		return;
-
-	ols_bat_release (&info->bb.bat);
-	ols_bat_release (&info->sb.bat);
-	if (info->root_dir != NULL) {
-		ole_dirent_free (info->root_dir);
-		info->root_dir = NULL;
-	}
-	if (info->sb_file != NULL)  {
-		gsf_input_finalize(info->sb_file);
-		info->sb_file = NULL;
-	}
-	g_free (info);
-}
-
-static MSOleInfo *
-ole_info_ref (MSOleInfo *info)
-{
-	info->ref_count++;
-	return info;
-}
-
-static void
-gsf_infile_msole_init (GsfInfileMSOle * ole)
-{
-	ole->cur_offset = 0;
-	ole->size = 0;
-	ole->input		= NULL;
-	ole->info		= NULL;
-	ole->bat.block		= NULL;
-	ole->bat.num_blocks	= 0;
-	ole->cur_block		= BAT_MAGIC_UNUSED;
-	ole->stream.buf		= NULL;
-	ole->stream.buf_size	= 0;
-}
-
-static void
-gsf_infile_msole_finalize (GsfInfileMSOle * ole)
-{
-	if (ole->input != NULL) {
-		gsf_input_finalize(ole->input);
-		ole->input = NULL;
-	}
-	if (ole->info != NULL) {
-		ole_info_unref (ole->info);
-		ole->info = NULL;
-	}
-	ols_bat_release (&ole->bat);
-
-	g_free (ole->stream.buf);
-	free(ole);
-}
-	
-/**
- * ole_dup :
- * @src :
- *
- * Utility routine to _partially_ replicate a file.  It does NOT copy the bat
- * blocks, or init the dirent.
- *
- * Return value: the partial duplicate.
- **/
-static GsfInfileMSOle *
-ole_dup (GsfInfileMSOle const * src)
-{
-	GsfInfileMSOle	*dst;
-	struct GsfInput *input;
-
-	g_return_val_if_fail (src != NULL, NULL);
-
-	dst = malloc(sizeof(GsfInfileMSOle));
-	if (dst == NULL)
-		return NULL;
-	gsf_infile_msole_init(dst);
-	input = gsf_input_dup (src->input);
-	if (input == NULL) {
-		gsf_infile_msole_finalize(dst);
-		return NULL;
-	}
-	dst->input = input;
-	dst->info  = ole_info_ref (src->info);
-
-	/* buf and buf_size are initialized to NULL */
-
-	return dst;
-}
-	
-/**
- * ole_init_info :
- * @ole :
- *
- * Read an OLE header and do some sanity checking
- * along the way.
- *
- * Return value: TRUE on error
- **/
-static gboolean
-ole_init_info (GsfInfileMSOle *ole)
-{
-	static guint8 const signature[] =
-		{ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
-	guint8 const *header, *tmp;
-	guint32 *metabat = NULL;
-	MSOleInfo *info;
-	guint32 bb_shift, sb_shift, num_bat, num_metabat, last, dirent_start;
-	guint32 metabat_block, *ptr;
-
-	/* check the header */
-	if (gsf_input_seek (ole->input, (off_t) 0, SEEK_SET) ||
-	    NULL == (header = gsf_input_read (ole->input, OLE_HEADER_SIZE, NULL)) ||
-	    0 != memcmp (header, signature, sizeof (signature))) {
-		return TRUE;
-	}
-
-	bb_shift      = GSF_LE_GET_GUINT16 (header + OLE_HEADER_BB_SHIFT);
-	sb_shift      = GSF_LE_GET_GUINT16 (header + OLE_HEADER_SB_SHIFT);
-	num_bat	      = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_BAT);
-	dirent_start  = GSF_LE_GET_GUINT32 (header + OLE_HEADER_DIRENT_START);
-        metabat_block = GSF_LE_GET_GUINT32 (header + OLE_HEADER_METABAT_BLOCK);
-	num_metabat   = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_METABAT);
-
-	/* Some sanity checks
-	 * 1) There should always be at least 1 BAT block
-	 * 2) It makes no sense to have a block larger than 2^31 for now.
-	 *    Maybe relax this later, but not much.
-	 */
-	if (6 > bb_shift || bb_shift >= 31 || sb_shift > bb_shift) {
-		return TRUE;
-	}
-
-	info = g_new0 (MSOleInfo, 1);
-	ole->info = info;
-
-	info->ref_count	     = 1;
-	info->bb.shift	     = bb_shift;
-	info->bb.size	     = 1 << info->bb.shift;
-	info->bb.filter	     = info->bb.size - 1;
-	info->sb.shift	     = sb_shift;
-	info->sb.size	     = 1 << info->sb.shift;
-	info->sb.filter	     = info->sb.size - 1;
-	info->threshold	     = GSF_LE_GET_GUINT32 (header + OLE_HEADER_THRESHOLD);
-        info->sbat_start     = GSF_LE_GET_GUINT32 (header + OLE_HEADER_SBAT_START);
-        info->num_sbat       = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_SBAT);
-	info->max_block	     = (gsf_input_size (ole->input) - OLE_HEADER_SIZE) / info->bb.size;
-	info->sb_file	     = NULL;
-
-	if (info->num_sbat == 0 && info->sbat_start != BAT_MAGIC_END_OF_CHAIN) {
-#if 0
-		g_warning ("There is are not supposed to be any blocks in the small block allocation table, yet there is a link to some.  Ignoring it.");
-#endif
-	}
-
-	/* very rough heuristic, just in case */
-	if (num_bat < info->max_block) {
-		info->bb.bat.num_blocks = num_bat * (info->bb.size / BAT_INDEX_SIZE);
-		info->bb.bat.block	= g_new0 (guint32, info->bb.bat.num_blocks);
-
-		metabat = (guint32 *)g_alloca (MAX (info->bb.size, OLE_HEADER_SIZE));
-
-		/* Reading the elements invalidates this memory, make copy */
-		gsf_ole_get_guint32s (metabat, header + OLE_HEADER_START_BAT,
-			OLE_HEADER_SIZE - OLE_HEADER_START_BAT);
-		last = num_bat;
-		if (last > OLE_HEADER_METABAT_SIZE)
-			last = OLE_HEADER_METABAT_SIZE;
-
-		ptr = ole_info_read_metabat (ole, info->bb.bat.block,
-			info->bb.bat.num_blocks, metabat, metabat + last);
-		num_bat -= last;
-	} else
-		ptr = NULL;
-
-	last = (info->bb.size - BAT_INDEX_SIZE) / BAT_INDEX_SIZE;
-	while (ptr != NULL && num_metabat-- > 0) {
-		tmp = ole_get_block (ole, metabat_block, NULL);
-		if (tmp == NULL) {
-			ptr = NULL;
-			break;
-		}
-
-		/* Reading the elements invalidates this memory, make copy */
-		gsf_ole_get_guint32s (metabat, tmp, (int)info->bb.size);
-
-		if (num_metabat == 0) {
-			if (last < num_bat) {
-				/* there should be less that a full metabat block
-				 * remaining */
-				ptr = NULL;
-				break;
-			}
-			last = num_bat;
-		} else if (num_metabat > 0) {
-			metabat_block = metabat[last];
-			num_bat -= last;
-		}
-
-		ptr = ole_info_read_metabat (ole, ptr,
-			info->bb.bat.num_blocks, metabat, metabat + last);
-	}
-
-	if (ptr == NULL) {
-		return TRUE;
-	}
-
-	/* Read the directory's bat, we do not know the size */
-	if (ole_make_bat (&info->bb.bat, 0, dirent_start, &ole->bat)) {
-		return TRUE;
-	}
-
-	/* Read the directory */
-	ole->dirent = info->root_dir = ole_dirent_new (ole, 0, NULL);
-	if (ole->dirent == NULL) {
-		return TRUE;
-	}
-
-	return FALSE;
-}
-
-static guint8 const *
-gsf_infile_msole_read (GsfInfileMSOle *ole, size_t num_bytes, guint8 *buffer)
-{
-	off_t first_block, last_block, raw_block, offset, i;
-	guint8 const *data;
-	guint8 *ptr;
-	size_t count;
-
-	/* small block files are preload */
-	if (ole->dirent != NULL && ole->dirent->use_sb) {
-		if (buffer != NULL) {
-			memcpy (buffer, ole->stream.buf + ole->cur_offset, num_bytes);
-			ole->cur_offset += num_bytes;
-			return buffer;
-		}
-		data = ole->stream.buf + ole->cur_offset;
-		ole->cur_offset += num_bytes;
-		return data;
-	}
-
-	/* GsfInput guarantees that num_bytes > 0 */
-	first_block = OLE_BIG_BLOCK (ole->cur_offset, ole);
-	last_block = OLE_BIG_BLOCK (ole->cur_offset + num_bytes - 1, ole);
-	offset = ole->cur_offset & ole->info->bb.filter;
-
-	/* optimization : are all the raw blocks contiguous */
-	i = first_block;
-	raw_block = ole->bat.block [i];
-	while (++i <= last_block && ++raw_block == ole->bat.block [i])
-		;
-	if (i > last_block) {
-		/* optimization don't seek if we don't need to */
-		if (ole->cur_block != first_block) {
-			if (gsf_input_seek (ole->input,
-				(off_t)(MAX (OLE_HEADER_SIZE, ole->info->bb.size) + (ole->bat.block [first_block] << ole->info->bb.shift) + offset),
-				SEEK_SET) < 0)
-				return NULL;
-		}
-		ole->cur_block = last_block;
-		return gsf_input_read (ole->input, 
-				       num_bytes,
-				       (unsigned char*) buffer);
-	}
-
-	/* damn, we need to copy it block by block */
-	if (buffer == NULL) {
-		if (ole->stream.buf_size < num_bytes) {
-			if (ole->stream.buf != NULL)
-				g_free (ole->stream.buf);
-			ole->stream.buf_size = num_bytes;
-			ole->stream.buf = g_new (guint8, num_bytes);
-		}
-		buffer = ole->stream.buf;
-	}
-
-	ptr = buffer;
-	for (i = first_block ; i <= last_block ; i++ , ptr += count, num_bytes -= count) {
-		count = ole->info->bb.size - offset;
-		if (count > num_bytes)
-			count = num_bytes;
-		data = ole_get_block (ole, ole->bat.block [i], NULL);
-		if (data == NULL)
-			return NULL;
-
-		/* TODO : this could be optimized to avoid the copy */
-		memcpy (ptr, data + offset, count);
-		offset = 0;
-	}
-	ole->cur_block = BAT_MAGIC_UNUSED;
-	ole->cur_offset += num_bytes;
-	return buffer;
-}
-	
-static struct GsfInput *
-gsf_infile_msole_new_child (GsfInfileMSOle *parent,
-			    MSOleDirent *dirent)
-{
-	GsfInfileMSOle * child;
-	MSOleInfo *info;
-	MSOleBAT const *metabat;
-	struct GsfInput *sb_file = NULL;
-	size_t size_guess;
-	char * buf;
-	
-
-	if ( (dirent->index != 0) &&
-	     (dirent->is_directory) ) {
-		/* be wary.  It seems as if some implementations pretend that the
-		 * directories contain data */
-	  return gsf_input_new((const unsigned char*) "",
-			       (off_t) 0,
-			       0);
-	}
-	child = ole_dup (parent);
-	if (child == NULL)
-		return NULL;	
-	child->dirent = dirent;
-	child->size = (off_t) dirent->size;
-		
-	info = parent->info;
-
-        if (dirent->use_sb) {	/* build the bat */
-		metabat = &info->sb.bat;
-		size_guess = dirent->size >> info->sb.shift;
-		sb_file = ole_info_get_sb_file (parent);
-	} else {
-		metabat = &info->bb.bat;
-		size_guess = dirent->size >> info->bb.shift;
-	}
-	if (ole_make_bat (metabat, size_guess + 1, dirent->first_block, &child->bat)) {
-		gsf_infile_msole_finalize(child);
-		return NULL;
-	}
-
-	if (dirent->use_sb) {
-		unsigned i;
-		guint8 const *data;
-		
-		if (sb_file == NULL) {
-			gsf_infile_msole_finalize(child);
-			return NULL;
-		}
-
-		child->stream.buf_size = info->threshold;
-		child->stream.buf = g_new (guint8, info->threshold);
-
-		for (i = 0 ; i < child->bat.num_blocks; i++)
-			if (gsf_input_seek (sb_file,
-					    (off_t)(child->bat.block [i] << info->sb.shift), SEEK_SET) < 0 ||
-			    (data = gsf_input_read (sb_file,
-						    info->sb.size,
-				child->stream.buf + (i << info->sb.shift))) == NULL) {
-				gsf_infile_msole_finalize(child);
-				return NULL;
-			}
-	}
-	buf = malloc(child->size);
-	if (buf == NULL) {
-		gsf_infile_msole_finalize(child);
-		return NULL;
-	}
-	if (NULL == gsf_infile_msole_read(child,
-					  child->size,
-					  (guint8*) buf)) {
-		gsf_infile_msole_finalize(child);	
-		return NULL;
-	}
-	gsf_infile_msole_finalize(child);
-	return gsf_input_new((const unsigned char*) buf,
-			     (off_t) dirent->size,
-			     1);
-}
-	
-
-static struct GsfInput *
-gsf_infile_msole_child_by_index (GsfInfileMSOle * ole, int target)
-{
-	GList *p;
-
-	for (p = ole->dirent->children; p != NULL ; p = p->next)
-		if (target-- <= 0)
-			return gsf_infile_msole_new_child (ole,
-				(MSOleDirent *)p->data);
-	return NULL;
-}
-
-static char const *
-gsf_infile_msole_name_by_index (GsfInfileMSOle * ole, int target)
-{
-	GList *p;
-
-	for (p = ole->dirent->children; p != NULL ; p = p->next)
-		if (target-- <= 0)
-			return ((MSOleDirent *)p->data)->name;
-	return NULL;
-}
-
-static int
-gsf_infile_msole_num_children (GsfInfileMSOle * ole)
-{
-	g_return_val_if_fail (ole->dirent != NULL, -1);
-
-	if (!ole->dirent->is_directory)
-		return -1;
-	return g_list_length (ole->dirent->children);
-}
-
-
-/**
- * gsf_infile_msole_new :
- * @source :
- *
- * Opens the root directory of an MS OLE file.
- * NOTE : adds a reference to @source
- *
- * Returns : the new ole file handler
- **/
-static GsfInfileMSOle *
-gsf_infile_msole_new (struct GsfInput *source)
-{
-	GsfInfileMSOle * ole;
-
-	ole = malloc(sizeof(GsfInfileMSOle));
-	if (ole == NULL)
-		return NULL;
-	gsf_infile_msole_init(ole);
-	ole->input = source;
-	ole->size = (off_t) 0;
-
-	if (ole_init_info (ole)) {
-		gsf_infile_msole_finalize(ole);
-		return NULL;
-	}
-
-	return ole;
-}
-
-
-
-
+#include <gsf/gsf-utils.h>
+#include <gsf/gsf-input-memory.h>
+#include <gsf/gsf-infile.h>
+#include <gsf/gsf-infile-msole.h>
+#include <gsf/gsf-msole-utils.h>
 
+#define DEBUG_OLE2 0
 
 /* ******************************** main extraction code ************************ */
 
@@ -1240,21 +53,34 @@ static struct EXTRACTOR_Keywords *
 addKeyword(EXTRACTOR_KeywordList *oldhead,
 	   const char *phrase,
 	   EXTRACTOR_KeywordType type) {
-   EXTRACTOR_KeywordList * keyword;
-
-   if (strlen(phrase) == 0)
-     return oldhead;
-   if (0 == strcmp(phrase, "\"\""))
-     return oldhead;
-   if (0 == strcmp(phrase, "\" \""))
-     return oldhead;
-   if (0 == strcmp(phrase, " "))
-     return oldhead;
-   keyword = (EXTRACTOR_KeywordList*) malloc(sizeof(EXTRACTOR_KeywordList));
-   keyword->next = oldhead;
-   keyword->keyword = strdup(phrase);
-   keyword->keywordType = type;
-   return keyword;
+  /* Prepend a copy of 'phrase', tagged with 'type', to the list 'oldhead'
+   * and return the new head.  A NULL, empty or placeholder phrase (a lone
+   * space, or a pair of quotes with nothing or one space between them) is
+   * skipped.  If memory runs out the list is returned unchanged. */
+  EXTRACTOR_KeywordList * keyword;
+
+  if (phrase == NULL)
+    return oldhead;
+  if (strlen(phrase) == 0)
+    return oldhead;
+  if (0 == strcmp(phrase, "\"\""))
+    return oldhead;
+  if (0 == strcmp(phrase, "\" \""))
+    return oldhead;
+  if (0 == strcmp(phrase, " "))
+    return oldhead;
+  keyword = malloc(sizeof(EXTRACTOR_KeywordList));
+  if (keyword == NULL)
+    return oldhead;
+  keyword->keyword = strdup(phrase);
+  if (keyword->keyword == NULL) {
+    /* strdup failed: drop the half-built node rather than link it */
+    free(keyword);
+    return oldhead;
+  }
+  keyword->next = oldhead;
+  keyword->keywordType = type;
+  return keyword;
 }
 
 
@@ -1273,122 +86,6 @@ static guint8 const user_guid [] = {
 	0x93, 0x97, 0x08, 0x00, 0x2b, 0x2c, 0xf9, 0xae
 };
 
-typedef enum {
-	GSF_MSOLE_META_DATA_COMPONENT,
-	GSF_MSOLE_META_DATA_DOCUMENT,
-	GSF_MSOLE_META_DATA_USER
-} GsfMSOleMetaDataType;
-
-typedef enum {
-	LE_VT_EMPTY               = 0,
-	LE_VT_NULL                = 1,
-	LE_VT_I2                  = 2,
-	LE_VT_I4                  = 3,
-	LE_VT_R4                  = 4,
-	LE_VT_R8                  = 5,
-	LE_VT_CY                  = 6,
-	LE_VT_DATE                = 7,
-	LE_VT_BSTR                = 8,
-	LE_VT_DISPATCH            = 9,
-	LE_VT_ERROR               = 10,
-	LE_VT_BOOL                = 11,
-	LE_VT_VARIANT             = 12,
-	LE_VT_UNKNOWN             = 13,
-	LE_VT_DECIMAL             = 14,
-	LE_VT_I1                  = 16,
-	LE_VT_UI1                 = 17,
-	LE_VT_UI2                 = 18,
-	LE_VT_UI4                 = 19,
-	LE_VT_I8                  = 20,
-	LE_VT_UI8                 = 21,
-	LE_VT_INT                 = 22,
-	LE_VT_UINT                = 23,
-	LE_VT_VOID                = 24,
-	LE_VT_HRESULT             = 25,
-	LE_VT_PTR                 = 26,
-	LE_VT_SAFEARRAY           = 27,
-	LE_VT_CARRAY              = 28,
-	LE_VT_USERDEFINED         = 29,
-	LE_VT_LPSTR               = 30,
-	LE_VT_LPWSTR              = 31,
-	LE_VT_FILETIME            = 64,
-	LE_VT_BLOB                = 65,
-	LE_VT_STREAM              = 66,
-	LE_VT_STORAGE             = 67,
-	LE_VT_STREAMED_OBJECT     = 68,
-	LE_VT_STORED_OBJECT       = 69,
-	LE_VT_BLOB_OBJECT         = 70,
-	LE_VT_CF                  = 71,
-	LE_VT_CLSID               = 72,
-	LE_VT_VECTOR              = 0x1000
-} GsfMSOleVariantType;
-
-typedef struct {
-	char const *name;
-	guint32	    id;
-	GsfMSOleVariantType prefered_type;
-} GsfMSOleMetaDataPropMap;
-
-typedef struct {
-	guint32		id;
-	off_t	offset;
-} GsfMSOleMetaDataProp;
-
-typedef struct {
-	GsfMSOleMetaDataType type;
-	off_t   offset;
-	guint32	    size, num_props;
-	GIConv	    iconv_handle;
-	unsigned    char_size;
-	GHashTable *dict;
-} GsfMSOleMetaDataSection;
-
-static GsfMSOleMetaDataPropMap const document_props[] = {
-	{ "Category",		2,	LE_VT_LPSTR },
-	{ "PresentationFormat",	3,	LE_VT_LPSTR },
-	{ "NumBytes",		4,	LE_VT_I4 },
-	{ "NumLines",		5,	LE_VT_I4 },
-	{ "NumParagraphs",	6,	LE_VT_I4 },
-	{ "NumSlides",		7,	LE_VT_I4 },
-	{ "NumNotes",		8,	LE_VT_I4 },
-	{ "NumHiddenSlides",	9,	LE_VT_I4 },
-	{ "NumMMClips",		10,	LE_VT_I4 },
-	{ "Scale",		11,	LE_VT_BOOL },
-	{ "HeadingPairs",	12,	LE_VT_VECTOR | LE_VT_VARIANT },
-	{ "DocumentParts",	13,	LE_VT_VECTOR | LE_VT_LPSTR },
-	{ "Manager",		14,	LE_VT_LPSTR },
-	{ "Company",		15,	LE_VT_LPSTR },
-	{ "LinksDirty",		16,	LE_VT_BOOL }
-};
-
-static GsfMSOleMetaDataPropMap const component_props[] = {
-	{ "Title",		2,	LE_VT_LPSTR },
-	{ "Subject",		3,	LE_VT_LPSTR },
-	{ "Author",		4,	LE_VT_LPSTR },
-	{ "Keywords",		5,	LE_VT_LPSTR },
-	{ "Comments",		6,	LE_VT_LPSTR },
-	{ "Template",		7,	LE_VT_LPSTR },
-	{ "LastSavedBy",	8,	LE_VT_LPSTR },
-	{ "RevisionNumber",	9,	LE_VT_LPSTR },
-	{ "TotalEditingTime",	10,	LE_VT_FILETIME },
-	{ "LastPrinted",	11,	LE_VT_FILETIME },
-	{ "CreateTime",		12,	LE_VT_FILETIME },
-	{ "LastSavedTime",	13,	LE_VT_FILETIME },
-	{ "NumPages",		14,	LE_VT_I4 },
-	{ "NumWords",		15,	LE_VT_I4 },
-	{ "NumCharacters",	16,	LE_VT_I4 },
-	{ "Thumbnail",		17,	LE_VT_CF },
-	{ "AppName",		18,	LE_VT_LPSTR },
-	{ "Security",		19,	LE_VT_I4 }
-};
-
-static GsfMSOleMetaDataPropMap const common_props[] = {
-	{ "Dictionary",		0,	0, /* magic */},
-	{ "CodePage",		1,	LE_VT_UI2 },
-	{ "LOCALE_SYSTEM_DEFAULT",	0x80000000,	LE_VT_UI4},
-	{ "CASE_SENSITIVE",		0x80000003,	LE_VT_UI4},
-};
-
 typedef struct {
   char * text;
   EXTRACTOR_KeywordType type;
@@ -1398,8 +95,8 @@ static Matches tmap[] = {
   { "Title", EXTRACTOR_TITLE },
   { "PresentationFormat", EXTRACTOR_FORMAT },
   { "Category", EXTRACTOR_DESCRIPTION },
-  { "Manager", EXTRACTOR_CREATED_FOR },
-  { "Company", EXTRACTOR_ORGANIZATION },
+  { "Manager", EXTRACTOR_MANAGER },
+  { "Company", EXTRACTOR_COMPANY },
   { "Subject", EXTRACTOR_SUBJECT },
   { "Author", EXTRACTOR_AUTHOR },
   { "Keywords", EXTRACTOR_KEYWORDS },
@@ -1412,709 +109,98 @@ static Matches tmap[] = {
   { "NumBytes", EXTRACTOR_SIZE },
   { "CreatedTime", EXTRACTOR_CREATION_DATE },
   { "LastSavedTime" , EXTRACTOR_MODIFICATION_DATE },
+  { "gsf:company", EXTRACTOR_COMPANY },
+  /*  { "gsf:security", EXTRACTOR_SECURITY }, */
+  { "gsf:character-count", EXTRACTOR_CHARACTER_COUNT },
+  { "gsf:page-count", EXTRACTOR_PAGE_COUNT },
+  { "gsf:line-count", EXTRACTOR_LINE_COUNT },
+  { "gsf:word-count", EXTRACTOR_WORD_COUNT },
+  { "gsf:paragraph-count", EXTRACTOR_PARAGRAPH_COUNT },
+  { "gsf:last-saved-by", EXTRACTOR_LAST_SAVED_BY },
+  /* { "gsf:scale", EXTRACTOR_SCALE }, // always "false"? */
+  { "gsf:manager", EXTRACTOR_MANAGER },
+  { "dc:title", EXTRACTOR_TITLE },
+  { "dc:creator", EXTRACTOR_CREATOR },
+  { "dc:date", EXTRACTOR_DATE },
+  { "dc:subject", EXTRACTOR_SUBJECT },
+  { "dc:keywords", EXTRACTOR_KEYWORDS },
+  { "dc:last-printed", EXTRACTOR_LAST_PRINTED },
+  { "dc:description", EXTRACTOR_DESCRIPTION },
+  { "meta:creation-date", EXTRACTOR_CREATION_DATE },
+  /* { "meta:editing-duration", EXTRACTOR_TOTAL_EDITING_TIME }, // encoding? */
+  { "meta:generator", EXTRACTOR_GENERATOR }, 
+  { "meta:template", EXTRACTOR_TEMPLATE },
+  /* { "meta:editing-cycles", EXTRACTOR_EDITING_CYCLES }, // usually "FALSE" */
+  /* { "msole:codepage", EXTRACTOR_CHARACTER_SET }, */
   { NULL, 0 },
 };
 
-
-static char const *
-msole_prop_id_to_gsf (GsfMSOleMetaDataSection *section, guint32 id)
-{
-  char const *res = NULL;
-  GsfMSOleMetaDataPropMap const *map = NULL;
-  unsigned i = 0;
-
-  if (section->dict != NULL) {
-    if (id & 0x1000000) {
-      id &= ~0x1000000;
-      d (printf ("LINKED "););
-    }
-
-    res = g_hash_table_lookup (section->dict, GINT_TO_POINTER (id));
-
-    if (res != NULL) {
-      d (printf (res););
-      return res;
-    }
-  }
-
-  if (section->type == GSF_MSOLE_META_DATA_COMPONENT) {
-    map = component_props;
-    i = G_N_ELEMENTS (component_props);
-  } else if (section->type == GSF_MSOLE_META_DATA_DOCUMENT) {
-    map = document_props;
-    i = G_N_ELEMENTS (document_props);
-  }
-  while (i-- > 0)
-    if (map[i].id == id) {
-      d (printf (map[i].name););
-      return map[i].name;
-    }
-
-  map = common_props;
-  i = G_N_ELEMENTS (common_props);
-  while (i-- > 0)
-    if (map[i].id == id) {
-      d (printf (map[i].name););
-      return map[i].name;
-    }
-
-  d (printf ("_UNKNOWN_(0x%x %d)", id, id););
-
-  return NULL;
-}
-
-static GValue *
-msole_prop_parse(GsfMSOleMetaDataSection *section,
-		 guint32 type,
-		 guint8 const **data,
-		 guint8 const *data_end)
-{
-  GValue *res;
-  char *str;
-  guint32 len;
-  gboolean const is_vector = type & LE_VT_VECTOR;
-  GError * error;
-
-  g_return_val_if_fail (!(type & (unsigned)(~0x1fff)), NULL); /* not valid in a prop set */
-
-  type &= 0xfff;
-
-  if (is_vector) {
-    unsigned i, n;
-
-    g_return_val_if_fail (*data + 4 <= data_end, NULL);
-
-    n = GSF_LE_GET_GUINT32 (*data);
-    *data += 4;
-
-    d (printf (" array with %d elem\n", n););
-    for (i = 0 ; i < n ; i++) {
-      GValue *v;
-      d (printf ("\t[%d] ", i););
-      v = msole_prop_parse (section, type, data, data_end);
-      if (v) {
-	/* FIXME: do something with it.  */
-	if (G_IS_VALUE (v))
-	  g_value_unset (v);
-	g_free (v);
-      }
-    }
-    return NULL;
-  }
-
-  res = g_new0 (GValue, 1);
-  switch (type) {
-  case LE_VT_EMPTY :		 d (puts ("VT_EMPTY"););
-    /* value::unset == empty */
-    break;
-
-  case LE_VT_NULL :		 d (puts ("VT_NULL"););
-    /* value::unset == null too :-) do we need to distinguish ? */
-    break;
-
-  case LE_VT_I2 :		 d (puts ("VT_I2"););
-    g_return_val_if_fail (*data + 2 <= data_end, NULL);
-    g_value_init (res, G_TYPE_INT);
-    g_value_set_int	(res, GSF_LE_GET_GINT16 (*data));
-    *data += 2;
-    break;
-
-  case LE_VT_I4 :		 d (puts ("VT_I4"););
-    g_return_val_if_fail (*data + 4 <= data_end, NULL);
-    g_value_init (res, G_TYPE_INT);
-    g_value_set_int	(res, GSF_LE_GET_GINT32 (*data));
-    *data += 4;
-    break;
-
-  case LE_VT_R4 :		 d (puts ("VT_R4"););
-    g_return_val_if_fail (*data + 4 <= data_end, NULL);
-    g_value_init (res, G_TYPE_FLOAT);
-    g_value_set_float (res, GSF_LE_GET_FLOAT (*data));
-    *data += 4;
-    break;
-
-  case LE_VT_R8 :		 d (puts ("VT_R8"););
-    g_return_val_if_fail (*data + 8 <= data_end, NULL);
-    g_value_init (res, G_TYPE_DOUBLE);
-    g_value_set_double (res, GSF_LE_GET_DOUBLE (*data));
-    *data += 8;
-    break;
-
-  case LE_VT_CY :		 d (puts ("VT_CY"););
-    /* 8-byte two's complement integer (scaled by 10,000) */
-    /* CHEAT : just store as an int64 for now */
-    g_return_val_if_fail (*data + 8 <= data_end, NULL);
-    g_value_init (res, G_TYPE_INT64);
-    g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
-    break;
-
-  case LE_VT_DATE :		 d (puts ("VT_DATE"););
-    break;
-
-  case LE_VT_BSTR :		 d (puts ("VT_BSTR"););
-    break;
-
-  case LE_VT_DISPATCH :	 d (puts ("VT_DISPATCH"););
-    break;
-
-  case LE_VT_BOOL :		 d (puts ("VT_BOOL"););
-    g_return_val_if_fail (*data + 1 <= data_end, NULL);
-    g_value_init (res, G_TYPE_BOOLEAN);
-    g_value_set_boolean (res, **data ? TRUE : FALSE);
-    *data += 1;
-    break;
-
-  case LE_VT_VARIANT :	 d (printf ("VT_VARIANT containing a "););
-    g_free (res);
-    type = GSF_LE_GET_GUINT32 (*data);
-    *data += 4;
-    return msole_prop_parse (section, type, data, data_end);
-
-  case LE_VT_UI1 :		 d (puts ("VT_UI1"););
-    g_return_val_if_fail (*data + 1 <= data_end, NULL);
-    g_value_init (res, G_TYPE_UCHAR);
-    g_value_set_uchar (res, (guchar)(**data));
-    *data += 1;
-    break;
-
-  case LE_VT_UI2 :		 d (puts ("VT_UI2"););
-    g_return_val_if_fail (*data + 2 <= data_end, NULL);
-    g_value_init (res, G_TYPE_UINT);
-    g_value_set_uint (res, GSF_LE_GET_GUINT16 (*data));
-    *data += 2;
-    break;
-
-  case LE_VT_UI4 :		 d (puts ("VT_UI4"););
-    g_return_val_if_fail (*data + 4 <= data_end, NULL);
-    g_value_init (res, G_TYPE_UINT);
-    *data += 4;
-    d (printf ("%u\n", GSF_LE_GET_GUINT32 (*data)););
-    break;
-
-  case LE_VT_I8 :		 d (puts ("VT_I8"););
-    g_return_val_if_fail (*data + 8 <= data_end, NULL);
-    g_value_init (res, G_TYPE_INT64);
-    g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
-     *data += 8;
-    break;
-
-  case LE_VT_UI8 :		 d (puts ("VT_UI8"););
-    g_return_val_if_fail (*data + 8 <= data_end, NULL);
-    g_value_init (res, G_TYPE_UINT64);
-    g_value_set_uint64 (res, GSF_LE_GET_GUINT64 (*data));
-    *data += 8;
-    break;
-
-  case LE_VT_LPSTR :		 d (puts ("VT_LPSTR"););
-    /*
-     * This is the representation of many strings.  It is stored in
-     * the same representation as VT_BSTR.  Note that the serialized
-     * representation of VP_LPSTR has a preceding byte count, whereas
-     * the in-memory representation does not.
-     */
-    /* be anal and safe */
-    g_return_val_if_fail (*data + 4 <= data_end, NULL);
-    
-    len = GSF_LE_GET_GUINT32 (*data);
-    
-    g_return_val_if_fail (len < 0x10000, NULL);
-    g_return_val_if_fail (*data + 4 + len*section->char_size <= data_end, NULL);
-    
-    error = NULL;
-    d (gsf_mem_dump (*data + 4, len * section->char_size););
-    str = g_convert_with_iconv ((char*) *data + 4,
-				len * section->char_size,
-				section->iconv_handle, NULL, NULL, &error);
-    
-    g_value_init (res, G_TYPE_STRING);
-    if (NULL != str) {
-      g_value_set_string (res, str);
-      g_free (str);
-    } else if (NULL != error) {
-      g_warning ("error: %s", error->message);
-      g_error_free (error);
-    } else {
-      // g_warning ("unknown error converting string property, using blank");
-    }
-    *data += 4 + len * section->char_size;
-    break;
-
-  case LE_VT_LPWSTR : d (puts ("VT_LPWSTR"););
-    /*
-     * A counted and null-terminated Unicode string; a DWORD character
-     * count (where the count includes the terminating null) followed
-     * by that many Unicode (16-bit) characters.  Note that the count
-     * is character count, not byte count.
-     */
-    /* be anal and safe */
-    g_return_val_if_fail (*data + 4 <= data_end, NULL);
-    
-    len = GSF_LE_GET_GUINT32 (*data);
-    
-    g_return_val_if_fail (len < 0x10000, NULL);
-    g_return_val_if_fail (*data + 4 + len <= data_end, NULL);
-    
-    error = NULL;
-    d (gsf_mem_dump (*data + 4, len*2););
-    str = g_convert ((char*) *data + 4, 
-		     len*2,
-		     "UTF-8", 
-		     "UTF-16LE",
-		     NULL, 
-		     NULL, 
-		     &error);
-    
-    g_value_init (res, G_TYPE_STRING);
-    if (NULL != str) {
-      g_value_set_string (res, str);
-      g_free (str);
-    } else if (NULL != error) {
-      g_warning ("error: %s", error->message);
-      g_error_free (error);
-    } else {
-      g_warning ("unknown error converting string property, using blank");
-    }
-    *data += 4 + len*2;
-    break;
-
-  case LE_VT_FILETIME :	 d (puts ("VT_FILETIME"););
-
-    g_return_val_if_fail (*data + 8 <= data_end, NULL);
-
-    g_value_init (res, G_TYPE_STRING);
-    {
-      /* ft * 100ns since Jan 1 1601 */
-      guint64 ft = GSF_LE_GET_GUINT64 (*data);
-
-      ft /= 10000000; /* convert to seconds */
-#ifdef _MSC_VER
-      ft -= 11644473600i64; /* move to Jan 1 1970 */
-#else
-      ft -= 11644473600ULL; /* move to Jan 1 1970 */
-#endif
-
-      str = g_strdup(ctime((time_t*)&ft));
-
-      g_value_set_string (res, str);
-
-      *data += 8;
-      break;
-    }
-  case LE_VT_BLOB :		 d (puts ("VT_BLOB"););
-    g_free (res);
-    res = NULL;
-    break;
-  case LE_VT_STREAM :	 d (puts ("VT_STREAM"););
-    g_free (res);
-    res = NULL;
-     break;
-  case LE_VT_STORAGE :	 d (puts ("VT_STORAGE"););
-    g_free (res);
-    res = NULL;
-    break;
-  case LE_VT_STREAMED_OBJECT: d (puts ("VT_STREAMED_OBJECT"););
-    g_free (res);
-    res = NULL;
-    break;
-  case LE_VT_STORED_OBJECT :	 d (puts ("VT_STORED_OBJECT"););
-    g_free (res);
-    res = NULL;
-    break;
-  case LE_VT_BLOB_OBJECT :	 d (puts ("VT_BLOB_OBJECT"););
-    g_free (res);
-    res = NULL;
-    break;
-  case LE_VT_CF :		 d (puts ("VT_CF"););
-    break;
-  case LE_VT_CLSID :		 d (puts ("VT_CLSID"););
-    *data += 16;
-    g_free (res);
-    res = NULL;
-    break;
-
-  case LE_VT_ERROR :
-  case LE_VT_UNKNOWN :
-  case LE_VT_DECIMAL :
-  case LE_VT_I1 :
-  case LE_VT_INT :
-  case LE_VT_UINT :
-  case LE_VT_VOID :
-  case LE_VT_HRESULT :
-  case LE_VT_PTR :
-  case LE_VT_SAFEARRAY :
-  case LE_VT_CARRAY :
-  case LE_VT_USERDEFINED :
-    warning ("type %d (0x%x) is not permitted in property sets",
-	       type, type);
-    g_free (res);
-    res = NULL;
-    break;
-
-  default :
-    warning ("Unknown property type %d (0x%x)", type, type);
-    g_free (res);
-    res = NULL;
-  };
-
-  d ( if (res != NULL && G_IS_VALUE (res)) {
-    char *val = g_strdup_value_contents (res);
-    d(printf ("%s\n", val););
-    g_free (val);
-  } else
-      puts ("<unparsed>\n");
-      );
-  return res;
-}
-
-static GValue *
-msole_prop_read (struct GsfInput *in,
-		 GsfMSOleMetaDataSection *section,
-		 GsfMSOleMetaDataProp    *props,
-		 unsigned i)
-{
-  guint32 type;
-  guint8 const *data;
-  /* TODO : why size-4 ? I must be missing something */
-  off_t size = ((i+1) >= section->num_props)
-    ? section->size-4 : props[i+1].offset;
-  char const *prop_name;
-
-  g_return_val_if_fail (i < section->num_props, NULL);
-  g_return_val_if_fail (size >= props[i].offset + 4, NULL);
-
-  size -= props[i].offset; /* includes the type id */
-  if (gsf_input_seek (in, section->offset+props[i].offset, SEEK_SET) ||
-      NULL == (data = gsf_input_read (in, size, NULL))) {
-    warning ("failed to read prop #%d", i);
-    return NULL;
-  }
-
-  type = GSF_LE_GET_GUINT32 (data);
-  data += 4;
-
-  /* dictionary is magic */
-  if (props[i].id == 0) {
-    guint32 len, id, i, n;
-    gsize gslen;
-    char *name;
-    guint8 const *start = data;
-
-    g_return_val_if_fail (section->dict == NULL, NULL);
-
-    section->dict = g_hash_table_new_full (
-					   g_direct_hash, g_direct_equal,
-					   NULL, g_free);
-
-    n = type;
-    for (i = 0 ; i < n ; i++) {
-      id = GSF_LE_GET_GUINT32 (data);
-      len = GSF_LE_GET_GUINT32 (data + 4);
-
-      g_return_val_if_fail (len < 0x10000, NULL);
-
-      gslen = 0;
-      name = g_convert_with_iconv ((char*) data + 8,
-				   len * section->char_size,
-				   section->iconv_handle, &gslen, NULL, NULL);
-
-      len = (guint32)gslen;
-      data += 8 + len;
-
-      d (printf ("\t%u == %s\n", id, name););
-      g_hash_table_replace (section->dict,
-			    GINT_TO_POINTER (id), name);
-
-      /* MS documentation blows goats !
-       * The docs claim there are padding bytes in the dictionary.
-       * Their examples show padding bytes.
-       * In reality non-unicode strings do not see to have padding.
-       */
-      if (section->char_size != 1 && (data - start) % 4)
-	data += 4 - ((data - start) % 4);
-    }
-
-    return NULL;
-  }
-
-  d (printf ("%u) ", i););
-  prop_name = msole_prop_id_to_gsf (section, props[i].id);
-
-  d (printf (" @ %x %x = ", (unsigned)props[i].offset, (unsigned)size););
-  return msole_prop_parse (section, type, &data, data + size);
-}
-
-static int
-msole_prop_cmp (gconstpointer a, gconstpointer b)
-{
-  GsfMSOleMetaDataProp const *prop_a = a ;
-  GsfMSOleMetaDataProp const *prop_b = b ;
-  return prop_a->offset - prop_b->offset;
-}
-
-/**
- * gsf_msole_iconv_open_codepage_for_import :
- * @to:
- * @codepage :
- *
- * Returns an iconv converter for @codepage -> utf8.
- **/
-static GIConv
-gsf_msole_iconv_open_codepage_for_import(char const *to,
-					 int codepage)
-{
-  GIConv iconv_handle;
-
-  g_return_val_if_fail (to != NULL, (GIConv)(-1));
-  /* sometimes it is stored as signed short */
-  if (codepage == 65001 || codepage == -535) {
-    iconv_handle = g_iconv_open (to, "UTF-8");
-    if (iconv_handle != (GIConv)(-1))
-      return iconv_handle;
-  } else if (codepage != 1200 && codepage != 1201) {
-    char* src_charset = g_strdup_printf ("CP%d", codepage);
-    iconv_handle = g_iconv_open (to, src_charset);
-    g_free (src_charset);
-    if (iconv_handle != (GIConv)(-1))
-      return iconv_handle;
+/**
+ * Callback for gsf_doc_meta_data_foreach: turns one document
+ * property into a keyword if its name is listed in tmap.
+ *
+ * @param key name of the property (a C string); compared against
+ *        the text field of the tmap entries
+ * @param value the GsfDocProp holding the value of the property
+ * @param user_data pointer to the head of the keyword list
+ *        (struct EXTRACTOR_Keywords **); updated in place when
+ *        a keyword is added
+ */
+static void processMetadata(gpointer key,
+			    gpointer value,
+			    gpointer user_data) {
+  struct EXTRACTOR_Keywords ** pprev = user_data;
+  const char * type = key;
+  const GsfDocProp * prop = value;
+  const GValue * gval;
+  char * contents;
+  size_t len;
+  int pos;
+
+  if ( (key == NULL) ||
+       (value == NULL) )
+    return;
+  gval = gsf_doc_prop_get_val(prop);
+  if (gval == NULL)
+    return;
+
+  if (G_VALUE_TYPE(gval) == G_TYPE_STRING) {
+    /* g_strdup maps a NULL string to NULL (strdup would crash) and
+       keeps both branches on the GLib allocator for g_free below */
+    contents = g_strdup(g_value_get_string(gval));
   } else {
-    char const *from = (codepage == 1200) ? "UTF-16LE" : "UTF-16BE";
-    iconv_handle = g_iconv_open (to, from);
-    if (iconv_handle != (GIConv)(-1))
-      return iconv_handle;
-  }
-
-  /* Try aliases.  */
-  if (codepage == 10000) {
-    /* gnu iconv.  */
-    iconv_handle = g_iconv_open (to, "MACROMAN");
-    if (iconv_handle != (GIConv)(-1))
-      return iconv_handle;
-
-    /* glibc.  */
-    iconv_handle = g_iconv_open (to, "MACINTOSH");
-    if (iconv_handle != (GIConv)(-1))
-      return iconv_handle;
+    /* convert other formats? */
+    contents = g_strdup_value_contents(gval);
   }
-
-  warning ("Unable to open an iconv handle from codepage %d -> %s",
-	     codepage, to);
-  return (GIConv)(-1);
-}
-
-/**
- * gsf_msole_iconv_open_for_import :
- * @codepage :
- *
- * Returns an iconv converter for single byte encodings @codepage -> utf8.
- * 	Attempt to handle the semantics of a specification for multibyte encodings
- * 	since this is only supposed to be used for single bytes.
- **/
-static GIConv
-gsf_msole_iconv_open_for_import (int codepage)
-{
-  return gsf_msole_iconv_open_codepage_for_import ("UTF-8", codepage);
-}
-
-
-
-
-
-static struct EXTRACTOR_Keywords * process(struct GsfInput * in,
-					   struct EXTRACTOR_Keywords * prev) {
-  guint8 const *data = gsf_input_read (in, 28, NULL);
-  guint16 version;
-  guint32 os, num_sections;
-  unsigned i, j;
-  GsfMSOleMetaDataSection *sections;
-  GsfMSOleMetaDataProp *props;
-
-  if (NULL == data)
-    return prev;
-
-  /* NOTE : high word is the os, low word is the os version
-   * 0 = win16
-   * 1 = mac
-   * 2 = win32
-   */
-  os = GSF_LE_GET_GUINT16 (data + 6);
-
-  version = GSF_LE_GET_GUINT16 (data + 2);
-
-  num_sections = GSF_LE_GET_GUINT32 (data + 24);
-  if (GSF_LE_GET_GUINT16 (data + 0) != 0xfffe
-      || (version != 0 && version != 1)
-      || os > 2
-      || num_sections > 100) { /* arbitrary sanity check */
-    return prev;
+  /* the NULL check has to come before any strlen on contents */
+  if (contents == NULL)
+    return;
+  /* strip a single trailing newline */
+  len = strlen(contents);
+  if ( (len > 0) &&
+       (contents[len-1] == '\n') )
+    contents[len-1] = '\0';
+  /* find the tmap entry for this property name; the table is
+     terminated by an entry whose text is NULL */
+  pos = 0;
+  while (tmap[pos].text != NULL) {
+    if (0 == strcmp(tmap[pos].text,
+		    type))
+      break;
+    pos++;
   }
-
-  /* extract the section info */
-  sections = (GsfMSOleMetaDataSection *)g_alloca (sizeof (GsfMSOleMetaDataSection)* num_sections);
-  for (i = 0 ; i < num_sections ; i++) {
-    data = gsf_input_read (in, 20, NULL);
-    if (NULL == data) {
-      return prev;
-    }
-    if (!memcmp (data, component_guid, sizeof (component_guid)))
-      sections [i].type = GSF_MSOLE_META_DATA_COMPONENT;
-    else if (!memcmp (data, document_guid, sizeof (document_guid)))
-      sections [i].type = GSF_MSOLE_META_DATA_DOCUMENT;
-    else if (!memcmp (data, user_guid, sizeof (user_guid)))
-      sections [i].type = GSF_MSOLE_META_DATA_USER;
-    else {
-      sections [i].type = GSF_MSOLE_META_DATA_USER;
-      warning ("Unknown property section type, treating it as USER");
-    }
-
-    sections [i].offset = GSF_LE_GET_GUINT32 (data + 16);
-#ifndef NO_DEBUG_OLE_PROPS
-    d(printf ("0x%x\n", (guint32)sections [i].offset););
+  if (tmap[pos].text != NULL)
+    *pprev = addKeyword(*pprev,
+			contents,
+			tmap[pos].type);
+#if DEBUG_OLE2
+  else
+    printf("No match for type `%s'\n",
+	   type);
 #endif
-  }
-  for (i = 0 ; i < num_sections ; i++) {
-    if (gsf_input_seek (in, sections[i].offset, SEEK_SET) ||
-	NULL == (data = gsf_input_read (in, 8, NULL))) {
-      return prev;
-    }
-
-    sections[i].iconv_handle = (GIConv)-1;
-    sections[i].char_size    = 1;
-    sections[i].dict      = NULL;
-    sections[i].size      = GSF_LE_GET_GUINT32 (data); /* includes header */
-    sections[i].num_props = GSF_LE_GET_GUINT32 (data + 4);
-    if (sections[i].num_props <= 0)
-      continue;
-    props = g_new (GsfMSOleMetaDataProp, sections[i].num_props);
-    for (j = 0; j < sections[i].num_props; j++) {
-      if (NULL == (data = gsf_input_read (in, 8, NULL))) {
-	g_free (props);
-	return prev;
-      }
-
-      props [j].id = GSF_LE_GET_GUINT32 (data);
-      props [j].offset  = GSF_LE_GET_GUINT32 (data + 4);
-    }
-
-    /* order prop info by offset to facilitate bounds checking */
-    qsort (props, sections[i].num_props,
-	   sizeof (GsfMSOleMetaDataProp),
-	   msole_prop_cmp);
+  /* contents was freed right after addKeyword before this change
+     as well, so the keyword list is not given ownership here */
+  g_free(contents);
+}
 
-    sections[i].iconv_handle = (GIConv)-1;
-    sections[i].char_size = 1;
-    for (j = 0; j < sections[i].num_props; j++) /* first codepage */
-      if (props[j].id == 1) {
-	GValue *v = msole_prop_read (in, sections+i, props, j);
-	if (v != NULL) {
-	  if (G_IS_VALUE (v)) {
-	    if (G_VALUE_HOLDS_INT (v)) {
-	      int codepage = g_value_get_int (v);
-	      sections[i].iconv_handle = gsf_msole_iconv_open_for_import (codepage);
-	      if (codepage == 1200 || codepage == 1201)
-		sections[i].char_size = 2;
-	    }
-	    g_value_unset (v);
-	  }
-	  g_free (v) ;
-	}
-      }
-    if (sections[i].iconv_handle == (GIConv)-1)
-      sections[i].iconv_handle = gsf_msole_iconv_open_for_import (1252);
 
-    for (j = 0; j < sections[i].num_props; j++) /* then dictionary */
-      if (props[j].id == 0) {
-	GValue *v = msole_prop_read (in, sections+i, props, j);
-	if (v) {
-	  if (G_VALUE_TYPE(v) == G_TYPE_STRING) {
-	    gchar * contents = g_strdup_value_contents(v);
-	    free(contents);
-	  } else {	
-	
-	    /* FIXME: do something with non-strings...  */
-	  }
-	  if (G_IS_VALUE (v))
-	    g_value_unset (v);
-	  g_free (v);
-	}
-      }
-    for (j = 0; j < sections[i].num_props; j++) /* the rest */
-      if (props[j].id > 1) {	
-	GValue *v = msole_prop_read (in, sections+i, props, j);
-	if (v && G_IS_VALUE(v)) {
-	  gchar * contents = NULL;
-	  int pc;
-	  int ipc;
-	
-	  if (G_VALUE_TYPE(v) == G_TYPE_STRING) {
-	    contents = strdup(g_value_get_string(v));
-	  } else {
-	    /* convert other formats? */
-	    contents = g_strdup_value_contents(v);
-	  }	
-	  pc = 0;
-	  if (contents != NULL) {
-	    for (ipc=strlen(contents)-1;ipc>=0;ipc--)
-	      if ( (isprint(contents[ipc])) &&
-		   (! isspace(contents[ipc])) )
-		pc++;
-	    if ( (strlen(contents) > 0) &&
-		 (contents[strlen(contents)-1] == '\n') )
-		 contents[strlen(contents)-1] = '\0';
-	  }
-	  if (pc > 0) {
-	    int pos = 0;
-	    const char * prop
-	      = msole_prop_id_to_gsf(sections+i, props[j].id);
-	    if (prop != NULL) {
-	      while (tmap[pos].text != NULL) {
-		if (0 == strcmp(tmap[pos].text,
-				prop))
-		  break;
-		pos++;
-	      }
-	      if (tmap[pos].text != NULL)
-		prev = addKeyword(prev,
-				  contents,
-				  tmap[pos].type);
-	    }
-	  }
-	  if (contents != NULL)
-	    free(contents);	
-	}
-	if (v) {
-	  if (G_IS_VALUE (v))
-	    g_value_unset (v);
-	  g_free (v);
-	}
-      }
+/**
+ * Read the OLE2 property set in the given stream via libgsf and
+ * add a keyword for every property that processMetadata recognizes.
+ *
+ * @param in stream holding the property set
+ * @param prev current head of the keyword list
+ * @return the (possibly extended) keyword list
+ */
+static struct EXTRACTOR_Keywords * 
+process(GsfInput * in,
+	struct EXTRACTOR_Keywords * prev) {
+  GsfDocMetaData * sections;
+  GError * error;
 
-    gsf_iconv_close (sections[i].iconv_handle);
-    g_free (props);
-    if (sections[i].dict != NULL)
-      g_hash_table_destroy (sections[i].dict);
-  }
-  switch (os) {
-  case 0:
-    prev = addKeyword(prev,
-		      "Win16",
-		      EXTRACTOR_OS);
-    break;
-  case 1:
-    prev = addKeyword(prev,
-		      "MacOS",
-		      EXTRACTOR_OS);
-    break;
-  case 2:
-    prev = addKeyword(prev,
-		      "Win32",
-		      EXTRACTOR_OS);
-    break;
+  sections = gsf_doc_meta_data_new();
+  error = gsf_msole_metadata_read(in, sections);
+  if (error == NULL) {
+    gsf_doc_meta_data_foreach(sections,
+			      &processMetadata,
+			      &prev);
+  } else {
+    /* NOTE(review): libgsf documents the returned GError as owned
+       by the caller; without this it leaks on every unreadable
+       property set -- confirm against the libgsf version in use */
+    g_error_free(error);
   }
+  g_object_unref(G_OBJECT(sections));
   return prev;
 }
 
-static struct EXTRACTOR_Keywords * processSO(struct GsfInput * src,
-					     struct EXTRACTOR_Keywords * prev) {
+static struct EXTRACTOR_Keywords * 
+processSO(GsfInput * src,
+	  struct EXTRACTOR_Keywords * prev) {
   off_t size;
   char * buf;
 
@@ -2161,61 +247,290 @@ static struct EXTRACTOR_Keywords * processSO(struct GsfInput * src,
   return prev;
 }
 
+/* *************** wordleaker stuff *************** */
+
+#define __(a) dgettext("iso-639", a)
+
+/**
+ * Map a Microsoft language identifier (LID) to a translated,
+ * human-readable language name.
+ *
+ * Names wrapped in __() are looked up in the "iso-639" text domain,
+ * names wrapped in _() go through the _() macro defined elsewhere
+ * in this file.
+ *
+ * @param lid the language identifier
+ * @return the language name, NULL if the identifier is not known
+ */
+static const char * lidToLanguage( unsigned int lid ) {
+  switch ( lid ) {
+  case 0x0400: return _("No Proofing");
+  case 0x0401: return __("Arabic");
+  case 0x0402: return __("Bulgarian");
+  case 0x0403: return __("Catalan");
+  case 0x0404: return _("Traditional Chinese");
+  case 0x0804: return _("Simplified Chinese");
+  /* 0x0405 is Czech (cs-CZ); it used to be reported as "Chechen" */
+  case 0x0405: return __("Czech");
+  case 0x0406: return __("Danish");
+  case 0x0407: return __("German");
+  case 0x0807: return _("Swiss German");
+  case 0x0408: return __("Greek");
+  case 0x0409: return _("U.S. English");
+  case 0x0809: return _("U.K. English");
+  case 0x0c09: return _("Australian English");
+  case 0x040a: return _("Castilian Spanish");
+  case 0x080a: return _("Mexican Spanish");
+  case 0x040b: return __("Finnish");
+  case 0x040c: return __("French");
+  case 0x080c: return _("Belgian French");
+  case 0x0c0c: return _("Canadian French");
+  case 0x100c: return _("Swiss French");
+  case 0x040d: return __("Hebrew");
+  case 0x040e: return __("Hungarian");
+  case 0x040f: return __("Icelandic");
+  case 0x0410: return __("Italian");
+  case 0x0810: return _("Swiss Italian");
+  case 0x0411: return __("Japanese");
+  case 0x0412: return __("Korean");
+  case 0x0413: return __("Dutch");
+  case 0x0813: return _("Belgian Dutch");
+  case 0x0414: return _("Norwegian Bokmal");
+  case 0x0814: return __("Norwegian Nynorsk");
+  case 0x0415: return __("Polish");
+  case 0x0416: return __("Brazilian Portuguese");
+  case 0x0816: return __("Portuguese");
+  case 0x0417: return _("Rhaeto-Romanic");
+  case 0x0418: return __("Romanian");
+  case 0x0419: return __("Russian");
+  case 0x041a: return _("Croato-Serbian (Latin)");
+  case 0x081a: return _("Serbo-Croatian (Cyrillic)");
+  case 0x041b: return __("Slovak");
+  case 0x041c: return __("Albanian");
+  case 0x041d: return __("Swedish");
+  case 0x041e: return __("Thai");
+  case 0x041f: return __("Turkish");
+  case 0x0420: return __("Urdu");
+  case 0x0421: return __("Bahasa");
+  case 0x0422: return __("Ukrainian");
+  case 0x0423: return __("Byelorussian");
+  case 0x0424: return __("Slovenian");
+  case 0x0425: return __("Estonian");
+  case 0x0426: return __("Latvian");
+  case 0x0427: return __("Lithuanian");
+  case 0x0429: return _("Farsi");
+  case 0x042D: return __("Basque");
+  case 0x042F: return __("Macedonian");
+  case 0x0436: return __("Afrikaans");
+  case 0x043E: return __("Malayalam");
+  default:
+    return NULL;
+  }
+}
+
+    
+/**
+ * Extract the revision history (author and file name of each saved
+ * revision) from a table stream and add one keyword per revision.
+ *
+ * @param stream the stream to read the string table from
+ * @param lcbSttbSavedBy size of the string table in bytes
+ * @param fcSttbSavedBy offset of the string table in the stream
+ * @param prev current head of the keyword list
+ * @return the (possibly extended) keyword list
+ */
+static struct EXTRACTOR_Keywords * 
+history_extract(GsfInput * stream,
+		unsigned int lcbSttbSavedBy,
+		unsigned int fcSttbSavedBy,
+		struct EXTRACTOR_Keywords * prev) {
+  unsigned int where;
+  unsigned char * lbuffer;
+  unsigned int i;
+  unsigned int length;
+  char * author;
+  char * filename;
+  char * rbuf;
+  size_t rlen;
+  unsigned int nRev;
+
+  /* bytes 2 and 3 hold the string count and the first entry starts
+     at byte 6, so a shorter table cannot be parsed */
+  if (lcbSttbSavedBy < 6)
+    return prev;
+  /* go to the offset of the revision table; gsf_input_seek returns
+     TRUE on failure */
+  if (gsf_input_seek(stream, fcSttbSavedBy, G_SEEK_SET))
+    return prev;
+  if (gsf_input_remaining(stream) < lcbSttbSavedBy)
+    return prev;
+  lbuffer = malloc(lcbSttbSavedBy);
+  if (lbuffer == NULL)
+    return prev;
+  /* read all the revision history */
+  if (NULL == gsf_input_read(stream, lcbSttbSavedBy, lbuffer)) {
+    free(lbuffer);
+    return prev;
+  }
+  /* there are n strings, so n/2 revisions (author & file) */
+  nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2;
+  where = 6;
+  for (i=0; i < nRev; i++) {
+    if (where >= lcbSttbSavedBy)
+      break;
+    length = lbuffer[where++];
+    if ( (where + 2 * length + 2 >= lcbSttbSavedBy) ||
+	 (where + 2 * length + 2 <= where) )
+      break;
+    author = convertToUtf8((const char*) &lbuffer[where],
+			   length * 2,
+			   "UTF-16BE");
+    where += length * 2 + 1;
+    length = lbuffer[where++];
+    if ( (where + 2 * length >= lcbSttbSavedBy) ||
+	 (where + 2 * length + 1 <= where) ) {
+      /* author was already converted; do not leak it */
+      free(author);
+      break;
+    }
+    filename = convertToUtf8((const char*) &lbuffer[where],
+			     length * 2,
+			     "UTF-16BE");
+    where += length * 2 + 1;
+    /* NOTE(review): convertToUtf8 is defined elsewhere; the NULL
+       checks are defensive in case it can fail */
+    if ( (author != NULL) && (filename != NULL) ) {
+      rlen = strlen(author) + strlen(filename) + 512;
+      rbuf = malloc(rlen);
+      if (rbuf != NULL) {
+	snprintf(rbuf, rlen,
+		 _("Revision #%u: Author '%s' worked on '%s'"),
+		 i, author, filename);
+	prev = addKeyword(prev,
+			  rbuf,
+			  EXTRACTOR_REVISION_HISTORY);
+	free(rbuf);
+      }
+    }
+    free(author);
+    free(filename);
+  }
+  free(lbuffer);
+  return prev;
+}
+
+
+/* ************** main method *********** */
+
 struct EXTRACTOR_Keywords *
 libextractor_ole2_extract(const char * filename,
 			  const char * data,
 			  size_t size,
 			  struct EXTRACTOR_Keywords * prev) {
-  struct GsfInput   *input;
-  struct GsfInfileMSOle * infile;
-  struct GsfInput * src;
+  GsfInput * input;
+  GsfInfile * infile;
+  GsfInput * src;
+  GError * err = NULL;
   const char * name;
-  const char * software = 0;
+  const char * software = NULL;
   int i;
-
-  input = gsf_input_new((const unsigned char*) data,
-			(off_t) size,
-			0);
+  unsigned int lcb;
+  unsigned int fcb;
+  const unsigned char * data512;
+  unsigned int lid;
+  const char * lang;
+
+  if (size < 512 + 898)
+    return prev; /* can hardly be OLE2 */
+  input = gsf_input_memory_new((const guint8 *) data,
+			       (gsf_off_t) size,
+			       FALSE);
   if (input == NULL)
     return prev;
 
-  infile = gsf_infile_msole_new(input);
-  if (infile == NULL)
+  infile = gsf_infile_msole_new(input, &err);
+  if (infile == NULL) {
+    g_object_unref(G_OBJECT(input));
     return prev;
-
-  for (i=0;i<gsf_infile_msole_num_children(infile);i++) {
-    name = gsf_infile_msole_name_by_index (infile, i);
+  }
+  lcb = 0;
+  fcb = 0;
+  for (i=0;i<gsf_infile_num_children(infile);i++) {
+    name = gsf_infile_name_by_index (infile, i);
     src = NULL;
     if (name == NULL)
       continue;
     if ( (0 == strcmp(name, "\005SummaryInformation"))
 	 || (0 == strcmp(name, "\005DocumentSummaryInformation")) ) {
-      src = gsf_infile_msole_child_by_index (infile, i);
-      if (src != NULL)
+      src = gsf_infile_child_by_index (infile, i);
+      if (src != NULL) 
 	prev = process(src,
 		       prev);
     }
     if (0 == strcmp(name, "SfxDocumentInfo")) {
-      src = gsf_infile_msole_child_by_index (infile, i);
+      src = gsf_infile_child_by_index (infile, i);
       if (src != NULL)
 	prev = processSO(src,
 			 prev);
     }
     if (src != NULL)
-      gsf_input_finalize(src);
+      g_object_unref(G_OBJECT(src));
   }
-  gsf_infile_msole_finalize(infile);
+
+  data512 = (const unsigned char*) &data[512];
+  lid = data512[6] + (data512[7] << 8);
+  lcb = data512[726] + (data512[727] << 8) + (data512[728] << 16) + (data512[729] << 24);
+  fcb = data512[722] + (data512[723] << 8) + (data512[724] << 16) + (data512[725] << 24);
+  lang = lidToLanguage(lid);
+  if (lang != NULL) {
+    prev = addKeyword(prev,
+		      lang,
+		      EXTRACTOR_LANGUAGE);
+  }
+  if (lcb >= 6) {
+    for (i=0;i<gsf_infile_num_children(infile);i++) {
+      name = gsf_infile_name_by_index (infile, i);
+      if (name == NULL)
+	continue;
+      if ( (0 == strcmp(name, "1Table")) ||
+	   (0 == strcmp(name, "0Table")) ) {
+	src = gsf_infile_child_by_index (infile, i);
+	if (src != NULL) {
+	  prev = history_extract(src,
+				 lcb,
+				 fcb,
+				 prev);
+	  g_object_unref(G_OBJECT(src));
+	}
+      }
+    }
+  }  
+  g_object_unref(G_OBJECT(infile));
 
   /*
    * Hack to return an appropriate mimetype
    */
   software = EXTRACTOR_extractLast(EXTRACTOR_SOFTWARE, prev);
-  if(NULL == software) {
+  if (NULL == software) {
      /*
       * when very puzzled, just look at file magic number
       */
-    if( (8 < size)
-     && (0 == memcmp(data, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8)) )
+    if ( (8 < size)
+	 && (0 == memcmp(data, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8)) )
       software = "Microsoft Office";
   }
 
diff --git a/src/plugins/wordleaker/Makefile.am b/src/plugins/wordleaker/Makefile.am
@@ -1,25 +0,0 @@
-include ../Makefile-plugins.am
-
-plugin_LTLIBRARIES = \
- libextractor_word.la
-
-libextractor_word_la_LINK = \
-  /bin/sh ../../../libtool --mode=link $(CXXLD) -o libextractor_word.la
-libextractor_word_la_LDFLAGS = \
-  $(PLUGINFLAGS)  $(retaincommand) \
-  $(XTRA_CPPLIBS)
-libextractor_word_la_LIBADD = \
-  $(top_builddir)/src/main/libextractor.la \
-  $(top_builddir)/src/plugins/libconvert.la \
-  -lm 
-
-libextractor_word_la_SOURCES = \
- pole.h pole.cpp \
- wordleaker.h \
- wordextractor.cc 
-
-# gcc 3.3 produces BROKEN code for -O1 and -O2 (PDF extraction
-# would fail silently) hence we MUST override the user flag here
-# which may contain -O1 or -O2!
-# CXXFLAGS = -O0
-
diff --git a/src/plugins/wordleaker/SYMBOLS b/src/plugins/wordleaker/SYMBOLS
@@ -1 +0,0 @@
-libextractor_word_extract
diff --git a/src/plugins/wordleaker/pole.cpp b/src/plugins/wordleaker/pole.cpp
@@ -1,1271 +0,0 @@
-/* POLE - Portable C++ library to access OLE Storage 
-   Copyright (C) 2002-2004 Ariya Hidayat <ariya@kde.org>
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-   
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public License
-   along with this library; see the file COPYING.LIB.  If not, write to
-   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, US
-*/
-
-#include <fstream>
-#include <iostream>
-#include <list>
-#include <string>
-#include <vector>
-
-#include "pole.h"
-
-namespace POLE
-{
-
-class Header
-{
-  public:
-    unsigned char id[8];       // signature, or magic identifier
-    unsigned b_shift;          // bbat->blockSize = 1 << b_shift
-    unsigned s_shift;          // sbat->blockSize = 1 << s_shift
-    unsigned num_bat;          // blocks allocated for big bat
-    unsigned dirent_start;     // starting block for directory info
-    unsigned threshold;        // switch from small to big file (usually 4K)
-    unsigned sbat_start;       // starting block index to store small bat
-    unsigned num_sbat;         // blocks allocated for small bat
-    unsigned mbat_start;       // starting block to store meta bat
-    unsigned num_mbat;         // blocks allocated for meta bat
-    unsigned long bb_blocks[109];
-    
-    Header();
-    void load( const unsigned char* buffer );
-    void save( unsigned char* buffer );
-    void debug();
-};
-
-class AllocTable
-{
-  public:
-    static const unsigned Eof;
-    static const unsigned Avail;
-    static const unsigned Bat;    
-    unsigned blockSize;
-    AllocTable();
-    void clear();
-    unsigned long count();
-    void resize( unsigned long newsize );
-    void preserve( unsigned long n );
-    void set( unsigned long index, unsigned long val );
-    unsigned unused();
-    void setChain( std::vector<unsigned long> );
-    std::vector<unsigned long> follow( unsigned long start );
-    unsigned long operator[](unsigned long index );
-    void load( const unsigned char* buffer, unsigned len );
-    void save( unsigned char* buffer );
-    unsigned size();
-    void debug();
-  private:
-    std::vector<unsigned long> data;
-    AllocTable( const AllocTable& );
-    AllocTable& operator=( const AllocTable& );
-};
-
-class DirEntry
-{
-  public:
-    std::string name;
-    bool dir;              // true if directory   
-    unsigned long size;    // size (not valid if directory)
-    unsigned long start;   // starting block
-    unsigned prev;         // previous sibling
-    unsigned next;         // next sibling
-    unsigned child;        // first child
-};
-
-class DirTree
-{
-  public:
-    static const unsigned End;
-    DirTree();
-    void clear();
-    unsigned entryCount();
-    DirEntry* entry( unsigned index );
-    DirEntry* entry( const std::string& name, bool create=false );
-    int indexOf( DirEntry* e );
-    int parent( unsigned index );
-    std::string fullName( unsigned index );
-    std::vector<unsigned> children( unsigned index );
-    std::vector<DirEntry*> listDirectory();
-    bool enterDirectory( const std::string& dir );
-    void leaveDirectory();
-    std::string path();
-    void load( unsigned char* buffer, unsigned len );
-    void save( unsigned char* buffer );
-    unsigned size();
-    void debug();
-  private:
-    unsigned current;
-    std::vector<DirEntry> entries;
-    DirTree( const DirTree& );
-    DirTree& operator=( const DirTree& );
-};
-
-class StorageIO
-{
-  public:
-    Storage* storage;
-    std::string filename;
-    std::fstream file;
-    int result;               // result of operation
-    bool opened;              // true if file is opened
-    unsigned long filesize;   // size of the file
-    
-    Header* header;           // storage header 
-    DirTree* dirtree;         // directory tree
-    AllocTable* bbat;         // allocation table for big blocks
-    AllocTable* sbat;         // allocation table for small blocks
-    
-    std::vector<unsigned long> sb_blocks; // blocks for "small" files
-       
-    std::list<Stream*> streams;
-
-    StorageIO( Storage* storage, const char* filename );
-    ~StorageIO();
-    
-    bool open();
-    void close();
-    void flush();
-    void load();
-    void create();
-
-    unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
-
-    unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
-
-    unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
-
-    unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
-
-  private:  
-    // no copy or assign
-    StorageIO( const StorageIO& );
-    StorageIO& operator=( const StorageIO& );
-
-};
-
-class StreamImpl
-{
-  public:
-    StreamImpl( StorageIO* io, DirEntry* entry );
-    ~StreamImpl();
-    unsigned long size();
-    void seek( unsigned long pos );
-    unsigned long tell();
-    int getch();
-    unsigned long read( unsigned char* data, unsigned long maxlen );
-    unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen );
-
-    StorageIO* io;
-    DirEntry* entry;
-
-  private:
-    std::vector<unsigned long> blocks;
-
-    // no copy or assign
-    StreamImpl( const StreamImpl& );
-    StreamImpl& operator=( const StreamImpl& );
-
-    // pointer for read
-    unsigned long m_pos;
-
-    // simple cache system to speed-up getch()
-    unsigned char* cache_data;
-    unsigned long cache_size;
-    unsigned long cache_pos;
-    void updateCache();
-};
-
-}; // namespace POLE
-
-using namespace POLE;
-
-static inline unsigned long readU16( const unsigned char* ptr )
-{
-  return ptr[0]+(ptr[1]<<8);
-}
-
-static inline unsigned long readU32( const unsigned char* ptr )
-{
-  return ptr[0]+(ptr[1]<<8)+(ptr[2]<<16)+(ptr[3]<<24);
-}
-
-static inline void writeU16( unsigned char* ptr, unsigned long data )
-{
-  ptr[0] = data & 0xff;
-  ptr[1] = (data >> 8) & 0xff;
-}
-
-static inline void writeU32( unsigned char* ptr, unsigned long data )
-{
-  ptr[0] = data & 0xff;
-  ptr[1] = (data >> 8) & 0xff;
-  ptr[2] = (data >> 16) & 0xff;
-  ptr[3] = (data >> 24) & 0xff;
-}
-
-static const unsigned char pole_magic[] = 
- { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
-
-// =========== Header ==========
-
-Header::Header()
-{
-  b_shift = 9;
-  s_shift = 6;
-  num_bat = 0;
-  dirent_start = 0;
-  threshold = 4096;
-  sbat_start = 0;
-  num_sbat = 0;
-  mbat_start = 0;
-  num_mbat = 0;
-
-  for( unsigned i = 0; i < 8; i++ )
-    id[i] = pole_magic[i];  
-  for( unsigned i=0; i<109; i++ )
-    bb_blocks[i] = AllocTable::Avail;
-}
-
-void Header::load( const unsigned char* buffer )
-{
-  b_shift     = readU16( buffer + 0x1e );
-  s_shift     = readU16( buffer + 0x20 );
-  num_bat      = readU32( buffer + 0x2c );
-  dirent_start = readU32( buffer + 0x30 );
-  threshold    = readU32( buffer + 0x38 );
-  sbat_start   = readU32( buffer + 0x3c );
-  num_sbat     = readU32( buffer + 0x40 );
-  mbat_start   = readU32( buffer + 0x44 );
-  num_mbat     = readU32( buffer + 0x48 );
-  
-  for( unsigned i = 0; i < 8; i++ )
-    id[i] = buffer[i];  
-  for( unsigned i=0; i<109; i++ )
-    bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
-}
-
-void Header::save( unsigned char* buffer )
-{
-  memset( buffer, 0, 0x4c );
-  memcpy( buffer, pole_magic, 8 );        // ole signature
-  writeU32( buffer + 8, 0 );              // unknown 
-  writeU32( buffer + 12, 0 );             // unknown
-  writeU32( buffer + 16, 0 );             // unknown
-  writeU16( buffer + 24, 0x003e );        // revision ?
-  writeU16( buffer + 26, 3 );             // version ?
-  writeU16( buffer + 28, 0xfffe );        // unknown
-  writeU16( buffer + 0x1e, b_shift );
-  writeU16( buffer + 0x20, s_shift );
-  writeU32( buffer + 0x2c, num_bat );
-  writeU32( buffer + 0x30, dirent_start );
-  writeU32( buffer + 0x38, threshold );
-  writeU32( buffer + 0x3c, sbat_start );
-  writeU32( buffer + 0x40, num_sbat );
-  writeU32( buffer + 0x44, mbat_start );
-  writeU32( buffer + 0x48, num_mbat );
-  
-  for( unsigned i=0; i<109; i++ )
-    writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
-}
-
-void Header::debug()
-{
-  std::cout << std::endl;
-  std::cout << "b_shift " << b_shift << std::endl;
-  std::cout << "s_shift " << s_shift << std::endl;
-  std::cout << "num_bat " << num_bat << std::endl;
-  std::cout << "dirent_start " << dirent_start << std::endl;
-  std::cout << "threshold " << threshold << std::endl;
-  std::cout << "sbat_start " << sbat_start << std::endl;
-  std::cout << "num_sbat " << num_sbat << std::endl;
-  std::cout << "mbat_start " << mbat_start << std::endl;
-  std::cout << "num_mbat " << num_mbat << std::endl;
-  
-  unsigned s = (num_bat<=109) ? num_bat : 109;
-  std::cout << "bat blocks: ";
-  for( unsigned i = 0; i < s; i++ )
-    std::cout << bb_blocks[i] << " ";
-  std::cout << std::endl;
-}
- 
-// =========== AllocTable ==========
-
-const unsigned AllocTable::Avail = 0xffffffff;
-const unsigned AllocTable::Eof = 0xfffffffe;
-const unsigned AllocTable::Bat = 0xfffffffd;
-
-AllocTable::AllocTable()
-{
-  blockSize = 4096;
-  // initial size
-  resize( 128 );
-}
-
-unsigned long AllocTable::count()
-{
-  return data.size();
-}
-
-void AllocTable::resize( unsigned long newsize )
-{
-  unsigned oldsize = data.size();
-  data.resize( newsize );
-  if( newsize > oldsize )
-    for( unsigned i = oldsize; i<newsize; i++ )
-      data[i] = Avail;
-}
-
-// make sure there're still free blocks
-void AllocTable::preserve( unsigned long n )
-{
-  std::vector<unsigned long> pre;
-  for( unsigned i=0; i < n; i++ )
-    pre.push_back( unused() );
-}
-
-unsigned long AllocTable::operator[]( unsigned long index )
-{
-  unsigned long result;
-  result = data[index];
-  return result;
-}
-
-void AllocTable::set( unsigned long index, unsigned long value )
-{
-  if( index >= count() ) resize( index + 1);
-  data[ index ] = value;
-}
-
-void AllocTable::setChain( std::vector<unsigned long> chain )
-{
-  if( chain.size() )
-  {
-    for( unsigned i=0; i<chain.size()-1; i++ )
-      set( chain[i], chain[i+1] );
-    set( chain[ chain.size()-1 ], AllocTable::Eof );
-  }
-}
-
-// follow 
-std::vector<unsigned long> AllocTable::follow( unsigned long start )
-{
-  std::vector<unsigned long> chain;
-
-  if( start >= count() ) return chain; 
-
-  unsigned long p = start;
-  while( p < count() )
-  {
-    if( p >= (unsigned long)Eof ) break;
-    if( p >= count() ) break;
-    chain.push_back( p );
-    if( data[p] >= count() ) break;
-    p = data[ p ];
-  }
-
-  return chain;
-}
-
-unsigned AllocTable::unused()
-{
-  // find first available block
-  for( unsigned i = 0; i < data.size(); i++ )
-    if( data[i] == Avail )
-      return i;
-  
-  // completely full, so enlarge the table
-  unsigned block = data.size();
-  resize( data.size()+10 );
-  return block;      
-}
-
-void AllocTable::load( const unsigned char* buffer, unsigned len )
-{
-  resize( len / 4 );
-  for( unsigned i = 0; i < count(); i++ )
-    set( i, readU32( buffer + i*4 ) );
-}
-
-// return space required to save this dirtree
-unsigned AllocTable::size()
-{
-  return count() * 4;
-}
-
-void AllocTable::save( unsigned char* buffer )
-{
-  for( unsigned i = 0; i < count(); i++ )
-    writeU32( buffer + i*4, data[i] );
-}
-
-void AllocTable::debug()
-{
-  std::cout << "block size " << data.size() << std::endl;
-  for( unsigned i=0; i< data.size(); i++ )
-  {
-     if( data[i] == Avail ) continue;
-     std::cout << i << ": ";
-     if( data[i] == Eof ) std::cout << "eof";
-     else std::cout << data[i];
-     std::cout << std::endl;
-  }
-}
-
-// =========== DirTree ==========
-
-const unsigned DirTree::End = 0xffffffff;
-
-DirTree::DirTree()
-{
-  current = 0;
-  clear();
-}
-
-void DirTree::clear()
-{
-  // leave only root entry
-  entries.resize( 1 );
-  entries[0].name = "Root Entry";
-  entries[0].dir = true;
-  entries[0].size = 0;
-  entries[0].start = End;
-  entries[0].prev = End;
-  entries[0].next = End;
-  entries[0].child = End;
-  current = 0;
-}
-
-unsigned DirTree::entryCount()
-{
-  return entries.size();
-}
-
-DirEntry* DirTree::entry( unsigned index )
-{
-  if( index >= entryCount() ) return (DirEntry*) 0;
-  return &entries[ index ];
-}
-
-int DirTree::indexOf( DirEntry* e )
-{
-  for( unsigned i = 0; i < entryCount(); i++ )
-    if( entry( i ) == e ) return i;
-    
-  return -1;
-}
-
-int DirTree::parent( unsigned index )
-{
-  // brute-force, basically we iterate for each entries, find its children
-  // and check if one of the children is 'index'
-  for( unsigned j=0; j<entryCount(); j++ )
-  {
-    std::vector<unsigned> chi = children( j );
-    for( unsigned i=0; i<chi.size();i++ )
-      if( chi[i] == index )
-        return j;
-  }
-        
-  return -1;
-}
-
-std::string DirTree::fullName( unsigned index )
-{
-  // don't use root name ("Root Entry"), just give "/"
-  if( index == 0 ) return "/";
-
-  std::string result = entry( index )->name;
-  result.insert( 0,  "/" );
-  int p = parent( index );
-  while( p > 0 )
-  {
-     result.insert( 0, entry( p )->name );
-     result.insert( 0,  "/" );
-     index = p;
-     if( index <= 0 ) break;
-  }
-  return result;
-}
-
-// given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
-// if not found and create is false, return 0
-// if create is true, a new entry is returned
-//
-// Names starting with '/' are resolved from the root, all other names from
-// the current directory.  Empty path components (produced by a leading,
-// doubled or trailing '/') are ignored.
-DirEntry* DirTree::entry( const std::string& name, bool create )
-{
-   if( !name.length() ) return (DirEntry*)0;
-
-   // quick check for "/" (that's root)
-   if( name == "/" ) return entry( 0 );
-
-   // split the names, e.g  "/ObjectPool/_1020961869" will become:
-   // "ObjectPool" and "_1020961869"
-   std::list<std::string> names;
-   std::string::size_type start = 0, end = 0;
-   while( start < name.length() )
-   {
-     end = name.find_first_of( '/', start );
-     if( end == std::string::npos ) end = name.length();
-     // Skip empty components.  Without this check the leading '/' of an
-     // absolute name produced a "" component that matches no child, so
-     // absolute lookups always failed (or created an entry named "").
-     if( end > start )
-       names.push_back( name.substr( start, end-start ) );
-     start = end+1;
-   }
-
-   // start from root when name is absolute
-   // or current directory when name is relative
-   int index = (name[0] == '/' ) ? 0 : current;
-
-   // trace one by one
-   std::list<std::string>::iterator it;
-   for( it = names.begin(); it != names.end(); ++it )
-   {
-     // find among the children of index; 0 (the root) can never be a
-     // child, so it doubles as the "not found" marker
-     std::vector<unsigned> chi = children( index );
-     unsigned child = 0;
-     for( unsigned i = 0; i < chi.size(); i++ )
-     {
-       DirEntry* ce = entry( chi[i] );
-       if( ce ) if( ce->name == *it )
-         child = chi[i];
-     }
-
-     // traverse to the child
-     if( child > 0 ) index = child;
-     else
-     {
-       // not found among children
-       if( !create ) return (DirEntry*)0;
-
-       // create a new entry and push it on the front of the parent's
-       // child chain
-       unsigned parent = index;
-       entries.push_back( DirEntry() );
-       index = entryCount()-1;
-       DirEntry* e = entry( index );
-       e->name = *it;
-       e->dir = false;
-       e->size = 0;
-       e->start = 0;
-       e->child = End;
-       e->prev = End;
-       e->next = entry(parent)->child;
-       entry(parent)->child = index;
-     }
-   }
-
-   return entry( index );
-}
-
-// helper: true when `index` has already been collected in `seen`
-static bool dirtree_already_listed( const std::vector<unsigned>& seen,
-  unsigned index )
-{
-  for( std::vector<unsigned>::size_type k = 0; k < seen.size(); ++k )
-    if( seen[k] == index ) return true;
-  return false;
-}
-
-// helper function: appends `index` and, recursively, every entry reachable
-// from it through prev/next links to `result`.  Entries that are already in
-// `result` are never visited twice, which keeps cyclic links from recursing
-// forever; links equal to 0 or outside the tree are not followed.
-void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result,
-  unsigned index )
-{
-  DirEntry* e = dirtree->entry( index );
-  if( !e ) return;
-  if( dirtree_already_listed( result, index ) ) return;
-
-  result.push_back( index );
-
-  // previous sibling first ...
-  const unsigned prev = e->prev;
-  if( ( prev > 0 ) && ( prev < dirtree->entryCount() )
-      && !dirtree_already_listed( result, prev ) )
-    dirtree_find_siblings( dirtree, result, prev );
-
-  // ... then the next one
-  const unsigned next = e->next;
-  if( ( next > 0 ) && ( next < dirtree->entryCount() )
-      && !dirtree_already_listed( result, next ) )
-    dirtree_find_siblings( dirtree, result, next );
-}
-
-// Indices of all children of entry `index`: the entry named by its `child`
-// link plus everything reachable from that one via prev/next links.  Empty
-// when `index` is invalid or the child link does not name an entry.
-std::vector<unsigned> DirTree::children( unsigned index )
-{
-  std::vector<unsigned> result;
-
-  DirEntry* owner = entry( index );
-  if( !owner ) return result;
-  if( owner->child >= entryCount() ) return result;
-
-  dirtree_find_siblings( this, result, owner->child );
-  return result;
-}
-
-// Pointers to the entries that are children of the current directory.
-std::vector<DirEntry*> DirTree::listDirectory()
-{
-  const std::vector<unsigned> kids = children( current );
-
-  std::vector<DirEntry*> result;
-  std::vector<unsigned>::const_iterator k;
-  for( k = kids.begin(); k != kids.end(); ++k )
-    result.push_back( entry( *k ) );
-
-  return result;
-}
-
-// Makes `dir` the current directory.  Returns false (leaving the current
-// directory untouched) when `dir` cannot be resolved or is not a directory.
-bool DirTree::enterDirectory( const std::string& dir )
-{
-  DirEntry* target = entry( dir );
-  if( !target || !target->dir ) return false;
-
-  const int pos = indexOf( target );
-  if( pos < 0 ) return false;
-
-  current = pos;
-  return true;
-}
-
-// Moves the current directory one level up; does nothing at the root or
-// when no parent can be found.
-void DirTree::leaveDirectory()
-{
-  if( current != 0 )
-  {
-    const int up = parent( current );
-    if( up >= 0 ) current = up;
-  }
-}
-
-// Full name of the current directory, as produced by fullName().
-std::string DirTree::path()
-{
-  std::string here = fullName( current );
-  return here;
-}
-
-// Rebuilds the tree from a raw directory stream.  `buffer` holds `size`
-// bytes made of consecutive 128-byte records; a trailing partial record is
-// ignored.  Fields read from each record (offsets from the record start):
-//   0x00 name as 16-bit characters (only the low byte of each is kept)
-//   0x40 name length in bytes       0x42 type (2 means "not a directory")
-//   0x44 prev   0x48 next   0x4C child   0x74 start block   0x78 size
-void DirTree::load( unsigned char* buffer, unsigned size )
-{
-  entries.clear();
-  current = 0;
-
-  for( unsigned i = 0; i < size/128; i++ )
-  {
-    unsigned p = i * 128;
-
-    // parse name of this entry, which stored as Unicode 16-bit.
-    // The name field ends where the length word begins (0x40), so a larger
-    // on-disk length is clamped instead of being trusted; the bound is
-    // tested before the byte is read.
-    std::string name;
-    int name_len = readU16( buffer + 0x40+p );
-    if( name_len > 0x40 ) name_len = 0x40;
-    for( int j=0; (j<name_len) && ( buffer[j+p] ); j+= 2 )
-      name.append( 1, buffer[j+p] );
-
-    // first char isn't printable ? remove it...
-    if( buffer[p] < 32 )
-      name.erase( 0,1 );
-
-    DirEntry e;
-    e.name = name;
-    e.start = readU32( buffer + 0x74+p );
-    e.size = readU32( buffer + 0x78+p );
-    e.prev = readU32( buffer + 0x44+p );
-    e.next = readU32( buffer + 0x48+p );
-    e.child = readU32( buffer + 0x4C+p );
-    e.dir = ( buffer[ 0x42 + p]!=2 );
-
-    entries.push_back( e );
-  }
-}
-
-// Number of bytes save() writes: one 128-byte record per entry.
-unsigned DirTree::size()
-{
-  const unsigned record_bytes = 128;
-  return entryCount() * record_bytes;
-}
-
-// Serialises the tree into `buffer`, which must provide size() bytes: the
-// area is zeroed and one 128-byte record is written per entry.  Note that
-// directory entries are modified in the process (start is set to
-// 0xffffffff and size to 0).
-void DirTree::save( unsigned char* buffer )
-{
-  memset( buffer, 0, size() );
-
-  // record 0 is the root; it is always written under the fixed name
-  // "Root Entry" with type byte 5
-  const std::string rootName = "Root Entry";
-  for( unsigned c = 0; c < rootName.length(); c++ )
-    buffer[ c*2 ] = rootName[c];
-  writeU16( buffer + 0x40, rootName.length()*2 + 2 );
-  buffer[ 0x42 ] = 5;
-  buffer[ 0x43 ] = 1;
-  writeU32( buffer + 0x44, 0xffffffff );
-  writeU32( buffer + 0x48, 0xffffffff );
-  writeU32( buffer + 0x4c, entry( 0 )->child );
-  writeU32( buffer + 0x74, 0xffffffff );
-  writeU32( buffer + 0x78, 0 );
-
-  for( unsigned i = 1; i < entryCount(); i++ )
-  {
-    DirEntry* e = entry( i );
-    if( !e ) continue;
-
-    unsigned char* rec = buffer + i*128;
-
-    // directories carry no data of their own
-    if( e->dir )
-    {
-      e->start = 0xffffffff;
-      e->size = 0;
-    }
-
-    // max length for name is 32 chars
-    std::string label = e->name;
-    if( label.length() > 32 )
-      label = label.substr( 0, 32 );
-
-    // write name as Unicode 16-bit (low byte only, high byte stays 0)
-    for( unsigned c = 0; c < label.length(); c++ )
-      rec[ c*2 ] = label[c];
-
-    writeU16( rec + 0x40, label.length()*2 + 2 );
-    rec[ 0x42 ] = e->dir ? 1 : 2;
-    rec[ 0x43 ] = 1; // always black
-    writeU32( rec + 0x44, e->prev );
-    writeU32( rec + 0x48, e->next );
-    writeU32( rec + 0x4c, e->child );
-    writeU32( rec + 0x74, e->start );
-    writeU32( rec + 0x78, e->size );
-  }
-}
-
-// Debug helper: writes one line per entry to stdout in the form
-//   <index>: <name> (Dir)|(File) <size> s:<start> (<child> <prev>:<next>)
-// where a link equal to End is shown as "-".
-void DirTree::debug()
-{
-  const unsigned count = entryCount();
-  for( unsigned pos = 0; pos < count; ++pos )
-  {
-    DirEntry* item = entry( pos );
-    if( item == 0 ) continue;
-
-    std::cout << pos << ": " << item->name << " ";
-    std::cout << ( item->dir ? "(Dir) " : "(File) " );
-    std::cout << item->size << " " << "s:" << item->start << " " << "(";
-
-    if( item->child != End ) std::cout << item->child;
-    else std::cout << "-";
-    std::cout << " ";
-    if( item->prev != End ) std::cout << item->prev;
-    else std::cout << "-";
-    std::cout << ":";
-    if( item->next != End ) std::cout << item->next;
-    else std::cout << "-";
-
-    std::cout << ")" << std::endl;
-  }
-}
-
-// =========== StorageIO ==========
-
-// Sets up the I/O state for the file `fname` without touching the file:
-// header, directory tree and both allocation tables are created empty, and
-// the table block sizes are derived from the fresh header's shift values.
-StorageIO::StorageIO( Storage* st, const char* fname )
-{
-  storage  = st;
-  filename = fname;
-  filesize = 0;
-  opened   = false;
-  result   = Storage::Ok;
-
-  header  = new Header();
-  dirtree = new DirTree();
-  bbat    = new AllocTable();
-  sbat    = new AllocTable();
-
-  bbat->blockSize = 1 << header->b_shift;
-  sbat->blockSize = 1 << header->s_shift;
-}
-
-// Closes the file if it is still open and releases the helper objects
-// created by the constructor.
-StorageIO::~StorageIO()
-{
-  if( opened )
-    close();
-
-  delete header;
-  delete dirtree;
-  delete bbat;
-  delete sbat;
-}
-
-// (Re)loads the storage: an already opened storage is closed first.
-// Returns true when load() left `result` at Storage::Ok.
-bool StorageIO::open()
-{
-  if( opened )
-    close();
-
-  load();
-
-  const bool ok = ( result == Storage::Ok );
-  return ok;
-}
-
-// Opens `filename` and reads the structural parts of the compound file:
-// header, big block allocation table, small block allocation table and
-// directory tree.  The outcome is left in `result`:
-//   OpenFailed - the file could not be opened
-//   NotOLE     - the 8-byte magic id does not match
-//   BadOLE     - a header sanity check failed or there is no directory
-//   Ok         - everything loaded; `opened` is set to true
-void StorageIO::load()
-{
-  unsigned char* buffer = 0;
-  unsigned long buflen = 0;
-  std::vector<unsigned long> blocks;
-
-  // open the file, check for error
-  result = Storage::OpenFailed;
-  file.open( filename.c_str(), std::ios::binary | std::ios::in );
-  if( !file.good() ) return;
-
-  // find size of input file
-  file.seekg( 0, std::ios::end );
-  filesize = file.tellg();
-
-  // load header; the buffer is zeroed first so that a file shorter than
-  // 512 bytes is parsed from defined data (and then fails the magic check)
-  buffer = new unsigned char[512];
-  memset( buffer, 0, 512 );
-  file.seekg( 0 );
-  file.read( (char*)buffer, 512 );
-  header->load( buffer );
-  delete[] buffer;
-
-  // check OLE magic id
-  result = Storage::NotOLE;
-  for( unsigned i=0; i<8; i++ )
-    if( header->id[i] != pole_magic[i] )
-      return;
-
-  // sanity checks
-  result = Storage::BadOLE;
-  if( header->threshold != 4096 ) return;
-  if( header->num_bat == 0 ) return;
-  if( header->s_shift > header->b_shift ) return;
-  if( header->b_shift <= 6 ) return;
-  if( header->b_shift >=31 ) return;
-
-  // important block size
-  bbat->blockSize = 1 << header->b_shift;
-  sbat->blockSize = 1 << header->s_shift;
-
-  // find blocks allocated to store big bat
-  // the first 109 blocks are in header, the rest in meta bat
-  blocks.resize( header->num_bat );
-  for( unsigned i = 0; ( i < header->num_bat ) && ( i < 109 ); i++ )
-    blocks[i] = header->bb_blocks[i];
-  if( ( header->num_bat > 109 ) && ( header->num_mbat > 0 ) )
-  {
-    buffer = new unsigned char[ bbat->blockSize ];
-    unsigned k = 109;
-    unsigned long sector = header->mbat_start;
-    // Every meta bat sector is an array of 32-bit block numbers whose last
-    // word is the number of the next meta bat sector.  `k` is bounded by
-    // num_bat so that a lying header cannot write past `blocks`.
-    for( unsigned r = 0; ( r < header->num_mbat ) && ( k < header->num_bat ); r++ )
-    {
-      if( loadBigBlock( sector, buffer, bbat->blockSize ) != bbat->blockSize )
-        break;
-      for( unsigned s = 0; ( s + 4 < bbat->blockSize ) && ( k < header->num_bat ); s += 4 )
-        blocks[k++] = readU32( buffer + s );
-      sector = readU32( buffer + bbat->blockSize - 4 );
-    }
-    delete[] buffer;
-  }
-
-  // load big bat
-  buflen = blocks.size()*bbat->blockSize;
-  buffer = new unsigned char[ buflen ];
-  loadBigBlocks( blocks, buffer, buflen );
-  bbat->load( buffer, buflen );
-  delete[] buffer;
-
-  // load small bat
-  blocks.clear();
-  blocks = bbat->follow( header->sbat_start );
-  buflen = blocks.size()*bbat->blockSize;
-  buffer = new unsigned char[ buflen ];
-  loadBigBlocks( blocks, buffer, buflen );
-  sbat->load( buffer, buflen );
-  delete[] buffer;
-
-  // load directory tree; without at least one 128-byte record there is no
-  // root entry to take the small-file chain from (result is still BadOLE)
-  blocks = bbat->follow( header->dirent_start );
-  buflen = blocks.size()*bbat->blockSize;
-  if( buflen < 128 ) return;
-  buffer = new unsigned char[ buflen ];
-  loadBigBlocks( blocks, buffer, buflen );
-  // the root record's start field (offset 0x74) heads the chain of big
-  // blocks that hold the data of all small files
-  sb_blocks = bbat->follow( readU32( buffer + 0x74 ) );
-  dirtree->load( buffer, buflen );
-  delete[] buffer;
-
-  // so far so good
-  result = Storage::Ok;
-  opened = true;
-}
-
-// Opens `filename` for binary output.  On success the storage is marked as
-// opened with result Ok; on failure a message goes to stderr and result is
-// set to OpenFailed.
-void StorageIO::create()
-{
-  file.open( filename.c_str(), std::ios::out|std::ios::binary );
-  if( file.good() )
-  {
-    // so far so good
-    opened = true;
-    result = Storage::Ok;
-    return;
-  }
-
-  std::cerr << "Can't create " << filename << std::endl;
-  result = Storage::OpenFailed;
-}
-
-// Closes the file and destroys every Stream handed out by Storage::stream().
-// Does nothing when the storage is not open.
-void StorageIO::close()
-{
-  if( !opened ) return;
-
-  file.close();
-  opened = false;
-
-  std::list<Stream*>::iterator it;
-  for( it = streams.begin(); it != streams.end(); ++it )
-    delete *it;
-  // forget the dangling pointers: the storage can be opened and closed
-  // again, and a second pass over the same list would delete them twice
-  streams.clear();
-}
-
-// Reads the big blocks listed in `blocks`, in order, into `data`, storing at
-// most `maxlen` bytes.  Block b is read from file offset blockSize * (b+1).
-// Reading stops early at the first block that starts at or beyond the end
-// of the file.  Returns the number of bytes accounted for.
-unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks,
-  unsigned char* data, unsigned long maxlen )
-{
-  // sentinel
-  if( !data ) return 0;
-  if( !file.good() ) return 0;
-  if( blocks.size() < 1 ) return 0;
-  if( maxlen == 0 ) return 0;
-
-  // read block one by one, seems fast enough
-  unsigned long bytes = 0;
-  for( unsigned long i=0; (i < blocks.size() ) && ( bytes<maxlen ); i++ )
-  {
-    unsigned long block = blocks[i];
-    unsigned long pos =  bbat->blockSize * ( block+1 );
-    // a block outside the file cannot be read; without this check the
-    // clamp below would compute filesize - pos and wrap around
-    if( pos >= filesize ) break;
-    unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
-    if( pos + p > filesize ) p = filesize - pos;
-    file.seekg( pos );
-    file.read( (char*)data + bytes, p );
-    bytes += p;
-  }
-
-  return bytes;
-}
-
-// Single-block convenience wrapper around loadBigBlocks().
-unsigned long StorageIO::loadBigBlock( unsigned long block,
-  unsigned char* data, unsigned long maxlen )
-{
-  // sentinel (the former "block < 0" test could never be true for an
-  // unsigned value and is left out)
-  if( !data || !file.good() ) return 0;
-
-  const std::vector<unsigned long> single( 1, block );
-  return loadBigBlocks( single, data, maxlen );
-}
-
-// Reads the small blocks listed in `blocks`, in order, into `data`, storing
-// at most `maxlen` bytes.  Small blocks live inside the big blocks listed in
-// sb_blocks; each one is located by its byte position (number * small block
-// size) within that chain.  Stops at the first small block that lies beyond
-// the chain.  Returns the number of bytes which has been read.
-unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks,
-  unsigned char* data, unsigned long maxlen )
-{
-  // sentinel
-  if( !data ) return 0;
-  if( !file.good() ) return 0;
-  if( blocks.size() < 1 ) return 0;
-  if( maxlen == 0 ) return 0;
-  if( bbat->blockSize == 0 ) return 0;
-
-  // our own local buffer; heap allocated because the block size comes from
-  // the file header and may be far too large for the stack
-  std::vector<unsigned char> buf( bbat->blockSize );
-
-  // read small block one by one
-  unsigned long bytes = 0;
-  for( unsigned long i=0; ( i<blocks.size() ) && ( bytes<maxlen ); i++ )
-  {
-    unsigned long block = blocks[i];
-
-    // find where the small-block exactly is
-    unsigned long pos = block * sbat->blockSize;
-    unsigned long bbindex = pos / bbat->blockSize;
-    if( bbindex >= sb_blocks.size() ) break;
-
-    loadBigBlock( sb_blocks[ bbindex ], &buf[0], bbat->blockSize );
-
-    // copy the data: never more than the caller asked for, than is left in
-    // the big block, or than one small block holds
-    unsigned offset = pos % bbat->blockSize;
-    unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes :  bbat->blockSize-offset;
-    p = (sbat->blockSize<p ) ? sbat->blockSize : p;
-    memcpy( data + bytes, &buf[0] + offset, p );
-    bytes += p;
-  }
-
-  return bytes;
-}
-
-// Single-block convenience wrapper around loadSmallBlocks().
-unsigned long StorageIO::loadSmallBlock( unsigned long block,
-  unsigned char* data, unsigned long maxlen )
-{
-  // sentinel (the former "block < 0" test could never be true for an
-  // unsigned value and is left out)
-  if( !data || !file.good() ) return 0;
-
-  const std::vector<unsigned long> single( 1, block );
-  return loadSmallBlocks( single, data, maxlen );
-}
-
-// =========== StreamImpl ==========
-
-// Binds the stream to directory entry `e` of storage `s`.  The block chain
-// is taken from the big allocation table when the entry's size reaches the
-// header's threshold and from the small one otherwise; a 4096-byte cache is
-// allocated and filled for position 0.
-StreamImpl::StreamImpl( StorageIO* s, DirEntry* e)
-{
-  io = s;
-  entry = e;
-  m_pos = 0;
-
-  const bool big = ( entry->size >= io->header->threshold );
-  if( big )
-    blocks = io->bbat->follow( entry->start );
-  else
-    blocks = io->sbat->follow( entry->start );
-
-  // prepare cache
-  cache_size = 4096; // optimal ?
-  cache_data = new unsigned char[cache_size];
-  cache_pos = 0;
-  updateCache();
-}
-
-// Releases the cache buffer.
-// FIXME tell parent we're gone
-StreamImpl::~StreamImpl()
-{
-  delete[] this->cache_data;
-}
-
-// Sets the read position; the value is stored as given, without any check
-// against the stream size.
-void StreamImpl::seek( unsigned long pos )
-{
-  this->m_pos = pos;
-}
-
-// Current read position.
-unsigned long StreamImpl::tell()
-{
-  return this->m_pos;
-}
-
-// Returns the byte at the current position and advances by one, or -1 when
-// the position is at or past the end of the stream or the cache could not
-// be filled.
-int StreamImpl::getch()
-{
-  // at or past end-of-file ?  (The test must include m_pos == size: there
-  // is no byte at that position, and falling through would ask
-  // updateCache() to refill an empty cache.)
-  if( m_pos >= entry->size ) return -1;
-
-  // need to update cache ?
-  if( !cache_size || ( m_pos < cache_pos ) ||
-    ( m_pos >= cache_pos + cache_size ) )
-      updateCache();
-
-  // something bad if we don't get good cache
-  if( !cache_size ) return -1;
-
-  int data = cache_data[m_pos - cache_pos];
-  m_pos++;
-
-  return data;
-}
-
-// Copies up to `maxlen` bytes starting at stream offset `pos` into `data`
-// without moving the read position.  Streams smaller than the header's
-// threshold are read through the small block chain, all others through the
-// big block chain.  Copying stops at the end of the block chain (it is not
-// clipped to the entry's size).  Returns the number of bytes copied.
-unsigned long StreamImpl::read( unsigned long pos, unsigned char* data, unsigned long maxlen )
-{
-  // sanity checks
-  if( !data ) return 0;
-  if( maxlen == 0 ) return 0;
-
-  const bool small = ( entry->size < io->header->threshold );
-  const unsigned long unit = small ? io->sbat->blockSize : io->bbat->blockSize;
-  if( unit == 0 ) return 0;
-
-  unsigned long index = pos / unit;
-  if( index >= blocks.size() ) return 0;
-
-  // one block worth of scratch space; heap allocated because the block
-  // size comes from the file header and may be far too large for the stack
-  std::vector<unsigned char> buf( unit );
-
-  unsigned long totalbytes = 0;
-  unsigned long offset = pos % unit;   // only the first block starts mid-way
-  while( ( totalbytes < maxlen ) && ( index < blocks.size() ) )
-  {
-    if( small )
-      io->loadSmallBlock( blocks[index], &buf[0], unit );
-    else
-      io->loadBigBlock( blocks[index], &buf[0], unit );
-
-    unsigned long count = unit - offset;
-    if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
-    memcpy( data+totalbytes, &buf[0] + offset, count );
-    totalbytes += count;
-    offset = 0;
-    index++;
-  }
-
-  return totalbytes;
-}
-
-// Sequential read: copies up to `maxlen` bytes from the current position
-// and advances the position by the number of bytes copied.
-unsigned long StreamImpl::read( unsigned char* data, unsigned long maxlen )
-{
-  const unsigned long got = read( tell(), data, maxlen );
-  m_pos += got;
-  return got;
-}
-
-// Refills the cache with the window of the stream that contains m_pos.
-// Afterwards cache_pos is the stream offset of the first cached byte and
-// cache_size the number of valid bytes (0 when nothing could be read).
-void StreamImpl::updateCache()
-{
-  // sanity check
-  if( !cache_data ) return;
-
-  // Size of the cache_data buffer as allocated by the constructor.
-  // cache_size cannot be used for the window arithmetic because it is
-  // overwritten below with the number of bytes actually read; once that
-  // was 0 the old "m_pos % cache_size" divided by zero.
-  const unsigned long capacity = 4096;
-
-  cache_pos = m_pos - ( m_pos % capacity );
-  if( cache_pos >= entry->size )
-  {
-    // nothing to cache at or past the end of the stream
-    cache_size = 0;
-    return;
-  }
-
-  unsigned long bytes = capacity;
-  if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
-  cache_size = read( cache_pos, cache_data, bytes );
-}
-
-
-// =========== Storage ==========
-
-// Creates the I/O backend for `filename`; the file itself is not opened
-// before open() is called.
-Storage::Storage( const char* filename )
-  : io( new StorageIO( this, filename ) )
-{
-}
-
-// Destroys the I/O backend.
-Storage::~Storage()
-{
-  delete this->io;
-}
-
-// Result code recorded by the I/O backend.
-int Storage::result()
-{
-  const int code = io->result;
-  return code;
-}
-
-// Opens the storage through the I/O backend; true on success.
-bool Storage::open()
-{
-  const bool ok = io->open();
-  return ok;
-}
-
-// Closes the storage through the I/O backend.
-void Storage::close()
-{
-  this->io->close();
-}
-
-// Names of all files and subdirs in the current path.
-std::list<std::string> Storage::listDirectory()
-{
-  const std::vector<DirEntry*> found = io->dirtree->listDirectory();
-
-  std::list<std::string> names;
-  std::vector<DirEntry*>::const_iterator it;
-  for( it = found.begin(); it != found.end(); ++it )
-    names.push_back( (*it)->name );
-
-  return names;
-}
-
-// Enters a sub-directory; false when `directory` is not found or is not a
-// directory.
-bool Storage::enterDirectory( const std::string& directory )
-{
-  const bool entered = io->dirtree->enterDirectory( directory );
-  return entered;
-}
-
-// Goes up one level (like cd ..).
-void Storage::leaveDirectory()
-{
-  io->dirtree->leaveDirectory();
-}
-
-// Path of the current directory as reported by the directory tree.
-std::string Storage::path()
-{
-  std::string here = io->dirtree->path();
-  return here;
-}
-
-// Looks up `name` in the directory tree and wraps the entry in a new
-// Stream.  The stream is added to the I/O backend's stream list, which
-// deletes it when the storage is closed.  Returns a null pointer for an
-// empty name or when no such entry exists.
-Stream* Storage::stream( const std::string& name )
-{
-  // sanity check
-  if( !name.length() ) return (Stream*)0;
-  if( !io ) return (Stream*)0;
-
-  // `name` is passed on unchanged: the lookup itself resolves relative
-  // names against the current directory.  (An absolute copy used to be
-  // built here via path() but was never used.)
-  DirEntry* entry = io->dirtree->entry( name );
-  if( !entry ) return (Stream*)0;
-
-  Stream* s = new Stream();
-  s->impl = new StreamImpl( io, entry );
-  io->streams.push_back( s );
-
-  return s;
-}
-
-
-
-// =========== Stream ==========
-
-// Starts without an implementation; Storage::stream() attaches one.
-Stream::Stream()
-  : impl( 0 )
-{
-}
-
-// Destroys the attached implementation, if any.
-// FIXME tell parent we're gone
-Stream::~Stream()
-{
-  delete this->impl;
-}
-
-// Read position of the stream; 0 when no implementation is attached.
-unsigned long Stream::tell()
-{
-  if( !impl ) return 0;
-  return impl->tell();
-}
-
-// Sets the read position; ignored when no implementation is attached.
-void Stream::seek( unsigned long newpos )
-{
-  if( !impl ) return;
-  impl->seek( newpos );
-}
-
-// Size recorded in the stream's directory entry; 0 when no implementation
-// is attached.
-unsigned long Stream::size()
-{
-  if( !impl ) return 0;
-  return impl->entry->size;
-}
-
-// Reads one byte through the implementation.  Without an implementation the
-// result is 0 (not the -1 the implementation uses for end-of-stream).
-int Stream::getch()
-{
-  if( !impl ) return 0;
-  return impl->getch();
-}
-
-// Reads up to `maxlen` bytes from the current position into `data`; returns
-// the number of bytes read, 0 when no implementation is attached.
-unsigned long Stream::read( unsigned char* data, unsigned long maxlen )
-{
-  if( !impl ) return 0;
-  return impl->read( data, maxlen );
-}
-
diff --git a/src/plugins/wordleaker/pole.h b/src/plugins/wordleaker/pole.h
@@ -1,149 +0,0 @@
-/* POLE - Portable C++ library to access OLE Storage 
-   Copyright (C) 2002-2004 Ariya Hidayat <ariya@kde.org>
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-   
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public License
-   along with this library; see the file COPYING.LIB.  If not, write to
-   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, US
-*/
-
-#ifndef POLE_H
-#define POLE_H
-
-#include <string>
-#include <list>
-
-namespace POLE
-{
-
-class StorageIO;
-class Stream;
-class StreamImpl;
-
-/**
- * Read access to an OLE storage file: navigation through its directory
- * tree and access to the streams stored in it.  All work is delegated to
- * a private StorageIO object.
- **/
-class Storage
-{
-  friend class Stream;
-  friend class StreamOut;
-
-public:
-
-  // Result codes as returned by result().
-  enum { Ok, OpenFailed, NotOLE, BadOLE, UnknownError, 
-    StupidWorkaroundForBrokenCompiler=255 };
-
-  /**
-   * Constructs a storage with name filename.
-   **/
-  Storage( const char* filename );
-
-  /**
-   * Destroys the storage.
-   **/
-  ~Storage();
-  
-  /**
-   * Opens the storage. Returns true if no error occurs.
-   **/
-  bool open();
-
-  /**
-   * Closes the storage.
-   **/
-  void close();
-
-  /**
-   * Returns the error code of last operation.
-   **/
-  int result();
-
-  /**
-   * Returns the current path.
-   **/
-  std::string path();
-
-  /**
-   * Finds all streams and directories in current path.
-   **/
-  std::list<std::string> listDirectory();
-
-  /**
-   * Changes path to directory. Returns true if no error occurs.
-   **/
-  bool enterDirectory( const std::string& directory );
-
-  /**
-   * Goes to one directory up.
-   **/
-  void leaveDirectory();
-
-  /**
-   * Finds and returns a stream with the specified name.
-   * Returns a null pointer when the name is empty or not found.
-   **/
-  Stream* stream( const std::string& name );
-  
-private:
-  // backend that performs the actual file access
-  StorageIO* io;
-  
-  // no copy or assign
-  Storage( const Storage& );
-  Storage& operator=( const Storage& );
-
-};
-
-/**
- * A readable stream inside a Storage.  Constructor and destructor are
- * private: instances are handed out by Storage::stream() and destroyed
- * by the storage's I/O backend, not by the caller.
- **/
-class Stream
-{
-  friend class Storage;
-  friend class StorageIO;
-  
-public:
-  
-  /**
-   * Returns the stream size.
-   **/
-  unsigned long size();
-
-  /**
-   * Returns the read pointer.
-   **/
-  unsigned long tell();
-
-  /**
-   * Sets the read position.
-   **/
-  void seek( unsigned long pos ); 
-
-  /**
-   * Reads a byte.
-   **/
-  int getch();
-
-  /**
-   * Reads a block of data.  Returns the number of bytes read.
-   **/
-  unsigned long read( unsigned char* data, unsigned long maxlen );
-
-private:
-
-  Stream();
-  ~Stream();
-
-  // no copy or assign
-  Stream( const Stream& );
-  Stream& operator=( const Stream& );
-    
-  // implementation object, attached by Storage::stream()
-  StreamImpl* impl;
-};
-
-
-}
-
-#endif // POLE_H
diff --git a/src/plugins/wordleaker/wordextractor.cc b/src/plugins/wordleaker/wordextractor.cc
@@ -1,486 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2006 Vidyut Samanta and Christian Grothoff
-
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
-
-     This code depends heavily on the wordleaker code and
-     a lot of code was borrowed from wordleaker.cpp. See also
-     the README file in this directory.
- */
-
-#include <math.h>
-#include <time.h>
-
-#include "wordleaker.h"
-#include "pole.h"
-#include "platform.h"
-#include "extractor.h"
-#include "../convert.h"
-
-/* Translation lookup in the "iso-639" message domain (instead of the
-   default domain); used below for plain language names. */
-#define __(a) dgettext("iso-639", a)
-
-extern "C" {
-
-  /* Keyword type for each SummaryInformation property, indexed by the
-     property ID read from the stream.  The extractor only looks up IDs
-     2..19; slots 0 and 1 are placeholders. */
-  static EXTRACTOR_KeywordType 
-  SummaryProperties[] = {
-    EXTRACTOR_UNKNOWN,
-    EXTRACTOR_UNKNOWN,
-    EXTRACTOR_TITLE,
-    EXTRACTOR_SUBJECT,
-    EXTRACTOR_AUTHOR,
-    EXTRACTOR_KEYWORDS,
-    EXTRACTOR_COMMENT,
-    EXTRACTOR_TEMPLATE,
-    EXTRACTOR_LAST_SAVED_BY,
-    EXTRACTOR_VERSIONNUMBER,
-    EXTRACTOR_TOTAL_EDITING_TIME,
-    EXTRACTOR_LAST_PRINTED,
-    EXTRACTOR_CREATION_DATE,
-    EXTRACTOR_MODIFICATION_DATE,
-    EXTRACTOR_PAGE_COUNT,
-    EXTRACTOR_WORD_COUNT,
-    EXTRACTOR_CHARACTER_COUNT,
-    EXTRACTOR_THUMBNAILS,
-    EXTRACTOR_SOFTWARE,
-    EXTRACTOR_SECURITY,
-  };
-  
-  /* Returns a malloc'ed, 0-terminated copy of the first n bytes at s, or
-     NULL when out of memory.  Exactly n bytes are copied (the copy does not
-     stop at a 0 byte), so s must be at least n bytes long.  The caller
-     frees the result. */
-  static char * xstrndup(const char * s, size_t n){
-    char * d;
-  
-    d = (char *) malloc(n+1);
-    if (d == NULL)
-      return NULL;
-    memcpy(d,s,n);
-    d[n]='\0';
-    return d;
-  }
-
-  /* Prepends a new item of the given type, holding a copy of keyword, to
-     the list next and returns the new list head.  The list is returned
-     unchanged when keyword is NULL or memory runs out. */
-  static struct EXTRACTOR_Keywords * addKeyword(EXTRACTOR_KeywordType type,
-                                                const char * keyword,
-                                                struct EXTRACTOR_Keywords * next) {
-    EXTRACTOR_KeywordList * result;
-
-    if (keyword == NULL)
-      return next;
-    result = (EXTRACTOR_KeywordList*) malloc(sizeof(EXTRACTOR_KeywordList));
-    if (result == NULL)
-      return next;
-    result->keyword = strdup(keyword);
-    if (result->keyword == NULL) {
-      /* do not link an item without text into the list */
-      free(result);
-      return next;
-    }
-    result->next = next;
-    result->keywordType = type;
-    return result;
-  }
-
-  /* Formats a date packed as decimal digits DDMMYY (day = date/10000 % 100,
-     month = date/100 % 100, two-digit year = date % 100) using the locale's
-     date format.  Returns a malloc'ed string that the caller frees, or NULL
-     on failure. */
-  static char * dateToString( unsigned long date ) {
-    char f[128];
-    struct tm t;
-    memset(&t, 0, sizeof(struct tm));
-    /* tm_year counts years since 1900, so the two-digit year is stored as
-       it is; adding 1900 here produced years around 3800 with formats that
-       print the full year */
-    t.tm_year = date % 100;
-    /* NOTE(review): tm_mon is 0-based; if the packed month is 1..12 this
-       needs a "- 1" -- confirm the encoding before changing it */
-    t.tm_mon = date / 100 % 100;
-    t.tm_mday = date / 10000 % 100;
-    if (0 == strftime(f, 128, 
-		      nl_langinfo(D_FMT),
-		      &t))
-      return NULL;
-
-    /* copy only the text strftime produced, not the whole buffer */
-    return strdup(f);
-  }
-  
-  /* Maps a creator/product id to a product name; NULL for ids that are not
-     in the table.  The names ending in '?' are guesses. */
-  static const char * idToProduct( unsigned int id ) {
-    // TODO: find the rest of ids (and check existing ones!)
-    if ( (id == 0x6954) || (id == 0x656d) )
-      return "Word 97 (Windows NT)?";
-    if ( (id == 0x206d) || (id == 0x696c) )
-      return "Word 6 (MS DOS)?";
-    if (id == 0x6A62)
-      return "Word 97";
-    if (id == 0x626A)
-      return "Word 98 (Mac)";
-    return NULL;
-  }
-
-  /* Maps a Windows language identifier to a translated language name, or
-     NULL for identifiers that are not in the table.  Plain language names
-     are looked up with __() in the "iso-639" domain, all other names with
-     _(). */
-  static const char * lidToLanguage( unsigned int lid ) {
-    switch ( lid ) {
-    case 0x0400: 
-      return _("No Proofing");
-    case 0x0401: 
-      return __("Arabic");
-    case 0x0402:
-      return __("Bulgarian");
-    case 0x0403:
-      return __("Catalan");
-    case 0x0404:
-      return _("Traditional Chinese");
-    case 0x0804:
-      return _("Simplified Chinese");
-    case 0x0405:
-      /* 0x0405 is Czech (this used to say "Chechen") */
-      return __("Czech");
-    case 0x0406:
-      return __("Danish");
-    case 0x0407:
-      return __("German");
-    case 0x0807:
-      return _("Swiss German");
-    case 0x0408:
-      return __("Greek");
-    case 0x0409:
-      return _("U.S. English");
-    case 0x0809:
-      return _("U.K. English");
-    case 0x0c09:
-      return _("Australian English");
-    case 0x040a:
-      return _("Castilian Spanish");
-    case 0x080a:
-      return _("Mexican Spanish");
-    case 0x040b:
-      return __("Finnish");
-    case 0x040c:
-      return __("French");
-    case 0x080c:
-      return _("Belgian French");
-    case 0x0c0c:
-      return _("Canadian French");
-    case 0x100c:
-      return _("Swiss French");
-    case 0x040d:
-      return __("Hebrew");
-    case 0x040e:
-      return __("Hungarian");
-    case 0x040f:
-      return __("Icelandic");
-    case 0x0410:
-      return __("Italian");
-    case 0x0810:
-      return _("Swiss Italian");
-    case 0x0411:
-      return __("Japanese");
-    case 0x0412:
-      return __("Korean");
-    case 0x0413:
-      return __("Dutch");
-    case 0x0813:
-      return _("Belgian Dutch");
-    case 0x0414:
-      return _("Norwegian Bokmal");
-    case 0x0814:
-      return __("Norwegian Nynorsk");
-    case 0x0415:
-      return __("Polish");
-    case 0x0416:
-      return __("Brazilian Portuguese");
-    case 0x0816:
-      return __("Portuguese");
-    case 0x0417:
-      return _("Rhaeto-Romanic");
-    case 0x0418:
-      return __("Romanian");
-    case 0x0419:
-      return __("Russian");
-    case 0x041a:
-      return _("Croato-Serbian (Latin)");
-    case 0x081a:
-      return _("Serbo-Croatian (Cyrillic)");
-    case 0x041b:
-      return __("Slovak");
-    case 0x041c:
-      return __("Albanian");
-    case 0x041d:
-      return __("Swedish");
-    case 0x041e:
-      return __("Thai");
-    case 0x041f:
-      return __("Turkish");
-    case 0x0420:
-      return __("Urdu");
-    case 0x0421:
-      return __("Bahasa"); 
-    case 0x0422:
-      return __("Ukrainian");
-    case 0x0423:
-      return __("Byelorussian");
-    case 0x0424:
-      return __("Slovenian");
-    case 0x0425:
-      return __("Estonian");
-    case 0x0426:
-      return __("Latvian");
-    case 0x0427:
-      return __("Lithuanian");
-    case 0x0429:
-      return _("Farsi");
-    case 0x042D:
-      return __("Basque");
-    case 0x042F:
-      return __("Macedonian");
-    case 0x0436:
-      return __("Afrikaans");
-    case 0x043E:
-      /* 0x043E is Malay (this used to say "Malayalam", which is 0x044C) */
-      return __("Malay");
-    default:
-      return NULL;
-    }
-  }
-
-
- 
-  // Decodes a little-endian 32-bit value from four consecutive bytes.  The
-  // bytes are widened before shifting so that a set top bit cannot overflow
-  // a signed int.
-  static unsigned long propReadU32( const unsigned char * b ) {
-    return ((unsigned long) b[0])
-      | (((unsigned long) b[1]) << 8)
-      | (((unsigned long) b[2]) << 16)
-      | (((unsigned long) b[3]) << 24);
-  }
-
-  // Reads one typed property value at the stream's current position and
-  // returns it as a malloc'ed string which the caller must free.  Handled
-  // types: 2 (VT_I2), 3 (VT_I4), 11 (VT_BOOL), 30 (VT_LPSTR) and
-  // 64 (VT_FILETIME).  Returns NULL for any other type, on a short read and
-  // when memory runs out.
-  static char * getProperty( POLE::Stream* stream ) {
-    unsigned char buffer[256];
-    unsigned long i;
-    unsigned long j;
-    unsigned long t, t1, t2;
-    char *s;
-
-    if (4 != stream->read(buffer, 4))
-      return NULL;
-    unsigned long type = propReadU32(buffer);
-
-    switch (type) {
-    case 2: // VT_I2
-      if (2 != stream->read(buffer, 2))
-        return NULL;
-      i = buffer[0] + (buffer[1] << 8);
-      s = (char*) malloc(16);
-      if (s != NULL)
-        snprintf(s, 16, "%lu", i);
-      return s;
-    case 3: // VT_I4
-      if (4 != stream->read(buffer, 4))
-        return NULL;
-      i = propReadU32(buffer);
-      s = (char*) malloc(16);
-      if (s != NULL)
-        snprintf(s, 16, "%lu", i);
-      return s;
-    case 11: // VT_BOOL
-      if (1 != stream->read(buffer, 1))
-        return NULL;
-      // all bits set means true; compared as an unsigned byte so that the
-      // result does not depend on whether plain char is signed
-      if (buffer[0] == 0xFF)
-        return strdup("true");
-      return strdup("false");
-    case 30: // VT_LPSTR
-      if (4 != stream->read(buffer, 4))
-        return NULL;
-      i = propReadU32(buffer);
-      if (i > 16*1024*1024)
-        return NULL;
-      s = (char*) malloc(i+1);
-      if (s == NULL)
-        return NULL;
-      s[i] = '\0';
-      j = 0;
-      while (j < i) {
-        // keep the int so that end-of-stream (-1) is not mistaken for a
-        // character
-        int ch = stream->getch();
-        if (ch <= 0)
-          break;
-        s[j++] = (char) ch;
-      }
-      if ( (j > 0) && (s[j-1] == '\n') )
-        s[--j] = '\0';
-      // NOTE(review): only strings with exactly i non-zero characters and
-      // no trailing newline pass this test; kept as it was -- confirm that
-      // this is intended
-      if (j != i) {
-        free(s);
-        return NULL;
-      }
-      return s;
-    case 64: { // VT_FILETIME
-      if (8 != stream->read(buffer, 8))
-        return NULL;
-      t1 = propReadU32(buffer);
-      t2 = propReadU32(buffer + 4);
-      t = filetime_to_unixtime(t1, t2);
-      time_t when = (time_t) t;
-      char * text = (char*) malloc(32);
-      if (text == NULL)
-        return NULL;
-      if (NULL == ctime_r(&when, text)) {
-        free(text);
-        return NULL;
-      }
-      size_t len = strlen(text);
-      if (len > 0)
-        text[len-1] = '\0'; /* kill newline */
-      return text;
-    }
-    default:
-      break;
-    }
-    return NULL;
-  }
-
-
-  /**
-   * Extract metadata keywords from a Microsoft Word document.
-   *
-   * The input is first validated by size and by the 8-byte magic number
-   * at the start of data; the file is then opened by name through POLE.
-   * Three sources are consulted:
-   *  - the "SummaryInformation" stream (property ids 2..19),
-   *  - fixed-offset fields starting at byte 512 of data (product
-   *    version, language id, creating / revising product and build),
-   *  - the saved-by table in the "1Table" (or "0Table") stream, which
-   *    yields one revision-history keyword per author/file pair.
-   *
-   * @param filename name of the file, handed to POLE::Storage
-   * @param data in-memory contents of the file
-   * @param size number of bytes available in data
-   * @param prev keyword list to extend
-   * @return prev, possibly extended with additional keywords
-   */
-  struct EXTRACTOR_Keywords * libextractor_word_extract(const char * filename,
-                                                        const char * data,
-                                                        size_t size,
-                                                        struct EXTRACTOR_Keywords * prev) {
-    char ver[16];
-    char product[128];
-    unsigned char buffer[256];
-
-    /* 512 + 898 covers every fixed offset read from data512 below */
-    if ( (size < 512 + 898) || (filename == NULL) )
-      return prev;
-    if (0 != memcmp(data, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8))
-      /* look at file magic number to avoid false positives */
-      return prev;
-
-    POLE::Storage* storage = new POLE::Storage(filename);
-    storage->open();
-    if (storage->result() != POLE::Storage::Ok ) {
-      delete storage;
-      return prev;
-    }
-
-    POLE::Stream * stream = storage->stream( "SummaryInformation" );
-    if (! stream) {
-      delete storage;
-      return prev;
-    }
-
-    // ClassID & Offset
-    stream->seek(28);
-    if (20 != stream->read(buffer, 20)) {
-      delete storage;
-      return prev;
-    }
-
-    // beginning of section; property offsets are relative to this position
-    unsigned long begin = stream->tell();
-    // skip length of section
-    stream->read(buffer, 4);
-    // number of properties
-    if (4 == stream->read(buffer, 4)) {
-      unsigned int nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-      // properties: a table of (id, offset) pairs, 8 bytes each
-      for (unsigned int i = 0; i < nproperties; i++) {
-        if (8 != stream->read(buffer, 8))
-          break;
-        unsigned int propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-        unsigned int offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-        if (propertyID > 1 && propertyID < 20) {
-          // remember the table position so we can continue after the detour
-          unsigned long offsetCur = stream->tell();
-          stream->seek(offsetProp + begin);
-          if (propertyID == 10) {
-            /* FIXME: how is editing time encoded? */
-          } else if (propertyID == 19) {
-            /* FIXME: how to interpret the security integer? */
-          } else {
-            char * prop = getProperty(stream);
-            if (prop != NULL) {
-              prev = addKeyword(SummaryProperties[propertyID],
-                                prop,
-                                prev);
-              free(prop);
-            }
-          }
-          stream->seek(offsetCur);
-        }
-      }
-    }
-
-    /* fixed-offset little-endian header fields starting at byte 512 */
-    const unsigned char * data512 = (const unsigned char*) &data[512];
-    unsigned int nProduct = data512[4] + (data512[5] << 8);
-    unsigned int lid = data512[6] + (data512[7] << 8);
-    unsigned int wMagicCreated = data512[34] + (data512[35] << 8);
-    unsigned int wMagicRevised = data512[36] + (data512[37] << 8);
-    unsigned long lProductCreated = data512[68] + (data512[69] << 8) + (data512[70] << 16) + (data512[71] << 24);
-    unsigned long lProductRevised = data512[72] + (data512[73] << 8) + (data512[74] << 16) + (data512[75] << 24);
-    unsigned long fcSttbSavedBy = data512[722] + (data512[723] << 8) + (data512[724] << 16) + (data512[725] << 24);
-    unsigned long lcbSttbSavedBy = data512[726] + (data512[727] << 8) + (data512[728] << 16) + (data512[729] << 24);
-
-    if (nProduct != 0) {
-      snprintf(ver, 16, "%u", nProduct);
-      prev = addKeyword(EXTRACTOR_PRODUCTVERSION,
-                        ver,
-                        prev);
-    }
-    const char * lang = lidToLanguage(lid);
-    if (lang != NULL) {
-      prev = addKeyword(EXTRACTOR_LANGUAGE,
-                        lang,
-                        prev);
-    }
-    const char * prod = idToProduct(wMagicCreated);
-    if (prod != NULL) {
-      char * date = dateToString(lProductCreated);
-      snprintf(product, 128, _("%s (Build %s)"),
-               prod,
-               date);
-      free(date);
-      prev = addKeyword(EXTRACTOR_CREATED_BY_SOFTWARE,
-                        product,
-                        prev);
-    }
-    prod = idToProduct(wMagicRevised);
-    if (prod != NULL) {
-      char * date = dateToString(lProductRevised);
-      snprintf(product, 128, _("%s (Build %s)"),
-               prod,
-               date);
-      free(date);
-      prev = addKeyword(EXTRACTOR_MODIFIED_BY_SOFTWARE,
-                        product,
-                        prev);
-    }
-
-    unsigned int where = 0;
-    stream = storage->stream("1Table");
-    if (! stream)
-      stream = storage->stream("0Table");
-    if ( (stream) && (lcbSttbSavedBy >= 6)) {
-      /* lcbSttbSavedBy comes straight from the file, so the allocation
-         may legitimately fail for a corrupt document */
-      unsigned char * lbuffer = (unsigned char*) malloc(lcbSttbSavedBy);
-
-      if (lbuffer != NULL) {
-        // goto offset of revision
-        stream->seek(fcSttbSavedBy);
-        // read all the revision history
-        if (lcbSttbSavedBy == stream->read(lbuffer, lcbSttbSavedBy)) {
-          // there are n strings, so n/2 revisions (author & file)
-          unsigned int nRev = (lbuffer[2] + (lbuffer[3] << 8)) / 2;
-          where = 6;
-          for (unsigned int i=0; i < nRev; i++) {
-            if (where >= lcbSttbSavedBy)
-              break;
-            unsigned int length = lbuffer[where++];
-            if ( (where + 2 * length + 2 >= lcbSttbSavedBy) ||
-                 (where + 2 * length + 2 <= where) )
-              break;
-            char * author = convertToUtf8((const char*) &lbuffer[where],
-                                          length * 2,
-                                          "UTF-16BE");
-            where += length * 2 + 1;
-            length = lbuffer[where++];
-            if ( (where + 2 * length >= lcbSttbSavedBy) ||
-                 (where + 2 * length + 1 <= where) ) {
-              /* author was already converted; do not leak it */
-              free(author);
-              break;
-            }
-            /* named fname so the filename parameter is not shadowed */
-            char * fname = convertToUtf8((const char*) &lbuffer[where],
-                                         length * 2,
-                                         "UTF-16BE");
-            where += length * 2 + 1;
-            /* NOTE(review): convertToUtf8 is not visible here; the NULL
-               checks guard against a failed conversion -- confirm */
-            if ( (author != NULL) && (fname != NULL) ) {
-              size_t rlen = strlen(author) + strlen(fname) + 512;
-              char * rbuf = (char*) malloc(rlen);
-              if (rbuf != NULL) {
-                snprintf(rbuf, rlen,
-                         _("Revision #%u: Author '%s' worked on '%s'"),
-                         i, author, fname);
-                prev = addKeyword(EXTRACTOR_REVISION_HISTORY,
-                                  rbuf,
-                                  prev);
-                free(rbuf);
-              }
-            }
-            free(author);
-            free(fname);
-          }
-        }
-        free(lbuffer);
-      }
-    }
-    delete storage;
-
-    return prev;
-  }
-
-}
-
diff --git a/src/plugins/wordleaker/wordleaker.cpp b/src/plugins/wordleaker/wordleaker.cpp
@@ -1,308 +0,0 @@
-/* 
-   WordLeaker - Shows information about Word DOC files
-   Copyright (C) 2005 Sacha Fuentes <madelman@iname.com>
-
-   Based on poledump.c
-   Original idea from WordDumper (http://www.computerbytesman.com)
-   Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm
-   Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html
-   
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this library; see the file COPYING.  If not, write to
-   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, US
-*/
-
-#include <iostream>
-#include <fstream>
-#include <stdlib.h>
-#include <list>
-#include <ctime>
-
-#include "pole.h"
-#include "WordLeaker.h"
-
-unsigned long fcSttbSavedBy;
-unsigned long lcbSttbSavedBy;
-
-
-  
-// Reads the 4-byte little-endian type tag at the stream's current position,
-// then reads the value that follows and prints it to stdout on its own line.
-// Types handled: 2 (VT_I2), 3 (VT_I4), 11 (VT_BOOL), 30 (VT_LPSTR) and
-// 64 (VT_FILETIME); anything else is reported as "Unknown format <type>".
-// A short read is reported as "Unreadable property" instead of decoding
-// whatever happened to be in the buffer.
-void showProperty( POLE::Stream* stream ) {
-  unsigned char buffer[256];
-  unsigned long type;
-  unsigned long i;
-  unsigned long t1, t2;
-  bool ok = true;
-
-  if (stream->read(buffer, 4) != 4) {
-    cout << "Unreadable property" << endl;
-    return;
-  }
-  type = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-
-  switch (type) {
-      case 2: // VT_I2
-        ok = (stream->read(buffer, 2) == 2);
-        if (ok) {
-          i = buffer[0] + (buffer[1] << 8);
-          cout << i << endl;
-        }
-        break;
-      case 3: // VT_I4
-        ok = (stream->read(buffer, 4) == 4);
-        if (ok) {
-          i = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-          cout << i << endl;
-        }
-        break;
-      case 11: // VT_BOOL
-        ok = (stream->read(buffer, 1) == 1);
-        if (ok) {
-          if ((char) buffer[0] == -1)
-            cout << "true" << endl;
-          else
-            cout << "false" << endl;
-        }
-        break;
-      case 30: // VT_LPSTR
-        ok = (stream->read(buffer, 4) == 4);
-        if (ok) {
-          // i is the declared length of the string; never read past it, so
-          // a truncated or unterminated string cannot loop forever
-          i = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-          for (unsigned long k = 0; k < i; k++) {
-            // presumably getch() yields a negative value at end of stream;
-            // both that and the terminating NUL end the string
-            const int ch = stream->getch();
-            if (ch <= 0)
-              break;
-            cout << (unsigned char) ch;
-          }
-          cout << endl;
-        }
-        break;
-      case 64: // VT_FILETIME
-        ok = (stream->read(buffer, 8) == 8);
-        if (ok) {
-          t1 = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-          t2 = buffer[4]  + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-          // convert through a real time_t instead of casting the address of
-          // an unsigned long, whose size need not match time_t
-          const time_t when = (time_t) filetime_to_unixtime(t1, t2);
-          const char * s = ctime(&when);
-          if (s != NULL)
-            cout << s;   // ctime() output already ends with a newline
-          else
-            cout << endl;
-        }
-        break;
-      default:
-        cout << "Unknown format " << type << endl;
-  }
-  if (!ok)
-    cout << "Unreadable property" << endl;
-}
-
-// show the revision data (users and files)
-//
-// Reads lcbSttbSavedBy bytes at offset fcSttbSavedBy of the "1Table" stream
-// (both globals are filled in by readFIB) and prints one line per
-// author/file pair.  The 16-bit value at offset 2 is the number of strings;
-// the strings themselves start at offset 6.  Each string is a length byte
-// followed by two bytes per character, of which only the second byte of
-// every pair is printed.
-void dumpRevision( POLE::Storage* storage ) {
-  cout << "Revision:" << endl;
-  cout << "---------" << endl << endl;
-
-  // FIXME: should look if using 0Table or 1Table
-  POLE::Stream* stream = storage->stream( "1Table" );
-  // the string count lives at offsets 2..3 and the data starts at 6, so a
-  // shorter table cannot be parsed at all
-  if( !stream || lcbSttbSavedBy < 6 ) {
-      cout << "There's no revision information" << endl;
-      return;
-  }
-
-  unsigned char * buffer = new unsigned char[lcbSttbSavedBy];
-
-  // goto offset of revision
-  stream->seek(fcSttbSavedBy);
-  // read all the revision history
-  if (stream->read(buffer, lcbSttbSavedBy) != lcbSttbSavedBy) {
-      cout << "There's no revision information" << endl;
-      delete[] buffer;
-      return;
-  }
-
-  // there are n strings, so n/2 revisions (author & file)
-  const unsigned int nRev = (buffer[2] + (buffer[3] << 8)) / 2;
-  unsigned long where = 6;
-
-  for (unsigned int i=0; i < nRev; i++) {
-    string names[2];      // [0] = author, [1] = file
-    bool complete = true;
-
-    for (int k = 0; k < 2; k++) {
-      if (where >= lcbSttbSavedBy) {
-        complete = false;
-        break;
-      }
-      const unsigned long length = buffer[where++];
-      // the loop below touches indices where+1 .. where+2*length-1
-      if (where + 2 * length > lcbSttbSavedBy) {
-        complete = false;
-        break;
-      }
-      // it's unicode, for now we only get the low byte
-      for (unsigned long j=0; j < length; j++) {
-        where++;
-        names[k] += (char) buffer[where];
-        where++;
-      }
-      where++;
-    }
-    // stop at the first entry that runs past the end of the table rather
-    // than printing a half-finished line
-    if (!complete)
-      break;
-    cout << "Rev #" << i << ": Author \"" << names[0]
-         << "\" worked on file \"" << names[1] << "\"" << endl;
-  }
-
-  cout << endl;
-  delete[] buffer;   // allocated with new[]
-
-}
-
-// show data from DocumentSummary stream
-//
-// Prints a heading, then for every entry of the property table whose id is
-// in 2..15 prints "<name>: " followed by the value as rendered by
-// showProperty().  Parsing stops quietly at the first short read so that a
-// truncated stream is never decoded from stale buffer contents.
-void dumpDocumentSummary( POLE::Storage* storage ) {
-  POLE::Stream* stream;
-  unsigned long nproperties, propertyID, offsetProp, offsetCur;
-  unsigned long begin;
-
-  cout << "Document Summary:" << endl;
-  cout << "-----------------" << endl << endl;
-
-  stream = storage->stream( "DocumentSummaryInformation" );
-  if( !stream ) {
-      cout << "There's no document summary information" << endl;
-      return;
-  }
-
-  unsigned char buffer[256];
-
-  // ClassID & Offset
-  stream->seek(28);
-  if (stream->read(buffer, 20) == 20) {
-    // beginning of section; property offsets are relative to this position
-    begin = stream->tell();
-    // length of section (skipped), then number of properties
-    if (stream->read(buffer, 4) == 4 && stream->read(buffer, 4) == 4) {
-      nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-      // properties: a table of (id, offset) pairs, 8 bytes each
-      for (unsigned long i = 0; i < nproperties; i++) {
-        if (stream->read(buffer, 8) != 8)
-          break;
-        propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-        offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-        if (propertyID > 1 && propertyID < 16) {
-          cout << DocumentSummaryProperties[propertyID] << ": ";
-          // remember the table position, visit the value, then come back
-          offsetCur = stream->tell();
-          stream->seek(offsetProp + begin);
-          // read and show the property
-          showProperty(stream);
-          stream->seek(offsetCur);
-        }
-      }
-    }
-  }
-
-  cout << endl;
-}
-
-// show data from Summary stream
-//
-// Prints a heading, then for every entry of the property table whose id is
-// in 2..19 prints "<name>: " followed by the value as rendered by
-// showProperty().  Parsing stops quietly at the first short read so that a
-// truncated stream is never decoded from stale buffer contents.
-void dumpSummary( POLE::Storage* storage ) {
-  POLE::Stream* stream;
-  unsigned long nproperties, propertyID, offsetProp, offsetCur;
-  unsigned long begin;
-
-  cout << "Summary:" << endl;
-  cout << "--------" << endl << endl;
-
-  stream = storage->stream( "SummaryInformation" );
-  if( !stream ) {
-      cout << "There's no summary information" << endl;
-      return;
-  }
-
-  unsigned char buffer[256];
-
-  // ClassID & Offset
-  stream->seek(28);
-  if (stream->read(buffer, 20) == 20) {
-    // beginning of section; property offsets are relative to this position
-    begin = stream->tell();
-    // length of section (skipped), then number of properties
-    if (stream->read(buffer, 4) == 4 && stream->read(buffer, 4) == 4) {
-      nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-      // properties: a table of (id, offset) pairs, 8 bytes each
-      for (unsigned long i = 0; i < nproperties; i++) {
-        if (stream->read(buffer, 8) != 8)
-          break;
-        propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24);
-        offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24);
-        if (propertyID > 1 && propertyID < 20) {
-          cout << SummaryProperties[propertyID] << ": ";
-          // remember the table position, visit the value, then come back
-          offsetCur = stream->tell();
-          stream->seek(offsetProp + begin);
-          // read and show the property
-          showProperty(stream);
-          stream->seek(offsetCur);
-        }
-      }
-    }
-  }
-
-  cout << endl;
-}
-
-// reads the header of the file
-//
-// Reads 898 bytes starting at byte 512 of the file, prints the product
-// version, language and creating/revising product, and stores the offset and
-// size of the saved-by table in the globals fcSttbSavedBy / lcbSttbSavedBy
-// for dumpRevision().  Returns false (after printing a message) when the
-// file cannot be opened or is too short to contain the header.
-bool readFIB( char* filename ) {
-  fstream file;
-
-  file.open( filename, std::ios::binary | std::ios::in );
-  if( !file.good() ) {
-    cout << "Can't find the file" << endl;
-    return false;
-  }
-
-  // small fixed size, so no heap allocation is needed
-  unsigned char buffer[898];
-  file.seekg( 512 );
-  file.read( (char*)buffer, sizeof(buffer) );
-  // gcount() is the number of bytes the last read actually delivered; a
-  // failed seek or a short file leaves it below the requested size
-  const bool complete = (file.gcount() == (std::streamsize) sizeof(buffer));
-  file.close();
-  if( !complete ) {
-    cout << "The file is too short to be a Word document" << endl;
-    return false;
-  }
-
-  // all fields are little-endian at fixed offsets from byte 512
-  unsigned int nProduct = buffer[4] + (buffer[5] << 8);
-  unsigned int lid = buffer[6] + (buffer[7] << 8);
-  unsigned int wMagicCreated = buffer[34] + (buffer[35] << 8);
-  unsigned int wMagicRevised = buffer[36] + (buffer[37] << 8);
-  unsigned long lProductCreated = buffer[68] + (buffer[69] << 8) + (buffer[70] << 16) + (buffer[71] << 24);
-  unsigned long lProductRevised = buffer[72] + (buffer[73] << 8) + (buffer[74] << 16) + (buffer[75] << 24);
-  fcSttbSavedBy = buffer[722] + (buffer[723] << 8) + (buffer[724] << 16) + (buffer[725] << 24);
-  lcbSttbSavedBy = buffer[726] + (buffer[727] << 8) + (buffer[728] << 16) + (buffer[729] << 24);
-
-  // NOTE(review): lidToLanguage / idToProduct / dateToString are defined
-  // elsewhere; if any of them can return a null char* for an unknown id,
-  // streaming it is undefined -- confirm against their definitions.
-  cout << "File: " << filename << endl;
-  cout << "Product version: " << nProduct << endl;
-  cout << "Language: " << lidToLanguage(lid) << endl;
-  cout << "Created by: " << idToProduct(wMagicCreated) << " (Build " << dateToString(lProductCreated) << ")" << endl;
-  cout << "Revised by: " << idToProduct(wMagicRevised) << " (Build " << dateToString(lProductRevised) << ")" << endl;
-  cout << endl;
-
-  return true;
-
-}
-
-// Command-line entry point: prints the banner, then for the file named by
-// argv[1] prints the header fields (readFIB), the summary properties and the
-// revision history.  Returns 1 when the file cannot be read or opened as a
-// compound document, 0 otherwise (including when no filename is given).
-int main(int argc, char *argv[]) {
-  cout << endl << "WordLeaker v.0.1" << endl;
-  cout << " by Madelman (http://elligre.tk/madelman/)" << endl << endl;
-
-
-  if( argc < 2 ) {
-    cout << "  You must supply a filename" << endl << endl;
-    return 0;
-  }
-
-  char* filename = argv[1];
-
-  // readFIB also fills the globals that dumpRevision relies on
-  if ( !readFIB(filename) )
-      return 1;
-
-  POLE::Storage* storage = new POLE::Storage( filename );
-  storage->open();
-  if( storage->result() != POLE::Storage::Ok ) {
-    cout << "The file " << filename << " is not a Word document" << endl;
-    // release the storage on the failure path as well
-    delete storage;
-    return 1;
-  }
-
-  dumpSummary( storage );
-  // FIXME: doesn't always work
-  // but there's nothing really interesting in here
-  //dumpDocumentSummary( storage );
-  dumpRevision( storage );
-  // TODO: we don't show the GUID
-  // TODO: we don't show the macros
-
-  delete storage;
-
-  return 0;
-}
diff --git a/src/plugins/wordleaker/wordleaker.h b/src/plugins/wordleaker/wordleaker.h
@@ -1,124 +0,0 @@
-/* 
-   WordLeaker - Shows information about Word DOC files
-   Copyright (C) 2005 Sacha Fuentes <madelman@iname.com>
-
-   Based on poledump.c
-   Original idea from WordDumper (http://www.computerbytesman.com)
-   Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm
-   Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html
-   
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this library; see the file COPYING.  If not, write to
-   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, US
-*/
-
-#include <string>
-
-using namespace std;
-
-/* Display names for the properties of the DocumentSummaryInformation
-   stream, indexed by property id (0..16).  The entries point at string
-   literals, so the element type is const char*. */
-static const char*
-DocumentSummaryProperties[] = {
-"Dictionary",
-"Code page",
-"Category",
-"PresentationTarget",
-"Bytes",
-"Lines",
-"Paragraphs",
-"Slides",
-"Notes",
-"HiddenSlides",
-"MMClips",
-"ScaleCrop",
-"HeadingPairs",
-"TitlesofParts",
-"Manager",
-"Company",
-"LinksUpTo"
-};
-
-/*
- *  filetime_to_unixtime
- *
- *  Adapted from work in 'wv' by:
- *    Caolan McNamara (Caolan.McNamara@ul.ie)
- *
- *  Takes a 64-bit time value split into two 32-bit halves (low_time,
- *  high_time), subtracts a fixed delta and divides the result by
- *  10000000 (in two steps, 10000 then 1000).  The value is handled as
- *  three limbs -- hi32 / mid16 / low16 -- so that every intermediate
- *  step works on small quantities.  The remainder of the division is
- *  discarded.
- *
- *  The three delta constants below combine to
- *    27111902 * 2^32 + 54590 * 2^16 + 32768 = 116444736000000000,
- *  presumably the offset between the FILETIME epoch and the Unix
- *  epoch in 100 ns units -- confirm against the FILETIME definition.
- *
- *  NOTE(review): this is a non-inline function definition in a header;
- *  including the header from more than one translation unit would
- *  define it twice.
- *  NOTE(review): the limb comments assume hi32 holds 32 significant
- *  bits; unsigned long may be wider than that -- confirm the result on
- *  such platforms.
- */
-#define HIGH32_DELTA 27111902
-#define MID16_DELTA  54590
-#define LOW16_DELTA  32768
-
-unsigned long filetime_to_unixtime (unsigned long low_time, unsigned long high_time) {
-  unsigned long low16;/* 16 bit, low    bits */
-  unsigned long mid16;/* 16 bit, medium bits */
-  unsigned long hi32;/* 32 bit, high   bits */
-  unsigned int carry;/* carry bit for subtraction */
-  int negative;/* whether the value (called a below) is negative after the subtraction */
-
-/* Copy the time values to hi32/mid16/low16 */
-hi32  =  high_time;
-mid16 = low_time >> 16;
-low16 = low_time &  0xffff;
-
-/* Subtract the delta limb by limb, propagating the borrow in carry */
-if (low16 >= LOW16_DELTA           )
-low16 -=             LOW16_DELTA        , carry = 0;
-else
-low16 += (1 << 16) - LOW16_DELTA        , carry = 1;
-
-if (mid16 >= MID16_DELTA    + carry)
-mid16 -=             MID16_DELTA + carry, carry = 0;
-else
-mid16 += (1 << 16) - MID16_DELTA - carry, carry = 1;
-
-hi32 -= HIGH32_DELTA + carry;
-
-/* If a is negative (bit 31 or above set in hi32), replace a by (-1-a) */
-negative = (hi32 >= ((unsigned long)1) << 31);
-if (negative) {
-/* Set a to -a - 1 (a is hi32/mid16/low16) */
-low16 = 0xffff - low16;
-mid16 = 0xffff - mid16;
-hi32 = ~hi32;
-}
-
-/*
- *  Divide a by 10000000 (a = hi32/mid16/low16); the remainder is dropped.
- *  Split the divisor into 10000 * 1000 which are both less than 0xffff.
- *  Each step carries the remainder of the higher limb into the next
- *  lower limb before dividing that one.
- */
-mid16 += (hi32 % 10000) << 16;
-hi32  /=       10000;
-low16 += (mid16 % 10000) << 16;
-mid16 /=       10000;
-low16 /=       10000;
-
-mid16 += (hi32 % 1000) << 16;
-hi32  /=       1000;
-low16 += (mid16 % 1000) << 16;
-mid16 /=       1000;
-low16 /=       1000;
-
-/* If a was negative, undo the earlier complement: replace a by (-1-a) */
-if (negative) {
-/* Set a to -a - 1 (a is hi32/mid16/low16) */
-low16 = 0xffff - low16;
-mid16 = 0xffff - mid16;
-hi32 = ~hi32;
-}
-
-/*  Recombine the three limbs.
- *  Do not replace this by << 32, it gives a compiler warning and
- *  it does not work
- */
-return ((((unsigned long)hi32) << 16) << 16) + (mid16 << 16) + low16;
-
-}

	libextractor GNU libextractor
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	ChangeLog	\|	4	++++
M	configure.ac	\|	5	+++--
A	m4/abi-gsf.m4	\|	78	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/include/extractor.h	\|	8	++++++++
M	src/main/extractor.c	\|	53	++++++++++++++++++++++++++++++-----------------------
M	src/plugins/Makefile.am	\|	7	+++++--
M	src/plugins/hash/rmd160extractor.c	\|	9	+++++----
M	src/plugins/ole2/Makefile.am	\|	7	+++----
R	src/plugins/wordleaker/README -> src/plugins/ole2/README	\|	0
M	src/plugins/ole2/ole2extractor.c	\|	2401	++++++++++++-------------------------------------------------------------------
D	src/plugins/wordleaker/Makefile.am	\|	25	-------------------------
D	src/plugins/wordleaker/SYMBOLS	\|	1	-
D	src/plugins/wordleaker/pole.cpp	\|	1271	-------------------------------------------------------------------------------
D	src/plugins/wordleaker/pole.h	\|	149	-------------------------------------------------------------------------------
D	src/plugins/wordleaker/wordextractor.cc	\|	486	-------------------------------------------------------------------------------
D	src/plugins/wordleaker/wordleaker.cpp	\|	308	-------------------------------------------------------------------------------
D	src/plugins/wordleaker/wordleaker.h	\|	124	-------------------------------------------------------------------------------