libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 990f29c8825aeb4578b2688cad389bcec96fda2e
parent 378faca310c91893df9083ea4dcfdcd4a5366dc6
Author: Christian Grothoff <christian@grothoff.org>
Date:   Wed, 18 Jan 2006 18:28:13 +0000

stuff

Diffstat:
Asrc/plugins/wordleaker/README | 25+++++++++++++++++++++++++
Asrc/plugins/wordleaker/WordLeaker.cpp | 310+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/wordleaker/WordLeaker.h | 287+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/wordleaker/pole.cpp | 1271+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/wordleaker/pole.h | 149+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 2042 insertions(+), 0 deletions(-)

diff --git a/src/plugins/wordleaker/README b/src/plugins/wordleaker/README @@ -0,0 +1,25 @@ +WordLeaker v.0.1 (c) 2005 + by Madelman (http://elligre.tk/madelman/) + +Shows information about a Word file. +It can show all the summary and the revision history of the file. + +It should be portable but, for now, it doesn't work in Linux. I haven't had +the time to debug it but I'll do when I can. + +There are a lot of things that don't work yet, if you want to help contact me. + +Copyright and License +===================== + +WordLeaker v.0.1 (c) 2005 by Madelman (madelman@iname.com) + +WordLeaker program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU General Public License for more details. + diff --git a/src/plugins/wordleaker/WordLeaker.cpp b/src/plugins/wordleaker/WordLeaker.cpp @@ -0,0 +1,310 @@ +/* + WordLeaker - Shows information about Word DOC files + Copyright (C) 2005 Sacha Fuentes <madelman@iname.com> + + Based on poledump.c + Original idea from WordDumper (http://www.computerbytesman.com) + Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm + Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, US +*/ + +// TAKE CARE: there's not a single check for validity of data, +// so any malformed or malicious Word file will break it + +#include <iostream> +#include <fstream> +#include <stdlib.h> +#include <list> +#include <ctime> + +#include "pole.h" +#include "WordLeaker.h" + +unsigned long fcSttbSavedBy; +unsigned long lcbSttbSavedBy; + +// read the type of the property and displays its value +void showProperty( POLE::Stream* stream ) { + unsigned long read, type; + unsigned char buffer[256]; + unsigned char c; + unsigned long i; + unsigned long t, t1, t2; + char *s; + + read = stream->read(buffer, 4); + type = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + + switch (type) { + case 2: // VT_I2 + read = stream->read(buffer, 2); + i = buffer[0] + (buffer[1] << 8); + cout << i << endl; + break; + case 3: // VT_I4 + read = stream->read(buffer, 4); + i = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + cout << i << endl; + break; + case 11: // VT_BOOL + read = stream->read(buffer, 1); + if ((char) buffer[0] == -1) + cout << "true" << endl; + else + cout << "false" << endl; + break; + case 30: // VT_LPSTR + read = stream->read(buffer, 4); + i = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + while ((c = stream->getch()) != 0) + cout << c; + cout << endl; + break; + case 64: // VT_FILETIME + read = stream->read(buffer, 8); + t1 = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + t2 = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); + t = filetime_to_unixtime(t1, t2); + s = ctime((time_t *) &t); + cout << s; + break; + default: + cout << "Unknown format " << type << endl; + } +} + +// show the revision data (users and files) +void dumpRevision( POLE::Storage* storage ) { + unsigned int nRev; + unsigned int where = 0; + POLE::Stream* stream; + + cout << "Revision:" << endl; + cout << "---------" << endl << endl; + + // FIXME: should look if using 0Table or 1Table + stream = storage->stream( "1Table" ); + if( !stream ) { + cout << "There's no revision information" << endl; + return; + } + + unsigned char * buffer = new unsigned char[lcbSttbSavedBy]; + unsigned char buffer2[1024]; + unsigned int length; + + // goto offset of revision + stream->seek(fcSttbSavedBy); + // read all the revision history + stream->read(buffer, lcbSttbSavedBy); + + // there are n strings, so n/2 revisions (author & file) + nRev = (buffer[2] + (buffer[3] << 8)) / 2; + where = 6; + + for (unsigned int i=0; i < nRev; i++) { + cout << "Rev #" << i << ": Author \""; + length = buffer[where++]; + // it's unicode, for now we only get the low byte + for (unsigned int j=0; j < length; j++) { + where++; + cout << buffer[where]; + where++; + } + where++; + cout << "\" worked on file \""; + length = buffer[where++]; + // it's unicode, for now we only get the low byte + for (unsigned int j=0; j < length; j++) { + where++; + cout << buffer[where]; + where++; + } + where++; + cout << "\"" << endl; + } + + cout << endl; + delete buffer; + +} + +// show data from DocumentSummary stream +void dumpDocumentSummary( POLE::Storage* storage ) { + POLE::Stream* stream; + unsigned long read, nproperties, propertyID, offsetProp, offsetCur; + unsigned long begin; + + cout << "Document Summary:" << endl; + cout << "-----------------" << endl << endl; + + stream = storage->stream( "DocumentSummaryInformation" ); + if( !stream ) { + cout << "There's no document summary information" << endl; + return; + } + + unsigned char buffer[256]; + + // ClassID & Offset + stream->seek(28); + stream->read(buffer, 20); + // beginning of section + begin = stream->tell(); + // length of section + read = stream->read(buffer, 4); + // number of properties + read = stream->read(buffer, 4); + nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + // properties + + for (unsigned long i = 0; i < nproperties; i++) { + read = stream->read(buffer, 8); + propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); + if (propertyID > 1 && propertyID < 16) { + cout << DocumentSummaryProperties[propertyID] << ": "; + offsetCur = stream->tell(); + stream->seek(offsetProp + begin); + // read and show the property + showProperty(stream); + stream->seek(offsetCur); + } + } + + cout << endl; +} + +// show data from Summary stream +void dumpSummary( POLE::Storage* storage ) { + POLE::Stream* stream; + unsigned long read, nproperties, propertyID, offsetProp, offsetCur; + unsigned long begin; + + cout << "Summary:" << endl; + cout << "--------" << endl << endl; + + stream = storage->stream( "SummaryInformation" ); + if( !stream ) { + cout << "There's no summary information" << endl; + return; + } + + unsigned char buffer[256]; + + // ClassID & Offset + stream->seek(28); + stream->read(buffer, 20); + // beginning of section + begin = stream->tell(); + // length of section + read = stream->read(buffer, 4); + // number of properties + read = stream->read(buffer, 4); + nproperties = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + // properties + for (unsigned long i = 0; i < nproperties; i++) { + read = stream->read(buffer, 8); + propertyID = buffer[0] + (buffer[1] << 8) + (buffer[2] << 16) + (buffer[3] << 24); + offsetProp = buffer[4] + (buffer[5] << 8) + (buffer[6] << 16) + (buffer[7] << 24); + if (propertyID > 1 && propertyID < 20) { + cout << SummaryProperties[propertyID] << ": "; + offsetCur = stream->tell(); + stream->seek(offsetProp + begin); + // read and show the property + showProperty(stream); + stream->seek(offsetCur); + } + } + + cout << endl; +} + +// reads the header of the file +bool readFIB( char* filename ) { + fstream file; + + file.open( filename, std::ios::binary | std::ios::in ); + if( !file.good() ) { + cout << "Can't find the file" << endl; + return false; + } + + unsigned char * buffer = new unsigned char[898]; + file.seekg( 512 ); + file.read( (char*)buffer, 898 ); + file.close(); + + unsigned int wIdent = buffer[0] + (buffer[1] << 8); + unsigned int nProduct = buffer[4] + (buffer[5] << 8); + unsigned int lid = buffer[6] + (buffer[7] << 8); + unsigned int envr = buffer[18]; + unsigned int wMagicCreated = buffer[34] + (buffer[35] << 8); + unsigned int wMagicRevised = buffer[36] + (buffer[37] << 8); + unsigned long lProductCreated = buffer[68] + (buffer[69] << 8) + (buffer[70] << 16) + (buffer[71] << 24); + unsigned long lProductRevised = buffer[72] + (buffer[73] << 8) + (buffer[74] << 16) + (buffer[75] << 24); + fcSttbSavedBy = buffer[722] + (buffer[723] << 8) + (buffer[724] << 16) + (buffer[725] << 24); + lcbSttbSavedBy = buffer[726] + (buffer[727] << 8) + (buffer[728] << 16) + (buffer[729] << 24); + delete[] buffer; + + cout << "File: " << filename << endl; + cout << "Product version: " << nProduct << endl; + cout << "Language: " << lidToLanguage(lid) << endl; + cout << "Created by: " << idToProduct(wMagicCreated) << " (Build " << dateToString(lProductCreated) << ")" << endl; + cout << "Revised by: " << idToProduct(wMagicRevised) << " (Build " << dateToString(lProductRevised) << ")" << endl; + cout << endl; + + return true; + +} + +int main(int argc, char *argv[]) { + cout << endl << "WordLeaker v.0.1" << endl; + cout << " by Madelman (http://elligre.tk/madelman/)" << endl << endl; + + + if( argc < 2 ) { + cout << " You must supply a filename" << endl << endl; + return 0; + } + + char* filename = argv[1]; + + if ( !readFIB(filename) ) + return 1; + + POLE::Storage* storage = new POLE::Storage( filename ); + storage->open(); + if( storage->result() != POLE::Storage::Ok ) { + cout << "The file " << filename << " is not a Word document" << endl; + return 1; + } + + dumpSummary( storage ); + // FIXME: doesn't always work + // but there's nothing really interesting in here + //dumpDocumentSummary( storage ); + dumpRevision( storage ); + // TODO: we don't show the GUID + // TODO: we don't show the macros + + delete storage; + + return 0; +} + diff --git a/src/plugins/wordleaker/WordLeaker.h b/src/plugins/wordleaker/WordLeaker.h @@ -0,0 +1,287 @@ +/* + WordLeaker - Shows information about Word DOC files + Copyright (C) 2005 Sacha Fuentes <madelman@iname.com> + + Based on poledump.c + Original idea from WordDumper (http://www.computerbytesman.com) + Info on Word format: http://www.aozw65.dsl.pipex.com/generator_wword8.htm + Info on Word format: http://jakarta.apache.org/poi/hpsf/internals.html + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this library; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, US +*/ + +#include <string> + +using namespace std; + +static char* SummaryProperties[] = { +"Unknown", +"Unknown", +"Title", +"Subject", +"Author", +"Keywords", +"Comments", +"Template", +"Last Saved By", +"Revision Number", +"Total Editing Time", +"Last Printed", +"Create Time/Date", +"Last Saved Time/Date", +"Number of Pages", +"Number of Words", +"Number of Characters", +"Thumbnails", +"Creating Application", +"Security" +}; + +static char* DocumentSummaryProperties[] = { +"Dictionary", +"Code page", +"Category", +"PresentationTarget", +"Bytes", +"Lines", +"Paragraphs", +"Slides", +"Notes", +"HiddenSlides", +"MMClips", +"ScaleCrop", +"HeadingPairs", +"TitlesofParts", +"Manager", +"Company", +"LinksUpTo" +}; + +string dateToString( unsigned long date ) { + char f[9]; + sprintf(f, "%d/%d/%d", (date / 10000 % 100), (date / 100 % 100), (date % 100)); + return f; +} + +string idToProduct( unsigned int id ) { + // TODO: find the rest of ids + switch ( id ) { + case 0x6A62: + return "Word 97"; + case 0x626A: + return "Word 98 (Mac)"; + default: + return "Unknown"; + } +} + +string lidToLanguage( unsigned int lid ) { + switch ( lid ) { + case 0x0400: + return "No Proofing"; + case 0x0401: + return "Arabic"; + case 0x0402: + return "Bulgarian"; + case 0x0403: + return "Catalan"; + case 0x0404: + return "Traditional Chinese"; + case 0x0804: + return "Simplified Chinese"; + case 0x0405: + return "Czech"; + case 0x0406: + return "Danish"; + case 0x0407: + return "German"; + case 0x0807: + return "Swiss German"; + case 0x0408: + return "Greek"; + case 0x0409: + return "U.S. English"; + case 0x0809: + return "U.K. English"; + case 0x0c09: + return "Australian English"; + case 0x040a: + return "Castilian Spanish"; + case 0x080a: + return "Mexican Spanish"; + case 0x040b: + return "Finnish"; + case 0x040c: + return "French"; + case 0x080c: + return "Belgian French"; + case 0x0c0c: + return "Canadian French"; + case 0x100c: + return "Swiss French"; + case 0x040d: + return "Hebrew"; + case 0x040e: + return "Hungarian"; + case 0x040f: + return "Icelandic"; + case 0x0410: + return "Italian"; + case 0x0810: + return "Swiss Italian"; + case 0x0411: + return "Japanese"; + case 0x0412: + return "Korean"; + case 0x0413: + return "Dutch"; + case 0x0813: + return "Belgian Dutch"; + case 0x0414: + return "Norwegian - Bokmal"; + case 0x0814: + return "Norwegian - Nynorsk"; + case 0x0415: + return "Polish"; + case 0x0416: + return "Brazilian Portuguese"; + case 0x0816: + return "Portuguese"; + case 0x0417: + return "Rhaeto-Romanic"; + case 0x0418: + return "Romanian"; + case 0x0419: + return "Russian"; + case 0x041a: + return "Croato-Serbian (Latin)"; + case 0x081a: + return "Serbo-Croatian (Cyrillic)"; + case 0x041b: + return "Slovak"; + case 0x041c: + return "Albanian"; + case 0x041d: + return "Swedish"; + case 0x041e: + return "Thai"; + case 0x041f: + return "Turkish"; + case 0x0420: + return "Urdu"; + case 0x0421: + return "Bahasa"; + case 0x0422: + return "Ukrainian"; + case 0x0423: + return "Byelorussian"; + case 0x0424: + return "Slovenian"; + case 0x0425: + return "Estonian"; + case 0x0426: + return "Latvian"; + case 0x0427: + return "Lithuanian"; + case 0x0429: + return "Farsi"; + case 0x042D: + return "Basque"; + case 0x042F: + return "Macedonian"; + case 0x0436: + return "Afrikaans"; + case 0x043E: + return "Malaysian"; + default: + return "Unknown"; + } +} + +/* + * filetime_to_unixtime + * + * Adapted from work in 'wv' by: + * Caolan McNamara (Caolan.McNamara@ul.ie) + */ +#define HIGH32_DELTA 27111902 +#define MID16_DELTA 54590 +#define LOW16_DELTA 32768 + +unsigned long filetime_to_unixtime (unsigned long low_time, unsigned long high_time) { + unsigned long low16;/* 16 bit, low bits */ + unsigned long mid16;/* 16 bit, medium bits */ + unsigned long hi32;/* 32 bit, high bits */ + unsigned int carry;/* carry bit for subtraction */ + int negative;/* whether a represents a negative value */ + +/* Copy the time values to hi32/mid16/low16 */ +hi32 = high_time; +mid16 = low_time >> 16; +low16 = low_time & 0xffff; + +/* Subtract the time difference */ +if (low16 >= LOW16_DELTA ) +low16 -= LOW16_DELTA , carry = 0; +else +low16 += (1 << 16) - LOW16_DELTA , carry = 1; + +if (mid16 >= MID16_DELTA + carry) +mid16 -= MID16_DELTA + carry, carry = 0; +else +mid16 += (1 << 16) - MID16_DELTA - carry, carry = 1; + +hi32 -= HIGH32_DELTA + carry; + +/* If a is negative, replace a by (-1-a) */ +negative = (hi32 >= ((unsigned long)1) << 31); +if (negative) { +/* Set a to -a - 1 (a is hi32/mid16/low16) */ +low16 = 0xffff - low16; +mid16 = 0xffff - mid16; +hi32 = ~hi32; +} + +/* + * Divide a by 10000000 (a = hi32/mid16/low16), put the rest into r. + * Split the divisor into 10000 * 1000 which are both less than 0xffff. + */ +mid16 += (hi32 % 10000) << 16; +hi32 /= 10000; +low16 += (mid16 % 10000) << 16; +mid16 /= 10000; +low16 /= 10000; + +mid16 += (hi32 % 1000) << 16; +hi32 /= 1000; +low16 += (mid16 % 1000) << 16; +mid16 /= 1000; +low16 /= 1000; + +/* If a was negative, replace a by (-1-a) and r by (9999999 - r) */ +if (negative) { +/* Set a to -a - 1 (a is hi32/mid16/low16) */ +low16 = 0xffff - low16; +mid16 = 0xffff - mid16; +hi32 = ~hi32; +} + +/* Do not replace this by << 32, it gives a compiler warning and + * it does not work + */ +return ((((unsigned long)hi32) << 16) << 16) + (mid16 << 16) + low16; + +} diff --git a/src/plugins/wordleaker/pole.cpp b/src/plugins/wordleaker/pole.cpp @@ -0,0 +1,1271 @@ +/* POLE - Portable C++ library to access OLE Storage + Copyright (C) 2002-2004 Ariya Hidayat <ariya@kde.org> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, US +*/ + +#include <fstream> +#include <iostream> +#include <list> +#include <string> +#include <vector> + +#include "pole.h" + +namespace POLE +{ + +class Header +{ + public: + unsigned char id[8]; // signature, or magic identifier + unsigned b_shift; // bbat->blockSize = 1 << b_shift + unsigned s_shift; // sbat->blockSize = 1 << s_shift + unsigned num_bat; // blocks allocated for big bat + unsigned dirent_start; // starting block for directory info + unsigned threshold; // switch from small to big file (usually 4K) + unsigned sbat_start; // starting block index to store small bat + unsigned num_sbat; // blocks allocated for small bat + unsigned mbat_start; // starting block to store meta bat + unsigned num_mbat; // blocks allocated for meta bat + unsigned long bb_blocks[109]; + + Header(); + void load( const unsigned char* buffer ); + void save( unsigned char* buffer ); + void debug(); +}; + +class AllocTable +{ + public: + static const unsigned Eof; + static const unsigned Avail; + static const unsigned Bat; + unsigned blockSize; + AllocTable(); + void clear(); + unsigned long count(); + void resize( unsigned long newsize ); + void preserve( unsigned long n ); + void set( unsigned long index, unsigned long val ); + unsigned unused(); + void setChain( std::vector<unsigned long> ); + std::vector<unsigned long> follow( unsigned long start ); + unsigned long operator[](unsigned long index ); + void load( const unsigned char* buffer, unsigned len ); + void save( unsigned char* buffer ); + unsigned size(); + void debug(); + private: + std::vector<unsigned long> data; + AllocTable( const AllocTable& ); + AllocTable& operator=( const AllocTable& ); +}; + +class DirEntry +{ + public: + std::string name; + bool dir; // true if directory + unsigned long size; // size (not valid if directory) + unsigned long start; // starting block + unsigned prev; // previous sibling + unsigned next; // next sibling + unsigned child; // first child +}; + +class DirTree +{ + public: + static const unsigned End; + DirTree(); + void clear(); + unsigned entryCount(); + DirEntry* entry( unsigned index ); + DirEntry* entry( const std::string& name, bool create=false ); + int indexOf( DirEntry* e ); + int parent( unsigned index ); + std::string fullName( unsigned index ); + std::vector<unsigned> children( unsigned index ); + std::vector<DirEntry*> listDirectory(); + bool enterDirectory( const std::string& dir ); + void leaveDirectory(); + std::string path(); + void load( unsigned char* buffer, unsigned len ); + void save( unsigned char* buffer ); + unsigned size(); + void debug(); + private: + unsigned current; + std::vector<DirEntry> entries; + DirTree( const DirTree& ); + DirTree& operator=( const DirTree& ); +}; + +class StorageIO +{ + public: + Storage* storage; + std::string filename; + std::fstream file; + int result; // result of operation + bool opened; // true if file is opened + unsigned long filesize; // size of the file + + Header* header; // storage header + DirTree* dirtree; // directory tree + AllocTable* bbat; // allocation table for big blocks + AllocTable* sbat; // allocation table for small blocks + + std::vector<unsigned long> sb_blocks; // blocks for "small" files + + std::list<Stream*> streams; + + StorageIO( Storage* storage, const char* filename ); + ~StorageIO(); + + bool open(); + void close(); + void flush(); + void load(); + void create(); + + unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen ); + + unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen ); + + unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen ); + + unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen ); + + private: + // no copy or assign + StorageIO( const StorageIO& ); + StorageIO& operator=( const StorageIO& ); + +}; + +class StreamImpl +{ + public: + StreamImpl( StorageIO* io, DirEntry* entry ); + ~StreamImpl(); + unsigned long size(); + void seek( unsigned long pos ); + unsigned long tell(); + int getch(); + unsigned long read( unsigned char* data, unsigned long maxlen ); + unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen ); + + StorageIO* io; + DirEntry* entry; + + private: + std::vector<unsigned long> blocks; + + // no copy or assign + StreamImpl( const StreamImpl& ); + StreamImpl& operator=( const StreamImpl& ); + + // pointer for read + unsigned long m_pos; + + // simple cache system to speed-up getch() + unsigned char* cache_data; + unsigned long cache_size; + unsigned long cache_pos; + void updateCache(); +}; + +}; // namespace POLE + +using namespace POLE; + +static inline unsigned long readU16( const unsigned char* ptr ) +{ + return ptr[0]+(ptr[1]<<8); +} + +static inline unsigned long readU32( const unsigned char* ptr ) +{ + return ptr[0]+(ptr[1]<<8)+(ptr[2]<<16)+(ptr[3]<<24); +} + +static inline void writeU16( unsigned char* ptr, unsigned long data ) +{ + ptr[0] = data & 0xff; + ptr[1] = (data >> 8) & 0xff; +} + +static inline void writeU32( unsigned char* ptr, unsigned long data ) +{ + ptr[0] = data & 0xff; + ptr[1] = (data >> 8) & 0xff; + ptr[2] = (data >> 16) & 0xff; + ptr[3] = (data >> 24) & 0xff; +} + +static const unsigned char pole_magic[] = + { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; + +// =========== Header ========== + +Header::Header() +{ + b_shift = 9; + s_shift = 6; + num_bat = 0; + dirent_start = 0; + threshold = 4096; + sbat_start = 0; + num_sbat = 0; + mbat_start = 0; + num_mbat = 0; + + for( unsigned i = 0; i < 8; i++ ) + id[i] = pole_magic[i]; + for( unsigned i=0; i<109; i++ ) + bb_blocks[i] = AllocTable::Avail; +} + +void Header::load( const unsigned char* buffer ) +{ + b_shift = readU16( buffer + 0x1e ); + s_shift = readU16( buffer + 0x20 ); + num_bat = readU32( buffer + 0x2c ); + dirent_start = readU32( buffer + 0x30 ); + threshold = readU32( buffer + 0x38 ); + sbat_start = readU32( buffer + 0x3c ); + num_sbat = readU32( buffer + 0x40 ); + mbat_start = readU32( buffer + 0x44 ); + num_mbat = readU32( buffer + 0x48 ); + + for( unsigned i = 0; i < 8; i++ ) + id[i] = buffer[i]; + for( unsigned i=0; i<109; i++ ) + bb_blocks[i] = readU32( buffer + 0x4C+i*4 ); +} + +void Header::save( unsigned char* buffer ) +{ + memset( buffer, 0, 0x4c ); + memcpy( buffer, pole_magic, 8 ); // ole signature + writeU32( buffer + 8, 0 ); // unknown + writeU32( buffer + 12, 0 ); // unknown + writeU32( buffer + 16, 0 ); // unknown + writeU16( buffer + 24, 0x003e ); // revision ? + writeU16( buffer + 26, 3 ); // version ? + writeU16( buffer + 28, 0xfffe ); // unknown + writeU16( buffer + 0x1e, b_shift ); + writeU16( buffer + 0x20, s_shift ); + writeU32( buffer + 0x2c, num_bat ); + writeU32( buffer + 0x30, dirent_start ); + writeU32( buffer + 0x38, threshold ); + writeU32( buffer + 0x3c, sbat_start ); + writeU32( buffer + 0x40, num_sbat ); + writeU32( buffer + 0x44, mbat_start ); + writeU32( buffer + 0x48, num_mbat ); + + for( unsigned i=0; i<109; i++ ) + writeU32( buffer + 0x4C+i*4, bb_blocks[i] ); +} + +void Header::debug() +{ + std::cout << std::endl; + std::cout << "b_shift " << b_shift << std::endl; + std::cout << "s_shift " << s_shift << std::endl; + std::cout << "num_bat " << num_bat << std::endl; + std::cout << "dirent_start " << dirent_start << std::endl; + std::cout << "threshold " << threshold << std::endl; + std::cout << "sbat_start " << sbat_start << std::endl; + std::cout << "num_sbat " << num_sbat << std::endl; + std::cout << "mbat_start " << mbat_start << std::endl; + std::cout << "num_mbat " << num_mbat << std::endl; + + unsigned s = (num_bat<=109) ? num_bat : 109; + std::cout << "bat blocks: "; + for( unsigned i = 0; i < s; i++ ) + std::cout << bb_blocks[i] << " "; + std::cout << std::endl; +} + +// =========== AllocTable ========== + +const unsigned AllocTable::Avail = 0xffffffff; +const unsigned AllocTable::Eof = 0xfffffffe; +const unsigned AllocTable::Bat = 0xfffffffd; + +AllocTable::AllocTable() +{ + blockSize = 4096; + // initial size + resize( 128 ); +} + +unsigned long AllocTable::count() +{ + return data.size(); +} + +void AllocTable::resize( unsigned long newsize ) +{ + unsigned oldsize = data.size(); + data.resize( newsize ); + if( newsize > oldsize ) + for( unsigned i = oldsize; i<newsize; i++ ) + data[i] = Avail; +} + +// make sure there're still free blocks +void AllocTable::preserve( unsigned long n ) +{ + std::vector<unsigned long> pre; + for( unsigned i=0; i < n; i++ ) + pre.push_back( unused() ); +} + +unsigned long AllocTable::operator[]( unsigned long index ) +{ + unsigned long result; + result = data[index]; + return result; +} + +void AllocTable::set( unsigned long index, unsigned long value ) +{ + if( index >= count() ) resize( index + 1); + data[ index ] = value; +} + +void AllocTable::setChain( std::vector<unsigned long> chain ) +{ + if( chain.size() ) + { + for( unsigned i=0; i<chain.size()-1; i++ ) + set( chain[i], chain[i+1] ); + set( chain[ chain.size()-1 ], AllocTable::Eof ); + } +} + +// follow +std::vector<unsigned long> AllocTable::follow( unsigned long start ) +{ + std::vector<unsigned long> chain; + + if( start >= count() ) return chain; + + unsigned long p = start; + while( p < count() ) + { + if( p >= (unsigned long)Eof ) break; + if( p >= count() ) break; + chain.push_back( p ); + if( data[p] >= count() ) break; + p = data[ p ]; + } + + return chain; +} + +unsigned AllocTable::unused() +{ + // find first available block + for( unsigned i = 0; i < data.size(); i++ ) + if( data[i] == Avail ) + return i; + + // completely full, so enlarge the table + unsigned block = data.size(); + resize( data.size()+10 ); + return block; +} + +void AllocTable::load( const unsigned char* buffer, unsigned len ) +{ + resize( len / 4 ); + for( unsigned i = 0; i < count(); i++ ) + set( i, readU32( buffer + i*4 ) ); +} + +// return space required to save this dirtree +unsigned AllocTable::size() +{ + return count() * 4; +} + +void AllocTable::save( unsigned char* buffer ) +{ + for( unsigned i = 0; i < count(); i++ ) + writeU32( buffer + i*4, data[i] ); +} + +void AllocTable::debug() +{ + std::cout << "block size " << data.size() << std::endl; + for( unsigned i=0; i< data.size(); i++ ) + { + if( data[i] == Avail ) continue; + std::cout << i << ": "; + if( data[i] == Eof ) std::cout << "eof"; + else std::cout << data[i]; + std::cout << std::endl; + } +} + +// =========== DirTree ========== + +const unsigned DirTree::End = 0xffffffff; + +DirTree::DirTree() +{ + current = 0; + clear(); +} + +void DirTree::clear() +{ + // leave only root entry + entries.resize( 1 ); + entries[0].name = "Root Entry"; + entries[0].dir = true; + entries[0].size = 0; + entries[0].start = End; + entries[0].prev = End; + entries[0].next = End; + entries[0].child = End; + current = 0; +} + +unsigned DirTree::entryCount() +{ + return entries.size(); +} + +DirEntry* DirTree::entry( unsigned index ) +{ + if( index >= entryCount() ) return (DirEntry*) 0; + return &entries[ index ]; +} + +int DirTree::indexOf( DirEntry* e ) +{ + for( unsigned i = 0; i < entryCount(); i++ ) + if( entry( i ) == e ) return i; + + return -1; +} + +int DirTree::parent( unsigned index ) +{ + // brute-force, basically we iterate for each entries, find its children + // and check if one of the children is 'index' + for( unsigned j=0; j<entryCount(); j++ ) + { + std::vector<unsigned> chi = children( j ); + for( unsigned i=0; i<chi.size();i++ ) + if( chi[i] == index ) + return j; + } + + return -1; +} + +std::string DirTree::fullName( unsigned index ) +{ + // don't use root name ("Root Entry"), just give "/" + if( index == 0 ) return "/"; + + std::string result = entry( index )->name; + result.insert( 0, "/" ); + int p = parent( index ); + while( p > 0 ) + { + result.insert( 0, entry( p )->name ); + result.insert( 0, "/" ); + index = p; + if( index <= 0 ) break; + } + return result; +} + +// given a fullname (e.g "/ObjectPool/_1020961869"), find the entry +// if not found and create is false, return 0 +// if create is true, a new entry is returned +DirEntry* DirTree::entry( const std::string& name, bool create ) +{ + if( !name.length() ) return (DirEntry*)0; + + // quick check for "/" (that's root) + if( name == "/" ) return entry( 0 ); + + // split the names, e.g "/ObjectPool/_1020961869" will become: + // "ObjectPool" and "_1020961869" + std::list<std::string> names; + std::string::size_type start = 0, end = 0; + while( start < name.length() ) + { + end = name.find_first_of( '/', start ); + if( end == std::string::npos ) end = name.length(); + names.push_back( name.substr( start, end-start ) ); + start = end+1; + } + + // start from root when name is absolute + // or current directory when name is relative + int index = (name[0] == '/' ) ? 0 : current; + + // trace one by one + std::list<std::string>::iterator it; + for( it = names.begin(); it != names.end(); ++it ) + { + // find among the children of index + std::vector<unsigned> chi = children( index ); + unsigned child = 0; + for( unsigned i = 0; i < chi.size(); i++ ) + { + DirEntry* ce = entry( chi[i] ); + if( ce ) if( ce->name == *it ) + child = chi[i]; + } + + // traverse to the child + if( child > 0 ) index = child; + else + { + // not found among children + if( !create ) return (DirEntry*)0; + + // create a new entry + unsigned parent = index; + entries.push_back( DirEntry() ); + index = entryCount()-1; + DirEntry* e = entry( index ); + e->name = *it; + e->dir = false; + e->size = 0; + e->start = 0; + e->child = End; + e->prev = End; + e->next = entry(parent)->child; + entry(parent)->child = index; + } + } + + return entry( index ); +} + +// helper function: recursively find siblings of index +void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result, + unsigned index ) +{ + DirEntry* e = dirtree->entry( index ); + if( !e ) return; + + // prevent infinite loop + for( unsigned i = 0; i < result.size(); i++ ) + if( result[i] == index ) return; + + // add myself + result.push_back( index ); + + // visit previous sibling, don't go infinitely + unsigned prev = e->prev; + if( ( prev > 0 ) && ( prev < dirtree->entryCount() ) ) + { + for( unsigned i = 0; i < result.size(); i++ ) + if( result[i] == prev ) prev = 0; + if( prev ) dirtree_find_siblings( dirtree, result, prev ); + } + + // visit next sibling, don't go infinitely + unsigned next = e->next; + if( ( next > 0 ) && ( next < dirtree->entryCount() ) ) + { + for( unsigned i = 0; i < result.size(); i++ ) + if( result[i] == next ) next = 0; + if( next ) dirtree_find_siblings( dirtree, result, next ); + } +} + +std::vector<unsigned> DirTree::children( unsigned index ) +{ + std::vector<unsigned> result; + + DirEntry* e = entry( index ); + if( e ) if( e->child < entryCount() ) + dirtree_find_siblings( this, result, e->child ); + + return result; +} + +std::vector<DirEntry*> DirTree::listDirectory() +{ + std::vector<DirEntry*> result; + + std::vector<unsigned> chi = children( current ); + for( unsigned i = 0; i < chi.size(); i++ ) + result.push_back( entry( chi[i] ) ); + + return result; +} + +bool DirTree::enterDirectory( const std::string& dir ) +{ + DirEntry* e = entry( dir ); + if( !e ) return false; + if( !e->dir ) return false; + + int index = indexOf( e ); + if( index < 0 ) return false; + + current = index; + return true; +} + +void DirTree::leaveDirectory() +{ + // already at root ? + if( current == 0 ) return; + + int p = parent( current ); + if( p >= 0 ) current = p; +} + +std::string DirTree::path() +{ + return fullName( current ); +} + +void DirTree::load( unsigned char* buffer, unsigned size ) +{ + entries.clear(); + current = 0; + + for( unsigned i = 0; i < size/128; i++ ) + { + unsigned p = i * 128; + + // would be < 32 if first char in the name isn't printable + unsigned prefix = 32; + + // parse name of this entry, which stored as Unicode 16-bit + std::string name; + int name_len = readU16( buffer + 0x40+p ); + for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 ) + name.append( 1, buffer[j+p] ); + + // first char isn't printable ? remove it... + if( buffer[p] < 32 ) + { + prefix = buffer[0]; + name.erase( 0,1 ); + } + + DirEntry e; + e.name = name; + e.start = readU32( buffer + 0x74+p ); + e.size = readU32( buffer + 0x78+p ); + e.prev = readU32( buffer + 0x44+p ); + e.next = readU32( buffer + 0x48+p ); + e.child = readU32( buffer + 0x4C+p ); + e.dir = ( buffer[ 0x42 + p]!=2 ); + + entries.push_back( e ); + } +} + +// return space required to save this dirtree +unsigned DirTree::size() +{ + return entryCount() * 128; +} + +void DirTree::save( unsigned char* buffer ) +{ + memset( buffer, 0, size() ); + + // root is fixed as "Root Entry" + DirEntry* root = entry( 0 ); + std::string name = "Root Entry"; + for( unsigned j = 0; j < name.length(); j++ ) + buffer[ j*2 ] = name[j]; + writeU16( buffer + 0x40, name.length()*2 + 2 ); + writeU32( buffer + 0x74, 0xffffffff ); + writeU32( buffer + 0x78, 0 ); + writeU32( buffer + 0x44, 0xffffffff ); + writeU32( buffer + 0x48, 0xffffffff ); + writeU32( buffer + 0x4c, root->child ); + buffer[ 0x42 ] = 5; + buffer[ 0x43 ] = 1; + + for( unsigned i = 1; i < entryCount(); i++ ) + { + DirEntry* e = entry( i ); + if( !e ) continue; + if( e->dir ) + { + e->start = 0xffffffff; + e->size = 0; + } + + // max length for name is 32 chars + std::string name = e->name; + if( name.length() > 32 ) + name.erase( 32, name.length() ); + + // write name as Unicode 16-bit + for( unsigned j = 0; j < name.length(); j++ ) + buffer[ i*128 + j*2 ] = name[j]; + + writeU16( buffer + i*128 + 0x40, name.length()*2 + 2 ); + writeU32( buffer + i*128 + 0x74, e->start ); + writeU32( buffer + i*128 + 0x78, e->size ); + writeU32( buffer + i*128 + 0x44, e->prev ); + writeU32( buffer + i*128 + 0x48, e->next ); + writeU32( buffer + i*128 + 0x4c, e->child ); + buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2; + buffer[ i*128 + 0x43 ] = 1; // always black + } +} + +void DirTree::debug() +{ + for( unsigned i = 0; i < entryCount(); i++ ) + { + DirEntry* e = entry( i ); + if( !e ) continue; + std::cout << i << ": "; + std::cout << e->name << " "; + if( e->dir ) std::cout << "(Dir) "; + else std::cout << "(File) "; + std::cout << e->size << " "; + std::cout << "s:" << e->start << " "; + std::cout << "("; + if( e->child == End ) std::cout << "-"; else std::cout << e->child; + std::cout << " "; + if( e->prev == End ) std::cout << "-"; else std::cout << e->prev; + std::cout << ":"; + if( e->next == End ) std::cout << "-"; else std::cout << e->next; + std::cout << ")"; + std::cout << std::endl; + } +} + +// =========== StorageIO ========== + +StorageIO::StorageIO( Storage* st, const char* fname ) +{ + storage = st; + filename = fname; + result = Storage::Ok; + opened = false; + + header = new Header(); + dirtree = new DirTree(); + bbat = new AllocTable(); + sbat = new AllocTable(); + + filesize = 0; + bbat->blockSize = 1 << header->b_shift; + sbat->blockSize = 1 << header->s_shift; +} + +StorageIO::~StorageIO() +{ + if( opened ) close(); + delete sbat; + delete bbat; + delete dirtree; + delete header; +} + +bool StorageIO::open() +{ + // already opened ? close first + if( opened ) close(); + + load(); + + return result == Storage::Ok; +} + +void StorageIO::load() +{ + unsigned char* buffer = 0; + unsigned long buflen = 0; + std::vector<unsigned long> blocks; + + // open the file, check for error + result = Storage::OpenFailed; + file.open( filename.c_str(), std::ios::binary | std::ios::in ); + if( !file.good() ) return; + + // find size of input file + file.seekg( 0, std::ios::end ); + filesize = file.tellg(); + + // load header + buffer = new unsigned char[512]; + file.seekg( 0 ); + file.read( (char*)buffer, 512 ); + header->load( buffer ); + delete[] buffer; + + // check OLE magic id + result = Storage::NotOLE; + for( unsigned i=0; i<8; i++ ) + if( header->id[i] != pole_magic[i] ) + return; + + // sanity checks + result = Storage::BadOLE; + if( header->threshold != 4096 ) return; + if( header->num_bat == 0 ) return; + if( header->s_shift > header->b_shift ) return; + if( header->b_shift <= 6 ) return; + if( header->b_shift >=31 ) return; + + // important block size + bbat->blockSize = 1 << header->b_shift; + sbat->blockSize = 1 << header->s_shift; + + // find blocks allocated to store big bat + // the first 109 blocks are in header, the rest in meta bat + blocks.resize( header->num_bat ); + for( unsigned i = 0; i < header->num_bat; i++ ) + if( i < 109 ) blocks[i] = header->bb_blocks[i]; + if( header->num_bat > 109 ) + if( header->num_mbat > 0 ) + { + buffer = new unsigned char[ bbat->blockSize ]; + unsigned k = 109; + for( unsigned r = 0; r < header->num_mbat; r++ ) + { + loadBigBlock( header->mbat_start+r, buffer, bbat->blockSize ); + for( unsigned s=0; s < bbat->blockSize/4; s+=4 ) + blocks[k++] = readU32( buffer + s ); + // FIXME check if k > num_bat + } + delete[] buffer; + } + + // load big bat + buflen = blocks.size()*bbat->blockSize; + buffer = new unsigned char[ buflen ]; + loadBigBlocks( blocks, buffer, buflen ); + bbat->load( buffer, buflen ); + delete[] buffer; + + // load small bat + blocks.clear(); + blocks = bbat->follow( header->sbat_start ); + buflen = blocks.size()*bbat->blockSize; + buffer = new unsigned char[ buflen ]; + loadBigBlocks( blocks, buffer, buflen ); + sbat->load( buffer, buflen ); + delete[] buffer; + + // load directory tree + blocks = bbat->follow( header->dirent_start ); + buflen = blocks.size()*bbat->blockSize; + buffer = new unsigned char[ buflen ]; + loadBigBlocks( blocks, buffer, buflen ); + sb_blocks = bbat->follow( readU32( buffer + 0x74 ) ); // small files + dirtree->load( buffer, buflen ); + + // fetch block chain as data for small-files + delete[] buffer; + + // so far so good + result = Storage::Ok; + opened = true; +} + +void StorageIO::create() +{ + // std::cout << "Creating " << filename << std::endl; + + file.open( filename.c_str(), std::ios::out|std::ios::binary ); + if( !file.good() ) + { + std::cerr << "Can't create " << filename << std::endl; + result = Storage::OpenFailed; + return; + } + + // so far so good + opened = true; + result = Storage::Ok; +} + +void StorageIO::close() +{ + if( !opened ) return; + + file.close(); + opened = false; + + std::list<Stream*>::iterator it; + for( it = streams.begin(); it != streams.end(); ++it ) + delete *it; +} + +unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks, + unsigned char* data, unsigned long maxlen ) +{ + // sentinel + if( !data ) return 0; + if( !file.good() ) return 0; + if( blocks.size() < 1 ) return 0; + if( maxlen == 0 ) return 0; + + // read block one by one, seems fast enough + unsigned long bytes = 0; + for( unsigned long i=0; (i < blocks.size() ) & ( bytes<maxlen ); i++ ) + { + unsigned long block = blocks[i]; + if( block < 0 ) continue; + unsigned long pos = bbat->blockSize * ( block+1 ); + unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes; + if( pos + p > filesize ) p = filesize - pos; + file.seekg( pos ); + file.read( (char*)data + bytes, p ); + bytes += p; + } + + return bytes; +} + +unsigned long StorageIO::loadBigBlock( unsigned long block, + unsigned char* data, unsigned long maxlen ) +{ + // sentinel + if( !data ) return 0; + if( !file.good() ) return 0; + if( block < 0 ) return 0; + + // wraps call for loadBigBlocks + std::vector<unsigned long> blocks; + blocks.resize( 1 ); + blocks[ 0 ] = block; + + return loadBigBlocks( blocks, data, maxlen ); +} + +// return number of bytes which has been read +unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks, + unsigned char* data, unsigned long maxlen ) +{ + // sentinel + if( !data ) return 0; + if( !file.good() ) return 0; + if( blocks.size() < 1 ) return 0; + if( maxlen == 0 ) return 0; + + // our own local buffer + unsigned char buf[ bbat->blockSize ]; + + // read small block one by one + unsigned long bytes = 0; + for( unsigned long i=0; ( i<blocks.size() ) & ( bytes<maxlen ); i++ ) + { + unsigned long block = blocks[i]; + if( block < 0 ) continue; + + // find where the small-block exactly is + unsigned long pos = block * sbat->blockSize; + unsigned long bbindex = pos / bbat->blockSize; + if( bbindex >= sb_blocks.size() ) break; + + loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize ); + + // copy the data + unsigned offset = pos % bbat->blockSize; + unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes : bbat->blockSize-offset; + p = (sbat->blockSize<p ) ? sbat->blockSize : p; + memcpy( data + bytes, buf + offset, p ); + bytes += p; + } + + return bytes; +} + +unsigned long StorageIO::loadSmallBlock( unsigned long block, + unsigned char* data, unsigned long maxlen ) +{ + // sentinel + if( !data ) return 0; + if( !file.good() ) return 0; + if( block < 0 ) return 0; + + // wraps call for loadSmallBlocks + std::vector<unsigned long> blocks; + blocks.resize( 1 ); + blocks.assign( 1, block ); + + return loadSmallBlocks( blocks, data, maxlen ); +} + +// =========== StreamImpl ========== + +StreamImpl::StreamImpl( StorageIO* s, DirEntry* e) +{ + io = s; + entry = e; + m_pos = 0; + + if( entry->size >= io->header->threshold ) + blocks = io->bbat->follow( entry->start ); + else + blocks = io->sbat->follow( entry->start ); + + // prepare cache + cache_pos = 0; + cache_size = 4096; // optimal ? + cache_data = new unsigned char[cache_size]; + updateCache(); +} + +// FIXME tell parent we're gone +StreamImpl::~StreamImpl() +{ + delete[] cache_data; +} + +void StreamImpl::seek( unsigned long pos ) +{ + m_pos = pos; +} + +unsigned long StreamImpl::tell() +{ + return m_pos; +} + +int StreamImpl::getch() +{ + // past end-of-file ? + if( m_pos > entry->size ) return -1; + + // need to update cache ? + if( !cache_size || ( m_pos < cache_pos ) || + ( m_pos >= cache_pos + cache_size ) ) + updateCache(); + + // something bad if we don't get good cache + if( !cache_size ) return -1; + + int data = cache_data[m_pos - cache_pos]; + m_pos++; + + return data; +} + +unsigned long StreamImpl::read( unsigned long pos, unsigned char* data, unsigned long maxlen ) +{ + // sanity checks + if( !data ) return 0; + if( maxlen == 0 ) return 0; + + unsigned long totalbytes = 0; + + if ( entry->size < io->header->threshold ) + { + // small file + unsigned long index = pos / io->sbat->blockSize; + + if( index >= blocks.size() ) return 0; + + unsigned char buf[ io->sbat->blockSize ]; + unsigned long offset = pos % io->sbat->blockSize; + while( totalbytes < maxlen ) + { + if( index >= blocks.size() ) break; + io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize ); + unsigned long count = io->sbat->blockSize - offset; + if( count > maxlen-totalbytes ) count = maxlen-totalbytes; + memcpy( data+totalbytes, buf + offset, count ); + totalbytes += count; + offset = 0; + index++; + } + + } + else + { + // big file + unsigned long index = pos / io->bbat->blockSize; + + if( index >= blocks.size() ) return 0; + + unsigned char buf[ io->bbat->blockSize ]; + unsigned long offset = pos % io->bbat->blockSize; + while( totalbytes < maxlen ) + { + if( index >= blocks.size() ) break; + io->loadBigBlock( blocks[index], buf, io->bbat->blockSize ); + unsigned long count = io->bbat->blockSize - offset; + if( count > maxlen-totalbytes ) count = maxlen-totalbytes; + memcpy( data+totalbytes, buf + offset, count ); + totalbytes += count; + index++; + offset = 0; + } + + } + + return totalbytes; +} + +unsigned long StreamImpl::read( unsigned char* data, unsigned long maxlen ) +{ + unsigned long bytes = read( tell(), data, maxlen ); + m_pos += bytes; + return bytes; +} + +void StreamImpl::updateCache() +{ + // sanity check + if( !cache_data ) return; + + cache_pos = m_pos - ( m_pos % cache_size ); + unsigned long bytes = cache_size; + if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos; + cache_size = read( cache_pos, cache_data, bytes ); +} + + +// =========== Storage ========== + +Storage::Storage( const char* filename ) +{ + io = new StorageIO( this, filename ); +} + +Storage::~Storage() +{ + delete io; +} + +int Storage::result() +{ + return io->result; +} + +bool Storage::open() +{ + return io->open(); +} + +void Storage::close() +{ + io->close(); +} + +// list all files and subdirs in current path +std::list<std::string> Storage::listDirectory() +{ + std::list<std::string> result; + + std::vector<DirEntry*> entries; + entries = io->dirtree->listDirectory(); + for( unsigned i = 0; i < entries.size(); i++ ) + result.push_back( entries[i]->name ); + + return result; +} + +// enters a sub-directory, returns false if not a directory or not found +bool Storage::enterDirectory( const std::string& directory ) +{ + return io->dirtree->enterDirectory( directory ); +} + +// goes up one level (like cd ..) +void Storage::leaveDirectory() +{ + return io->dirtree->leaveDirectory(); +} + +std::string Storage::path() +{ + return io->dirtree->path(); +} + +Stream* Storage::stream( const std::string& name ) +{ + // sanity check + if( !name.length() ) return (Stream*)0; + if( !io ) return (Stream*)0; + + // make absolute if necesary + std::string fullName = name; + if( name[0] != '/' ) fullName.insert( 0, path() + "/" ); + + DirEntry* entry = io->dirtree->entry( name ); + if( !entry ) return (Stream*)0; + + Stream* s = new Stream(); + s->impl = new StreamImpl( io, entry ); + io->streams.push_back( s ); + + return s; +} + + + +// =========== Stream ========== + +Stream::Stream() +{ + // just nullify, will be managed later Storage::stream + impl = 0; +} + +// FIXME tell parent we're gone +Stream::~Stream() +{ + delete impl; +} + +unsigned long Stream::tell() +{ + return impl ? impl->tell() : 0; +} + +void Stream::seek( unsigned long newpos ) +{ + if( impl ) impl->seek( newpos ); +} + +unsigned long Stream::size() +{ + return impl ? impl->entry->size : 0; +} + +int Stream::getch() +{ + return impl ? impl->getch() : 0; +} + +unsigned long Stream::read( unsigned char* data, unsigned long maxlen ) +{ + return impl ? impl->read( data, maxlen ) : 0; +} + diff --git a/src/plugins/wordleaker/pole.h b/src/plugins/wordleaker/pole.h @@ -0,0 +1,149 @@ +/* POLE - Portable C++ library to access OLE Storage + Copyright (C) 2002-2004 Ariya Hidayat <ariya@kde.org> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, US +*/ + +#ifndef POLE_H +#define POLE_H + +#include <string> +#include <list> + +namespace POLE +{ + +class StorageIO; +class Stream; +class StreamImpl; + +class Storage +{ + friend class Stream; + friend class StreamOut; + +public: + + enum { Ok, OpenFailed, NotOLE, BadOLE, UnknownError, + StupidWorkaroundForBrokenCompiler=255 }; + + /** + * Constructs a storage with name filename. + **/ + Storage( const char* filename ); + + /** + * Destroys the storage. + **/ + ~Storage(); + + /** + * Opens the storage. Returns true if no error occurs. + **/ + bool open(); + + /** + * Closes the storage. + **/ + void close(); + + /** + * Returns the error code of last operation. + **/ + int result(); + + /** + * Returns the current path. + **/ + std::string path(); + + /** + * Finds all stream and directories in current path. + **/ + std::list<std::string> listDirectory(); + + /** + * Changes path to directory. Returns true if no error occurs. + **/ + bool enterDirectory( const std::string& directory ); + + /** + * Goes to one directory up. + **/ + void leaveDirectory(); + + /** + * Finds and returns a stream with the specified name. + **/ + Stream* stream( const std::string& name ); + +private: + StorageIO* io; + + // no copy or assign + Storage( const Storage& ); + Storage& operator=( const Storage& ); + +}; + +class Stream +{ + friend class Storage; + friend class StorageIO; + +public: + + /** + * Returns the stream size. + **/ + unsigned long size(); + + /** + * Returns the read pointer. + **/ + unsigned long tell(); + + /** + * Sets the read position. + **/ + void seek( unsigned long pos ); + + /** + * Reads a byte. + **/ + int getch(); + + /** + * Reads a block of data. + **/ + unsigned long read( unsigned char* data, unsigned long maxlen ); + +private: + + Stream(); + ~Stream(); + + // no copy or assign + Stream( const Stream& ); + Stream& operator=( const Stream& ); + + StreamImpl* impl; +}; + + +} + +#endif // POLE_H