libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit fc53e1949192a9289b25de90696f191340868b5f
parent 2fb6c184aa2d682e4a0caf2ac517a3412a028096
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sat, 12 Jan 2008 21:04:00 +0000

From: Pavol Rusnak <prusnak@suse.cz>
To: libextractor@gnu.org
Date: Thursday 06:26:15 am
   
  Spam Status: Spamassassin -5% probability of being spam.

Full report:
No  
Hello!

I see you released a new version of libextractor (0.5.19). Unfortunately,
it does not contain the security fixes for the Xpdf CVEs. I'm attaching
them. I'll also attach one simple patch that adds missing includes ...


Patches applied, plus version bump.


Diffstat:
Mconfigure.ac | 4++--
Mcontrib/doxygen | 2+-
Mdoc/version.texi | 4++--
Msrc/plugins/exiv2/basicio.hpp | 3+++
Msrc/plugins/exiv2/makernote.hpp | 1+
Msrc/plugins/exiv2/types.cpp | 1+
Msrc/plugins/exiv2/value.cpp | 1+
Msrc/plugins/exiv2/value.hpp | 1+
Msrc/plugins/pdf/Catalog.cc | 20+++++++++++++++-----
Msrc/plugins/pdf/Catalog.h | 2+-
Msrc/plugins/pdf/Stream.cc | 624++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/plugins/pdf/Stream.h | 10++++++----
12 files changed, 652 insertions(+), 21 deletions(-)

diff --git a/configure.ac b/configure.ac @@ -1,8 +1,8 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ(2.57) -AC_INIT([libextractor], [0.5.19], [bug-libextractor@gnu.org]) +AC_INIT([libextractor], [0.5.19a], [bug-libextractor@gnu.org]) AC_REVISION($Revision: 1.67 $) -AM_INIT_AUTOMAKE([libextractor], [0.5.19]) +AM_INIT_AUTOMAKE([libextractor], [0.5.19a]) AM_CONFIG_HEADER(src/include/config.h) AH_TOP([#define _GNU_SOURCE 1]) diff --git a/contrib/doxygen b/contrib/doxygen @@ -23,7 +23,7 @@ PROJECT_NAME = libextractor # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 0.5.19 +PROJECT_NUMBER = 0.5.19a # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/doc/version.texi b/doc/version.texi @@ -1,4 +1,4 @@ @set UPDATED 9 June 2007 @set UPDATED-MONTH June 2007 -@set EDITION 0.5.19 -@set VERSION 0.5.19 +@set EDITION 0.5.19a +@set VERSION 0.5.19a diff --git a/src/plugins/exiv2/basicio.hpp b/src/plugins/exiv2/basicio.hpp @@ -39,6 +39,9 @@ #include <string> #include <vector> #include <cstdio> +#include <memory> +#include <string.h> +#include <stdlib.h> // ***************************************************************************** // namespace extensions diff --git a/src/plugins/exiv2/makernote.hpp b/src/plugins/exiv2/makernote.hpp @@ -42,6 +42,7 @@ #include <vector> #include <map> #include <memory> +#include <string.h> // ***************************************************************************** // namespace extensions diff --git a/src/plugins/exiv2/types.cpp b/src/plugins/exiv2/types.cpp @@ -40,6 +40,7 @@ EXIV2_RCSID("@(#) $Id: types.cpp 578 2005-06-07 15:01:11Z ahuggel $"); #include <sstream> #include <utility> #include <cctype> +#include <string.h> // ***************************************************************************** // class member definitions diff --git 
a/src/plugins/exiv2/value.cpp b/src/plugins/exiv2/value.cpp @@ -43,6 +43,7 @@ EXIV2_RCSID("@(#) $Id: value.cpp 560 2005-04-17 11:51:32Z ahuggel $"); #include <sstream> #include <cassert> #include <ctime> +#include <stdlib.h> // ***************************************************************************** // class member definitions diff --git a/src/plugins/exiv2/value.hpp b/src/plugins/exiv2/value.hpp @@ -41,6 +41,7 @@ #include <iostream> #include <sstream> #include <memory> +#include <string.h> // ***************************************************************************** // namespace extensions diff --git a/src/plugins/pdf/Catalog.cc b/src/plugins/pdf/Catalog.cc @@ -23,6 +23,12 @@ #include "Link.h" #include "Catalog.h" +// This define is used to limit the depth of recursive readPageTree calls +// This is needed because the page tree nodes can reference their parents +// leaving us in an infinite loop +// Most sane pdf documents don't have a call depth higher than 10 +#define MAX_CALL_DEPTH 1000 + //------------------------------------------------------------------------ // Catalog //------------------------------------------------------------------------ @@ -71,7 +77,7 @@ Catalog::Catalog(XRef *xrefA) { pageRefs[i].num = -1; pageRefs[i].gen = -1; } - numPages = readPageTree(pagesDict.getDict(), NULL, 0); + numPages = readPageTree(pagesDict.getDict(), NULL, 0, 0); if (numPages != numPages0) { error(-1, "Page count in top-level pages object is incorrect"); } @@ -165,7 +171,7 @@ GString *Catalog::readMetadata() { return s; } -int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start) { +int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start, int callDepth) { Object kids; Object kid; Object kidRef; @@ -210,9 +216,13 @@ int Catalog::readPageTree(Dict *pagesDict, PageAttrs *attrs, int start) { // This should really be isDict("Pages"), but I've seen at least one // PDF file where the /Type entry is missing. 
} else if (kid.isDict()) { - if ((start = readPageTree(kid.getDict(), attrs1, start)) - < 0) - goto err2; + if (callDepth > MAX_CALL_DEPTH) { + error(-1, "Limit of %d recursive calls reached while reading the page tree. If your document is correct and not a test to try to force a crash, please report a bug.", MAX_CALL_DEPTH); + } else { + if ((start = readPageTree(kid.getDict(), attrs1, start, callDepth + 1)) + < 0) + goto err2; + } } else { error(-1, "Kid object (page %d) is wrong type (%s)", start+1, kid.getTypeName()); diff --git a/src/plugins/pdf/Catalog.h b/src/plugins/pdf/Catalog.h @@ -82,7 +82,7 @@ private: Object outline; // outline dictionary GBool ok; // true if catalog is valid - int readPageTree(Dict *pages, PageAttrs *attrs, int start); + int readPageTree(Dict *pages, PageAttrs *attrs, int start, int callDepth); Object *findDestInTree(Object *tree, GString *name, Object *obj); }; diff --git a/src/plugins/pdf/Stream.cc b/src/plugins/pdf/Stream.cc @@ -423,12 +423,9 @@ StreamPredictor::StreamPredictor(Stream *strA, int predictorA, nVals = width * nComps; if (width <= 0 || nComps <= 0 || nBits <= 0 || - nComps >= INT_MAX/nBits || - width >= INT_MAX/nComps/nBits || - nVals * nBits + 7 < 0) { - return; - } - if (nVals + 7 <= 0) { + nComps >= 4 || nBits > 16 || + width >= INT_MAX / nComps || + nVals >= (INT_MAX - 7) / nBits) { return; } @@ -1284,6 +1281,7 @@ GBool RunLengthStream::fillBuf() { // CCITTFaxStream //------------------------------------------------------------------------ +#if 0 CCITTFaxStream::CCITTFaxStream(Stream *strA, int encodingA, GBool endOfLineA, GBool byteAlignA, int columnsA, int rowsA, GBool endOfBlockA, GBool blackA): @@ -1786,6 +1784,609 @@ short CCITTFaxStream::lookBits(int n) { } return (inputBuf >> (inputBits - n)) & (0xffff >> (16 - n)); } +#else // secfix +CCITTFaxStream::CCITTFaxStream(Stream *strA, int encodingA, GBool endOfLineA, + GBool byteAlignA, int columnsA, int rowsA, + GBool endOfBlockA, GBool blackA): + 
FilterStream(strA) { + encoding = encodingA; + endOfLine = endOfLineA; + byteAlign = byteAlignA; + columns = columnsA; + if (columns < 1) { + columns = 1; + } else if (columns > (INT_MAX - 2)/sizeof(int)) { + columns = (INT_MAX - 2)/sizeof(int); + } + rows = rowsA; + endOfBlock = endOfBlockA; + black = blackA; + // 0 <= codingLine[0] < codingLine[1] < ... < codingLine[n] = columns + // ---> max codingLine size = columns + 1 + // refLine has one extra guard entry at the end + // ---> max refLine size = columns + 2 + codingLine = (int *)gmalloc((columns + 1) * sizeof(int)); + refLine = (int *)gmalloc((columns + 2) * sizeof(int)); + + eof = gFalse; + row = 0; + nextLine2D = encoding < 0; + inputBits = 0; + codingLine[0] = columns; + a0i = 0; + outputBits = 0; + + buf = EOF; +} + +CCITTFaxStream::~CCITTFaxStream() { + delete str; + gfree(refLine); + gfree(codingLine); +} + +void CCITTFaxStream::reset() { + short code1; + + str->reset(); + eof = gFalse; + row = 0; + nextLine2D = encoding < 0; + inputBits = 0; + codingLine[0] = columns; + a0i = 0; + outputBits = 0; + buf = EOF; + + // skip any initial zero bits and end-of-line marker, and get the 2D + // encoding tag + while ((code1 = lookBits(12)) == 0) { + eatBits(1); + } + if (code1 == 0x001) { + eatBits(12); + } + if (encoding > 0) { + nextLine2D = !lookBits(1); + eatBits(1); + } +} + +inline void CCITTFaxStream::addPixels(int a1, int blackPixels) { + if (a1 > codingLine[a0i]) { + if (a1 > columns) { + error(getPos(), "CCITTFax row is wrong length (%d)", a1); + err = gTrue; + a1 = columns; + } + if ((a0i & 1) ^ blackPixels) { + ++a0i; + } + codingLine[a0i] = a1; + } +} + +inline void CCITTFaxStream::addPixelsNeg(int a1, int blackPixels) { + if (a1 > codingLine[a0i]) { + if (a1 > columns) { + error(getPos(), "CCITTFax row is wrong length (%d)", a1); + err = gTrue; + a1 = columns; + } + if ((a0i & 1) ^ blackPixels) { + ++a0i; + } + codingLine[a0i] = a1; + } else if (a1 < codingLine[a0i]) { + if (a1 < 0) { + 
error(getPos(), "Invalid CCITTFax code"); + err = gTrue; + a1 = 0; + } + while (a0i > 0 && a1 <= codingLine[a0i - 1]) { + --a0i; + } + codingLine[a0i] = a1; + } +} + +int CCITTFaxStream::lookChar() { + short code1, code2, code3; + int b1i, blackPixels, i, bits; + GBool gotEOL; + + if (buf != EOF) { + return buf; + } + + // read the next row + if (outputBits == 0) { + + // if at eof just return EOF + if (eof) { + return EOF; + } + + err = gFalse; + + // 2-D encoding + if (nextLine2D) { + for (i = 0; codingLine[i] < columns; ++i) { + refLine[i] = codingLine[i]; + } + refLine[i++] = columns; + refLine[i] = columns; + codingLine[0] = 0; + a0i = 0; + b1i = 0; + blackPixels = 0; + // invariant: + // refLine[b1i-1] <= codingLine[a0i] < refLine[b1i] < refLine[b1i+1] + // <= columns + // exception at left edge: + // codingLine[a0i = 0] = refLine[b1i = 0] = 0 is possible + // exception at right edge: + // refLine[b1i] = refLine[b1i+1] = columns is possible + while (codingLine[a0i] < columns) { + code1 = getTwoDimCode(); + switch (code1) { + case twoDimPass: + addPixels(refLine[b1i + 1], blackPixels); + if (refLine[b1i + 1] < columns) { + b1i += 2; + } + break; + case twoDimHoriz: + code1 = code2 = 0; + if (blackPixels) { + do { + code1 += code3 = getBlackCode(); + } while (code3 >= 64); + do { + code2 += code3 = getWhiteCode(); + } while (code3 >= 64); + } else { + do { + code1 += code3 = getWhiteCode(); + } while (code3 >= 64); + do { + code2 += code3 = getBlackCode(); + } while (code3 >= 64); + } + addPixels(codingLine[a0i] + code1, blackPixels); + if (codingLine[a0i] < columns) { + addPixels(codingLine[a0i] + code2, blackPixels ^ 1); + } + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + break; + case twoDimVertR3: + addPixels(refLine[b1i] + 3, blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + ++b1i; + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case 
twoDimVertR2: + addPixels(refLine[b1i] + 2, blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + ++b1i; + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case twoDimVertR1: + addPixels(refLine[b1i] + 1, blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + ++b1i; + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case twoDimVert0: + addPixels(refLine[b1i], blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + ++b1i; + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case twoDimVertL3: + addPixelsNeg(refLine[b1i] - 3, blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + if (b1i > 0) { + --b1i; + } else { + ++b1i; + } + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case twoDimVertL2: + addPixelsNeg(refLine[b1i] - 2, blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + if (b1i > 0) { + --b1i; + } else { + ++b1i; + } + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case twoDimVertL1: + addPixelsNeg(refLine[b1i] - 1, blackPixels); + blackPixels ^= 1; + if (codingLine[a0i] < columns) { + if (b1i > 0) { + --b1i; + } else { + ++b1i; + } + while (refLine[b1i] <= codingLine[a0i] && refLine[b1i] < columns) { + b1i += 2; + } + } + break; + case EOF: + addPixels(columns, 0); + eof = gTrue; + break; + default: + error(getPos(), "Bad 2D code %04x in CCITTFax stream", code1); + addPixels(columns, 0); + err = gTrue; + break; + } + } + + // 1-D encoding + } else { + codingLine[0] = 0; + a0i = 0; + blackPixels = 0; + while (codingLine[a0i] < columns) { + code1 = 0; + if (blackPixels) { + do { + code1 += code3 = getBlackCode(); + } while (code3 >= 64); + } else { + do { + code1 += code3 = getWhiteCode(); + } while (code3 >= 64); + } + 
addPixels(codingLine[a0i] + code1, blackPixels); + blackPixels ^= 1; + } + } + + // byte-align the row + if (byteAlign) { + inputBits &= ~7; + } + + // check for end-of-line marker, skipping over any extra zero bits + gotEOL = gFalse; + if (!endOfBlock && row == rows - 1) { + eof = gTrue; + } else { + code1 = lookBits(12); + while (code1 == 0) { + eatBits(1); + code1 = lookBits(12); + } + if (code1 == 0x001) { + eatBits(12); + gotEOL = gTrue; + } else if (code1 == EOF) { + eof = gTrue; + } + } + + // get 2D encoding tag + if (!eof && encoding > 0) { + nextLine2D = !lookBits(1); + eatBits(1); + } + + // check for end-of-block marker + if (endOfBlock && gotEOL) { + code1 = lookBits(12); + if (code1 == 0x001) { + eatBits(12); + if (encoding > 0) { + lookBits(1); + eatBits(1); + } + if (encoding >= 0) { + for (i = 0; i < 4; ++i) { + code1 = lookBits(12); + if (code1 != 0x001) { + error(getPos(), "Bad RTC code in CCITTFax stream"); + } + eatBits(12); + if (encoding > 0) { + lookBits(1); + eatBits(1); + } + } + } + eof = gTrue; + } + + // look for an end-of-line marker after an error -- we only do + // this if we know the stream contains end-of-line markers because + // the "just plow on" technique tends to work better otherwise + } else if (err && endOfLine) { + while (1) { + code1 = lookBits(13); + if (code1 == EOF) { + eof = gTrue; + return EOF; + } + if ((code1 >> 1) == 0x001) { + break; + } + eatBits(1); + } + eatBits(12); + if (encoding > 0) { + eatBits(1); + nextLine2D = !(code1 & 1); + } + } + + // set up for output + if (codingLine[0] > 0) { + outputBits = codingLine[a0i = 0]; + } else { + outputBits = codingLine[a0i = 1]; + } + + ++row; + } + + // get a byte + if (outputBits >= 8) { + buf = (a0i & 1) ? 
0x00 : 0xff; + outputBits -= 8; + if (outputBits == 0 && codingLine[a0i] < columns) { + ++a0i; + outputBits = codingLine[a0i] - codingLine[a0i - 1]; + } + } else { + bits = 8; + buf = 0; + do { + if (outputBits > bits) { + buf <<= bits; + if (!(a0i & 1)) { + buf |= 0xff >> (8 - bits); + } + outputBits -= bits; + bits = 0; + } else { + buf <<= outputBits; + if (!(a0i & 1)) { + buf |= 0xff >> (8 - outputBits); + } + bits -= outputBits; + outputBits = 0; + if (codingLine[a0i] < columns) { + ++a0i; + outputBits = codingLine[a0i] - codingLine[a0i - 1]; + } else if (bits > 0) { + buf <<= bits; + bits = 0; + } + } + } while (bits); + } + if (black) { + buf ^= 0xff; + } + return buf; +} + +short CCITTFaxStream::getTwoDimCode() { + short code; + CCITTCode *p; + int n; + + code = 0; // make gcc happy + if (endOfBlock) { + code = lookBits(7); + p = &twoDimTab1[code]; + if (p->bits > 0) { + eatBits(p->bits); + return p->n; + } + } else { + for (n = 1; n <= 7; ++n) { + code = lookBits(n); + if (n < 7) { + code <<= 7 - n; + } + p = &twoDimTab1[code]; + if (p->bits == n) { + eatBits(n); + return p->n; + } + } + } + error(getPos(), "Bad two dim code (%04x) in CCITTFax stream", code); + return EOF; +} + +short CCITTFaxStream::getWhiteCode() { + short code; + CCITTCode *p; + int n; + + code = 0; // make gcc happy + if (endOfBlock) { + code = lookBits(12); + if (code == EOF) { + return 1; + } + if ((code >> 5) == 0) { + p = &whiteTab1[code]; + } else { + p = &whiteTab2[code >> 3]; + } + if (p->bits > 0) { + eatBits(p->bits); + return p->n; + } + } else { + for (n = 1; n <= 9; ++n) { + code = lookBits(n); + if (code == EOF) { + return 1; + } + if (n < 9) { + code <<= 9 - n; + } + p = &whiteTab2[code]; + if (p->bits == n) { + eatBits(n); + return p->n; + } + } + for (n = 11; n <= 12; ++n) { + code = lookBits(n); + if (code == EOF) { + return 1; + } + if (n < 12) { + code <<= 12 - n; + } + p = &whiteTab1[code]; + if (p->bits == n) { + eatBits(n); + return p->n; + } + } + } + 
error(getPos(), "Bad white code (%04x) in CCITTFax stream", code); + // eat a bit and return a positive number so that the caller doesn't + // go into an infinite loop + eatBits(1); + return 1; +} + +short CCITTFaxStream::getBlackCode() { + short code; + CCITTCode *p; + int n; + + code = 0; // make gcc happy + if (endOfBlock) { + code = lookBits(13); + if (code == EOF) { + return 1; + } + if ((code >> 7) == 0) { + p = &blackTab1[code]; + } else if ((code >> 9) == 0 && (code >> 7) != 0) { + p = &blackTab2[(code >> 1) - 64]; + } else { + p = &blackTab3[code >> 7]; + } + if (p->bits > 0) { + eatBits(p->bits); + return p->n; + } + } else { + for (n = 2; n <= 6; ++n) { + code = lookBits(n); + if (code == EOF) { + return 1; + } + if (n < 6) { + code <<= 6 - n; + } + p = &blackTab3[code]; + if (p->bits == n) { + eatBits(n); + return p->n; + } + } + for (n = 7; n <= 12; ++n) { + code = lookBits(n); + if (code == EOF) { + return 1; + } + if (n < 12) { + code <<= 12 - n; + } + if (code >= 64) { + p = &blackTab2[code - 64]; + if (p->bits == n) { + eatBits(n); + return p->n; + } + } + } + for (n = 10; n <= 13; ++n) { + code = lookBits(n); + if (code == EOF) { + return 1; + } + if (n < 13) { + code <<= 13 - n; + } + p = &blackTab1[code]; + if (p->bits == n) { + eatBits(n); + return p->n; + } + } + } + error(getPos(), "Bad black code (%04x) in CCITTFax stream", code); + // eat a bit and return a positive number so that the caller doesn't + // go into an infinite loop + eatBits(1); + return 1; +} + +short CCITTFaxStream::lookBits(int n) { + int c; + + while (inputBits < n) { + if ((c = str->getChar()) == EOF) { + if (inputBits == 0) { + return EOF; + } + // near the end of the stream, the caller may ask for more bits + // than are available, but there may still be a valid code in + // however many bits are available -- we need to return correct + // data in this case + return (inputBuf << (n - inputBits)) & (0xffff >> (16 - n)); + } + inputBuf = (inputBuf << 8) + c; + inputBits 
+= 8; + } + return (inputBuf >> (inputBits - n)) & (0xffff >> (16 - n)); +} + +#endif GString *CCITTFaxStream::getPSFilter(int psLevel, char *indent) { GString *s; @@ -1976,6 +2577,12 @@ void DCTStream::reset() { // allocate a buffer for the whole image bufWidth = ((width + mcuWidth - 1) / mcuWidth) * mcuWidth; bufHeight = ((height + mcuHeight - 1) / mcuHeight) * mcuHeight; + if (bufWidth <= 0 || bufHeight <= 0 || + bufWidth > INT_MAX / bufWidth / (int)sizeof(int)) { + error(getPos(), "Invalid image size in DCT stream"); + y = height; + return; + } for (i = 0; i < numComps; ++i) { frameBuf[i] = (int *)gmalloc(bufWidth * bufHeight * sizeof(int)); memset(frameBuf[i], 0, bufWidth * bufHeight * sizeof(int)); @@ -3035,6 +3642,11 @@ GBool DCTStream::readScanInfo() { } scanInfo.firstCoeff = str->getChar(); scanInfo.lastCoeff = str->getChar(); + if (scanInfo.firstCoeff < 0 || scanInfo.lastCoeff > 63 || + scanInfo.firstCoeff > scanInfo.lastCoeff) { + error(getPos(), "Bad DCT coefficient numbers in scan info block"); + return gFalse; + } c = str->getChar(); scanInfo.ah = (c >> 4) & 0x0f; scanInfo.al = c & 0x0f; diff --git a/src/plugins/pdf/Stream.h b/src/plugins/pdf/Stream.h @@ -523,13 +523,15 @@ private: int row; // current row int inputBuf; // input buffer int inputBits; // number of bits in input buffer - short *refLine; // reference line changing elements - int b1; // index into refLine - short *codingLine; // coding line changing elements - int a0; // index into codingLine + int *codingLine; // coding line changing elements + int *refLine; // reference line changing elements + int a0i; // index into codingLine + GBool err; // error on current line int outputBits; // remaining ouput bits int buf; // character buffer + void addPixels(int a1, int black); + void addPixelsNeg(int a1, int black); short getTwoDimCode(); short getWhiteCode(); short getBlackCode();