libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit b315b51f1ff05cbe87ecceddf05f4c413568f031
parent c3e3f4454206ed993e70325993b0470b9a1bafc9
Author: Christian Grothoff <christian@grothoff.org>
Date:   Tue, 19 May 2026 22:03:20 +0200

modernize PDF plugin

Diffstat:
M .gitignore | 1 +
M configure.ac | 12 ++++++++++++
M src/plugins/Makefile.am | 16 +++++++++++++---
M src/plugins/pdf_extractor.cc | 8 --------
4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -110,6 +110,7 @@ src/plugins/test_nsfe src/plugins/test_odf src/plugins/test_ogg src/plugins/test_ole2 +src/plugins/test_pdf src/plugins/test_png src/plugins/test_ps src/plugins/test_real diff --git a/configure.ac b/configure.ac @@ -409,6 +409,15 @@ AM_LDFLAGS=$SAVED_AM_LDFLAGS AC_LANG_POP(C++) +# poppler-cpp is poppler's stable C++ binding. 0.73.0 (2019) is +# the first release with the non-deprecated time_t date accessors +# that the PDF plugin relies on. +PKG_CHECK_MODULES([POPPLER], [poppler-cpp >= 0.73.0], + [AM_CONDITIONAL(HAVE_POPPLER, true) + AC_DEFINE(HAVE_POPPLER, 1, [Have libpoppler-cpp])], + [AM_CONDITIONAL(HAVE_POPPLER, false)]) + + AC_MSG_CHECKING(for DGifOpen -lgif) SAVED_AM_LDFLAGS=$AM_LDFLAGS AC_CHECK_LIB(gif, DGifOpen, @@ -721,6 +730,9 @@ AS_IF([test "x$HAVE_BZ2_TRUE" = "x#"], AS_IF([test "x$HAVE_EXIV2_TRUE" = "x#"], [AC_MSG_NOTICE([NOTICE: libexiv2 not found, exiv2 disabled])]) +AS_IF([test "x$HAVE_POPPLER_TRUE" = "x#"], + [AC_MSG_NOTICE([NOTICE: libpoppler-cpp not found, PDF support disabled])]) + AS_IF([test "x$HAVE_TIFF_TRUE" = "x#"], [AC_MSG_NOTICE([NOTICE: libtiff not found, tiff disabled])]) diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -50,6 +50,7 @@ EXTRA_DIST = \ testdata/ole2_msword.doc \ testdata/ole2_starwriter40.sdw \ testdata/ogg_courseclear.ogg \ + testdata/pdf_extract.pdf \ testdata/png_image.png \ testdata/ps_bloomfilter.ps \ testdata/ps_wallace.ps \ @@ -152,8 +153,9 @@ PLUGIN_OGG=libextractor_ogg.la TEST_OGG=test_ogg endif -if ! 
WINDOWS +if HAVE_POPPLER PLUGIN_PDF=libextractor_pdf.la +TEST_PDF=test_pdf endif if HAVE_ZLIB @@ -233,6 +235,7 @@ check_PROGRAMS = \ $(TEST_MIME) \ $(TEST_MPEG) \ $(TEST_OGG) \ + $(TEST_PDF) \ $(TEST_RPM) \ $(TEST_TIFF) \ $(TEST_ZLIB) @@ -536,11 +539,18 @@ test_ogg_LDADD = \ libextractor_pdf_la_SOURCES = \ - pdf_extractor.c + pdf_extractor.cc libextractor_pdf_la_LDFLAGS = \ $(PLUGINFLAGS) +libextractor_pdf_la_CPPFLAGS = \ + $(AM_CPPFLAGS) $(POPPLER_CFLAGS) libextractor_pdf_la_LIBADD = \ - $(top_builddir)/src/common/libextractor_common.la $(XLIB) $(SOCKET_LIBS) + $(POPPLER_LIBS) $(XLIB) + +test_pdf_SOURCES = \ + test_pdf.c +test_pdf_LDADD = \ + $(top_builddir)/src/plugins/libtest.la libextractor_png_la_SOURCES = \ diff --git a/src/plugins/pdf_extractor.cc b/src/plugins/pdf_extractor.cc @@ -22,14 +22,6 @@ * @brief plugin to support PDF files * @author Vidyut Samanta * @author Christian Grothoff - * - * This plugin uses the stable C++ binding of libpoppler - * (`poppler-cpp`). Earlier versions of this plugin linked - * against poppler's internal headers (`PDFDoc`, `GooString`, - * ...), which carry no API or ABI stability guarantees and - * broke with virtually every poppler release. The poppler-cpp - * interface is the supported public API and is what we use - * here. */ #include "platform.h" #include "extractor.h"