commit b315b51f1ff05cbe87ecceddf05f4c413568f031
parent c3e3f4454206ed993e70325993b0470b9a1bafc9
Author: Christian Grothoff <christian@grothoff.org>
Date: Tue, 19 May 2026 22:03:20 +0200
modernize PDF plugin: detect poppler-cpp via pkg-config, build pdf_extractor.cc, add test_pdf
Diffstat:
4 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -110,6 +110,7 @@ src/plugins/test_nsfe
src/plugins/test_odf
src/plugins/test_ogg
src/plugins/test_ole2
+src/plugins/test_pdf
src/plugins/test_png
src/plugins/test_ps
src/plugins/test_real
diff --git a/configure.ac b/configure.ac
@@ -409,6 +409,15 @@ AM_LDFLAGS=$SAVED_AM_LDFLAGS
AC_LANG_POP(C++)
+# poppler-cpp is poppler's stable C++ binding.  Require >= 0.73.0 (2019),
+# the first release with the non-deprecated time_t date accessors that the
+# PDF plugin relies on.  If the check fails, the HAVE_POPPLER conditional is false and the macro stays undefined.
+PKG_CHECK_MODULES([POPPLER], [poppler-cpp >= 0.73.0],
+                  [AM_CONDITIONAL([HAVE_POPPLER], [true])
+                   AC_DEFINE([HAVE_POPPLER], [1], [Have libpoppler-cpp])],
+                  [AM_CONDITIONAL([HAVE_POPPLER], [false])])
+
+
AC_MSG_CHECKING(for DGifOpen -lgif)
SAVED_AM_LDFLAGS=$AM_LDFLAGS
AC_CHECK_LIB(gif, DGifOpen,
@@ -721,6 +730,9 @@ AS_IF([test "x$HAVE_BZ2_TRUE" = "x#"],
AS_IF([test "x$HAVE_EXIV2_TRUE" = "x#"],
[AC_MSG_NOTICE([NOTICE: libexiv2 not found, exiv2 disabled])])
+AS_IF([test "x$HAVE_POPPLER_TRUE" = "x#"],
+      [AC_MSG_NOTICE([NOTICE: libpoppler-cpp >= 0.73.0 not found, PDF support disabled])])
+
AS_IF([test "x$HAVE_TIFF_TRUE" = "x#"],
[AC_MSG_NOTICE([NOTICE: libtiff not found, tiff disabled])])
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
@@ -50,6 +50,7 @@ EXTRA_DIST = \
testdata/ole2_msword.doc \
testdata/ole2_starwriter40.sdw \
testdata/ogg_courseclear.ogg \
+ testdata/pdf_extract.pdf \
testdata/png_image.png \
testdata/ps_bloomfilter.ps \
testdata/ps_wallace.ps \
@@ -152,8 +153,9 @@ PLUGIN_OGG=libextractor_ogg.la
TEST_OGG=test_ogg
endif
-if ! WINDOWS
+if HAVE_POPPLER
PLUGIN_PDF=libextractor_pdf.la
+TEST_PDF=test_pdf
endif
if HAVE_ZLIB
@@ -233,6 +235,7 @@ check_PROGRAMS = \
$(TEST_MIME) \
$(TEST_MPEG) \
$(TEST_OGG) \
+ $(TEST_PDF) \
$(TEST_RPM) \
$(TEST_TIFF) \
$(TEST_ZLIB)
@@ -536,11 +539,18 @@ test_ogg_LDADD = \
libextractor_pdf_la_SOURCES = \
- pdf_extractor.c
+ pdf_extractor.cc
libextractor_pdf_la_LDFLAGS = \
$(PLUGINFLAGS)
+libextractor_pdf_la_CPPFLAGS = \
+ $(AM_CPPFLAGS) $(POPPLER_CFLAGS)
libextractor_pdf_la_LIBADD = \
- $(top_builddir)/src/common/libextractor_common.la $(XLIB) $(SOCKET_LIBS)
+ $(POPPLER_LIBS) $(XLIB)
+
+test_pdf_SOURCES = \
+ test_pdf.c
+test_pdf_LDADD = \
+ $(top_builddir)/src/plugins/libtest.la
libextractor_png_la_SOURCES = \
diff --git a/src/plugins/pdf_extractor.cc b/src/plugins/pdf_extractor.cc
@@ -22,14 +22,6 @@
* @brief plugin to support PDF files
* @author Vidyut Samanta
* @author Christian Grothoff
- *
- * This plugin uses the stable C++ binding of libpoppler
- * (`poppler-cpp`). Earlier versions of this plugin linked
- * against poppler's internal headers (`PDFDoc`, `GooString`,
- * ...), which carry no API or ABI stability guarantees and
- * broke with virtually every poppler release. The poppler-cpp
- * interface is the supported public API and is what we use
- * here.
*/
#include "platform.h"
#include "extractor.h"