libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 23dbd4271c4e97a45db91d6b012a664daeb985a9
parent 47fdd453d21070b178c07024be12749b2c8a5786
Author: Heikki Lindholm <holin@iki.fi>
Date:   Fri, 20 Jun 2008 12:45:27 +0000

add preliminary ffmpeg-based thumbnail extractor


Diffstat:
M configure.ac | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/plugins/Makefile.am | 23 ++++++++++++++++-------
A src/plugins/thumbnailffmpeg/Makefile.am | 37 +++++++++++++++++++++++++++++++++++++
A src/plugins/thumbnailffmpeg/README | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/plugins/thumbnailffmpeg/thumbnailextractorffmpeg.c | 524 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 706 insertions(+), 8 deletions(-)

diff --git a/configure.ac b/configure.ac
@@ -477,6 +477,22 @@ AC_ARG_ENABLE(printable,
  printable=1])
 AM_CONDITIONAL(HAVE_PRINTABLE, test x$printable != x0)
 
+ffmpeg_enabled=0
+AC_MSG_CHECKING([whether to enable the FFmpeg thumbnail extractor])
+AC_ARG_ENABLE(ffmpeg,
+ [AC_HELP_STRING([--enable-ffmpeg],[Enable FFmpeg support])
+  AC_HELP_STRING([--disable-ffmpeg],[Disable FFmpeg support])],
+ [case "$enableval" in
+  no) AC_MSG_RESULT(no)
+      ffmpeg_enabled=0
+      ;;
+  *)  AC_MSG_RESULT(yes)
+      ffmpeg_enabled=1
+      ;;
+  esac],
+ [ AC_MSG_RESULT(no)
+   ffmpeg_enabled=0])
+AM_CONDITIONAL(HAVE_FFMPEG, test x$ffmpeg_enabled != x0)
 
 AC_SUBST(CPPFLAGS)
 AC_SUBST(LDFLAGS)
@@ -499,6 +515,7 @@ src/plugins/rpm/Makefile
 src/plugins/printable/Makefile
 src/plugins/hash/Makefile
 src/plugins/thumbnail/Makefile
+src/plugins/thumbnailffmpeg/Makefile
 src/plugins/exiv2/Makefile
 src/test/Makefile
 ])
@@ -506,6 +523,38 @@ src/test/Makefile
 LIBEXTRACTOR_VERSION="0.5.18a"
 
 AX_CREATE_PKGCONFIG_INFO([libextractor.pc],,[-lextractor],[Metadata extraction library],,)
+if test "x$ffmpeg_enabled" = "x1"
+then
+  AC_MSG_NOTICE([configuring FFmpeg])
+  cd src/plugins/thumbnailffmpeg/ffmpeg/
+  if ! ./configure \
+    --prefix=/tmp \
+    --disable-mmx \
+    --disable-altivec \
+    --enable-shared \
+    --enable-swscale \
+    --enable-gpl \
+    --disable-vhook \
+    --disable-postproc \
+    --disable-network \
+    --disable-ffmpeg \
+    --disable-ffserver \
+    --disable-ffplay \
+    --disable-devices \
+    --disable-protocols \
+    --disable-bsfs \
+    --disable-parsers \
+    --disable-muxers \
+    --disable-demuxers \
+    --disable-encoders \
+    --disable-decoders \
+    --enable-encoder=png \
+    --enable-decoder=png
+  then
+    AC_MSG_ERROR([FFmpeg configure failed.])
+  fi
+  cd ../../../../
+fi
 
 AC_OUTPUT
 
@@ -534,7 +583,14 @@ then
  AC_MSG_NOTICE([NOTICE: libgsf not found, no OLE2 (MS Office) support])
 fi
 
-if test "x$without_gtk" = "xtrue" -a "x$qt" = "x0"
+if test "x$ffmpeg_enabled" = "x0"
+then
+ AC_MSG_NOTICE([NOTICE: FFmpeg thumbnailer plugin disabled])
+else
+ AC_MSG_NOTICE([NOTICE: FFmpeg thumbnailer plugin enabled (security untested)])
+fi
+
+if test "x$without_gtk" = "xtrue" -a "x$qt" = "x0" -a "x$ffmpeg_enabled" = "x0"
 then
  AC_MSG_NOTICE([NOTICE: neither gtk nor Qt not found, no thumbnail support])
 fi
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
@@ -2,6 +2,10 @@ include Makefile-plugins.am
 
 LIBEXT = @LIBEXT@
 
+if HAVE_FFMPEG
+thumbffmpeg=thumbnailffmpeg
+endif
+
 if HAVE_GLIB
 if WITH_GSF
  oledir=ole2
@@ -52,7 +56,7 @@ endif
 
 # toggle for development
 # SUBDIRS = .
-SUBDIRS = $(thumbgtk) . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(exiv2dir)
+SUBDIRS = $(thumbgtk) $(thumbffmpeg) . $(oodir) $(printdir) hash $(oledir) rpm $(xpdfdir) $(exiv2dir)
 
 
 if HAVE_VORBISFILE
@@ -390,13 +394,18 @@ install-exec-hook:
 	mkdir -p $(DESTDIR)$(plugindir) &> /dev/null || true
 	rm -f $(DESTDIR)$(plugindir)/libextractor_thumbnail$(LIBEXT)
 	rm -f $(DESTDIR)$(plugindir)/libextractor_thumbnail.la
-	if test "$(thumbgtk)" != ""; then \
-		$(LN_S) $(plugindir)/libextractor_thumbnailgtk$(LIBEXT) \
+	if test "$(thumbffmpeg)" != ""; then \
+		$(LN_S) $(plugindir)/libextractor_thumbnailffmpeg$(LIBEXT) \
 			$(DESTDIR)$(plugindir)/libextractor_thumbnail$(LIBEXT); \
 	else \
-		if test "$(thumbqt)" != ""; then \
-			$(LN_S) $(plugindir)/libextractor_thumbnailqt$(LIBEXT) \
+		if test "$(thumbgtk)" != ""; then \
+			$(LN_S) $(plugindir)/libextractor_thumbnailgtk$(LIBEXT) \
 				$(DESTDIR)$(plugindir)/libextractor_thumbnail$(LIBEXT); \
-		fi; \
-	fi;
+		else \
+			if test "$(thumbqt)" != ""; then \
+				$(LN_S) $(plugindir)/libextractor_thumbnailqt$(LIBEXT) \
+					$(DESTDIR)$(plugindir)/libextractor_thumbnail$(LIBEXT); \
+			fi \
+		fi \
+	fi
diff --git a/src/plugins/thumbnailffmpeg/Makefile.am b/src/plugins/thumbnailffmpeg/Makefile.am
@@ -0,0 +1,37 @@
+include ../Makefile-plugins.am
+
+plugin_LTLIBRARIES = \
+  libextractor_thumbnailffmpeg.la
+
+libextractor_thumbnailffmpeg_la_SOURCES = \
+  thumbnailextractorffmpeg.c
+libextractor_thumbnailffmpeg_la_LIBADD = \
+  $(top_builddir)/src/main/libextractor.la \
+  ./ffmpeg/libavformat/libavformat.a \
+  ./ffmpeg/libavcodec/libavcodec.a \
+  ./ffmpeg/libavutil/libavutil.a \
+  ./ffmpeg/libswscale/libswscale.a \
+  -lz -lbz2
+libextractor_thumbnailffmpeg_la_LDFLAGS = \
+  $(PLUGINFLAGS) $(retaincommand)
+libextractor_thumbnailffmpeg_la_CPPFLAGS = \
+  -I$(top_srcdir)/include \
+  -I./ffmpeg \
+  -I./ffmpeg/libavformat \
+  -I./ffmpeg/libavcodec \
+  -I./ffmpeg/libavutil \
+  -I./ffmpeg/libswscale
+
+./ffmpeg/libavformat/libavformat.a: Makefile
+	cd ffmpeg && $(MAKE) $(AM_MAKEFLAGS) all
+./ffmpeg/libavcodec/libavcodec.a: Makefile
+	cd ffmpeg && $(MAKE) $(AM_MAKEFLAGS) all
+./ffmpeg/libavutil/libavutil.a: Makefile
+	cd ffmpeg && $(MAKE) $(AM_MAKEFLAGS) all
+./ffmpeg/libswscale/libswscale.a: Makefile
+	cd ffmpeg && $(MAKE) $(AM_MAKEFLAGS) all
+
+clean-local:
+	cd ffmpeg && $(MAKE) $(AM_MAKEFLAGS) clean
+distclean-local:
+	cd ffmpeg && $(MAKE) $(AM_MAKEFLAGS) distclean
diff --git a/src/plugins/thumbnailffmpeg/README b/src/plugins/thumbnailffmpeg/README
@@ -0,0 +1,72 @@
+This is a thumbnail extractor using the ffmpeg libraries that will eventually
+support extracting thumbnails from both image and video files.
+
+A local ffmpeg tree is used, because
+(1) there are no recent official releases of the ffmpeg libs,
+(2) mainline ffmpeg is not reentrant,
+(3) security issues can be handled locally.
+
+Plan: test & enable ffmpeg decoders one by one
+tests:
+- multithreading test
+- zzuf test
+- random input test
+
+At least, the following should be eventually enabled:
+if ! ./configure \
+ --prefix=/tmp/FF \
+ --disable-mmx \
+ --disable-altivec \
+ --enable-shared \
+ --enable-swscale \
+ --enable-gpl \
+ --disable-vhook \
+ --disable-postproc \
+ --disable-network \
+ --disable-ffmpeg \
+ --disable-ffserver \
+ --disable-ffplay \
+ --disable-devices \
+ --disable-protocols \
+ --disable-bsfs \
+ --disable-parsers \
+ --disable-muxers \
+ --disable-demuxers \
+ --disable-encoders \
+ --disable-decoders \
+ --enable-parser=h263 \
+ --enable-parser=h264 \
+ --enable-parser=mjpeg \
+ --enable-parser=mpeg4video \
+ --enable-parser=mpegvideo \
+ --enable-encoder=png \
+ --enable-encoder=mjpeg \
+ --enable-decoder=bmp \
+ --enable-decoder=pcx \
+ --enable-decoder=png \
+ --enable-decoder=mjpeg \
+ --enable-decoder=mjpegb \
+ --enable-decoder=targa \
+ --enable-decoder=tiff \
+ --enable-decoder=flashsv \
+ --enable-decoder=h263 \
+ --enable-decoder=flv \
+ --enable-decoder=h264 \
+ --enable-decoder=mpeg1video \
+ --enable-decoder=mpeg2video \
+ --enable-decoder=mpegvideo \
+ --enable-decoder=mpeg4 \
+ --enable-decoder=vp6 \
+ --enable-decoder=vp6a \
+ --enable-decoder=vp6f \
+ --enable-demuxer=asf \
+ --enable-demuxer=avi \
+ --enable-demuxer=flv \
+ --enable-demuxer=mjpeg \
+ --enable-demuxer=mpegps \
+ --enable-demuxer=mpegts \
+ --enable-demuxer=mpegvideo \
+ --enable-demuxer=mov \
+ --enable-demuxer=ogg \
+ --enable-demuxer=rm
+
diff --git a/src/plugins/thumbnailffmpeg/thumbnailextractorffmpeg.c b/src/plugins/thumbnailffmpeg/thumbnailextractorffmpeg.c
@@ -0,0 +1,524 @@
+/*
+     This file is part of libextractor.
+     Copyright (C) 2008 Heikki Lindholm
+
+     libextractor is free software; you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published
+     by the Free Software Foundation; either version 2, or (at your
+     option) any later version.
+
+     libextractor is distributed in the hope that it will be useful, but
+     WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+     General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with libextractor; see the file COPYING.  If not, write to the
+     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+     Boston, MA 02111-1307, USA.
+ */
+
+/**
+ * @file thumbnailextractorffmpeg.c
+ * @author Heikki Lindholm
+ * @brief this extractor produces a binary encoded
+ * thumbnail of images and videos using the ffmpeg libs.
+ */
+
+#include "platform.h"
+#include "extractor.h"
+
+#include <avformat.h>
+#include <avcodec.h>
+#include <swscale.h>
+
+#define DEBUG 1
+
+/**
+ * Read cursor over the in-memory file.  A pointer to this struct is
+ * passed to init_put_byte() as the opaque argument and handed back to
+ * stream_read() / stream_seek().
+ */
+struct StreamDescriptor
+{
+  const uint8_t *data;          /* start of the input buffer */
+  size_t offset;                /* current read position in data */
+  size_t size;                  /* number of bytes in data */
+};
+
+
+/**
+ * Library constructor: registers the FFmpeg formats and codecs once
+ * when the plugin is loaded.
+ */
+void __attribute__ ((constructor)) ffmpeg_lib_init (void)
+{
+#if DEBUG
+  printf ("av_register_all()\n");
+#endif
+  av_register_all ();
+}
+
+/**
+ * Read callback: copies up to buf_size bytes from the in-memory
+ * buffer into buf and advances the read position.
+ *
+ * @param opaque the struct StreamDescriptor of the stream
+ * @param buf destination buffer
+ * @param buf_size capacity of buf in bytes
+ * @return number of bytes copied, 0 when the position is at or past
+ *         the end of the data, -1 on invalid arguments
+ */
+static int
+stream_read (void *opaque, uint8_t * buf, int buf_size)
+{
+  struct StreamDescriptor *rs = (struct StreamDescriptor *) opaque;
+  size_t len;
+
+#if DEBUG
+  /* buf_size is an int; printing it with "%zu" was a format mismatch */
+  printf ("read_packet: %d\n", buf_size);
+#endif
+  if (rs == NULL || rs->data == NULL || buf_size < 0)
+    return -1;
+  if (rs->offset >= rs->size)
+    return 0;
+  len = (size_t) buf_size;
+  /* compare against the remaining bytes so the sum cannot wrap */
+  if (len > rs->size - rs->offset)
+    len = rs->size - rs->offset;
+
+  memcpy (buf, rs->data + rs->offset, len);
+  rs->offset += len;
+#if DEBUG
+  printf ("read_packet: len: %zu\n", len);
+#endif
+  return (int) len;
+}
+
+/**
+ * Seek callback: repositions the read cursor of the in-memory buffer.
+ *
+ * @param opaque the struct StreamDescriptor of the stream
+ * @param offset offset, interpreted according to whence
+ * @param whence SEEK_SET, SEEK_CUR, SEEK_END, or AVSEEK_SIZE to query
+ *        the total size without moving
+ * @return the new absolute position (or the size for AVSEEK_SIZE),
+ *         AVERROR (EINVAL) for an unknown whence or a target outside
+ *         [0, size], -1 if opaque is NULL
+ */
+static offset_t
+stream_seek (void *opaque, offset_t offset, int whence)
+{
+  struct StreamDescriptor *rs = (struct StreamDescriptor *) opaque;
+  offset_t off_abs;
+
+#if DEBUG
+  printf ("my_seek: %lld %d\n", (long long) offset, whence);
+#endif
+  if (rs == NULL)
+    return -1;
+
+  switch (whence)
+    {
+    case AVSEEK_SIZE:
+      return (offset_t) rs->size;
+    case SEEK_CUR:
+      off_abs = (offset_t) rs->offset + offset;
+      break;
+    case SEEK_SET:
+      off_abs = offset;
+      break;
+    case SEEK_END:
+      off_abs = (offset_t) rs->size + offset;
+      break;
+    default:
+      /* a plugin must not abort() the host process; report the error */
+#if DEBUG
+      printf ("whence error %d\n", whence);
+#endif
+      return AVERROR (EINVAL);
+    }
+
+  /* as with lseek(), positioning exactly at the end is valid;
+     stream_read() then reports end of data */
+  if (off_abs < 0 || off_abs > (offset_t) rs->size)
+    return AVERROR (EINVAL);
+  rs->offset = (size_t) off_abs;
+  return off_abs;
+}
+
+/**
+ * Prepend a keyword to a keyword list.
+ *
+ * @param type keyword type to store
+ * @param keyword keyword string; stored by pointer, not copied
+ * @param next current head of the list
+ * @return the new head, or next unchanged if keyword is NULL or the
+ *         list node cannot be allocated
+ */
+static EXTRACTOR_KeywordList *
+addKeyword (EXTRACTOR_KeywordType type,
+            char *keyword, EXTRACTOR_KeywordList * next)
+{
+  EXTRACTOR_KeywordList *result;
+
+  if (keyword == NULL)
+    return next;
+  result = malloc (sizeof (EXTRACTOR_KeywordList));
+  if (result == NULL)
+    {
+      /* NOTE(review): keyword is dropped here without being freed;
+         confirm who owns it before adding a free() */
+      return next;
+    }
+  result->next = next;
+  result->keyword = keyword;
+  result->keywordType = type;
+  return result;
+}
+
+
+struct MimeToDecoderMapping
+{
+  const char *mime_type;
+  enum CodecID codec_id;
+};
+
+/* map mime image types to a decoder */
+static const struct MimeToDecoderMapping m2d_map[] = {
+  {"image/jpeg", CODEC_ID_MJPEG},
+  {"image/png", CODEC_ID_PNG},
+  {NULL, CODEC_ID_NONE}
+};
+
+#define PROBE_MAX (1<<20)
+#define BIOBUF_SIZE (64*1024)
+#define THUMBSIZE 128           /* max dimension in pixels */
+#define MAX_THUMB_SIZE (100*1024)       /* in bytes */
+
+/**
+ * Produce a PNG thumbnail of the given image or video and prepend it
+ * to the keyword list as EXTRACTOR_THUMBNAIL_DATA.
+ *
+ * If a previously extracted mime type is listed in m2d_map, the data
+ * is decoded directly with the mapped image decoder.  Otherwise the
+ * container format is probed, the first video stream is opened, a
+ * seek to 10 seconds is attempted and the next decoded key frame is
+ * used.  The frame is scaled so that its larger (aspect-corrected)
+ * dimension is THUMBSIZE pixels and encoded as PNG into a buffer of
+ * at most MAX_THUMB_SIZE bytes.
+ *
+ * @param filename name of the file (passed to the format prober)
+ * @param data contents of the file
+ * @param size number of bytes in data
+ * @param prev keyword list extracted so far
+ * @return prev with the thumbnail prepended, or prev unchanged if no
+ *         thumbnail could be produced
+ */
+struct EXTRACTOR_Keywords *
+libextractor_thumbnailffmpeg_extract (const char *filename,
+                                      const unsigned char *data,
+                                      size_t size,
+                                      struct EXTRACTOR_Keywords *prev)
+{
+  int score;
+
+  AVInputFormat *fmt = NULL;
+  AVProbeData pdat;
+
+  ByteIOContext *bio_ctx = NULL;
+  uint8_t *bio_buffer = NULL;
+  struct StreamDescriptor reader_state;
+
+  AVFormatContext *format_ctx = NULL;
+  AVCodecContext *codec_ctx = NULL;
+  AVPacket packet;
+  int video_stream_index = -1;
+  AVCodec *codec = NULL;
+  AVFrame *frame = NULL;
+  AVFrame *thumb_frame = NULL;
+  int64_t ts;
+
+  struct SwsContext *scaler_ctx = NULL;
+  int sws_flags = SWS_BICUBIC;
+  uint8_t *thumb_buffer = NULL;
+  int thumb_width, thumb_height;
+  int64_t display_width;
+  int sar_num, sar_den;
+
+  uint8_t *encoder_output_buffer = NULL;
+  size_t encoder_output_buffer_size;
+  AVCodecContext *enc_codec_ctx = NULL;
+  AVCodec *enc_codec = NULL;
+
+  int i;
+  int err;
+  int frame_finished;
+
+  char *binary;
+  const char *mime;
+  int is_image = 0;
+  /* initialized because it is printed below even when no mime matched */
+  enum CodecID image_codec_id = CODEC_ID_NONE;
+
+  mime = EXTRACTOR_extractLast (EXTRACTOR_MIMETYPE, prev);
+  if (mime != NULL)
+    {
+      for (i = 0; m2d_map[i].mime_type != NULL; i++)
+        {
+          if (0 == strcmp (m2d_map[i].mime_type, mime))
+            {
+              is_image = 1;
+              image_codec_id = m2d_map[i].codec_id;
+              break;
+            }
+        }
+    }
+
+#if DEBUG
+  printf ("is_image: %d codec:%d\n", is_image, image_codec_id);
+#endif
+  if (!is_image)
+    {
+      pdat.filename = filename;
+      pdat.buf = (unsigned char *) data;
+      pdat.buf_size = (size > PROBE_MAX) ? PROBE_MAX : size;
+
+      fmt = av_probe_input_format (&pdat, 1);
+      if (fmt == NULL)
+        return prev;
+#if DEBUG
+      printf ("format %p [%s] [%s]\n", fmt, fmt->name, fmt->long_name);
+#endif
+      pdat.buf = (unsigned char *) data;
+      pdat.buf_size = size > PROBE_MAX ? PROBE_MAX : size;
+      /* a format matched by file extension may have no probe function */
+      score = (fmt->read_probe != NULL) ? fmt->read_probe (&pdat) : 0;
+#if DEBUG
+      printf ("score: %d\n", score);
+#endif
+      /*if (score < 50) return prev; */
+    }
+
+  if (is_image)
+    {
+      /* this context is allocated here and released again at "out" */
+      codec_ctx = avcodec_alloc_context ();
+      if (codec_ctx != NULL)
+        codec = avcodec_find_decoder (image_codec_id);
+      if (codec != NULL)
+        {
+          if (avcodec_open (codec_ctx, codec) != 0)
+            {
+#if DEBUG
+              printf ("open codec failed\n");
+#endif
+              codec = NULL;
+            }
+        }
+    }
+  else
+    {
+      bio_ctx = malloc (sizeof (ByteIOContext));
+      bio_buffer = malloc (BIOBUF_SIZE);
+      if (bio_ctx == NULL || bio_buffer == NULL)
+        goto out;
+
+      reader_state.data = data;
+      reader_state.offset = 0;
+      reader_state.size = size;
+
+      init_put_byte (bio_ctx, bio_buffer,
+                     BIOBUF_SIZE, 0, &reader_state,
+                     stream_read, NULL, stream_seek);
+
+      fmt->flags |= AVFMT_NOFILE;
+      err = av_open_input_stream (&format_ctx, bio_ctx, "", fmt, NULL);
+      if (err < 0)
+        {
+#if DEBUG
+          printf ("couldn't open input stream\n");
+#endif
+          goto out;
+        }
+
+      err = av_find_stream_info (format_ctx);
+      if (err < 0)
+        {
+#if DEBUG
+          printf ("couldn't find codec params\n");
+#endif
+          goto out;
+        }
+
+      /* use the first video stream; codec stays NULL if there is none */
+      for (i = 0; i < format_ctx->nb_streams; i++)
+        {
+          codec_ctx = format_ctx->streams[i]->codec;
+          if (codec_ctx->codec_type == CODEC_TYPE_VIDEO)
+            {
+              video_stream_index = i;
+              codec = avcodec_find_decoder (codec_ctx->codec_id);
+              if (codec == NULL)
+                {
+#if DEBUG
+                  printf ("find_decoder failed\n");
+#endif
+                  break;
+                }
+              err = avcodec_open (codec_ctx, codec);
+              if (err != 0)
+                {
+#if DEBUG
+                  printf ("failed to open codec\n");
+#endif
+                  codec = NULL;
+                }
+              break;
+            }
+        }
+    }
+
+  if (codec_ctx == NULL || codec == NULL)
+    {
+#if DEBUG
+      printf ("failed to open codec\n");
+#endif
+      goto out;
+    }
+  frame = avcodec_alloc_frame ();
+  if (frame == NULL)
+    {
+#if DEBUG
+      printf ("failed to alloc frame\n");
+#endif
+      goto out;
+    }
+
+  if (!is_image)
+    {
+#if DEBUG
+      printf ("duration: %lld\n", (long long) format_ctx->duration);
+      if (format_ctx->duration == AV_NOPTS_VALUE)
+        printf ("duration unknown\n");
+#endif
+      /* TODO: if duration is known seek to to some better place(?) */
+      ts = 10;                  // s
+      ts = ts * AV_TIME_BASE;
+      err = av_seek_frame (format_ctx, -1, ts, 0);
+      if (err >= 0)
+        {
+          avcodec_flush_buffers (codec_ctx);
+        }
+#if DEBUG
+      else
+        printf ("seeking failed %d\n", err);
+#endif
+    }
+
+  frame_finished = 0;
+  if (is_image)
+    {
+      avcodec_decode_video (codec_ctx, frame, &frame_finished, data, size);
+    }
+  else
+    {
+      /* decode packets of the video stream until a key frame is complete */
+      while (1)
+        {
+          err = av_read_frame (format_ctx, &packet);
+          if (err < 0)
+            break;
+          if (packet.stream_index == video_stream_index)
+            {
+              avcodec_decode_video (codec_ctx,
+                                    frame,
+                                    &frame_finished,
+                                    packet.data, packet.size);
+              if (frame_finished && frame->key_frame)
+                {
+                  av_free_packet (&packet);
+                  break;
+                }
+            }
+          av_free_packet (&packet);
+        }
+    }
+
+  if (!frame_finished)
+    goto out;
+
+  /* a zero dimension would divide by zero below */
+  if (codec_ctx->width <= 0 || codec_ctx->height <= 0)
+    goto out;
+
+  sar_num = codec_ctx->sample_aspect_ratio.num;
+  sar_den = codec_ctx->sample_aspect_ratio.den;
+  if (sar_num <= 0 || sar_den <= 0)
+    {
+      sar_num = 1;
+      sar_den = 1;
+    }
+  /* aspect-corrected width; 64-bit so the product cannot overflow */
+  display_width = ((int64_t) codec_ctx->width * sar_num) / sar_den;
+  if (display_width < 1)
+    display_width = 1;
+  if (display_width > codec_ctx->height)
+    {
+      thumb_width = THUMBSIZE;
+      thumb_height =
+        (int) (((int64_t) thumb_width * codec_ctx->height) / display_width);
+    }
+  else
+    {
+      thumb_height = THUMBSIZE;
+      thumb_width =
+        (int) (((int64_t) thumb_height * display_width) / codec_ctx->height);
+    }
+  if (thumb_width < 8)
+    thumb_width = 8;
+  if (thumb_height < 1)
+    thumb_height = 1;
+#if DEBUG
+  printf ("thumb dim: %d %d\n", thumb_width, thumb_height);
+#endif
+
+  scaler_ctx =
+    sws_getContext (codec_ctx->width, codec_ctx->height, codec_ctx->pix_fmt,
+                    thumb_width, thumb_height, PIX_FMT_RGB24, sws_flags, NULL,
+                    NULL, NULL);
+  if (scaler_ctx == NULL)
+    {
+#if DEBUG
+      printf ("failed to alloc scaler\n");
+#endif
+      goto out;
+    }
+  thumb_frame = avcodec_alloc_frame ();
+  thumb_buffer =
+    av_malloc (avpicture_get_size (PIX_FMT_RGB24, thumb_width, thumb_height));
+  if (thumb_frame == NULL || thumb_buffer == NULL)
+    {
+#if DEBUG
+      printf ("failed to alloc thumb frame\n");
+#endif
+      goto out;
+    }
+  avpicture_fill ((AVPicture *) thumb_frame, thumb_buffer,
+                  PIX_FMT_RGB24, thumb_width, thumb_height);
+
+  sws_scale (scaler_ctx,
+             frame->data, frame->linesize,
+             0, codec_ctx->height, thumb_frame->data, thumb_frame->linesize);
+
+  encoder_output_buffer_size = MAX_THUMB_SIZE;
+  encoder_output_buffer = av_malloc (encoder_output_buffer_size);
+  if (encoder_output_buffer == NULL)
+    {
+#if DEBUG
+      printf ("couldn't alloc encoder output buf\n");
+#endif
+      goto out;
+    }
+
+  enc_codec = avcodec_find_encoder_by_name ("png");
+  if (enc_codec == NULL)
+    {
+#if DEBUG
+      printf ("couldn't find encoder\n");
+#endif
+      goto out;
+    }
+  enc_codec_ctx = avcodec_alloc_context ();
+  if (enc_codec_ctx == NULL)
+    {
+      /* nothing was opened: keep "out" from closing a NULL context */
+      enc_codec = NULL;
+      goto out;
+    }
+  enc_codec_ctx->width = thumb_width;
+  enc_codec_ctx->height = thumb_height;
+  enc_codec_ctx->pix_fmt = PIX_FMT_RGB24;
+
+  if (avcodec_open (enc_codec_ctx, enc_codec) < 0)
+    {
+#if DEBUG
+      printf ("couldn't open encoder\n");
+#endif
+      enc_codec = NULL;
+      goto out;
+    }
+
+  err = avcodec_encode_video (enc_codec_ctx,
+                              encoder_output_buffer,
+                              encoder_output_buffer_size, thumb_frame);
+  if (err <= 0)
+    goto out;
+
+  binary =
+    EXTRACTOR_binaryEncode ((const unsigned char *) encoder_output_buffer,
+                            err);
+  if (binary != NULL)
+    prev = addKeyword (EXTRACTOR_THUMBNAIL_DATA, binary, prev);
+
+out:
+  /* enc_codec / codec are non-NULL only while their context is open */
+  if (enc_codec != NULL)
+    avcodec_close (enc_codec_ctx);
+  if (enc_codec_ctx != NULL)
+    av_free (enc_codec_ctx);
+  if (encoder_output_buffer != NULL)
+    av_free (encoder_output_buffer);
+  if (scaler_ctx != NULL)
+    sws_freeContext (scaler_ctx);
+  if (codec != NULL)
+    avcodec_close (codec_ctx);
+  /* only the image path allocated codec_ctx itself; on the video path
+     it was taken from format_ctx->streams[] and is not freed here */
+  if (is_image && codec_ctx != NULL)
+    av_free (codec_ctx);
+  if (format_ctx != NULL)
+    av_close_input_file (format_ctx);
+  if (frame != NULL)
+    av_free (frame);
+  if (thumb_buffer != NULL)
+    av_free (thumb_buffer);
+  if (thumb_frame != NULL)
+    av_free (thumb_frame);
+  if (bio_ctx != NULL)
+    free (bio_ctx);
+  if (bio_buffer != NULL)
+    free (bio_buffer);
+
+  return prev;
+}
+
+/**
+ * Entry point under the generic "thumbnail" plugin name; forwards to
+ * libextractor_thumbnailffmpeg_extract() and ignores options.
+ */
+struct EXTRACTOR_Keywords *
+libextractor_thumbnail_extract (const char *filename,
+                                const unsigned char *data,
+                                size_t size,
+                                struct EXTRACTOR_Keywords *prev,
+                                const char *options)
+{
+  return libextractor_thumbnailffmpeg_extract (filename, data, size, prev);
+}
+
+/* end of thumbnailextractorffmpeg.c */