libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit aa056e44077dc7ed731cdfc1dcdcdd1d18b6004d
parent e4423c3bcb9b184b037bd5c0c7b70c46a770b83f
Author: Christian Grothoff <christian@grothoff.org>
Date:   Wed, 16 Dec 2009 13:25:07 +0000

man

Diffstat:
Msrc/include/extractor.h | 7+++----
Msrc/main/extractor_metatypes.c | 10+++++-----
Msrc/plugins/Makefile.am | 17++++++++---------
Msrc/plugins/deb_extractor.c | 2+-
Asrc/plugins/man_extractor.c | 227+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/plugins/manextractor.c | 225-------------------------------------------------------------------------------
Msrc/plugins/rpm_extractor.c | 2+-
7 files changed, 245 insertions(+), 245 deletions(-)

diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -191,7 +191,7 @@ enum EXTRACTOR_MetaType /* software package specifics (deb, rpm, tgz, elf) */ EXTRACTOR_METATYPE_PACKAGE_NAME = 69, EXTRACTOR_METATYPE_PACKAGE_VERSION = 70, - EXTRACTOR_METATYPE_PACKAGE_SECTION = 71, + EXTRACTOR_METATYPE_SECTION = 71, EXTRACTOR_METATYPE_UPLOAD_PRIORITY = 72, EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY = 73, EXTRACTOR_METATYPE_PACKAGE_CONFLICTS = 74, @@ -241,10 +241,11 @@ enum EXTRACTOR_MetaType EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE = 113, EXTRACTOR_METATYPE_THUMBNAIL = 114, + EXTRACTOR_METATYPE_IMAGE_RESOLUTION = 115, + EXTRACTOR_METATYPE_SOURCE = 116, /* fixme: used up to here! */ - EXTRACTOR_METATYPE_RESOLUTION = 57, EXTRACTOR_METATYPE_SCALE = 108, @@ -299,7 +300,6 @@ enum EXTRACTOR_MetaType EXTRACTOR_METATYPE_ORGANIZATION = 15, EXTRACTOR_METATYPE_CONTRIBUTOR = 19, - EXTRACTOR_METATYPE_SOURCE = 23, EXTRACTOR_METATYPE_RELATION = 24, EXTRACTOR_METATYPE_COVERAGE = 25, EXTRACTOR_METATYPE_SOFTWARE = 26, @@ -309,7 +309,6 @@ enum EXTRACTOR_MetaType EXTRACTOR_METATYPE_CREATED_FOR = 39, EXTRACTOR_METATYPE_RELEASE = 41, EXTRACTOR_METATYPE_GROUP = 42, - EXTRACTOR_METATYPE_CATEGORY = 58, EXTRACTOR_METATYPE_OWNER = 66, EXTRACTOR_METATYPE_MEDIA_TYPE = 68, EXTRACTOR_METATYPE_CONTACT = 69, diff --git a/src/main/extractor_metatypes.c b/src/main/extractor_metatypes.c @@ -281,7 +281,7 @@ static const struct MetaTypeDescription meta_type_descriptions[] = { { gettext_noop ("magnification"), gettext_noop ("") }, { gettext_noop ("image dimensions"), - gettext_noop ("") }, + gettext_noop ("size of the image in pixels (width times height)") }, { gettext_noop ("produced by software"), gettext_noop ("") }, /* what is the exact difference between the software creator and the software producer? PDF and DVI @@ -290,10 +290,10 @@ static const struct MetaTypeDescription meta_type_descriptions[] = { { gettext_noop ("thumbnail"), gettext_noop ("smaller version of the image for previewing") }, /* 115 */ - { gettext_noop (""), - gettext_noop ("") }, - { gettext_noop (""), - gettext_noop ("") }, + { gettext_noop ("image resolution"), + gettext_noop ("resolution in dots per inch") }, + { gettext_noop ("source"), + gettext_noop ("Originating entity") }, { gettext_noop (""), gettext_noop ("") }, { gettext_noop (""), diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -93,6 +93,7 @@ plugin_LTLIBRARIES = \ libextractor_html.la \ libextractor_it.la \ libextractor_jpeg.la \ + libextractor_man.la \ libextractor_mime.la \ $(pdf) \ $(rpm) \ @@ -163,6 +164,13 @@ libextractor_jpeg_la_LDFLAGS = \ libextractor_jpeg_la_LIBADD = \ $(LE_LIBINTL) +libextractor_man_la_SOURCES = \ + man_extractor.c +libextractor_man_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_man_la_LIBADD = \ + $(LE_LIBINTL) + libextractor_mime_la_SOURCES = \ mime_extractor.c libextractor_mime_la_LDFLAGS = \ @@ -202,7 +210,6 @@ OLD_LIBS = \ libextractor_id3v2.la \ libextractor_id3v24.la \ libextractor_id3v23.la \ - libextractor_man.la \ libextractor_mp3.la \ $(extrampeg) \ libextractor_nsf.la \ @@ -340,14 +347,6 @@ libextractor_mpeg_la_LIBADD = \ $(top_builddir)/src/main/libextractor.la endif -libextractor_man_la_SOURCES = \ - manextractor.c -libextractor_man_la_LDFLAGS = \ - $(PLUGINFLAGS) -libextractor_man_la_LIBADD = \ - $(top_builddir)/src/main/libextractor.la \ - $(LE_LIBINTL) - libextractor_riff_la_SOURCES = \ riffextractor.c libextractor_riff_la_LDFLAGS = \ diff --git a/src/plugins/deb_extractor.c b/src/plugins/deb_extractor.c @@ -57,7 +57,7 @@ typedef struct static Matches tmap[] = { {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME}, {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION}, - {"Section: ", EXTRACTOR_METATYPE_PACKAGE_SECTION}, + {"Section: ", EXTRACTOR_METATYPE_SECTION}, {"Priority: ", EXTRACTOR_METATYPE_UPLOAD_PRIORITY}, {"Architecture: ", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE}, {"Depends: ", EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY}, diff --git a/src/plugins/man_extractor.c b/src/plugins/man_extractor.c @@ -0,0 +1,227 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + +#include "platform.h" +#include "extractor.h" +#include <ctype.h> + +static char * +stndup (const char *str, size_t n) +{ + char *tmp; + tmp = malloc (n + 1); + tmp[n] = '\0'; + memcpy (tmp, str, n); + return tmp; +} + +static int +addKeyword (enum EXTRACTOR_MetaType type, + char *keyword, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls) +{ + int ret; + if (keyword == NULL) + return 0; + if (strlen (keyword) == 0) + { + free (keyword); + return 0; + } + if ((keyword[0] == '\"') && (keyword[strlen (keyword) - 1] == '\"')) + { + char *tmp; + + keyword[strlen (keyword) - 1] = '\0'; + tmp = strdup (&keyword[1]); + free (keyword); + keyword = tmp; + } + if (strlen (keyword) == 0) + { + free (keyword); + return 0; + } + ret = proc (proc_cls, + "man", + type, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + keyword, + strlen (keyword)+1); + free (keyword); + return ret; +} + +static void +NEXT (size_t * end, const char *buf, const size_t size) +{ + int quot; + + quot = 0; + while ((*end < size) && (((quot & 1) != 0) || ((buf[*end] != ' ')))) + { + if (buf[*end] == '\"') + quot++; + (*end)++; + } + if ((quot & 1) == 1) + (*end) = size + 1; +} + +/** + * How many bytes do we actually try to scan? (from the beginning + * of the file). + */ +#define MAX_READ (16 * 1024) + +#define ADD(t,s) do { if (0 != addKeyword (t, s, proc, proc_cls)) return 1; } while (0) + +int +EXTRACTOR_man_extract (const char *buf, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls, + const char *options) +{ + int pos; + size_t xsize; + const size_t xlen = strlen (".TH "); + + if (size > MAX_READ) + size = MAX_READ; + pos = 0; + if (size < xlen) + return 0; + while ((pos < size - xlen) && + ((0 != strncmp (".TH ", + &buf[pos], + xlen)) || ((pos != 0) && (buf[pos - 1] != '\n')))) + { + if (!isgraph (buf[pos]) && !isspace (buf[pos])) + return 0; + pos++; + } + xsize = pos; + while ((xsize < size) && (buf[xsize] != '\n')) + xsize++; + size = xsize; + + if (0 == strncmp (".TH ", &buf[pos], xlen)) + { + size_t end; + + pos += xlen; + end = pos; + NEXT (&end, buf, size); + if (end > size) + return 0; + if (end - pos > 0) + { + ADD (EXTRACTOR_METATYPE_TITLE, stndup (&buf[pos], end - pos)); + pos = end + 1; + } + if (pos >= size) + return 0; + end = pos; + NEXT (&end, buf, size); + if (end > size) + return 0; + if (buf[pos] == '\"') + pos++; + if ((end - pos >= 1) && (end - pos <= 4)) + { + switch (buf[pos]) + { + case '1': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("Commands"))); + break; + case '2': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("System calls"))); + break; + case '3': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("Library calls"))); + break; + case '4': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("Special files"))); + break; + case '5': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("File formats and conventions"))); + break; + case '6': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("Games"))); + break; + case '7': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("Conventions and miscellaneous"))); + break; + case '8': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("System management commands"))); + break; + case '9': + ADD (EXTRACTOR_METATYPE_SECTION, + strdup (_("Kernel routines"))); + break; + } + pos = end + 1; + } + end = pos; + NEXT (&end, buf, size); + if (end > size) + return 0; + if (end - pos > 0) + { + ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, stndup (&buf[pos], end - pos)); + pos = end + 1; + } + end = pos; + NEXT (&end, buf, size); + if (end > size) + return 0; + if (end - pos > 0) + { + ADD (EXTRACTOR_METATYPE_SOURCE, + stndup (&buf[pos], end - pos)); + pos = end + 1; + } + end = pos; + NEXT (&end, buf, size); + if (end > size) + return 0; + if (end - pos > 0) + { + ADD (EXTRACTOR_METATYPE_BOOK_TITLE, + stndup (&buf[pos], end - pos)); + pos = end + 1; + } + } + + return 0; +} + +/* end of man_extractor.c */ diff --git a/src/plugins/manextractor.c b/src/plugins/manextractor.c @@ -1,225 +0,0 @@ -/* - This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. - */ - -#include "platform.h" -#include "extractor.h" -#include <ctype.h> - -static char * -stndup (const char *str, size_t n) -{ - char *tmp; - tmp = malloc (n + 1); - tmp[n] = '\0'; - memcpy (tmp, str, n); - return tmp; -} - -static EXTRACTOR_KeywordList * -addKeyword (EXTRACTOR_KeywordType type, - char *keyword, EXTRACTOR_KeywordList * next) -{ - EXTRACTOR_KeywordList *result; - - if (keyword == NULL) - return next; - if (strlen (keyword) == 0) - { - free (keyword); - return next; - } - if ((keyword[0] == '\"') && (keyword[strlen (keyword) - 1] == '\"')) - { - char *tmp; - - keyword[strlen (keyword) - 1] = '\0'; - tmp = strdup (&keyword[1]); - free (keyword); - keyword = tmp; - } - if (strlen (keyword) == 0) - { - free (keyword); - return next; - } - result = malloc (sizeof (EXTRACTOR_KeywordList)); - result->next = next; - result->keyword = keyword; - result->keywordType = type; - return result; -} - -static void -NEXT (size_t * end, const char *buf, const size_t size) -{ - int quot; - - quot = 0; - while ((*end < size) && (((quot & 1) != 0) || ((buf[*end] != ' ')))) - { - if (buf[*end] == '\"') - quot++; - (*end)++; - } - if ((quot & 1) == 1) - (*end) = size + 1; -} - -/** - * How many bytes do we actually try to scan? (from the beginning - * of the file). - */ -#define MAX_READ (16 * 1024) - - - -struct EXTRACTOR_Keywords * -libextractor_man_extract (const char *filename, - const char *buf, - size_t size, struct EXTRACTOR_Keywords *prev) -{ - int pos; - size_t xsize; - const size_t xlen = strlen (".TH "); - - if (size > MAX_READ) - size = MAX_READ; - pos = 0; - if (size < xlen) - return prev; - while ((pos < size - xlen) && - ((0 != strncmp (".TH ", - &buf[pos], - xlen)) || ((pos != 0) && (buf[pos - 1] != '\n')))) - { - if (!isgraph (buf[pos]) && !isspace (buf[pos])) - return prev; - pos++; - } - xsize = pos; - while ((xsize < size) && (buf[xsize] != '\n')) - xsize++; - size = xsize; - - if (0 == strncmp (".TH ", &buf[pos], xlen)) - { - size_t end; - - pos += xlen; - end = pos; - NEXT (&end, buf, size); - if (end > size) - return prev; - if (end - pos > 0) - { - prev = addKeyword (EXTRACTOR_TITLE, - stndup (&buf[pos], end - pos), prev); - pos = end + 1; - } - if (pos >= size) - return prev; - end = pos; - NEXT (&end, buf, size); - if (end > size) - return prev; - if (buf[pos] == '\"') - pos++; - if ((end - pos >= 1) && (end - pos <= 4)) - { - switch (buf[pos]) - { - case '1': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("Commands")), prev); - break; - case '2': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("System calls")), prev); - break; - case '3': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("Library calls")), prev); - break; - case '4': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("Special files")), prev); - break; - case '5': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("File formats and conventions")), - prev); - break; - case '6': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("Games")), prev); - break; - case '7': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("Conventions and miscellaneous")), - prev); - break; - case '8': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("System management commands")), - prev); - break; - case '9': - prev = addKeyword (EXTRACTOR_CATEGORY, - strdup (_("Kernel routines")), prev); - break; - } - pos = end + 1; - } - end = pos; - NEXT (&end, buf, size); - if (end > size) - return prev; - if (end - pos > 0) - { - prev = addKeyword (EXTRACTOR_DATE, - stndup (&buf[pos], end - pos), prev); - pos = end + 1; - } - end = pos; - NEXT (&end, buf, size); - if (end > size) - return prev; - if (end - pos > 0) - { - prev = addKeyword (EXTRACTOR_SOURCE, - stndup (&buf[pos], end - pos), prev); - pos = end + 1; - } - end = pos; - NEXT (&end, buf, size); - if (end > size) - return prev; - if (end - pos > 0) - { - prev = addKeyword (EXTRACTOR_BOOKTITLE, - stndup (&buf[pos], end - pos), prev); - pos = end + 1; - } - } - - return prev; -} - -/* end of manextractor.c */ diff --git a/src/plugins/rpm_extractor.c b/src/plugins/rpm_extractor.c @@ -71,7 +71,7 @@ typedef struct static Matches tests[] = { {RPMTAG_NAME, EXTRACTOR_METATYPE_PACKAGE_NAME}, {RPMTAG_VERSION, EXTRACTOR_METATYPE_SOFTWARE_VERSION}, - {RPMTAG_GROUP, EXTRACTOR_METATYPE_PACKAGE_SECTION}, + {RPMTAG_GROUP, EXTRACTOR_METATYPE_SECTION}, {RPMTAG_SIZE, EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE}, {RPMTAG_SUMMARY, EXTRACTOR_METATYPE_SUMMARY}, {RPMTAG_PACKAGER, EXTRACTOR_METATYPE_PACKAGE_MAINTAINER},