libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 0c2e763e2a205e51f63f4e797ca6e2f593f4693f
parent 0767ed15fbcebd38536803c958cf6ef0e3334046
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sat, 16 Sep 2006 18:20:48 +0000

fx

Diffstat:
MChangeLog | 4++++
Mconfigure.ac | 4++--
Msrc/include/extractor.h | 13+++++++++++++
Msrc/main/extract.c | 51+++++++++++++++++++++++----------------------------
Msrc/main/extractor.c | 45++++++++++++++++++++++++++++-----------------
Msrc/plugins/id3v23extractor.c | 15+++++++++++----
Msrc/plugins/id3v24extractor.c | 14++++++++++++--
Msrc/plugins/id3v2extractor.c | 2+-
Msrc/plugins/mp3extractor.c | 20+++++++++++++++-----
9 files changed, 109 insertions(+), 59 deletions(-)

diff --git a/ChangeLog b/ChangeLog @@ -1,3 +1,7 @@ +Sat Sep 16 12:36:42 MDT 2006 + Added support for various additional tags to ID3v2 extractors. + Now (again) trimming whitespace at the end of ID3v1 tags. + Wed Sep 6 13:38:55 PDT 2006 Added tIME support to PNG extractor. Bugfixes in PDF extractors. Made libextractor relocateable (plugin path no longer hardwired diff --git a/configure.ac b/configure.ac @@ -1,8 +1,8 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ(2.57) -AC_INIT([libextractor], [0.5.15], [bug-libextractor@gnu.org]) +AC_INIT([libextractor], [0.5.15a], [bug-libextractor@gnu.org]) AC_REVISION($Revision: 1.67 $) -AM_INIT_AUTOMAKE([libextractor], [0.5.15]) +AM_INIT_AUTOMAKE([libextractor], [0.5.15a]) AM_CONFIG_HEADER(src/include/config.h) AH_TOP([#define _GNU_SOURCE 1]) diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -168,6 +168,19 @@ typedef enum { EXTRACTOR_INFORMATION = 112, EXTRACTOR_FULL_NAME = 113, EXTRACTOR_CHAPTER = 114, + EXTRACTOR_YEAR = 115, + + EXTRACTOR_LINK = 116, + EXTRACTOR_MUSIC_CD_IDENTIFIER = 117, + EXTRACTOR_PLAY_COUNTER = 118, + EXTRACTOR_POPULARITY_METER = 119, + EXTRACTOR_CONTENT_TYPE = 120, + EXTRACTOR_ENCODED_BY = 121, + EXTRACTOR_TIME = 122, + EXTRACTOR_MUSICIAN_CREDITS_LIST = 123, + EXTRACTOR_MOOD = 124, + + } EXTRACTOR_KeywordType; /** diff --git a/src/main/extract.c b/src/main/extract.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -594,39 +594,34 @@ main (int argc, char *argv[]) extractors = EXTRACTOR_addLibraryLast(extractors, "libextractor_split"); - if (verbose == YES) - { - /* print list of all used extractors */ - } /* extract keywords */ if ( bibtex == YES ) fprintf(stdout, _("%% BiBTeX file\n")); - for (i = optind; i < argc; i++) - { - errno = 0; - keywords = EXTRACTOR_getKeywords (extractors, argv[i]); - if (0 != errno) { - if (verbose == YES) { - fprintf(stderr, - "%s: %s: %s\n", - argv[0], argv[i], strerror(errno)); - } - ret = 1; - continue; + for (i = optind; i < argc; i++) { + errno = 0; + keywords = EXTRACTOR_getKeywords (extractors, argv[i]); + if (0 != errno) { + if (verbose == YES) { + fprintf(stderr, + "%s: %s: %s\n", + argv[0], argv[i], strerror(errno)); } - if (duplicates != -1 || bibtex == YES) - keywords = EXTRACTOR_removeDuplicateKeywords (keywords, duplicates); - if (verbose == YES && bibtex == NO) - printf (_("Keywords for file %s:\n"), argv[i]); - if (bibtex == YES) - printSelectedKeywordsBibtex (stdout, keywords, print, argv[i]); - else - printSelectedKeywords (stdout, keywords, print, verbose); - if (verbose == YES && bibtex == NO) - printf ("\n"); - EXTRACTOR_freeKeywords (keywords); + ret = 1; + continue; } + if ( (duplicates != -1) || (bibtex == YES)) + keywords = EXTRACTOR_removeDuplicateKeywords (keywords, duplicates); + if (verbose == YES && bibtex == NO) + printf (_("Keywords for file %s:\n"), argv[i]); + if (bibtex == YES) + printSelectedKeywordsBibtex (stdout, keywords, print, argv[i]); + else + printSelectedKeywords (stdout, keywords, print, verbose); + if (verbose == YES && bibtex == NO) + printf ("\n"); + EXTRACTOR_freeKeywords (keywords); + } free (print); EXTRACTOR_removeAll (extractors); diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -151,16 +151,26 @@ static const char *keywordTypes[] = { gettext_noop("editing cycles"), gettext_noop("scale"), gettext_noop("manager"), - gettext_noop(/* movie director */"director"), + gettext_noop(/* movie director */"director"), /* 110 */ gettext_noop("duration"), gettext_noop("information"), gettext_noop("full name"), gettext_noop("chapter"), + gettext_noop("year"), /* 115 */ + gettext_noop("link"), + gettext_noop("music CD identifier"), + gettext_noop("play counter"), + gettext_noop("popularity meter"), + gettext_noop("content type"), /* 120 */ + gettext_noop("encoded by"), + gettext_noop("time"), + gettext_noop("musician credits list"), + gettext_noop("mood"), NULL, }; /* the number of keyword types (for bounds-checking) */ -#define HIGHEST_TYPE_NUMBER 115 +#define HIGHEST_TYPE_NUMBER 125 #ifdef HAVE_LIBOGG #if HAVE_VORBIS @@ -1236,11 +1246,11 @@ removeKeyword (const char *keyword, } if (pos == NULL) break; - if ((0 == strcmp (pos->keyword, keyword)) && - ((pos->keywordType == type) || - (((options & EXTRACTOR_DUPLICATES_TYPELESS) > 0)) || - (((options & EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN) > 0) && - (pos->keywordType == EXTRACTOR_UNKNOWN)))) + if ( (0 == strcmp (pos->keyword, keyword)) && + ( (pos->keywordType == type) || + (((options & EXTRACTOR_DUPLICATES_TYPELESS) > 0)) || + ( ((options & EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN) > 0) && + (pos->keywordType == EXTRACTOR_UNKNOWN)) ) ) { /* remove! */ if (prev == NULL) @@ -1269,16 +1279,18 @@ removeKeyword (const char *keyword, */ EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList * list, - const unsigned int options) -{ + const unsigned int options) { EXTRACTOR_KeywordList *pos; pos = list; - while (pos != NULL) - { - removeKeyword (pos->keyword, pos->keywordType, options, &list, pos); - pos = pos->next; - } + while (pos != NULL) { + removeKeyword(pos->keyword, + pos->keywordType, + options, + &list, + pos); + pos = pos->next; + } return list; } @@ -1288,8 +1300,7 @@ EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList * list, * @return a list of keywords without duplicates */ EXTRACTOR_KeywordList * -EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list) -{ +EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList * list) { EXTRACTOR_KeywordList * pos; EXTRACTOR_KeywordList * last; diff --git a/src/plugins/id3v23extractor.c b/src/plugins/id3v23extractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -55,11 +55,16 @@ typedef struct { static Matches tmap[] = { { "COMM", EXTRACTOR_COMMENT }, + { "IPLS", EXTRACTOR_CONTRIBUTOR }, + { "LINK", EXTRACTOR_LINK }, + { "MCDI", EXTRACTOR_MUSIC_CD_IDENTIFIER }, + { "PCNT", EXTRACTOR_PLAY_COUNTER }, + { "POPM", EXTRACTOR_POPULARITY_METER }, { "TCOP", EXTRACTOR_COPYRIGHT }, { "TDAT", EXTRACTOR_DATE }, - { "TCON", EXTRACTOR_GENRE }, + { "TCON", EXTRACTOR_CONTENT_TYPE }, { "TIT1", EXTRACTOR_GENRE }, - { "TENC", EXTRACTOR_PRODUCER }, + { "TENC", EXTRACTOR_ENCODED_BY }, { "TEXT", EXTRACTOR_LYRICS }, { "TOLY", EXTRACTOR_CONTRIBUTOR }, { "TOPE", EXTRACTOR_CONTRIBUTOR }, @@ -70,7 +75,7 @@ static Matches tmap[] = { { "TPE4", EXTRACTOR_INTERPRET }, { "TMED", EXTRACTOR_MEDIA_TYPE }, { "TCOM", EXTRACTOR_CREATOR }, - { "TIME", EXTRACTOR_DATE }, + { "TIME", EXTRACTOR_TIME }, { "TOFN", EXTRACTOR_FILENAME }, { "TOPE", EXTRACTOR_ARTIST }, { "TPUB", EXTRACTOR_PUBLISHER }, @@ -80,6 +85,8 @@ static Matches tmap[] = { { "TOAL", EXTRACTOR_ALBUM }, { "TALB", EXTRACTOR_ALBUM }, { "TLAN", EXTRACTOR_LANGUAGE }, + { "TYER", EXTRACTOR_YEAR }, + { "TLEN", EXTRACTOR_DURATION }, { "TIT2", EXTRACTOR_TITLE }, { "TIT3", EXTRACTOR_DESCRIPTION }, { "WCOM", EXTRACTOR_RELEASE }, diff --git a/src/plugins/id3v24extractor.c b/src/plugins/id3v24extractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -56,11 +56,19 @@ typedef struct { static Matches tmap[] = { { "COMM", EXTRACTOR_COMMENT }, + { "IPLS", EXTRACTOR_CONTRIBUTOR }, + { "TIPL", EXTRACTOR_CONTRIBUTOR }, + { "TMOO", EXTRACTOR_MOOD }, + { "TMCL", EXTRACTOR_MUSICIAN_CREDITS_LIST }, + { "LINK", EXTRACTOR_LINK }, + { "MCDI", EXTRACTOR_MUSIC_CD_IDENTIFIER }, + { "PCNT", EXTRACTOR_PLAY_COUNTER }, + { "POPM", EXTRACTOR_POPULARITY_METER }, { "TCOP", EXTRACTOR_COPYRIGHT }, { "TDRC", EXTRACTOR_DATE }, { "TCON", EXTRACTOR_GENRE }, { "TIT1", EXTRACTOR_GENRE }, - { "TENC", EXTRACTOR_PRODUCER }, + { "TENC", EXTRACTOR_ENCODED_BY }, { "TEXT", EXTRACTOR_LYRICS }, { "TOLY", EXTRACTOR_CONTRIBUTOR }, { "TOPE", EXTRACTOR_CONTRIBUTOR }, @@ -69,6 +77,7 @@ static Matches tmap[] = { { "TPE2", EXTRACTOR_ARTIST }, { "TPE3", EXTRACTOR_CONDUCTOR }, { "TPE4", EXTRACTOR_INTERPRET }, + { "TIME", EXTRACTOR_TIME }, { "TMED", EXTRACTOR_MEDIA_TYPE }, { "TCOM", EXTRACTOR_CREATOR }, { "TOFN", EXTRACTOR_FILENAME }, @@ -77,6 +86,7 @@ static Matches tmap[] = { { "TRSN", EXTRACTOR_SOURCE }, { "TRSO", EXTRACTOR_CREATED_FOR }, { "TSRC", EXTRACTOR_RESOURCE_IDENTIFIER }, + { "TYER", EXTRACTOR_YEAR }, { "TOAL", EXTRACTOR_ALBUM }, { "TALB", EXTRACTOR_ALBUM }, { "TLAN", EXTRACTOR_LANGUAGE }, diff --git a/src/plugins/id3v2extractor.c b/src/plugins/id3v2extractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published diff --git a/src/plugins/mp3extractor.c b/src/plugins/mp3extractor.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -243,6 +243,12 @@ int freq_table[4][3]={ #define SYSERR 1 #define INVALID_ID3 2 +static void trim(char * k) { + while ( (strlen(k) > 0) && + (isspace(k[strlen(k)-1])) ) + k[strlen(k)-1] = '\0'; +} + static int get_id3(const char * data, size_t size, id3tag * id3) { @@ -261,22 +267,27 @@ static int get_id3(const char * data, id3->title = convertToUtf8(pos, 30, "ISO-8859-1"); + trim(id3->title); pos += 30; id3->artist = convertToUtf8(pos, 30, "ISO-8859-1"); + trim(id3->artist); pos += 30; id3->album = convertToUtf8(pos, 30, "ISO-8859-1"); + trim(id3->album); pos += 30; id3->year = convertToUtf8(pos, 4, "ISO-8859-1"); + trim(id3->year); pos += 4; id3->comment = convertToUtf8(pos, 30, "ISO-8859-1"); + trim(id3->comment); pos += 30; id3->genre = ""; if (pos[0] < GENRE_NAME_COUNT) @@ -289,9 +300,8 @@ static struct EXTRACTOR_Keywords * addkword(EXTRACTOR_KeywordList *oldhead, const char * phrase, EXTRACTOR_KeywordType type) { - EXTRACTOR_KeywordList * keyword; - + keyword = malloc(sizeof(EXTRACTOR_KeywordList)); keyword->next = oldhead; keyword->keyword = strdup(phrase); @@ -460,10 +470,10 @@ libextractor_mp3_extract(const char * filename, if (strlen(info.album) > 0) klist = addkword(klist, info.album, EXTRACTOR_ALBUM); if (strlen(info.year) > 0) - klist = addkword(klist, info.year, EXTRACTOR_DATE); + klist = addkword(klist, info.year, EXTRACTOR_YEAR); if (strlen(info.genre) > 0) klist = addkword(klist, info.genre, EXTRACTOR_GENRE); - if (strlen(info.genre) > 0) + if (strlen(info.comment) > 0) klist = addkword(klist, info.comment, EXTRACTOR_COMMENT);