commit fa7592ccb96d40353ff0270c57efe28057c81d7f
parent 5307edba27e65305173177ebbeb5759c0c60217a
Author: Christian Grothoff <christian@grothoff.org>
Date: Sat, 19 Dec 2009 21:10:55 +0000
id3v2
Diffstat:
4 files changed, 305 insertions(+), 75 deletions(-)
diff --git a/src/include/extractor.h b/src/include/extractor.h
@@ -136,7 +136,7 @@ enum EXTRACTOR_MetaType
EXTRACTOR_METATYPE_PUBLICATION_DATE = 24,
EXTRACTOR_METATYPE_BIBTEX_EPRINT = 25,
EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE = 26,
- EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE = 27,
+ EXTRACTOR_METATYPE_LANGUAGE = 27,
EXTRACTOR_METATYPE_CREATION_TIME = 28,
EXTRACTOR_METATYPE_URL = 29,
@@ -285,12 +285,21 @@ enum EXTRACTOR_MetaType
EXTRACTOR_METATYPE_CHAPTER_NAME = 153,
EXTRACTOR_METATYPE_SONG_COUNT = 154,
EXTRACTOR_METATYPE_STARTING_SONG = 155,
+ EXTRACTOR_METATYPE_PLAY_COUNTER = 156,
+ EXTRACTOR_METATYPE_CONDUCTOR = 157,
+ EXTRACTOR_METATYPE_INTERPRETATION = 158,
+ EXTRACTOR_METATYPE_COMPOSER = 159,
+ EXTRACTOR_METATYPE_BEATS_PER_MINUTE = 160,
+ EXTRACTOR_METATYPE_ENCODED_BY = 161,
+ EXTRACTOR_METATYPE_ORIGINAL_TITLE = 162,
+ EXTRACTOR_METATYPE_ORIGINAL_ARTIST = 163,
+ EXTRACTOR_METATYPE_ORIGINAL_WRITER = 164,
+ EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR = 165,
+ EXTRACTOR_METATYPE_LYRICS = 166,
+ EXTRACTOR_METATYPE_POPULARITY_METER = 167,
/* fixme: used up to here! */
- EXTRACTOR_METATYPE_LYRICS = 67,
- EXTRACTOR_METATYPE_CONDUCTOR = 64,
- EXTRACTOR_METATYPE_INTERPRET = 65,
EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER = 117,
EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST = 123,
@@ -304,7 +313,6 @@ enum EXTRACTOR_MetaType
/* numeric metrics */
- EXTRACTOR_METATYPE_POPULARITY_METER = 119,
EXTRACTOR_METATYPE_RATING = 145,
EXTRACTOR_METATYPE_PRIORITY = 60,
@@ -316,7 +324,6 @@ enum EXTRACTOR_MetaType
/* misc (see if these are still needed...) */
EXTRACTOR_METATYPE_GENERATOR = 103,
- EXTRACTOR_METATYPE_ENCODED_BY = 121,
EXTRACTOR_METATYPE_FULL_DATA = 137,
diff --git a/src/main/extractor_metatypes.c b/src/main/extractor_metatypes.c
@@ -380,14 +380,32 @@ static const struct MetaTypeDescription meta_type_descriptions[] = {
/* 155 */
{ gettext_noop ("starting song"),
gettext_noop ("number of the first song to play") },
- { gettext_noop (""),
- gettext_noop ("") },
- { gettext_noop (""),
- gettext_noop ("") },
- { gettext_noop (""),
- gettext_noop ("") },
- { gettext_noop (""),
+ { gettext_noop ("play counter"),
+ gettext_noop ("number of times the media has been played") },
+ { gettext_noop ("conductor"),
+ gettext_noop ("name of the conductor") },
+ { gettext_noop ("interpretation"),
+ gettext_noop ("information about the people behind interpretations of an existing piece") },
+ { gettext_noop ("composer"),
+ gettext_noop ("name of the composer") },
+ /* 160 */
+ { gettext_noop ("beats per minute"),
gettext_noop ("") },
+ { gettext_noop ("encoded by"),
+ gettext_noop ("name of person or organization that encoded the file") },
+ { gettext_noop ("original title"),
+ gettext_noop ("title of the original work") },
+ { gettext_noop ("original artist"),
+ gettext_noop ("name of the original artist") },
+ { gettext_noop ("original writer"),
+ gettext_noop ("name of the original lyricist or writer") },
+ /* 165 */
+ { gettext_noop ("original release year"),
+ gettext_noop ("year of the original release") },
+ { gettext_noop ("lyrics"),
+ gettext_noop ("lyrics of the song or text description of vocal activities") },
+ { gettext_noop ("popularity"),
+ gettext_noop ("information about the file's popularity") },
{ gettext_noop (""),
gettext_noop ("") },
#if 0
diff --git a/src/plugins/html_extractor.c b/src/plugins/html_extractor.c
@@ -44,7 +44,7 @@ static struct
{ "rights", EXTRACTOR_METATYPE_RIGHTS },
{ "dc.rights", EXTRACTOR_METATYPE_RIGHTS },
{ "copyright", EXTRACTOR_METATYPE_COPYRIGHT },
- { "language", EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE },
+ { "language", EXTRACTOR_METATYPE_LANGUAGE },
{ "keywords", EXTRACTOR_METATYPE_KEYWORDS },
{ "abstract", EXTRACTOR_METATYPE_ABSTRACT },
{ "formatter", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c
@@ -28,46 +28,91 @@
#define DEBUG_EXTRACT_ID3v2 0
+enum Id3v2Fmt
+ {
+ T, /* simple, 0-terminated string, prefixed by encoding:
+       the first payload byte selects the charset (0x00 ISO-8859-1,
+       0x01 UCS-2, any other value is treated as ISO-8859-1) and the
+       remaining payload bytes are the text */
+ U, /* 0-terminated ASCII string, no encoding:
+       the whole payload is converted to UTF-8 as ISO-8859-1 */
+ UL, /* unsync'ed lyrics:
+        encoding byte as for T, three bytes that are skipped
+        (presumably a language code as in L -- TODO confirm),
+        a description, then the lyrics text */
+ SL, /* sync'ed lyrics:
+        encoding byte as for T; the text is read starting
+        6 bytes into the payload */
+ L, /* string with language prefix:
+       encoding byte as for T, three bytes skipped for the
+       language, then the text */
+ I /* image:
+      three-character image format ("PNG", "JPG" or "-->") at payload
+      offset 1, picture-type byte at payload offset 5, a description,
+      then the binary image data */
+ };
+
typedef struct
{
const char *text; /* three-character frame identifier; NULL in the last table entry ends the lookup loop */
enum EXTRACTOR_MetaType type; /* metadata type associated with this frame; NOTE(review): the visible text-reporting call passes the local variable 'type' rather than this field -- confirm */
+ enum Id3v2Fmt fmt; /* payload layout; selects the decoding branch for the frame */
} Matches;
static Matches tmap[] = {
- {"TAL", EXTRACTOR_METATYPE_TITLE},
- {"TT1", EXTRACTOR_METATYPE_GROUP},
- {"TT2", EXTRACTOR_METATYPE_TITLE},
- {"TT3", EXTRACTOR_METATYPE_TITLE},
- {"TXT", EXTRACTOR_METATYPE_DESCRIPTION},
- {"TPB", EXTRACTOR_METATYPE_PUBLISHER},
- {"WAF", EXTRACTOR_METATYPE_LOCATION},
- {"WAR", EXTRACTOR_METATYPE_LOCATION},
- {"WAS", EXTRACTOR_METATYPE_LOCATION},
- {"WCP", EXTRACTOR_METATYPE_COPYRIGHT},
- {"WAF", EXTRACTOR_METATYPE_LOCATION},
- {"WCM", EXTRACTOR_METATYPE_DISCLAIMER},
- {"TSS", EXTRACTOR_METATYPE_FORMAT},
- {"TYE", EXTRACTOR_METATYPE_DATE},
- {"TLA", EXTRACTOR_METATYPE_LANGUAGE},
- {"TP1", EXTRACTOR_METATYPE_ARTIST},
- {"TP2", EXTRACTOR_METATYPE_ARTIST},
- {"TP3", EXTRACTOR_METATYPE_CONDUCTOR},
- {"TP4", EXTRACTOR_METATYPE_INTERPRET},
- {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR},
- {"TOF", EXTRACTOR_METATYPE_FILENAME},
- {"TEN", EXTRACTOR_METATYPE_PRODUCER},
- {"TCO", EXTRACTOR_METATYPE_SUBJECT},
- {"TCR", EXTRACTOR_METATYPE_COPYRIGHT},
- {"SLT", EXTRACTOR_METATYPE_LYRICS},
- {"TOA", EXTRACTOR_METATYPE_ARTIST},
- {"TRC", EXTRACTOR_METATYPE_ISRC},
- {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER},
- {"TCM", EXTRACTOR_METATYPE_CREATOR},
- {"TOT", EXTRACTOR_METATYPE_ALBUM},
- {"TOL", EXTRACTOR_METATYPE_AUTHOR},
- {"COM", EXTRACTOR_METATYPE_COMMENT},
- {"", EXTRACTOR_METATYPE_KEYWORDS},
+ /* Lookup table: each entry pairs a three-character frame identifier
+    with a metadata type and the payload format (enum Id3v2Fmt) used
+    to decode that frame.  Identifiers listed in "skipping" comments
+    have no entry in this table.
+    skipping UFI */
+ {"TT1", EXTRACTOR_METATYPE_SECTION, T},
+ {"TT2", EXTRACTOR_METATYPE_TITLE, T},
+ {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
+ {"TP1", EXTRACTOR_METATYPE_ARTIST, T},
+ {"TP2", EXTRACTOR_METATYPE_PERFORMER, T},
+ {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T},
+ {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T},
+ {"TCM", EXTRACTOR_METATYPE_COMPOSER, T},
+ {"TXT", EXTRACTOR_METATYPE_WRITER, T},
+ {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T},
+ {"TCO", EXTRACTOR_METATYPE_GENRE, T},
+ {"TAL", EXTRACTOR_METATYPE_ALBUM, T},
+ {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T},
+ {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
+ {"TRC", EXTRACTOR_METATYPE_ISRC, T},
+ {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
+ /*
+ FIXME: TDA and TIM (disabled below) together with TYE should be
+ combined into the actual publication date (if TRD is missing)
+ {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE},
+ {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE},
+ */
+ {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T},
+ {"TMT", EXTRACTOR_METATYPE_SOURCE, T},
+ {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
+ {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
+ {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T},
+ {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T},
+ {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T},
+ {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
+ {"TOF", EXTRACTOR_METATYPE_FILENAME, T},
+ {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
+ {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
+ /* skipping TDY, TKE */
+ {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
+ {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
+ {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
+ {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
+ /* skipping TXX */
+
+ {"WAF", EXTRACTOR_METATYPE_URL, U},
+ {"WAR", EXTRACTOR_METATYPE_URL, U},
+ {"WAS", EXTRACTOR_METATYPE_URL, U},
+ {"WCM", EXTRACTOR_METATYPE_URL, U},
+ {"WCP", EXTRACTOR_METATYPE_RIGHTS, U},
+ {"WCB", EXTRACTOR_METATYPE_URL, U},
+ /* skipping WXX */
+ {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
+ /* skipping MCI */
+ /* skipping ETC */
+ /* skipping MLL */
+ /* skipping STC */
+ {"ULT", EXTRACTOR_METATYPE_LYRICS, UL},
+ {"SLT", EXTRACTOR_METATYPE_LYRICS, SL},
+ {"COM", EXTRACTOR_METATYPE_COMMENT, L},
+ /* skipping RVA */
+ /* skipping EQU */
+ /* skipping REV */
+ {"PIC", EXTRACTOR_METATYPE_PICTURE, I},
+ /* skipping GEN */
+ /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
+ /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
+ /* skipping BUF */
+ /* skipping CRM */
+ /* skipping CRA */
+ /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */
{NULL, 0}, /* sentinel: text == NULL ends the lookup loop; the omitted fmt initializer is implicitly zero, i.e. T */
};
@@ -83,6 +128,9 @@ EXTRACTOR_id3v2_extract (const unsigned char *data,
int unsync;
unsigned int tsize;
unsigned int pos;
+ unsigned int off;
+ enum EXTRACTOR_MetaType type;
+ const char *mime;
if ((size < 16) ||
(data[0] != 0x49) ||
@@ -102,10 +150,10 @@ EXTRACTOR_id3v2_extract (const unsigned char *data,
size_t csize;
int i;
- if (pos + 6 > tsize)
+ if (pos + 7 > tsize)
return 0;
csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5];
- if ((pos + 6 + csize > tsize) || (csize > tsize) || (csize == 0))
+ if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0))
break;
i = 0;
while (tmap[i].text != NULL)
@@ -116,33 +164,190 @@ EXTRACTOR_id3v2_extract (const unsigned char *data,
/* this byte describes the encoding
try to convert strings to UTF-8
if it fails, then forget it */
- switch (data[pos + 6])
- {
- case 0x00:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
- csize, "ISO-8859-1");
- break;
- case 0x01:
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
- csize, "UCS-2");
- break;
- default:
- /* bad encoding byte,
- try to convert from iso-8859-1 */
- word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
- csize, "ISO-8859-1");
- break;
- }
- pos++;
- csize--;
- if ((word != NULL) && (strlen (word) > 0))
- {
- prev = addKeyword (prev, word, tmap[i].type);
- }
- else
+ switch (tmap[i].fmt)
+ {
+ case T:
+ switch (data[pos + 6])
+ {
+ case 0x00:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
+ csize - 1, "ISO-8859-1");
+ break;
+ case 0x01:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
+ csize - 1, "UCS-2");
+ break;
+ default:
+ /* bad encoding byte,
+ try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
+ csize - 1, "ISO-8859-1");
+ break;
+ }
+ break;
+ case U:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6],
+ csize, "ISO-8859-1");
+ break;
+ case UL:
+ if (csize < 6)
+ return 0; /* malformed */
+ /* find end of description */
+ off = 10;
+ while ( (off < size) &&
+ (off - pos < csize) &&
+ (data[pos + off] == '\0') )
+ off++;
+ if ( (off >= csize) ||
+ (data[pos+off] != '\0') )
+ return 0; /* malformed */
+ off++;
+ switch (data[pos + 6])
+ {
+ case 0x00:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
+ csize - off, "ISO-8859-1");
+ break;
+ case 0x01:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
+ csize - off, "UCS-2");
+ break;
+ default:
+ /* bad encoding byte,
+ try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
+ csize - off, "ISO-8859-1");
+ break;
+ }
+ break;
+ case SL:
+ if (csize < 7)
+ return 0; /* malformed */
+ /* find end of description */
+ switch (data[pos + 6])
+ {
+ case 0x00:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
+ csize - 6, "ISO-8859-1");
+ break;
+ case 0x01:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
+ csize - 6, "UCS-2");
+ break;
+ default:
+ /* bad encoding byte,
+ try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
+ csize - 6, "ISO-8859-1");
+ break;
+ }
+ break;
+ case L:
+ if (csize < 5)
+ return 0; /* malformed */
+ /* find end of description */
+ switch (data[pos + 6])
+ {
+ case 0x00:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
+ csize - 4, "ISO-8859-1");
+ break;
+ case 0x01:
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
+ csize - 4, "UCS-2");
+ break;
+ default:
+ /* bad encoding byte,
+ try to convert from iso-8859-1 */
+ word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
+ csize - 4, "ISO-8859-1");
+ break;
+ }
+ break;
+ case I:
+ if (csize < 6)
+ return 0; /* malformed */
+ /* find end of description */
+ off = 12;
+ while ( (off < size) &&
+ (off - pos < csize) &&
+ (data[pos + off] == '\0') )
+ off++;
+ if ( (off >= csize) ||
+ (data[pos+off] != '\0') )
+ return 0; /* malformed */
+ off++;
+ switch (data[pos+11])
+ {
+ case 0x03:
+ case 0x04:
+ type = EXTRACTOR_METATYPE_COVER_PICTURE;
+ break;
+ case 0x07:
+ case 0x08:
+ case 0x09:
+ case 0x0A:
+ case 0x0B:
+ case 0x0C:
+ type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
+ break;
+ case 0x0D:
+ case 0x0E:
+ case 0x0F:
+ type = EXTRACTOR_METATYPE_EVENT_PICTURE;
+ break;
+ case 0x14:
+ type = EXTRACTOR_METATYPE_LOGO;
+ type = EXTRACTOR_METATYPE_LOGO;
+ break;
+ default:
+ type = EXTRACTOR_METATYPE_PICTURE;
+ break;
+ }
+ if (0 == strncasecmp ("PNG",
+ (const char*) &data[pos + 7], 3))
+ mime = "image/png";
+ else if (0 == strncasecmp ("JPG",
+ (const char*) &data[pos + 7], 3))
+ mime = "image/jpeg";
+ else
+ mime = NULL;
+ if (0 == strncasecmp ("-->",
+ (const char*) &data[pos + 7], 3))
+ {
+ /* not supported */
+ }
+ else
+ {
+ if (0 != proc (proc_cls,
+ "id3v2",
+ type,
+ EXTRACTOR_METAFORMAT_BINARY,
+ mime,
+ (const char*) &data[pos + off],
+ csize + 6 - off))
+ return 1;
+ }
+ word = NULL;
+ break;
+ default:
+ return 0;
+ }
+ if ((word != NULL) && (strlen (word) > 0))
{
- free (word);
- }
+ if (0 != proc (proc_cls,
+ "id3v2",
+ type,
+ EXTRACTOR_METAFORMAT_UTF8,
+ "text/plain",
+ word,
+ strlen(word)+1))
+ {
+ free (word);
+ return 1;
+ }
+ }
+ free (word);
break;
}
i++;