aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/id3v2_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/id3v2_extractor.c')
-rw-r--r-- src/plugins/id3v2_extractor.c 155
1 files changed, 155 insertions, 0 deletions
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c
new file mode 100644
index 0000000..fa5fea6
--- /dev/null
+++ b/src/plugins/id3v2_extractor.c
@@ -0,0 +1,155 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21
22#include "platform.h"
23#include "extractor.h"
24#ifndef MINGW
25#include <sys/mman.h>
26#endif
27#include "convert.h"
28
29#define DEBUG_EXTRACT_ID3v2 0
30
31typedef struct
32{
33 const char *text;
34 enum EXTRACTOR_MetaType type;
35} Matches;
36
37static Matches tmap[] = {
38 {"TAL", EXTRACTOR_METATYPE_TITLE},
39 {"TT1", EXTRACTOR_METATYPE_GROUP},
40 {"TT2", EXTRACTOR_METATYPE_TITLE},
41 {"TT3", EXTRACTOR_METATYPE_TITLE},
42 {"TXT", EXTRACTOR_METATYPE_DESCRIPTION},
43 {"TPB", EXTRACTOR_METATYPE_PUBLISHER},
44 {"WAF", EXTRACTOR_METATYPE_LOCATION},
45 {"WAR", EXTRACTOR_METATYPE_LOCATION},
46 {"WAS", EXTRACTOR_METATYPE_LOCATION},
47 {"WCP", EXTRACTOR_METATYPE_COPYRIGHT},
48 {"WAF", EXTRACTOR_METATYPE_LOCATION},
49 {"WCM", EXTRACTOR_METATYPE_DISCLAIMER},
50 {"TSS", EXTRACTOR_METATYPE_FORMAT},
51 {"TYE", EXTRACTOR_METATYPE_DATE},
52 {"TLA", EXTRACTOR_METATYPE_LANGUAGE},
53 {"TP1", EXTRACTOR_METATYPE_ARTIST},
54 {"TP2", EXTRACTOR_METATYPE_ARTIST},
55 {"TP3", EXTRACTOR_METATYPE_CONDUCTOR},
56 {"TP4", EXTRACTOR_METATYPE_INTERPRET},
57 {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR},
58 {"TOF", EXTRACTOR_METATYPE_FILENAME},
59 {"TEN", EXTRACTOR_METATYPE_PRODUCER},
60 {"TCO", EXTRACTOR_METATYPE_SUBJECT},
61 {"TCR", EXTRACTOR_METATYPE_COPYRIGHT},
62 {"SLT", EXTRACTOR_METATYPE_LYRICS},
63 {"TOA", EXTRACTOR_METATYPE_ARTIST},
64 {"TRC", EXTRACTOR_METATYPE_ISRC},
65 {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER},
66 {"TCM", EXTRACTOR_METATYPE_CREATOR},
67 {"TOT", EXTRACTOR_METATYPE_ALBUM},
68 {"TOL", EXTRACTOR_METATYPE_AUTHOR},
69 {"COM", EXTRACTOR_METATYPE_COMMENT},
70 {"", EXTRACTOR_METATYPE_KEYWORDS},
71 {NULL, 0},
72};
73
74
75/* mimetype = audio/mpeg */
76int
77EXTRACTOR_id3v2_extract (const unsigned char *data,
78 size_t size,
79 EXTRACTOR_MetaDataProcessor proc,
80 void *proc_cls,
81 const char *options)
82{
83 int unsync;
84 unsigned int tsize;
85 unsigned int pos;
86
87 if ((size < 16) ||
88 (data[0] != 0x49) ||
89 (data[1] != 0x44) ||
90 (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
91 return 0;
92 unsync = (data[5] & 0x80) > 0;
93 tsize = (((data[6] & 0x7F) << 21) |
94 ((data[7] & 0x7F) << 14) |
95 ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
96
97 if (tsize + 10 > size)
98 return 0;
99 pos = 10;
100 while (pos < tsize)
101 {
102 size_t csize;
103 int i;
104
105 if (pos + 6 > tsize)
106 return 0;
107 csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5];
108 if ((pos + 6 + csize > tsize) || (csize > tsize) || (csize == 0))
109 break;
110 i = 0;
111 while (tmap[i].text != NULL)
112 {
113 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3))
114 {
115 char *word;
116 /* this byte describes the encoding
117 try to convert strings to UTF-8
118 if it fails, then forget it */
119 switch (data[pos + 6])
120 {
121 case 0x00:
122 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
123 csize, "ISO-8859-1");
124 break;
125 case 0x01:
126 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
127 csize, "UCS-2");
128 break;
129 default:
130 /* bad encoding byte,
131 try to convert from iso-8859-1 */
132 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
133 csize, "ISO-8859-1");
134 break;
135 }
136 pos++;
137 csize--;
138 if ((word != NULL) && (strlen (word) > 0))
139 {
140 prev = addKeyword (prev, word, tmap[i].type);
141 }
142 else
143 {
144 free (word);
145 }
146 break;
147 }
148 i++;
149 }
150 pos += 6 + csize;
151 }
152 return 0;
153}
154
155/* end of id3v2_extractor.c */