aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/id3v24_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/id3v24_extractor.c')
-rw-r--r--src/plugins/id3v24_extractor.c230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/plugins/id3v24_extractor.c b/src/plugins/id3v24_extractor.c
new file mode 100644
index 0000000..ec11e4a
--- /dev/null
+++ b/src/plugins/id3v24_extractor.c
@@ -0,0 +1,230 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21
22#define DEBUG_EXTRACT_ID3v24 0
23
24#include "platform.h"
25#include "extractor.h"
26#include <string.h>
27#include <stdio.h>
28#include <sys/types.h>
29#include <sys/stat.h>
30#include <unistd.h>
31#include <stdlib.h>
32#include <fcntl.h>
33#ifndef MINGW
34#include <sys/mman.h>
35#endif
36#include "convert.h"
37
38
39static struct EXTRACTOR_Keywords *
40addKeyword (EXTRACTOR_KeywordList * oldhead,
41 char *phrase, EXTRACTOR_KeywordType type)
42{
43 EXTRACTOR_KeywordList *keyword;
44
45 keyword = malloc (sizeof (EXTRACTOR_KeywordList));
46 keyword->next = oldhead;
47 keyword->keyword = phrase;
48 keyword->keywordType = type;
49 return keyword;
50}
51
52typedef struct
53{
54 char *text;
55 enum EXTRACTOR_MetaType type;
56} Matches;
57
58static Matches tmap[] = {
59 {"COMM", EXTRACTOR_METATYPE_COMMENT},
60 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR},
61 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR},
62 {"TMOO", EXTRACTOR_METATYPE_MOOD},
63 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST},
64 {"LINK", EXTRACTOR_METATYPE_LINK},
65 {"MCDI", EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER},
66 {"PCNT", EXTRACTOR_METATYPE_PLAY_COUNTER},
67 {"POPM", EXTRACTOR_METATYPE_POPULARITY_METER},
68 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT},
69 {"TDRC", EXTRACTOR_METATYPE_DATE},
70 {"TCON", EXTRACTOR_METATYPE_GENRE},
71 {"TIT1", EXTRACTOR_METATYPE_GENRE},
72 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY},
73 {"TEXT", EXTRACTOR_METATYPE_LYRICS},
74 {"TOLY", EXTRACTOR_METATYPE_CONTRIBUTOR},
75 {"TOPE", EXTRACTOR_METATYPE_CONTRIBUTOR},
76 {"TOWN", EXTRACTOR_METATYPE_OWNER},
77 {"TPE1", EXTRACTOR_METATYPE_ARTIST},
78 {"TPE2", EXTRACTOR_METATYPE_ARTIST},
79 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR},
80 {"TPE4", EXTRACTOR_METATYPE_INTERPRET},
81 {"TIME", EXTRACTOR_METATYPE_TIME},
82 {"TMED", EXTRACTOR_METATYPE_MEDIA_TYPE},
83 {"TCOM", EXTRACTOR_METATYPE_CREATOR},
84 {"TOFN", EXTRACTOR_METATYPE_FILENAME},
85 {"TOPE", EXTRACTOR_METATYPE_ARTIST},
86 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER},
87 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER},
88 {"TRSC", EXTRACTOR_METATYPE_ISRC},
89 {"TRSN", EXTRACTOR_METATYPE_SOURCE},
90 {"TRSO", EXTRACTOR_METATYPE_CREATED_FOR},
91 {"TSRC", EXTRACTOR_METATYPE_RESOURCE_IDENTIFIER},
92 {"TYER", EXTRACTOR_METATYPE_YEAR},
93 {"TOAL", EXTRACTOR_METATYPE_ALBUM},
94 {"TALB", EXTRACTOR_METATYPE_ALBUM},
95 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE},
96 {"TIT2", EXTRACTOR_METATYPE_TITLE},
97 {"TIT3", EXTRACTOR_METATYPE_DESCRIPTION},
98 {"WCOM", EXTRACTOR_METATYPE_RELEASE},
99 {"WCOP", EXTRACTOR_METATYPE_DISCLAIMER},
100 {"", EXTRACTOR_METATYPE_KEYWORDS},
101 {NULL, 0}
102};
103
104
105/* mimetype = audio/mpeg */
106int
107EXTRACTOR_id3v24_extract (const unsigned char *data,
108 size_t size,
109 EXTRACTOR_MetaDataProcessor proc,
110 void *proc_cls,
111 const char *options)
112{
113 int unsync;
114 int extendedHdr;
115 int experimental;
116 int footer;
117 unsigned int tsize;
118 unsigned int pos;
119 unsigned int ehdrSize;
120 unsigned int padding;
121
122 if ((size < 16) ||
123 (data[0] != 0x49) ||
124 (data[1] != 0x44) ||
125 (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
126 return prev;
127 unsync = (data[5] & 0x80) > 0;
128 extendedHdr = (data[5] & 0x40) > 0;
129 experimental = (data[5] & 0x20) > 0;
130 footer = (data[5] & 0x10) > 0;
131 tsize = (((data[6] & 0x7F) << 21) |
132 ((data[7] & 0x7F) << 14) |
133 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
134 if ((tsize + 10 > size) || (experimental))
135 return prev;
136 pos = 10;
137 padding = 0;
138 if (extendedHdr)
139 {
140 ehdrSize = (((data[10] & 0x7F) << 21) |
141 ((data[11] & 0x7F) << 14) |
142 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
143 pos += ehdrSize;
144 }
145
146
147 while (pos < tsize)
148 {
149 size_t csize;
150 int i;
151 unsigned short flags;
152
153 if (pos + 10 > tsize)
154 return prev;
155
156 csize = (((data[pos + 4] & 0x7F) << 21) |
157 ((data[pos + 5] & 0x7F) << 14) |
158 ((data[pos + 6] & 0x7F) << 7) | ((data[pos + 7] & 0x7F) << 0));
159
160 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
161 break;
162 flags = (data[pos + 8] << 8) + data[pos + 9];
163 if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
164 ((flags & 0x40) > 0) /* encrypted, not supported */ )
165 {
166 pos += 10 + csize;
167 continue;
168 }
169 i = 0;
170 while (tmap[i].text != NULL)
171 {
172 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
173 {
174 char *word;
175 if ((flags & 0x20) > 0)
176 {
177 /* "group" identifier, skip a byte */
178 pos++;
179 csize--;
180 }
181
182 /* this byte describes the encoding
183 try to convert strings to UTF-8
184 if it fails, then forget it */
185 csize--;
186 switch (data[pos + 10])
187 {
188 case 0x00:
189 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
190 csize, "ISO-8859-1");
191 break;
192 case 0x01:
193 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
194 csize, "UTF-16");
195 break;
196 case 0x02:
197 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
198 csize, "UTF-16BE");
199 break;
200 case 0x03:
201 word = malloc (csize + 1);
202 memcpy (word, &data[pos + 11], csize);
203 word[csize] = '\0';
204 break;
205 default:
206 /* bad encoding byte,
207 try to convert from iso-8859-1 */
208 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
209 csize, "ISO-8859-1");
210 break;
211 }
212 pos++;
213 if ((word != NULL) && (strlen (word) > 0))
214 {
215 prev = addKeyword (prev, word, tmap[i].type);
216 }
217 else
218 {
219 free (word);
220 }
221 break;
222 }
223 i++;
224 }
225 pos += 10 + csize;
226 }
227 return prev;
228}
229
230/* end of id3v24_extractor.c */