aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/id3v23_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/id3v23_extractor.c')
-rw-r--r--src/plugins/id3v23_extractor.c219
1 files changed, 219 insertions, 0 deletions
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c
new file mode 100644
index 0000000..11b04d9
--- /dev/null
+++ b/src/plugins/id3v23_extractor.c
@@ -0,0 +1,219 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21#define DEBUG_EXTRACT_ID3v23 0
22
23#include "platform.h"
24#include "extractor.h"
25#include <string.h>
26#include <stdio.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <fcntl.h>
32#ifndef MINGW
33#include <sys/mman.h>
34#endif
35
36#include "convert.h"
37
38static struct EXTRACTOR_Keywords *
39addKeyword (EXTRACTOR_KeywordList * oldhead,
40 char *phrase, EXTRACTOR_KeywordType type)
41{
42 EXTRACTOR_KeywordList *keyword;
43
44 keyword = malloc (sizeof (EXTRACTOR_KeywordList));
45 keyword->next = oldhead;
46 keyword->keyword = phrase;
47 keyword->keywordType = type;
48 return keyword;
49}
50
51typedef struct
52{
53 const char *text;
54 enum EXTRACTOR_MetaType type;
55} Matches;
56
57static Matches tmap[] = {
58 {"COMM", EXTRACTOR_METATYPE_COMMENT},
59 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR},
60 {"LINK", EXTRACTOR_METATYPE_LINK},
61 {"MCDI", EXTRACTOR_METATYPE_MUSIC_CD_IDENTIFIER},
62 {"PCNT", EXTRACTOR_METATYPE_PLAY_COUNTER},
63 {"POPM", EXTRACTOR_METATYPE_POPULARITY_METER},
64 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT},
65 {"TDAT", EXTRACTOR_METATYPE_DATE},
66 {"TCON", EXTRACTOR_METATYPE_CONTENT_TYPE},
67 {"TIT1", EXTRACTOR_METATYPE_GENRE},
68 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY},
69 {"TEXT", EXTRACTOR_METATYPE_LYRICS},
70 {"TOLY", EXTRACTOR_METATYPE_CONTRIBUTOR},
71 {"TOPE", EXTRACTOR_METATYPE_CONTRIBUTOR},
72 {"TOWN", EXTRACTOR_METATYPE_OWNER},
73 {"TPE1", EXTRACTOR_METATYPE_ARTIST},
74 {"TPE2", EXTRACTOR_METATYPE_ARTIST},
75 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR},
76 {"TPE4", EXTRACTOR_METATYPE_INTERPRET},
77 {"TMED", EXTRACTOR_METATYPE_MEDIA_TYPE},
78 {"TCOM", EXTRACTOR_METATYPE_CREATOR},
79 {"TIME", EXTRACTOR_METATYPE_TIME},
80 {"TOFN", EXTRACTOR_METATYPE_FILENAME},
81 {"TOPE", EXTRACTOR_METATYPE_ARTIST},
82 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER},
83 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER},
84 {"TRSC", EXTRACTOR_METATYPE_ISRC},
85 {"TRSN", EXTRACTOR_METATYPE_SOURCE},
86 {"TRSO", EXTRACTOR_METATYPE_CREATED_FOR},
87 {"TSRC", EXTRACTOR_METATYPE_RESOURCE_IDENTIFIER},
88 {"TOAL", EXTRACTOR_METATYPE_ALBUM},
89 {"TALB", EXTRACTOR_METATYPE_ALBUM},
90 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE},
91 {"TYER", EXTRACTOR_METATYPE_YEAR},
92 {"TLEN", EXTRACTOR_METATYPE_DURATION},
93 {"TIT2", EXTRACTOR_METATYPE_TITLE},
94 {"TIT3", EXTRACTOR_METATYPE_DESCRIPTION},
95 {"WCOM", EXTRACTOR_METATYPE_RELEASE},
96 {"WCOP", EXTRACTOR_METATYPE_DISCLAIMER},
97 {"", EXTRACTOR_METATYPE_KEYWORDS},
98 {NULL, 0}
99};
100
101
102/* mimetype = audio/mpeg */
103int
104EXTRACTOR_id3v23_extract (const unsigned char *data,
105 size_t size,
106 EXTRACTOR_MetaDataProcessor proc,
107 void *proc_cls,
108 const char *options)
109{
110 int unsync;
111 int extendedHdr;
112 int experimental;
113 uint32_t tsize;
114 uint32_t pos;
115 uint32_t ehdrSize;
116 uint32_t padding;
117 uint32_t csize;
118 int i;
119 uint16_t flags;
120
121 if ((size < 16) ||
122 (data[0] != 0x49) ||
123 (data[1] != 0x44) ||
124 (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
125 return prev;
126 unsync = (data[5] & 0x80) > 0;
127 extendedHdr = (data[5] & 0x40) > 0;
128 experimental = (data[5] & 0x20) > 0;
129 tsize = (((data[6] & 0x7F) << 21) |
130 ((data[7] & 0x7F) << 14) |
131 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
132 if ((tsize + 10 > size) || (experimental))
133 return prev;
134 pos = 10;
135 padding = 0;
136 if (extendedHdr)
137 {
138 ehdrSize = (((data[10]) << 24) |
139 ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0));
140
141 padding = (((data[15]) << 24) |
142 ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0));
143 pos += 4 + ehdrSize;
144 if (padding < tsize)
145 tsize -= padding;
146 else
147 return prev;
148 }
149
150
151 while (pos < tsize)
152 {
153 if (pos + 10 > tsize)
154 return prev;
155 csize =
156 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
157 data[pos + 7];
158 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
159 break;
160 flags = (data[pos + 8] << 8) + data[pos + 9];
161 if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
162 ((flags & 0x40) > 0) /* encrypted, not supported */ )
163 {
164 pos += 10 + csize;
165 continue;
166 }
167 i = 0;
168 while (tmap[i].text != NULL)
169 {
170 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
171 {
172 char *word;
173 if ((flags & 0x20) > 0)
174 {
175 /* "group" identifier, skip a byte */
176 pos++;
177 csize--;
178 }
179 csize--;
180 /* this byte describes the encoding
181 try to convert strings to UTF-8
182 if it fails, then forget it */
183 switch (data[pos + 10])
184 {
185 case 0x00:
186 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
187 csize, "ISO-8859-1");
188 break;
189 case 0x01:
190 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
191 csize, "UCS-2");
192 break;
193 default:
194 /* bad encoding byte,
195 try to convert from iso-8859-1 */
196 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
197 csize, "ISO-8859-1");
198 break;
199 }
200 pos++;
201 if ((word != NULL) && (strlen (word) > 0))
202 {
203 prev = addKeyword (prev, word, tmap[i].type);
204 }
205 else
206 {
207 if (word != NULL)
208 free (word);
209 }
210 break;
211 }
212 i++;
213 }
214 pos += 10 + csize;
215 }
216 return prev;
217}
218
219/* end of id3v23_extractor.c */