aboutsummaryrefslogtreecommitdiff
path: root/src/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/Makefile.am385
-rw-r--r--src/plugins/id3_extractor.c149
-rw-r--r--src/plugins/id3v23_extractor.c420
-rw-r--r--src/plugins/id3v24_extractor.c455
-rw-r--r--src/plugins/id3v2_extractor.c957
-rw-r--r--src/plugins/mp3_extractor.c425
-rw-r--r--src/plugins/template_extractor.c122
7 files changed, 1196 insertions, 1717 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index c489d19..465db7c 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -1,4 +1,4 @@
1INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common 1INCLUDES = -I$(top_srcdir)/src/include -I$(top_srcdir)/src/common -I$(top_srcdir)/src/main
2 2
3# install plugins under: 3# install plugins under:
4plugindir = $(libdir)/@RPLUGINDIR@ 4plugindir = $(libdir)/@RPLUGINDIR@
@@ -11,183 +11,23 @@ PLUGINFLAGS = $(makesymbolic) $(LE_PLUGIN_LDFLAGS)
11 11
12SUBDIRS = . 12SUBDIRS = .
13 13
14if HAVE_FFMPEG
15 thumbffmpeg=libextractor_thumbnailffmpeg.la
16endif
17
18if HAVE_LIBRPM
19 rpm=libextractor_rpm.la
20endif
21
22if HAVE_GLIB
23if WITH_GSF
24 ole2=libextractor_ole2.la
25endif
26if HAVE_GTK
27 thumbgtk=libextractor_thumbnailgtk.la
28endif
29endif
30
31if HAVE_QT
32 thumbqt=libextractor_thumbnailqt.la
33 qtflags=-lQtGui -lQtCore -lpthread
34else
35if HAVE_QT4
36 thumbqt=libextractor_thumbnailqt.la
37 qtflags=-lQtGui4 -lQtCore4
38endif
39endif
40
41if HAVE_QT_SVG
42 svgflags = -lQtSvg
43else
44if HAVE_QT_SVG4
45 svgflags = -lQtSvg4
46endif
47endif
48
49if HAVE_CXX
50if HAVE_EXIV2
51 exiv2=libextractor_exiv2.la
52endif
53if HAVE_POPPLER
54 pdf=libextractor_pdf.la
55endif
56endif
57
58if HAVE_MPEG2
59 mpeg = libextractor_mpeg.la
60endif
61
62if HAVE_VORBISFILE
63 ogg = libextractor_ogg.la
64endif
65
66if HAVE_FLAC
67 flac = libextractor_flac.la
68endif
69
70if NEED_VORBIS
71 vorbisflag = -lvorbis
72endif
73
74if NEED_OGG
75 flacoggflag = -logg
76endif
77
78plugin_LTLIBRARIES = \ 14plugin_LTLIBRARIES = \
79 libextractor_applefile.la \
80 libextractor_asf.la \
81 libextractor_deb.la \
82 libextractor_dvi.la \
83 libextractor_elf.la \
84 $(exiv2) \
85 $(flac) \
86 libextractor_flv.la \
87 libextractor_gif.la \
88 libextractor_html.la \
89 libextractor_id3.la \ 15 libextractor_id3.la \
90 libextractor_id3v2.la \ 16 libextractor_id3v2.la \
91 libextractor_id3v23.la \ 17 libextractor_mp3.la
92 libextractor_id3v24.la \
93 libextractor_it.la \
94 libextractor_jpeg.la \
95 libextractor_man.la \
96 libextractor_mime.la \
97 libextractor_mkv.la \
98 libextractor_mp3.la \
99 $(mpeg) \
100 libextractor_nsf.la \
101 libextractor_nsfe.la \
102 libextractor_odf.la \
103 $(ogg) \
104 $(ole2) \
105 $(pdf) \
106 libextractor_png.la \
107 libextractor_ps.la \
108 libextractor_qt.la \
109 libextractor_real.la \
110 libextractor_riff.la \
111 $(rpm) \
112 libextractor_s3m.la \
113 libextractor_sid.la \
114 libextractor_tar.la \
115 $(thumbgtk) \
116 $(thumbqt) \
117 $(thumbffmpeg) \
118 libextractor_tiff.la \
119 libextractor_wav.la \
120 libextractor_xm.la \
121 libextractor_zip.la
122 18
123libextractor_applefile_la_SOURCES = \ 19libextractor_mp3_la_SOURCES = \
124 applefile_extractor.c 20 mp3_extractor.c
125libextractor_applefile_la_LDFLAGS = \ 21libextractor_mp3_la_LDFLAGS = \
126 $(PLUGINFLAGS) 22 $(PLUGINFLAGS)
127libextractor_applefile_la_LIBADD = \ 23libextractor_mp3_la_LIBADD = \
128 $(top_builddir)/src/common/libextractor_common.la \
129 $(LE_LIBINTL)
130
131libextractor_asf_la_SOURCES = \
132 asf_extractor.c
133libextractor_asf_la_LDFLAGS = \
134 $(top_builddir)/src/common/libextractor_common.la \ 24 $(top_builddir)/src/common/libextractor_common.la \
135 $(PLUGINFLAGS)
136
137libextractor_deb_la_SOURCES = \
138 deb_extractor.c
139libextractor_deb_la_LDFLAGS = \
140 $(PLUGINFLAGS)
141libextractor_deb_la_LIBADD = \
142 -lz
143
144libextractor_dvi_la_SOURCES = \
145 dvi_extractor.c
146libextractor_dvi_la_LDFLAGS = \
147 $(PLUGINFLAGS)
148
149libextractor_elf_la_SOURCES = \
150 elf_extractor.c
151libextractor_elf_la_LDFLAGS = \
152 $(PLUGINFLAGS)
153libextractor_elf_la_LIBADD = \
154 $(top_builddir)/src/common/libextractor_common.la
155
156libextractor_exiv2_la_SOURCES = \
157 exiv2_extractor.cc
158libextractor_exiv2_la_LDFLAGS = \
159 $(XTRA_CPPLIBS) $(PLUGINFLAGS)
160libextractor_exiv2_la_LIBADD = \
161 -lexiv2
162
163libextractor_flac_la_SOURCES = \
164 flac_extractor.c
165libextractor_flac_la_LDFLAGS = \
166 $(PLUGINFLAGS)
167libextractor_flac_la_LIBADD = \
168 -lFLAC $(flacoggflag) \
169 $(LE_LIBINTL) 25 $(LE_LIBINTL)
170 26
171libextractor_flv_la_SOURCES = \ 27libextractor_ebml_la_SOURCES = \
172 flv_extractor.c 28 ebml_extractor.c
173libextractor_flv_la_LDFLAGS = \ 29libextractor_ebml_la_LDFLAGS = \
174 $(PLUGINFLAGS) 30 $(PLUGINFLAGS)
175libextractor_flv_la_LIBADD = \
176 $(top_builddir)/src/common/libextractor_common.la
177
178libextractor_gif_la_SOURCES = \
179 gif_extractor.c
180libextractor_gif_la_LDFLAGS = \
181 $(PLUGINFLAGS)
182libextractor_gif_la_LIBADD = \
183 $(top_builddir)/src/common/libextractor_common.la
184
185libextractor_html_la_SOURCES = \
186 html_extractor.c
187libextractor_html_la_LDFLAGS = \
188 $(PLUGINFLAGS)
189libextractor_html_la_LIBADD = \
190 $(top_builddir)/src/common/libextractor_common.la
191 31
192libextractor_id3_la_SOURCES = \ 32libextractor_id3_la_SOURCES = \
193 id3_extractor.c 33 id3_extractor.c
@@ -204,211 +44,4 @@ libextractor_id3v2_la_LDFLAGS = \
204libextractor_id3v2_la_LIBADD = \ 44libextractor_id3v2_la_LIBADD = \
205 $(top_builddir)/src/common/libextractor_common.la 45 $(top_builddir)/src/common/libextractor_common.la
206 46
207libextractor_id3v23_la_SOURCES = \
208 id3v23_extractor.c
209libextractor_id3v23_la_LDFLAGS = \
210 $(PLUGINFLAGS)
211libextractor_id3v23_la_LIBADD = \
212 $(top_builddir)/src/common/libextractor_common.la
213
214libextractor_id3v24_la_SOURCES = \
215 id3v24_extractor.c
216libextractor_id3v24_la_LDFLAGS = \
217 $(PLUGINFLAGS)
218libextractor_id3v24_la_LIBADD = \
219 $(top_builddir)/src/common/libextractor_common.la
220
221libextractor_it_la_SOURCES = \
222 it_extractor.c
223libextractor_it_la_LDFLAGS = \
224 $(PLUGINFLAGS)
225
226libextractor_jpeg_la_SOURCES = \
227 jpeg_extractor.c
228libextractor_jpeg_la_LDFLAGS = \
229 $(PLUGINFLAGS)
230libextractor_jpeg_la_LIBADD = \
231 $(LE_LIBINTL)
232
233libextractor_man_la_SOURCES = \
234 man_extractor.c
235libextractor_man_la_LDFLAGS = \
236 $(PLUGINFLAGS)
237libextractor_man_la_LIBADD = \
238 $(LE_LIBINTL)
239
240libextractor_mime_la_SOURCES = \
241 mime_extractor.c
242libextractor_mime_la_LDFLAGS = \
243 $(PLUGINFLAGS)
244
245libextractor_mkv_la_SOURCES = \
246 mkv_extractor.c
247libextractor_mkv_la_LDFLAGS = \
248 $(PLUGINFLAGS)
249
250libextractor_mp3_la_SOURCES = \
251 mp3_extractor.c
252libextractor_mp3_la_LDFLAGS = \
253 $(PLUGINFLAGS)
254libextractor_mp3_la_LIBADD = \
255 $(top_builddir)/src/common/libextractor_common.la \
256 $(LE_LIBINTL)
257
258libextractor_mpeg_la_SOURCES = \
259 mpeg_extractor.c
260libextractor_mpeg_la_LDFLAGS = \
261 $(PLUGINFLAGS)
262libextractor_mpeg_la_LIBADD = \
263 -lmpeg2
264
265libextractor_nsf_la_SOURCES = \
266 nsf_extractor.c
267libextractor_nsf_la_LDFLAGS = \
268 $(PLUGINFLAGS)
269
270libextractor_nsfe_la_SOURCES = \
271 nsfe_extractor.c
272libextractor_nsfe_la_LDFLAGS = \
273 $(PLUGINFLAGS)
274
275libextractor_odf_la_SOURCES = \
276 odf_extractor.c
277libextractor_odf_la_LDFLAGS = \
278 $(PLUGINFLAGS)
279libextractor_odf_la_LIBADD = \
280 $(top_builddir)/src/common/libextractor_common.la \
281 -lz
282
283libextractor_ogg_la_SOURCES = \
284 ogg_extractor.c
285libextractor_ogg_la_LDFLAGS = \
286 $(PLUGINFLAGS)
287libextractor_ogg_la_LIBADD = \
288 -lvorbisfile $(vorbisflag) -logg
289
290libextractor_ole2_la_SOURCES = \
291 ole2_extractor.c
292libextractor_ole2_la_CFLAGS = \
293 $(GSF_CFLAGS)
294libextractor_ole2_la_LIBADD = \
295 $(LIBADD) $(GSF_LIBS) \
296 $(top_builddir)/src/common/libextractor_common.la
297libextractor_ole2_la_LDFLAGS = \
298 $(PLUGINFLAGS)
299
300libextractor_pdf_la_SOURCES = \
301 pdf_extractor.cc
302libextractor_pdf_la_LDFLAGS = \
303 $(XTRA_CPPLIBS) $(PLUGINFLAGS)
304libextractor_pdf_la_LIBADD = \
305 $(top_builddir)/src/common/libextractor_common.la \
306 -lpoppler
307
308libextractor_png_la_SOURCES = \
309 png_extractor.c
310libextractor_png_la_LDFLAGS = \
311 $(PLUGINFLAGS)
312libextractor_png_la_LIBADD = \
313 $(top_builddir)/src/common/libextractor_common.la \
314 -lz
315
316libextractor_ps_la_SOURCES = \
317 ps_extractor.c
318libextractor_ps_la_LDFLAGS = \
319 $(PLUGINFLAGS)
320
321libextractor_qt_la_SOURCES = \
322 qt_extractor.c
323libextractor_qt_la_LDFLAGS = \
324 $(PLUGINFLAGS)
325libextractor_qt_la_LIBADD = \
326 -lz -lm
327
328libextractor_real_la_SOURCES = \
329 real_extractor.c
330libextractor_real_la_LDFLAGS = \
331 $(PLUGINFLAGS)
332
333libextractor_riff_la_SOURCES = \
334 riff_extractor.c
335libextractor_riff_la_LDFLAGS = \
336 $(PLUGINFLAGS)
337libextractor_riff_la_LIBADD = \
338 $(LE_LIBINTL) \
339 -lm
340
341libextractor_rpm_la_SOURCES = \
342 rpm_extractor.c
343libextractor_rpm_la_LDFLAGS = \
344 $(PLUGINFLAGS)
345libextractor_rpm_la_LIBADD = \
346 -lrpm
347
348libextractor_s3m_la_SOURCES = \
349 s3m_extractor.c
350libextractor_s3m_la_LDFLAGS = \
351 $(PLUGINFLAGS)
352
353libextractor_sid_la_SOURCES = \
354 sid_extractor.c
355libextractor_sid_la_LDFLAGS = \
356 $(PLUGINFLAGS)
357
358libextractor_tar_la_SOURCES = \
359 tar_extractor.c
360libextractor_tar_la_LDFLAGS = \
361 $(PLUGINFLAGS)
362
363libextractor_thumbnailffmpeg_la_SOURCES = \
364 thumbnailffmpeg_extractor.c
365libextractor_thumbnailffmpeg_la_LIBADD = \
366 -lavformat -lavcodec -lswscale -lavutil -lz -lbz2
367libextractor_thumbnailffmpeg_la_LDFLAGS = \
368 $(PLUGINFLAGS)
369
370libextractor_thumbnailgtk_la_CFLAGS = \
371 $(GLIB_CFLAGS) $(GTK_CFLAGS)
372libextractor_thumbnailgtk_la_LIBADD = \
373 $(LIBADD) -lgobject-2.0 @GTK_LIBS@
374libextractor_thumbnailgtk_la_LDFLAGS = \
375 $(PLUGINFLAGS)
376libextractor_thumbnailgtk_la_SOURCES = \
377 thumbnailgtk_extractor.c
378
379libextractor_thumbnailqt_la_SOURCES = \
380 thumbnailqt_extractor.cc
381libextractor_thumbnailqt_la_LDFLAGS = \
382 $(QT_LDFLAGS) \
383 $(PLUGINFLAGS)
384libextractor_thumbnailqt_la_LIBADD = \
385 $(qtflags) $(svgflags)
386libextractor_thumbnailqt_la_CPPFLAGS = \
387 $(QT_CPPFLAGS) \
388 $(QT_CFLAGS) $(QT_SVG_CFLAGS)
389
390libextractor_tiff_la_SOURCES = \
391 tiff_extractor.c
392libextractor_tiff_la_LDFLAGS = \
393 $(PLUGINFLAGS)
394libextractor_tiff_la_LIBADD = \
395 $(top_builddir)/src/common/libextractor_common.la
396
397libextractor_wav_la_SOURCES = \
398 wav_extractor.c
399libextractor_wav_la_LDFLAGS = \
400 $(PLUGINFLAGS)
401libextractor_wav_la_LIBADD = \
402 $(LE_LIBINTL)
403
404libextractor_xm_la_SOURCES = \
405 xm_extractor.c
406libextractor_xm_la_LDFLAGS = \
407 $(PLUGINFLAGS)
408
409libextractor_zip_la_SOURCES = \
410 zip_extractor.c
411libextractor_zip_la_LDFLAGS = \
412 $(PLUGINFLAGS)
413
414EXTRA_DIST = template_extractor.c 47EXTRA_DIST = template_extractor.c
diff --git a/src/plugins/id3_extractor.c b/src/plugins/id3_extractor.c
index 64d341c..39bd779 100644
--- a/src/plugins/id3_extractor.c
+++ b/src/plugins/id3_extractor.c
@@ -29,6 +29,8 @@
29#include <unistd.h> 29#include <unistd.h>
30#include <stdlib.h> 30#include <stdlib.h>
31 31
32#include "extractor_plugins.h"
33
32typedef struct 34typedef struct
33{ 35{
34 char *title; 36 char *title;
@@ -199,6 +201,46 @@ static const char *const genre_names[] = {
199#define OK 0 201#define OK 0
200#define INVALID_ID3 1 202#define INVALID_ID3 1
201 203
204struct id3_state
205{
206 int state;
207 id3tag info;
208};
209
210enum ID3State
211{
212 ID3_INVALID = -1,
213 ID3_SEEKING_TO_TAIL = 0,
214 ID3_READING_TAIL = 1
215};
216
217void
218EXTRACTOR_id3_init_state_method (struct EXTRACTOR_PluginList *plugin)
219{
220 struct id3_state *state;
221 state = plugin->state = malloc (sizeof (struct id3_state));
222 if (state == NULL)
223 return;
224 memset (state, 0, sizeof (struct id3_state));
225 state->state = ID3_SEEKING_TO_TAIL;
226}
227
228void
229EXTRACTOR_id3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
230{
231 struct id3_state *state = plugin->state;
232 if (state != NULL)
233 {
234 if (state->info.title != NULL) free (state->info.title);
235 if (state->info.year != NULL) free (state->info.year);
236 if (state->info.album != NULL) free (state->info.album);
237 if (state->info.artist != NULL) free (state->info.artist);
238 if (state->info.comment != NULL) free (state->info.comment);
239 free (state);
240 }
241 plugin->state = NULL;
242}
243
202static void 244static void
203trim (char *k) 245trim (char *k)
204{ 246{
@@ -209,14 +251,14 @@ trim (char *k)
209} 251}
210 252
211static int 253static int
212get_id3 (const char *data, size_t size, id3tag * id3) 254get_id3 (const char *data, int64_t offset, int64_t size, id3tag *id3)
213{ 255{
214 const char *pos; 256 const char *pos;
215 257
216 if (size < 128) 258 if (size < 128)
217 return INVALID_ID3; 259 return INVALID_ID3;
218 260
219 pos = &data[size - 128]; 261 pos = &data[offset];
220 if (0 != strncmp ("TAG", pos, 3)) 262 if (0 != strncmp ("TAG", pos, 3))
221 return INVALID_ID3; 263 return INVALID_ID3;
222 pos += 3; 264 pos += 3;
@@ -253,49 +295,82 @@ get_id3 (const char *data, size_t size, id3tag * id3)
253} 295}
254 296
255 297
256#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != (ret = proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)))) goto FINISH; } while (0) 298#define ADD(s,t) do { if ( (s != NULL) && (strlen(s) > 0) && (0 != proc (proc_cls, "id3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) return 1; } while (0)
257 299
258 300
259const char * 301int
260EXTRACTOR_id3_options () 302EXTRACTOR_id3_extract_method (struct EXTRACTOR_PluginList *plugin,
303 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
261{ 304{
262 return "want-tail"; 305 int64_t file_position;
263} 306 int64_t file_size;
307 int64_t offset = 0;
308 int64_t size;
309 struct id3_state *state;
310 char *data;
311
312 char track[16];
264 313
314 if (plugin == NULL || plugin->state == NULL)
315 return 1;
265 316
266int 317 state = plugin->state;
267EXTRACTOR_id3_extract (const char *data, 318 file_position = plugin->position;
268 size_t size, 319 file_size = plugin->fsize;
269 EXTRACTOR_MetaDataProcessor proc, 320 size = plugin->map_size;
270 void *proc_cls, 321 data = (char *) plugin->shm_ptr;
271 const char *options) 322
272{ 323 if (plugin->seek_request < 0)
273 id3tag info; 324 return 1;
274 char track[16]; 325 if (file_position - plugin->seek_request > 0)
275 int ret; 326 {
327 plugin->seek_request = -1;
328 return 1;
329 }
330 if (plugin->seek_request - file_position < size)
331 offset = plugin->seek_request - file_position;
276 332
277 ret = 0; 333 while (1)
278 if (OK != get_id3 (data, size, &info)) 334 {
279 return 0; 335 switch (state->state)
280 ADD (info.title, EXTRACTOR_METATYPE_TITLE);
281 ADD (info.artist, EXTRACTOR_METATYPE_ARTIST);
282 ADD (info.album, EXTRACTOR_METATYPE_ALBUM);
283 ADD (info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
284 ADD (info.genre, EXTRACTOR_METATYPE_GENRE);
285 ADD (info.comment, EXTRACTOR_METATYPE_COMMENT);
286 if (info.track_number != 0)
287 { 336 {
288 snprintf(track, 337 case ID3_INVALID:
289 sizeof(track), "%u", info.track_number); 338 plugin->seek_request = -1;
290 ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER); 339 return 1;
340 case ID3_SEEKING_TO_TAIL:
341 offset = file_size - 128 - file_position;
342 if (offset > size)
343 {
344 state->state = ID3_READING_TAIL;
345 plugin->seek_request = file_position + offset;
346 return 0;
347 }
348 else if (offset < 0)
349 {
350 state->state = ID3_INVALID;
351 break;
352 }
353 state->state = ID3_READING_TAIL;
354 break;
355 case ID3_READING_TAIL:
356 if (OK != get_id3 (data, offset, size - offset, &state->info))
357 return 1;
358 ADD (state->info.title, EXTRACTOR_METATYPE_TITLE);
359 ADD (state->info.artist, EXTRACTOR_METATYPE_ARTIST);
360 ADD (state->info.album, EXTRACTOR_METATYPE_ALBUM);
361 ADD (state->info.year, EXTRACTOR_METATYPE_PUBLICATION_YEAR);
362 ADD (state->info.genre, EXTRACTOR_METATYPE_GENRE);
363 ADD (state->info.comment, EXTRACTOR_METATYPE_COMMENT);
364 if (state->info.track_number != 0)
365 {
366 snprintf(track,
367 sizeof(track), "%u", state->info.track_number);
368 ADD (track, EXTRACTOR_METATYPE_TRACK_NUMBER);
369 }
370 state->state = ID3_INVALID;
291 } 371 }
292FINISH: 372 }
293 if (info.title != NULL) free (info.title); 373 return 1;
294 if (info.year != NULL) free (info.year);
295 if (info.album != NULL) free (info.album);
296 if (info.artist != NULL) free (info.artist);
297 if (info.comment != NULL) free (info.comment);
298 return ret;
299} 374}
300 375
301/* end of id3_extractor.c */ 376/* end of id3_extractor.c */
diff --git a/src/plugins/id3v23_extractor.c b/src/plugins/id3v23_extractor.c
deleted file mode 100644
index c31d63d..0000000
--- a/src/plugins/id3v23_extractor.c
+++ /dev/null
@@ -1,420 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21#define DEBUG_EXTRACT_ID3v23 0
22
23#include "platform.h"
24#include "extractor.h"
25#include <string.h>
26#include <stdio.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <fcntl.h>
32#ifndef MINGW
33#include <sys/mman.h>
34#endif
35
36#include "convert.h"
37
38enum Id3v23Fmt
39 {
40 T, /* simple, 0-terminated string, prefixed by encoding */
41 U, /* 0-terminated ASCII string, no encoding */
42 UL, /* unsync'ed lyrics */
43 SL, /* sync'ed lyrics */
44 L, /* string with language prefix */
45 I /* image */
46 };
47
48typedef struct
49{
50 const char *text;
51 enum EXTRACTOR_MetaType type;
52 enum Id3v23Fmt fmt;
53} Matches;
54
55static Matches tmap[] = {
56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, */
62 /* TDLY */
63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
66 /* TIME */
67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
70 /* TKEY */
71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
78 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
87 /* TRDA */
88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
89 /* TRSO */
90 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
92 /* TSSE */
93 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 {NULL, 0, T}
115};
116
117
118/* mimetype = audio/mpeg */
119int
120EXTRACTOR_id3v23_extract (const unsigned char *data,
121 size_t size,
122 EXTRACTOR_MetaDataProcessor proc,
123 void *proc_cls,
124 const char *options)
125{
126 int unsync;
127 int extendedHdr;
128 int experimental;
129 uint32_t tsize;
130 uint32_t pos;
131 uint32_t ehdrSize;
132 uint32_t padding;
133 uint32_t csize;
134 int i;
135 uint16_t flags;
136 char *mime;
137 enum EXTRACTOR_MetaType type;
138 size_t off;
139 int obo;
140
141 if ((size < 16) ||
142 (data[0] != 0x49) ||
143 (data[1] != 0x44) ||
144 (data[2] != 0x33) || (data[3] != 0x03) || (data[4] != 0x00))
145 return 0;
146 unsync = (data[5] & 0x80) > 0;
147 if (unsync)
148 return 0; /* not supported */
149 extendedHdr = (data[5] & 0x40) > 0;
150 experimental = (data[5] & 0x20) > 0;
151 if (experimental)
152 return 0;
153 tsize = (((data[6] & 0x7F) << 21) |
154 ((data[7] & 0x7F) << 14) |
155 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
156 if (tsize + 10 > size)
157 return 0;
158 pos = 10;
159 padding = 0;
160 if (extendedHdr)
161 {
162 ehdrSize = (((data[10]) << 24) |
163 ((data[11]) << 16) | ((data[12]) << 8) | ((data[12]) << 0));
164
165 padding = (((data[15]) << 24) |
166 ((data[16]) << 16) | ((data[17]) << 8) | ((data[18]) << 0));
167 pos += 4 + ehdrSize;
168 if (padding < tsize)
169 tsize -= padding;
170 else
171 return 0;
172 }
173
174
175 while (pos < tsize)
176 {
177 if (pos + 10 > tsize)
178 return 0;
179 csize =
180 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
181 data[pos + 7];
182 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
183 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
184 break;
185 flags = (data[pos + 8] << 8) + data[pos + 9];
186 if (((flags & 0x80) > 0) /* compressed, not yet supported */ ||
187 ((flags & 0x40) > 0) /* encrypted, not supported */ )
188 {
189 pos += 10 + csize;
190 continue;
191 }
192 i = 0;
193 while (tmap[i].text != NULL)
194 {
195 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
196 {
197 char *word;
198 if ((flags & 0x20) > 0)
199 {
200 /* "group" identifier, skip a byte */
201 pos++;
202 csize--;
203 }
204 switch (tmap[i].fmt)
205 {
206 case T:
207 /* this byte describes the encoding
208 try to convert strings to UTF-8
209 if it fails, then forget it */
210 switch (data[pos + 10])
211 {
212 case 0x00:
213 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
214 csize - 1, "ISO-8859-1");
215 break;
216 case 0x01:
217 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
218 csize - 1, "UCS-2");
219 break;
220 default:
221 /* bad encoding byte,
222 try to convert from iso-8859-1 */
223 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
224 csize - 1, "ISO-8859-1");
225 break;
226 }
227 break;
228 case U:
229 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
230 csize, "ISO-8859-1");
231 break;
232 case UL:
233 if (csize < 6)
234 return 0; /* malformed */
235 /* find end of description */
236 off = 14;
237 while ( (off < size) &&
238 (off - pos < csize) &&
239 (data[pos + off] == '\0') )
240 off++;
241 if ( (off >= csize) ||
242 (data[pos+off] != '\0') )
243 return 0; /* malformed */
244 off++;
245 switch (data[pos + 10])
246 {
247 case 0x00:
248 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
249 csize - off, "ISO-8859-1");
250 break;
251 case 0x01:
252 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
253 csize - off, "UCS-2");
254 break;
255 default:
256 /* bad encoding byte,
257 try to convert from iso-8859-1 */
258 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
259 csize - off, "ISO-8859-1");
260 break;
261 }
262 break;
263 case SL:
264 if (csize < 7)
265 return 0; /* malformed */
266 /* find end of description */
267 switch (data[pos + 10])
268 {
269 case 0x00:
270 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
271 csize - 6, "ISO-8859-1");
272 break;
273 case 0x01:
274 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
275 csize - 6, "UCS-2");
276 break;
277 default:
278 /* bad encoding byte,
279 try to convert from iso-8859-1 */
280 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
281 csize - 6, "ISO-8859-1");
282 break;
283 }
284 break;
285 case L:
286 if (csize < 5)
287 return 0; /* malformed */
288 /* find end of description */
289 obo = data[pos + 14] == '\0' ? 1 : 0; /* someone put a \0 in front of comments... */
290 if (csize < 6)
291 obo = 0;
292 switch (data[pos + 10])
293 {
294 case 0x00:
295 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
296 csize - 4 - obo, "ISO-8859-1");
297 break;
298 case 0x01:
299 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
300 csize - 4 - obo, "UCS-2");
301 break;
302 default:
303 /* bad encoding byte,
304 try to convert from iso-8859-1 */
305 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14 + obo],
306 csize - 4 - obo, "ISO-8859-1");
307 break;
308 }
309 break;
310 case I:
311 if (csize < 2)
312 return 0; /* malformed */
313 /* find end of mime type */
314 off = 11;
315 while ( (off < size) &&
316 (off - pos < csize) &&
317 (data[pos + off] == '\0') )
318 off++;
319 if ( (off >= csize) ||
320 (data[pos+off] != '\0') )
321 return 0; /* malformed */
322 off++;
323 mime = strdup ((const char*) &data[pos + 11]);
324
325 switch (data[pos+off])
326 {
327 case 0x03:
328 case 0x04:
329 type = EXTRACTOR_METATYPE_COVER_PICTURE;
330 break;
331 case 0x07:
332 case 0x08:
333 case 0x09:
334 case 0x0A:
335 case 0x0B:
336 case 0x0C:
337 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
338 break;
339 case 0x0D:
340 case 0x0E:
341 case 0x0F:
342 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
343 break;
344 case 0x14:
345 type = EXTRACTOR_METATYPE_LOGO;
346 type = EXTRACTOR_METATYPE_LOGO;
347 break;
348 default:
349 type = EXTRACTOR_METATYPE_PICTURE;
350 break;
351 }
352 off++;
353
354 /* find end of description */
355 while ( (off < size) &&
356 (off - pos < csize) &&
357 (data[pos + off] == '\0') )
358 off++;
359 if ( (off >= csize) ||
360 (data[pos+off] != '\0') )
361 {
362 if (mime != NULL)
363 free (mime);
364 return 0; /* malformed */
365 }
366 off++;
367 if ( (mime != NULL) &&
368 (0 == strcasecmp ("-->",
369 mime)) )
370 {
371 /* not supported */
372 }
373 else
374 {
375 if (0 != proc (proc_cls,
376 "id3v23",
377 type,
378 EXTRACTOR_METAFORMAT_BINARY,
379 mime,
380 (const char*) &data[pos + off],
381 csize + 6 - off))
382 {
383 if (mime != NULL)
384 free (mime);
385 return 1;
386 }
387 }
388 if (mime != NULL)
389 free (mime);
390 word = NULL;
391 break;
392 default:
393 return 0;
394 }
395 if ((word != NULL) && (strlen (word) > 0))
396 {
397 if (0 != proc (proc_cls,
398 "id3v23",
399 tmap[i].type,
400 EXTRACTOR_METAFORMAT_UTF8,
401 "text/plain",
402 word,
403 strlen(word)+1))
404 {
405 free (word);
406 return 1;
407 }
408 }
409 if (word != NULL)
410 free (word);
411 break;
412 }
413 i++;
414 }
415 pos += 10 + csize;
416 }
417 return 0;
418}
419
420/* end of id3v23_extractor.c */
diff --git a/src/plugins/id3v24_extractor.c b/src/plugins/id3v24_extractor.c
deleted file mode 100644
index 301020c..0000000
--- a/src/plugins/id3v24_extractor.c
+++ /dev/null
@@ -1,455 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2006, 2007, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19
20 */
21#define DEBUG_EXTRACT_ID3v24 0
22
23#include "platform.h"
24#include "extractor.h"
25#include <string.h>
26#include <stdio.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <unistd.h>
30#include <stdlib.h>
31#include <fcntl.h>
32#ifndef MINGW
33#include <sys/mman.h>
34#endif
35
36#include "convert.h"
37
/* Payload layouts known to this extractor.  Every entry of tmap names one
   of these, and EXTRACTOR_id3v24_extract switches on it to decide how the
   bytes following the 10-byte frame header are decoded. */
38enum Id3v24Fmt
39 {
40 T, /* simple, 0-terminated string, prefixed by encoding */
41 U, /* 0-terminated ASCII string, no encoding (decoded as ISO-8859-1) */
42 UL, /* unsync'ed lyrics: encoding, 3-byte language, description, text */
43 SL, /* sync'ed lyrics: text is decoded starting 6 bytes into the payload */
44 L, /* string with language prefix: text is decoded 4 bytes into the payload */
45 I /* image: MIME type, picture type byte, description, image data */
46 };
47
/* One row of the frame lookup table (tmap): associates a frame identifier
   with the meta type reported for it and the layout used to decode it. */
48typedef struct
49{
50 const char *text; /* frame ID; compared against the first 4 bytes of a frame; NULL ends the table */
51 enum EXTRACTOR_MetaType type; /* meta type handed to the processor callback */
52 enum Id3v24Fmt fmt; /* payload layout, see enum Id3v24Fmt */
53} Matches;
54
55static Matches tmap[] = {
56 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
57 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
58 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
59 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
60 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
61 /* {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, deprecated in 24 */
62 /* TDLY */
63 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
64 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
65 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
66 /* TIME, deprecated in 24 */
67 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
68 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
69 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
70 /* TKEY */
71 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
72 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
73 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
75 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
76 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
77 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
78 /* {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, deprecated in 24 */
79 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
80 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
81 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
82 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
83 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
84 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
85 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
86 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
87 /* TRDA, deprecated in 24 */
88 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
89 /* TRSO */
90 /* {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, deprecated in 24 */
91 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
92 /* TSSE */
93 /* {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, deprecated in 24 */
94 {"WCOM", EXTRACTOR_METATYPE_URL, U},
95 {"WCOP", EXTRACTOR_METATYPE_URL, U},
96 {"WOAF", EXTRACTOR_METATYPE_URL, U},
97 {"WOAS", EXTRACTOR_METATYPE_URL, U},
98 {"WORS", EXTRACTOR_METATYPE_URL, U},
99 {"WPAY", EXTRACTOR_METATYPE_URL, U},
100 {"WPUB", EXTRACTOR_METATYPE_URL, U},
101 {"WXXX", EXTRACTOR_METATYPE_URL, T},
102 /* {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, deprecated in 24 */
103 /* ... */
104 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
105 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
106 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
107 /* ... */
108 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
109 /* ... */
110 {"LINK", EXTRACTOR_METATYPE_URL, U},
111 /* ... */
112 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
113 /* ... */
114 /* new frames in 24 */
115 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
116 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
117 /* TDRC, TDRL, TDTG */
118 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
119 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
120 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
121 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
122 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
123 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
124 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
125 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
126 {NULL, 0, T}
127};
128
129
130/* mimetype = audio/mpeg */
131int
132EXTRACTOR_id3v24_extract (const unsigned char *data,
133 size_t size,
134 EXTRACTOR_MetaDataProcessor proc,
135 void *proc_cls,
136 const char *options)
137{
138 int unsync;
139 int extendedHdr;
140 int experimental;
141 uint32_t tsize;
142 uint32_t pos;
143 uint32_t ehdrSize;
144 uint32_t csize;
145 int i;
146 uint16_t flags;
147 char *mime;
148 enum EXTRACTOR_MetaType type;
149 size_t off;
150
151 if ((size < 16) ||
152 (data[0] != 0x49) ||
153 (data[1] != 0x44) ||
154 (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
155 return 0;
156 unsync = (data[5] & 0x80) > 0;
157 if (unsync)
158 return 0; /* not supported */
159 extendedHdr = (data[5] & 0x40) > 0;
160 experimental = (data[5] & 0x20) > 0;
161 if (experimental)
162 return 0;
163 /* footer = (data[5] & 0x10) > 0; */
164 tsize = (((data[6] & 0x7F) << 21) |
165 ((data[7] & 0x7F) << 14) |
166 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
167 if (tsize + 10 > size)
168 return 0;
169 pos = 10;
170 if (extendedHdr)
171 {
172 ehdrSize = (((data[10] & 0x7F) << 21) |
173 ((data[11] & 0x7F) << 14) |
174 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
175 pos += 4 + ehdrSize;
176 if (ehdrSize > tsize)
177 return 0;
178 }
179 while (pos < tsize)
180 {
181 if (pos + 10 > tsize)
182 return 0;
183 csize =
184 (data[pos + 4] << 24) + (data[pos + 5] << 16) + (data[pos + 6] << 8) +
185 data[pos + 7];
186 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0) ||
187 (pos + 10 + csize <= pos + 10) || (pos + 10 <= pos))
188 break;
189 flags = (data[pos + 8] << 8) + data[pos + 9];
190 if (((flags & 0x08) > 0) /* compressed, not yet supported */ ||
191 ((flags & 0x04) > 0) /* encrypted, not supported */ ||
192 ((flags & 0x02) > 0) /* unsynchronized, not supported */ )
193 {
194 pos += 10 + csize;
195 continue;
196 }
197 i = 0;
198 while (tmap[i].text != NULL)
199 {
200 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
201 {
202 char *word;
203 if ((flags & 0x40) > 0)
204 {
205 /* "group" identifier, skip a byte */
206 pos++;
207 csize--;
208 }
209
210 switch (tmap[i].fmt)
211 {
212 case T:
213 /* this byte describes the encoding
214 try to convert strings to UTF-8
215 if it fails, then forget it */
216 switch (data[pos + 10])
217 {
218 case 0x00:
219 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
220 csize - 1, "ISO-8859-1");
221 break;
222 case 0x01:
223 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
224 csize - 1, "UTF-16");
225 break;
226 case 0x02:
227 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
228 csize - 1, "UTF-16BE");
229 break;
230 case 0x03:
231 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
232 csize - 1, "UTF-8");
233 break;
234 default:
235 /* bad encoding byte,
236 try to convert from iso-8859-1 */
237 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
238 csize - 1, "ISO-8859-1");
239 break;
240 }
241 break;
242 case U:
243 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
244 csize, "ISO-8859-1");
245 break;
246 case UL:
247 if (csize < 6)
248 return 0; /* malformed */
249 /* find end of description */
250 off = 14;
251 while ( (off < size) &&
252 (off - pos < csize) &&
253 (data[pos + off] == '\0') )
254 off++;
255 if ( (off >= csize) ||
256 (data[pos+off] != '\0') )
257 return 0; /* malformed */
258 off++;
259 switch (data[pos + 10])
260 {
261 case 0x00:
262 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
263 csize - off, "ISO-8859-1");
264 break;
265 case 0x01:
266 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
267 csize - off, "UTF-16");
268 break;
269 case 0x02:
270 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
271 csize - off, "UTF-16BE");
272 break;
273 case 0x03:
274 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
275 csize - off, "UTF-8");
276 break;
277 default:
278 /* bad encoding byte,
279 try to convert from iso-8859-1 */
280 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
281 csize - off, "ISO-8859-1");
282 break;
283 }
284 break;
285 case SL:
286 if (csize < 7)
287 return 0; /* malformed */
288 /* find end of description */
289 switch (data[pos + 10])
290 {
291 case 0x00:
292 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
293 csize - 6, "ISO-8859-1");
294 break;
295 case 0x01:
296 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
297 csize - 6, "UTF-16");
298 break;
299 case 0x02:
300 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
301 csize - 6, "UTF-16BE");
302 break;
303 case 0x03:
304 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
305 csize - 6, "UTF-8");
306 break;
307 default:
308 /* bad encoding byte,
309 try to convert from iso-8859-1 */
310 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 16],
311 csize - 6, "ISO-8859-1");
312 break;
313 }
314 break;
315 case L:
316 if (csize < 5)
317 return 0; /* malformed */
318 /* find end of description */
319 switch (data[pos + 10])
320 {
321 case 0x00:
322 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
323 csize - 4, "ISO-8859-1");
324 break;
325 case 0x01:
326 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
327 csize - 4, "UTF-16");
328 break;
329 case 0x02:
330 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
331 csize - 4, "UTF-16BE");
332 break;
333 case 0x03:
334 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
335 csize - 4, "UTF-8");
336 break;
337 default:
338 /* bad encoding byte,
339 try to convert from iso-8859-1 */
340 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 14],
341 csize - 4, "ISO-8859-1");
342 break;
343 }
344 break;
345 case I:
346 if (csize < 2)
347 return 0; /* malformed */
348 /* find end of mime type */
349 off = 11;
350 while ( (off < size) &&
351 (off - pos < csize) &&
352 (data[pos + off] == '\0') )
353 off++;
354 if ( (off >= csize) ||
355 (data[pos+off] != '\0') )
356 return 0; /* malformed */
357 off++;
358 mime = strdup ((const char*) &data[pos + 11]);
359
360 switch (data[pos+off])
361 {
362 case 0x03:
363 case 0x04:
364 type = EXTRACTOR_METATYPE_COVER_PICTURE;
365 break;
366 case 0x07:
367 case 0x08:
368 case 0x09:
369 case 0x0A:
370 case 0x0B:
371 case 0x0C:
372 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
373 break;
374 case 0x0D:
375 case 0x0E:
376 case 0x0F:
377 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
378 break;
379 case 0x14:
380 type = EXTRACTOR_METATYPE_LOGO;
381 type = EXTRACTOR_METATYPE_LOGO;
382 break;
383 default:
384 type = EXTRACTOR_METATYPE_PICTURE;
385 break;
386 }
387 off++;
388
389 /* find end of description */
390 while ( (off < size) &&
391 (off - pos < csize) &&
392 (data[pos + off] == '\0') )
393 off++;
394 if ( (off >= csize) ||
395 (data[pos+off] != '\0') )
396 {
397 if (mime != NULL)
398 free (mime);
399 return 0; /* malformed */
400 }
401 off++;
402 if ( (mime != NULL) &&
403 (0 == strcasecmp ("-->",
404 mime)) )
405 {
406 /* not supported */
407 }
408 else
409 {
410 if (0 != proc (proc_cls,
411 "id3v24",
412 type,
413 EXTRACTOR_METAFORMAT_BINARY,
414 mime,
415 (const char*) &data[pos + off],
416 csize + 6 - off))
417 {
418 if (mime != NULL)
419 free (mime);
420 return 1;
421 }
422 }
423 if (mime != NULL)
424 free (mime);
425 word = NULL;
426 break;
427 default:
428 return 0;
429 }
430 if ((word != NULL) && (strlen (word) > 0))
431 {
432 if (0 != proc (proc_cls,
433 "id3v24",
434 tmap[i].type,
435 EXTRACTOR_METAFORMAT_UTF8,
436 "text/plain",
437 word,
438 strlen(word)+1))
439 {
440 free (word);
441 return 1;
442 }
443 }
444 if (word != NULL)
445 free (word);
446 break;
447 }
448 i++;
449 }
450 pos += 10 + csize;
451 }
452 return 0;
453}
454
455/* end of id3v24_extractor.c */
diff --git a/src/plugins/id3v2_extractor.c b/src/plugins/id3v2_extractor.c
index 4f50d05..0302dc6 100644
--- a/src/plugins/id3v2_extractor.c
+++ b/src/plugins/id3v2_extractor.c
@@ -26,6 +26,8 @@
26#endif 26#endif
27#include "convert.h" 27#include "convert.h"
28 28
29#include "extractor_plugins.h"
30
29#define DEBUG_EXTRACT_ID3v2 0 31#define DEBUG_EXTRACT_ID3v2 0
30 32
31enum Id3v2Fmt 33enum Id3v2Fmt
@@ -47,314 +49,723 @@ typedef struct
47 49
48static Matches tmap[] = { 50static Matches tmap[] = {
49 /* skipping UFI */ 51 /* skipping UFI */
50 {"TT1", EXTRACTOR_METATYPE_SECTION, T}, 52 {"TT1 ", EXTRACTOR_METATYPE_SECTION, T},
51 {"TT2", EXTRACTOR_METATYPE_TITLE, T}, 53 {"TT2 ", EXTRACTOR_METATYPE_TITLE, T},
52 {"TT3", EXTRACTOR_METATYPE_SONG_VERSION, T}, 54 {"TT3 ", EXTRACTOR_METATYPE_SONG_VERSION, T},
53 {"TP1", EXTRACTOR_METATYPE_ARTIST, T}, 55 {"TP1 ", EXTRACTOR_METATYPE_ARTIST, T},
54 {"TP2", EXTRACTOR_METATYPE_PERFORMER, T}, 56 {"TP2 ", EXTRACTOR_METATYPE_PERFORMER, T},
55 {"TP3", EXTRACTOR_METATYPE_CONDUCTOR, T}, 57 {"TP3 ", EXTRACTOR_METATYPE_CONDUCTOR, T},
56 {"TP4", EXTRACTOR_METATYPE_INTERPRETATION, T}, 58 {"TP4 ", EXTRACTOR_METATYPE_INTERPRETATION, T},
57 {"TCM", EXTRACTOR_METATYPE_COMPOSER, T}, 59 {"TCM ", EXTRACTOR_METATYPE_COMPOSER, T},
58 {"TXT", EXTRACTOR_METATYPE_WRITER, T}, 60 {"TXT ", EXTRACTOR_METATYPE_WRITER, T},
59 {"TLA", EXTRACTOR_METATYPE_LANGUAGE, T}, 61 {"TLA ", EXTRACTOR_METATYPE_LANGUAGE, T},
60 {"TCO", EXTRACTOR_METATYPE_GENRE, T}, 62 {"TCO ", EXTRACTOR_METATYPE_GENRE, T},
61 {"TAL", EXTRACTOR_METATYPE_ALBUM, T}, 63 {"TAL ", EXTRACTOR_METATYPE_ALBUM, T},
62 {"TPA", EXTRACTOR_METATYPE_DISC_NUMBER, T}, 64 {"TPA ", EXTRACTOR_METATYPE_DISC_NUMBER, T},
63 {"TRK", EXTRACTOR_METATYPE_TRACK_NUMBER, T}, 65 {"TRK ", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
64 {"TRC", EXTRACTOR_METATYPE_ISRC, T}, 66 {"TRC ", EXTRACTOR_METATYPE_ISRC, T},
65 {"TYE", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, 67 {"TYE ", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T},
66 /* 68 /*
67 FIXME: these two and TYE should be combined into 69 FIXME: these two and TYE should be combined into
68 the actual publication date (if TRD is missing) 70 the actual publication date (if TRD is missing)
69 {"TDA", EXTRACTOR_METATYPE_PUBLICATION_DATE}, 71 {"TDA ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
70 {"TIM", EXTRACTOR_METATYPE_PUBLICATION_DATE}, 72 {"TIM ", EXTRACTOR_METATYPE_PUBLICATION_DATE},
71 */ 73 */
72 {"TRD", EXTRACTOR_METATYPE_CREATION_TIME, T}, 74 {"TRD ", EXTRACTOR_METATYPE_CREATION_TIME, T},
73 {"TMT", EXTRACTOR_METATYPE_SOURCE, T}, 75 {"TMT ", EXTRACTOR_METATYPE_SOURCE, T},
74 {"TFT", EXTRACTOR_METATYPE_FORMAT_VERSION, T}, 76 {"TFT ", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
75 {"TBP", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T}, 77 {"TBP ", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
76 {"TCR", EXTRACTOR_METATYPE_COPYRIGHT, T}, 78 {"TCR ", EXTRACTOR_METATYPE_COPYRIGHT, T},
77 {"TPB", EXTRACTOR_METATYPE_PUBLISHER, T}, 79 {"TPB ", EXTRACTOR_METATYPE_PUBLISHER, T},
78 {"TEN", EXTRACTOR_METATYPE_ENCODED_BY, T}, 80 {"TEN ", EXTRACTOR_METATYPE_ENCODED_BY, T},
79 {"TSS", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T}, 81 {"TSS ", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE, T},
80 {"TOF", EXTRACTOR_METATYPE_FILENAME, T}, 82 {"TOF ", EXTRACTOR_METATYPE_FILENAME, T},
81 {"TLE", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */ 83 {"TLE ", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
82 {"TSI", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, 84 {"TSI ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T},
83 /* skipping TDY, TKE */ 85 /* skipping TDY, TKE */
84 {"TOT", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T}, 86 {"TOT ", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
85 {"TOA", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T}, 87 {"TOA ", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
86 {"TOL", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T}, 88 {"TOL ", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
87 {"TOR", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, 89 {"TOR ", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T},
88 /* skipping TXX */ 90 /* skipping TXX */
89 91
90 {"WAF", EXTRACTOR_METATYPE_URL, U}, 92 {"WAF ", EXTRACTOR_METATYPE_URL, U},
91 {"WAR", EXTRACTOR_METATYPE_URL, U}, 93 {"WAR ", EXTRACTOR_METATYPE_URL, U},
92 {"WAS", EXTRACTOR_METATYPE_URL, U}, 94 {"WAS ", EXTRACTOR_METATYPE_URL, U},
93 {"WCM", EXTRACTOR_METATYPE_URL, U}, 95 {"WCM ", EXTRACTOR_METATYPE_URL, U},
94 {"WCP", EXTRACTOR_METATYPE_RIGHTS, U}, 96 {"WCP ", EXTRACTOR_METATYPE_RIGHTS, U},
95 {"WCB", EXTRACTOR_METATYPE_URL, U}, 97 {"WCB ", EXTRACTOR_METATYPE_URL, U},
96 /* skipping WXX */ 98 /* skipping WXX */
97 {"IPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, 99 {"IPL ", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
98 /* skipping MCI */ 100 /* skipping MCI */
99 /* skipping ETC */ 101 /* skipping ETC */
100 /* skipping MLL */ 102 /* skipping MLL */
101 /* skipping STC */ 103 /* skipping STC */
102 {"ULT", EXTRACTOR_METATYPE_LYRICS, UL}, 104 {"ULT ", EXTRACTOR_METATYPE_LYRICS, UL},
103 {"SLT", EXTRACTOR_METATYPE_LYRICS, SL}, 105 {"SLT ", EXTRACTOR_METATYPE_LYRICS, SL},
104 {"COM", EXTRACTOR_METATYPE_COMMENT, L}, 106 {"COM ", EXTRACTOR_METATYPE_COMMENT, L},
105 /* skipping RVA */ 107 /* skipping RVA */
106 /* skipping EQU */ 108 /* skipping EQU */
107 /* skipping REV */ 109 /* skipping REV */
108 {"PIC", EXTRACTOR_METATYPE_PICTURE, I}, 110 {"PIC ", EXTRACTOR_METATYPE_PICTURE, I},
109 /* skipping GEN */ 111 /* skipping GEN */
110 /* {"CNT", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */ 112 /* {"CNT ", EXTRACTOR_METATYPE_PLAY_COUNTER, XXX}, */
111 /* {"POP", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */ 113 /* {"POP ", EXTRACTOR_METATYPE_POPULARITY_METER, XXX}, */
112 /* skipping BUF */ 114 /* skipping BUF */
113 /* skipping CRM */ 115 /* skipping CRM */
114 /* skipping CRA */ 116 /* skipping CRA */
115 /* {"LNK", EXTRACTOR_METATYPE_URL, XXX}, */ 117 /* {"LNK ", EXTRACTOR_METATYPE_URL, XXX}, */
118
119
120 {"TALB", EXTRACTOR_METATYPE_ALBUM, T},
121 {"TBPM", EXTRACTOR_METATYPE_BEATS_PER_MINUTE, T},
122 {"TCOM", EXTRACTOR_METATYPE_COMPOSER, T},
123 {"TCON", EXTRACTOR_METATYPE_SONG_VERSION, T},
124 {"TCOP", EXTRACTOR_METATYPE_COPYRIGHT, T},
125 {"TDAT", EXTRACTOR_METATYPE_CREATION_DATE, T}, /* idv23 only */
126 /* TDLY */
127 {"TENC", EXTRACTOR_METATYPE_ENCODED_BY, T},
128 {"TEXT", EXTRACTOR_METATYPE_WRITER, T},
129 {"TFLT", EXTRACTOR_METATYPE_FORMAT_VERSION, T},
130 /* TIME, idv23 only */
131 {"TIT1", EXTRACTOR_METATYPE_SECTION, T},
132 {"TIT2", EXTRACTOR_METATYPE_TITLE, T},
133 {"TIT3", EXTRACTOR_METATYPE_SONG_VERSION, T},
134 /* TKEY */
135 {"TLAN", EXTRACTOR_METATYPE_LANGUAGE, T},
136 {"TLEN", EXTRACTOR_METATYPE_DURATION, T}, /* FIXME: should append 'ms' as unit */
137 {"TMED", EXTRACTOR_METATYPE_SOURCE, T},
138 {"TOAL", EXTRACTOR_METATYPE_ORIGINAL_TITLE, T},
139 {"TOFN", EXTRACTOR_METATYPE_ORIGINAL_ARTIST, T},
140 {"TOLY", EXTRACTOR_METATYPE_ORIGINAL_WRITER, T},
141 {"TOPE", EXTRACTOR_METATYPE_ORIGINAL_PERFORMER, T},
142 {"TORY", EXTRACTOR_METATYPE_ORIGINAL_RELEASE_YEAR, T}, /* idv23 only */
143 {"TOWN", EXTRACTOR_METATYPE_LICENSEE, T},
144 {"TPE1", EXTRACTOR_METATYPE_ARTIST, T},
145 {"TPE2", EXTRACTOR_METATYPE_PERFORMER, T},
146 {"TPE3", EXTRACTOR_METATYPE_CONDUCTOR, T},
147 {"TPE4", EXTRACTOR_METATYPE_INTERPRETATION, T},
148 {"TPOS", EXTRACTOR_METATYPE_DISC_NUMBER, T},
149 {"TPUB", EXTRACTOR_METATYPE_PUBLISHER, T},
150 {"TRCK", EXTRACTOR_METATYPE_TRACK_NUMBER, T},
151 /* TRDA, idv23 only */
152 {"TRSN", EXTRACTOR_METATYPE_NETWORK_NAME, T},
153 /* TRSO */
154 {"TSIZ", EXTRACTOR_METATYPE_EMBEDDED_FILE_SIZE, T}, /* idv23 only */
155 {"TSRC", EXTRACTOR_METATYPE_ISRC, T},
156 /* TSSE */
157 {"TYER", EXTRACTOR_METATYPE_PUBLICATION_YEAR, T}, /* idv23 only */
158 {"WCOM", EXTRACTOR_METATYPE_URL, U},
159 {"WCOP", EXTRACTOR_METATYPE_URL, U},
160 {"WOAF", EXTRACTOR_METATYPE_URL, U},
161 {"WOAS", EXTRACTOR_METATYPE_URL, U},
162 {"WORS", EXTRACTOR_METATYPE_URL, U},
163 {"WPAY", EXTRACTOR_METATYPE_URL, U},
164 {"WPUB", EXTRACTOR_METATYPE_URL, U},
165 {"WXXX", EXTRACTOR_METATYPE_URL, T},
166 {"IPLS", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T}, /* idv23 only */
167 /* ... */
168 {"USLT", EXTRACTOR_METATYPE_LYRICS, UL },
169 {"SYLT", EXTRACTOR_METATYPE_LYRICS, SL },
170 {"COMM", EXTRACTOR_METATYPE_COMMENT, L},
171 /* ... */
172 {"APIC", EXTRACTOR_METATYPE_PICTURE, I},
173 /* ... */
174 {"LINK", EXTRACTOR_METATYPE_URL, U},
175 /* ... */
176 {"USER", EXTRACTOR_METATYPE_LICENSE, T},
177 /* ... */
178
179 /* new frames in id3v24 */
180 /* ASPI, EQU2, RVA2, SEEK, SIGN, TDEN */
181 {"TDOR", EXTRACTOR_METATYPE_PUBLICATION_DATE, T},
182 /* TDRC, TDRL, TDTG */
183 {"TIPL", EXTRACTOR_METATYPE_CONTRIBUTOR_NAME, T},
184 {"TMCL", EXTRACTOR_METATYPE_MUSICIAN_CREDITS_LIST, T},
185 {"TMOO", EXTRACTOR_METATYPE_MOOD, T},
186 {"TPRO", EXTRACTOR_METATYPE_COPYRIGHT, T},
187 {"TSOA", EXTRACTOR_METATYPE_ALBUM, T},
188 {"TSOP", EXTRACTOR_METATYPE_PERFORMER, T},
189 {"TSOT", EXTRACTOR_METATYPE_TITLE, T},
190 {"TSST", EXTRACTOR_METATYPE_SUBTITLE, T},
191
116 {NULL, 0, T}, 192 {NULL, 0, T},
117}; 193};
118 194
119 195struct id3v2_state
120/* mimetype = audio/mpeg */
121int
122EXTRACTOR_id3v2_extract (const unsigned char *data,
123 size_t size,
124 EXTRACTOR_MetaDataProcessor proc,
125 void *proc_cls,
126 const char *options)
127{ 196{
197 int state;
128 unsigned int tsize; 198 unsigned int tsize;
129 unsigned int pos; 199 size_t csize;
200 char id[4];
201 int32_t ti;
202 char ver;
203 char extended_header;
204 uint16_t frame_flags;
205 char *mime;
206};
207
/* Parser states stored in id3v2_state.state; the extract method loops over
   a switch on this value until it returns. */
208enum ID3v2State
209{
210 ID3V2_INVALID = -1, /* no usable tag: extract method clears the seek request and returns 1 */
211 ID3V2_READING_HEADER = 0, /* initial state: validate the 10-byte tag header */
212 ID3V2_READING_FRAME_HEADER, /* read frame id, size and (v2.3/v2.4) flags */
213 ID3V23_READING_EXTENDED_HEADER, /* skip the v2.3 extended header */
214 ID3V24_READING_EXTENDED_HEADER, /* skip the v2.4 extended header */
215 ID3V2_READING_FRAME /* decode the payload of the frame selected by id3v2_state.ti */
216};
217
218void
219EXTRACTOR_id3v2_init_state_method (struct EXTRACTOR_PluginList *plugin)
220{
221 struct id3v2_state *state;
222 state = plugin->state = malloc (sizeof (struct id3v2_state));
223 if (state == NULL)
224 return;
225 memset (state, 0, sizeof (struct id3v2_state));
226 state->state = ID3V2_READING_HEADER;
227 state->ti = -1;
228 state->mime = NULL;
229}
230
231void
232EXTRACTOR_id3v2_discard_state_method (struct EXTRACTOR_PluginList *plugin)
233{
234 struct id3v2_state *state = plugin->state;
235 if (state != NULL)
236 {
237 if (state->mime != NULL)
238 free (state->mime);
239 free (state);
240 }
241 plugin->state = NULL;
242}
243
244static int
245find_type (const char *id, size_t len)
246{
247 int i;
248 for (i = 0; tmap[i].text != NULL; i++)
249 if (0 == strncmp (tmap[i].text, id, len))
250 return i;
251 return -1;
252}
253
254int
255EXTRACTOR_id3v2_extract_method (struct EXTRACTOR_PluginList *plugin,
256 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
257{
258 int64_t file_position;
259 int64_t file_size;
260 int64_t offset = 0;
261 int64_t size;
262 struct id3v2_state *state;
263 unsigned char *data;
264 char *word = NULL;
130 unsigned int off; 265 unsigned int off;
131 enum EXTRACTOR_MetaType type; 266 enum EXTRACTOR_MetaType type;
132 const char *mime; 267 unsigned char picture_type;
133 268
134 if ((size < 16) || 269 if (plugin == NULL || plugin->state == NULL)
135 (data[0] != 0x49) || 270 return 1;
136 (data[1] != 0x44) ||
137 (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
138 return 0;
139 /* unsync: (data[5] & 0x80) > 0; */
140 tsize = (((data[6] & 0x7F) << 21) |
141 ((data[7] & 0x7F) << 14) |
142 ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
143 271
144 if (tsize + 10 > size) 272 state = plugin->state;
145 return 0; 273 file_position = plugin->position;
146 pos = 10; 274 file_size = plugin->fsize;
147 while (pos < tsize) 275 size = plugin->map_size;
276 data = plugin->shm_ptr;
277
278 if (plugin->seek_request < 0)
279 return 1;
280 if (file_position - plugin->seek_request > 0)
281 {
282 plugin->seek_request = -1;
283 return 1;
284 }
285 if (plugin->seek_request - file_position < size)
286 offset = plugin->seek_request - file_position;
287
288 while (1)
289 {
290 switch (state->state)
148 { 291 {
149 size_t csize; 292 case ID3V2_INVALID:
150 int i; 293 plugin->seek_request = -1;
294 return 1;
295 case ID3V2_READING_HEADER:
296 /* TODO: support id3v24 tags at the end of file. Here's a quote from id3 faq:
297 * Q: Where is an ID3v2 tag located in an MP3 file?
298 * A: It is most likely located at the beginning of the file. Look for the
299 * marker "ID3" in the first 3 bytes of the file. If it's not there, it
300 * could be at the end of the file (if the tag is ID3v2.4). Look for the
301 * marker "3DI" 10 bytes from the end of the file, or 10 bytes before the
302 * beginning of an ID3v1 tag. Finally it is possible to embed ID3v2 tags
303 * in the actual MPEG stream, on an MPEG frame boundary. Almost nobody does
304 * this.
305 * Parsing of such tags will not be completely correct, because we can't
306 * seek backwards. We will have to seek to file_size - chunk_size instead
307 * (by the way, chunk size is theoretically unknown, LE is free to use any chunk
308 * size, even though plugins often make assumptions about chunk size being large
309 * enough to make one atomic read without seeking, if offset == 0) and search
310 * for id3v1 at -128 offset, then look if there's a 3DI marker 10 bytes before
311 * it (or 10 bytes before the end of file, if id3v1 is not there; not sure
312 * about APETAGs; we should probably just scan byte-by-byte from the end of file,
313 * until we hit 3DI, or reach the offset == 0), and use it to set offset to the
314 * start of ID3v24 header, adjust the following file_position check and data
315 * indices (use offset), and otherwise proceed as normal (maybe file size checks
316 * along the way will have to be adjusted by -1, or made ">" instead of ">=";
317 * these problems do not arise for tags at the beginning of the file, since
318 * audio itself is usually at least 1-byte long; when the tag is at the end of
319 * file, these checks will have to be 100% correct).
320 * If there are two tags (at the beginning and at the end of the file),
321 * a SEEK in the one at the beginning of the file can be used to seek to the
322 * one at the end.
323 */
324 /* TODO: merge id3v1 and id3v2 parsers. There's an "update" flag in id3v2 that
325 * tells the parser to augment id3v1 values with the values from id3v2 (if this
326 * flag is not set, id3v2 parser must discard id3v1 data).
327 * At the moment id3v1 and id3v2 are parsed separately, and update flag is ignored.
328 */
329 if (file_position != 0 || size < 10 || (data[0] != 0x49) || (data[1] != 0x44) || (data[2] != 0x33) || ((data[3] != 0x02) && (data[3] != 0x03) && (data[3] != 0x04))/* || (data[4] != 0x00) minor verisons are backward-compatible*/)
330 {
331 state->state = ID3V2_INVALID;
332 break;
333 }
334 state->ver = data[3];
335 if (state->ver == 0x02)
336 {
337 state->extended_header = 0;
338 }
339 else if ((state->ver == 0x03) || (state->ver == 0x04))
340 {
341 if ((data[5] & 0x80) > 0)
342 {
343 /* unsync is not supported in id3v23 or id3v24*/
344 state->state = ID3V2_INVALID;
345 break;
346 }
347 state->extended_header = (data[5] & 0x40) > 0;
348 if ((data[5] & 0x20) > 0)
349 {
350 /* experimental is not supported in id3v23 or id3v24*/
351 state->state = ID3V2_INVALID;
352 break;
353 }
354 }
355 state->tsize = (((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) | ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
356 if (state->tsize + 10 > file_size)
357 {
358 state->state = ID3V2_INVALID;
359 break;
360 }
361 offset = 10;
362 if (state->ver == 0x03 && state->extended_header)
363 state->state = ID3V23_READING_EXTENDED_HEADER;
364 else if (state->ver == 0x04 && state->extended_header)
365 state->state = ID3V24_READING_EXTENDED_HEADER;
366 else
367 state->state = ID3V2_READING_FRAME_HEADER;
368 break;
369 case ID3V23_READING_EXTENDED_HEADER:
370 if (offset + 9 >= size)
371 {
372 if (offset == 0)
373 {
374 state->state = ID3V2_INVALID;
375 break;
376 }
377 plugin->seek_request = file_position + offset;
378 return 0;
379 }
380 if (state->ver == 0x03 && state->extended_header)
381 {
382 uint32_t padding, extended_header_size;
383 extended_header_size = (((data[offset]) << 24) | ((data[offset + 1]) << 16) | ((data[offset + 2]) << 8) | ((data[offset + 3]) << 0));
384 padding = (((data[offset + 6]) << 24) | ((data[offset + 7]) << 16) | ((data[offset + 8]) << 8) | ((data[offset + 9]) << 0));
385 if (data[offset + 4] == 0 && data[offset + 5] == 0)
386 /* Skip the CRC32 byte after extended header */
387 offset += 1;
388 offset += 4 + extended_header_size;
389 if (padding < state->tsize)
390 state->tsize -= padding;
391 else
392 {
393 state->state = ID3V2_INVALID;
394 break;
395 }
396 }
397 break;
398 case ID3V24_READING_EXTENDED_HEADER:
399 if (offset + 6 >= size)
400 {
401 if (offset == 0)
402 {
403 state->state = ID3V2_INVALID;
404 break;
405 }
406 plugin->seek_request = file_position + offset;
407 return 0;
408 }
409 if ( (state->ver == 0x04) && (state->extended_header))
410 {
411 uint32_t extended_header_size;
151 412
152 if (pos + 7 > tsize) 413 extended_header_size = (((data[offset]) << 24) |
414 ((data[offset + 1]) << 16) |
415 ((data[offset + 2]) << 8) |
416 ((data[offset + 3]) << 0));
417 offset += 4 + extended_header_size;
418 }
419 break;
420 case ID3V2_READING_FRAME_HEADER:
421 if (file_position + offset > state->tsize ||
422 ((state->ver == 0x02) && file_position + offset + 6 >= state->tsize) ||
423 (((state->ver == 0x03) || (state->ver == 0x04))&& file_position + offset + 10 >= state->tsize))
424 {
425 state->state = ID3V2_INVALID;
426 break;
427 }
428 if (((state->ver == 0x02) && (offset + 6 >= size)) ||
429 (((state->ver == 0x03) || (state->ver == 0x04)) && (offset + 10 >= size)))
430 {
431 plugin->seek_request = file_position + offset;
153 return 0; 432 return 0;
154 csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5]; 433 }
155 if ((pos + 7 + csize > tsize) || (csize > tsize) || (csize == 0)) 434 if (state->ver == 0x02)
435 {
436 memcpy (state->id, &data[offset], 3);
437 state->csize = (data[offset + 3] << 16) + (data[offset + 4] << 8) + data[offset + 5];
438 if ((file_position + offset + 6 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
439 {
440 state->state = ID3V2_INVALID;
441 break;
442 }
443 offset += 6;
444 state->frame_flags = 0;
445 }
446 else if ((state->ver == 0x03) || (state->ver == 0x04))
447 {
448 memcpy (state->id, &data[offset], 4);
449 if (state->ver == 0x03)
450 state->csize = (data[offset + 4] << 24) + (data[offset + 5] << 16) + (data[offset + 6] << 8) + data[offset + 7];
451 else if (state->ver == 0x04)
452 state->csize = ((data[offset + 4] & 0x7F) << 21) | ((data[offset + 5] & 0x7F) << 14) | ((data[offset + 6] & 0x7F) << 07) | ((data[offset + 7] & 0x7F) << 00);
453 if ((file_position + offset + 10 + state->csize > file_size) || (state->csize > file_size) || (state->csize == 0))
454 {
455 state->state = ID3V2_INVALID;
456 break;
457 }
458 state->frame_flags = (data[offset + 8] << 8) + data[offset + 9];
459 if (state->ver == 0x03)
460 {
461 if (((state->frame_flags & 0x80) > 0) /* compressed, not yet supported */ ||
462 ((state->frame_flags & 0x40) > 0) /* encrypted, not supported */)
463 {
464 /* Skip to next frame header */
465 offset += 10 + state->csize;
466 break;
467 }
468 }
469 else if (state->ver == 0x04)
470 {
471 if (((state->frame_flags & 0x08) > 0) /* compressed, not yet supported */ ||
472 ((state->frame_flags & 0x04) > 0) /* encrypted, not supported */ ||
473 ((state->frame_flags & 0x02) > 0) /* unsynchronization, not supported */)
474 {
475 /* Skip to next frame header */
476 offset += 10 + state->csize;
477 break;
478 }
479 if ((state->frame_flags & 0x01) > 0)
480 {
481 /* Skip data length indicator */
482 state->csize -= 4;
483 offset += 4;
484 }
485 }
486 offset += 10;
487 }
488
489 state->ti = find_type ((const char *) state->id, (state->ver == 0x02) ? 3 : (((state->ver == 0x03) || (state->ver == 0x04)) ? 4 : 0));
490 if (state->ti == -1)
491 {
492 offset += state->csize;
493 break;
494 }
495 state->state = ID3V2_READING_FRAME;
496 break;
497 case ID3V2_READING_FRAME:
498 if (offset == 0 && state->csize > size)
499 {
500 /* frame size is larger than the size of one data chunk we get at a time */
501 offset += state->csize;
502 state->state = ID3V2_READING_FRAME_HEADER;
503 break;
504 }
505 if (offset + state->csize > size)
506 {
507 plugin->seek_request = file_position + offset;
508 return 0;
509 }
510 word = NULL;
511 if (((state->ver == 0x03) && ((state->frame_flags & 0x20) > 0)) ||
512 ((state->ver == 0x04) && ((state->frame_flags & 0x40) > 0)))
513 {
514 /* "group" identifier, skip a byte */
515 offset++;
516 state->csize--;
517 }
518 switch (tmap[state->ti].fmt)
519 {
520 case T:
521 if (data[offset] == 0x00)
522 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
523 state->csize - 1, "ISO-8859-1");
524 else if (data[offset] == 0x01)
525 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
526 state->csize - 1, "UCS-2");
527 else if ((state->ver == 0x04) && (data[offset] == 0x02))
528 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
529 state->csize - 1, "UTF-16BE");
530 else if ((state->ver == 0x04) && (data[offset] == 0x03))
531 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
532 state->csize - 1, "UTF-8");
533 else
534 /* bad encoding byte, try to convert from iso-8859-1 */
535 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 1],
536 state->csize - 1, "ISO-8859-1");
537 break;
538 case U:
539 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset],
540 state->csize, "ISO-8859-1");
541 break;
542 case UL:
543 if (state->csize < 6)
544 {
545 /* malformed */
546 state->state = ID3V2_INVALID;
547 break;
548 }
549 /* find end of description */
550 off = 4;
551 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
552 off++;
553 if ((off >= state->csize) || (data[offset + off] != '\0'))
554 {
555 /* malformed */
556 state->state = ID3V2_INVALID;
557 break;
558 }
559 off++;
560 if (data[offset] == 0x00)
561 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
562 state->csize - off, "ISO-8859-1");
563 else if (data[offset] == 0x01)
564 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
565 state->csize - off, "UCS-2");
566 else if ((state->ver == 0x04) && (data[offset] == 0x02))
567 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
568 state->csize - off, "UTF-16BE");
569 else if ((state->ver == 0x04) && (data[offset] == 0x03))
570 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
571 state->csize - off, "UTF-8");
572 else
573 /* bad encoding byte, try to convert from iso-8859-1 */
574 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
575 state->csize - off, "ISO-8859-1");
576 break;
577 case SL:
578 if (state->csize < 7)
579 {
580 /* malformed */
581 state->state = ID3V2_INVALID;
582 break;
583 }
584 if (data[offset] == 0x00)
585 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
586 state->csize - 6, "ISO-8859-1");
587 else if (data[offset] == 0x01)
588 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
589 state->csize - 6, "UCS-2");
590 else if ((state->ver == 0x04) && (data[offset] == 0x02))
591 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
592 state->csize - 6, "UTF-16BE");
593 else if ((state->ver == 0x04) && (data[offset] == 0x03))
594 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
595 state->csize - 6, "UTF-8");
596 else
597 /* bad encoding byte, try to convert from iso-8859-1 */
598 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + 6],
599 state->csize - 6, "ISO-8859-1");
600 break;
601 case L:
602 if (state->csize < 5)
603 {
604 /* malformed */
605 state->state = ID3V2_INVALID;
606 break;
607 }
608 /* find end of description */
609 off = 4;
610 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
611 off++;
612 if ((off >= state->csize) || (data[offset + off] != '\0'))
613 {
614 /* malformed */
615 state->state = ID3V2_INVALID;
616 break;
617 }
618 off++;
619
620 if (data[offset] == 0x00)
621 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
622 state->csize - off, "ISO-8859-1");
623 else if (data[offset] == 0x01)
624 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
625 state->csize - off, "UCS-2");
626 else if ((state->ver == 0x04) && (data[offset] == 0x02))
627 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
628 state->csize - off, "UTF-1offBE");
629 else if ((state->ver == 0x04) && (data[offset] == 0x03))
630 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
631 state->csize - off, "UTF-8");
632 else
633 /* bad encoding byte, try to convert from iso-8859-1 */
634 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[offset + off],
635 state->csize - off, "ISO-8859-1");
636 break;
637 case I:
638 if ( ( (state->ver == 0x02) &&
639 (state->csize < 7) ) ||
640 ( ( (state->ver == 0x03) ||
641 (state->ver == 0x04)) && (state->csize < 5)) )
642 {
643 /* malformed */
644 state->state = ID3V2_INVALID;
645 break;
646 }
647 if (state->mime != NULL)
648 free (state->mime);
649 state->mime = NULL;
650 if (state->ver == 0x02)
651 {
652 off = 5;
653 picture_type = data[offset + 5];
654 }
655 else if ((state->ver == 0x03) || (state->ver == 0x04))
656 {
657 off = 1;
658 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0') )
659 off++;
660 if ((off >= state->csize) || (data[offset + off] != '\0'))
661 {
662 /* malformed */
663 state->state = ID3V2_INVALID;
664 break;
665 }
666 state->mime = malloc (off);
667 memcpy (state->mime, &data[offset + 1], off - 1);
668 state->mime[off - 1] = '\0';
669 off += 1;
670 picture_type = data[offset];
671 off += 1;
672 }
673 /* find end of description */
674 while ((off < size) && (off < offset + state->csize) && (data[offset + off] != '\0'))
675 off++;
676 if ((off >= state->csize) || (data[offset + off] != '\0'))
677 {
678 free (state->mime);
679 state->mime = NULL;
680 /* malformed */
681 state->state = ID3V2_INVALID;
682 break;
683 }
684 off++;
685 switch (picture_type)
686 {
687 case 0x03:
688 case 0x04:
689 type = EXTRACTOR_METATYPE_COVER_PICTURE;
690 break;
691 case 0x07:
692 case 0x08:
693 case 0x09:
694 case 0x0A:
695 case 0x0B:
696 case 0x0C:
697 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
698 break;
699 case 0x0D:
700 case 0x0E:
701 case 0x0F:
702 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
703 break;
704 case 0x14:
705 type = EXTRACTOR_METATYPE_LOGO;
706 type = EXTRACTOR_METATYPE_LOGO;
707 break;
708 default:
709 type = EXTRACTOR_METATYPE_PICTURE;
710 break;
711 }
712 if (state->ver == 0x02)
713 {
714 if (0 == strncasecmp ("PNG", (const char *) &data[offset + 1], 3))
715 state->mime = strdup ("image/png");
716 else if (0 == strncasecmp ("JPG", (const char *) &data[offset + 1], 3))
717 state->mime = strdup ("image/jpeg");
718 else
719 state->mime = NULL;
720 }
721 else if (((state->ver == 0x03) || (state->ver == 0x04)) && (strchr (state->mime, '/') == NULL))
722 {
723 size_t mime_len = strlen (state->mime);
724 char *type_mime = malloc (mime_len + 6 + 1);
725 snprintf (type_mime, mime_len + 6 + 1, "image/%s", state->mime);
726 free (state->mime);
727 state->mime = type_mime;
728 }
729 if ((state->mime != NULL) && (0 == strcmp (state->mime, "-->")))
730 {
731 /* not supported */
732 free (state->mime);
733 state->mime = NULL;
734 }
735 else
736 {
737 if (0 != proc (proc_cls, "id3v2", type, EXTRACTOR_METAFORMAT_BINARY, state->mime, (const char*) &data[offset + off], state->csize - off))
738 {
739 if (state->mime != NULL)
740 free (state->mime);
741 state->mime = NULL;
742 return 1;
743 }
744 if (state->mime != NULL)
745 free (state->mime);
746 state->mime = NULL;
747 }
748 word = NULL;
156 break; 749 break;
157 i = 0; 750 default:
158 while (tmap[i].text != NULL) 751 return 1;
752 }
753 if ((word != NULL) && (strlen (word) > 0))
754 {
755 if (0 != proc (proc_cls, "id3v2", tmap[state->ti].type, EXTRACTOR_METAFORMAT_UTF8, "text/plain", word, strlen (word) + 1))
159 { 756 {
160 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3)) 757 free (word);
161 { 758 return 1;
162 char *word;
163 /* this byte describes the encoding
164 try to convert strings to UTF-8
165 if it fails, then forget it */
166 switch (tmap[i].fmt)
167 {
168 case T:
169 switch (data[pos + 6])
170 {
171 case 0x00:
172 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
173 csize - 1, "ISO-8859-1");
174 break;
175 case 0x01:
176 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
177 csize - 1, "UCS-2");
178 break;
179 default:
180 /* bad encoding byte,
181 try to convert from iso-8859-1 */
182 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
183 csize - 1, "ISO-8859-1");
184 break;
185 }
186 break;
187 case U:
188 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 6],
189 csize, "ISO-8859-1");
190 break;
191 case UL:
192 if (csize < 6)
193 return 0; /* malformed */
194 /* find end of description */
195 off = 10;
196 while ( (off < size) &&
197 (off - pos < csize) &&
198 (data[pos + off] == '\0') )
199 off++;
200 if ( (off >= csize) ||
201 (data[pos+off] != '\0') )
202 return 0; /* malformed */
203 off++;
204 switch (data[pos + 6])
205 {
206 case 0x00:
207 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
208 csize - off, "ISO-8859-1");
209 break;
210 case 0x01:
211 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
212 csize - off, "UCS-2");
213 break;
214 default:
215 /* bad encoding byte,
216 try to convert from iso-8859-1 */
217 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + off],
218 csize - off, "ISO-8859-1");
219 break;
220 }
221 break;
222 case SL:
223 if (csize < 7)
224 return 0; /* malformed */
225 /* find end of description */
226 switch (data[pos + 6])
227 {
228 case 0x00:
229 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
230 csize - 6, "ISO-8859-1");
231 break;
232 case 0x01:
233 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
234 csize - 6, "UCS-2");
235 break;
236 default:
237 /* bad encoding byte,
238 try to convert from iso-8859-1 */
239 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 12],
240 csize - 6, "ISO-8859-1");
241 break;
242 }
243 break;
244 case L:
245 if (csize < 5)
246 return 0; /* malformed */
247 /* find end of description */
248 switch (data[pos + 6])
249 {
250 case 0x00:
251 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
252 csize - 4, "ISO-8859-1");
253 break;
254 case 0x01:
255 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
256 csize - 4, "UCS-2");
257 break;
258 default:
259 /* bad encoding byte,
260 try to convert from iso-8859-1 */
261 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 10],
262 csize - 4, "ISO-8859-1");
263 break;
264 }
265 break;
266 case I:
267 if (csize < 6)
268 return 0; /* malformed */
269 /* find end of description */
270 off = 12;
271 while ( (off < size) &&
272 (off - pos < csize) &&
273 (data[pos + off] == '\0') )
274 off++;
275 if ( (off >= csize) ||
276 (data[pos+off] != '\0') )
277 return 0; /* malformed */
278 off++;
279 switch (data[pos+11])
280 {
281 case 0x03:
282 case 0x04:
283 type = EXTRACTOR_METATYPE_COVER_PICTURE;
284 break;
285 case 0x07:
286 case 0x08:
287 case 0x09:
288 case 0x0A:
289 case 0x0B:
290 case 0x0C:
291 type = EXTRACTOR_METATYPE_CONTRIBUTOR_PICTURE;
292 break;
293 case 0x0D:
294 case 0x0E:
295 case 0x0F:
296 type = EXTRACTOR_METATYPE_EVENT_PICTURE;
297 break;
298 case 0x14:
299 type = EXTRACTOR_METATYPE_LOGO;
300 type = EXTRACTOR_METATYPE_LOGO;
301 break;
302 default:
303 type = EXTRACTOR_METATYPE_PICTURE;
304 break;
305 }
306 if (0 == strncasecmp ("PNG",
307 (const char*) &data[pos + 7], 3))
308 mime = "image/png";
309 else if (0 == strncasecmp ("JPG",
310 (const char*) &data[pos + 7], 3))
311 mime = "image/jpeg";
312 else
313 mime = NULL;
314 if (0 == strncasecmp ("-->",
315 (const char*) &data[pos + 7], 3))
316 {
317 /* not supported */
318 }
319 else
320 {
321 if (0 != proc (proc_cls,
322 "id3v2",
323 type,
324 EXTRACTOR_METAFORMAT_BINARY,
325 mime,
326 (const char*) &data[pos + off],
327 csize + 6 - off))
328 return 1;
329 }
330 word = NULL;
331 break;
332 default:
333 return 0;
334 }
335 if ((word != NULL) && (strlen (word) > 0))
336 {
337 if (0 != proc (proc_cls,
338 "id3v2",
339 tmap[i].type,
340 EXTRACTOR_METAFORMAT_UTF8,
341 "text/plain",
342 word,
343 strlen(word)+1))
344 {
345 free (word);
346 return 1;
347 }
348 }
349 if (word != NULL)
350 free (word);
351 break;
352 }
353 i++;
354 } 759 }
355 pos += 6 + csize; 760 }
761 if (word != NULL)
762 free (word);
763 offset = offset + state->csize;
764 state->state = ID3V2_READING_FRAME_HEADER;
765 break;
356 } 766 }
357 return 0; 767 }
768 return 1;
358} 769}
359 770
360/* end of id3v2_extractor.c */ 771/* end of id3v2_extractor.c */
diff --git a/src/plugins/mp3_extractor.c b/src/plugins/mp3_extractor.c
index 3d8d48d..68b3a39 100644
--- a/src/plugins/mp3_extractor.c
+++ b/src/plugins/mp3_extractor.c
@@ -36,8 +36,41 @@
36#include <unistd.h> 36#include <unistd.h>
37#include <stdlib.h> 37#include <stdlib.h>
38 38
39#define MAX_MP3_SCAN_DEEP 16768 39#include "extractor_plugins.h"
40const int max_frames_scan = 1024; 40
41#if WINDOWS
42#include <sys/param.h> /* #define BYTE_ORDER */
43#endif
44#ifndef __BYTE_ORDER
45#ifdef _BYTE_ORDER
46#define __BYTE_ORDER _BYTE_ORDER
47#else
48#ifdef BYTE_ORDER
49#define __BYTE_ORDER BYTE_ORDER
50#endif
51#endif
52#endif
53#ifndef __BIG_ENDIAN
54#ifdef _BIG_ENDIAN
55#define __BIG_ENDIAN _BIG_ENDIAN
56#else
57#ifdef BIG_ENDIAN
58#define __BIG_ENDIAN BIG_ENDIAN
59#endif
60#endif
61#endif
62#ifndef __LITTLE_ENDIAN
63#ifdef _LITTLE_ENDIAN
64#define __LITTLE_ENDIAN _LITTLE_ENDIAN
65#else
66#ifdef LITTLE_ENDIAN
67#define __LITTLE_ENDIAN LITTLE_ENDIAN
68#endif
69#endif
70#endif
71
72#define LARGEST_FRAME_SIZE 8065
73
41enum 74enum
42{ MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 }; 75{ MPEG_ERR = 0, MPEG_V1 = 1, MPEG_V2 = 2, MPEG_V25 = 3 };
43 76
@@ -45,6 +78,11 @@ enum
45{ LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 }; 78{ LAYER_ERR = 0, LAYER_1 = 1, LAYER_2 = 2, LAYER_3 = 3 };
46 79
47#define MPA_SYNC_MASK ((unsigned int) 0xFFE00000) 80#define MPA_SYNC_MASK ((unsigned int) 0xFFE00000)
81#if __BYTE_ORDER == __BIG_ENDIAN
82#define MPA_SYNC_MASK_MEM ((unsigned int) 0xFFE00000)
83#else
84#define MPA_SYNC_MASK_MEM ((unsigned int) 0x0000E0FF)
85#endif
48#define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000) 86#define MPA_LAST_SYNC_BIT_MASK ((unsigned int) 0x00100000)
49#define MPA_VERSION_MASK ((unsigned int) 0x00080000) 87#define MPA_VERSION_MASK ((unsigned int) 0x00080000)
50#define MPA_LAYER_MASK ((unsigned int) 0x3) 88#define MPA_LAYER_MASK ((unsigned int) 0x3)
@@ -106,169 +144,274 @@ static const char * const layer_names[3] = {
106 144
107#define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) 145#define ADDR(s,t) do { if (0 != proc (proc_cls, "mp3", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0)
108 146
109/* mimetype = audio/mpeg */ 147struct mp3_state
110int 148{
111EXTRACTOR_mp3_extract (const unsigned char *data, 149 int state;
112 size_t size, 150
113 EXTRACTOR_MetaDataProcessor proc, 151 uint32_t header;
114 void *proc_cls, 152 int sample_rate;
115 const char *options) 153 char mpeg_ver;
154 char layer;
155 char vbr_flag;
156 int ch;
157 char copyright_flag;
158 char original_flag;
159 int avg_bps;
160 int bitrate;
161
162 int64_t number_of_frames;
163 int64_t number_of_valid_frames;
164};
165
/* Parser states kept in the "state" field of struct mp3_state; the
   extract method switches on this value for every step it takes. */
166enum MP3State
167{
168 MP3_LOOKING_FOR_FRAME = 0, /* scanning the mapped data for a frame header */
169 MP3_READING_FRAME = 1, /* a sync pattern was found; the saved header is decoded and validated */
170};
171
172void
173EXTRACTOR_mp3_init_state_method (struct EXTRACTOR_PluginList *plugin)
174{
175 struct mp3_state *state;
176 state = plugin->state = malloc (sizeof (struct mp3_state));
177 if (state == NULL)
178 return;
179 state->header = 0;
180 state->sample_rate = 0;
181 state->number_of_frames = 0;
182 state->number_of_valid_frames = 0;
183 state->mpeg_ver = 0;
184 state->layer = 0;
185 state->vbr_flag = 0;
186 state->ch = 0;
187 state->copyright_flag = 0;
188 state->original_flag = 0;
189 state->avg_bps = 0;
190 state->bitrate = 0;
191 state->state = 0;
192}
193
194void
195EXTRACTOR_mp3_discard_state_method (struct EXTRACTOR_PluginList *plugin)
196{
197 if (plugin->state != NULL)
198 {
199 free (plugin->state);
200 }
201 plugin->state = NULL;
202}
203
204static int
205calculate_frame_statistics_and_maybe_report_it (struct EXTRACTOR_PluginList *plugin,
206 struct mp3_state *state, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
207{
208 int length;
209 char format[512];
210
211 if (((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5 ||
212 state->number_of_valid_frames < 2)
213 /* Unlikely to be an mp3 file */
214 return 0;
215 ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
216 state->avg_bps = state->avg_bps / state->number_of_valid_frames;
217 if (state->sample_rate > 0)
218 length = 1152 * state->number_of_valid_frames / state->sample_rate;
219 else if (state->avg_bps > 0 || state->bitrate > 0)
220 length = plugin->fsize / (state->avg_bps ? state->avg_bps : state->bitrate ? state->bitrate : 1) / 125;
221 else
222 length = 0;
223
224 ADDR (mpeg_versions[state->mpeg_ver - 1], EXTRACTOR_METATYPE_FORMAT_VERSION);
225 snprintf (format,
226 sizeof (format),
227 "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s",
228 mpeg_versions[state->mpeg_ver - 1],
229 layer_names[state->layer - 1],
230 state->avg_bps,
231 state->vbr_flag ? _("VBR") : _("CBR"),
232 state->sample_rate,
233 channel_modes[state->ch],
234 state->copyright_flag ? _("copyright") : _("no copyright"),
235 state->original_flag ? _("original") : _("copy") );
236
237 ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE);
238 snprintf (format,
239 sizeof (format), "%dm%02d",
240 length / 60, length % 60);
241 ADDR (format, EXTRACTOR_METATYPE_DURATION);
242 return 0;
243}
244
245int
246EXTRACTOR_mp3_extract_method (struct EXTRACTOR_PluginList *plugin,
247 EXTRACTOR_MetaDataProcessor proc,
248 void *proc_cls)
116{ 249{
117 unsigned int header; 250 int64_t file_position;
118 int counter = 0; 251 int64_t file_size;
252 size_t offset = 0;
253 size_t size;
254 unsigned char *data;
255 struct mp3_state *state;
256
257 size_t frames_found_in_this_round = 0;
258 int start_anew = 0;
259
119 char mpeg_ver = 0; 260 char mpeg_ver = 0;
120 char layer = 0; 261 char layer = 0;
121 int idx_num = 0; 262 int idx_num = 0;
122 int bitrate = 0; /*used for each frame */ 263 int bitrate = 0; /*used for each frame */
123 int avg_bps = 0; /*average bitrate */
124 int vbr_flag = 0;
125 int copyright_flag = 0; 264 int copyright_flag = 0;
126 int original_flag = 0; 265 int original_flag = 0;
127 int length = 0;
128 int sample_rate = 0; 266 int sample_rate = 0;
129 int ch = 0; 267 int ch = 0;
130 int frame_size; 268 int frame_size;
131 int frames = 0;
132 size_t pos = 0;
133 char format[512];
134 269
135 do 270 if (plugin == NULL || plugin->state == NULL)
136 { 271 return 1;
137 /* seek for frame start */
138 if (pos + sizeof (header) > size)
139 {
140 return 0;
141 } /*unable to find header */
142 header = (data[pos] << 24) | (data[pos+1] << 16) |
143 (data[pos+2] << 8) | data[pos+3];
144 if ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
145 break; /*found header sync */
146 pos++;
147 counter++; /*next try */
148 }
149 while (counter < MAX_MP3_SCAN_DEEP);
150 if (counter >= MAX_MP3_SCAN_DEEP)
151 return 0;
152 272
153 do 273 state = plugin->state;
154 { /*ok, now we found a mp3 frame header */ 274 file_position = plugin->position;
155 frames++; 275 file_size = plugin->fsize;
156 switch (header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK)) 276 size = plugin->map_size;
157 { 277 data = plugin->shm_ptr;
158 case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK): 278
159 mpeg_ver = MPEG_V1; 279 if (plugin->seek_request < 0)
160 break; 280 return 1;
161 case (MPA_LAST_SYNC_BIT_MASK): 281 if (file_position - plugin->seek_request > 0)
162 mpeg_ver = MPEG_V2; 282 {
163 break; 283 plugin->seek_request = -1;
164 case 0: 284 return 1;
165 mpeg_ver = MPEG_V25; 285 }
166 break; 286 if (plugin->seek_request - file_position < size)
167 case (MPA_VERSION_MASK): 287 offset = plugin->seek_request - file_position;
168 default: 288
169 return 0; 289 while (1)
170 } 290 {
171 switch (header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT)) 291 switch (state->state)
292 {
293 case MP3_LOOKING_FOR_FRAME:
294 /* Look for a frame header */
295 while (offset + sizeof (state->header) < size && (((*((uint32_t *) &data[offset])) & MPA_SYNC_MASK_MEM) != MPA_SYNC_MASK_MEM))
296 offset += 1;
297 if (offset + sizeof (state->header) >= size)
298 {
299 /* Alternative: (frames_found_in_this_round < (size / LARGEST_FRAME_SIZE / 2)) is to generous */
300 if ((file_position == 0 && ((double) state->number_of_valid_frames / (double) state->number_of_frames) < 0.5) ||
301 file_position + offset + sizeof (state->header) >= file_size)
172 { 302 {
173 case (0x1 << MPA_LAYER_SHIFT): 303 calculate_frame_statistics_and_maybe_report_it (plugin, state, proc, proc_cls);
174 layer = LAYER_3; 304 return 1;
175 break;
176 case (0x2 << MPA_LAYER_SHIFT):
177 layer = LAYER_2;
178 break;
179 case (0x3 << MPA_LAYER_SHIFT):
180 layer = LAYER_1;
181 break;
182 case 0x0:
183 default:
184 return 0;
185 } 305 }
306 plugin->seek_request = file_position + offset;
307 return 0;
308 }
309 state->header = (data[offset] << 24) | (data[offset + 1] << 16) |
310 (data[offset + 2] << 8) | data[offset + 3];
311 if ((state->header & MPA_SYNC_MASK) == MPA_SYNC_MASK)
312 {
313 state->state = MP3_READING_FRAME;
314 break;
315 }
316 break;
317 case MP3_READING_FRAME:
318 state->number_of_frames += 1;
319 start_anew = 0;
320 switch (state->header & (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK))
321 {
322 case (MPA_LAST_SYNC_BIT_MASK | MPA_VERSION_MASK):
323 mpeg_ver = MPEG_V1;
324 break;
325 case (MPA_LAST_SYNC_BIT_MASK):
326 mpeg_ver = MPEG_V2;
327 break;
328 case 0:
329 mpeg_ver = MPEG_V25;
330 break;
331 case (MPA_VERSION_MASK):
332 default:
333 state->state = MP3_LOOKING_FOR_FRAME;
334 offset += 1;
335 start_anew = 1;
336 }
337 if (start_anew)
338 break;
339 switch (state->header & (MPA_LAYER_MASK << MPA_LAYER_SHIFT))
340 {
341 case (0x1 << MPA_LAYER_SHIFT):
342 layer = LAYER_3;
343 break;
344 case (0x2 << MPA_LAYER_SHIFT):
345 layer = LAYER_2;
346 break;
347 case (0x3 << MPA_LAYER_SHIFT):
348 layer = LAYER_1;
349 break;
350 case 0x0:
351 default:
352 state->state = MP3_LOOKING_FOR_FRAME;
353 offset += 1;
354 start_anew = 1;
355 }
356 if (start_anew)
357 break;
186 if (mpeg_ver < MPEG_V25) 358 if (mpeg_ver < MPEG_V25)
187 idx_num = (mpeg_ver - 1) * 3 + layer - 1; 359 idx_num = (mpeg_ver - 1) * 3 + layer - 1;
188 else 360 else
189 idx_num = 2 + layer; 361 idx_num = 2 + layer;
190 bitrate = 1000 * bitrate_table[(header >> MPA_BITRATE_SHIFT) & 362 bitrate = 1000 * bitrate_table[(state->header >> MPA_BITRATE_SHIFT) &
191 MPA_BITRATE_MASK][idx_num]; 363 MPA_BITRATE_MASK][idx_num];
192 if (bitrate < 0) 364 if (bitrate < 0)
193 { 365 {
194 frames--; 366 /*error in header */
195 break; 367 state->state = MP3_LOOKING_FOR_FRAME;
196 } /*error in header */ 368 offset += 1;
197 sample_rate = freq_table[(header >> MPA_FREQ_SHIFT) & 369 break;
370 }
371 sample_rate = freq_table[(state->header >> MPA_FREQ_SHIFT) &
198 MPA_FREQ_MASK][mpeg_ver - 1]; 372 MPA_FREQ_MASK][mpeg_ver - 1];
199 if (sample_rate < 0) 373 if (sample_rate <= 0)
200 { 374 {
201 frames--; 375 /*error in header */
202 break; 376 state->state = MP3_LOOKING_FOR_FRAME;
203 } /*error in header */ 377 offset += 1;
204 ch = ((header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK); 378 break;
205 copyright_flag = (header >> MPA_COPYRIGHT_SHIFT) & 0x1; 379 }
206 original_flag = (header >> MPA_ORIGINAL_SHIFT) & 0x1; 380 ch = ((state->header >> MPA_CHMODE_SHIFT) & MPA_CHMODE_MASK);
207 frame_size = 381 copyright_flag = (state->header >> MPA_COPYRIGHT_SHIFT) & 0x1;
208 144 * bitrate / (sample_rate ? sample_rate : 1) + 382 original_flag = (state->header >> MPA_ORIGINAL_SHIFT) & 0x1;
209 ((header >> MPA_PADDING_SHIFT) & 0x1); 383 if (layer == LAYER_1)
384 frame_size = (12 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1)) * 4;
385 else
386 frame_size = 144 * bitrate / sample_rate + ((state->header >> MPA_PADDING_SHIFT) & 0x1);
210 if (frame_size <= 0) 387 if (frame_size <= 0)
211 { 388 {
212 /* Technically, bitrate can be 0. However, but this particular 389 /*error in header */
213 * extractor is incapable of correctly processing 0-bitrate files 390 state->state = MP3_LOOKING_FOR_FRAME;
214 * anyway. And bitrate == 0 might also mean that this is just a 391 offset += 1;
215 * random binary sequence, which is far more likely to be true. 392 break;
216 * 393 }
217 * amatus suggests to use a different algorithm and parse significant
218 * part of the file, then count the number of correct mpeg frames.
219 * If the the percentage of correct frames is below a threshold,
220 * then this is not an mpeg file at all.
221 */
222 frames -= 1;
223 break;
224 }
225 avg_bps += bitrate / 1000;
226
227 pos += frame_size - 4;
228 if (frames > max_frames_scan)
229 break; /*optimization */
230 if (avg_bps / frames != bitrate / 1000)
231 vbr_flag = 1;
232 if (pos + sizeof (header) > size)
233 break; /* EOF */
234 header = (data[pos] << 24) | (data[pos+1] << 16) |
235 (data[pos+2] << 8) | data[pos+3];
236 }
237 while ((header & MPA_SYNC_MASK) == MPA_SYNC_MASK);
238
239 if (frames < 2)
240 return 0; /*no valid frames */
241 ADDR ("audio/mpeg", EXTRACTOR_METATYPE_MIMETYPE);
242 avg_bps = avg_bps / frames;
243 if (max_frames_scan)
244 { /*if not all frames scaned */
245 length =
246 size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
247 }
248 else
249 {
250 length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
251 }
252 394
253 ADDR (mpeg_versions[mpeg_ver-1], EXTRACTOR_METATYPE_FORMAT_VERSION); 395 /* Only save data from valid frames in the state */
254 snprintf (format, 396 state->avg_bps += bitrate / 1000;
255 sizeof(format), 397 state->sample_rate = sample_rate;
256 "%s %s audio, %d kbps (%s), %d Hz, %s, %s, %s", 398 state->mpeg_ver = mpeg_ver;
257 mpeg_versions[mpeg_ver-1], 399 state->layer = layer;
258 layer_names[layer-1], 400 state->ch = ch;
259 avg_bps, 401 state->copyright_flag = copyright_flag;
260 vbr_flag ? _("VBR") : _("CBR"), 402 state->original_flag = original_flag;
261 sample_rate, 403 state->bitrate = bitrate;
262 channel_modes[ch],
263 copyright_flag ? _("copyright") : _("no copyright"),
264 original_flag ? _("original") : _("copy") );
265 404
266 ADDR (format, EXTRACTOR_METATYPE_RESOURCE_TYPE); 405 frames_found_in_this_round += 1;
267 snprintf (format, 406 state->number_of_valid_frames += 1;
268 sizeof (format), "%dm%02d", 407 if (state->avg_bps / state->number_of_valid_frames != bitrate / 1000)
269 length / 60, length % 60); 408 state->vbr_flag = 1;
270 ADDR (format, EXTRACTOR_METATYPE_DURATION); 409 offset += frame_size;
271 return 0; 410 state->state = MP3_LOOKING_FOR_FRAME;
411 break;
412 }
413 }
414 return 1;
272} 415}
273 416
274/* end of mp3_extractor.c */ 417/* end of mp3_extractor.c */
diff --git a/src/plugins/template_extractor.c b/src/plugins/template_extractor.c
index 63f0393..b6f3371 100644
--- a/src/plugins/template_extractor.c
+++ b/src/plugins/template_extractor.c
@@ -21,21 +21,113 @@
21#include "platform.h" 21#include "platform.h"
22#include "extractor.h" 22#include "extractor.h"
23 23
24int 24#include "extractor_plugins.h"
25EXTRACTOR_template_extract (const unsigned char *data, 25
26 size_t size, 26struct template_state
27 EXTRACTOR_MetaDataProcessor proc, 27{
28 void *proc_cls, 28 int state;
29 const char *options) 29
30 /* more state fields here
31 * all variables that should survive more than one atomic read
32 * from the "file" are to be placed here.
33 */
34};
35
36enum TemplateState
37{
38 TEMPLATE_INVALID = -1,
39 TEMPLATE_LOOKING_FOR_FOO = 0,
40 TEMPLATE_READING_FOO,
41 TEMPLATE_READING_BAR,
42 TEMPLATE_SEEKING_TO_ZOOL
43};
44
45void
46EXTRACTOR_template_init_state_method (struct EXTRACTOR_PluginList *plugin)
30{ 47{
31 if (0 != proc (proc_cls, 48 struct template_state *state;
32 "template", 49 state = plugin->state = malloc (sizeof (struct template_state));
33 EXTRACTOR_METATYPE_RESERVED, 50 if (state == NULL)
34 EXTRACTOR_METAFORMAT_UTF8, 51 return;
35 "text/plain", 52 state->state = TEMPLATE_LOOKING_FOR_FOO; /* or whatever is the initial one */
36 "foo", 53 /* initialize other fields to their "uninitialized" values or defaults */
37 strlen ("foo")+1)) 54}
55
56void
57EXTRACTOR_template_discard_state_method (struct EXTRACTOR_PluginList *plugin)
58{
59 if (plugin->state != NULL)
60 {
61 /* free other state fields that are heap-allocated */
62 free (plugin->state);
63 }
64 plugin->state = NULL;
65}
66
67int
68EXTRACTOR_template_extract_method (struct EXTRACTOR_PluginList *plugin,
69 EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
70{
71 int64_t file_position;
72 int64_t file_size;
73 size_t offset = 0;
74 size_t size;
75 unsigned char *data;
76 unsigned char *ff;
77 struct mp3_state *state;
78
79 /* temporary variables are declared here */
80
81 if (plugin == NULL || plugin->state == NULL)
38 return 1; 82 return 1;
39 /* insert more here */ 83
40 return 0; 84 /* for easier access (and conforms better with the old plugins var names) */
85 state = plugin->state;
86 file_position = plugin->position;
87 file_size = plugin->fsize;
88 size = plugin->map_size;
89 data = plugin->shm_ptr;
90
91 /* sanity checks */
92 if (plugin->seek_request < 0)
93 return 1;
94 if (file_position - plugin->seek_request > 0)
95 {
96 plugin->seek_request = -1;
97 return 1;
98 }
99 if (plugin->seek_request - file_position < size)
100 offset = plugin->seek_request - file_position;
101
102 while (1)
103 {
104 switch (state->state)
105 {
106 case TEMPLATE_INVALID:
107 plugin->seek_request = -1;
108 return 1;
109 case TEMPLATE_LOOKING_FOR_FOO:
110 /* Find FOO in data buffer.
111 * If found, set offset to its position and set state to TEMPLATE_READING_FOO
112 * If not found, set seek_request to file_position + offset and return 1
113 * (but it's better to give up as early as possible, to avoid reading the whole
114 * file byte-by-byte).
115 */
116 break;
117 case TEMPLATE_READING_FOO:
118 /* See if offset + sizeof(foo) < size, otherwise set seek_request to file_position + offset and return 1;
119 * If file_position is 0, and size is still to small, give up.
120 * Read FOO, maybe increase offset to reflect that (depends on the parser logic).
121 * Either process FOO right here, or jump to another state (see ebml plugin for an example of complex
122 * state-jumps).
123 * If FOO says you need to seek somewhere - set offset to seek_target - file_position and set the
124 * next state (next state will check that offset < size; all states that do reading should do that,
125 * and also check for EOF).
126 */
127 /* ... */
128 break;
129 }
130 }
131 /* Should not reach this */
132 return 1;
41} 133}