about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Christian Grothoff <christian@grothoff.org> 2009-12-18 17:37:16 +0000
committer: Christian Grothoff <christian@grothoff.org> 2009-12-18 17:37:16 +0000
commit: 7cd6af4cdfc3b300fe95697883b630bceea2db11 (patch)
tree: 6477db02504d68f601242805d84fe9163eb73d30
parent: 76d18069b498088a04d8cc2a1d709a39074b47a7 (diff)
download: libextractor-7cd6af4cdfc3b300fe95697883b630bceea2db11.tar.gz
download: libextractor-7cd6af4cdfc3b300fe95697883b630bceea2db11.zip
real
-rw-r--r-- src/plugins/Makefile.am 12
-rw-r--r-- src/plugins/flv_extractor.c 2
-rw-r--r-- src/plugins/real_extractor.c (renamed from src/plugins/realextractor.c) 264
3 files changed, 169 insertions, 109 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index a7baedf..2898f97 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -86,6 +86,7 @@ plugin_LTLIBRARIES = \
86 $(ogg) \ 86 $(ogg) \
87 $(ole2) \ 87 $(ole2) \
88 $(pdf) \ 88 $(pdf) \
89 libextractor_real.la \
89 $(rpm) \ 90 $(rpm) \
90 $(thumbgtk) 91 $(thumbgtk)
91 92
@@ -222,6 +223,11 @@ libextractor_pdf_la_LIBADD = \
222 $(top_builddir)/src/common/libextractor_common.la \ 223 $(top_builddir)/src/common/libextractor_common.la \
223 -lpoppler 224 -lpoppler
224 225
226libextractor_real_la_SOURCES = \
227 real_extractor.c
228libextractor_real_la_LDFLAGS = \
229 $(PLUGINFLAGS)
230
225libextractor_rpm_la_SOURCES = \ 231libextractor_rpm_la_SOURCES = \
226 rpm_extractor.c 232 rpm_extractor.c
227libextractor_rpm_la_LDFLAGS = \ 233libextractor_rpm_la_LDFLAGS = \
@@ -256,7 +262,6 @@ OLD_LIBS = \
256 libextractor_png.la \ 262 libextractor_png.la \
257 libextractor_ps.la \ 263 libextractor_ps.la \
258 $(extraqt) \ 264 $(extraqt) \
259 libextractor_real.la \
260 libextractor_riff.la \ 265 libextractor_riff.la \
261 libextractor_s3m.la \ 266 libextractor_s3m.la \
262 libextractor_sid.la \ 267 libextractor_sid.la \
@@ -332,11 +337,6 @@ libextractor_zip_la_LDFLAGS = \
332libextractor_zip_la_LIBADD = \ 337libextractor_zip_la_LIBADD = \
333 $(top_builddir)/src/main/libextractor.la 338 $(top_builddir)/src/main/libextractor.la
334 339
335libextractor_real_la_SOURCES = \
336 realextractor.c
337libextractor_real_la_LDFLAGS = \
338 $(PLUGINFLAGS)
339
340libextractor_mpeg_la_SOURCES = \ 340libextractor_mpeg_la_SOURCES = \
341 mpegextractor.c 341 mpegextractor.c
342libextractor_mpeg_la_LDFLAGS = \ 342libextractor_mpeg_la_LDFLAGS = \
diff --git a/src/plugins/flv_extractor.c b/src/plugins/flv_extractor.c
index 556735a..74ea283 100644
--- a/src/plugins/flv_extractor.c
+++ b/src/plugins/flv_extractor.c
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 Copyright (C) 2007 Heikki Lindholm 3 Copyright (C) 2007, 2009 Heikki Lindholm
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
diff --git a/src/plugins/realextractor.c b/src/plugins/real_extractor.c
index 2c56d81..c6089af 100644
--- a/src/plugins/realextractor.c
+++ b/src/plugins/real_extractor.c
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of libextractor. 2 This file is part of libextractor.
3 (C) 2002, 2003 Vidyut Samanta and Christian Grothoff 3 (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
4 4
5 libextractor is free software; you can redistribute it and/or modify 5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
@@ -20,10 +20,11 @@
20 20
21#include "platform.h" 21#include "platform.h"
22#include "extractor.h" 22#include "extractor.h"
23#include <stdint.h>
23 24
24#define UINT32 unsigned int 25#define UINT32 uint32_t
25#define UINT16 unsigned short 26#define UINT16 uint16_t
26#define UINT8 unsigned char 27#define UINT8 uint8_t
27 28
28typedef struct 29typedef struct
29{ 30{
@@ -74,57 +75,51 @@ typedef struct
74 75
75#define RAFF4_HEADER 0x2E7261FD 76#define RAFF4_HEADER 0x2E7261FD
76 77
77static struct EXTRACTOR_Keywords *
78addKeyword (EXTRACTOR_KeywordType type,
79 char *keyword, struct EXTRACTOR_Keywords *next)
80{
81 EXTRACTOR_KeywordList *result;
82
83 if (keyword == NULL)
84 return next;
85 result = malloc (sizeof (EXTRACTOR_KeywordList));
86 result->next = next;
87 result->keyword = keyword;
88 result->keywordType = type;
89 return result;
90}
91 78
92static struct EXTRACTOR_Keywords * 79static int
93processMediaProperties (const Media_Properties * prop, 80processMediaProperties (const Media_Properties * prop,
94 struct EXTRACTOR_Keywords *prev) 81 EXTRACTOR_MetaDataProcessor proc,
82 void *proc_cls)
95{ 83{
96 84
97 UINT8 mime_type_size; 85 UINT8 mime_type_size;
98 UINT32 prop_size; 86 UINT32 prop_size;
99 char *data;
100 87
101 prop_size = ntohl (prop->size); 88 prop_size = ntohl (prop->size);
102 if (prop_size <= sizeof (Media_Properties)) 89 if (prop_size <= sizeof (Media_Properties))
103 return prev; 90 return 0;
104 if (0 != prop->object_version) 91 if (0 != prop->object_version)
105 return prev; 92 return 0;
106 if (prop_size <= prop->stream_name_size + sizeof (UINT8) 93 if (prop_size <= prop->stream_name_size + sizeof (UINT8)
107 + sizeof (Media_Properties)) 94 + sizeof (Media_Properties))
108 return prev; 95 return 0;
109 96
110 mime_type_size = prop->data[prop->stream_name_size]; 97 mime_type_size = prop->data[prop->stream_name_size];
111 if (prop_size <= prop->stream_name_size + sizeof (UINT8) + 98 if (mime_type_size > 2048)
99 return 0; /* unrealistic */
100 if (prop_size > prop->stream_name_size + sizeof (UINT8) +
112 +mime_type_size + sizeof (Media_Properties)) 101 +mime_type_size + sizeof (Media_Properties))
113 return prev; 102 {
114 103 char data[mime_type_size + 1];
115 data = malloc (mime_type_size + 1); 104 memcpy (data, &prop->data[prop->stream_name_size + 1], mime_type_size);
116 memcpy (data, &prop->data[prop->stream_name_size + 1], mime_type_size); 105 data[mime_type_size] = '\0';
117 data[mime_type_size] = '\0'; 106
118 107 return proc (proc_cls,
119 return addKeyword (EXTRACTOR_MIMETYPE, data, prev); 108 "real",
109 EXTRACTOR_METATYPE_MIMETYPE,
110 EXTRACTOR_METAFORMAT_UTF8,
111 "text/plain",
112 data,
113 strlen (data));
114 }
115 return 0;
120} 116}
121 117
122static struct EXTRACTOR_Keywords * 118static int
123processContentDescription (const Content_Description * prop, 119processContentDescription (const Content_Description * prop,
124 struct EXTRACTOR_Keywords *prev) 120 EXTRACTOR_MetaDataProcessor proc,
121 void *proc_cls)
125{ 122{
126
127
128 UINT16 author_len; 123 UINT16 author_len;
129 UINT16 copyright_len; 124 UINT16 copyright_len;
130 UINT16 comment_len; 125 UINT16 comment_len;
@@ -134,22 +129,20 @@ processContentDescription (const Content_Description * prop,
134 char *copyright; 129 char *copyright;
135 char *comment; 130 char *comment;
136 UINT32 prop_size; 131 UINT32 prop_size;
132 int ret;
137 133
138 prop_size = ntohl (prop->size); 134 prop_size = ntohl (prop->size);
139 if (prop_size <= sizeof (Content_Description)) 135 if (prop_size <= sizeof (Content_Description))
140 return prev; 136 return 0;
141 if (0 != prop->object_version) 137 if (0 != prop->object_version)
142 return prev; 138 return 0;
143 title_len = ntohs (prop->title_len); 139 title_len = ntohs (prop->title_len);
144 if (prop_size <= title_len + sizeof (UINT16) + sizeof (Content_Description)) 140 if (prop_size <= title_len + sizeof (UINT16) + sizeof (Content_Description))
145 return prev; 141 return 0;
146
147
148 author_len = ntohs (*(UINT16 *) & prop->data[title_len]); 142 author_len = ntohs (*(UINT16 *) & prop->data[title_len]);
149
150 if (prop_size <= title_len + sizeof (UINT16) 143 if (prop_size <= title_len + sizeof (UINT16)
151 + author_len + sizeof (Content_Description)) 144 + author_len + sizeof (Content_Description))
152 return prev; 145 return 0;
153 146
154 copyright_len = ntohs (*(UINT16 *) & prop->data[title_len + 147 copyright_len = ntohs (*(UINT16 *) & prop->data[title_len +
155 author_len + 148 author_len +
@@ -157,7 +150,7 @@ processContentDescription (const Content_Description * prop,
157 150
158 if (prop_size <= title_len + 2 * sizeof (UINT16) 151 if (prop_size <= title_len + 2 * sizeof (UINT16)
159 + author_len + copyright_len + sizeof (Content_Description)) 152 + author_len + copyright_len + sizeof (Content_Description))
160 return prev; 153 return 0;
161 154
162 comment_len = ntohs (*(UINT16 *) & prop->data[title_len + 155 comment_len = ntohs (*(UINT16 *) & prop->data[title_len +
163 author_len + 156 author_len +
@@ -167,39 +160,68 @@ processContentDescription (const Content_Description * prop,
167 if (prop_size < title_len + 3 * sizeof (UINT16) 160 if (prop_size < title_len + 3 * sizeof (UINT16)
168 + author_len + copyright_len + comment_len 161 + author_len + copyright_len + comment_len
169 + sizeof (Content_Description)) 162 + sizeof (Content_Description))
170 return prev; 163 return 0;
171 164
172 title = malloc (title_len + 1); 165 title = malloc (title_len + 1);
173 memcpy (title, &prop->data[0], title_len); 166 memcpy (title, &prop->data[0], title_len);
174 title[title_len] = '\0'; 167 title[title_len] = '\0';
175 168 ret = proc (proc_cls,
176 prev = addKeyword (EXTRACTOR_TITLE, title, prev); 169 "real",
170 EXTRACTOR_METATYPE_TITLE,
171 EXTRACTOR_METAFORMAT_UTF8,
172 "text/plain",
173 title,
174 strlen (title)+1);
175 free (title);
176 if (ret != 0)
177 return ret;
177 178
178 author = malloc (author_len + 1); 179 author = malloc (author_len + 1);
179 memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len); 180 memcpy (author, &prop->data[title_len + sizeof (UINT16)], author_len);
180 author[author_len] = '\0'; 181 author[author_len] = '\0';
181 182 ret = proc (proc_cls,
182 prev = addKeyword (EXTRACTOR_AUTHOR, author, prev); 183 "real",
184 EXTRACTOR_METATYPE_AUTHOR_NAME,
185 EXTRACTOR_METAFORMAT_UTF8,
186 "text/plain",
187 author,
188 strlen (author)+1);
189 free (author);
190 if (ret != 0)
191 return ret;
183 192
184 copyright = malloc (copyright_len + 1); 193 copyright = malloc (copyright_len + 1);
185 memcpy (copyright, 194 memcpy (copyright,
186 &prop->data[title_len + sizeof (UINT16) * 2 + author_len], 195 &prop->data[title_len + sizeof (UINT16) * 2 + author_len],
187 copyright_len); 196 copyright_len);
188 copyright[copyright_len] = '\0'; 197 copyright[copyright_len] = '\0';
189 198 ret = proc (proc_cls,
190 199 "real",
191 prev = addKeyword (EXTRACTOR_COPYRIGHT, copyright, prev); 200 EXTRACTOR_METATYPE_COPYRIGHT,
192 201 EXTRACTOR_METAFORMAT_UTF8,
202 "text/plain",
203 copyright,
204 strlen (copyright)+1);
205 free (copyright);
206 if (ret != 0)
207 return ret;
193 208
194 comment = malloc (comment_len + 1); 209 comment = malloc (comment_len + 1);
195 memcpy (comment, 210 memcpy (comment,
196 &prop->data[title_len + sizeof (UINT16) * 3 + author_len + 211 &prop->data[title_len + sizeof (UINT16) * 3 + author_len +
197 copyright_len], comment_len); 212 copyright_len], comment_len);
198 comment[comment_len] = '\0'; 213 comment[comment_len] = '\0';
199 214 ret = proc (proc_cls,
200 prev = addKeyword (EXTRACTOR_COMMENT, comment, prev); 215 "real",
201 216 EXTRACTOR_METATYPE_COMMENT,
202 return prev; 217 EXTRACTOR_METAFORMAT_UTF8,
218 "text/plain",
219 comment,
220 strlen (comment)+1);
221 free (comment);
222 if (ret != 0)
223 return ret;
224 return 0;
203} 225}
204 226
205typedef struct RAFF4_header 227typedef struct RAFF4_header
@@ -247,76 +269,112 @@ stndup (const char *str, size_t n)
247} 269}
248 270
249/* audio/vnd.rn-realaudio */ 271/* audio/vnd.rn-realaudio */
250struct EXTRACTOR_Keywords * 272int
251libextractor_real_extract (unsigned char *filename, 273EXTRACTOR_real_extract (const unsigned char *data,
252 const unsigned char *data, 274 size_t size,
253 size_t size, struct EXTRACTOR_Keywords *prev) 275 EXTRACTOR_MetaDataProcessor proc,
276 void *proc_cls,
277 const char *options)
254{ 278{
255 const unsigned char *pos; 279 const unsigned char *pos;
256 const unsigned char *end; 280 const unsigned char *end;
257 struct EXTRACTOR_Keywords *result;
258 unsigned int length; 281 unsigned int length;
259 const RAFF4_header *hdr; 282 const RAFF4_header *hdr;
260 unsigned char tlen; 283 unsigned char tlen;
261 unsigned char alen; 284 unsigned char alen;
262 unsigned char clen; 285 unsigned char clen;
263 unsigned char aplen; 286 unsigned char aplen;
287 char *x;
288 int ret;
264 289
265 if (size <= 2 * sizeof (int)) 290 if (size <= 2 * sizeof (int))
266 return prev; 291 return 0;
267
268 if (RAFF4_HEADER == ntohl (*(int *) data)) 292 if (RAFF4_HEADER == ntohl (*(int *) data))
269 { 293 {
270 /* HELIX */ 294 /* HELIX */
271 if (size <= RAFF4_HDR_SIZE + 16 + 4) 295 if (size <= RAFF4_HDR_SIZE + 16 + 4)
272 return prev; 296 return 0;
273 prev = addKeyword (EXTRACTOR_MIMETYPE, 297 if (0 != proc (proc_cls,
274 strdup ("audio/vnd.rn-realaudio"), prev); 298 "real",
299 EXTRACTOR_METATYPE_MIMETYPE,
300 EXTRACTOR_METAFORMAT_UTF8,
301 "text/plain",
302 "audio/vnd.rn-realaudio",
303 strlen ("audio/vnd.rn-realaudio")+1))
304 return 1;
275 hdr = (const RAFF4_header *) &data[16]; 305 hdr = (const RAFF4_header *) &data[16];
276 if (ntohs (hdr->header_length) + 16 > size) 306 if (ntohs (hdr->header_length) + 16 > size)
277 return prev; 307 return 0;
278 tlen = data[16 + RAFF4_HDR_SIZE]; 308 tlen = data[16 + RAFF4_HDR_SIZE];
279 if (tlen + RAFF4_HDR_SIZE + 20 > size) 309 if (tlen + RAFF4_HDR_SIZE + 20 > size)
280 return prev; 310 return 0;
281 alen = data[17 + tlen + RAFF4_HDR_SIZE]; 311 alen = data[17 + tlen + RAFF4_HDR_SIZE];
282 if (tlen + alen + RAFF4_HDR_SIZE + 20 > size) 312 if (tlen + alen + RAFF4_HDR_SIZE + 20 > size)
283 return prev; 313 return 0;
284 clen = data[18 + tlen + alen + RAFF4_HDR_SIZE]; 314 clen = data[18 + tlen + alen + RAFF4_HDR_SIZE];
285 if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size) 315 if (tlen + alen + clen + RAFF4_HDR_SIZE + 20 > size)
286 return prev; 316 return 0;
287 aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE]; 317 aplen = data[19 + tlen + clen + alen + RAFF4_HDR_SIZE];
288 if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size) 318 if (tlen + alen + clen + aplen + RAFF4_HDR_SIZE + 20 > size)
289 return prev; 319 return 0;
290 320 ret = 0;
291 if (tlen > 0) 321 if ( (tlen > 0) && (ret == 0) )
292 prev = addKeyword (EXTRACTOR_TITLE, 322 {
293 stndup ((const char *) &data[17 + RAFF4_HDR_SIZE], 323 x = stndup ((const char *) &data[17 + RAFF4_HDR_SIZE], tlen);
294 tlen), prev); 324 ret = proc (proc_cls,
295 if (alen > 0) 325 "real",
296 prev = addKeyword (EXTRACTOR_AUTHOR, 326 EXTRACTOR_METATYPE_MIMETYPE,
297 stndup ((const char *) 327 EXTRACTOR_METAFORMAT_UTF8,
298 &data[18 + RAFF4_HDR_SIZE + tlen], alen), 328 "text/plain",
299 prev); 329 x,
300 if (clen > 0) 330 strlen (x)+1);
301 prev = addKeyword (EXTRACTOR_COPYRIGHT, 331 free (x);
302 stndup ((const char *) 332 }
303 &data[19 + RAFF4_HDR_SIZE + tlen + alen], 333 if ( (alen > 0) && (ret == 0) )
304 clen), prev); 334 {
305 if (aplen > 0) 335 x = stndup ((const char *) &data[18 + RAFF4_HDR_SIZE + tlen], alen);
306 prev = addKeyword (EXTRACTOR_SOFTWARE, 336 ret = proc (proc_cls,
307 stndup ((const char *) 337 "real",
308 &data[20 + RAFF4_HDR_SIZE + tlen + alen + 338 EXTRACTOR_METATYPE_MIMETYPE,
309 clen], aplen), prev); 339 EXTRACTOR_METAFORMAT_UTF8,
310 return prev; 340 "text/plain",
311 341 x,
342 strlen (x)+1);
343 free (x);
344 }
345 if ( (clen > 0) && (ret == 0) )
346 {
347 x = stndup ((const char *) &data[19 + RAFF4_HDR_SIZE + tlen + alen], clen);
348 ret = proc (proc_cls,
349 "real",
350 EXTRACTOR_METATYPE_MIMETYPE,
351 EXTRACTOR_METAFORMAT_UTF8,
352 "text/plain",
353 x,
354 strlen (x)+1);
355 free (x);
356 }
357 if ( (aplen > 0) && (ret == 0) )
358 {
359 x = stndup ((const char *) &data[20 + RAFF4_HDR_SIZE + tlen + alen + clen], aplen);
360 ret = proc (proc_cls,
361 "real",
362 EXTRACTOR_METATYPE_MIMETYPE,
363 EXTRACTOR_METAFORMAT_UTF8,
364 "text/plain",
365 x,
366 strlen (x)+1);
367 free (x);
368 }
369 return ret;
312 } 370 }
313 if (REAL_HEADER == ntohl (*(int *) data)) 371 if (REAL_HEADER == ntohl (*(int *) data))
314 { 372 {
315 /* old real */ 373 /* old real */
316 result = prev;
317 end = &data[size]; 374 end = &data[size];
318 pos = &data[0]; 375 pos = &data[0];
319 while (1) 376 ret = 0;
377 while (0 == ret)
320 { 378 {
321 if ((pos + 8 >= end) || (pos + 8 < pos)) 379 if ((pos + 8 >= end) || (pos + 8 < pos))
322 break; 380 break;
@@ -328,13 +386,15 @@ libextractor_real_extract (unsigned char *filename,
328 switch (ntohl (*((unsigned int *) pos))) 386 switch (ntohl (*((unsigned int *) pos)))
329 { 387 {
330 case MDPR_HEADER: 388 case MDPR_HEADER:
331 result = processMediaProperties ((Media_Properties *) pos, 389 ret = processMediaProperties ((Media_Properties *) pos,
332 result); 390 proc,
391 proc_cls);
333 pos += length; 392 pos += length;
334 break; 393 break;
335 case CONT_HEADER: 394 case CONT_HEADER:
336 result = processContentDescription ((Content_Description *) pos, 395 ret = processContentDescription ((Content_Description *) pos,
337 result); 396 proc,
397 proc_cls);
338 pos += length; 398 pos += length;
339 break; 399 break;
340 case REAL_HEADER: /* treat like default */ 400 case REAL_HEADER: /* treat like default */
@@ -343,7 +403,7 @@ libextractor_real_extract (unsigned char *filename,
343 break; 403 break;
344 } 404 }
345 } 405 }
346 return result; 406 return ret;
347 } 407 }
348 return prev; 408 return 0;
349} 409}