diff options
Diffstat (limited to 'src/plugins/png_extractor.c')
-rw-r--r-- | src/plugins/png_extractor.c | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/src/plugins/png_extractor.c b/src/plugins/png_extractor.c new file mode 100644 index 0000000..5fb2b9d --- /dev/null +++ b/src/plugins/png_extractor.c | |||
@@ -0,0 +1,380 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | |||
21 | #include "platform.h" | ||
22 | #include "extractor.h" | ||
23 | #include <zlib.h> | ||
24 | #include "convert.h" | ||
25 | |||
/**
 * Copy the first n bytes of a buffer into a freshly allocated,
 * NUL-terminated string.
 *
 * Exactly n bytes are copied, even if the source contains NUL bytes
 * before that; the caller must guarantee that n bytes are readable.
 *
 * @param str source buffer (does not have to be NUL-terminated)
 * @param n number of bytes to copy
 * @return newly allocated string that the caller must free(),
 *         or NULL if the allocation failed
 */
static char *
stndup (const char *str, size_t n)
{
  char *tmp;

  if (0 == n + 1)
    return NULL;                /* n + 1 would wrap around to 0 */
  tmp = malloc (n + 1);
  if (NULL == tmp)
    return NULL;
  memcpy (tmp, str, n);
  tmp[n] = '\0';
  return tmp;
}
35 | |||
/**
 * Bounded string length: local stand-in for strnlen, which the
 * original authors did not want to rely on for portability reasons.
 *
 * @param str buffer to scan
 * @param maxlen maximum number of bytes to look at
 * @return offset of the first NUL byte in str, or maxlen if no NUL
 *         byte occurs within the first maxlen bytes
 */
static size_t
stnlen (const char *str, size_t maxlen)
{
  size_t n;

  for (n = 0; n < maxlen; n++)
    {
      if ('\0' == str[n])
        break;
    }
  return n;
}
49 | |||
50 | |||
/**
 * Read a 4-byte integer in host byte order from a location that may
 * not be suitably aligned for an int.
 *
 * The bytes are copied straight into an int object; casting a char
 * buffer to int* would neither guarantee alignment nor be valid under
 * the strict aliasing rules.
 *
 * @param pos address of the 4 bytes to read
 * @return the 4 bytes interpreted as a native int (no byte swapping)
 */
static int
getIntAt (const void *pos)
{
  int value = 0;

  /* PNG fields are 4 bytes wide; int is assumed to be at least that large */
  memcpy (&value, pos, 4);
  return value;
}
59 | |||
60 | |||
61 | static struct | ||
62 | { | ||
63 | char *name; | ||
64 | enum EXTRACTOR_MetaType type; | ||
65 | } tagmap[] = | ||
66 | { | ||
67 | { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME}, | ||
68 | { "Description", EXTRACTOR_METATYPE_DESCRIPTION}, | ||
69 | { "Comment", EXTRACTOR_METATYPE_COMMENT}, | ||
70 | { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT}, | ||
71 | { "Source", EXTRACTOR_METATYPE_SOURCE_DEVICE }, | ||
72 | { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE}, | ||
73 | { "Title", EXTRACTOR_METATYPE_TITLE}, | ||
74 | { "Software", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE}, | ||
75 | { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER}, | ||
76 | { "Warning", EXTRACTOR_METATYPE_WARNING}, | ||
77 | { NULL, EXTRACTOR_METATYPE_RESERVED } | ||
78 | }; | ||
79 | |||
80 | |||
/**
 * Report the NUL-terminated string s (not owned by the macro) as meta
 * data of type t.  Must be expanded inside a function that has 'proc',
 * 'proc_cls' and an 'int ret' in scope and defines a 'FINISH' label;
 * control jumps there when the processor returns non-zero.
 * The argument s is evaluated exactly once.
 */
#define ADD(t,s) do { \
    const char *add_s_ = (s); \
    if (0 != (ret = proc (proc_cls, "png", (t), EXTRACTOR_METAFORMAT_UTF8, "text/plain", add_s_, strlen (add_s_) + 1))) \
      goto FINISH; \
  } while (0)

/**
 * Like ADD, but takes ownership of the heap-allocated string s and
 * always releases it with free().  The argument is evaluated exactly
 * once, so passing an allocating call such as stndup(...) neither
 * leaks nor frees a different buffer than the one reported.
 * A NULL string (failed allocation) is silently skipped.
 */
#define ADDF(t,s) do { \
    char *addf_s_ = (s); \
    if (NULL != addf_s_) \
      { \
        ret = proc (proc_cls, "png", (t), EXTRACTOR_METAFORMAT_UTF8, "text/plain", addf_s_, strlen (addf_s_) + 1); \
        free (addf_s_); \
        if (0 != ret) \
          goto FINISH; \
      } \
  } while (0)
83 | |||
84 | |||
85 | static int | ||
86 | processtEXt (const char *data, | ||
87 | unsigned int length, | ||
88 | EXTRACTOR_MetaDataProcessor proc, | ||
89 | void *proc_cls) | ||
90 | { | ||
91 | char *keyword; | ||
92 | unsigned int off; | ||
93 | int i; | ||
94 | int ret; | ||
95 | |||
96 | data += 4; | ||
97 | off = stnlen (data, length) + 1; | ||
98 | if (off >= length) | ||
99 | return 0; /* failed to find '\0' */ | ||
100 | keyword = EXTRACTOR_common_convert_to_utf8 (&data[off], length - off, "ISO-8859-1"); | ||
101 | i = 0; | ||
102 | ret = 0; | ||
103 | while (tagmap[i].name != NULL) | ||
104 | { | ||
105 | if (0 == strcmp (tagmap[i].name, data)) | ||
106 | { | ||
107 | ADDF (tagmap[i].type, keyword); | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | i++; | ||
112 | } | ||
113 | ADDF (EXTRACTOR_METATYPE_KEYWORDS, keyword); | ||
114 | FINISH: | ||
115 | return ret; | ||
116 | } | ||
117 | |||
118 | static int | ||
119 | processiTXt (const char *data, | ||
120 | unsigned int length, | ||
121 | EXTRACTOR_MetaDataProcessor proc, | ||
122 | void *proc_cls) | ||
123 | { | ||
124 | unsigned int pos; | ||
125 | char *keyword; | ||
126 | const char *language; | ||
127 | const char *translated; | ||
128 | int i; | ||
129 | int compressed; | ||
130 | char *buf; | ||
131 | uLongf bufLen; | ||
132 | int ret; | ||
133 | int zret; | ||
134 | |||
135 | pos = stnlen (data, length) + 1; | ||
136 | if (pos + 3 >= length) | ||
137 | return 0; | ||
138 | compressed = data[pos++]; | ||
139 | if (compressed && (data[pos++] != 0)) | ||
140 | return 0; /* bad compression method */ | ||
141 | language = &data[pos]; | ||
142 | ret = 0; | ||
143 | if (stnlen (language, length - pos) > 0) | ||
144 | ADDF (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE, | ||
145 | stndup (language, length - pos)); | ||
146 | pos += stnlen (language, length - pos) + 1; | ||
147 | if (pos + 1 >= length) | ||
148 | return 0; | ||
149 | translated = &data[pos]; /* already in utf-8! */ | ||
150 | if (stnlen (translated, length - pos) > 0) | ||
151 | ADDF (EXTRACTOR_METATYPE_KEYWORDS, | ||
152 | stndup (translated, length - pos)); | ||
153 | pos += stnlen (translated, length - pos) + 1; | ||
154 | if (pos >= length) | ||
155 | return 0; | ||
156 | |||
157 | if (compressed) | ||
158 | { | ||
159 | bufLen = 1024 + 2 * (length - pos); | ||
160 | while (1) | ||
161 | { | ||
162 | if (bufLen * 2 < bufLen) | ||
163 | return 0; | ||
164 | bufLen *= 2; | ||
165 | if (bufLen > 50 * (length - pos)) | ||
166 | { | ||
167 | /* printf("zlib problem"); */ | ||
168 | return 0; | ||
169 | } | ||
170 | buf = malloc (bufLen); | ||
171 | if (buf == NULL) | ||
172 | { | ||
173 | /* printf("out of memory"); */ | ||
174 | return 0; /* out of memory */ | ||
175 | } | ||
176 | zret = uncompress ((Bytef *) buf, | ||
177 | &bufLen, | ||
178 | (const Bytef *) &data[pos], length - pos); | ||
179 | if (zret == Z_OK) | ||
180 | { | ||
181 | /* printf("zlib ok"); */ | ||
182 | break; | ||
183 | } | ||
184 | free (buf); | ||
185 | if (zret != Z_BUF_ERROR) | ||
186 | return 0; /* unknown error, abort */ | ||
187 | } | ||
188 | keyword = stndup (buf, bufLen); | ||
189 | free (buf); | ||
190 | } | ||
191 | else | ||
192 | { | ||
193 | keyword = stndup (&data[pos], length - pos); | ||
194 | } | ||
195 | i = 0; | ||
196 | while (tagmap[i].name != NULL) | ||
197 | { | ||
198 | if (0 == strcmp (tagmap[i].name, data)) | ||
199 | { | ||
200 | ADDF (tagmap[i].type, keyword /* already in utf8 */); | ||
201 | return 0; | ||
202 | } | ||
203 | i++; | ||
204 | } | ||
205 | ADDF (EXTRACTOR_METATYPE_COMMENT, keyword); | ||
206 | FINISH: | ||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | |||
211 | static int | ||
212 | processIHDR (const char *data, | ||
213 | unsigned int length, | ||
214 | EXTRACTOR_MetaDataProcessor proc, | ||
215 | void *proc_cls) | ||
216 | { | ||
217 | char tmp[128]; | ||
218 | int ret; | ||
219 | |||
220 | if (length < 12) | ||
221 | return 0; | ||
222 | ret = 0; | ||
223 | snprintf (tmp, | ||
224 | sizeof(tmp), | ||
225 | "%ux%u", | ||
226 | htonl (getIntAt (&data[4])), htonl (getIntAt (&data[8]))); | ||
227 | ADD (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, tmp); | ||
228 | FINISH: | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | static int | ||
233 | processzTXt (const char *data, | ||
234 | unsigned int length, | ||
235 | EXTRACTOR_MetaDataProcessor proc, | ||
236 | void *proc_cls) | ||
237 | { | ||
238 | char *keyword; | ||
239 | unsigned int off; | ||
240 | int i; | ||
241 | char *buf; | ||
242 | uLongf bufLen; | ||
243 | int ret; | ||
244 | int zret; | ||
245 | |||
246 | data += 4; | ||
247 | off = stnlen (data, length) + 1; | ||
248 | if (off >= length) | ||
249 | return 0; /* failed to find '\0' */ | ||
250 | if (data[off] != 0) | ||
251 | return 0; /* compression method must be 0 */ | ||
252 | off++; | ||
253 | |||
254 | bufLen = 1024 + 2 * (length - off); | ||
255 | while (1) | ||
256 | { | ||
257 | if (bufLen * 2 < bufLen) | ||
258 | return 0; | ||
259 | bufLen *= 2; | ||
260 | if (bufLen > 50 * (length - off)) | ||
261 | { | ||
262 | /* printf("zlib problem"); */ | ||
263 | return 0; | ||
264 | } | ||
265 | buf = malloc (bufLen); | ||
266 | if (buf == NULL) | ||
267 | { | ||
268 | /* printf("out of memory"); */ | ||
269 | return 0; /* out of memory */ | ||
270 | } | ||
271 | zret = uncompress ((Bytef *) buf, | ||
272 | &bufLen, (const Bytef *) &data[off], length - off); | ||
273 | if (zret == Z_OK) | ||
274 | { | ||
275 | /* printf("zlib ok"); */ | ||
276 | break; | ||
277 | } | ||
278 | free (buf); | ||
279 | if (zret != Z_BUF_ERROR) | ||
280 | return 0; /* unknown error, abort */ | ||
281 | } | ||
282 | keyword = EXTRACTOR_common_convert_to_utf8 (buf, bufLen, "ISO-8859-1"); | ||
283 | free (buf); | ||
284 | i = 0; | ||
285 | while (tagmap[i].name != NULL) | ||
286 | { | ||
287 | if (0 == strcmp (tagmap[i].name, data)) | ||
288 | { | ||
289 | ADDF (tagmap[i].type, keyword); | ||
290 | return 0; | ||
291 | } | ||
292 | i++; | ||
293 | } | ||
294 | ADDF (EXTRACTOR_METATYPE_COMMENT, keyword); | ||
295 | FINISH: | ||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | static int | ||
300 | processtIME (const char *data, | ||
301 | unsigned int length, | ||
302 | EXTRACTOR_MetaDataProcessor proc, | ||
303 | void *proc_cls) | ||
304 | { | ||
305 | unsigned short y; | ||
306 | unsigned int year; | ||
307 | unsigned int mo; | ||
308 | unsigned int day; | ||
309 | unsigned int h; | ||
310 | unsigned int m; | ||
311 | unsigned int s; | ||
312 | char val[256]; | ||
313 | int ret; | ||
314 | |||
315 | if (length != 7) | ||
316 | return 0; | ||
317 | ret = 0; | ||
318 | memcpy (&y, &data[4], sizeof (unsigned short)); | ||
319 | year = ntohs (y); | ||
320 | mo = (unsigned char) data[6]; | ||
321 | day = (unsigned char) data[7]; | ||
322 | h = (unsigned char) data[8]; | ||
323 | m = (unsigned char) data[9]; | ||
324 | s = (unsigned char) data[10]; | ||
325 | snprintf (val, | ||
326 | sizeof(val), | ||
327 | "%04u-%02u-%02u %02d:%02d:%02d", year, mo, day, h, m, s); | ||
328 | ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, val); | ||
329 | FINISH: | ||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | #define PNG_HEADER "\211PNG\r\n\032\n" | ||
334 | |||
335 | |||
336 | |||
337 | int | ||
338 | EXTRACTOR_png_extract (const char *data, | ||
339 | size_t size, | ||
340 | EXTRACTOR_MetaDataProcessor proc, | ||
341 | void *proc_cls, | ||
342 | const char *options) | ||
343 | { | ||
344 | const char *pos; | ||
345 | const char *end; | ||
346 | unsigned int length; | ||
347 | int ret; | ||
348 | |||
349 | if (size < strlen (PNG_HEADER)) | ||
350 | return 0; | ||
351 | if (0 != strncmp (data, PNG_HEADER, strlen (PNG_HEADER))) | ||
352 | return 0; | ||
353 | end = &data[size]; | ||
354 | pos = &data[strlen (PNG_HEADER)]; | ||
355 | ADD (EXTRACTOR_METATYPE_MIMETYPE, "image/png"); | ||
356 | ret = 0; | ||
357 | while (ret == 0) | ||
358 | { | ||
359 | if (pos + 12 >= end) | ||
360 | break; | ||
361 | length = htonl (getIntAt (pos)); | ||
362 | pos += 4; | ||
363 | /* printf("Length: %u, pos %u\n", length, pos - data); */ | ||
364 | if ((pos + 4 + length + 4 > end) || (pos + 4 + length + 4 < pos + 8)) | ||
365 | break; | ||
366 | if (0 == strncmp (pos, "IHDR", 4)) | ||
367 | ret = processIHDR (pos, length, proc, proc_cls); | ||
368 | if (0 == strncmp (pos, "iTXt", 4)) | ||
369 | ret = processiTXt (pos, length, proc, proc_cls); | ||
370 | if (0 == strncmp (pos, "tEXt", 4)) | ||
371 | ret = processtEXt (pos, length, proc, proc_cls); | ||
372 | if (0 == strncmp (pos, "zTXt", 4)) | ||
373 | ret = processzTXt (pos, length, proc, proc_cls); | ||
374 | if (0 == strncmp (pos, "tIME", 4)) | ||
375 | ret = processtIME (pos, length, proc, proc_cls); | ||
376 | pos += 4 + length + 4; /* Chunk type, data, crc */ | ||
377 | } | ||
378 | FINISH: | ||
379 | return ret; | ||
380 | } | ||