aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/png_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/png_extractor.c')
-rw-r--r--src/plugins/png_extractor.c380
1 files changed, 380 insertions, 0 deletions
diff --git a/src/plugins/png_extractor.c b/src/plugins/png_extractor.c
new file mode 100644
index 0000000..5fb2b9d
--- /dev/null
+++ b/src/plugins/png_extractor.c
@@ -0,0 +1,380 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23#include <zlib.h>
24#include "convert.h"
25
/**
 * Copy the first n bytes of str into a freshly allocated buffer and
 * append a 0-terminator.
 *
 * @param str source bytes; at least n bytes must be readable
 * @param n number of bytes to copy
 * @return newly allocated copy that the caller must free(), or
 *         NULL if n is too large or the allocation failed
 */
static char *
stndup (const char *str, size_t n)
{
  char *tmp;

  if (n + 1 < n)
    return NULL;                /* n + 1 would wrap around */
  tmp = malloc (n + 1);
  if (NULL == tmp)
    return NULL;                /* out of memory */
  memcpy (tmp, str, n);
  tmp[n] = '\0';
  return tmp;
}
35
/**
 * Bounded string length: count the bytes of str that come before
 * the first 0-terminator, examining at most maxlen bytes.  Local
 * replacement so that the code does not rely on strnlen.
 *
 * @param str buffer to scan
 * @param maxlen maximum number of bytes to look at
 * @return number of leading non-zero bytes, never more than maxlen
 */
static size_t
stnlen (const char *str, size_t maxlen)
{
  size_t n;

  for (n = 0; n < maxlen; n++)
    {
      if ('\0' == str[n])
        break;
    }
  return n;
}
49
50
/**
 * Read an int from a possibly unaligned location.
 *
 * The bytes are copied into a real int object, so the access is
 * properly aligned and does not read a char array through an int
 * lvalue.  No byte-order conversion is done here.
 *
 * @param pos address of the value; sizeof (int) bytes must be
 *        readable (the callers in this file read 4-byte PNG fields,
 *        i.e. a 32-bit int is assumed)
 * @return the int stored at pos, in host memory representation
 */
static int
getIntAt (const void *pos)
{
  int value;

  memcpy (&value, pos, sizeof (value));
  return value;
}
59
60
61static struct
62{
63 char *name;
64 enum EXTRACTOR_MetaType type;
65} tagmap[] =
66{
67 { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME},
68 { "Description", EXTRACTOR_METATYPE_DESCRIPTION},
69 { "Comment", EXTRACTOR_METATYPE_COMMENT},
70 { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT},
71 { "Source", EXTRACTOR_METATYPE_SOURCE_DEVICE },
72 { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE},
73 { "Title", EXTRACTOR_METATYPE_TITLE},
74 { "Software", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
75 { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER},
76 { "Warning", EXTRACTOR_METATYPE_WARNING},
77 { NULL, EXTRACTOR_METATYPE_RESERVED }
78};
79
80
/**
 * Report the 0-terminated string s as meta data of type t.
 *
 * Both macros expect 'proc', 'proc_cls', an int 'ret' and a label
 * 'FINISH' in the enclosing function.  The result of 'proc' is stored
 * in 'ret'; if it is non-zero, control jumps to FINISH.  The argument
 * s is evaluated exactly once, so it may be an allocating expression.
 * The plugin name reported to 'proc' is "png" (it used to be "tar",
 * left over from another plugin).
 */
#define ADD(t,s) do { \
    const char *add_s_ = (s); \
    if (0 != (ret = proc (proc_cls, "png", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", add_s_, strlen (add_s_) + 1))) \
      goto FINISH; \
  } while (0)

/**
 * Like ADD, but s is a heap-allocated string that is always released
 * with free() after the call.  A NULL s is skipped without calling
 * 'proc' and leaves 'ret' untouched.
 */
#define ADDF(t,s) do { \
    char *addf_s_ = (s); \
    if (NULL != addf_s_) \
      { \
        ret = proc (proc_cls, "png", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", addf_s_, strlen (addf_s_) + 1); \
        free (addf_s_); \
        if (0 != ret) \
          goto FINISH; \
      } \
  } while (0)
83
84
85static int
86processtEXt (const char *data,
87 unsigned int length,
88 EXTRACTOR_MetaDataProcessor proc,
89 void *proc_cls)
90{
91 char *keyword;
92 unsigned int off;
93 int i;
94 int ret;
95
96 data += 4;
97 off = stnlen (data, length) + 1;
98 if (off >= length)
99 return 0; /* failed to find '\0' */
100 keyword = EXTRACTOR_common_convert_to_utf8 (&data[off], length - off, "ISO-8859-1");
101 i = 0;
102 ret = 0;
103 while (tagmap[i].name != NULL)
104 {
105 if (0 == strcmp (tagmap[i].name, data))
106 {
107 ADDF (tagmap[i].type, keyword);
108 return 0;
109 }
110
111 i++;
112 }
113 ADDF (EXTRACTOR_METATYPE_KEYWORDS, keyword);
114 FINISH:
115 return ret;
116}
117
118static int
119processiTXt (const char *data,
120 unsigned int length,
121 EXTRACTOR_MetaDataProcessor proc,
122 void *proc_cls)
123{
124 unsigned int pos;
125 char *keyword;
126 const char *language;
127 const char *translated;
128 int i;
129 int compressed;
130 char *buf;
131 uLongf bufLen;
132 int ret;
133 int zret;
134
135 pos = stnlen (data, length) + 1;
136 if (pos + 3 >= length)
137 return 0;
138 compressed = data[pos++];
139 if (compressed && (data[pos++] != 0))
140 return 0; /* bad compression method */
141 language = &data[pos];
142 ret = 0;
143 if (stnlen (language, length - pos) > 0)
144 ADDF (EXTRACTOR_METATYPE_DOCUMENT_LANGUAGE,
145 stndup (language, length - pos));
146 pos += stnlen (language, length - pos) + 1;
147 if (pos + 1 >= length)
148 return 0;
149 translated = &data[pos]; /* already in utf-8! */
150 if (stnlen (translated, length - pos) > 0)
151 ADDF (EXTRACTOR_METATYPE_KEYWORDS,
152 stndup (translated, length - pos));
153 pos += stnlen (translated, length - pos) + 1;
154 if (pos >= length)
155 return 0;
156
157 if (compressed)
158 {
159 bufLen = 1024 + 2 * (length - pos);
160 while (1)
161 {
162 if (bufLen * 2 < bufLen)
163 return 0;
164 bufLen *= 2;
165 if (bufLen > 50 * (length - pos))
166 {
167 /* printf("zlib problem"); */
168 return 0;
169 }
170 buf = malloc (bufLen);
171 if (buf == NULL)
172 {
173 /* printf("out of memory"); */
174 return 0; /* out of memory */
175 }
176 zret = uncompress ((Bytef *) buf,
177 &bufLen,
178 (const Bytef *) &data[pos], length - pos);
179 if (zret == Z_OK)
180 {
181 /* printf("zlib ok"); */
182 break;
183 }
184 free (buf);
185 if (zret != Z_BUF_ERROR)
186 return 0; /* unknown error, abort */
187 }
188 keyword = stndup (buf, bufLen);
189 free (buf);
190 }
191 else
192 {
193 keyword = stndup (&data[pos], length - pos);
194 }
195 i = 0;
196 while (tagmap[i].name != NULL)
197 {
198 if (0 == strcmp (tagmap[i].name, data))
199 {
200 ADDF (tagmap[i].type, keyword /* already in utf8 */);
201 return 0;
202 }
203 i++;
204 }
205 ADDF (EXTRACTOR_METATYPE_COMMENT, keyword);
206 FINISH:
207 return ret;
208}
209
210
211static int
212processIHDR (const char *data,
213 unsigned int length,
214 EXTRACTOR_MetaDataProcessor proc,
215 void *proc_cls)
216{
217 char tmp[128];
218 int ret;
219
220 if (length < 12)
221 return 0;
222 ret = 0;
223 snprintf (tmp,
224 sizeof(tmp),
225 "%ux%u",
226 htonl (getIntAt (&data[4])), htonl (getIntAt (&data[8])));
227 ADD (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, tmp);
228 FINISH:
229 return ret;
230}
231
232static int
233processzTXt (const char *data,
234 unsigned int length,
235 EXTRACTOR_MetaDataProcessor proc,
236 void *proc_cls)
237{
238 char *keyword;
239 unsigned int off;
240 int i;
241 char *buf;
242 uLongf bufLen;
243 int ret;
244 int zret;
245
246 data += 4;
247 off = stnlen (data, length) + 1;
248 if (off >= length)
249 return 0; /* failed to find '\0' */
250 if (data[off] != 0)
251 return 0; /* compression method must be 0 */
252 off++;
253
254 bufLen = 1024 + 2 * (length - off);
255 while (1)
256 {
257 if (bufLen * 2 < bufLen)
258 return 0;
259 bufLen *= 2;
260 if (bufLen > 50 * (length - off))
261 {
262 /* printf("zlib problem"); */
263 return 0;
264 }
265 buf = malloc (bufLen);
266 if (buf == NULL)
267 {
268 /* printf("out of memory"); */
269 return 0; /* out of memory */
270 }
271 zret = uncompress ((Bytef *) buf,
272 &bufLen, (const Bytef *) &data[off], length - off);
273 if (zret == Z_OK)
274 {
275 /* printf("zlib ok"); */
276 break;
277 }
278 free (buf);
279 if (zret != Z_BUF_ERROR)
280 return 0; /* unknown error, abort */
281 }
282 keyword = EXTRACTOR_common_convert_to_utf8 (buf, bufLen, "ISO-8859-1");
283 free (buf);
284 i = 0;
285 while (tagmap[i].name != NULL)
286 {
287 if (0 == strcmp (tagmap[i].name, data))
288 {
289 ADDF (tagmap[i].type, keyword);
290 return 0;
291 }
292 i++;
293 }
294 ADDF (EXTRACTOR_METATYPE_COMMENT, keyword);
295 FINISH:
296 return ret;
297}
298
299static int
300processtIME (const char *data,
301 unsigned int length,
302 EXTRACTOR_MetaDataProcessor proc,
303 void *proc_cls)
304{
305 unsigned short y;
306 unsigned int year;
307 unsigned int mo;
308 unsigned int day;
309 unsigned int h;
310 unsigned int m;
311 unsigned int s;
312 char val[256];
313 int ret;
314
315 if (length != 7)
316 return 0;
317 ret = 0;
318 memcpy (&y, &data[4], sizeof (unsigned short));
319 year = ntohs (y);
320 mo = (unsigned char) data[6];
321 day = (unsigned char) data[7];
322 h = (unsigned char) data[8];
323 m = (unsigned char) data[9];
324 s = (unsigned char) data[10];
325 snprintf (val,
326 sizeof(val),
327 "%04u-%02u-%02u %02d:%02d:%02d", year, mo, day, h, m, s);
328 ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, val);
329 FINISH:
330 return ret;
331}
332
333#define PNG_HEADER "\211PNG\r\n\032\n"
334
335
336
337int
338EXTRACTOR_png_extract (const char *data,
339 size_t size,
340 EXTRACTOR_MetaDataProcessor proc,
341 void *proc_cls,
342 const char *options)
343{
344 const char *pos;
345 const char *end;
346 unsigned int length;
347 int ret;
348
349 if (size < strlen (PNG_HEADER))
350 return 0;
351 if (0 != strncmp (data, PNG_HEADER, strlen (PNG_HEADER)))
352 return 0;
353 end = &data[size];
354 pos = &data[strlen (PNG_HEADER)];
355 ADD (EXTRACTOR_METATYPE_MIMETYPE, "image/png");
356 ret = 0;
357 while (ret == 0)
358 {
359 if (pos + 12 >= end)
360 break;
361 length = htonl (getIntAt (pos));
362 pos += 4;
363 /* printf("Length: %u, pos %u\n", length, pos - data); */
364 if ((pos + 4 + length + 4 > end) || (pos + 4 + length + 4 < pos + 8))
365 break;
366 if (0 == strncmp (pos, "IHDR", 4))
367 ret = processIHDR (pos, length, proc, proc_cls);
368 if (0 == strncmp (pos, "iTXt", 4))
369 ret = processiTXt (pos, length, proc, proc_cls);
370 if (0 == strncmp (pos, "tEXt", 4))
371 ret = processtEXt (pos, length, proc, proc_cls);
372 if (0 == strncmp (pos, "zTXt", 4))
373 ret = processzTXt (pos, length, proc, proc_cls);
374 if (0 == strncmp (pos, "tIME", 4))
375 ret = processtIME (pos, length, proc, proc_cls);
376 pos += 4 + length + 4; /* Chunk type, data, crc */
377 }
378 FINISH:
379 return ret;
380}