diff options
Diffstat (limited to 'src/plugins/jpeg_extractor.c')
-rw-r--r-- | src/plugins/jpeg_extractor.c | 353 |
1 files changed, 123 insertions, 230 deletions
diff --git a/src/plugins/jpeg_extractor.c b/src/plugins/jpeg_extractor.c index a0722d4..2ef0677 100644 --- a/src/plugins/jpeg_extractor.c +++ b/src/plugins/jpeg_extractor.c | |||
@@ -1,10 +1,10 @@ | |||
1 | /* | 1 | /* |
2 | This file is part of libextractor. | 2 | This file is part of libextractor. |
3 | (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff | 3 | (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff |
4 | 4 | ||
5 | libextractor is free software; you can redistribute it and/or modify | 5 | libextractor is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published | 6 | it under the terms of the GNU General Public License as published |
7 | by the Free Software Foundation; either version 2, or (at your | 7 | by the Free Software Foundation; either version 3, or (at your |
8 | option) any later version. | 8 | option) any later version. |
9 | 9 | ||
10 | libextractor is distributed in the hope that it will be useful, but | 10 | libextractor is distributed in the hope that it will be useful, but |
@@ -17,261 +17,154 @@ | |||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
18 | Boston, MA 02111-1307, USA. | 18 | Boston, MA 02111-1307, USA. |
19 | */ | 19 | */ |
20 | 20 | /** | |
21 | * @file plugins/jpeg_extractor.c | ||
22 | * @brief plugin to support JPEG files | ||
23 | * @author Christian Grothoff | ||
24 | */ | ||
21 | #include "platform.h" | 25 | #include "platform.h" |
22 | #include "extractor.h" | 26 | #include "extractor.h" |
27 | #include <jpeglib.h> | ||
28 | #include <setjmp.h> | ||
23 | 29 | ||
24 | 30 | ||
25 | #define M_SOI 0xD8 /* Start Of Image (beginning of datastream) */ | ||
26 | #define M_EOI 0xD9 /* End Of Image (end of datastream) */ | ||
27 | #define M_SOS 0xDA /* Start Of Scan (begins compressed data) */ | ||
28 | #define M_APP12 0xEC | ||
29 | #define M_COM 0xFE /* COMment */ | ||
30 | #define M_APP0 0xE0 | ||
31 | |||
32 | /** | 31 | /** |
33 | * Get the next character in the sequence and advance | 32 | * Context for custom functions. |
34 | * the pointer *data to the next location in the sequence. | ||
35 | * If we're at the end, return -1. | ||
36 | */ | 33 | */ |
37 | #define NEXTC(data,end) ((*(data)<(end))?*((*(data))++):-1) | 34 | struct Context |
35 | { | ||
36 | /** | ||
37 | * Environment for longjmp from within error_exit handler. | ||
38 | */ | ||
39 | jmp_buf env; | ||
40 | }; | ||
38 | 41 | ||
39 | /* The macro does: | ||
40 | unsigned int NEXTC(unsigned char ** data, char * end) { | ||
41 | if (*data < end) { | ||
42 | char result = **data; | ||
43 | (*data)++; | ||
44 | return result; | ||
45 | } else | ||
46 | return -1; | ||
47 | } | ||
48 | */ | ||
49 | 42 | ||
50 | /** | 43 | /** |
51 | * Read length, convert to unsigned int. | 44 | * Function used to avoid having libjpeg write error messages to the console. |
52 | * All 2-byte quantities in JPEG markers are MSB first | ||
53 | * @return -1 on error | ||
54 | */ | 45 | */ |
55 | static int | 46 | static void |
56 | readLength (const unsigned char **data, const unsigned char *end) | 47 | no_emit (j_common_ptr cinfo, int msg_level) |
57 | { | 48 | { |
58 | int c1; | 49 | /* do nothing */ |
59 | int c2; | ||
60 | |||
61 | c1 = NEXTC (data, end); | ||
62 | if (c1 == -1) | ||
63 | return -1; | ||
64 | c2 = NEXTC (data, end); | ||
65 | if (c2 == -1) | ||
66 | return -1; | ||
67 | return ((((unsigned int) c1) << 8) + ((unsigned int) c2)) - 2; | ||
68 | } | 50 | } |
69 | 51 | ||
52 | |||
70 | /** | 53 | /** |
71 | * @return the next marker or -1 on error. | 54 | * Function used to avoid having libjpeg write error messages to the console. |
72 | */ | 55 | */ |
73 | static int | 56 | static void |
74 | next_marker (const unsigned char **data, const unsigned char *end) | 57 | no_output (j_common_ptr cinfo) |
75 | { | 58 | { |
76 | int c; | 59 | /* do nothing */ |
77 | c = NEXTC (data, end); | ||
78 | while ((c != 0xFF) && (c != -1)) | ||
79 | c = NEXTC (data, end); | ||
80 | do | ||
81 | { | ||
82 | c = NEXTC (data, end); | ||
83 | } | ||
84 | while (c == 0xFF); | ||
85 | return c; | ||
86 | } | 60 | } |
87 | 61 | ||
62 | |||
63 | /** | ||
64 | * Function used to avoid having libjpeg kill our process. | ||
65 | */ | ||
88 | static void | 66 | static void |
89 | skip_variable (const unsigned char **data, const unsigned char *end) | 67 | no_exit (j_common_ptr cinfo) |
90 | { | 68 | { |
91 | int length; | 69 | struct Context *ctx = cinfo->client_data; |
92 | 70 | ||
93 | length = readLength (data, end); | 71 | /* we're not allowed to return (by API definition), |
94 | if (length < 0) | 72 | and we don't want to abort/exit. So we longjmp |
95 | { | 73 | to our cleanup code instead. */ |
96 | (*data) = end; /* skip to the end */ | 74 | longjmp (ctx->env, 1); |
97 | return; | ||
98 | } | ||
99 | /* Skip over length bytes */ | ||
100 | (*data) += length; | ||
101 | } | 75 | } |
102 | 76 | ||
103 | static char * | ||
104 | process_COM (const unsigned char **data, const unsigned char *end) | ||
105 | { | ||
106 | unsigned int length; | ||
107 | int ch; | ||
108 | int pos; | ||
109 | char *comment; | ||
110 | 77 | ||
111 | length = readLength (data, end); | 78 | /** |
112 | if (length <= 0) | 79 | * Main entry method for the 'image/jpeg' extraction plugin. |
113 | return NULL; | 80 | * |
114 | comment = malloc (length + 1); | 81 | * @param ec extraction context provided to the plugin |
115 | if (comment == NULL) | 82 | */ |
116 | return NULL; | 83 | void |
117 | pos = 0; | 84 | EXTRACTOR_jpeg_extract_method (struct EXTRACTOR_ExtractContext *ec) |
118 | while (length > 0) | 85 | { |
86 | struct jpeg_decompress_struct jds; | ||
87 | struct jpeg_error_mgr em; | ||
88 | void *buf; | ||
89 | ssize_t size; | ||
90 | int is_jpeg; | ||
91 | unsigned int rounds; | ||
92 | char format[128]; | ||
93 | struct jpeg_marker_struct *mptr; | ||
94 | struct Context ctx; | ||
95 | |||
96 | is_jpeg = 0; | ||
97 | rounds = 0; /* used to avoid going on forever for non-jpeg files */ | ||
98 | jpeg_std_error (&em); | ||
99 | em.emit_message = &no_emit; | ||
100 | em.output_message = &no_output; | ||
101 | em.error_exit = &no_exit; | ||
102 | jds.client_data = &ctx; | ||
103 | if (1 == setjmp (ctx.env)) | ||
104 | goto EXIT; /* we get here if libjpeg calls 'no_exit' because it wants to die */ | ||
105 | jds.err = &em; | ||
106 | jpeg_create_decompress (&jds); | ||
107 | jpeg_save_markers (&jds, JPEG_COM, 1024 * 8); | ||
108 | while ( (1 == is_jpeg) || (rounds++ < 8) ) | ||
119 | { | 109 | { |
120 | ch = NEXTC (data, end); | 110 | if (-1 == (size = ec->read (ec->cls, |
121 | if ((ch == '\r') || (ch == '\n')) | 111 | &buf, |
122 | comment[pos++] = '\n'; | 112 | 16 * 1024))) |
123 | else if (isprint ((unsigned char) ch)) | 113 | break; |
124 | comment[pos++] = ch; | 114 | if (0 == size) |
125 | length--; | 115 | break; |
116 | jpeg_mem_src (&jds, buf, size); | ||
117 | if (0 == is_jpeg) | ||
118 | { | ||
119 | if (JPEG_HEADER_OK == jpeg_read_header (&jds, 1)) | ||
120 | is_jpeg = 1; /* ok, really a jpeg, keep going until the end */ | ||
121 | continue; | ||
122 | } | ||
123 | jpeg_consume_input (&jds); | ||
126 | } | 124 | } |
127 | comment[pos] = '\0'; | ||
128 | return comment; | ||
129 | } | ||
130 | |||
131 | 125 | ||
132 | int | 126 | if (1 != is_jpeg) |
133 | EXTRACTOR_jpeg_extract (const unsigned char *data, | 127 | goto EXIT; |
134 | size_t size, | 128 | if (0 != |
135 | EXTRACTOR_MetaDataProcessor proc, | 129 | ec->proc (ec->cls, |
136 | void *proc_cls, | 130 | "jpeg", |
137 | const char *options) | 131 | EXTRACTOR_METATYPE_MIMETYPE, |
138 | { | 132 | EXTRACTOR_METAFORMAT_UTF8, |
139 | int c1; | 133 | "text/plain", |
140 | int c2; | 134 | "image/jpeg", |
141 | int marker; | 135 | strlen ("image/jpeg") + 1)) |
142 | const unsigned char *end; | 136 | goto EXIT; |
143 | char *tmp; | 137 | snprintf (format, |
144 | char val[128]; | 138 | sizeof (format), |
145 | 139 | "%ux%u", | |
146 | if (size < 0x12) | 140 | (unsigned int) jds.image_width, |
147 | return 0; | 141 | (unsigned int) jds.image_height); |
148 | end = &data[size]; | 142 | if (0 != |
149 | c1 = NEXTC (&data, end); | 143 | ec->proc (ec->cls, |
150 | c2 = NEXTC (&data, end); | 144 | "jpeg", |
151 | if ((c1 != 0xFF) || (c2 != M_SOI)) | 145 | EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, |
152 | return 0; /* not a JPEG */ | 146 | EXTRACTOR_METAFORMAT_UTF8, |
153 | if (0 != proc (proc_cls, | 147 | "text/plain", |
154 | "jpeg", | 148 | format, |
155 | EXTRACTOR_METATYPE_MIMETYPE, | 149 | strlen (format) + 1)) |
156 | EXTRACTOR_METAFORMAT_UTF8, | 150 | goto EXIT; |
157 | "text/plain", | 151 | for (mptr = jds.marker_list; NULL != mptr; mptr = mptr->next) |
158 | "image/jpeg", | ||
159 | strlen ("image/jpeg")+1)) | ||
160 | return 1; | ||
161 | while (1) | ||
162 | { | 152 | { |
163 | marker = next_marker (&data, end); | 153 | if (JPEG_COM != mptr->marker) |
164 | switch (marker) | 154 | continue; |
165 | { | 155 | if (0 != |
166 | case -1: /* end of file */ | 156 | ec->proc (ec->cls, |
167 | case M_SOS: | 157 | "jpeg", |
168 | case M_EOI: | 158 | EXTRACTOR_METATYPE_COMMENT, |
169 | goto RETURN; | 159 | EXTRACTOR_METAFORMAT_C_STRING, |
170 | case M_APP0: | 160 | "text/plain", |
171 | { | 161 | (const char *) mptr->data, |
172 | int len = readLength (&data, end); | 162 | mptr->data_length)) |
173 | if (len < 0x8) | 163 | goto EXIT; |
174 | goto RETURN; | ||
175 | if (0 == strncmp ((char *) data, "JFIF", 4)) | ||
176 | { | ||
177 | switch (data[0x4]) | ||
178 | { | ||
179 | case 1: /* dots per inch */ | ||
180 | snprintf (val, | ||
181 | sizeof (val), | ||
182 | _("%ux%u dots per inch"), | ||
183 | (data[0x8] << 8) + data[0x9], | ||
184 | (data[0xA] << 8) + data[0xB]); | ||
185 | if (0 != proc (proc_cls, | ||
186 | "jpeg", | ||
187 | EXTRACTOR_METATYPE_IMAGE_RESOLUTION, | ||
188 | EXTRACTOR_METAFORMAT_UTF8, | ||
189 | "text/plain", | ||
190 | val, | ||
191 | strlen (val)+1)) | ||
192 | return 1; | ||
193 | break; | ||
194 | case 2: /* dots per cm */ | ||
195 | snprintf (val, | ||
196 | sizeof (val), | ||
197 | _("%ux%u dots per cm"), | ||
198 | (data[0x8] << 8) + data[0x9], | ||
199 | (data[0xA] << 8) + data[0xB]); | ||
200 | if (0 != proc (proc_cls, | ||
201 | "jpeg", | ||
202 | EXTRACTOR_METATYPE_IMAGE_RESOLUTION, | ||
203 | EXTRACTOR_METAFORMAT_UTF8, | ||
204 | "text/plain", | ||
205 | val, | ||
206 | strlen (val)+1)) | ||
207 | return 1; | ||
208 | break; | ||
209 | case 0: /* no unit given */ | ||
210 | snprintf (val, | ||
211 | sizeof (val), | ||
212 | _("%ux%u dots per inch?"), | ||
213 | (data[0x8] << 8) + data[0x9], | ||
214 | (data[0xA] << 8) + data[0xB]); | ||
215 | if (0 != proc (proc_cls, | ||
216 | "jpeg", | ||
217 | EXTRACTOR_METATYPE_IMAGE_RESOLUTION, | ||
218 | EXTRACTOR_METAFORMAT_UTF8, | ||
219 | "text/plain", | ||
220 | val, | ||
221 | strlen (val)+1)) | ||
222 | return 1; | ||
223 | break; | ||
224 | default: /* unknown unit */ | ||
225 | break; | ||
226 | } | ||
227 | } | ||
228 | data = &data[len]; | ||
229 | break; | ||
230 | } | ||
231 | case 0xC0: | ||
232 | { | ||
233 | int len = readLength (&data, end); | ||
234 | if (len < 0x9) | ||
235 | goto RETURN; | ||
236 | snprintf (val, | ||
237 | sizeof (val), | ||
238 | "%ux%u", | ||
239 | (data[0x3] << 8) + data[0x4], | ||
240 | (data[0x1] << 8) + data[0x2]); | ||
241 | if (0 != proc (proc_cls, | ||
242 | "jpeg", | ||
243 | EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, | ||
244 | EXTRACTOR_METAFORMAT_UTF8, | ||
245 | "text/plain", | ||
246 | val, | ||
247 | strlen (val)+1)) | ||
248 | return 1; | ||
249 | data = &data[len]; | ||
250 | break; | ||
251 | } | ||
252 | case M_COM: | ||
253 | case M_APP12: | ||
254 | tmp = process_COM (&data, end); | ||
255 | if (NULL == tmp) | ||
256 | break; | ||
257 | if (0 != proc (proc_cls, | ||
258 | "jpeg", | ||
259 | EXTRACTOR_METATYPE_COMMENT, | ||
260 | EXTRACTOR_METAFORMAT_UTF8, | ||
261 | "text/plain", | ||
262 | tmp, | ||
263 | strlen (tmp)+1)) | ||
264 | { | ||
265 | free (tmp); | ||
266 | return 1; | ||
267 | } | ||
268 | free (tmp); | ||
269 | break; | ||
270 | default: | ||
271 | skip_variable (&data, end); | ||
272 | break; | ||
273 | } | ||
274 | } | 164 | } |
275 | RETURN: | 165 | |
276 | return 0; | 166 | EXIT: |
167 | jpeg_destroy_decompress (&jds); | ||
277 | } | 168 | } |
169 | |||
170 | /* end of jpeg_extractor.c */ | ||