aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/old/tiff_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/old/tiff_extractor.c')
-rw-r--r--src/plugins/old/tiff_extractor.c262
1 files changed, 262 insertions, 0 deletions
diff --git a/src/plugins/old/tiff_extractor.c b/src/plugins/old/tiff_extractor.c
new file mode 100644
index 0000000..d944be1
--- /dev/null
+++ b/src/plugins/old/tiff_extractor.c
@@ -0,0 +1,262 @@
1/*
2 This file is part of libextractor.
3 (C) 2004, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23#include "pack.h"
24
25#define DEBUG 0
26
27static int
28addKeyword (EXTRACTOR_MetaDataProcessor proc,
29 void *proc_cls,
30 const char *keyword,
31 enum EXTRACTOR_MetaType type)
32{
33 if (keyword == NULL)
34 return 0;
35 return proc (proc_cls,
36 "tiff",
37 type,
38 EXTRACTOR_METAFORMAT_UTF8,
39 "text/plain",
40 keyword,
41 strlen(keyword)+1);
42}
43
/**
 * Decoded form of the header at the very start of the input.
 * The fields are filled, in declaration order, by
 * EXTRACTOR_common_cat_unpack using one of TIFF_HEADER_SPECS.
 */
typedef struct
{
  /* first unpacked field; the extractor itself decides the byte order
     from the raw bytes data[0]/data[1], not from this value */
  unsigned short byteorder;
  /* magic value; the extractor rejects the input unless this is 42 */
  unsigned short fourty_two;
  /* offset (from the start of the input) of the first directory (IFD) */
  unsigned int ifd_offset;
} TIFF_HEADER;
/* smallest input the extractor accepts; size of the packed header */
#define TIFF_HEADER_SIZE 8
/* expands to the pointer argument list matching TIFF_HEADER_SPECS */
#define TIFF_HEADER_FIELDS(p) \
  &(p)->byteorder, \
  &(p)->fourty_two, \
  &(p)->ifd_offset
/* unpack format strings, indexed by the extractor's byteOrder flag
   (0: do not convert, 1: do convert).
   NOTE(review): the meaning of the lower/upper-case letters is defined
   by pack.h, which is not visible here -- confirm there. */
static char *TIFF_HEADER_SPECS[] = {
  "hhw",
  "HHW",
};
59
/**
 * Decoded form of one directory (IFD) entry.
 * The fields are filled, in declaration order, by
 * EXTRACTOR_common_cat_unpack using one of DIRECTORY_ENTRY_SPECS.
 */
typedef struct
{
  /* tag number; compared against the TAG_* constants */
  unsigned short tag;
  /* value type; compared against the TYPE_* constants */
  unsigned short type;
  /* number of values; for TYPE_ASCII used as the string length in
     bytes including the terminating NUL */
  unsigned int count;
  /* either the value itself (width/length tags) or the offset of the
     value from the start of the input (ASCII tags) */
  unsigned int value_or_offset;
} DIRECTORY_ENTRY;
/* stride used by the extractor when stepping through a directory */
#define DIRECTORY_ENTRY_SIZE 12
/* expands to the pointer argument list matching DIRECTORY_ENTRY_SPECS */
#define DIRECTORY_ENTRY_FIELDS(p) \
  &(p)->tag, \
  &(p)->type, \
  &(p)->count, \
  &(p)->value_or_offset
/* unpack format strings, indexed by the extractor's byteOrder flag
   (0: do not convert, 1: do convert).
   NOTE(review): the meaning of the lower/upper-case letters is defined
   by pack.h, which is not visible here -- confirm there. */
static char *DIRECTORY_ENTRY_SPECS[] = {
  "hhww",
  "HHWW"
};
77
/* Directory entry tag numbers (TIFF 6.0 field numbers, in hex;
   the decimal value from the specification is given in the comment). */
#define TAG_LENGTH 0x101        /* 257: ImageLength */
#define TAG_WIDTH 0x100         /* 256: ImageWidth */
#define TAG_SOFTWARE 0x131      /* 305: Software */
#define TAG_DAYTIME 0x132       /* 306: DateTime */
/* 315: Artist.  This used to read 0x315, i.e. the decimal tag number
   written with a hex prefix, so the Artist entry was never matched. */
#define TAG_ARTIST 0x13B
#define TAG_COPYRIGHT 0x8298    /* 33432: Copyright */
#define TAG_DESCRIPTION 0x10E   /* 270: ImageDescription */
#define TAG_DOCUMENT_NAME 0x10D /* 269: DocumentName */
#define TAG_HOST 0x13C          /* 316: HostComputer */
#define TAG_SCANNER 0x110       /* 272: Model */
#define TAG_ORIENTATION 0x112   /* 274: Orientation */

/* Directory entry value types. */
#define TYPE_BYTE 1
#define TYPE_ASCII 2
#define TYPE_SHORT 3
#define TYPE_LONG 4
#define TYPE_RATIONAL 5
95
96static int
97addASCII (EXTRACTOR_MetaDataProcessor proc,
98 void *proc_cls,
99 const char *data,
100 size_t size, DIRECTORY_ENTRY * entry,
101 enum EXTRACTOR_MetaType type)
102{
103 if (entry->count > size)
104 return 0; /* invalid! */
105 if (entry->type != TYPE_ASCII)
106 return 0; /* huh? */
107 if (entry->count + entry->value_or_offset > size)
108 return 0;
109 if (data[entry->value_or_offset + entry->count - 1] != 0)
110 return 0;
111 return addKeyword (proc, proc_cls,
112 &data[entry->value_or_offset], type);
113}
114
115
116int
117EXTRACTOR_tiff_extract (const char *data,
118 size_t size,
119 EXTRACTOR_MetaDataProcessor proc,
120 void *proc_cls,
121 const char *options)
122{
123 TIFF_HEADER hdr;
124 int byteOrder; /* 0: do not convert;
125 1: do convert */
126 unsigned int current_ifd;
127 unsigned int length = -1;
128 unsigned int width = -1;
129
130 if (size < TIFF_HEADER_SIZE)
131 return 0; /* can not be tiff */
132 if ((data[0] == 0x49) && (data[1] == 0x49))
133 byteOrder = 0;
134 else if ((data[0] == 0x4D) && (data[1] == 0x4D))
135 byteOrder = 1;
136 else
137 return 0; /* can not be tiff */
138#if __BYTE_ORDER == __BIG_ENDIAN
139 byteOrder = 1 - byteOrder;
140#endif
141 EXTRACTOR_common_cat_unpack (data, TIFF_HEADER_SPECS[byteOrder], TIFF_HEADER_FIELDS (&hdr));
142 if (hdr.fourty_two != 42)
143 return 0; /* can not be tiff */
144 if (hdr.ifd_offset + 6 > size)
145 return 0; /* malformed tiff */
146 if (0 != addKeyword (proc, proc_cls, "image/tiff", EXTRACTOR_METATYPE_MIMETYPE))
147 return 1;
148 current_ifd = hdr.ifd_offset;
149 while (current_ifd != 0)
150 {
151 unsigned short len;
152 unsigned int off;
153 int i;
154 if ( (current_ifd + 6 > size) ||
155 (current_ifd + 6 < current_ifd) )
156 return 0;
157 if (byteOrder == 0)
158 len = data[current_ifd + 1] << 8 | data[current_ifd];
159 else
160 len = data[current_ifd] << 8 | data[current_ifd + 1];
161 if (len * DIRECTORY_ENTRY_SIZE + 2 + 4 + current_ifd > size)
162 {
163#if DEBUG
164 printf ("WARNING: malformed tiff\n");
165#endif
166 return 0;
167 }
168 for (i = 0; i < len; i++)
169 {
170 DIRECTORY_ENTRY entry;
171 off = current_ifd + 2 + DIRECTORY_ENTRY_SIZE * i;
172
173 EXTRACTOR_common_cat_unpack (&data[off],
174 DIRECTORY_ENTRY_SPECS[byteOrder],
175 DIRECTORY_ENTRY_FIELDS (&entry));
176 switch (entry.tag)
177 {
178 case TAG_LENGTH:
179 if ((entry.type == TYPE_SHORT) && (byteOrder == 1))
180 {
181 length = entry.value_or_offset >> 16;
182 }
183 else
184 {
185 length = entry.value_or_offset;
186 }
187 if (width != -1)
188 {
189 char tmp[128];
190 snprintf (tmp,
191 sizeof(tmp), "%ux%u",
192 width, length);
193 addKeyword (proc,
194 proc_cls,
195 tmp,
196 EXTRACTOR_METATYPE_IMAGE_DIMENSIONS);
197 }
198 break;
199 case TAG_WIDTH:
200 if ((entry.type == TYPE_SHORT) && (byteOrder == 1))
201 width = entry.value_or_offset >> 16;
202 else
203 width = entry.value_or_offset;
204 if (length != -1)
205 {
206 char tmp[128];
207 snprintf (tmp,
208 sizeof(tmp),
209 "%ux%u",
210 width, length);
211 addKeyword (proc, proc_cls,
212 tmp,
213 EXTRACTOR_METATYPE_IMAGE_DIMENSIONS);
214 }
215 break;
216 case TAG_SOFTWARE:
217 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE))
218 return 1;
219 break;
220 case TAG_ARTIST:
221 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_ARTIST))
222 return 1;
223 break;
224 case TAG_DOCUMENT_NAME:
225 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_TITLE))
226 return 1;
227 break;
228 case TAG_COPYRIGHT:
229 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_COPYRIGHT))
230 return 1;
231 break;
232 case TAG_DESCRIPTION:
233 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_DESCRIPTION))
234 return 1;
235 break;
236 case TAG_HOST:
237 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_BUILDHOST))
238 return 1;
239 break;
240 case TAG_SCANNER:
241 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_SOURCE))
242 return 1;
243 break;
244 case TAG_DAYTIME:
245 if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_CREATION_DATE))
246 return 1;
247 break;
248 }
249 }
250
251 off = current_ifd + 2 + DIRECTORY_ENTRY_SIZE * len;
252 if (byteOrder == 0)
253 current_ifd =
254 data[off + 3] << 24 | data[off + 2] << 16 |
255 data[off + 1] << 8 | data[off];
256 else
257 current_ifd =
258 data[off] << 24 | data[off + 1] << 16 |
259 data[off + 2] << 8 | data[off + 3];
260 }
261 return 0;
262}