diff options
Diffstat (limited to 'src/plugins/dvi_extractor.c')
-rw-r--r-- | src/plugins/dvi_extractor.c | 205 |
1 files changed, 105 insertions, 100 deletions
diff --git a/src/plugins/dvi_extractor.c b/src/plugins/dvi_extractor.c index e3aa450..4e52150 100644 --- a/src/plugins/dvi_extractor.c +++ b/src/plugins/dvi_extractor.c | |||
@@ -53,12 +53,12 @@ static struct Matches tmap[] = { | |||
53 | { "/Keywords (", EXTRACTOR_METATYPE_KEYWORDS }, | 53 | { "/Keywords (", EXTRACTOR_METATYPE_KEYWORDS }, |
54 | { "/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, | 54 | { "/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, |
55 | { "/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE }, | 55 | { "/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE }, |
56 | { NULL, 0 } | 56 | { NULL, 0 } |
57 | }; | 57 | }; |
58 | 58 | ||
59 | 59 | ||
60 | /** | 60 | /** |
61 | * Parse a "ZZZ" tag. Specifically, the data may contain a | 61 | * Parse a "ZZZ" tag. Specifically, the data may contain a |
62 | * postscript dictionary with metadata. | 62 | * postscript dictionary with metadata. |
63 | * | 63 | * |
64 | * @param data overall input stream | 64 | * @param data overall input stream |
@@ -71,8 +71,8 @@ static struct Matches tmap[] = { | |||
71 | static int | 71 | static int |
72 | parseZZZ (const char *data, | 72 | parseZZZ (const char *data, |
73 | size_t pos, size_t len, | 73 | size_t pos, size_t len, |
74 | EXTRACTOR_MetaDataProcessor proc, | 74 | EXTRACTOR_MetaDataProcessor proc, |
75 | void *proc_cls) | 75 | void *proc_cls) |
76 | { | 76 | { |
77 | size_t slen; | 77 | size_t slen; |
78 | size_t end; | 78 | size_t end; |
@@ -85,37 +85,37 @@ parseZZZ (const char *data, | |||
85 | return 0; | 85 | return 0; |
86 | pos += slen; | 86 | pos += slen; |
87 | while (pos < end) | 87 | while (pos < end) |
88 | { | ||
89 | for (i = 0; NULL != tmap[i].text; i++) | ||
88 | { | 90 | { |
89 | for (i = 0; NULL != tmap[i].text; i++) | 91 | slen = strlen (tmap[i].text); |
90 | { | 92 | if ( (pos + slen > end) || |
91 | slen = strlen (tmap[i].text); | 93 | (0 != strncmp (&data[pos], tmap[i].text, slen)) ) |
92 | if ( (pos + slen > end) || | 94 | continue; |
93 | (0 != strncmp (&data[pos], tmap[i].text, slen)) ) | 95 | pos += slen; |
94 | continue; | 96 | slen = pos; |
95 | pos += slen; | 97 | while ((slen < end) && (data[slen] != ')')) |
96 | slen = pos; | 98 | slen++; |
97 | while ((slen < end) && (data[slen] != ')')) | 99 | slen = slen - pos; |
98 | slen++; | 100 | { |
99 | slen = slen - pos; | 101 | char value[slen + 1]; |
100 | { | 102 | |
101 | char value[slen + 1]; | 103 | value[slen] = '\0'; |
102 | 104 | memcpy (value, &data[pos], slen); | |
103 | value[slen] = '\0'; | 105 | if (0 != proc (proc_cls, |
104 | memcpy (value, &data[pos], slen); | 106 | "dvi", |
105 | if (0 != proc (proc_cls, | 107 | tmap[i].type, |
106 | "dvi", | 108 | EXTRACTOR_METAFORMAT_C_STRING, |
107 | tmap[i].type, | 109 | "text/plain", |
108 | EXTRACTOR_METAFORMAT_C_STRING, | 110 | value, |
109 | "text/plain", | 111 | slen + 1)) |
110 | value, | 112 | return 1; |
111 | slen + 1)) | 113 | } |
112 | return 1; | 114 | pos += slen + 1; |
113 | } | 115 | break; |
114 | pos += slen + 1; | ||
115 | break; | ||
116 | } | ||
117 | pos++; | ||
118 | } | 116 | } |
117 | pos++; | ||
118 | } | ||
119 | return 0; | 119 | return 0; |
120 | } | 120 | } |
121 | 121 | ||
@@ -153,7 +153,7 @@ getShortAt (const void *data) | |||
153 | 153 | ||
154 | 154 | ||
155 | /** | 155 | /** |
156 | * Main entry method for the 'application/x-dvi' extraction plugin. | 156 | * Main entry method for the 'application/x-dvi' extraction plugin. |
157 | * | 157 | * |
158 | * @param ec extraction context provided to the plugin | 158 | * @param ec extraction context provided to the plugin |
159 | */ | 159 | */ |
@@ -171,7 +171,7 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
171 | uint64_t size; | 171 | uint64_t size; |
172 | uint64_t off; | 172 | uint64_t off; |
173 | ssize_t iret; | 173 | ssize_t iret; |
174 | 174 | ||
175 | if (40 >= (iret = ec->read (ec->cls, &buf, 1024))) | 175 | if (40 >= (iret = ec->read (ec->cls, &buf, 1024))) |
176 | return; | 176 | return; |
177 | data = buf; | 177 | data = buf; |
@@ -189,15 +189,15 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
189 | memcpy (data, buf, iret); | 189 | memcpy (data, buf, iret); |
190 | off = iret; | 190 | off = iret; |
191 | while (off < size) | 191 | while (off < size) |
192 | { | ||
193 | if (0 >= (iret = ec->read (ec->cls, &buf, 16 * 1024))) | ||
192 | { | 194 | { |
193 | if (0 >= (iret = ec->read (ec->cls, &buf, 16 * 1024))) | 195 | free (data); |
194 | { | 196 | return; |
195 | free (data); | ||
196 | return; | ||
197 | } | ||
198 | memcpy (&data[off], buf, iret); | ||
199 | off += iret; | ||
200 | } | 197 | } |
198 | memcpy (&data[off], buf, iret); | ||
199 | off += iret; | ||
200 | } | ||
201 | pos = size - 1; | 201 | pos = size - 1; |
202 | while ( (223 == data[pos]) && | 202 | while ( (223 == data[pos]) && |
203 | (pos > 0) ) | 203 | (pos > 0) ) |
@@ -222,28 +222,28 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
222 | opos = pos; | 222 | opos = pos; |
223 | pos = getIntAt (&data[opos + 1]); | 223 | pos = getIntAt (&data[opos + 1]); |
224 | while (1) | 224 | while (1) |
225 | { | 225 | { |
226 | if (UINT32_MAX == pos) | 226 | if (UINT32_MAX == pos) |
227 | break; | 227 | break; |
228 | if ( (pos + 45 > size) || | 228 | if ( (pos + 45 > size) || |
229 | (pos + 45 < pos) ) | 229 | (pos + 45 < pos) ) |
230 | goto CLEANUP; | 230 | goto CLEANUP; |
231 | if (data[pos] != 139) /* expect 'bop' */ | 231 | if (data[pos] != 139) /* expect 'bop' */ |
232 | goto CLEANUP; | 232 | goto CLEANUP; |
233 | pageCount++; | 233 | pageCount++; |
234 | opos = pos; | 234 | opos = pos; |
235 | pos = getIntAt (&data[opos + 41]); | 235 | pos = getIntAt (&data[opos + 41]); |
236 | if (UINT32_MAX == pos) | 236 | if (UINT32_MAX == pos) |
237 | break; | 237 | break; |
238 | if (pos >= opos) | 238 | if (pos >= opos) |
239 | goto CLEANUP; /* invalid! */ | 239 | goto CLEANUP; /* invalid! */ |
240 | } | 240 | } |
241 | /* ok, now we believe it's a dvi... */ | 241 | /* ok, now we believe it's a dvi... */ |
242 | snprintf (pages, | 242 | snprintf (pages, |
243 | sizeof (pages), | 243 | sizeof (pages), |
244 | "%u", | 244 | "%u", |
245 | pageCount); | 245 | pageCount); |
246 | if (0 != ec->proc (ec->cls, | 246 | if (0 != ec->proc (ec->cls, |
247 | "dvi", | 247 | "dvi", |
248 | EXTRACTOR_METATYPE_PAGE_COUNT, | 248 | EXTRACTOR_METATYPE_PAGE_COUNT, |
249 | EXTRACTOR_METAFORMAT_UTF8, | 249 | EXTRACTOR_METAFORMAT_UTF8, |
@@ -251,7 +251,7 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
251 | pages, | 251 | pages, |
252 | strlen (pages) + 1)) | 252 | strlen (pages) + 1)) |
253 | goto CLEANUP; | 253 | goto CLEANUP; |
254 | if (0 != ec->proc (ec->cls, | 254 | if (0 != ec->proc (ec->cls, |
255 | "dvi", | 255 | "dvi", |
256 | EXTRACTOR_METATYPE_MIMETYPE, | 256 | EXTRACTOR_METATYPE_MIMETYPE, |
257 | EXTRACTOR_METAFORMAT_UTF8, | 257 | EXTRACTOR_METAFORMAT_UTF8, |
@@ -261,10 +261,10 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
261 | goto CLEANUP; | 261 | goto CLEANUP; |
262 | { | 262 | { |
263 | char comment[klen + 1]; | 263 | char comment[klen + 1]; |
264 | 264 | ||
265 | comment[klen] = '\0'; | 265 | comment[klen] = '\0'; |
266 | memcpy (comment, &data[15], klen); | 266 | memcpy (comment, &data[15], klen); |
267 | if (0 != ec->proc (ec->cls, | 267 | if (0 != ec->proc (ec->cls, |
268 | "dvi", | 268 | "dvi", |
269 | EXTRACTOR_METATYPE_COMMENT, | 269 | EXTRACTOR_METATYPE_COMMENT, |
270 | EXTRACTOR_METAFORMAT_C_STRING, | 270 | EXTRACTOR_METAFORMAT_C_STRING, |
@@ -277,47 +277,52 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
277 | pos = opos; | 277 | pos = opos; |
278 | while ( (size >= 100) && | 278 | while ( (size >= 100) && |
279 | (pos < size - 100) ) | 279 | (pos < size - 100) ) |
280 | { | ||
281 | switch (data[pos]) | ||
280 | { | 282 | { |
281 | switch (data[pos]) | 283 | case 139: /* begin page 'bop', we typically have to skip that one to |
282 | { | ||
283 | case 139: /* begin page 'bop', we typically have to skip that one to | ||
284 | find the zzz's */ | 284 | find the zzz's */ |
285 | pos += 45; /* skip bop */ | 285 | pos += 45; /* skip bop */ |
286 | break; | 286 | break; |
287 | case 239: /* zzz1 */ | 287 | case 239: /* zzz1 */ |
288 | len = data[pos + 1]; | 288 | len = data[pos + 1]; |
289 | if ( (pos + 2 + len < size) && | 289 | if ( (pos + 2 + len < size) && |
290 | (0 != parseZZZ ((const char *) data, pos + 2, len, ec->proc, ec->cls)) ) | 290 | (0 != parseZZZ ((const char *) data, pos + 2, len, ec->proc, |
291 | goto CLEANUP; | 291 | ec->cls)) ) |
292 | pos += len + 2; | 292 | goto CLEANUP; |
293 | break; | 293 | pos += len + 2; |
294 | case 240: /* zzz2 */ | 294 | break; |
295 | len = getShortAt (&data[pos + 1]); | 295 | case 240: /* zzz2 */ |
296 | if ( (pos + 3 + len < size) && | 296 | len = getShortAt (&data[pos + 1]); |
297 | (0 != parseZZZ ((const char *) data, pos + 3, len, ec->proc, ec->cls)) ) | 297 | if ( (pos + 3 + len < size) && |
298 | goto CLEANUP; | 298 | (0 != parseZZZ ((const char *) data, pos + 3, len, ec->proc, |
299 | pos += len + 3; | 299 | ec->cls)) ) |
300 | break; | 300 | goto CLEANUP; |
301 | case 241: /* zzz3, who uses that? */ | 301 | pos += len + 3; |
302 | len = (getShortAt (&data[pos + 1])) + 65536 * data[pos + 3]; | 302 | break; |
303 | if ( (pos + 4 + len < size) && | 303 | case 241: /* zzz3, who uses that? */ |
304 | (0 != parseZZZ ((const char *) data, pos + 4, len, ec->proc, ec->cls)) ) | 304 | len = (getShortAt (&data[pos + 1])) + 65536 * data[pos + 3]; |
305 | goto CLEANUP; | 305 | if ( (pos + 4 + len < size) && |
306 | pos += len + 4; | 306 | (0 != parseZZZ ((const char *) data, pos + 4, len, ec->proc, |
307 | break; | 307 | ec->cls)) ) |
308 | case 242: /* zzz4, hurray! */ | 308 | goto CLEANUP; |
309 | len = getIntAt (&data[pos + 1]); | 309 | pos += len + 4; |
310 | if ( (pos + 1 + len < size) && | 310 | break; |
311 | (0 != parseZZZ ((const char *) data, pos + 5, len, ec->proc, ec->cls)) ) | 311 | case 242: /* zzz4, hurray! */ |
312 | goto CLEANUP; | 312 | len = getIntAt (&data[pos + 1]); |
313 | pos += len + 5; | 313 | if ( (pos + 1 + len < size) && |
314 | break; | 314 | (0 != parseZZZ ((const char *) data, pos + 5, len, ec->proc, |
315 | default: /* unsupported opcode, abort scan */ | 315 | ec->cls)) ) |
316 | goto CLEANUP; | 316 | goto CLEANUP; |
317 | } | 317 | pos += len + 5; |
318 | break; | ||
319 | default: /* unsupported opcode, abort scan */ | ||
320 | goto CLEANUP; | ||
318 | } | 321 | } |
319 | CLEANUP: | 322 | } |
323 | CLEANUP: | ||
320 | free (data); | 324 | free (data); |
321 | } | 325 | } |
322 | 326 | ||
327 | |||
323 | /* end of dvi_extractor.c */ | 328 | /* end of dvi_extractor.c */ |