aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/dvi_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/dvi_extractor.c')
-rw-r--r--src/plugins/dvi_extractor.c205
1 files changed, 105 insertions, 100 deletions
diff --git a/src/plugins/dvi_extractor.c b/src/plugins/dvi_extractor.c
index e3aa450..4e52150 100644
--- a/src/plugins/dvi_extractor.c
+++ b/src/plugins/dvi_extractor.c
@@ -53,12 +53,12 @@ static struct Matches tmap[] = {
53 { "/Keywords (", EXTRACTOR_METATYPE_KEYWORDS }, 53 { "/Keywords (", EXTRACTOR_METATYPE_KEYWORDS },
54 { "/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, 54 { "/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
55 { "/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE }, 55 { "/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE },
56 { NULL, 0 } 56 { NULL, 0 }
57}; 57};
58 58
59 59
60/** 60/**
61 * Parse a "ZZZ" tag. Specifically, the data may contain a 61 * Parse a "ZZZ" tag. Specifically, the data may contain a
62 * postscript dictionary with metadata. 62 * postscript dictionary with metadata.
63 * 63 *
64 * @param data overall input stream 64 * @param data overall input stream
@@ -71,8 +71,8 @@ static struct Matches tmap[] = {
71static int 71static int
72parseZZZ (const char *data, 72parseZZZ (const char *data,
73 size_t pos, size_t len, 73 size_t pos, size_t len,
74 EXTRACTOR_MetaDataProcessor proc, 74 EXTRACTOR_MetaDataProcessor proc,
75 void *proc_cls) 75 void *proc_cls)
76{ 76{
77 size_t slen; 77 size_t slen;
78 size_t end; 78 size_t end;
@@ -85,37 +85,37 @@ parseZZZ (const char *data,
85 return 0; 85 return 0;
86 pos += slen; 86 pos += slen;
87 while (pos < end) 87 while (pos < end)
88 {
89 for (i = 0; NULL != tmap[i].text; i++)
88 { 90 {
89 for (i = 0; NULL != tmap[i].text; i++) 91 slen = strlen (tmap[i].text);
90 { 92 if ( (pos + slen > end) ||
91 slen = strlen (tmap[i].text); 93 (0 != strncmp (&data[pos], tmap[i].text, slen)) )
92 if ( (pos + slen > end) || 94 continue;
93 (0 != strncmp (&data[pos], tmap[i].text, slen)) ) 95 pos += slen;
94 continue; 96 slen = pos;
95 pos += slen; 97 while ((slen < end) && (data[slen] != ')'))
96 slen = pos; 98 slen++;
97 while ((slen < end) && (data[slen] != ')')) 99 slen = slen - pos;
98 slen++; 100 {
99 slen = slen - pos; 101 char value[slen + 1];
100 { 102
101 char value[slen + 1]; 103 value[slen] = '\0';
102 104 memcpy (value, &data[pos], slen);
103 value[slen] = '\0'; 105 if (0 != proc (proc_cls,
104 memcpy (value, &data[pos], slen); 106 "dvi",
105 if (0 != proc (proc_cls, 107 tmap[i].type,
106 "dvi", 108 EXTRACTOR_METAFORMAT_C_STRING,
107 tmap[i].type, 109 "text/plain",
108 EXTRACTOR_METAFORMAT_C_STRING, 110 value,
109 "text/plain", 111 slen + 1))
110 value, 112 return 1;
111 slen + 1)) 113 }
112 return 1; 114 pos += slen + 1;
113 } 115 break;
114 pos += slen + 1;
115 break;
116 }
117 pos++;
118 } 116 }
117 pos++;
118 }
119 return 0; 119 return 0;
120} 120}
121 121
@@ -153,7 +153,7 @@ getShortAt (const void *data)
153 153
154 154
155/** 155/**
156 * Main entry method for the 'application/x-dvi' extraction plugin. 156 * Main entry method for the 'application/x-dvi' extraction plugin.
157 * 157 *
158 * @param ec extraction context provided to the plugin 158 * @param ec extraction context provided to the plugin
159 */ 159 */
@@ -171,7 +171,7 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec)
171 uint64_t size; 171 uint64_t size;
172 uint64_t off; 172 uint64_t off;
173 ssize_t iret; 173 ssize_t iret;
174 174
175 if (40 >= (iret = ec->read (ec->cls, &buf, 1024))) 175 if (40 >= (iret = ec->read (ec->cls, &buf, 1024)))
176 return; 176 return;
177 data = buf; 177 data = buf;
@@ -189,15 +189,15 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec)
189 memcpy (data, buf, iret); 189 memcpy (data, buf, iret);
190 off = iret; 190 off = iret;
191 while (off < size) 191 while (off < size)
192 {
193 if (0 >= (iret = ec->read (ec->cls, &buf, 16 * 1024)))
192 { 194 {
193 if (0 >= (iret = ec->read (ec->cls, &buf, 16 * 1024))) 195 free (data);
194 { 196 return;
195 free (data);
196 return;
197 }
198 memcpy (&data[off], buf, iret);
199 off += iret;
200 } 197 }
198 memcpy (&data[off], buf, iret);
199 off += iret;
200 }
201 pos = size - 1; 201 pos = size - 1;
202 while ( (223 == data[pos]) && 202 while ( (223 == data[pos]) &&
203 (pos > 0) ) 203 (pos > 0) )
@@ -222,28 +222,28 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec)
222 opos = pos; 222 opos = pos;
223 pos = getIntAt (&data[opos + 1]); 223 pos = getIntAt (&data[opos + 1]);
224 while (1) 224 while (1)
225 { 225 {
226 if (UINT32_MAX == pos) 226 if (UINT32_MAX == pos)
227 break; 227 break;
228 if ( (pos + 45 > size) || 228 if ( (pos + 45 > size) ||
229 (pos + 45 < pos) ) 229 (pos + 45 < pos) )
230 goto CLEANUP; 230 goto CLEANUP;
231 if (data[pos] != 139) /* expect 'bop' */ 231 if (data[pos] != 139) /* expect 'bop' */
232 goto CLEANUP; 232 goto CLEANUP;
233 pageCount++; 233 pageCount++;
234 opos = pos; 234 opos = pos;
235 pos = getIntAt (&data[opos + 41]); 235 pos = getIntAt (&data[opos + 41]);
236 if (UINT32_MAX == pos) 236 if (UINT32_MAX == pos)
237 break; 237 break;
238 if (pos >= opos) 238 if (pos >= opos)
239 goto CLEANUP; /* invalid! */ 239 goto CLEANUP; /* invalid! */
240 } 240 }
241 /* ok, now we believe it's a dvi... */ 241 /* ok, now we believe it's a dvi... */
242 snprintf (pages, 242 snprintf (pages,
243 sizeof (pages), 243 sizeof (pages),
244 "%u", 244 "%u",
245 pageCount); 245 pageCount);
246 if (0 != ec->proc (ec->cls, 246 if (0 != ec->proc (ec->cls,
247 "dvi", 247 "dvi",
248 EXTRACTOR_METATYPE_PAGE_COUNT, 248 EXTRACTOR_METATYPE_PAGE_COUNT,
249 EXTRACTOR_METAFORMAT_UTF8, 249 EXTRACTOR_METAFORMAT_UTF8,
@@ -251,7 +251,7 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec)
251 pages, 251 pages,
252 strlen (pages) + 1)) 252 strlen (pages) + 1))
253 goto CLEANUP; 253 goto CLEANUP;
254 if (0 != ec->proc (ec->cls, 254 if (0 != ec->proc (ec->cls,
255 "dvi", 255 "dvi",
256 EXTRACTOR_METATYPE_MIMETYPE, 256 EXTRACTOR_METATYPE_MIMETYPE,
257 EXTRACTOR_METAFORMAT_UTF8, 257 EXTRACTOR_METAFORMAT_UTF8,
@@ -261,10 +261,10 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec)
261 goto CLEANUP; 261 goto CLEANUP;
262 { 262 {
263 char comment[klen + 1]; 263 char comment[klen + 1];
264 264
265 comment[klen] = '\0'; 265 comment[klen] = '\0';
266 memcpy (comment, &data[15], klen); 266 memcpy (comment, &data[15], klen);
267 if (0 != ec->proc (ec->cls, 267 if (0 != ec->proc (ec->cls,
268 "dvi", 268 "dvi",
269 EXTRACTOR_METATYPE_COMMENT, 269 EXTRACTOR_METATYPE_COMMENT,
270 EXTRACTOR_METAFORMAT_C_STRING, 270 EXTRACTOR_METAFORMAT_C_STRING,
@@ -277,47 +277,52 @@ EXTRACTOR_dvi_extract_method (struct EXTRACTOR_ExtractContext *ec)
277 pos = opos; 277 pos = opos;
278 while ( (size >= 100) && 278 while ( (size >= 100) &&
279 (pos < size - 100) ) 279 (pos < size - 100) )
280 {
281 switch (data[pos])
280 { 282 {
281 switch (data[pos]) 283 case 139: /* begin page 'bop', we typically have to skip that one to
282 {
283 case 139: /* begin page 'bop', we typically have to skip that one to
284 find the zzz's */ 284 find the zzz's */
285 pos += 45; /* skip bop */ 285 pos += 45; /* skip bop */
286 break; 286 break;
287 case 239: /* zzz1 */ 287 case 239: /* zzz1 */
288 len = data[pos + 1]; 288 len = data[pos + 1];
289 if ( (pos + 2 + len < size) && 289 if ( (pos + 2 + len < size) &&
290 (0 != parseZZZ ((const char *) data, pos + 2, len, ec->proc, ec->cls)) ) 290 (0 != parseZZZ ((const char *) data, pos + 2, len, ec->proc,
291 goto CLEANUP; 291 ec->cls)) )
292 pos += len + 2; 292 goto CLEANUP;
293 break; 293 pos += len + 2;
294 case 240: /* zzz2 */ 294 break;
295 len = getShortAt (&data[pos + 1]); 295 case 240: /* zzz2 */
296 if ( (pos + 3 + len < size) && 296 len = getShortAt (&data[pos + 1]);
297 (0 != parseZZZ ((const char *) data, pos + 3, len, ec->proc, ec->cls)) ) 297 if ( (pos + 3 + len < size) &&
298 goto CLEANUP; 298 (0 != parseZZZ ((const char *) data, pos + 3, len, ec->proc,
299 pos += len + 3; 299 ec->cls)) )
300 break; 300 goto CLEANUP;
301 case 241: /* zzz3, who uses that? */ 301 pos += len + 3;
302 len = (getShortAt (&data[pos + 1])) + 65536 * data[pos + 3]; 302 break;
303 if ( (pos + 4 + len < size) && 303 case 241: /* zzz3, who uses that? */
304 (0 != parseZZZ ((const char *) data, pos + 4, len, ec->proc, ec->cls)) ) 304 len = (getShortAt (&data[pos + 1])) + 65536 * data[pos + 3];
305 goto CLEANUP; 305 if ( (pos + 4 + len < size) &&
306 pos += len + 4; 306 (0 != parseZZZ ((const char *) data, pos + 4, len, ec->proc,
307 break; 307 ec->cls)) )
308 case 242: /* zzz4, hurray! */ 308 goto CLEANUP;
309 len = getIntAt (&data[pos + 1]); 309 pos += len + 4;
310 if ( (pos + 1 + len < size) && 310 break;
311 (0 != parseZZZ ((const char *) data, pos + 5, len, ec->proc, ec->cls)) ) 311 case 242: /* zzz4, hurray! */
312 goto CLEANUP; 312 len = getIntAt (&data[pos + 1]);
313 pos += len + 5; 313 if ( (pos + 1 + len < size) &&
314 break; 314 (0 != parseZZZ ((const char *) data, pos + 5, len, ec->proc,
315 default: /* unsupported opcode, abort scan */ 315 ec->cls)) )
316 goto CLEANUP; 316 goto CLEANUP;
317 } 317 pos += len + 5;
318 break;
319 default: /* unsupported opcode, abort scan */
320 goto CLEANUP;
318 } 321 }
319 CLEANUP: 322 }
323CLEANUP:
320 free (data); 324 free (data);
321} 325}
322 326
327
323/* end of dvi_extractor.c */ 328/* end of dvi_extractor.c */