aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/old/dvi_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/old/dvi_extractor.c')
-rw-r--r--src/plugins/old/dvi_extractor.c247
1 files changed, 0 insertions, 247 deletions
diff --git a/src/plugins/old/dvi_extractor.c b/src/plugins/old/dvi_extractor.c
deleted file mode 100644
index e4cfa3a..0000000
--- a/src/plugins/old/dvi_extractor.c
+++ /dev/null
@@ -1,247 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
#include "platform.h"
#include "extractor.h"
#include <stdlib.h>
23
24typedef struct
25{
26 char *text;
27 enum EXTRACTOR_MetaType type;
28} Matches;
29
30static Matches tmap[] = {
31 {"/Title (", EXTRACTOR_METATYPE_TITLE},
32 {"/Subject (", EXTRACTOR_METATYPE_SUBJECT},
33 {"/Author (", EXTRACTOR_METATYPE_AUTHOR_NAME},
34 {"/Keywords (", EXTRACTOR_METATYPE_KEYWORDS},
35 {"/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
36 {"/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
37 {NULL, 0},
38};
39
40static int
41parseZZZ (const char *data,
42 size_t pos, size_t len,
43 EXTRACTOR_MetaDataProcessor proc,
44 void *proc_cls)
45{
46 size_t slen;
47 size_t end;
48 int i;
49
50 end = pos + len;
51 slen = strlen ("ps:SDict begin [");
52 if (len <= slen)
53 return 0;
54 if (0 != strncmp ("ps:SDict begin [ ", &data[pos], slen))
55 return 0;
56 pos += slen;
57 while (pos < end)
58 {
59 i = 0;
60 while (tmap[i].text != NULL)
61 {
62 slen = strlen (tmap[i].text);
63 if (pos + slen < end)
64 {
65 if (0 == strncmp (&data[pos], tmap[i].text, slen))
66 {
67 pos += slen;
68 slen = pos;
69 while ((slen < end) && (data[slen] != ')'))
70 slen++;
71 slen = slen - pos;
72 {
73 char value[slen + 1];
74 value[slen] = '\0';
75 memcpy (value, &data[pos], slen);
76 if (0 != proc (proc_cls,
77 "dvi",
78 tmap[i].type,
79 EXTRACTOR_METAFORMAT_C_STRING,
80 "text/plain",
81 value,
82 slen +1))
83 {
84 return 1;
85 }
86 }
87 pos += slen + 1;
88 }
89 }
90 i++;
91 }
92 pos++;
93 }
94 return 0;
95}
96
/**
 * Read a 32-bit unsigned value from an address that need not be
 * aligned.  The bytes are interpreted in host byte order; callers
 * convert from the file's byte order themselves.
 *
 * @param data address of the first of four readable bytes
 * @return the value stored at data
 */
static unsigned int
getIntAt (const void *data)
{
  uint32_t value;

  /* copy into a properly typed object instead of casting a char
     buffer, which would neither guarantee alignment nor respect
     the aliasing rules */
  memcpy (&value, data, sizeof (value));
  return value;
}
105
/**
 * Read a 16-bit unsigned value from an address that need not be
 * aligned.  The bytes are interpreted in host byte order; callers
 * convert from the file's byte order themselves.
 *
 * @param data address of the first of two readable bytes
 * @return the value stored at data, widened to unsigned int
 */
static unsigned int
getShortAt (const void *data)
{
  uint16_t value;

  /* copy into a properly typed object instead of casting a char
     buffer, which would neither guarantee alignment nor respect
     the aliasing rules */
  memcpy (&value, data, sizeof (value));
  return value;
}
114
115
116int
117EXTRACTOR_dvi_extract (const unsigned char *data,
118 size_t size,
119 EXTRACTOR_MetaDataProcessor proc,
120 void *proc_cls,
121 const char *options)
122{
123 unsigned int klen;
124 uint32_t pos;
125 uint32_t opos;
126 unsigned int len;
127 unsigned int pageCount;
128 char pages[16];
129
130 if (size < 40)
131 return 0;
132 if ((data[0] != 247) || (data[1] != 2))
133 return 0; /* cannot be dvi or unsupported version */
134 klen = data[14];
135
136 pos = size - 1;
137 while ((data[pos] == 223) && (pos > 0))
138 pos--;
139 if ((data[pos] != 2) || (pos < 40))
140 return 0;
141 pos--;
142 pos -= 4;
143 /* assert pos at 'post_post tag' */
144 if (data[pos] != 249)
145 return 0;
146 opos = pos;
147 pos = ntohl (getIntAt (&data[opos + 1]));
148 if (pos + 25 > size)
149 return 0;
150 /* assert pos at 'post' command */
151 if (data[pos] != 248)
152 return 0;
153 pageCount = 0;
154 opos = pos;
155 pos = ntohl (getIntAt (&data[opos + 1]));
156 while (1)
157 {
158 if (pos == UINT32_MAX)
159 break;
160 if (pos + 45 > size)
161 return 0;
162 if (data[pos] != 139) /* expect 'bop' */
163 return 0;
164 pageCount++;
165 opos = pos;
166 pos = ntohl (getIntAt (&data[opos + 41]));
167 if (pos == UINT32_MAX)
168 break;
169 if (pos >= opos)
170 return 0; /* invalid! */
171 }
172 /* ok, now we believe it's a dvi... */
173 snprintf (pages, sizeof(pages), "%u", pageCount);
174 if (0 != proc (proc_cls,
175 "dvi",
176 EXTRACTOR_METATYPE_PAGE_COUNT,
177 EXTRACTOR_METAFORMAT_UTF8,
178 "text/plain",
179 pages,
180 strlen (pages) +1))
181 return 1;
182 if (0 != proc (proc_cls,
183 "dvi",
184 EXTRACTOR_METATYPE_MIMETYPE,
185 EXTRACTOR_METAFORMAT_UTF8,
186 "text/plain",
187 "application/x-dvi",
188 strlen ("application/x-dvi") +1))
189 return 1;
190 {
191 char comment[klen + 1];
192
193 comment[klen] = '\0';
194 memcpy (comment, &data[15], klen);
195 if (0 != proc (proc_cls,
196 "dvi",
197 EXTRACTOR_METATYPE_COMMENT,
198 EXTRACTOR_METAFORMAT_UTF8,
199 "text/plain",
200 comment,
201 klen +1))
202 return 1;
203 }
204 /* try to find PDF/ps special */
205 pos = opos;
206 while (pos < size - 100)
207 {
208 switch (data[pos])
209 {
210 case 139: /* begin page 'bop', we typically have to skip that one to
211 find the zzz's */
212 pos += 45; /* skip bop */
213 break;
214 case 239: /* zzz1 */
215 len = data[pos + 1];
216 if (pos + 2 + len < size)
217 if (0 != parseZZZ ((const char *) data, pos + 2, len, proc, proc_cls))
218 return 1;
219 pos += len + 2;
220 break;
221 case 240: /* zzz2 */
222 len = ntohs (getShortAt (&data[pos + 1]));
223 if (pos + 3 + len < size)
224 if (0 != parseZZZ ((const char *) data, pos + 3, len, proc, proc_cls))
225 return 1;
226 pos += len + 3;
227 break;
228 case 241: /* zzz3, who uses that? */
229 len = (ntohs (getShortAt (&data[pos + 1]))) + 65536 * data[pos + 3];
230 if (pos + 4 + len < size)
231 if (0 != parseZZZ ((const char *) data, pos + 4, len, proc, proc_cls))
232 return 1;
233 pos += len + 4;
234 break;
235 case 242: /* zzz4, hurray! */
236 len = ntohl (getIntAt (&data[pos + 1]));
237 if (pos + 1 + len < size)
238 if (0 != parseZZZ ((const char *) data, pos + 5, len, proc, proc_cls))
239 return 1;
240 pos += len + 5;
241 break;
242 default: /* unsupported opcode, abort scan */
243 return 0;
244 }
245 }
246 return 0;
247}