diff options
Diffstat (limited to 'src/plugins/old/dvi_extractor.c')
-rw-r--r-- | src/plugins/old/dvi_extractor.c | 247 |
1 files changed, 0 insertions, 247 deletions
diff --git a/src/plugins/old/dvi_extractor.c b/src/plugins/old/dvi_extractor.c deleted file mode 100644 index e4cfa3a..0000000 --- a/src/plugins/old/dvi_extractor.c +++ /dev/null | |||
@@ -1,247 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | libextractor is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | libextractor is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with libextractor; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | |||
#include "platform.h"
#include "extractor.h"
#include <stdlib.h>
23 | |||
24 | typedef struct | ||
25 | { | ||
26 | char *text; | ||
27 | enum EXTRACTOR_MetaType type; | ||
28 | } Matches; | ||
29 | |||
30 | static Matches tmap[] = { | ||
31 | {"/Title (", EXTRACTOR_METATYPE_TITLE}, | ||
32 | {"/Subject (", EXTRACTOR_METATYPE_SUBJECT}, | ||
33 | {"/Author (", EXTRACTOR_METATYPE_AUTHOR_NAME}, | ||
34 | {"/Keywords (", EXTRACTOR_METATYPE_KEYWORDS}, | ||
35 | {"/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, | ||
36 | {"/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE}, | ||
37 | {NULL, 0}, | ||
38 | }; | ||
39 | |||
40 | static int | ||
41 | parseZZZ (const char *data, | ||
42 | size_t pos, size_t len, | ||
43 | EXTRACTOR_MetaDataProcessor proc, | ||
44 | void *proc_cls) | ||
45 | { | ||
46 | size_t slen; | ||
47 | size_t end; | ||
48 | int i; | ||
49 | |||
50 | end = pos + len; | ||
51 | slen = strlen ("ps:SDict begin ["); | ||
52 | if (len <= slen) | ||
53 | return 0; | ||
54 | if (0 != strncmp ("ps:SDict begin [ ", &data[pos], slen)) | ||
55 | return 0; | ||
56 | pos += slen; | ||
57 | while (pos < end) | ||
58 | { | ||
59 | i = 0; | ||
60 | while (tmap[i].text != NULL) | ||
61 | { | ||
62 | slen = strlen (tmap[i].text); | ||
63 | if (pos + slen < end) | ||
64 | { | ||
65 | if (0 == strncmp (&data[pos], tmap[i].text, slen)) | ||
66 | { | ||
67 | pos += slen; | ||
68 | slen = pos; | ||
69 | while ((slen < end) && (data[slen] != ')')) | ||
70 | slen++; | ||
71 | slen = slen - pos; | ||
72 | { | ||
73 | char value[slen + 1]; | ||
74 | value[slen] = '\0'; | ||
75 | memcpy (value, &data[pos], slen); | ||
76 | if (0 != proc (proc_cls, | ||
77 | "dvi", | ||
78 | tmap[i].type, | ||
79 | EXTRACTOR_METAFORMAT_C_STRING, | ||
80 | "text/plain", | ||
81 | value, | ||
82 | slen +1)) | ||
83 | { | ||
84 | return 1; | ||
85 | } | ||
86 | } | ||
87 | pos += slen + 1; | ||
88 | } | ||
89 | } | ||
90 | i++; | ||
91 | } | ||
92 | pos++; | ||
93 | } | ||
94 | return 0; | ||
95 | } | ||
96 | |||
/**
 * Read four bytes from a possibly unaligned address and return them as
 * an integer in the host's byte order (no endianness conversion is done
 * here; callers apply ntohl themselves).
 *
 * @param data address of the first of the four bytes
 * @return the four bytes reinterpreted as an unsigned 32-bit value
 */
static unsigned int
getIntAt (const void *data)
{
  uint32_t v;

  /* copy into a real integer object: casting a char buffer to
     unsigned int * guarantees neither alignment nor valid aliasing */
  memcpy (&v, data, sizeof (v));
  return v;
}
105 | |||
/**
 * Read two bytes from a possibly unaligned address and return them as
 * an integer in the host's byte order (no endianness conversion is done
 * here; callers apply ntohs themselves).
 *
 * @param data address of the first of the two bytes
 * @return the two bytes reinterpreted as an unsigned 16-bit value
 */
static unsigned int
getShortAt (const void *data)
{
  uint16_t v;

  /* copy into a real integer object: casting a char buffer to
     unsigned short * guarantees neither alignment nor valid aliasing */
  memcpy (&v, data, sizeof (v));
  return v;
}
114 | |||
115 | |||
116 | int | ||
117 | EXTRACTOR_dvi_extract (const unsigned char *data, | ||
118 | size_t size, | ||
119 | EXTRACTOR_MetaDataProcessor proc, | ||
120 | void *proc_cls, | ||
121 | const char *options) | ||
122 | { | ||
123 | unsigned int klen; | ||
124 | uint32_t pos; | ||
125 | uint32_t opos; | ||
126 | unsigned int len; | ||
127 | unsigned int pageCount; | ||
128 | char pages[16]; | ||
129 | |||
130 | if (size < 40) | ||
131 | return 0; | ||
132 | if ((data[0] != 247) || (data[1] != 2)) | ||
133 | return 0; /* cannot be dvi or unsupported version */ | ||
134 | klen = data[14]; | ||
135 | |||
136 | pos = size - 1; | ||
137 | while ((data[pos] == 223) && (pos > 0)) | ||
138 | pos--; | ||
139 | if ((data[pos] != 2) || (pos < 40)) | ||
140 | return 0; | ||
141 | pos--; | ||
142 | pos -= 4; | ||
143 | /* assert pos at 'post_post tag' */ | ||
144 | if (data[pos] != 249) | ||
145 | return 0; | ||
146 | opos = pos; | ||
147 | pos = ntohl (getIntAt (&data[opos + 1])); | ||
148 | if (pos + 25 > size) | ||
149 | return 0; | ||
150 | /* assert pos at 'post' command */ | ||
151 | if (data[pos] != 248) | ||
152 | return 0; | ||
153 | pageCount = 0; | ||
154 | opos = pos; | ||
155 | pos = ntohl (getIntAt (&data[opos + 1])); | ||
156 | while (1) | ||
157 | { | ||
158 | if (pos == UINT32_MAX) | ||
159 | break; | ||
160 | if (pos + 45 > size) | ||
161 | return 0; | ||
162 | if (data[pos] != 139) /* expect 'bop' */ | ||
163 | return 0; | ||
164 | pageCount++; | ||
165 | opos = pos; | ||
166 | pos = ntohl (getIntAt (&data[opos + 41])); | ||
167 | if (pos == UINT32_MAX) | ||
168 | break; | ||
169 | if (pos >= opos) | ||
170 | return 0; /* invalid! */ | ||
171 | } | ||
172 | /* ok, now we believe it's a dvi... */ | ||
173 | snprintf (pages, sizeof(pages), "%u", pageCount); | ||
174 | if (0 != proc (proc_cls, | ||
175 | "dvi", | ||
176 | EXTRACTOR_METATYPE_PAGE_COUNT, | ||
177 | EXTRACTOR_METAFORMAT_UTF8, | ||
178 | "text/plain", | ||
179 | pages, | ||
180 | strlen (pages) +1)) | ||
181 | return 1; | ||
182 | if (0 != proc (proc_cls, | ||
183 | "dvi", | ||
184 | EXTRACTOR_METATYPE_MIMETYPE, | ||
185 | EXTRACTOR_METAFORMAT_UTF8, | ||
186 | "text/plain", | ||
187 | "application/x-dvi", | ||
188 | strlen ("application/x-dvi") +1)) | ||
189 | return 1; | ||
190 | { | ||
191 | char comment[klen + 1]; | ||
192 | |||
193 | comment[klen] = '\0'; | ||
194 | memcpy (comment, &data[15], klen); | ||
195 | if (0 != proc (proc_cls, | ||
196 | "dvi", | ||
197 | EXTRACTOR_METATYPE_COMMENT, | ||
198 | EXTRACTOR_METAFORMAT_UTF8, | ||
199 | "text/plain", | ||
200 | comment, | ||
201 | klen +1)) | ||
202 | return 1; | ||
203 | } | ||
204 | /* try to find PDF/ps special */ | ||
205 | pos = opos; | ||
206 | while (pos < size - 100) | ||
207 | { | ||
208 | switch (data[pos]) | ||
209 | { | ||
210 | case 139: /* begin page 'bop', we typically have to skip that one to | ||
211 | find the zzz's */ | ||
212 | pos += 45; /* skip bop */ | ||
213 | break; | ||
214 | case 239: /* zzz1 */ | ||
215 | len = data[pos + 1]; | ||
216 | if (pos + 2 + len < size) | ||
217 | if (0 != parseZZZ ((const char *) data, pos + 2, len, proc, proc_cls)) | ||
218 | return 1; | ||
219 | pos += len + 2; | ||
220 | break; | ||
221 | case 240: /* zzz2 */ | ||
222 | len = ntohs (getShortAt (&data[pos + 1])); | ||
223 | if (pos + 3 + len < size) | ||
224 | if (0 != parseZZZ ((const char *) data, pos + 3, len, proc, proc_cls)) | ||
225 | return 1; | ||
226 | pos += len + 3; | ||
227 | break; | ||
228 | case 241: /* zzz3, who uses that? */ | ||
229 | len = (ntohs (getShortAt (&data[pos + 1]))) + 65536 * data[pos + 3]; | ||
230 | if (pos + 4 + len < size) | ||
231 | if (0 != parseZZZ ((const char *) data, pos + 4, len, proc, proc_cls)) | ||
232 | return 1; | ||
233 | pos += len + 4; | ||
234 | break; | ||
235 | case 242: /* zzz4, hurray! */ | ||
236 | len = ntohl (getIntAt (&data[pos + 1])); | ||
237 | if (pos + 1 + len < size) | ||
238 | if (0 != parseZZZ ((const char *) data, pos + 5, len, proc, proc_cls)) | ||
239 | return 1; | ||
240 | pos += len + 5; | ||
241 | break; | ||
242 | default: /* unsupported opcode, abort scan */ | ||
243 | return 0; | ||
244 | } | ||
245 | } | ||
246 | return 0; | ||
247 | } | ||