aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/ps_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/ps_extractor.c')
-rw-r--r--src/plugins/ps_extractor.c213
1 files changed, 213 insertions, 0 deletions
diff --git a/src/plugins/ps_extractor.c b/src/plugins/ps_extractor.c
new file mode 100644
index 0000000..bd99c94
--- /dev/null
+++ b/src/plugins/ps_extractor.c
@@ -0,0 +1,213 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2009, 2012 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20/**
21 * @file plugins/ps_extractor.c
22 * @brief plugin to support PostScript files
23 * @author Christian Grothoff
24 */
25#include "platform.h"
26#include "extractor.h"
27
28
/**
 * Upper bound (in bytes) on the length of a line that this plugin
 * will read.  Lines in the body of a PostScript file may be longer,
 * but the header lines carrying the meta data are expected to stay
 * well below this limit.
 */
#define MAX_LINE 1024

/**
 * Prefix that the first line of a PostScript file must start with.
 */
#define PS_HEADER "%!PS-Adobe"
41
42
43/**
44 * Pair with prefix in the PS header and corresponding LE type.
45 */
46struct Matches
47{
48 /**
49 * PS header prefix.
50 */
51 const char *prefix;
52
53 /**
54 * Corresponding LE type.
55 */
56 enum EXTRACTOR_MetaType type;
57};
58
59
60/**
61 * Map of PS prefixes to LE types.
62 */
63static struct Matches tests[] = {
64 { "%%Title: ", EXTRACTOR_METATYPE_TITLE },
65 { "% Subject: ", EXTRACTOR_METATYPE_SUBJECT },
66 { "%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
67 { "% From: ", EXTRACTOR_METATYPE_AUTHOR_NAME },
68 { "%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER },
69 { "%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
70 { "%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE },
71 { "% Date: ", EXTRACTOR_METATYPE_UNKNOWN_DATE },
72 { "%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT },
73 { "%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION },
74 { "%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE },
75 { "%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER },
76 { "%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION },
77 { "%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION },
78
79 /* Also widely used but not supported since they
80 probably make no sense:
81 "%%BoundingBox: ",
82 "%%DocumentNeededResources: ",
83 "%%DocumentSuppliedResources: ",
84 "%%DocumentProcSets: ",
85 "%%DocumentData: ", */
86
87 { NULL, 0 }
88};
89
90
91/**
92 * Read a single ('\n'-terminated) line of input.
93 *
94 * @param ec context for IO
95 * @return NULL on end-of-file (or if next line exceeds limit)
96 */
97static char *
98readline (struct EXTRACTOR_ExtractContext *ec)
99{
100 int64_t pos;
101 ssize_t ret;
102 char *res;
103 void *data;
104 const char *cdata;
105 const char *eol;
106
107 pos = ec->seek (ec->cls, 0, SEEK_CUR);
108 if (0 >= (ret = ec->read (ec->cls, &data, MAX_LINE)))
109 return NULL;
110 cdata = data;
111 if (NULL == (eol = memchr (cdata, '\n', ret)))
112 return NULL; /* no end-of-line found */
113 if (NULL == (res = malloc (eol - cdata + 1)))
114 return NULL;
115 memcpy (res, cdata, eol - cdata);
116 res[eol - cdata] = '\0';
117 ec->seek (ec->cls, pos + eol - cdata + 1, SEEK_SET);
118 return res;
119}
120
121
122/**
123 * Main entry method for the 'application/postscript' extraction plugin.
124 *
125 * @param ec extraction context provided to the plugin
126 */
127void
128EXTRACTOR_ps_extract_method (struct EXTRACTOR_ExtractContext *ec)
129{
130 unsigned int i;
131 char *line;
132 char *next;
133 char *acc;
134 const char *match;
135
136 if (NULL == (line = readline (ec)))
137 return;
138 if ( (strlen (line) < strlen (PS_HEADER)) ||
139 (0 != memcmp (PS_HEADER,
140 line,
141 strlen (PS_HEADER))) )
142 {
143 free (line);
144 return;
145 }
146 free (line);
147 if (0 != ec->proc (ec->cls,
148 "ps",
149 EXTRACTOR_METATYPE_MIMETYPE,
150 EXTRACTOR_METAFORMAT_UTF8,
151 "text/plain",
152 "application/postscript",
153 strlen ("application/postscript") + 1))
154 return;
155
156 line = NULL;
157 next = readline (ec);
158 while ( (NULL != next) &&
159 ('%' == next[0]) )
160 {
161 line = next;
162 next = readline (ec);
163 for (i = 0; NULL != tests[i].prefix; i++)
164 {
165 match = tests[i].prefix;
166 if ( (strlen (line) < strlen (match)) ||
167 (0 != strncmp (line, match, strlen (match))) )
168 continue;
169 /* %%+ continues previous meta-data type... */
170 while ( (NULL != next) &&
171 (0 == strncmp (next, "%%+", strlen ("%%+"))) )
172 {
173 acc = malloc (strlen (line) + strlen (next) - 1);
174 strcpy (acc, line);
175 strcat (acc, " ");
176 strcat (acc, next + 3);
177 free (line);
178 line = acc;
179 next = readline (ec);
180 }
181 if ( (line[strlen (line) - 1] == ')') &&
182 (line[strlen (match)] == '(') )
183 {
184 acc = &line[strlen (match) + 1];
185 acc[strlen (acc) - 1] = '\0'; /* remove ")" */
186 }
187 else
188 {
189 acc = &line[strlen (match)];
190 }
191 while (isspace ((unsigned int) acc[0]))
192 acc++;
193 if ( (strlen (acc) > 0) &&
194 (0 != ec->proc (ec->cls,
195 "ps",
196 tests[i].type,
197 EXTRACTOR_METAFORMAT_UTF8,
198 "text/plain",
199 acc,
200 strlen (acc) + 1)) )
201 {
202 free (line);
203 if (NULL != next)
204 free (next);
205 return;
206 }
207 break;
208 }
209 free (line);
210 }
211}
212
213/* end of ps_extractor.c */