aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/ps_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/ps_extractor.c')
-rw-r--r--src/plugins/ps_extractor.c192
1 files changed, 192 insertions, 0 deletions
diff --git a/src/plugins/ps_extractor.c b/src/plugins/ps_extractor.c
new file mode 100644
index 0000000..8a5543f
--- /dev/null
+++ b/src/plugins/ps_extractor.c
@@ -0,0 +1,192 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23
24
/**
 * Extract the next line of text from a buffer.
 *
 * Any carriage-return / line-feed bytes found at 'pos' are skipped
 * first.  The bytes from there up to (but excluding) the next CR, LF
 * or the end of the buffer are then copied into a freshly allocated,
 * 0-terminated string.
 *
 * @param data buffer to read from
 * @param size number of bytes in 'data'
 * @param pos offset in 'data' at which to start looking for the line
 * @return NULL if only line terminators remain (end of file) or if the
 *         allocation failed; otherwise the line, which the caller must
 *         release with free()
 */
static char *
readline (const char *data, size_t size, size_t pos)
{
  size_t end;
  size_t len;
  char *res;

  /* skip the terminator(s) of the previous line and any empty lines */
  while ( (pos < size) &&
          ( ('\r' == data[pos]) || ('\n' == data[pos]) ) )
    pos++;
  if (pos >= size)
    return NULL; /* end of file */

  /* find the end of this line */
  end = pos;
  while ( (end < size) &&
          ('\r' != data[end]) && ('\n' != data[end]) )
    end++;

  len = end - pos;
  res = malloc (len + 1);
  if (NULL == res)
    return NULL; /* out of memory: reported like end of file */
  memcpy (res, &data[pos], len);
  res[len] = '\0';
  return res;
}
47
48
49static int
50testmeta (char *line,
51 const char *match,
52 enum EXTRACTOR_MetaType type,
53 EXTRACTOR_MetaDataProcessor proc,
54 void *proc_cls)
55{
56 char *key;
57
58 if ( (strncmp (line, match, strlen (match)) == 0) &&
59 (strlen (line) > strlen (match)) )
60 {
61 if ((line[strlen (line) - 1] == ')') && (line[strlen (match)] == '('))
62 {
63 key = &line[strlen (match) + 1];
64 key[strlen (key) - 1] = '\0'; /* remove ")" */
65 }
66 else
67 {
68 key = &line[strlen (match)];
69 }
70 if (0 != proc (proc_cls,
71 "ps",
72 type,
73 EXTRACTOR_METAFORMAT_UTF8,
74 "text/plain",
75 key,
76 strlen (key)+1))
77 return 1;
78 }
79 return 0;
80}
81
82typedef struct
83{
84 const char *prefix;
85 enum EXTRACTOR_MetaType type;
86} Matches;
87
88static Matches tests[] = {
89 {"%%Title: ", EXTRACTOR_METATYPE_TITLE},
90 {"%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME},
91 {"%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER},
92 {"%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
93 {"%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE},
94 {"%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT},
95 {"%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION},
96 {"%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE},
97 {"%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER},
98 {"%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION},
99 {"%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION},
100
101 /* Also widely used but not supported since they
102 probably make no sense:
103 "%%BoundingBox: ",
104 "%%DocumentNeededResources: ",
105 "%%DocumentSuppliedResources: ",
106 "%%DocumentProcSets: ",
107 "%%DocumentData: ", */
108
109 {NULL, 0}
110};
111
112#define PS_HEADER "%!PS-Adobe"
113
114/* mimetype = application/postscript */
115int
116EXTRACTOR_ps_extract (const char *data,
117 size_t size,
118 EXTRACTOR_MetaDataProcessor proc,
119 void *proc_cls,
120 const char *options)
121{
122 size_t pos;
123 char *line;
124 int i;
125 int lastLine;
126 int ret;
127
128 pos = strlen (PS_HEADER);
129 if ( (size < pos) ||
130 (0 != strncmp (PS_HEADER,
131 data,
132 pos)) )
133 return 0;
134 ret = 0;
135
136 if (0 != proc (proc_cls,
137 "ps",
138 EXTRACTOR_METATYPE_MIMETYPE,
139 EXTRACTOR_METAFORMAT_UTF8,
140 "text/plain",
141 "application/postscript",
142 strlen ("application/postscript")+1))
143 return 1;
144 /* skip rest of first line */
145 while ((pos < size) && (data[pos] != '\n'))
146 pos++;
147
148 lastLine = -1;
149 line = NULL;
150 /* while Windows-PostScript does not seem to (always?) put
151 "%%EndComments", this should allow us to not read through most of
152 the file for all the sane applications... For Windows-generated
153 PS files, we will bail out at the end of the file. */
154 while (0 != strncmp ("%%EndComments", line, strlen ("%%EndComments")))
155 {
156 free (line);
157 line = readline (data, size, pos);
158 if (line == NULL)
159 break;
160 i = 0;
161 while (tests[i].prefix != NULL)
162 {
163 ret = testmeta (line, tests[i].prefix, tests[i].type, proc, proc_cls);
164 if (ret != 0)
165 break;
166 i++;
167 }
168 if (ret != 0)
169 break;
170
171 /* %%+ continues previous meta-data type... */
172 if ( (lastLine != -1) && (0 == strncmp (line, "%%+ ", strlen ("%%+ "))))
173 {
174 ret = testmeta (line, "%%+ ", tests[lastLine].type, proc, proc_cls);
175 }
176 else
177 {
178 /* update "previous" type */
179 if (tests[i].prefix == NULL)
180 lastLine = -1;
181 else
182 lastLine = i;
183 }
184 if (pos + strlen (line) + 1 <= pos)
185 break; /* overflow */
186 pos += strlen (line) + 1; /* skip newline, too; guarantee progress! */
187 }
188 free (line);
189 return ret;
190}
191
192/* end of ps_extractor.c */