aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/include/extractor.h3
-rw-r--r--src/main/extractor_metatypes.c4
-rw-r--r--src/plugins/Makefile.am13
-rw-r--r--src/plugins/ps_extractor.c192
-rw-r--r--src/plugins/psextractor.c228
5 files changed, 201 insertions, 239 deletions
diff --git a/src/include/extractor.h b/src/include/extractor.h
index 0b7ff05..47eb197 100644
--- a/src/include/extractor.h
+++ b/src/include/extractor.h
@@ -275,7 +275,7 @@ enum EXTRACTOR_MetaType
275 EXTRACTOR_METATYPE_SOURCE_DEVICE = 143, 275 EXTRACTOR_METATYPE_SOURCE_DEVICE = 143,
276 EXTRACTOR_METATYPE_DISCLAIMER = 144, 276 EXTRACTOR_METATYPE_DISCLAIMER = 144,
277 EXTRACTOR_METATYPE_WARNING = 145, 277 EXTRACTOR_METATYPE_WARNING = 145,
278 278 EXTRACTOR_METATYPE_PAGE_ORDER = 146,
279 279
280 /* fixme: used up to here! */ 280 /* fixme: used up to here! */
281 281
@@ -295,7 +295,6 @@ enum EXTRACTOR_MetaType
295 295
296 /* FIXME: transcribe & renumber those below */ 296 /* FIXME: transcribe & renumber those below */
297 EXTRACTOR_METATYPE_USED_FONTS = 37, 297 EXTRACTOR_METATYPE_USED_FONTS = 37,
298 EXTRACTOR_METATYPE_PAGE_ORDER = 38,
299 298
300 299
301 /* numeric metrics */ 300 /* numeric metrics */
diff --git a/src/main/extractor_metatypes.c b/src/main/extractor_metatypes.c
index 26e2ef2..f59172a 100644
--- a/src/main/extractor_metatypes.c
+++ b/src/main/extractor_metatypes.c
@@ -358,8 +358,8 @@ static const struct MetaTypeDescription meta_type_descriptions[] = {
358 /* 145 */ 358 /* 145 */
359 { gettext_noop ("warning"), 359 { gettext_noop ("warning"),
360 gettext_noop ("warning about the nature of the content") }, 360 gettext_noop ("warning about the nature of the content") },
361 { gettext_noop (""), 361 { gettext_noop ("page order"),
362 gettext_noop ("") }, 362 gettext_noop ("order of the pages") },
363 { gettext_noop (""), 363 { gettext_noop (""),
364 gettext_noop ("") }, 364 gettext_noop ("") },
365 { gettext_noop (""), 365 { gettext_noop (""),
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index cc44f8b..ee0184d 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -88,6 +88,7 @@ plugin_LTLIBRARIES = \
88 $(ole2) \ 88 $(ole2) \
89 $(pdf) \ 89 $(pdf) \
90 libextractor_png.la \ 90 libextractor_png.la \
91 libextractor_ps.la \
91 libextractor_real.la \ 92 libextractor_real.la \
92 $(rpm) \ 93 $(rpm) \
93 libextractor_tar.la \ 94 libextractor_tar.la \
@@ -243,6 +244,11 @@ libextractor_png_la_LIBADD = \
243 $(top_builddir)/src/common/libextractor_common.la \ 244 $(top_builddir)/src/common/libextractor_common.la \
244 -lz 245 -lz
245 246
247libextractor_ps_la_SOURCES = \
248 ps_extractor.c
249libextractor_ps_la_LDFLAGS = \
250 $(PLUGINFLAGS)
251
246libextractor_real_la_SOURCES = \ 252libextractor_real_la_SOURCES = \
247 real_extractor.c 253 real_extractor.c
248libextractor_real_la_LDFLAGS = \ 254libextractor_real_la_LDFLAGS = \
@@ -297,7 +303,6 @@ OLD_LIBS = \
297 $(extrampeg) \ 303 $(extrampeg) \
298 libextractor_nsf.la \ 304 libextractor_nsf.la \
299 libextractor_nsfe.la \ 305 libextractor_nsfe.la \
300 libextractor_ps.la \
301 $(extraqt) \ 306 $(extraqt) \
302 libextractor_riff.la \ 307 libextractor_riff.la \
303 libextractor_s3m.la \ 308 libextractor_s3m.la \
@@ -317,12 +322,6 @@ libextractor_qt_la_LIBADD = \
317 -lz 322 -lz
318endif 323endif
319 324
320libextractor_ps_la_SOURCES = \
321 psextractor.c
322libextractor_ps_la_LDFLAGS = \
323 $(PLUGINFLAGS)
324libextractor_ps_la_LIBADD = \
325 $(top_builddir)/src/main/libextractor.la
326 325
327libextractor_id3v2_la_SOURCES = \ 326libextractor_id3v2_la_SOURCES = \
328 id3v2extractor.c 327 id3v2extractor.c
diff --git a/src/plugins/ps_extractor.c b/src/plugins/ps_extractor.c
new file mode 100644
index 0000000..8a5543f
--- /dev/null
+++ b/src/plugins/ps_extractor.c
@@ -0,0 +1,192 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23
24
/**
 * Copy the next non-empty line out of a buffer.
 *
 * Leading CR / LF bytes starting at 'pos' are skipped first; the line
 * then extends up to (not including) the next CR, LF or the end of
 * the buffer.
 *
 * @param data buffer to read from (need not be NUL-terminated)
 * @param size number of bytes available in 'data'
 * @param pos offset in 'data' at which to start looking
 * @return freshly allocated, NUL-terminated copy of the line (the
 *         caller must free() it); NULL if only line terminators
 *         remain before the end of the buffer or if the allocation
 *         failed
 */
static char *
readline (const char *data, size_t size, size_t pos)
{
  size_t end;
  size_t len;
  char *res;

  while ((pos < size) &&
         ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
    pos++;
  if (pos >= size)
    return NULL;                /* end of file */

  end = pos;
  while ((end < size) &&
         (data[end] != (char) 0x0d) && (data[end] != (char) 0x0a))
    end++;
  len = end - pos;
  res = malloc (len + 1);
  if (NULL == res)
    return NULL;                /* out of memory: treated like end of file */
  memcpy (res, &data[pos], len);
  res[len] = '\0';
  return res;
}
47
48
49static int
50testmeta (char *line,
51 const char *match,
52 enum EXTRACTOR_MetaType type,
53 EXTRACTOR_MetaDataProcessor proc,
54 void *proc_cls)
55{
56 char *key;
57
58 if ( (strncmp (line, match, strlen (match)) == 0) &&
59 (strlen (line) > strlen (match)) )
60 {
61 if ((line[strlen (line) - 1] == ')') && (line[strlen (match)] == '('))
62 {
63 key = &line[strlen (match) + 1];
64 key[strlen (key) - 1] = '\0'; /* remove ")" */
65 }
66 else
67 {
68 key = &line[strlen (match)];
69 }
70 if (0 != proc (proc_cls,
71 "ps",
72 type,
73 EXTRACTOR_METAFORMAT_UTF8,
74 "text/plain",
75 key,
76 strlen (key)+1))
77 return 1;
78 }
79 return 0;
80}
81
82typedef struct
83{
84 const char *prefix;
85 enum EXTRACTOR_MetaType type;
86} Matches;
87
88static Matches tests[] = {
89 {"%%Title: ", EXTRACTOR_METATYPE_TITLE},
90 {"%%Author: ", EXTRACTOR_METATYPE_AUTHOR_NAME},
91 {"%%Version: ", EXTRACTOR_METATYPE_REVISION_NUMBER},
92 {"%%Creator: ", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE},
93 {"%%CreationDate: ", EXTRACTOR_METATYPE_CREATION_DATE},
94 {"%%Pages: ", EXTRACTOR_METATYPE_PAGE_COUNT},
95 {"%%Orientation: ", EXTRACTOR_METATYPE_PAGE_ORIENTATION},
96 {"%%DocumentPaperSizes: ", EXTRACTOR_METATYPE_PAPER_SIZE},
97 {"%%PageOrder: ", EXTRACTOR_METATYPE_PAGE_ORDER},
98 {"%%LanguageLevel: ", EXTRACTOR_METATYPE_FORMAT_VERSION},
99 {"%%Magnification: ", EXTRACTOR_METATYPE_MAGNIFICATION},
100
101 /* Also widely used but not supported since they
102 probably make no sense:
103 "%%BoundingBox: ",
104 "%%DocumentNeededResources: ",
105 "%%DocumentSuppliedResources: ",
106 "%%DocumentProcSets: ",
107 "%%DocumentData: ", */
108
109 {NULL, 0}
110};
111
112#define PS_HEADER "%!PS-Adobe"
113
114/* mimetype = application/postscript */
115int
116EXTRACTOR_ps_extract (const char *data,
117 size_t size,
118 EXTRACTOR_MetaDataProcessor proc,
119 void *proc_cls,
120 const char *options)
121{
122 size_t pos;
123 char *line;
124 int i;
125 int lastLine;
126 int ret;
127
128 pos = strlen (PS_HEADER);
129 if ( (size < pos) ||
130 (0 != strncmp (PS_HEADER,
131 data,
132 pos)) )
133 return 0;
134 ret = 0;
135
136 if (0 != proc (proc_cls,
137 "ps",
138 EXTRACTOR_METATYPE_MIMETYPE,
139 EXTRACTOR_METAFORMAT_UTF8,
140 "text/plain",
141 "application/postscript",
142 strlen ("application/postscript")+1))
143 return 1;
144 /* skip rest of first line */
145 while ((pos < size) && (data[pos] != '\n'))
146 pos++;
147
148 lastLine = -1;
149 line = NULL;
150 /* while Windows-PostScript does not seem to (always?) put
151 "%%EndComments", this should allow us to not read through most of
152 the file for all the sane applications... For Windows-generated
153 PS files, we will bail out at the end of the file. */
154 while (0 != strncmp ("%%EndComments", line, strlen ("%%EndComments")))
155 {
156 free (line);
157 line = readline (data, size, pos);
158 if (line == NULL)
159 break;
160 i = 0;
161 while (tests[i].prefix != NULL)
162 {
163 ret = testmeta (line, tests[i].prefix, tests[i].type, proc, proc_cls);
164 if (ret != 0)
165 break;
166 i++;
167 }
168 if (ret != 0)
169 break;
170
171 /* %%+ continues previous meta-data type... */
172 if ( (lastLine != -1) && (0 == strncmp (line, "%%+ ", strlen ("%%+ "))))
173 {
174 ret = testmeta (line, "%%+ ", tests[lastLine].type, proc, proc_cls);
175 }
176 else
177 {
178 /* update "previous" type */
179 if (tests[i].prefix == NULL)
180 lastLine = -1;
181 else
182 lastLine = i;
183 }
184 if (pos + strlen (line) + 1 <= pos)
185 break; /* overflow */
186 pos += strlen (line) + 1; /* skip newline, too; guarantee progress! */
187 }
188 free (line);
189 return ret;
190}
191
192/* end of ps_extractor.c */
diff --git a/src/plugins/psextractor.c b/src/plugins/psextractor.c
deleted file mode 100644
index 2bb47ce..0000000
--- a/src/plugins/psextractor.c
+++ /dev/null
@@ -1,228 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 **/
20
21#include "platform.h"
22#include "extractor.h"
23
24static struct EXTRACTOR_Keywords *
25addKeyword (EXTRACTOR_KeywordType type,
26 char *keyword, struct EXTRACTOR_Keywords *next)
27{
28 EXTRACTOR_KeywordList *result;
29
30 if (keyword == NULL)
31 return next;
32 result = malloc (sizeof (EXTRACTOR_KeywordList));
33 result->next = next;
34 result->keyword = strdup (keyword);
35 result->keywordType = type;
36 return result;
37}
38
/**
 * Copy the next non-empty line out of a buffer.
 *
 * Leading CR / LF bytes starting at 'pos' are skipped first; the line
 * then extends up to (not including) the next CR, LF or the end of
 * the buffer.
 *
 * @param data buffer to read from (need not be NUL-terminated);
 *        it is never written to
 * @param size number of bytes available in 'data'
 * @param pos offset in 'data' at which to start looking
 * @return freshly allocated, NUL-terminated copy of the line (the
 *         caller must free() it); NULL if only line terminators
 *         remain before the end of the buffer or if the allocation
 *         failed
 */
static char *
readline (const char *data, size_t size, size_t pos)
{
  size_t end;
  size_t len;
  char *res;

  while ((pos < size) &&
         ((data[pos] == (char) 0x0d) || (data[pos] == (char) 0x0a)))
    pos++;
  if (pos >= size)
    return NULL;                /* end of file */

  end = pos;
  while ((end < size) &&
         (data[end] != (char) 0x0d) && (data[end] != (char) 0x0a))
    end++;
  len = end - pos;
  res = malloc (len + 1);
  if (NULL == res)
    return NULL;                /* out of memory: treated like end of file */
  memcpy (res, &data[pos], len);
  res[len] = '\0';
  return res;
}
61
62static struct EXTRACTOR_Keywords *
63testmeta (char *line,
64 const char *match,
65 EXTRACTOR_KeywordType type, struct EXTRACTOR_Keywords *prev)
66{
67 if ((strncmp (line, match, strlen (match)) == 0) &&
68 (strlen (line) > strlen (match)))
69 {
70 char *key;
71
72 if ((line[strlen (line) - 1] == ')') && (line[strlen (match)] == '('))
73 {
74 key = &line[strlen (match) + 1];
75 key[strlen (key) - 1] = '\0'; /* remove ")" */
76 }
77 else
78 {
79 key = &line[strlen (match)];
80 }
81 prev = addKeyword (type, key, prev);
82 }
83 return prev;
84}
85
86typedef struct
87{
88 char *prefix;
89 EXTRACTOR_KeywordType type;
90} Matches;
91
92static Matches tests[] = {
93 {"%%Title: ", EXTRACTOR_TITLE},
94 {"%%Version: ", EXTRACTOR_VERSIONNUMBER},
95 {"%%Creator: ", EXTRACTOR_CREATOR},
96 {"%%CreationDate: ", EXTRACTOR_CREATION_DATE},
97 {"%%Pages: ", EXTRACTOR_PAGE_COUNT},
98 {"%%Orientation: ", EXTRACTOR_UNKNOWN},
99 {"%%DocumentPaperSizes: ", EXTRACTOR_UNKNOWN},
100 {"%%DocumentFonts: ", EXTRACTOR_UNKNOWN},
101 {"%%PageOrder: ", EXTRACTOR_UNKNOWN},
102 {"%%For: ", EXTRACTOR_UNKNOWN},
103 {"%%Magnification: ", EXTRACTOR_UNKNOWN},
104
105 /* Also widely used but not supported since they
106 probably make no sense:
107 "%%BoundingBox: ",
108 "%%DocumentNeededResources: ",
109 "%%DocumentSuppliedResources: ",
110 "%%DocumentProcSets: ",
111 "%%DocumentData: ", */
112
113 {NULL, 0},
114};
115
/* Mime-types for which the PostScript extractor is not run
   (no use trying); NULL-terminated, read-only. */
static const char *const blacklist[] = {
  "image/jpeg",
  "image/gif",
  "image/png",
  "image/x-png",
  "audio/real",
  "audio/mpeg",
  "application/x-gzip",
  "application/x-dpkg",
  "application/bz2",
  "application/x-rpm",
  "application/x-rar",
  "application/x-zip",
  "application/x-arj",
  "application/x-compress",
  "application/x-tar",
  "application/x-lha",
  "application/x-gtar",
  "application/ogg",
  "video/real",
  "video/asf",
  "video/quicktime",
  NULL,
};
143
144/* mimetype = application/postscript */
145struct EXTRACTOR_Keywords *
146libextractor_ps_extract (const char *filename,
147 char *data,
148 size_t size, struct EXTRACTOR_Keywords *prev)
149{
150 size_t pos;
151 char *psheader = "%!PS-Adobe";
152 char *line;
153 int i;
154 int lastLine;
155 const char *mime;
156
157 /* if the mime-type of the file is blacklisted, don't
158 run the printable extactor! */
159 mime = EXTRACTOR_extractLast (EXTRACTOR_MIMETYPE, prev);
160 if (mime != NULL)
161 {
162 int j;
163 j = 0;
164 while (blacklist[j] != NULL)
165 {
166 if (0 == strcmp (blacklist[j], mime))
167 return prev;
168 j++;
169 }
170 }
171
172
173 pos = 0;
174 while ((pos < size) &&
175 (pos < strlen (psheader)) && (data[pos] == psheader[pos]))
176 pos++;
177 if (pos != strlen (psheader))
178 {
179 return prev; /* no ps */
180 }
181
182 prev = addKeyword (EXTRACTOR_MIMETYPE, "application/postscript", prev);
183
184 /* skip rest of first line */
185 while ((pos < size) && (data[pos] != '\n'))
186 pos++;
187
188 lastLine = -1;
189 line = strdup (psheader);
190
191 /* while Windows-PostScript does not seem to (always?) put
192 "%%EndComments", this should allow us to not read through most of
193 the file for all the sane applications... For Windows-generated
194 PS files, we will bail out at the end of the file. */
195 while (0 != strncmp ("%%EndComments", line, strlen ("%%EndComments")))
196 {
197 free (line);
198 line = readline (data, size, pos);
199 if (line == NULL)
200 break;
201 i = 0;
202 while (tests[i].prefix != NULL)
203 {
204 prev = testmeta (line, tests[i].prefix, tests[i].type, prev);
205 i++;
206 }
207
208 /* %%+ continues previous meta-data type... */
209 if ((lastLine != -1) && (0 == strncmp (line, "%%+ ", strlen ("%%+ "))))
210 {
211 prev = testmeta (line, "%%+ ", tests[lastLine].type, prev);
212 }
213 else
214 {
215 /* update "previous" type */
216 if (tests[i].prefix == NULL)
217 lastLine = -1;
218 else
219 lastLine = i;
220 }
221 pos += strlen (line) + 1; /* skip newline, too; guarantee progress! */
222 }
223 free (line);
224
225 return prev;
226}
227
228/* end of psextractor.c */