1 files changed, 0 insertions, 247 deletions
diff --git a/src/plugins/old/dvi_extractor.c b/src/plugins/old/dvi_extractor.c
deleted file mode 100644
index e4cfa3a..0000000
--- a/src/plugins/old/dvi_extractor.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
-     This file is part of libextractor.
-     (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
-     libextractor is free software; you can redistribute it and/or modify
-     it under the terms of the GNU General Public License as published
-     by the Free Software Foundation; either version 2, or (at your
-     option) any later version.
-     libextractor is distributed in the hope that it will be useful, but
-     WITHOUT ANY WARRANTY; without even the implied warranty of
-     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-     General Public License for more details.
-     You should have received a copy of the GNU General Public License
-     along with libextractor; see the file COPYING.  If not, write to the
-     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-     Boston, MA 02111-1307, USA.
- */
#include "platform.h"
#include "extractor.h"
#include <stdlib.h>
#include <string.h>
-typedef struct
-{
-  char *text;
-  enum EXTRACTOR_MetaType type;
-} Matches;
-static Matches tmap[] = {
-  {"/Title (",    EXTRACTOR_METATYPE_TITLE},
-  {"/Subject (",  EXTRACTOR_METATYPE_SUBJECT},
-  {"/Author (",   EXTRACTOR_METATYPE_AUTHOR_NAME},
-  {"/Keywords (", EXTRACTOR_METATYPE_KEYWORDS},
-  {"/Creator (",  EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
-  {"/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
-  {NULL, 0},
-};
-static int
-parseZZZ (const char *data,
-          size_t pos, size_t len,
-          EXTRACTOR_MetaDataProcessor proc,
-          void *proc_cls)
-{
-  size_t slen;
-  size_t end;
-  int i;
-  end = pos + len;
-  slen = strlen ("ps:SDict begin [");
-  if (len <= slen)
-    return 0;
-  if (0 != strncmp ("ps:SDict begin [ ", &data[pos], slen))
-    return 0;
-  pos += slen;
-  while (pos < end)
-    {
-      i = 0;
-      while (tmap[i].text != NULL)
-        {
-          slen = strlen (tmap[i].text);
-          if (pos + slen < end)
-            {
-              if (0 == strncmp (&data[pos], tmap[i].text, slen))
-                {
-                  pos += slen;
-                  slen = pos;
-                  while ((slen < end) && (data[slen] != ')'))
-                    slen++;
-                  slen = slen - pos;
-                  {
-                    char value[slen + 1];
-                    value[slen] = '\0';
-                    memcpy (value, &data[pos], slen);
-                    if (0 != proc (proc_cls, 
-                                   "dvi",
-                                   tmap[i].type,
-                                   EXTRACTOR_METAFORMAT_C_STRING,
-                                   "text/plain",
-                                   value,
-                                   slen +1))
-                      {
-                        return 1;
-                      }
-                  }
-                  pos += slen + 1;
-                }
-            }
-          i++;
-        }
-      pos++;
-    }
-  return 0;
-}
/**
 * Read four bytes from a possibly unaligned address.
 *
 * The bytes are copied into a properly typed object, so no aligned
 * access or pointer cast is needed.  No byte-order conversion is done;
 * callers apply ntohl themselves.
 *
 * @param data address of the first of four readable bytes
 * @return the four bytes interpreted in host byte order
 */
static unsigned int
getIntAt (const void *data)
{
  uint32_t value;

  memcpy (&value, data, sizeof (value));
  return value;
}
/**
 * Read two bytes from a possibly unaligned address.
 *
 * The bytes are copied into a properly typed object, so no aligned
 * access or pointer cast is needed.  No byte-order conversion is done;
 * callers apply ntohs themselves.
 *
 * @param data address of the first of two readable bytes
 * @return the two bytes interpreted in host byte order
 */
static unsigned int
getShortAt (const void *data)
{
  uint16_t value;

  memcpy (&value, data, sizeof (value));
  return value;
}
-int 
-EXTRACTOR_dvi_extract (const unsigned char *data,
-                       size_t size,
-                       EXTRACTOR_MetaDataProcessor proc,
-                       void *proc_cls,
-                       const char *options)
-{
-  unsigned int klen;
-  uint32_t pos;
-  uint32_t opos;
-  unsigned int len;
-  unsigned int pageCount;
-  char pages[16];
-  if (size < 40)
-    return 0;
-  if ((data[0] != 247) || (data[1] != 2))
-    return 0;                /* cannot be dvi or unsupported version */
-  klen = data[14];
-  pos = size - 1;
-  while ((data[pos] == 223) && (pos > 0))
-    pos--;
-  if ((data[pos] != 2) || (pos < 40))
-    return 0;
-  pos--;
-  pos -= 4;
-  /* assert pos at 'post_post tag' */
-  if (data[pos] != 249)
-    return 0;
-  opos = pos;
-  pos = ntohl (getIntAt (&data[opos + 1]));
-  if (pos + 25 > size)
-    return 0;
-  /* assert pos at 'post' command */
-  if (data[pos] != 248)
-    return 0;
-  pageCount = 0;
-  opos = pos;
-  pos = ntohl (getIntAt (&data[opos + 1]));
-  while (1)
-    {
-      if (pos == UINT32_MAX)
-        break;
-      if (pos + 45 > size)
-        return 0;
-      if (data[pos] != 139)     /* expect 'bop' */
-        return 0;
-      pageCount++;
-      opos = pos;
-      pos = ntohl (getIntAt (&data[opos + 41]));
-      if (pos == UINT32_MAX)
-        break;
-      if (pos >= opos)
-        return 0;            /* invalid! */
-    }
-  /* ok, now we believe it's a dvi... */
-  snprintf (pages, sizeof(pages), "%u", pageCount);
-  if (0 != proc (proc_cls, 
-                 "dvi",
-                 EXTRACTOR_METATYPE_PAGE_COUNT,
-                 EXTRACTOR_METAFORMAT_UTF8,
-                 "text/plain",
-                 pages,
-                 strlen (pages) +1))
-    return 1;
-  if (0 != proc (proc_cls, 
-                 "dvi",
-                 EXTRACTOR_METATYPE_MIMETYPE,
-                 EXTRACTOR_METAFORMAT_UTF8,
-                 "text/plain",
-                 "application/x-dvi",
-                 strlen ("application/x-dvi") +1))
-    return 1;
-  {
-    char comment[klen + 1];
-    
-    comment[klen] = '\0';
-    memcpy (comment, &data[15], klen);
-    if (0 != proc (proc_cls, 
-                   "dvi",
-                   EXTRACTOR_METATYPE_COMMENT,
-                   EXTRACTOR_METAFORMAT_UTF8,
-                   "text/plain",
-                   comment,
-                   klen +1))
-      return 1;
-  }
-  /* try to find PDF/ps special */
-  pos = opos;
-  while (pos < size - 100)
-    {
-      switch (data[pos])
-        {
-        case 139:              /* begin page 'bop', we typically have to skip that one to
-                                   find the zzz's */
-          pos += 45;            /* skip bop */
-          break;
-        case 239:              /* zzz1 */
-          len = data[pos + 1];
-          if (pos + 2 + len < size)
-            if (0 != parseZZZ ((const char *) data, pos + 2, len, proc, proc_cls))
-              return 1;
-          pos += len + 2;
-          break;
-        case 240:              /* zzz2 */
-          len = ntohs (getShortAt (&data[pos + 1]));
-          if (pos + 3 + len < size)
-            if (0 != parseZZZ ((const char *) data, pos + 3, len, proc, proc_cls))
-              return 1;
-          pos += len + 3;
-          break;
-        case 241:              /* zzz3, who uses that? */
-          len = (ntohs (getShortAt (&data[pos + 1]))) + 65536 * data[pos + 3];
-          if (pos + 4 + len < size)
-            if (0 != parseZZZ ((const char *) data, pos + 4, len, proc, proc_cls))
-              return 1;
-          pos += len + 4;
-          break;
-        case 242:              /* zzz4, hurray! */
-          len = ntohl (getIntAt (&data[pos + 1]));
-          if (pos + 1 + len < size)
-            if (0 != parseZZZ ((const char *) data, pos + 5, len, proc, proc_cls))
-              return 1;
-          pos += len + 5;
-          break;
-        default:               /* unsupported opcode, abort scan */
-          return 0;
-        }
-    }
-  return 0;
-}

diff --git a/src/plugins/old/dvi_extractor.c b/src/plugins/old/dvi_extractor.c deleted file mode 100644 index e4cfa3a..0000000 --- a/src/plugins/old/dvi_extractor.c +++ /dev/null
@@ -1,247 +0,0 @@
1	/*
2	This file is part of libextractor.
3	(C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
4
5	libextractor is free software; you can redistribute it and/or modify
6	it under the terms of the GNU General Public License as published
7	by the Free Software Foundation; either version 2, or (at your
8	option) any later version.
9
10	libextractor is distributed in the hope that it will be useful, but
11	WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	General Public License for more details.
14
15	You should have received a copy of the GNU General Public License
16	along with libextractor; see the file COPYING. If not, write to the
17	Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18	Boston, MA 02111-1307, USA.
19	*/
20
21	#include "platform.h"
22	#include "extractor.h"
23
24	typedef struct
25	{
26	char *text;
27	enum EXTRACTOR_MetaType type;
28	} Matches;
29
30	static Matches tmap[] = {
31	{"/Title (", EXTRACTOR_METATYPE_TITLE},
32	{"/Subject (", EXTRACTOR_METATYPE_SUBJECT},
33	{"/Author (", EXTRACTOR_METATYPE_AUTHOR_NAME},
34	{"/Keywords (", EXTRACTOR_METATYPE_KEYWORDS},
35	{"/Creator (", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE },
36	{"/Producer (", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE},
37	{NULL, 0},
38	};
39
40	static int
41	parseZZZ (const char *data,
42	size_t pos, size_t len,
43	EXTRACTOR_MetaDataProcessor proc,
44	void *proc_cls)
45	{
46	size_t slen;
47	size_t end;
48	int i;
49
50	end = pos + len;
51	slen = strlen ("ps:SDict begin [");
52	if (len <= slen)
53	return 0;
54	if (0 != strncmp ("ps:SDict begin [ ", &data[pos], slen))
55	return 0;
56	pos += slen;
57	while (pos < end)
58	{
59	i = 0;
60	while (tmap[i].text != NULL)
61	{
62	slen = strlen (tmap[i].text);
63	if (pos + slen < end)
64	{
65	if (0 == strncmp (&data[pos], tmap[i].text, slen))
66	{
67	pos += slen;
68	slen = pos;
69	while ((slen < end) && (data[slen] != ')'))
70	slen++;
71	slen = slen - pos;
72	{
73	char value[slen + 1];
74	value[slen] = '\0';
75	memcpy (value, &data[pos], slen);
76	if (0 != proc (proc_cls,
77	"dvi",
78	tmap[i].type,
79	EXTRACTOR_METAFORMAT_C_STRING,
80	"text/plain",
81	value,
82	slen +1))
83	{
84	return 1;
85	}
86	}
87	pos += slen + 1;
88	}
89	}
90	i++;
91	}
92	pos++;
93	}
94	return 0;
95	}
96
/**
 * Read four bytes from a possibly unaligned address.
 *
 * The bytes are copied into a properly typed object, so no aligned
 * access or pointer cast is needed.  No byte-order conversion is done;
 * callers apply ntohl themselves.
 *
 * @param data address of the first of four readable bytes
 * @return the four bytes interpreted in host byte order
 */
static unsigned int
getIntAt (const void *data)
{
  uint32_t value;

  memcpy (&value, data, sizeof (value));
  return value;
}
105
/**
 * Read two bytes from a possibly unaligned address.
 *
 * The bytes are copied into a properly typed object, so no aligned
 * access or pointer cast is needed.  No byte-order conversion is done;
 * callers apply ntohs themselves.
 *
 * @param data address of the first of two readable bytes
 * @return the two bytes interpreted in host byte order
 */
static unsigned int
getShortAt (const void *data)
{
  uint16_t value;

  memcpy (&value, data, sizeof (value));
  return value;
}
114
115
116	int
117	EXTRACTOR_dvi_extract (const unsigned char *data,
118	size_t size,
119	EXTRACTOR_MetaDataProcessor proc,
120	void *proc_cls,
121	const char *options)
122	{
123	unsigned int klen;
124	uint32_t pos;
125	uint32_t opos;
126	unsigned int len;
127	unsigned int pageCount;
128	char pages[16];
129
130	if (size < 40)
131	return 0;
132	if ((data[0] != 247) \|\| (data[1] != 2))
133	return 0; /* cannot be dvi or unsupported version */
134	klen = data[14];
135
136	pos = size - 1;
137	while ((data[pos] == 223) && (pos > 0))
138	pos--;
139	if ((data[pos] != 2) \|\| (pos < 40))
140	return 0;
141	pos--;
142	pos -= 4;
143	/* assert pos at 'post_post tag' */
144	if (data[pos] != 249)
145	return 0;
146	opos = pos;
147	pos = ntohl (getIntAt (&data[opos + 1]));
148	if (pos + 25 > size)
149	return 0;
150	/* assert pos at 'post' command */
151	if (data[pos] != 248)
152	return 0;
153	pageCount = 0;
154	opos = pos;
155	pos = ntohl (getIntAt (&data[opos + 1]));
156	while (1)
157	{
158	if (pos == UINT32_MAX)
159	break;
160	if (pos + 45 > size)
161	return 0;
162	if (data[pos] != 139) /* expect 'bop' */
163	return 0;
164	pageCount++;
165	opos = pos;
166	pos = ntohl (getIntAt (&data[opos + 41]));
167	if (pos == UINT32_MAX)
168	break;
169	if (pos >= opos)
170	return 0; /* invalid! */
171	}
172	/* ok, now we believe it's a dvi... */
173	snprintf (pages, sizeof(pages), "%u", pageCount);
174	if (0 != proc (proc_cls,
175	"dvi",
176	EXTRACTOR_METATYPE_PAGE_COUNT,
177	EXTRACTOR_METAFORMAT_UTF8,
178	"text/plain",
179	pages,
180	strlen (pages) +1))
181	return 1;
182	if (0 != proc (proc_cls,
183	"dvi",
184	EXTRACTOR_METATYPE_MIMETYPE,
185	EXTRACTOR_METAFORMAT_UTF8,
186	"text/plain",
187	"application/x-dvi",
188	strlen ("application/x-dvi") +1))
189	return 1;
190	{
191	char comment[klen + 1];
192
193	comment[klen] = '\0';
194	memcpy (comment, &data[15], klen);
195	if (0 != proc (proc_cls,
196	"dvi",
197	EXTRACTOR_METATYPE_COMMENT,
198	EXTRACTOR_METAFORMAT_UTF8,
199	"text/plain",
200	comment,
201	klen +1))
202	return 1;
203	}
204	/* try to find PDF/ps special */
205	pos = opos;
206	while (pos < size - 100)
207	{
208	switch (data[pos])
209	{
210	case 139: /* begin page 'bop', we typically have to skip that one to
211	find the zzz's */
212	pos += 45; /* skip bop */
213	break;
214	case 239: /* zzz1 */
215	len = data[pos + 1];
216	if (pos + 2 + len < size)
217	if (0 != parseZZZ ((const char *) data, pos + 2, len, proc, proc_cls))
218	return 1;
219	pos += len + 2;
220	break;
221	case 240: /* zzz2 */
222	len = ntohs (getShortAt (&data[pos + 1]));
223	if (pos + 3 + len < size)
224	if (0 != parseZZZ ((const char *) data, pos + 3, len, proc, proc_cls))
225	return 1;
226	pos += len + 3;
227	break;
228	case 241: /* zzz3, who uses that? */
229	len = (ntohs (getShortAt (&data[pos + 1]))) + 65536 * data[pos + 3];
230	if (pos + 4 + len < size)
231	if (0 != parseZZZ ((const char *) data, pos + 4, len, proc, proc_cls))
232	return 1;
233	pos += len + 4;
234	break;
235	case 242: /* zzz4, hurray! */
236	len = ntohl (getIntAt (&data[pos + 1]));
237	if (pos + 1 + len < size)
238	if (0 != parseZZZ ((const char *) data, pos + 5, len, proc, proc_cls))
239	return 1;
240	pos += len + 5;
241	break;
242	default: /* unsupported opcode, abort scan */
243	return 0;
244	}
245	}
246	return 0;
247	}