aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/old/man_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/old/man_extractor.c')
-rw-r--r--src/plugins/old/man_extractor.c232
1 files changed, 232 insertions, 0 deletions
diff --git a/src/plugins/old/man_extractor.c b/src/plugins/old/man_extractor.c
new file mode 100644
index 0000000..eeb40a8
--- /dev/null
+++ b/src/plugins/old/man_extractor.c
@@ -0,0 +1,232 @@
1/*
2 This file is part of libextractor.
3 (C) 2002, 2003, 2004, 2009 Vidyut Samanta and Christian Grothoff
4
5 libextractor is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 libextractor is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with libextractor; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19 */
20
21#include "platform.h"
22#include "extractor.h"
23#include <ctype.h>
24
/**
 * Copy the first n bytes of str into a newly allocated buffer and
 * append a 0-terminator.  Exactly n bytes are copied; the input is
 * not inspected for an earlier 0-byte.
 *
 * @param str start of the bytes to copy (at least n bytes readable)
 * @param n number of bytes to copy
 * @return the new string (to be released with free), or NULL if
 *         the allocation failed
 */
static char *
stndup (const char *str, size_t n)
{
  char *copy = malloc (n + 1);

  if (NULL != copy)
    {
      memcpy (copy, str, n);
      copy[n] = '\0';
    }
  return copy;
}
36
37static int
38addKeyword (enum EXTRACTOR_MetaType type,
39 char *keyword,
40 EXTRACTOR_MetaDataProcessor proc,
41 void *proc_cls)
42{
43 int ret;
44 if (keyword == NULL)
45 return 0;
46 if (strlen (keyword) == 0)
47 {
48 free (keyword);
49 return 0;
50 }
51 if ((keyword[0] == '\"') && (keyword[strlen (keyword) - 1] == '\"'))
52 {
53 char *tmp;
54
55 keyword[strlen (keyword) - 1] = '\0';
56 tmp = strdup (&keyword[1]);
57 free (keyword);
58 if (tmp == NULL)
59 return 0;
60 keyword = tmp;
61 }
62 if (strlen (keyword) == 0)
63 {
64 free (keyword);
65 return 0;
66 }
67 ret = proc (proc_cls,
68 "man",
69 type,
70 EXTRACTOR_METAFORMAT_UTF8,
71 "text/plain",
72 keyword,
73 strlen (keyword)+1);
74 free (keyword);
75 return ret;
76}
77
/**
 * Advance *end to the end of the current token.
 *
 * Scanning starts at *end and stops at the first space that is not
 * enclosed in double quotes, or at 'size'.  If the scan reaches
 * 'size' while a double quote is still open, *end is set to
 * size + 1 so that the caller can detect the unbalanced quote.
 *
 * @param end in: offset where the token starts; out: offset just
 *        past the token, or size + 1 on an unterminated quote
 * @param buf data being scanned
 * @param size number of bytes of buf that may be inspected
 */
static void
NEXT (size_t * end, const char *buf, const size_t size)
{
  size_t cursor = *end;
  int inside = 0;

  for (; cursor < size; cursor++)
    {
      const char c = buf[cursor];

      if ((! inside) && (' ' == c))
        break;
      if ('\"' == c)
        inside = ! inside;
    }
  *end = inside ? size + 1 : cursor;
}
93
94/**
95 * How many bytes do we actually try to scan? (from the beginning
96 * of the file).
97 */
98#define MAX_READ (16 * 1024)
99
100#define ADD(t,s) do { if (0 != addKeyword (t, s, proc, proc_cls)) return 1; } while (0)
101
102int
103EXTRACTOR_man_extract (const char *buf,
104 size_t size,
105 EXTRACTOR_MetaDataProcessor proc,
106 void *proc_cls,
107 const char *options)
108{
109 int pos;
110 size_t xsize;
111 const size_t xlen = strlen (".TH ");
112
113 if (size > MAX_READ)
114 size = MAX_READ;
115 pos = 0;
116 if (size < xlen)
117 return 0;
118 while ((pos < size - xlen) &&
119 ((0 != strncmp (".TH ",
120 &buf[pos],
121 xlen)) || ((pos != 0) && (buf[pos - 1] != '\n'))))
122 {
123 if (!isgraph ((unsigned char) buf[pos]) &&
124 !isspace ((unsigned char) buf[pos]))
125 return 0;
126 pos++;
127 }
128 xsize = pos;
129 while ((xsize < size) && (buf[xsize] != '\n'))
130 xsize++;
131 size = xsize;
132
133 if (0 == strncmp (".TH ", &buf[pos], xlen))
134 {
135 size_t end;
136
137 pos += xlen;
138 end = pos;
139 NEXT (&end, buf, size);
140 if (end > size)
141 return 0;
142 if (end - pos > 0)
143 {
144 ADD (EXTRACTOR_METATYPE_TITLE, stndup (&buf[pos], end - pos));
145 pos = end + 1;
146 }
147 if (pos >= size)
148 return 0;
149 end = pos;
150 NEXT (&end, buf, size);
151 if (end > size)
152 return 0;
153 if (buf[pos] == '\"')
154 pos++;
155 if ((end - pos >= 1) && (end - pos <= 4))
156 {
157 switch (buf[pos])
158 {
159 case '1':
160 ADD (EXTRACTOR_METATYPE_SECTION,
161 strdup (_("Commands")));
162 break;
163 case '2':
164 ADD (EXTRACTOR_METATYPE_SECTION,
165 strdup (_("System calls")));
166 break;
167 case '3':
168 ADD (EXTRACTOR_METATYPE_SECTION,
169 strdup (_("Library calls")));
170 break;
171 case '4':
172 ADD (EXTRACTOR_METATYPE_SECTION,
173 strdup (_("Special files")));
174 break;
175 case '5':
176 ADD (EXTRACTOR_METATYPE_SECTION,
177 strdup (_("File formats and conventions")));
178 break;
179 case '6':
180 ADD (EXTRACTOR_METATYPE_SECTION,
181 strdup (_("Games")));
182 break;
183 case '7':
184 ADD (EXTRACTOR_METATYPE_SECTION,
185 strdup (_("Conventions and miscellaneous")));
186 break;
187 case '8':
188 ADD (EXTRACTOR_METATYPE_SECTION,
189 strdup (_("System management commands")));
190 break;
191 case '9':
192 ADD (EXTRACTOR_METATYPE_SECTION,
193 strdup (_("Kernel routines")));
194 break;
195 }
196 pos = end + 1;
197 }
198 end = pos;
199 NEXT (&end, buf, size);
200 if (end > size)
201 return 0;
202 if (end - pos > 0)
203 {
204 ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, stndup (&buf[pos], end - pos));
205 pos = end + 1;
206 }
207 end = pos;
208 NEXT (&end, buf, size);
209 if (end > size)
210 return 0;
211 if (end - pos > 0)
212 {
213 ADD (EXTRACTOR_METATYPE_SOURCE,
214 stndup (&buf[pos], end - pos));
215 pos = end + 1;
216 }
217 end = pos;
218 NEXT (&end, buf, size);
219 if (end > size)
220 return 0;
221 if (end - pos > 0)
222 {
223 ADD (EXTRACTOR_METATYPE_BOOK_TITLE,
224 stndup (&buf[pos], end - pos));
225 pos = end + 1;
226 }
227 }
228
229 return 0;
230}
231
232/* end of man_extractor.c */