diff options
Diffstat (limited to 'src/plugins/pdf_extractor.c')
-rw-r--r-- | src/plugins/pdf_extractor.c | 181 |
1 file changed, 91 insertions, 90 deletions
diff --git a/src/plugins/pdf_extractor.c b/src/plugins/pdf_extractor.c index da1f663..1bf3c84 100644 --- a/src/plugins/pdf_extractor.c +++ b/src/plugins/pdf_extractor.c | |||
@@ -80,8 +80,8 @@ static struct Matches tmap[] = { | |||
80 | */ | 80 | */ |
81 | static void | 81 | static void |
82 | process_stdout (FILE *fout, | 82 | process_stdout (FILE *fout, |
83 | EXTRACTOR_MetaDataProcessor proc, | 83 | EXTRACTOR_MetaDataProcessor proc, |
84 | void *proc_cls) | 84 | void *proc_cls) |
85 | { | 85 | { |
86 | unsigned int i; | 86 | unsigned int i; |
87 | char line[1025]; | 87 | char line[1025]; |
@@ -89,38 +89,38 @@ process_stdout (FILE *fout, | |||
89 | const char *colon; | 89 | const char *colon; |
90 | 90 | ||
91 | while (! feof (fout)) | 91 | while (! feof (fout)) |
92 | { | ||
93 | if (NULL == fgets (line, sizeof (line) - 1, fout)) | ||
94 | break; | ||
95 | if (0 == strlen (line)) | ||
96 | continue; | ||
97 | if ('\n' == line[strlen (line) - 1]) | ||
98 | line[strlen (line) - 1] = '\0'; | ||
99 | colon = strchr (line, (int) ':'); | ||
100 | if (NULL == colon) | ||
101 | break; | ||
102 | psuffix = colon + 1; | ||
103 | while (isblank ((unsigned char) psuffix[0])) | ||
104 | psuffix++; | ||
105 | if (0 == strlen (psuffix)) | ||
106 | continue; | ||
107 | for (i = 0; NULL != tmap[i].text; i++) | ||
92 | { | 108 | { |
93 | if (NULL == fgets (line, sizeof (line) - 1, fout)) | 109 | if (0 != strncasecmp (line, |
94 | break; | 110 | tmap[i].text, |
95 | if (0 == strlen (line)) | 111 | colon - line)) |
96 | continue; | ||
97 | if ('\n' == line[strlen(line)-1]) | ||
98 | line[strlen(line)-1] = '\0'; | ||
99 | colon = strchr (line, (int) ':'); | ||
100 | if (NULL == colon) | ||
101 | break; | ||
102 | psuffix = colon + 1; | ||
103 | while (isblank ((unsigned char) psuffix[0])) | ||
104 | psuffix++; | ||
105 | if (0 == strlen (psuffix)) | ||
106 | continue; | 112 | continue; |
107 | for (i = 0; NULL != tmap[i].text; i++) | 113 | if (0 != proc (proc_cls, |
108 | { | 114 | "pdf", |
109 | if (0 != strncasecmp (line, | 115 | tmap[i].type, |
110 | tmap[i].text, | 116 | EXTRACTOR_METAFORMAT_UTF8, |
111 | colon - line)) | 117 | "text/plain", |
112 | continue; | 118 | psuffix, |
113 | if (0 != proc (proc_cls, | 119 | strlen (psuffix) + 1)) |
114 | "pdf", | 120 | return; |
115 | tmap[i].type, | 121 | break; |
116 | EXTRACTOR_METAFORMAT_UTF8, | ||
117 | "text/plain", | ||
118 | psuffix, | ||
119 | strlen(psuffix) + 1)) | ||
120 | return; | ||
121 | break; | ||
122 | } | ||
123 | } | 122 | } |
123 | } | ||
124 | } | 124 | } |
125 | 125 | ||
126 | 126 | ||
@@ -154,79 +154,79 @@ EXTRACTOR_pdf_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
154 | if (0 != pipe (in)) | 154 | if (0 != pipe (in)) |
155 | return; | 155 | return; |
156 | if (0 != pipe (out)) | 156 | if (0 != pipe (out)) |
157 | { | 157 | { |
158 | close (in[0]); | 158 | close (in[0]); |
159 | close (in[1]); | 159 | close (in[1]); |
160 | return; | 160 | return; |
161 | } | 161 | } |
162 | pid = fork (); | 162 | pid = fork (); |
163 | if (-1 == pid) | 163 | if (-1 == pid) |
164 | { | 164 | { |
165 | close (in[0]); | 165 | close (in[0]); |
166 | close (in[1]); | 166 | close (in[1]); |
167 | close (out[0]); | 167 | close (out[0]); |
168 | close (out[1]); | 168 | close (out[1]); |
169 | return; | 169 | return; |
170 | } | 170 | } |
171 | if (0 == pid) | 171 | if (0 == pid) |
172 | { | 172 | { |
173 | char *const args[] = { | 173 | char *const args[] = { |
174 | "pdfinfo", | 174 | "pdfinfo", |
175 | "-", | 175 | "-", |
176 | NULL | 176 | NULL |
177 | }; | 177 | }; |
178 | /* am child, exec 'pdfinfo' */ | 178 | /* am child, exec 'pdfinfo' */ |
179 | close (0); | 179 | close (0); |
180 | close (1); | 180 | close (1); |
181 | if ( (-1 == dup2 (in[0], 0)) || | 181 | if ( (-1 == dup2 (in[0], 0)) || |
182 | (-1 == dup2 (out[1], 1)) ) | 182 | (-1 == dup2 (out[1], 1)) ) |
183 | exit (1); | ||
184 | close (in[0]); | ||
185 | close (in[1]); | ||
186 | close (out[0]); | ||
187 | close (out[1]); | ||
188 | execvp ("pdfinfo", args); | ||
189 | exit (1); | 183 | exit (1); |
190 | } | 184 | close (in[0]); |
185 | close (in[1]); | ||
186 | close (out[0]); | ||
187 | close (out[1]); | ||
188 | execvp ("pdfinfo", args); | ||
189 | exit (1); | ||
190 | } | ||
191 | /* am parent, send file */ | 191 | /* am parent, send file */ |
192 | close (in[0]); | 192 | close (in[0]); |
193 | close (out[1]); | 193 | close (out[1]); |
194 | fout = fdopen (out[0], "r"); | 194 | fout = fdopen (out[0], "r"); |
195 | if (NULL == fout) | 195 | if (NULL == fout) |
196 | { | 196 | { |
197 | close (in[1]); | 197 | close (in[1]); |
198 | close (out[0]); | 198 | close (out[0]); |
199 | kill (pid, SIGKILL); | 199 | kill (pid, SIGKILL); |
200 | waitpid (pid, NULL, 0); | 200 | waitpid (pid, NULL, 0); |
201 | return; | 201 | return; |
202 | } | 202 | } |
203 | pos = 0; | 203 | pos = 0; |
204 | while (pos < fsize) | 204 | while (pos < fsize) |
205 | { | ||
206 | ssize_t got; | ||
207 | size_t wpos; | ||
208 | |||
209 | data = NULL; | ||
210 | got = ec->read (ec->cls, | ||
211 | &data, | ||
212 | fsize - pos); | ||
213 | if ( (-1 == got) || | ||
214 | (NULL == data) ) | ||
215 | break; | ||
216 | wpos = 0; | ||
217 | while (wpos < got) | ||
205 | { | 218 | { |
206 | ssize_t got; | 219 | ssize_t out; |
207 | size_t wpos; | 220 | |
208 | 221 | out = write (in[1], data + wpos, got - wpos); | |
209 | data = NULL; | 222 | if (out <= 0) |
210 | got = ec->read (ec->cls, | ||
211 | &data, | ||
212 | fsize - pos); | ||
213 | if ( (-1 == got) || | ||
214 | (NULL == data) ) | ||
215 | break; | ||
216 | wpos = 0; | ||
217 | while (wpos < got) | ||
218 | { | ||
219 | ssize_t out; | ||
220 | |||
221 | out = write (in[1], data + wpos, got - wpos); | ||
222 | if (out <= 0) | ||
223 | break; | ||
224 | wpos += out; | ||
225 | } | ||
226 | if (wpos < got) | ||
227 | break; | 223 | break; |
228 | pos += got; | 224 | wpos += out; |
229 | } | 225 | } |
226 | if (wpos < got) | ||
227 | break; | ||
228 | pos += got; | ||
229 | } | ||
230 | close (in[1]); | 230 | close (in[1]); |
231 | process_stdout (fout, ec->proc, ec->cls); | 231 | process_stdout (fout, ec->proc, ec->cls); |
232 | fclose (fout); | 232 | fclose (fout); |
@@ -234,4 +234,5 @@ EXTRACTOR_pdf_extract_method (struct EXTRACTOR_ExtractContext *ec) | |||
234 | waitpid (pid, NULL, 0); | 234 | waitpid (pid, NULL, 0); |
235 | } | 235 | } |
236 | 236 | ||
237 | |||
237 | /* end of pdf_extractor.c */ | 238 | /* end of pdf_extractor.c */ |