aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/pdf_extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/pdf_extractor.c')
-rw-r--r--src/plugins/pdf_extractor.c181
1 files changed, 91 insertions, 90 deletions
diff --git a/src/plugins/pdf_extractor.c b/src/plugins/pdf_extractor.c
index da1f663..1bf3c84 100644
--- a/src/plugins/pdf_extractor.c
+++ b/src/plugins/pdf_extractor.c
@@ -80,8 +80,8 @@ static struct Matches tmap[] = {
80 */ 80 */
81static void 81static void
82process_stdout (FILE *fout, 82process_stdout (FILE *fout,
83 EXTRACTOR_MetaDataProcessor proc, 83 EXTRACTOR_MetaDataProcessor proc,
84 void *proc_cls) 84 void *proc_cls)
85{ 85{
86 unsigned int i; 86 unsigned int i;
87 char line[1025]; 87 char line[1025];
@@ -89,38 +89,38 @@ process_stdout (FILE *fout,
89 const char *colon; 89 const char *colon;
90 90
91 while (! feof (fout)) 91 while (! feof (fout))
92 {
93 if (NULL == fgets (line, sizeof (line) - 1, fout))
94 break;
95 if (0 == strlen (line))
96 continue;
97 if ('\n' == line[strlen (line) - 1])
98 line[strlen (line) - 1] = '\0';
99 colon = strchr (line, (int) ':');
100 if (NULL == colon)
101 break;
102 psuffix = colon + 1;
103 while (isblank ((unsigned char) psuffix[0]))
104 psuffix++;
105 if (0 == strlen (psuffix))
106 continue;
107 for (i = 0; NULL != tmap[i].text; i++)
92 { 108 {
93 if (NULL == fgets (line, sizeof (line) - 1, fout)) 109 if (0 != strncasecmp (line,
94 break; 110 tmap[i].text,
95 if (0 == strlen (line)) 111 colon - line))
96 continue;
97 if ('\n' == line[strlen(line)-1])
98 line[strlen(line)-1] = '\0';
99 colon = strchr (line, (int) ':');
100 if (NULL == colon)
101 break;
102 psuffix = colon + 1;
103 while (isblank ((unsigned char) psuffix[0]))
104 psuffix++;
105 if (0 == strlen (psuffix))
106 continue; 112 continue;
107 for (i = 0; NULL != tmap[i].text; i++) 113 if (0 != proc (proc_cls,
108 { 114 "pdf",
109 if (0 != strncasecmp (line, 115 tmap[i].type,
110 tmap[i].text, 116 EXTRACTOR_METAFORMAT_UTF8,
111 colon - line)) 117 "text/plain",
112 continue; 118 psuffix,
113 if (0 != proc (proc_cls, 119 strlen (psuffix) + 1))
114 "pdf", 120 return;
115 tmap[i].type, 121 break;
116 EXTRACTOR_METAFORMAT_UTF8,
117 "text/plain",
118 psuffix,
119 strlen(psuffix) + 1))
120 return;
121 break;
122 }
123 } 122 }
123 }
124} 124}
125 125
126 126
@@ -154,79 +154,79 @@ EXTRACTOR_pdf_extract_method (struct EXTRACTOR_ExtractContext *ec)
154 if (0 != pipe (in)) 154 if (0 != pipe (in))
155 return; 155 return;
156 if (0 != pipe (out)) 156 if (0 != pipe (out))
157 { 157 {
158 close (in[0]); 158 close (in[0]);
159 close (in[1]); 159 close (in[1]);
160 return; 160 return;
161 } 161 }
162 pid = fork (); 162 pid = fork ();
163 if (-1 == pid) 163 if (-1 == pid)
164 { 164 {
165 close (in[0]); 165 close (in[0]);
166 close (in[1]); 166 close (in[1]);
167 close (out[0]); 167 close (out[0]);
168 close (out[1]); 168 close (out[1]);
169 return; 169 return;
170 } 170 }
171 if (0 == pid) 171 if (0 == pid)
172 { 172 {
173 char *const args[] = { 173 char *const args[] = {
174 "pdfinfo", 174 "pdfinfo",
175 "-", 175 "-",
176 NULL 176 NULL
177 }; 177 };
178 /* am child, exec 'pdfinfo' */ 178 /* am child, exec 'pdfinfo' */
179 close (0); 179 close (0);
180 close (1); 180 close (1);
181 if ( (-1 == dup2 (in[0], 0)) || 181 if ( (-1 == dup2 (in[0], 0)) ||
182 (-1 == dup2 (out[1], 1)) ) 182 (-1 == dup2 (out[1], 1)) )
183 exit (1);
184 close (in[0]);
185 close (in[1]);
186 close (out[0]);
187 close (out[1]);
188 execvp ("pdfinfo", args);
189 exit (1); 183 exit (1);
190 } 184 close (in[0]);
185 close (in[1]);
186 close (out[0]);
187 close (out[1]);
188 execvp ("pdfinfo", args);
189 exit (1);
190 }
191 /* am parent, send file */ 191 /* am parent, send file */
192 close (in[0]); 192 close (in[0]);
193 close (out[1]); 193 close (out[1]);
194 fout = fdopen (out[0], "r"); 194 fout = fdopen (out[0], "r");
195 if (NULL == fout) 195 if (NULL == fout)
196 { 196 {
197 close (in[1]); 197 close (in[1]);
198 close (out[0]); 198 close (out[0]);
199 kill (pid, SIGKILL); 199 kill (pid, SIGKILL);
200 waitpid (pid, NULL, 0); 200 waitpid (pid, NULL, 0);
201 return; 201 return;
202 } 202 }
203 pos = 0; 203 pos = 0;
204 while (pos < fsize) 204 while (pos < fsize)
205 {
206 ssize_t got;
207 size_t wpos;
208
209 data = NULL;
210 got = ec->read (ec->cls,
211 &data,
212 fsize - pos);
213 if ( (-1 == got) ||
214 (NULL == data) )
215 break;
216 wpos = 0;
217 while (wpos < got)
205 { 218 {
206 ssize_t got; 219 ssize_t out;
207 size_t wpos; 220
208 221 out = write (in[1], data + wpos, got - wpos);
209 data = NULL; 222 if (out <= 0)
210 got = ec->read (ec->cls,
211 &data,
212 fsize - pos);
213 if ( (-1 == got) ||
214 (NULL == data) )
215 break;
216 wpos = 0;
217 while (wpos < got)
218 {
219 ssize_t out;
220
221 out = write (in[1], data + wpos, got - wpos);
222 if (out <= 0)
223 break;
224 wpos += out;
225 }
226 if (wpos < got)
227 break; 223 break;
228 pos += got; 224 wpos += out;
229 } 225 }
226 if (wpos < got)
227 break;
228 pos += got;
229 }
230 close (in[1]); 230 close (in[1]);
231 process_stdout (fout, ec->proc, ec->cls); 231 process_stdout (fout, ec->proc, ec->cls);
232 fclose (fout); 232 fclose (fout);
@@ -234,4 +234,5 @@ EXTRACTOR_pdf_extract_method (struct EXTRACTOR_ExtractContext *ec)
234 waitpid (pid, NULL, 0); 234 waitpid (pid, NULL, 0);
235} 235}
236 236
237
237/* end of pdf_extractor.c */ 238/* end of pdf_extractor.c */