diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-01-28 22:42:15 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-01-28 22:42:15 +0000 |
commit | c1b28094ef1ccac5f228c52d6c34f8550d3abc73 (patch) | |
tree | 3d3b583ffba1f91d7a610a1815ded00ed0e6811b /src/fs | |
parent | ed7850f90dc664ff96c4a24b5552d5704f8c205e (diff) | |
download | gnunet-c1b28094ef1ccac5f228c52d6c34f8550d3abc73.tar.gz gnunet-c1b28094ef1ccac5f228c52d6c34f8550d3abc73.zip |
-cleaning up dirmetascan code, still failing for me
Diffstat (limited to 'src/fs')
-rw-r--r-- | src/fs/Makefile.am | 1 | ||||
-rw-r--r-- | src/fs/fs_dirmetascan.c | 1495 | ||||
-rw-r--r-- | src/fs/fs_sharetree.c | 6 | ||||
-rw-r--r-- | src/fs/gnunet-publish.c | 96 |
4 files changed, 492 insertions, 1106 deletions
diff --git a/src/fs/Makefile.am b/src/fs/Makefile.am index c4dfec0ca..fe593b394 100644 --- a/src/fs/Makefile.am +++ b/src/fs/Makefile.am | |||
@@ -36,6 +36,7 @@ libgnunetfs_la_SOURCES = \ | |||
36 | fs_misc.c \ | 36 | fs_misc.c \ |
37 | fs_namespace.c \ | 37 | fs_namespace.c \ |
38 | fs_search.c \ | 38 | fs_search.c \ |
39 | fs_sharetree.c \ | ||
39 | fs_tree.c fs_tree.h \ | 40 | fs_tree.c fs_tree.h \ |
40 | fs_unindex.c \ | 41 | fs_unindex.c \ |
41 | fs_uri.c | 42 | fs_uri.c |
diff --git a/src/fs/fs_dirmetascan.c b/src/fs/fs_dirmetascan.c index 11313d750..4c995a72a 100644 --- a/src/fs/fs_dirmetascan.c +++ b/src/fs/fs_dirmetascan.c | |||
@@ -18,862 +18,471 @@ | |||
18 | Boston, MA 02111-1307, USA. | 18 | Boston, MA 02111-1307, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | /** | ||
22 | * @file fs/fs_dirmetascan.c | ||
23 | * @brief code to asynchronously build a 'struct GNUNET_FS_ShareTreeItem' | ||
24 | * from an on-disk directory for publishing | ||
25 | * @author LRN | ||
26 | * @author Christian Grothoff | ||
27 | */ | ||
21 | #include "platform.h" | 28 | #include "platform.h" |
22 | #include "gnunet_fs_service.h" | 29 | #include "gnunet_fs_service.h" |
23 | #include "gnunet_scheduler_lib.h" | 30 | #include "gnunet_scheduler_lib.h" |
24 | #include <pthread.h> | 31 | #include <pthread.h> |
25 | 32 | ||
33 | |||
26 | /** | 34 | /** |
27 | * Entry for each unique keyword to track how often | 35 | * An opaque structure a pointer to which is returned to the |
28 | * it occurred. Contains the keyword and the counter. | 36 | * caller to be used to control the scanner. |
29 | */ | 37 | */ |
30 | struct KeywordCounter | 38 | struct GNUNET_FS_DirScanner |
31 | { | 39 | { |
32 | 40 | ||
33 | /** | 41 | /** |
34 | * Keyword that was found. | 42 | * A thread object for the scanner thread. |
35 | */ | ||
36 | const char *value; | ||
37 | |||
38 | /** | ||
39 | * How many files have this keyword? | ||
40 | */ | ||
41 | unsigned int count; | ||
42 | |||
43 | /** | ||
44 | * This is a doubly-linked list | ||
45 | */ | ||
46 | struct KeywordCounter *prev; | ||
47 | |||
48 | /** | ||
49 | * This is a doubly-linked list | ||
50 | */ | 43 | */ |
51 | struct KeywordCounter *next; | 44 | #if WINDOWS |
52 | }; | 45 | HANDLE thread; |
46 | #else | ||
47 | pthread_t thread; | ||
48 | #endif | ||
53 | 49 | ||
54 | /** | ||
55 | * Aggregate information we keep for meta data in each directory. | ||
56 | */ | ||
57 | struct MetaCounter | ||
58 | { | ||
59 | /** | 50 | /** |
60 | * The actual meta data. | 51 | * Expanded filename (as given by the scan initiator). |
52 | * The scanner thread stores a copy here, and frees it when it finishes. | ||
61 | */ | 53 | */ |
62 | const char *data; | 54 | char *filename_expanded; |
63 | 55 | ||
64 | /** | 56 | /** |
65 | * Number of bytes in 'data'. | 57 | * List of libextractor plugins to use for extracting. |
58 | * Initialized when the scan starts, removed when it finishes. | ||
66 | */ | 59 | */ |
67 | size_t data_size; | 60 | struct EXTRACTOR_PluginList *plugins; |
68 | 61 | ||
69 | /** | 62 | /** |
70 | * Name of the plugin that provided that piece of metadata | 63 | * A pipe to transfer signals to the scanner. |
71 | */ | 64 | */ |
72 | const char *plugin_name; | 65 | struct GNUNET_DISK_PipeHandle *stop_pipe; |
73 | 66 | ||
74 | /** | 67 | /** |
75 | * Type of the data | 68 | * A pipe end to read signals from. |
76 | */ | 69 | */ |
77 | enum EXTRACTOR_MetaType type; | 70 | const struct GNUNET_DISK_FileHandle *stop_read; |
78 | 71 | ||
79 | /** | 72 | /** |
80 | * Format of the data | 73 | * A pipe end to write signals to. |
81 | */ | 74 | */ |
82 | enum EXTRACTOR_MetaFormat format; | 75 | const struct GNUNET_DISK_FileHandle *stop_write; |
83 | 76 | ||
84 | /** | 77 | /** |
85 | * MIME-type of the metadata itself | 78 | * The pipe that is used to read progress messages. Only closed |
79 | * after the scanner thread is finished. | ||
86 | */ | 80 | */ |
87 | const char *data_mime_type; | 81 | struct GNUNET_DISK_PipeHandle *progress_pipe; |
88 | 82 | ||
89 | /** | 83 | /** |
90 | * How many files have meta entries matching this value? | 84 | * The end of the pipe that is used to read progress messages. |
91 | * (type and format do not have to match). | ||
92 | */ | 85 | */ |
93 | unsigned int count; | 86 | const struct GNUNET_DISK_FileHandle *progress_read; |
94 | 87 | ||
95 | /** | 88 | /** |
96 | * This is a doubly-linked list | 89 | * Handle of the pipe end into which the progress messages are written |
90 | * The initiator MUST keep it alive until the scanner thread is finished. | ||
97 | */ | 91 | */ |
98 | struct MetaCounter *prev; | 92 | const struct GNUNET_DISK_FileHandle *progress_write; |
99 | 93 | ||
100 | /** | 94 | /** |
101 | * This is a doubly-linked list | 95 | * The function that will be called every time there's a progress |
96 | * message. | ||
102 | */ | 97 | */ |
103 | struct MetaCounter *next; | 98 | GNUNET_FS_DirScannerProgressCallback progress_callback; |
104 | }; | 99 | |
105 | |||
106 | struct AddDirContext; | ||
107 | |||
108 | /** | ||
109 | * A structure used to hold a pointer to the tree item that is being | ||
110 | * processed. | ||
111 | * Needed to avoid changing the context for every recursive call. | ||
112 | */ | ||
113 | struct AddDirStack | ||
114 | { | ||
115 | /** | 100 | /** |
116 | * Context pointer | 101 | * A closure for progress_callback. |
117 | */ | 102 | */ |
118 | struct AddDirContext *adc; | 103 | void *progress_callback_cls; |
119 | 104 | ||
120 | /** | 105 | /** |
121 | * Parent directory | 106 | * A task for reading progress messages from the scanner. |
122 | */ | 107 | */ |
123 | struct GNUNET_FS_ShareTreeItem *parent; | 108 | GNUNET_SCHEDULER_TaskIdentifier progress_read_task; |
124 | }; | ||
125 | 109 | ||
126 | /** | ||
127 | * Execution context for 'add_dir' | ||
128 | * Owned by the initiator thread. | ||
129 | */ | ||
130 | struct AddDirContext | ||
131 | { | ||
132 | /** | 110 | /** |
133 | * After the scan is finished, it will contain a pointer to the | 111 | * After the scan is finished, it will contain a pointer to the |
134 | * top-level directory entry in the directory tree built by the | 112 | * top-level directory entry in the directory tree built by the |
135 | * scanner. | 113 | * scanner. Must only be manipulated by the thread for the |
114 | * duration of the thread's runtime. | ||
136 | */ | 115 | */ |
137 | struct GNUNET_FS_ShareTreeItem *toplevel; | 116 | struct GNUNET_FS_ShareTreeItem *toplevel; |
138 | 117 | ||
139 | /** | 118 | /** |
140 | * Expanded filename (as given by the scan initiator). | ||
141 | * The scanner thread stores a copy here, and frees it when it finishes. | ||
142 | */ | ||
143 | char *filename_expanded; | ||
144 | |||
145 | /** | ||
146 | * A pipe end to read signals from. | ||
147 | * Owned by the initiator thread. | ||
148 | */ | ||
149 | const struct GNUNET_DISK_FileHandle *stop_read; | ||
150 | |||
151 | /** | ||
152 | * 1 if the scanner should stop, 0 otherwise. Set in response | 119 | * 1 if the scanner should stop, 0 otherwise. Set in response |
153 | * to communication errors or when the initiator wants the scanning | 120 | * to communication errors or when the initiator wants the scanning |
154 | * process to stop. | 121 | * process to stop. |
155 | */ | 122 | */ |
156 | char do_stop; | 123 | int do_stop; |
157 | |||
158 | /** | ||
159 | * Handle of the pipe end into which the progress messages are written | ||
160 | * The pipe is owned by the initiator thread, and there's no way to | ||
161 | * close this end without having access to the pipe, so it won't | ||
162 | * be closed by the scanner thread. | ||
163 | * The initiator MUST keep it alive until the scanner thread is finished. | ||
164 | */ | ||
165 | const struct GNUNET_DISK_FileHandle *progress_write; | ||
166 | 124 | ||
167 | |||
168 | /** | ||
169 | * List of libextractor plugins to use for extracting. | ||
170 | * Initialized when the scan starts, removed when it finishes. | ||
171 | */ | ||
172 | struct EXTRACTOR_PluginList *plugins; | ||
173 | }; | 125 | }; |
174 | 126 | ||
175 | /** | ||
176 | * An opaque structure a pointer to which is returned to the | ||
177 | * caller to be used to control the scanner. | ||
178 | */ | ||
179 | struct GNUNET_FS_DirScanner | ||
180 | { | ||
181 | /** | ||
182 | * A pipe end to read signals from. | ||
183 | * Owned by the initiator thread. | ||
184 | */ | ||
185 | const struct GNUNET_DISK_FileHandle *stop_write; | ||
186 | |||
187 | /** | ||
188 | * A pipe transfer signals to the scanner. | ||
189 | * Owned by the initiator thread. | ||
190 | */ | ||
191 | struct GNUNET_DISK_PipeHandle *stop_pipe; | ||
192 | |||
193 | /** | ||
194 | * A thread object for the scanner thread. | ||
195 | * Owned by the initiator thread. | ||
196 | */ | ||
197 | #if WINDOWS | ||
198 | HANDLE thread; | ||
199 | #else | ||
200 | pthread_t thread; | ||
201 | #endif | ||
202 | |||
203 | /** | ||
204 | * A task for reading progress messages from the scanner. | ||
205 | */ | ||
206 | GNUNET_SCHEDULER_TaskIdentifier progress_read_task; | ||
207 | |||
208 | /** | ||
209 | * The end of the pipe that is used to read progress messages. | ||
210 | */ | ||
211 | const struct GNUNET_DISK_FileHandle *progress_read; | ||
212 | |||
213 | /** | ||
214 | * The pipe that is used to read progress messages. | ||
215 | * Owned (along with both of its ends) by the initiator thread. | ||
216 | * Only closed after the scanner thread is finished. | ||
217 | */ | ||
218 | struct GNUNET_DISK_PipeHandle *progress_pipe; | ||
219 | |||
220 | /** | ||
221 | * The function that will be called every time there's a progress | ||
222 | * message. | ||
223 | */ | ||
224 | GNUNET_FS_DirScannerProgressCallback progress_callback; | ||
225 | 127 | ||
226 | /** | ||
227 | * A closure for progress_callback. | ||
228 | */ | ||
229 | void *cls; | ||
230 | |||
231 | /** | ||
232 | * A pointer to the context of the scanner. | ||
233 | * Owned by the initiator thread. | ||
234 | * Initiator thread shouldn't touch it until the scanner thread | ||
235 | * is finished. | ||
236 | */ | ||
237 | struct AddDirContext *adc; | ||
238 | }; | ||
239 | 128 | ||
240 | /** | 129 | /** |
241 | * A structure that forms a singly-linked list that serves as a stack | 130 | * Abort the scan. |
242 | * for metadata-processing function. | 131 | * |
132 | * @param ds directory scanner structure | ||
243 | */ | 133 | */ |
244 | struct ProcessMetadataStackItem | 134 | void |
135 | GNUNET_FS_directory_scan_abort (struct GNUNET_FS_DirScanner *ds) | ||
245 | { | 136 | { |
246 | /** | 137 | static char c = 1; |
247 | * A pointer to metadata-processing context. | ||
248 | * The same in every stack item. | ||
249 | */ | ||
250 | struct GNUNET_FS_ProcessMetadataContext *ctx; | ||
251 | |||
252 | /** | ||
253 | * This is a singly-linked list. A pointer to its end is kept, and | ||
254 | * this pointer is used to walk it backwards. | ||
255 | */ | ||
256 | struct ProcessMetadataStackItem *parent; | ||
257 | 138 | ||
258 | /** | 139 | /* signal shutdown to other thread */ |
259 | * Map from the hash over the keyword to an 'struct KeywordCounter *' | 140 | (void) GNUNET_DISK_file_write (ds->stop_write, &c, 1); |
260 | * counter that says how often this keyword was | 141 | GNUNET_DISK_pipe_close_end (ds->stop_pipe, GNUNET_DISK_PIPE_END_WRITE); |
261 | * encountered in the current directory. | ||
262 | */ | ||
263 | struct GNUNET_CONTAINER_MultiHashMap *keywordcounter; | ||
264 | 142 | ||
265 | /** | 143 | /* stop reading from progress */ |
266 | * Map from the hash over the metadata to an 'struct MetaCounter *' | 144 | if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK) |
267 | * counter that says how often this metadata was | 145 | { |
268 | * encountered in the current directory. | 146 | GNUNET_SCHEDULER_cancel (ds->progress_read_task); |
269 | */ | 147 | ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; |
270 | struct GNUNET_CONTAINER_MultiHashMap *metacounter; | 148 | } |
271 | 149 | GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ); | |
272 | /** | ||
273 | * Number of files in the current directory. | ||
274 | */ | ||
275 | unsigned int dir_entry_count; | ||
276 | |||
277 | /** | ||
278 | * Keywords to exclude from using for KSK since they'll be associated | ||
279 | * with the parent as well. NULL for nothing blocked. | ||
280 | */ | ||
281 | struct GNUNET_FS_Uri *exclude_ksk; | ||
282 | 150 | ||
283 | /** | 151 | /* wait for other thread to terminate */ |
284 | * A share tree item that is being processed. | 152 | #if WINDOWS |
285 | */ | 153 | WaitForSingleObject (ds->thread, INFINITE); |
286 | struct GNUNET_FS_ShareTreeItem *item; | 154 | CloseHandle (ds->thread); |
155 | #else | ||
156 | pthread_join (ds->thread, NULL); | ||
157 | pthread_detach (ds->thread); | ||
158 | #endif | ||
287 | 159 | ||
288 | /** | 160 | /* free resources */ |
289 | * Set to GNUNET_YES to indicate that the directory pointed to by 'item' | 161 | GNUNET_DISK_pipe_close (ds->stop_pipe); |
290 | * was processed, and we should move on to the next. | 162 | GNUNET_DISK_pipe_close (ds->progress_pipe); |
291 | * Otherwise the directory will be recursed into. | 163 | if (NULL != ds->toplevel) |
292 | */ | 164 | GNUNET_FS_share_tree_free (ds->toplevel); |
293 | int end_directory; | 165 | if (NULL != ds->plugins) |
166 | EXTRACTOR_plugin_remove_all (ds->plugins); | ||
167 | GNUNET_free (ds); | ||
168 | } | ||
294 | 169 | ||
295 | }; | ||
296 | 170 | ||
297 | /** | 171 | /** |
298 | * The structure to keep the state of metadata processing | 172 | * Obtain the result of the scan after the scan has signalled |
173 | * completion. Must not be called prior to completion. The 'ds' is | ||
174 | * freed as part of this call. | ||
175 | * | ||
176 | * @param ds directory scanner structure | ||
177 | * @return the results of the scan (a directory tree) | ||
299 | */ | 178 | */ |
300 | struct GNUNET_FS_ProcessMetadataContext | 179 | struct GNUNET_FS_ShareTreeItem * |
180 | GNUNET_FS_directory_scan_get_result (struct GNUNET_FS_DirScanner *ds) | ||
301 | { | 181 | { |
302 | /** | 182 | struct GNUNET_FS_ShareTreeItem *result; |
303 | * The top of the stack. | ||
304 | */ | ||
305 | struct ProcessMetadataStackItem *stack; | ||
306 | |||
307 | /** | ||
308 | * Callback to invoke when processing is finished | ||
309 | */ | ||
310 | GNUNET_SCHEDULER_Task cb; | ||
311 | |||
312 | /** | ||
313 | * Closure for 'cb' | ||
314 | */ | ||
315 | void *cls; | ||
316 | |||
317 | /** | ||
318 | * Toplevel directory item of the tree to process. | ||
319 | */ | ||
320 | struct GNUNET_FS_ShareTreeItem *toplevel; | ||
321 | }; | ||
322 | 183 | ||
323 | /** | 184 | /* check that we're actually done */ |
324 | * Called every now and then by the scanner. | 185 | GNUNET_assert (GNUNET_SCHEDULER_NO_TASK == ds->progress_read_task); |
325 | * Checks the synchronization primitive. | 186 | /* preserve result */ |
326 | * Returns 1 if the scanner should stop, 0 otherwise. | 187 | result = ds->toplevel; |
327 | */ | 188 | ds->toplevel = NULL; |
328 | static int | 189 | GNUNET_FS_directory_scan_abort (ds); |
329 | should_stop (struct AddDirContext *adc) | 190 | return result; |
330 | { | ||
331 | errno = 0; | ||
332 | char c; | ||
333 | if (GNUNET_DISK_file_read_non_blocking (adc->stop_read, &c, 1) == 1 | ||
334 | || errno != EAGAIN) | ||
335 | { | ||
336 | adc->do_stop = 1; | ||
337 | } | ||
338 | return adc->do_stop; | ||
339 | } | 191 | } |
340 | 192 | ||
193 | |||
341 | /** | 194 | /** |
342 | * Write progress message. | 195 | * Write 'size' bytes from 'buf' into 'out'. |
343 | * Format is: | ||
344 | * "reason", "filename length", "filename", "directory flag" | ||
345 | * If filename is NULL, filename is not written, and its length | ||
346 | * is written as 0, and nothing else is written. It signals the initiator | ||
347 | * thread that the scanner is finished, and that it can now join its thread. | ||
348 | * | 196 | * |
349 | * Also checks if the initiator thread wants the scanner to stop, | 197 | * @param out pipe to write to |
350 | * Returns 1 to stop scanning (if the signal was received, or | 198 | * @param buf buffer with data to write |
351 | * if the pipe was broken somehow), 0 otherwise. | 199 | * @param size number of bytes to write |
200 | * @return GNUNET_OK on success, GNUNET_SYSERR on error | ||
352 | */ | 201 | */ |
353 | static int | 202 | static int |
354 | write_progress (struct AddDirContext *adc, const char *filename, | 203 | write_all (const struct GNUNET_DISK_FileHandle *out, |
355 | char is_directory, enum GNUNET_FS_DirScannerProgressUpdateReason reason) | 204 | const void *buf, |
205 | size_t size) | ||
356 | { | 206 | { |
357 | size_t filename_len; | 207 | const char *cbuf = buf; |
208 | size_t total; | ||
358 | ssize_t wr; | 209 | ssize_t wr; |
359 | size_t total_write; | 210 | |
360 | if ((adc->do_stop || should_stop (adc)) && reason != GNUNET_FS_DIRSCANNER_ASKED_TO_STOP | 211 | total = 0; |
361 | && reason != GNUNET_FS_DIRSCANNER_FINISHED) | 212 | do |
362 | return 1; | ||
363 | total_write = 0; | ||
364 | wr = 1; | ||
365 | while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (reason)) | ||
366 | { | ||
367 | wr = GNUNET_DISK_file_write_blocking (adc->progress_write, | ||
368 | &((char *)&reason)[total_write], sizeof (reason) - total_write); | ||
369 | if (wr > 0) | ||
370 | total_write += wr; | ||
371 | } | ||
372 | if (sizeof (reason) != total_write) | ||
373 | return adc->do_stop = 1; | ||
374 | if (filename) | ||
375 | filename_len = strlen (filename) + 1; | ||
376 | else | ||
377 | filename_len = 0; | ||
378 | total_write = 0; | ||
379 | wr = 1; | ||
380 | while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (size_t)) | ||
381 | { | 213 | { |
382 | wr = GNUNET_DISK_file_write_blocking (adc->progress_write, | 214 | wr = GNUNET_DISK_file_write (out, |
383 | &((char *)&filename_len)[total_write], sizeof (size_t) - total_write); | 215 | &cbuf[total], |
216 | size - total); | ||
384 | if (wr > 0) | 217 | if (wr > 0) |
385 | total_write += wr; | 218 | total += wr; |
386 | } | 219 | } while ( (wr > 0) && (total < size) ); |
387 | if (sizeof (size_t) != total_write) | 220 | return (total == size) ? GNUNET_OK : GNUNET_SYSERR; |
388 | return adc->do_stop = 1; | ||
389 | if (filename) | ||
390 | { | ||
391 | total_write = 0; | ||
392 | wr = 1; | ||
393 | while ((wr > 0 || errno == EAGAIN) && total_write < filename_len) | ||
394 | { | ||
395 | wr = GNUNET_DISK_file_write_blocking (adc->progress_write, | ||
396 | &((char *)filename)[total_write], filename_len - total_write); | ||
397 | if (wr > 0) | ||
398 | total_write += wr; | ||
399 | } | ||
400 | if (filename_len != total_write) | ||
401 | return adc->do_stop = 1; | ||
402 | total_write = 0; | ||
403 | wr = 1; | ||
404 | while ((wr > 0 || errno == EAGAIN) && total_write < sizeof (char)) | ||
405 | { | ||
406 | wr = GNUNET_DISK_file_write_blocking (adc->progress_write, | ||
407 | &((char *)&is_directory)[total_write], sizeof (char) - total_write); | ||
408 | if (wr > 0) | ||
409 | total_write += wr; | ||
410 | } | ||
411 | if (sizeof (char) != total_write) | ||
412 | return adc->do_stop = 1; | ||
413 | } | ||
414 | return 0; | ||
415 | } | 221 | } |
416 | 222 | ||
417 | /** | ||
418 | * Add the given keyword to the | ||
419 | * keyword statistics tracker. | ||
420 | * | ||
421 | * @param cls closure (user-defined) | ||
422 | * @param keyword the keyword to count | ||
423 | * @param is_mandatory ignored | ||
424 | * @return always GNUNET_OK | ||
425 | */ | ||
426 | static int | ||
427 | add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory) | ||
428 | { | ||
429 | struct GNUNET_CONTAINER_MultiHashMap *mcm = cls; | ||
430 | struct KeywordCounter *cnt, *first_cnt; | ||
431 | GNUNET_HashCode hc; | ||
432 | size_t klen; | ||
433 | |||
434 | klen = strlen (keyword) + 1; | ||
435 | GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); | ||
436 | /* Since the map might contain multiple values per keyword, we only | ||
437 | * store one value, and attach all other to it, forming a linked list. | ||
438 | * Somewhat easier than retrieving multiple items via callback. | ||
439 | */ | ||
440 | first_cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc); | ||
441 | for (cnt = first_cnt; cnt && strcmp (cnt->value, keyword) != 0; cnt = cnt->next); | ||
442 | if (cnt == NULL) | ||
443 | { | ||
444 | cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen); | ||
445 | cnt->value = (const char *) &cnt[1]; | ||
446 | memcpy (&cnt[1], keyword, klen); | ||
447 | if (first_cnt != NULL) | ||
448 | { | ||
449 | if (first_cnt->prev != NULL) | ||
450 | { | ||
451 | first_cnt->prev->next = cnt; | ||
452 | cnt->prev = first_cnt->prev; | ||
453 | } | ||
454 | first_cnt->prev = cnt; | ||
455 | cnt->next = first_cnt; | ||
456 | } | ||
457 | else | ||
458 | GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt, | ||
459 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE); | ||
460 | } | ||
461 | cnt->count++; | ||
462 | return GNUNET_OK; | ||
463 | } | ||
464 | 223 | ||
465 | /** | 224 | /** |
466 | * Type of a function that libextractor calls for each | 225 | * Write progress message. |
467 | * meta data item found. | ||
468 | * | 226 | * |
469 | * @param cls the container multihashmap to update | 227 | * @param ds directory scanner context whose progress pipe is written to |
470 | * @param plugin_name name of the plugin that produced this value; | 228 | * @param filename name of the file to transmit, never NULL |
471 | * special values can be used (i.e. '<zlib>' for zlib being | 229 | * @param is_directory GNUNET_YES for directory, GNUNET_NO for file, GNUNET_SYSERR for neither |
472 | * used in the main libextractor library and yielding | 230 | * @param reason reason for the progress call |
473 | * meta data). | 231 | * @return GNUNET_SYSERR to stop scanning (the pipe was broken somehow) |
474 | * @param type libextractor-type describing the meta data | ||
475 | * @param format basic format information about data | ||
476 | * @param data_mime_type mime-type of data (not of the original file); | ||
477 | * can be NULL (if mime-type is not known) | ||
478 | * @param data actual meta-data found | ||
479 | * @param data_len number of bytes in data | ||
480 | * @return GNUNET_OK to continue extracting / iterating | ||
481 | */ | 232 | */ |
482 | static int | 233 | static int |
483 | add_to_meta_counter (void *cls, const char *plugin_name, | 234 | write_progress (struct GNUNET_FS_DirScanner *ds, |
484 | enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, | 235 | const char *filename, |
485 | const char *data_mime_type, const char *data, size_t data_len) | 236 | int is_directory, |
486 | { | 237 | enum GNUNET_FS_DirScannerProgressUpdateReason reason) |
487 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; | ||
488 | GNUNET_HashCode key; | ||
489 | struct MetaCounter *cnt, *first_cnt; | ||
490 | |||
491 | GNUNET_CRYPTO_hash (data, data_len, &key); | ||
492 | first_cnt = GNUNET_CONTAINER_multihashmap_get (map, &key); | ||
493 | for (cnt = first_cnt; cnt | ||
494 | && cnt->data_size != data_len | ||
495 | && memcmp (cnt->data, data, cnt->data_size) != 0; cnt = cnt->next); | ||
496 | if (cnt == NULL) | ||
497 | { | ||
498 | cnt = GNUNET_malloc (sizeof (struct MetaCounter)); | ||
499 | cnt->data = data; | ||
500 | cnt->data_size = data_len; | ||
501 | cnt->plugin_name = plugin_name; | ||
502 | cnt->type = type; | ||
503 | cnt->format = format; | ||
504 | cnt->data_mime_type = data_mime_type; | ||
505 | |||
506 | if (first_cnt != NULL) | ||
507 | { | ||
508 | if (first_cnt->prev != NULL) | ||
509 | { | ||
510 | first_cnt->prev->next = cnt; | ||
511 | cnt->prev = first_cnt->prev; | ||
512 | } | ||
513 | first_cnt->prev = cnt; | ||
514 | cnt->next = first_cnt; | ||
515 | } | ||
516 | else | ||
517 | GNUNET_CONTAINER_multihashmap_put (map, &key, cnt, | ||
518 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE); | ||
519 | } | ||
520 | cnt->count++; | ||
521 | return 0; | ||
522 | } | ||
523 | |||
524 | /** | ||
525 | * Allocates a struct GNUNET_FS_ShareTreeItem and adds it to its parent. | ||
526 | */ | ||
527 | static struct GNUNET_FS_ShareTreeItem * | ||
528 | make_item (struct GNUNET_FS_ShareTreeItem *parent) | ||
529 | { | 238 | { |
530 | struct GNUNET_FS_ShareTreeItem *item; | 239 | size_t slen; |
531 | item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem)); | 240 | |
532 | 241 | slen = strlen (filename) + 1; | |
533 | item->parent = parent; | 242 | if ( (GNUNET_OK != |
534 | if (parent) | 243 | write_all (ds->progress_write, |
535 | GNUNET_CONTAINER_DLL_insert (parent->children_head, parent->children_tail, | 244 | &reason, |
536 | item); | 245 | sizeof (reason))) || |
537 | return item; | 246 | (GNUNET_OK != |
247 | write_all (ds->progress_write, | ||
248 | &slen, | ||
249 | sizeof (slen))) || | ||
250 | (GNUNET_OK != | ||
251 | write_all (ds->progress_write, | ||
252 | filename, | ||
253 | slen)) || | ||
254 | (GNUNET_OK != | ||
255 | write_all (ds->progress_write, | ||
256 | &is_directory, | ||
257 | sizeof (is_directory))) ) | ||
258 | return GNUNET_SYSERR; | ||
259 | return GNUNET_OK; | ||
538 | } | 260 | } |
539 | 261 | ||
540 | /** | ||
541 | * Extract metadata from a file and add it to the share tree | ||
542 | * | ||
543 | * @param ads context to modify | ||
544 | * @param filename name of the file to process | ||
545 | */ | ||
546 | static void | ||
547 | extract_file (struct AddDirStack *ads, const char *filename) | ||
548 | { | ||
549 | struct GNUNET_FS_ShareTreeItem *item; | ||
550 | const char *short_fn; | ||
551 | |||
552 | item = make_item (ads->parent); | ||
553 | |||
554 | GNUNET_DISK_file_size (filename, &item->file_size, GNUNET_YES); | ||
555 | item->is_directory = GNUNET_NO; | ||
556 | |||
557 | item->meta = GNUNET_CONTAINER_meta_data_create (); | ||
558 | GNUNET_FS_meta_data_extract_from_file (item->meta, filename, | ||
559 | ads->adc->plugins); | ||
560 | GNUNET_CONTAINER_meta_data_delete (item->meta, EXTRACTOR_METATYPE_FILENAME, | ||
561 | NULL, 0); | ||
562 | short_fn = GNUNET_STRINGS_get_short_name (filename); | ||
563 | |||
564 | item->filename = GNUNET_strdup (filename); | ||
565 | item->short_filename = GNUNET_strdup (short_fn); | ||
566 | |||
567 | GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>", | ||
568 | EXTRACTOR_METATYPE_FILENAME, | ||
569 | EXTRACTOR_METAFORMAT_UTF8, "text/plain", | ||
570 | short_fn, strlen (short_fn) + 1); | ||
571 | if (ads->parent == NULL) | ||
572 | { | ||
573 | /* we're finished with the scan, make sure caller gets the top-level | ||
574 | * directory pointer | ||
575 | */ | ||
576 | ads->adc->toplevel = item; | ||
577 | } | ||
578 | } | ||
579 | 262 | ||
580 | /** | 263 | /** |
581 | * Remove the keyword from the ksk URI. | 264 | * Called every now and then by the scanner thread to check |
582 | * | 265 | * if we're being aborted. |
583 | * @param cls the ksk uri | 266 | * |
584 | * @param keyword the word to remove | 267 | * @param ds scanner context |
585 | * @param is_mandatory ignored | 268 | * @return GNUNET_OK to continue, GNUNET_SYSERR to stop |
586 | * @return always GNUNET_OK | ||
587 | */ | 269 | */ |
588 | static int | 270 | static int |
589 | remove_keyword (void *cls, const char *keyword, int is_mandatory) | 271 | test_thread_stop (struct GNUNET_FS_DirScanner *ds) |
590 | { | 272 | { |
591 | struct GNUNET_FS_Uri *ksk = cls; | 273 | char c; |
592 | 274 | ||
593 | GNUNET_FS_uri_ksk_remove_keyword (ksk, keyword); | 275 | if ( (GNUNET_DISK_file_read_non_blocking (ds->stop_read, &c, 1) == 1) || |
276 | (EAGAIN != errno) ) | ||
277 | return GNUNET_SYSERR; | ||
594 | return GNUNET_OK; | 278 | return GNUNET_OK; |
595 | } | 279 | } |
596 | 280 | ||
281 | |||
597 | /** | 282 | /** |
598 | * Remove keywords from current directory's children, if they are | 283 | * Function called to (recursively) add all of the files in the |
599 | * in the excluded keywords list of that directory. | 284 | * directory to the tree. Called by the directory scanner to initiate |
285 | * the scan. Does NOT yet add any metadata. | ||
600 | * | 286 | * |
601 | * @param cls the ksk uri | 287 | * @param ds directory scanner context to use |
602 | * @param keyword the word to remove | 288 | * @param filename file or directory to scan |
603 | * @param is_mandatory ignored | 289 | * @param dst where to store the resulting share tree item |
604 | * @return always GNUNET_OK | 290 | * @return GNUNET_OK on success, GNUNET_SYSERR on error |
605 | */ | 291 | */ |
606 | static int | 292 | static int |
607 | remove_keywords (struct ProcessMetadataStackItem *stack, struct GNUNET_FS_ShareTreeItem *dir) | 293 | preprocess_file (struct GNUNET_FS_DirScanner *ds, |
608 | { | 294 | const char *filename, |
609 | struct GNUNET_FS_ShareTreeItem *item; | 295 | struct GNUNET_FS_ShareTreeItem **dst); |
610 | 296 | ||
611 | for (item = dir->children_head; item; item = item->next) | ||
612 | { | ||
613 | if (stack->exclude_ksk != NULL) | ||
614 | GNUNET_FS_uri_ksk_get_keywords (stack->exclude_ksk, &remove_keyword, item->ksk_uri); | ||
615 | } | ||
616 | return GNUNET_OK; | ||
617 | } | ||
618 | 297 | ||
619 | /** | 298 | /** |
620 | * Context passed to 'migrate_and_drop'. | 299 | * Closure for the 'scan_callback' |
621 | */ | 300 | */ |
622 | struct KeywordProcessContext | 301 | struct RecursionContext |
623 | { | 302 | { |
624 | /** | 303 | /** |
625 | * All the keywords we migrated to the parent. | 304 | * Global scanner context. |
626 | */ | 305 | */ |
627 | struct GNUNET_FS_Uri *ksk; | 306 | struct GNUNET_FS_DirScanner *ds; |
628 | |||
629 | /** | ||
630 | * How often does a keyword have to occur to be | ||
631 | * migrated to the parent? | ||
632 | */ | ||
633 | unsigned int threshold; | ||
634 | }; | ||
635 | 307 | ||
636 | /** | ||
637 | * Context passed to 'migrate_and_drop'. | ||
638 | */ | ||
639 | struct MetaProcessContext | ||
640 | { | ||
641 | /** | 308 | /** |
642 | * All the metadata we copy to the parent. | 309 | * Parent to add the files to. |
643 | */ | 310 | */ |
644 | struct GNUNET_CONTAINER_MetaData *meta; | 311 | struct GNUNET_FS_ShareTreeItem *parent; |
645 | 312 | ||
646 | /** | 313 | /** |
647 | * How often does a metadata have to occur to be | 314 | * Flag to set to GNUNET_YES on serious errors. |
648 | * migrated to the parent? | ||
649 | */ | 315 | */ |
650 | unsigned int threshold; | 316 | int stop; |
651 | }; | 317 | }; |
652 | 318 | ||
653 | 319 | ||
654 | /** | 320 | /** |
655 | * Move "frequent" keywords over to the | 321 | * Function called by the directory iterator to (recursively) add all |
656 | * target ksk uri, free the counters. | 322 | * of the files in the directory to the tree. Called by the directory |
323 | * scanner to initiate the scan. Does NOT yet add any metadata. | ||
657 | * | 324 | * |
325 | * @param cls the 'struct RecursionContext' | ||
326 | * @param filename file or directory to scan | ||
327 | * @return GNUNET_OK on success, GNUNET_SYSERR on error | ||
658 | */ | 328 | */ |
659 | static int | 329 | static int |
660 | migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value) | 330 | scan_callback (void *cls, |
331 | const char *filename) | ||
661 | { | 332 | { |
662 | struct KeywordProcessContext *kpc = cls; | 333 | struct RecursionContext *rc = cls; |
663 | struct KeywordCounter *counter = value; | 334 | struct GNUNET_FS_ShareTreeItem *chld; |
664 | 335 | ||
665 | if (counter->count >= kpc->threshold && counter->count > 1) | 336 | if (GNUNET_OK != |
337 | preprocess_file (rc->ds, | ||
338 | filename, | ||
339 | &chld)) | ||
666 | { | 340 | { |
667 | GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO); | 341 | rc->stop = GNUNET_YES; |
342 | return GNUNET_SYSERR; | ||
668 | } | 343 | } |
669 | GNUNET_free (counter); | 344 | chld->parent = rc->parent; |
670 | return GNUNET_YES; | 345 | GNUNET_CONTAINER_DLL_insert (rc->parent->children_head, |
346 | rc->parent->children_tail, | ||
347 | chld); | ||
348 | return GNUNET_OK; | ||
671 | } | 349 | } |
672 | /** | ||
673 | * Copy "frequent" metadata items over to the | ||
674 | * target metadata container, free the counters. | ||
675 | * | ||
676 | */ | ||
677 | static int | ||
678 | migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value) | ||
679 | { | ||
680 | struct MetaProcessContext *mpc = cls; | ||
681 | struct MetaCounter *counter = value; | ||
682 | 350 | ||
683 | if (counter->count >= mpc->threshold && counter->count > 1) | ||
684 | { | ||
685 | GNUNET_CONTAINER_meta_data_insert (mpc->meta, | ||
686 | counter->plugin_name, | ||
687 | counter->type, | ||
688 | counter->format, | ||
689 | counter->data_mime_type, counter->data, | ||
690 | counter->data_size); | ||
691 | } | ||
692 | GNUNET_free (counter); | ||
693 | return GNUNET_YES; | ||
694 | } | ||
695 | 351 | ||
696 | /** | 352 | /** |
697 | * Go over the collected keywords from all entries in the | 353 | * Function called to (recursively) add all of the files in the |
698 | * directory and push common keywords up one level (by | 354 | * directory to the tree. Called by the directory scanner to initiate |
699 | * adding it to the returned struct). Do the same for metadata. | 355 | * the scan. Does NOT yet add any metadata. |
700 | * Destroys keywordcounter and metacoutner for current directory. | ||
701 | * | 356 | * |
702 | * @param adc collection of child meta data | 357 | * @param ds directory scanner context to use |
703 | * @param exclude_ksk pointer to where moveable keywords will be stored | ||
704 | * @param copy_meta pointer to where copyable metadata will be stored | ||
705 | */ | ||
706 | static void | ||
707 | process_keywords_and_metadata (struct ProcessMetadataStackItem *stack, | ||
708 | struct GNUNET_FS_Uri **exclude_ksk, | ||
709 | struct GNUNET_CONTAINER_MetaData **copy_meta) | ||
710 | { | ||
711 | struct KeywordProcessContext kpc; | ||
712 | struct MetaProcessContext mpc; | ||
713 | struct GNUNET_CONTAINER_MetaData *tmp; | ||
714 | |||
715 | /* Surprisingly, it's impossible to create a ksk with 0 keywords directly. | ||
716 | * But we can create one from an empty metadata set | ||
717 | */ | ||
718 | tmp = GNUNET_CONTAINER_meta_data_create (); | ||
719 | kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp); | ||
720 | GNUNET_CONTAINER_meta_data_destroy (tmp); | ||
721 | mpc.meta = GNUNET_CONTAINER_meta_data_create (); | ||
722 | |||
723 | kpc.threshold = mpc.threshold = (stack->dir_entry_count + 1) / 2; /* 50% */ | ||
724 | |||
725 | GNUNET_CONTAINER_multihashmap_iterate (stack->keywordcounter, | ||
726 | &migrate_and_drop, &kpc); | ||
727 | GNUNET_CONTAINER_multihashmap_iterate (stack->metacounter, | ||
728 | &migrate_and_drop_metadata, &mpc); | ||
729 | |||
730 | GNUNET_CONTAINER_multihashmap_destroy (stack->keywordcounter); | ||
731 | GNUNET_CONTAINER_multihashmap_destroy (stack->metacounter); | ||
732 | *exclude_ksk = kpc.ksk; | ||
733 | *copy_meta = mpc.meta; | ||
734 | } | ||
735 | |||
736 | /** | ||
737 | * Function called by the directory iterator to | ||
738 | * (recursively) add all of the files in the | ||
739 | * directory to the tree. | ||
740 | * Called by the directory scanner to initiate the | ||
741 | * scan. | ||
742 | * TODO: find a way to make it non-recursive. | ||
743 | * | ||
744 | * @param cls the 'struct AddDirStack *' we're in | ||
745 | * @param filename file or directory to scan | 358 | * @param filename file or directory to scan |
359 | * @param dst where to store the resulting share tree item | ||
360 | * @return GNUNET_OK on success, GNUNET_SYSERR on error | ||
746 | */ | 361 | */ |
747 | static int | 362 | static int |
748 | scan_directory (void *cls, const char *filename) | 363 | preprocess_file (struct GNUNET_FS_DirScanner *ds, |
364 | const char *filename, | ||
365 | struct GNUNET_FS_ShareTreeItem **dst) | ||
749 | { | 366 | { |
750 | struct AddDirStack *ads = cls, recurse_ads; | ||
751 | struct AddDirContext *adc = ads->adc; | ||
752 | struct stat sbuf; | ||
753 | struct GNUNET_FS_ShareTreeItem *item; | 367 | struct GNUNET_FS_ShareTreeItem *item; |
754 | const char *short_fn; | 368 | struct stat sbuf; |
755 | int do_stop = 0; | ||
756 | |||
757 | /* Wrap up fast */ | ||
758 | if (adc->do_stop) | ||
759 | return GNUNET_SYSERR; | ||
760 | 369 | ||
761 | /* If the file doesn't exist (or is not statable for any other reason, | ||
762 | * skip it, and report it. | ||
763 | */ | ||
764 | if (0 != STAT (filename, &sbuf)) | 370 | if (0 != STAT (filename, &sbuf)) |
765 | { | 371 | { |
766 | (void) write_progress (adc, filename, S_ISDIR (sbuf.st_mode), | 372 | /* If the file doesn't exist (or is not stat-able for any other reason) |
767 | GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST); | 373 | skip it (but report it), but do continue. */ |
374 | if (GNUNET_OK != | ||
375 | write_progress (ds, filename, GNUNET_SYSERR, | ||
376 | GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST)) | ||
377 | return GNUNET_SYSERR; | ||
768 | return GNUNET_OK; | 378 | return GNUNET_OK; |
769 | } | 379 | } |
770 | 380 | ||
771 | /* Report the progress */ | 381 | /* Report the progress */ |
772 | do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode), | 382 | if (GNUNET_OK != |
773 | GNUNET_FS_DIRSCANNER_NEW_FILE); | 383 | write_progress (ds, |
774 | if (do_stop) | 384 | filename, |
775 | { | 385 | S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO, |
776 | /* We were asked to stop, acknowledge that and return */ | 386 | GNUNET_FS_DIRSCANNER_FILE_START)) |
777 | (void) write_progress (adc, filename, S_ISDIR (sbuf.st_mode), | ||
778 | GNUNET_FS_DIRSCANNER_ASKED_TO_STOP); | ||
779 | return GNUNET_SYSERR; | 387 | return GNUNET_SYSERR; |
780 | } | 388 | item = GNUNET_malloc (sizeof (struct GNUNET_FS_ShareTreeItem)); |
781 | 389 | item->meta = GNUNET_CONTAINER_meta_data_create (); | |
782 | if (!S_ISDIR (sbuf.st_mode)) | 390 | item->filename = GNUNET_strdup (filename); |
783 | extract_file (ads, filename); | 391 | item->short_filename = GNUNET_strdup (GNUNET_STRINGS_get_short_name (filename)); |
784 | else | 392 | item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO; |
785 | { | 393 | item->file_size = (uint64_t) sbuf.st_size; |
786 | item = make_item (ads->parent); | 394 | if (item->is_directory) |
787 | item->meta = GNUNET_CONTAINER_meta_data_create (); | 395 | { |
788 | 396 | struct RecursionContext rc; | |
789 | item->is_directory = GNUNET_YES; | 397 | |
790 | 398 | rc.parent = item; | |
791 | recurse_ads.adc = adc; | 399 | rc.ds = ds; |
792 | recurse_ads.parent = item; | 400 | rc.stop = GNUNET_NO; |
793 | 401 | GNUNET_DISK_directory_scan (filename, | |
794 | /* recurse into directory */ | 402 | &scan_callback, |
795 | GNUNET_DISK_directory_scan (filename, &scan_directory, &recurse_ads); | 403 | &rc); |
796 | 404 | if ( (rc.stop == GNUNET_YES) || | |
797 | short_fn = GNUNET_STRINGS_get_short_name (filename); | 405 | (GNUNET_OK != |
798 | 406 | test_thread_stop (ds)) ) | |
799 | item->filename = GNUNET_strdup (filename); | ||
800 | item->short_filename = GNUNET_strdup (short_fn); | ||
801 | |||
802 | if (ads->parent == NULL) | ||
803 | { | 407 | { |
804 | /* we're finished with the scan, make sure caller gets the top-level | 408 | GNUNET_FS_share_tree_free (item); |
805 | * directory pointer | 409 | return GNUNET_SYSERR; |
806 | */ | ||
807 | adc->toplevel = item; | ||
808 | } | 410 | } |
809 | } | 411 | } |
810 | return GNUNET_OK; | 412 | /* Report the progress */ |
811 | } | 413 | if (GNUNET_OK != |
812 | 414 | write_progress (ds, | |
813 | /** | 415 | filename, |
814 | * Signals the scanner to finish the scan as fast as possible. | 416 | S_ISDIR (sbuf.st_mode) ? GNUNET_YES : GNUNET_NO, |
815 | * Does not block. | 417 | GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED)) |
816 | * Can close the pipe if asked to, but that is only used by the | ||
817 | * internal call to this function during cleanup. The client | ||
818 | * must understand the consequences of closing the pipe too early. | ||
819 | * | ||
820 | * @param ds directory scanner structure | ||
821 | * @param close_pipe GNUNET_YES to close | ||
822 | */ | ||
823 | void | ||
824 | GNUNET_FS_directory_scan_finish (struct GNUNET_FS_DirScanner *ds, | ||
825 | int close_pipe) | ||
826 | { | ||
827 | char c = 1; | ||
828 | GNUNET_DISK_file_write (ds->stop_write, &c, 1); | ||
829 | |||
830 | if (close_pipe) | ||
831 | { | 418 | { |
832 | if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK) | 419 | GNUNET_FS_share_tree_free (item); |
833 | { | 420 | return GNUNET_SYSERR; |
834 | GNUNET_SCHEDULER_cancel (ds->progress_read_task); | ||
835 | ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; | ||
836 | } | ||
837 | GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ); | ||
838 | ds->progress_read = NULL; | ||
839 | } | 421 | } |
422 | *dst = item; | ||
423 | return GNUNET_OK; | ||
840 | } | 424 | } |
841 | 425 | ||
426 | |||
842 | /** | 427 | /** |
843 | * Signals the scanner thread to finish (in case it isn't finishing | 428 | * Extract metadata from files. |
844 | * already) and joins the scanner thread. Closes the pipes, frees the | ||
845 | * scanner contexts (both of them), returns the results of the scan. | ||
846 | * Results are valid (and have to be freed) even if the scanner had | ||
847 | * an error or was rushed to finish prematurely. | ||
848 | * Blocks until the scanner is finished. | ||
849 | * | 429 | * |
850 | * @param ds directory scanner structure | 430 | * @param ds directory scanner context |
851 | * @return the results of the scan (a directory tree) | 431 | * @param item entry we are processing |
432 | * @return GNUNET_OK on success, GNUNET_SYSERR on fatal errors | ||
852 | */ | 433 | */ |
853 | struct GNUNET_FS_ShareTreeItem * | 434 | static int |
854 | GNUNET_FS_directory_scan_cleanup (struct GNUNET_FS_DirScanner *ds) | 435 | extract_files (struct GNUNET_FS_DirScanner *ds, |
855 | { | 436 | struct GNUNET_FS_ShareTreeItem *item) |
856 | struct GNUNET_FS_ShareTreeItem *result; | 437 | { |
857 | 438 | if (item->is_directory) | |
858 | GNUNET_FS_directory_scan_finish (ds, GNUNET_YES); | 439 | { |
859 | #if WINDOWS | 440 | /* for directories, we simply only descent, no extraction, no |
860 | WaitForSingleObject (ds->thread, INFINITE); | 441 | progress reporting */ |
861 | CloseHandle (ds->thread); | 442 | struct GNUNET_FS_ShareTreeItem *pos; |
862 | #else | 443 | |
863 | pthread_join (ds->thread, NULL); | 444 | for (pos = item->children_head; NULL != pos; pos = pos->next) |
864 | pthread_detach (ds->thread); | 445 | if (GNUNET_OK != |
865 | #endif | 446 | extract_files (ds, pos)) |
447 | return GNUNET_SYSERR; | ||
448 | return GNUNET_OK; | ||
449 | } | ||
450 | |||
451 | /* this is the expensive operation, *afterwards* we'll check for aborts */ | ||
452 | GNUNET_FS_meta_data_extract_from_file (item->meta, | ||
453 | item->filename, | ||
454 | ds->plugins); | ||
455 | |||
456 | /* having full filenames is too dangerous; always make sure we clean them up */ | ||
457 | GNUNET_CONTAINER_meta_data_delete (item->meta, | ||
458 | EXTRACTOR_METATYPE_FILENAME, | ||
459 | NULL, 0); | ||
460 | GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>", | ||
461 | EXTRACTOR_METATYPE_FILENAME, | ||
462 | EXTRACTOR_METAFORMAT_UTF8, "text/plain", | ||
463 | item->short_filename, | ||
464 | strlen (item->short_filename) + 1); | ||
465 | /* check for abort */ | ||
466 | if (GNUNET_OK != | ||
467 | test_thread_stop (ds)) | ||
468 | return GNUNET_SYSERR; | ||
866 | 469 | ||
867 | GNUNET_DISK_pipe_close (ds->stop_pipe); | 470 | /* Report the progress */ |
868 | GNUNET_DISK_pipe_close (ds->progress_pipe); | 471 | if (GNUNET_OK != |
869 | result = ds->adc->toplevel; | 472 | write_progress (ds, |
870 | GNUNET_free (ds->adc); | 473 | item->filename, |
871 | GNUNET_free (ds); | 474 | GNUNET_NO, |
872 | return result; | 475 | GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED)) |
476 | return GNUNET_SYSERR; | ||
477 | return GNUNET_OK; | ||
873 | } | 478 | } |
874 | 479 | ||
480 | |||
875 | /** | 481 | /** |
876 | * The function from which the scanner thread starts | 482 | * The function from which the scanner thread starts |
483 | * | ||
484 | * @param cls the 'struct GNUNET_FS_DirScanner' | ||
485 | * @return 0/NULL | ||
877 | */ | 486 | */ |
878 | #if WINDOWS | 487 | #if WINDOWS |
879 | DWORD | 488 | DWORD |
@@ -882,19 +491,58 @@ static void * | |||
882 | #endif | 491 | #endif |
883 | run_directory_scan_thread (void *cls) | 492 | run_directory_scan_thread (void *cls) |
884 | { | 493 | { |
885 | struct AddDirContext *adc = cls; | 494 | struct GNUNET_FS_DirScanner *ds = cls; |
886 | struct AddDirStack ads; | 495 | |
887 | ads.adc = adc; | 496 | if (GNUNET_OK != preprocess_file (ds, |
888 | ads.parent = NULL; | 497 | ds->filename_expanded, |
889 | scan_directory (&ads, adc->filename_expanded); | 498 | &ds->toplevel)) |
890 | GNUNET_free (adc->filename_expanded); | 499 | { |
891 | if (adc->plugins != NULL) | 500 | (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR); |
892 | EXTRACTOR_plugin_remove_all (adc->plugins); | 501 | return 0; |
893 | /* Tell the initiator that we're finished, it can now join the thread */ | 502 | } |
894 | write_progress (adc, NULL, 0, GNUNET_FS_DIRSCANNER_FINISHED); | 503 | if (GNUNET_OK != |
504 | write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_ALL_COUNTED)) | ||
505 | return 0; | ||
506 | if (GNUNET_OK != | ||
507 | extract_files (ds, ds->toplevel)) | ||
508 | { | ||
509 | (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_INTERNAL_ERROR); | ||
510 | return 0; | ||
511 | } | ||
512 | (void) write_progress (ds, "", GNUNET_NO, GNUNET_FS_DIRSCANNER_FINISHED); | ||
895 | return 0; | 513 | return 0; |
896 | } | 514 | } |
897 | 515 | ||
516 | |||
517 | /** | ||
518 | * Read 'size' bytes from 'in' into 'buf'. | ||
519 | * | ||
520 | * @param in pipe to read from | ||
521 | * @param buf buffer to read to | ||
522 | * @param size number of bytes to read | ||
523 | * @return GNUNET_OK on success, GNUNET_SYSERR on error | ||
524 | */ | ||
525 | static int | ||
526 | read_all (const struct GNUNET_DISK_FileHandle *in, | ||
527 | char *buf, | ||
528 | size_t size) | ||
529 | { | ||
530 | size_t total; | ||
531 | ssize_t rd; | ||
532 | |||
533 | total = 0; | ||
534 | do | ||
535 | { | ||
536 | rd = GNUNET_DISK_file_read (in, | ||
537 | &buf[total], | ||
538 | size - total); | ||
539 | if (rd > 0) | ||
540 | total += rd; | ||
541 | } while ( (rd > 0) && (total < size) ); | ||
542 | return (total == size) ? GNUNET_OK : GNUNET_SYSERR; | ||
543 | } | ||
544 | |||
545 | |||
898 | /** | 546 | /** |
899 | * Called every time there is data to read from the scanner. | 547 | * Called every time there is data to read from the scanner. |
900 | * Calls the scanner progress handler. | 548 | * Calls the scanner progress handler. |
@@ -905,124 +553,69 @@ run_directory_scan_thread (void *cls) | |||
905 | static void | 553 | static void |
906 | read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | 554 | read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) |
907 | { | 555 | { |
908 | struct GNUNET_FS_DirScanner *ds; | 556 | struct GNUNET_FS_DirScanner *ds = cls; |
909 | int end_it = 0; | ||
910 | enum GNUNET_FS_DirScannerProgressUpdateReason reason; | 557 | enum GNUNET_FS_DirScannerProgressUpdateReason reason; |
911 | ssize_t rd; | ||
912 | ssize_t total_read; | ||
913 | |||
914 | size_t filename_len; | 558 | size_t filename_len; |
915 | char is_directory; | 559 | int is_directory; |
916 | char *filename; | 560 | char *filename; |
917 | 561 | ||
918 | ds = cls; | ||
919 | |||
920 | ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; | 562 | ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; |
921 | 563 | if (! (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY)) | |
922 | if (!(tc->reason & GNUNET_SCHEDULER_REASON_READ_READY)) | ||
923 | { | 564 | { |
924 | ds->progress_callback (ds->cls, ds, NULL, 0, GNUNET_FS_DIRSCANNER_SHUTDOWN); | 565 | ds->progress_read_task |
566 | = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, | ||
567 | ds->progress_read, &read_progress_task, | ||
568 | ds); | ||
925 | return; | 569 | return; |
926 | } | 570 | } |
927 | 571 | ||
928 | /* Read one message. If message is malformed or can't be read, end the scanner */ | 572 | /* Read one message. If message is malformed or can't be read, end the scanner */ |
929 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &reason, sizeof (reason)); | 573 | filename = NULL; |
930 | while (rd > 0 && total_read < sizeof (reason)) | 574 | if ( (GNUNET_OK != |
931 | { | 575 | read_all (ds->progress_read, |
932 | rd = GNUNET_DISK_file_read (ds->progress_read, | 576 | (char*) &reason, |
933 | &((char *) &reason)[total_read], | 577 | sizeof (reason))) || |
934 | sizeof (reason) - total_read); | 578 | (reason < GNUNET_FS_DIRSCANNER_FILE_START) || |
935 | if (rd > 0) | 579 | (reason > GNUNET_FS_DIRSCANNER_INTERNAL_ERROR) || |
936 | total_read += rd; | 580 | (GNUNET_OK != |
937 | } | 581 | read_all (ds->progress_read, |
938 | if (total_read != sizeof (reason) | 582 | (char*) &filename_len, |
939 | || reason <= GNUNET_FS_DIRSCANNER_FIRST | 583 | sizeof (size_t))) || |
940 | || reason >= GNUNET_FS_DIRSCANNER_LAST) | 584 | (filename_len == 0) || |
941 | { | 585 | (filename_len > PATH_MAX) || |
942 | end_it = 1; | 586 | (GNUNET_OK != |
943 | reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; | 587 | read_all (ds->progress_read, |
944 | } | 588 | filename = GNUNET_malloc (filename_len), |
945 | 589 | filename_len)) || | |
946 | if (!end_it) | 590 | (filename[filename_len-1] != '\0') || |
947 | { | 591 | (GNUNET_OK != |
948 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &filename_len, | 592 | read_all (ds->progress_read, |
949 | sizeof (size_t)); | 593 | (char*) &is_directory, |
950 | while (rd > 0 && total_read < sizeof (size_t)) | 594 | sizeof (is_directory))) ) |
951 | { | 595 | { |
952 | rd = GNUNET_DISK_file_read (ds->progress_read, | 596 | /* IPC error, complain, signal client and stop reading |
953 | &((char *) &filename_len)[total_read], | 597 | from the pipe */ |
954 | sizeof (size_t) - total_read); | 598 | GNUNET_break (0); |
955 | if (rd > 0) | 599 | ds->progress_callback (ds->progress_callback_cls, ds, |
956 | total_read += rd; | 600 | NULL, GNUNET_SYSERR, |
957 | } | 601 | GNUNET_FS_DIRSCANNER_INTERNAL_ERROR); |
958 | if (rd != sizeof (size_t)) | 602 | GNUNET_free_non_null (filename); |
959 | { | 603 | return; |
960 | end_it = 1; | ||
961 | reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; | ||
962 | } | ||
963 | } | ||
964 | if (!end_it) | ||
965 | { | ||
966 | if (filename_len == 0) | ||
967 | end_it = 1; | ||
968 | else if (filename_len > PATH_MAX) | ||
969 | { | ||
970 | end_it = 1; | ||
971 | reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; | ||
972 | } | ||
973 | } | ||
974 | if (!end_it) | ||
975 | { | ||
976 | filename = GNUNET_malloc (filename_len); | ||
977 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, filename, | ||
978 | filename_len); | ||
979 | while (rd > 0 && total_read < filename_len) | ||
980 | { | ||
981 | rd = GNUNET_DISK_file_read (ds->progress_read, &filename[total_read], | ||
982 | filename_len - total_read); | ||
983 | if (rd > 0) | ||
984 | total_read += rd; | ||
985 | } | ||
986 | if (rd != filename_len) | ||
987 | { | ||
988 | GNUNET_free (filename); | ||
989 | reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; | ||
990 | end_it = 1; | ||
991 | } | ||
992 | } | ||
993 | if (!end_it && filename_len > 0) | ||
994 | { | ||
995 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &is_directory, | ||
996 | sizeof (char)); | ||
997 | while (rd > 0 && total_read < sizeof (char)) | ||
998 | { | ||
999 | rd = GNUNET_DISK_file_read (ds->progress_read, &(&is_directory)[total_read], | ||
1000 | sizeof (char) - total_read); | ||
1001 | if (rd > 0) | ||
1002 | total_read += rd; | ||
1003 | } | ||
1004 | if (rd != sizeof (char)) | ||
1005 | { | ||
1006 | GNUNET_free (filename); | ||
1007 | reason = GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR; | ||
1008 | end_it = 1; | ||
1009 | } | ||
1010 | } | ||
1011 | if (!end_it) | ||
1012 | { | ||
1013 | end_it = ds->progress_callback (ds->cls, ds, (const char *) filename, is_directory, reason); | ||
1014 | GNUNET_free (filename); | ||
1015 | if (!end_it) | ||
1016 | { | ||
1017 | ds->progress_read_task = GNUNET_SCHEDULER_add_read_file ( | ||
1018 | GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task, | ||
1019 | cls); | ||
1020 | } | ||
1021 | } | ||
1022 | else | ||
1023 | { | ||
1024 | ds->progress_callback (ds->cls, ds, NULL, 0, reason); | ||
1025 | } | 604 | } |
605 | /* schedule task to keep reading (done here in case client calls | ||
606 | abort or something similar) */ | ||
607 | ds->progress_read_task | ||
608 | = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, | ||
609 | ds->progress_read, | ||
610 | &read_progress_task, ds); | ||
611 | |||
612 | /* read successfully, notify client about progress */ | ||
613 | ds->progress_callback (ds->progress_callback_cls, | ||
614 | ds, | ||
615 | filename, | ||
616 | is_directory, | ||
617 | reason); | ||
618 | GNUNET_free (filename); | ||
1026 | } | 619 | } |
1027 | 620 | ||
1028 | 621 | ||
@@ -1033,275 +626,89 @@ read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | |||
1033 | * @param GNUNET_YES to not to run libextractor on files (only build a tree) | 626 | * @param GNUNET_YES to not to run libextractor on files (only build a tree) |
1034 | * @param ex if not NULL, must be a list of extra plugins for extractor | 627 | * @param ex if not NULL, must be a list of extra plugins for extractor |
1035 | * @param cb the callback to call when there are scanning progress messages | 628 | * @param cb the callback to call when there are scanning progress messages |
1036 | * @param cls closure for 'cb' | 629 | * @param cb_cls closure for 'cb' |
1037 | * @return directory scanner object to be used for controlling the scanner | 630 | * @return directory scanner object to be used for controlling the scanner |
1038 | */ | 631 | */ |
1039 | struct GNUNET_FS_DirScanner * | 632 | struct GNUNET_FS_DirScanner * |
1040 | GNUNET_FS_directory_scan_start (const char *filename, | 633 | GNUNET_FS_directory_scan_start (const char *filename, |
1041 | int disable_extractor, const char *ex, | 634 | int disable_extractor, const char *ex, |
1042 | GNUNET_FS_DirScannerProgressCallback cb, void *cls) | 635 | GNUNET_FS_DirScannerProgressCallback cb, |
636 | void *cb_cls) | ||
1043 | { | 637 | { |
1044 | struct stat sbuf; | 638 | struct stat sbuf; |
1045 | struct AddDirContext *adc; | ||
1046 | char *filename_expanded; | 639 | char *filename_expanded; |
1047 | struct GNUNET_FS_DirScanner *ds; | 640 | struct GNUNET_FS_DirScanner *ds; |
1048 | struct GNUNET_DISK_PipeHandle *progress_pipe; | 641 | struct GNUNET_DISK_PipeHandle *progress_pipe; |
642 | struct GNUNET_DISK_PipeHandle *stop_pipe; | ||
1049 | int ok; | 643 | int ok; |
1050 | 644 | ||
1051 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1052 | "Starting to scan directory `%s'\n", | ||
1053 | filename); | ||
1054 | if (0 != STAT (filename, &sbuf)) | 645 | if (0 != STAT (filename, &sbuf)) |
1055 | return NULL; | 646 | return NULL; |
1056 | |||
1057 | /* scan_directory() is guaranteed to be given expanded filenames, | ||
1058 | * so expand we will! | ||
1059 | */ | ||
1060 | filename_expanded = GNUNET_STRINGS_filename_expand (filename); | 647 | filename_expanded = GNUNET_STRINGS_filename_expand (filename); |
1061 | if (filename_expanded == NULL) | 648 | if (NULL == filename_expanded) |
1062 | return NULL; | 649 | return NULL; |
1063 | 650 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | |
651 | "Starting to scan directory `%s'\n", | ||
652 | filename_expanded); | ||
1064 | progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); | 653 | progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); |
1065 | if (progress_pipe == NULL) | 654 | if (NULL == progress_pipe) |
1066 | { | 655 | { |
1067 | GNUNET_free (filename_expanded); | 656 | GNUNET_free (filename_expanded); |
1068 | return NULL; | 657 | return NULL; |
1069 | } | 658 | } |
1070 | 659 | stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); | |
1071 | adc = GNUNET_malloc (sizeof (struct AddDirContext)); | 660 | if (NULL == stop_pipe) |
1072 | |||
1073 | ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner)); | ||
1074 | |||
1075 | ds->adc = adc; | ||
1076 | |||
1077 | ds->stop_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO, GNUNET_NO); | ||
1078 | if (ds->stop_pipe == NULL) | ||
1079 | { | 661 | { |
1080 | GNUNET_free (adc); | ||
1081 | GNUNET_free (ds); | ||
1082 | GNUNET_free (filename_expanded); | ||
1083 | GNUNET_DISK_pipe_close (progress_pipe); | 662 | GNUNET_DISK_pipe_close (progress_pipe); |
663 | GNUNET_free (filename_expanded); | ||
1084 | return NULL; | 664 | return NULL; |
1085 | } | 665 | } |
666 | |||
667 | ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner)); | ||
668 | ds->progress_callback = cb; | ||
669 | ds->progress_callback_cls = cb_cls; | ||
670 | ds->stop_pipe = stop_pipe; | ||
1086 | ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe, | 671 | ds->stop_write = GNUNET_DISK_pipe_handle (ds->stop_pipe, |
1087 | GNUNET_DISK_PIPE_END_WRITE); | 672 | GNUNET_DISK_PIPE_END_WRITE); |
1088 | adc->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe, | 673 | ds->stop_read = GNUNET_DISK_pipe_handle (ds->stop_pipe, |
1089 | GNUNET_DISK_PIPE_END_READ); | 674 | GNUNET_DISK_PIPE_END_READ); |
1090 | 675 | ds->progress_pipe = progress_pipe; | |
1091 | adc->plugins = NULL; | 676 | ds->progress_write = GNUNET_DISK_pipe_handle (progress_pipe, |
1092 | if (!disable_extractor) | 677 | GNUNET_DISK_PIPE_END_WRITE); |
678 | ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe, | ||
679 | GNUNET_DISK_PIPE_END_READ); | ||
680 | ds->filename_expanded = filename_expanded; | ||
681 | if (! disable_extractor) | ||
1093 | { | 682 | { |
1094 | adc->plugins = EXTRACTOR_plugin_add_defaults ( | 683 | ds->plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY); |
1095 | EXTRACTOR_OPTION_DEFAULT_POLICY); | 684 | if ( (NULL != ex) && strlen (ex) > 0) |
1096 | if (ex && strlen (ex) > 0) | 685 | ds->plugins = EXTRACTOR_plugin_add_config (ds->plugins, ex, |
1097 | adc->plugins = EXTRACTOR_plugin_add_config (adc->plugins, ex, | 686 | EXTRACTOR_OPTION_DEFAULT_POLICY); |
1098 | EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
1099 | } | 687 | } |
1100 | |||
1101 | adc->filename_expanded = filename_expanded; | ||
1102 | adc->progress_write = GNUNET_DISK_pipe_handle (progress_pipe, | ||
1103 | GNUNET_DISK_PIPE_END_WRITE); | ||
1104 | |||
1105 | |||
1106 | ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe, | ||
1107 | GNUNET_DISK_PIPE_END_READ); | ||
1108 | |||
1109 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1110 | "Creating thread to scan directory `%s'\n", | ||
1111 | filename); | ||
1112 | |||
1113 | #if WINDOWS | 688 | #if WINDOWS |
1114 | ds->thread = CreateThread (NULL, 0, | 689 | ds->thread = CreateThread (NULL, 0, |
1115 | (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, (LPVOID) adc, | 690 | (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, |
1116 | 0, NULL); | 691 | (LPVOID) ds, 0, NULL); |
1117 | ok = ds->thread != NULL; | 692 | ok = (ds->thread != NULL); |
1118 | #else | 693 | #else |
1119 | ok = !pthread_create (&ds->thread, NULL, &run_directory_scan_thread, | 694 | ok = (0 == pthread_create (&ds->thread, NULL, |
1120 | (void *) adc); | 695 | &run_directory_scan_thread, ds)); |
1121 | #endif | 696 | #endif |
1122 | if (!ok) | 697 | if (!ok) |
1123 | { | 698 | { |
1124 | GNUNET_free (adc); | 699 | EXTRACTOR_plugin_remove_all (ds->plugins); |
1125 | GNUNET_free (filename_expanded); | 700 | GNUNET_free (filename_expanded); |
701 | GNUNET_DISK_pipe_close (stop_pipe); | ||
1126 | GNUNET_DISK_pipe_close (progress_pipe); | 702 | GNUNET_DISK_pipe_close (progress_pipe); |
1127 | GNUNET_free (ds); | 703 | GNUNET_free (ds); |
1128 | return NULL; | 704 | return NULL; |
1129 | } | 705 | } |
1130 | 706 | ds->progress_read_task | |
1131 | ds->progress_callback = cb; | 707 | = GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, |
1132 | ds->cls = cls; | 708 | ds->progress_read, |
1133 | ds->adc = adc; | 709 | &read_progress_task, ds); |
1134 | ds->progress_pipe = progress_pipe; | ||
1135 | |||
1136 | ds->progress_read_task = GNUNET_SCHEDULER_add_read_file ( | ||
1137 | GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task, | ||
1138 | ds); | ||
1139 | |||
1140 | return ds; | 710 | return ds; |
1141 | } | 711 | } |
1142 | 712 | ||
1143 | /** | ||
1144 | * Task that post-processes the share item tree. | ||
1145 | * This processing has to be done in the main thread, because | ||
1146 | * it requires access to libgcrypt's hashing functions, and | ||
1147 | * libgcrypt is not thread-safe without some special magic. | ||
1148 | * | ||
1149 | * @param cls top of the stack | ||
1150 | * @param tc task context | ||
1151 | */ | ||
1152 | static void | ||
1153 | trim_share_tree_task (void *cls, | ||
1154 | const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
1155 | { | ||
1156 | struct ProcessMetadataStackItem *stack = cls; | ||
1157 | struct ProcessMetadataStackItem *next = stack; | ||
1158 | /* FIXME: figure out what to do when tc says we're shutting down */ | ||
1159 | |||
1160 | /* item == NULL means that we've just finished going over the children of | ||
1161 | * current directory. | ||
1162 | */ | ||
1163 | if (stack->item == NULL) | ||
1164 | { | ||
1165 | if (stack->parent->item != NULL) | ||
1166 | { | ||
1167 | /* end of a directory */ | ||
1168 | struct GNUNET_FS_Uri *ksk; | ||
1169 | |||
1170 | /* use keyword and metadata counters to create lists of keywords to move | ||
1171 | * and metadata to copy. | ||
1172 | */ | ||
1173 | process_keywords_and_metadata (stack, &stack->parent->exclude_ksk, &stack->parent->item->meta); | ||
1174 | |||
1175 | /* create keywords from metadata (copies all text-metadata as keywords, | ||
1176 | * AND parses the directory name we've just added, producing even more | ||
1177 | * keywords. | ||
1178 | * then merge these keywords with the ones moved from children. | ||
1179 | */ | ||
1180 | ksk = GNUNET_FS_uri_ksk_create_from_meta_data (stack->parent->item->meta); | ||
1181 | stack->parent->item->ksk_uri = GNUNET_FS_uri_ksk_merge (ksk, stack->parent->exclude_ksk); | ||
1182 | GNUNET_FS_uri_destroy (ksk); | ||
1183 | |||
1184 | /* remove moved keywords from children (complete the move) */ | ||
1185 | remove_keywords (stack->parent, stack->parent->item); | ||
1186 | GNUNET_FS_uri_destroy (stack->parent->exclude_ksk); | ||
1187 | |||
1188 | /* go up the stack */ | ||
1189 | next = stack->parent; | ||
1190 | GNUNET_free (stack); | ||
1191 | next->end_directory = GNUNET_YES; | ||
1192 | } | ||
1193 | else | ||
1194 | { | ||
1195 | /* we've just finished processing the toplevel directory */ | ||
1196 | struct GNUNET_FS_ProcessMetadataContext *ctx = stack->ctx; | ||
1197 | next = NULL; | ||
1198 | GNUNET_SCHEDULER_add_continuation (ctx->cb, ctx->cls, | ||
1199 | GNUNET_SCHEDULER_REASON_PREREQ_DONE); | ||
1200 | GNUNET_free (stack->parent); | ||
1201 | GNUNET_free (stack); | ||
1202 | GNUNET_free (ctx); | ||
1203 | } | ||
1204 | } | ||
1205 | else if (stack->item->is_directory | ||
1206 | && !stack->end_directory | ||
1207 | && stack->item->children_head != NULL) | ||
1208 | { | ||
1209 | /* recurse into subdirectory */ | ||
1210 | next = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); | ||
1211 | next->ctx = stack->ctx; | ||
1212 | next->item = stack->item->children_head; | ||
1213 | next->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1214 | next->metacounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1215 | next->dir_entry_count = 0; | ||
1216 | next->parent = stack; | ||
1217 | } | ||
1218 | else | ||
1219 | { | ||
1220 | /* process a child entry (a file or a directory) and move to the next one*/ | ||
1221 | if (stack->item->is_directory) | ||
1222 | stack->end_directory = GNUNET_NO; | ||
1223 | if (stack->ctx->toplevel->is_directory) | ||
1224 | { | ||
1225 | stack->dir_entry_count++; | ||
1226 | GNUNET_CONTAINER_meta_data_iterate (stack->item->meta, &add_to_meta_counter, stack->metacounter); | ||
1227 | |||
1228 | if (stack->item->is_directory) | ||
1229 | { | ||
1230 | char *user = getenv ("USER"); | ||
1231 | if ((user == NULL) || (0 != strncasecmp (user, stack->item->short_filename, strlen(user)))) | ||
1232 | { | ||
1233 | /* only use filename if it doesn't match $USER */ | ||
1234 | GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>", | ||
1235 | EXTRACTOR_METATYPE_FILENAME, | ||
1236 | EXTRACTOR_METAFORMAT_UTF8, | ||
1237 | "text/plain", stack->item->short_filename, | ||
1238 | strlen (stack->item->short_filename) + 1); | ||
1239 | GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>", | ||
1240 | EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, | ||
1241 | EXTRACTOR_METAFORMAT_UTF8, | ||
1242 | "text/plain", stack->item->short_filename, | ||
1243 | strlen (stack->item->short_filename) + 1); | ||
1244 | } | ||
1245 | } | ||
1246 | } | ||
1247 | stack->item->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (stack->item->meta); | ||
1248 | if (stack->ctx->toplevel->is_directory) | ||
1249 | { | ||
1250 | GNUNET_FS_uri_ksk_get_keywords (stack->item->ksk_uri, &add_to_keyword_counter, stack->keywordcounter); | ||
1251 | } | ||
1252 | stack->item = stack->item->next; | ||
1253 | } | ||
1254 | /* Call this task again later, if there are more entries to process */ | ||
1255 | if (next) | ||
1256 | GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, next, | ||
1257 | GNUNET_SCHEDULER_REASON_PREREQ_DONE); | ||
1258 | } | ||
1259 | |||
1260 | /** | ||
1261 | * Process a share item tree, moving frequent keywords up and | ||
1262 | * copying frequent metadata up. | ||
1263 | * | ||
1264 | * @param toplevel toplevel directory in the tree, returned by the scanner | ||
1265 | * @param cb called after processing is done | ||
1266 | * @param cls closure for 'cb' | ||
1267 | */ | ||
1268 | struct GNUNET_FS_ProcessMetadataContext * | ||
1269 | GNUNET_FS_trim_share_tree (struct GNUNET_FS_ShareTreeItem *toplevel, | ||
1270 | GNUNET_SCHEDULER_Task cb, void *cls) | ||
1271 | { | ||
1272 | struct GNUNET_FS_ProcessMetadataContext *ret; | ||
1273 | |||
1274 | if (toplevel == NULL) | ||
1275 | { | ||
1276 | struct GNUNET_SCHEDULER_TaskContext tc; | ||
1277 | tc.reason = GNUNET_SCHEDULER_REASON_PREREQ_DONE; | ||
1278 | cb (cls, &tc); | ||
1279 | return NULL; | ||
1280 | } | ||
1281 | |||
1282 | ret = GNUNET_malloc (sizeof (struct GNUNET_FS_ProcessMetadataContext)); | ||
1283 | ret->toplevel = toplevel; | ||
1284 | ret->stack = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); | ||
1285 | ret->stack->ctx = ret; | ||
1286 | ret->stack->item = toplevel; | ||
1287 | 713 | ||
1288 | if (ret->stack->ctx->toplevel->is_directory) | 714 | /* end of fs_dirmetascan.c */ |
1289 | { | ||
1290 | ret->stack->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1291 | ret->stack->metacounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1292 | } | ||
1293 | |||
1294 | ret->stack->dir_entry_count = 0; | ||
1295 | ret->stack->end_directory = GNUNET_NO; | ||
1296 | |||
1297 | /* dummy stack entry that tells us we're at the top of the stack */ | ||
1298 | ret->stack->parent = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); | ||
1299 | ret->stack->parent->ctx = ret; | ||
1300 | |||
1301 | ret->cb = cb; | ||
1302 | ret->cls = cls; | ||
1303 | |||
1304 | GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, ret->stack, | ||
1305 | GNUNET_SCHEDULER_REASON_PREREQ_DONE); | ||
1306 | return ret; | ||
1307 | } | ||
diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c index 396415bc6..6c9642b9b 100644 --- a/src/fs/fs_sharetree.c +++ b/src/fs/fs_sharetree.c | |||
@@ -421,8 +421,10 @@ GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel) | |||
421 | GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head, | 421 | GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head, |
422 | toplevel->parent->children_tail, | 422 | toplevel->parent->children_tail, |
423 | toplevel); | 423 | toplevel); |
424 | GNUNET_CONTAINER_meta_data_destroy (toplevel->meta); | 424 | if (NULL != toplevel->meta) |
425 | GNUNET_FS_uri_destroy (toplevel->ksk_uri); | 425 | GNUNET_CONTAINER_meta_data_destroy (toplevel->meta); |
426 | if (NULL != toplevel->ksk_uri) | ||
427 | GNUNET_FS_uri_destroy (toplevel->ksk_uri); | ||
426 | GNUNET_free_non_null (toplevel->filename); | 428 | GNUNET_free_non_null (toplevel->filename); |
427 | GNUNET_free_non_null (toplevel->short_filename); | 429 | GNUNET_free_non_null (toplevel->short_filename); |
428 | GNUNET_free (toplevel); | 430 | GNUNET_free (toplevel); |
diff --git a/src/fs/gnunet-publish.c b/src/fs/gnunet-publish.c index 98f39b821..33cba499e 100644 --- a/src/fs/gnunet-publish.c +++ b/src/fs/gnunet-publish.c | |||
@@ -68,12 +68,8 @@ static GNUNET_SCHEDULER_TaskIdentifier kill_task; | |||
68 | 68 | ||
69 | static struct GNUNET_FS_DirScanner *ds; | 69 | static struct GNUNET_FS_DirScanner *ds; |
70 | 70 | ||
71 | static struct GNUNET_FS_ShareTreeItem * directory_scan_intermediary_result; | ||
72 | |||
73 | static struct GNUNET_FS_ShareTreeItem * directory_scan_result; | 71 | static struct GNUNET_FS_ShareTreeItem * directory_scan_result; |
74 | 72 | ||
75 | static struct GNUNET_FS_ProcessMetadataContext *pmc; | ||
76 | |||
77 | static struct GNUNET_FS_Namespace *namespace; | 73 | static struct GNUNET_FS_Namespace *namespace; |
78 | 74 | ||
79 | /** | 75 | /** |
@@ -378,21 +374,17 @@ get_file_information (struct GNUNET_FS_ShareTreeItem *item) | |||
378 | item->ksk_uri, item->meta, !do_insert, | 374 | item->ksk_uri, item->meta, !do_insert, |
379 | &bo); | 375 | &bo); |
380 | } | 376 | } |
381 | GNUNET_CONTAINER_meta_data_destroy (item->meta); | ||
382 | GNUNET_FS_uri_destroy (item->ksk_uri); | ||
383 | GNUNET_free (item->short_filename); | ||
384 | GNUNET_free (item->filename); | ||
385 | GNUNET_free (item); | ||
386 | return fi; | 377 | return fi; |
387 | } | 378 | } |
388 | 379 | ||
380 | |||
389 | static void | 381 | static void |
390 | directory_trim_complete (void *cls, | 382 | directory_trim_complete () |
391 | const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
392 | { | 383 | { |
393 | struct GNUNET_FS_FileInformation *fi; | 384 | struct GNUNET_FS_FileInformation *fi; |
394 | directory_scan_result = directory_scan_intermediary_result; | 385 | |
395 | fi = get_file_information (directory_scan_result); | 386 | fi = get_file_information (directory_scan_result); |
387 | GNUNET_FS_share_tree_free (directory_scan_result); | ||
396 | directory_scan_result = NULL; | 388 | directory_scan_result = NULL; |
397 | if (fi == NULL) | 389 | if (fi == NULL) |
398 | { | 390 | { |
@@ -425,7 +417,8 @@ directory_trim_complete (void *cls, | |||
425 | } | 417 | } |
426 | } | 418 | } |
427 | 419 | ||
428 | static int | 420 | |
421 | static void | ||
429 | directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds, | 422 | directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds, |
430 | const char *filename, | 423 | const char *filename, |
431 | int is_directory, | 424 | int is_directory, |
@@ -433,64 +426,47 @@ directory_scan_cb (void *cls, struct GNUNET_FS_DirScanner *ds, | |||
433 | { | 426 | { |
434 | switch (reason) | 427 | switch (reason) |
435 | { | 428 | { |
436 | case GNUNET_FS_DIRSCANNER_NEW_FILE: | 429 | case GNUNET_FS_DIRSCANNER_FILE_START: |
437 | if (filename != NULL) | 430 | if (is_directory) |
438 | { | 431 | FPRINTF (stdout, _("Scanning directory `%s'.\n"), filename); |
439 | if (is_directory) | 432 | else |
440 | FPRINTF (stdout, _("Scanning directory `%s'.\n"), filename); | 433 | FPRINTF (stdout, _("Scanning file `%s'.\n"), filename); |
441 | else | ||
442 | FPRINTF (stdout, _("Scanning file `%s'.\n"), filename); | ||
443 | } | ||
444 | break; | 434 | break; |
445 | case GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST: | 435 | case GNUNET_FS_DIRSCANNER_SUBTREE_COUNTED: |
446 | if (filename != NULL) | 436 | if (is_directory) |
447 | { | 437 | FPRINTF (stdout, _("Done scanning directory `%s'.\n"), filename); |
448 | FPRINTF (stdout, | ||
449 | _("Failed to scan `%s', because it does not exist.\n"), | ||
450 | filename); | ||
451 | } | ||
452 | break; | 438 | break; |
453 | case GNUNET_FS_DIRSCANNER_ASKED_TO_STOP: | 439 | case GNUNET_FS_DIRSCANNER_ALL_COUNTED: |
454 | if (filename != NULL) | 440 | FPRINTF (stdout, "%s", _("Preprocessing complete.\n")); |
455 | { | 441 | break; |
456 | FPRINTF (stdout, | 442 | case GNUNET_FS_DIRSCANNER_EXTRACT_FINISHED: |
457 | _("Scanner was about to scan `%s', but is now stopping.\n"), | 443 | FPRINTF (stdout, _("Extracting meta data from file `%s' complete.\n"), filename); |
458 | filename); | ||
459 | } | ||
460 | else | ||
461 | FPRINTF (stdout, "%s", _("Scanner is stopping.\n")); | ||
462 | break; | 444 | break; |
463 | case GNUNET_FS_DIRSCANNER_SHUTDOWN: | 445 | case GNUNET_FS_DIRSCANNER_DOES_NOT_EXIST: |
464 | FPRINTF (stdout, "%s", _("Client is shutting down.\n")); | 446 | FPRINTF (stdout, |
447 | _("There was trouble processing file `%s', skipping it.\n"), | ||
448 | filename); | ||
465 | break; | 449 | break; |
466 | case GNUNET_FS_DIRSCANNER_FINISHED: | 450 | case GNUNET_FS_DIRSCANNER_FINISHED: |
467 | FPRINTF (stdout, "%s", _("Scanner has finished.\n")); | 451 | FPRINTF (stdout, "%s", _("Scanner has finished.\n")); |
452 | directory_scan_result = GNUNET_FS_directory_scan_get_result (ds); | ||
453 | ds = NULL; | ||
454 | GNUNET_FS_share_tree_trim (directory_scan_result); | ||
455 | directory_trim_complete (); | ||
468 | break; | 456 | break; |
469 | case GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR: | 457 | case GNUNET_FS_DIRSCANNER_INTERNAL_ERROR: |
470 | FPRINTF (stdout, "%s", | 458 | FPRINTF (stdout, "%s", _("Internal error scanning directory.\n")); |
471 | _("There was a failure communicating with the scanner.\n")); | 459 | GNUNET_FS_directory_scan_abort (ds); |
460 | ds = NULL; | ||
461 | if (namespace != NULL) | ||
462 | GNUNET_FS_namespace_delete (namespace, GNUNET_NO); | ||
463 | GNUNET_FS_stop (ctx); | ||
464 | ret = 1; | ||
472 | break; | 465 | break; |
473 | default: | 466 | default: |
474 | FPRINTF (stdout, _("Got unknown scanner update with filename `%s'.\n"), | 467 | GNUNET_assert (0); |
475 | filename); | ||
476 | break; | 468 | break; |
477 | } | 469 | } |
478 | if ((filename == NULL && GNUNET_FS_DIRSCANNER_FINISHED) | ||
479 | || reason == GNUNET_FS_DIRSCANNER_PROTOCOL_ERROR | ||
480 | || reason == GNUNET_FS_DIRSCANNER_SHUTDOWN) | ||
481 | { | ||
482 | /* Any of this causes us to try to clean up the scanner */ | ||
483 | directory_scan_intermediary_result = GNUNET_FS_directory_scan_cleanup (ds); | ||
484 | pmc = GNUNET_FS_trim_share_tree (directory_scan_intermediary_result, | ||
485 | &directory_trim_complete, NULL); | ||
486 | |||
487 | ds = NULL; | ||
488 | /* FIXME: change the tree processor to be able to free untrimmed trees | ||
489 | * right here instead of waiting for trimming to complete, if we need to | ||
490 | * cancel everything. | ||
491 | */ | ||
492 | } | ||
493 | return 0; | ||
494 | } | 470 | } |
495 | 471 | ||
496 | 472 | ||