diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-01-14 16:04:58 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-01-14 16:04:58 +0000 |
commit | 85967c4d4bd03d68a677f6e8023b192b8b4453f5 (patch) | |
tree | fdfea34d80d25bf40c1a1e117ef86b65dba5c096 /src/fs/fs_dirmetascan.c | |
parent | 385d99b60ab8eedc6d26b1e66949a43afafdd79e (diff) | |
download | gnunet-85967c4d4bd03d68a677f6e8023b192b8b4453f5.tar.gz gnunet-85967c4d4bd03d68a677f6e8023b192b8b4453f5.zip |
-file was missing, forgot to add earlier
Diffstat (limited to 'src/fs/fs_dirmetascan.c')
-rw-r--r-- | src/fs/fs_dirmetascan.c | 1282 |
1 files changed, 1282 insertions, 0 deletions
diff --git a/src/fs/fs_dirmetascan.c b/src/fs/fs_dirmetascan.c new file mode 100644 index 000000000..372579ccb --- /dev/null +++ b/src/fs/fs_dirmetascan.c | |||
@@ -0,0 +1,1282 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2005-2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 2, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | |||
21 | #include "platform.h" | ||
22 | #include "gnunet_fs_service.h" | ||
23 | #include "gnunet_scheduler_lib.h" | ||
24 | |||
/**
 * Per-keyword bookkeeping record: remembers one distinct keyword
 * and how often it occurred.
 */
struct KeywordCounter
{

  /**
   * The keyword this record stands for.
   */
  const char *value;

  /**
   * Number of files carrying this keyword.
   */
  unsigned int count;

  /**
   * Previous element of the doubly-linked list.
   */
  struct KeywordCounter *prev;

  /**
   * Next element of the doubly-linked list.
   */
  struct KeywordCounter *next;
};
52 | |||
53 | /** | ||
54 | * Aggregate information we keep for meta data in each directory. | ||
55 | */ | ||
56 | struct MetaCounter | ||
57 | { | ||
58 | /** | ||
59 | * The actual meta data. | ||
60 | */ | ||
61 | const char *data; | ||
62 | |||
63 | /** | ||
64 | * Number of bytes in 'data'. | ||
65 | */ | ||
66 | size_t data_size; | ||
67 | |||
68 | /** | ||
69 | * Name of the plugin that provided that piece of metadata | ||
70 | */ | ||
71 | const char *plugin_name; | ||
72 | |||
73 | /** | ||
74 | * Type of the data | ||
75 | */ | ||
76 | enum EXTRACTOR_MetaType type; | ||
77 | |||
78 | /** | ||
79 | * Format of the data | ||
80 | */ | ||
81 | enum EXTRACTOR_MetaFormat format; | ||
82 | |||
83 | /** | ||
84 | * MIME-type of the metadata itself | ||
85 | */ | ||
86 | const char *data_mime_type; | ||
87 | |||
88 | /** | ||
89 | * How many files have meta entries matching this value? | ||
90 | * (type and format do not have to match). | ||
91 | */ | ||
92 | unsigned int count; | ||
93 | |||
94 | /** | ||
95 | * This is a doubly-linked list | ||
96 | */ | ||
97 | struct MetaCounter *prev; | ||
98 | |||
99 | /** | ||
100 | * This is a doubly-linked list | ||
101 | */ | ||
102 | struct MetaCounter *next; | ||
103 | }; | ||
104 | |||
/**
 * Execution context for the directory scan ('scan_directory').
 * Owned by the initiator thread.
 */
struct AddDirContext
{
  /**
   * Parent directory item; new items are attached to it.
   * Once the scan is finished, this points to the top-level
   * directory entry of the tree built by the scanner.
   */
  struct ShareTreeItem *parent;

  /**
   * Expanded filename as given by the scan initiator.
   * The scanner thread keeps a copy here and frees it when it is done.
   */
  char *filename_expanded;

  /**
   * Synchronization primitive.  A change of its state means that
   * the initiator wants the scanner to wrap up.
   * Owned by the initiator thread.
   */
#if WINDOWS
  HANDLE stop;
#else
  sem_t *stop;
#endif

  /**
   * 1 if the scanner should stop, 0 otherwise.  Set in response
   * to communication errors or when the initiator asks the scan
   * to stop.
   */
  char do_stop;

  /**
   * Write end of the pipe used for progress messages.
   * The pipe belongs to the initiator thread; the scanner thread
   * has no access to the pipe itself and therefore never closes
   * this end.  The initiator MUST keep it alive until the scanner
   * thread has finished.
   */
  const struct GNUNET_DISK_FileHandle *progress_write;


  /**
   * libextractor plugins used for extraction.
   * Set up when the scan starts, removed when it finishes.
   */
  struct EXTRACTOR_PluginList *plugins;
};
160 | |||
161 | /** | ||
162 | * An opaque structure a pointer to which is returned to the | ||
163 | * caller to be used to control the scanner. | ||
164 | */ | ||
165 | struct GNUNET_FS_DirScanner | ||
166 | { | ||
167 | /** | ||
168 | * A synchronization privitive that is used to signal the scanner to stop. | ||
169 | * Owned by the initiator thread. | ||
170 | */ | ||
171 | #if WINDOWS | ||
172 | HANDLE stop; | ||
173 | #else | ||
174 | sem_t *stop; | ||
175 | #endif | ||
176 | |||
177 | /** | ||
178 | * A thread object for the scanner thread. | ||
179 | * Owned by the initiator thread. | ||
180 | */ | ||
181 | #if WINDOWS | ||
182 | HANDLE thread; | ||
183 | #else | ||
184 | pthread_t thread; | ||
185 | #endif | ||
186 | |||
187 | /** | ||
188 | * A task for reading progress messages from the scanner. | ||
189 | */ | ||
190 | GNUNET_SCHEDULER_TaskIdentifier progress_read_task; | ||
191 | |||
192 | /** | ||
193 | * The end of the pipe that is used to read progress messages. | ||
194 | */ | ||
195 | const struct GNUNET_DISK_FileHandle *progress_read; | ||
196 | |||
197 | /** | ||
198 | * The pipe that is used to read progress messages. | ||
199 | * Owned (along with both of its ends) by the initiator thread. | ||
200 | * Only closed after the scanner thread is finished. | ||
201 | */ | ||
202 | struct GNUNET_DISK_PipeHandle *progress_pipe; | ||
203 | |||
204 | /** | ||
205 | * The function that will be called every time there's a progress | ||
206 | * message. | ||
207 | */ | ||
208 | GNUNET_FS_DirScannerProgressCallback progress_callback; | ||
209 | |||
210 | /** | ||
211 | * A closure for progress_callback. | ||
212 | */ | ||
213 | void *cls; | ||
214 | |||
215 | /** | ||
216 | * A pointer to the context of the scanner. | ||
217 | * Owned by the initiator thread. | ||
218 | * Initiator thread shouldn't touch it until the scanner thread | ||
219 | * is finished. | ||
220 | */ | ||
221 | struct AddDirContext *adc; | ||
222 | }; | ||
223 | |||
/**
 * Element of the singly-linked list that serves as the stack of the
 * metadata-processing function.
 */
struct ProcessMetadataStackItem
{
  /**
   * Metadata-processing context; identical in every stack item.
   */
  struct ProcessMetadataContext *ctx;

  /**
   * Link of the singly-linked list.  A pointer to the end of the
   * list is kept and the list is walked backwards through this link.
   */
  struct ProcessMetadataStackItem *parent;

  /**
   * Maps the hash of a keyword to a 'struct KeywordCounter *' that
   * records how often the keyword was seen in the current directory.
   */
  struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;

  /**
   * Maps the hash of a metadata item to a 'struct MetaCounter *' that
   * records how often the item was seen in the current directory.
   */
  struct GNUNET_CONTAINER_MultiHashMap *metacounter;

  /**
   * Number of files in the current directory.
   */
  unsigned int dir_entry_count;

  /**
   * Keywords that must not be used for KSK because they will be
   * associated with the parent as well.  NULL if nothing is blocked.
   */
  struct GNUNET_FS_Uri *exclude_ksk;

  /**
   * Share tree item currently being processed.
   */
  struct ShareTreeItem *item;

  /**
   * GNUNET_YES once the directory pointed to by 'item' has been
   * processed and we should move on to the next one; otherwise the
   * directory will be recursed into.
   */
  int end_directory;

};
280 | |||
281 | /** | ||
282 | * The structure to keep the state of metadata processing | ||
283 | */ | ||
284 | struct ProcessMetadataContext | ||
285 | { | ||
286 | /** | ||
287 | * The top of the stack. | ||
288 | */ | ||
289 | struct ProcessMetadataStackItem *stack; | ||
290 | |||
291 | /** | ||
292 | * Callback to invoke when processing is finished | ||
293 | */ | ||
294 | GNUNET_SCHEDULER_Task cb; | ||
295 | |||
296 | /** | ||
297 | * Closure for 'cb' | ||
298 | */ | ||
299 | void *cls; | ||
300 | |||
301 | /** | ||
302 | * Toplevel directory item of the tree to process. | ||
303 | */ | ||
304 | struct ShareTreeItem *toplevel; | ||
305 | }; | ||
306 | |||
307 | /** | ||
308 | * Called every now and then by the scanner. | ||
309 | * Checks the synchronization privitive. | ||
310 | * Returns 1 if the scanner should stop, 0 otherwise. | ||
311 | */ | ||
312 | static int | ||
313 | should_stop (struct AddDirContext *adc) | ||
314 | { | ||
315 | #if WINDOWS | ||
316 | if (WaitForSingleObject (adc->stop, 0) == WAIT_TIMEOUT) | ||
317 | return 0; | ||
318 | adc->do_stop = 1; | ||
319 | return 1; | ||
320 | #else | ||
321 | int value; | ||
322 | sem_getvalue(adc->stop, &value); | ||
323 | if (value > 0) | ||
324 | { | ||
325 | adc->do_stop = 1; | ||
326 | return 1; | ||
327 | } | ||
328 | return 0; | ||
329 | #endif | ||
330 | } | ||
331 | |||
332 | /** | ||
333 | * Write progress message. | ||
334 | * Format is: | ||
335 | * <reason><filename length><filename><directory flag> | ||
336 | * If filename is NULL, filename is not written, and its length | ||
337 | * is written as 0, and nothing else is written. It signals the initiator | ||
338 | * thread that the scanner is finished, and that it can now join its thread. | ||
339 | * | ||
340 | * Also checks if the initiator thread wants the scanner to stop, | ||
341 | * Returns 1 to stop scanning (if the signal was received, or | ||
342 | * if the pipe was broken somehow), 0 otherwise. | ||
343 | */ | ||
344 | static int | ||
345 | write_progress (struct AddDirContext *adc, const char *filename, | ||
346 | char is_directory, enum GNUNET_DirScannerProgressUpdateReason reason) | ||
347 | { | ||
348 | size_t filename_len; | ||
349 | size_t wr; | ||
350 | size_t total_write; | ||
351 | if ((adc->do_stop || should_stop (adc)) && reason != GNUNET_DIR_SCANNER_ASKED_TO_STOP | ||
352 | && reason != GNUNET_DIR_SCANNER_FINISHED) | ||
353 | return 1; | ||
354 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
355 | &reason, sizeof (reason)); | ||
356 | while (wr > 0 && total_write < sizeof (reason)) | ||
357 | { | ||
358 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
359 | &((char *)&reason)[total_write], sizeof (reason) - total_write); | ||
360 | if (wr > 0) | ||
361 | total_write += wr; | ||
362 | } | ||
363 | if (sizeof (reason) != wr) | ||
364 | return 1; | ||
365 | if (filename) | ||
366 | filename_len = strlen (filename) + 1; | ||
367 | else | ||
368 | filename_len = 0; | ||
369 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
370 | &filename_len, sizeof (size_t)); | ||
371 | while (wr > 0 && total_write < sizeof (size_t)) | ||
372 | { | ||
373 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
374 | &((char *)&filename_len)[total_write], sizeof (size_t) - total_write); | ||
375 | if (wr > 0) | ||
376 | total_write += wr; | ||
377 | } | ||
378 | if (sizeof (size_t) != wr) | ||
379 | return 1; | ||
380 | if (filename) | ||
381 | { | ||
382 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
383 | filename, filename_len); | ||
384 | while (wr > 0 && total_write < filename_len) | ||
385 | { | ||
386 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
387 | &((char *)filename)[total_write], filename_len - total_write); | ||
388 | if (wr > 0) | ||
389 | total_write += wr; | ||
390 | } | ||
391 | if (filename_len != wr) | ||
392 | return 1; | ||
393 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
394 | &is_directory, sizeof (char)); | ||
395 | while (wr > 0 && total_write < sizeof (char)) | ||
396 | { | ||
397 | total_write = wr = GNUNET_DISK_file_write (adc->progress_write, | ||
398 | &((char *)&is_directory)[total_write], sizeof (char) - total_write); | ||
399 | if (wr > 0) | ||
400 | total_write += wr; | ||
401 | } | ||
402 | if (sizeof (char) != wr) | ||
403 | return 1; | ||
404 | } | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | /** | ||
409 | * Add the given keyword to the | ||
410 | * keyword statistics tracker. | ||
411 | * | ||
412 | * @param cls closure (user-defined) | ||
413 | * @param keyword the keyword to count | ||
414 | * @param is_mandatory ignored | ||
415 | * @return always GNUNET_OK | ||
416 | */ | ||
417 | static int | ||
418 | add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory) | ||
419 | { | ||
420 | struct GNUNET_CONTAINER_MultiHashMap *mcm = cls; | ||
421 | struct KeywordCounter *cnt, *first_cnt; | ||
422 | GNUNET_HashCode hc; | ||
423 | size_t klen; | ||
424 | |||
425 | klen = strlen (keyword) + 1; | ||
426 | GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); | ||
427 | /* Since the map might contain multiple values per keyword, we only | ||
428 | * store one value, and attach all other to it, forming a linked list. | ||
429 | * Somewhat easier than retrieving multiple items via callback. | ||
430 | */ | ||
431 | first_cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc); | ||
432 | for (cnt = first_cnt; cnt && strcmp (cnt->value, keyword) != 0; cnt = cnt->next); | ||
433 | if (cnt == NULL) | ||
434 | { | ||
435 | cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen); | ||
436 | cnt->value = (const char *) &cnt[1]; | ||
437 | memcpy (&cnt[1], keyword, klen); | ||
438 | if (first_cnt != NULL) | ||
439 | { | ||
440 | if (first_cnt->prev != NULL) | ||
441 | { | ||
442 | first_cnt->prev->next = cnt; | ||
443 | cnt->prev = first_cnt->prev; | ||
444 | } | ||
445 | first_cnt->prev = cnt; | ||
446 | cnt->next = first_cnt; | ||
447 | } | ||
448 | else | ||
449 | GNUNET_CONTAINER_multihashmap_put (mcm, &hc, cnt, | ||
450 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE); | ||
451 | } | ||
452 | cnt->count++; | ||
453 | return GNUNET_OK; | ||
454 | } | ||
455 | |||
456 | /** | ||
457 | * Type of a function that libextractor calls for each | ||
458 | * meta data item found. | ||
459 | * | ||
460 | * @param cls the container multihashmap to update | ||
461 | * @param plugin_name name of the plugin that produced this value; | ||
462 | * special values can be used (i.e. '<zlib>' for zlib being | ||
463 | * used in the main libextractor library and yielding | ||
464 | * meta data). | ||
465 | * @param type libextractor-type describing the meta data | ||
466 | * @param format basic format information about data | ||
467 | * @param data_mime_type mime-type of data (not of the original file); | ||
468 | * can be NULL (if mime-type is not known) | ||
469 | * @param data actual meta-data found | ||
470 | * @param data_len number of bytes in data | ||
471 | * @return GNUNET_OK to continue extracting / iterating | ||
472 | */ | ||
473 | static int | ||
474 | add_to_meta_counter (void *cls, const char *plugin_name, | ||
475 | enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, | ||
476 | const char *data_mime_type, const char *data, size_t data_len) | ||
477 | { | ||
478 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; | ||
479 | GNUNET_HashCode key; | ||
480 | struct MetaCounter *cnt, *first_cnt; | ||
481 | |||
482 | GNUNET_CRYPTO_hash (data, data_len, &key); | ||
483 | first_cnt = GNUNET_CONTAINER_multihashmap_get (map, &key); | ||
484 | for (cnt = first_cnt; cnt | ||
485 | && cnt->data_size != data_len | ||
486 | && memcmp (cnt->data, data, cnt->data_size) != 0; cnt = cnt->next); | ||
487 | if (cnt == NULL) | ||
488 | { | ||
489 | cnt = GNUNET_malloc (sizeof (struct MetaCounter)); | ||
490 | cnt->data = data; | ||
491 | cnt->data_size = data_len; | ||
492 | cnt->plugin_name = plugin_name; | ||
493 | cnt->type = type; | ||
494 | cnt->format = format; | ||
495 | cnt->data_mime_type = data_mime_type; | ||
496 | |||
497 | if (first_cnt != NULL) | ||
498 | { | ||
499 | if (first_cnt->prev != NULL) | ||
500 | { | ||
501 | first_cnt->prev->next = cnt; | ||
502 | cnt->prev = first_cnt->prev; | ||
503 | } | ||
504 | first_cnt->prev = cnt; | ||
505 | cnt->next = first_cnt; | ||
506 | } | ||
507 | else | ||
508 | GNUNET_CONTAINER_multihashmap_put (map, &key, cnt, | ||
509 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE); | ||
510 | } | ||
511 | cnt->count++; | ||
512 | return 0; | ||
513 | } | ||
514 | |||
515 | /** | ||
516 | * Allocates a struct ShareTreeItem and adds it to its parent. | ||
517 | */ | ||
518 | static struct ShareTreeItem * | ||
519 | make_item (struct ShareTreeItem *parent) | ||
520 | { | ||
521 | struct ShareTreeItem *item; | ||
522 | item = GNUNET_malloc (sizeof (struct ShareTreeItem)); | ||
523 | |||
524 | item->parent = parent; | ||
525 | if (parent) | ||
526 | GNUNET_CONTAINER_DLL_insert (parent->children_head, parent->children_tail, | ||
527 | item); | ||
528 | return item; | ||
529 | } | ||
530 | |||
531 | /** | ||
532 | * Extract metadata from a file and add it to the share tree | ||
533 | * | ||
534 | * @param adc context to modify | ||
535 | * @param filename name of the file to process | ||
536 | */ | ||
537 | static void | ||
538 | extract_file (struct AddDirContext *adc, const char *filename) | ||
539 | { | ||
540 | struct ShareTreeItem *item; | ||
541 | const char *short_fn; | ||
542 | |||
543 | item = make_item (adc->parent); | ||
544 | |||
545 | GNUNET_DISK_file_size (filename, &item->file_size, GNUNET_YES); | ||
546 | item->is_directory = GNUNET_NO; | ||
547 | |||
548 | item->meta = GNUNET_CONTAINER_meta_data_create (); | ||
549 | GNUNET_FS_meta_data_extract_from_file (item->meta, filename, | ||
550 | adc->plugins); | ||
551 | GNUNET_CONTAINER_meta_data_delete (item->meta, EXTRACTOR_METATYPE_FILENAME, | ||
552 | NULL, 0); | ||
553 | short_fn = GNUNET_STRINGS_get_short_name (filename); | ||
554 | |||
555 | item->filename = GNUNET_strdup (filename); | ||
556 | item->short_filename = GNUNET_strdup (short_fn); | ||
557 | |||
558 | GNUNET_CONTAINER_meta_data_insert (item->meta, "<libgnunetfs>", | ||
559 | EXTRACTOR_METATYPE_FILENAME, | ||
560 | EXTRACTOR_METAFORMAT_UTF8, "text/plain", | ||
561 | short_fn, strlen (short_fn) + 1); | ||
562 | } | ||
563 | |||
564 | /** | ||
565 | * Remove the keyword from the ksk URI. | ||
566 | * | ||
567 | * @param cls the ksk uri | ||
568 | * @param keyword the word to remove | ||
569 | * @param is_mandatory ignored | ||
570 | * @return always GNUNET_OK | ||
571 | */ | ||
572 | static int | ||
573 | remove_keyword (void *cls, const char *keyword, int is_mandatory) | ||
574 | { | ||
575 | struct GNUNET_FS_Uri *ksk = cls; | ||
576 | |||
577 | GNUNET_FS_uri_ksk_remove_keyword (ksk, keyword); | ||
578 | return GNUNET_OK; | ||
579 | } | ||
580 | |||
581 | /** | ||
582 | * Remove keywords from current directory's children, if they are | ||
583 | * in the exluded keywords list of that directory. | ||
584 | * | ||
585 | * @param cls the ksk uri | ||
586 | * @param keyword the word to remove | ||
587 | * @param is_mandatory ignored | ||
588 | * @return always GNUNET_OK | ||
589 | */ | ||
590 | static int | ||
591 | remove_keywords (struct ProcessMetadataStackItem *stack, struct ShareTreeItem *dir) | ||
592 | { | ||
593 | struct ShareTreeItem *item; | ||
594 | |||
595 | for (item = dir->children_head; item; item = item->next) | ||
596 | { | ||
597 | if (stack->exclude_ksk != NULL) | ||
598 | GNUNET_FS_uri_ksk_get_keywords (stack->exclude_ksk, &remove_keyword, item->ksk_uri); | ||
599 | } | ||
600 | return GNUNET_OK; | ||
601 | } | ||
602 | |||
/**
 * Closure handed to 'migrate_and_drop'.
 */
struct KeywordProcessContext
{
  /**
   * Collects all keywords that were migrated to the parent.
   */
  struct GNUNET_FS_Uri *ksk;

  /**
   * Minimum number of occurrences a keyword needs in order to be
   * migrated to the parent.
   */
  unsigned int threshold;
};
619 | |||
/**
 * Closure handed to 'migrate_and_drop_metadata'.
 */
struct MetaProcessContext
{
  /**
   * Collects all metadata that is copied to the parent.
   */
  struct GNUNET_CONTAINER_MetaData *meta;

  /**
   * Minimum number of occurrences a metadata item needs in order to
   * be copied to the parent.
   */
  unsigned int threshold;
};
636 | |||
637 | |||
638 | /** | ||
639 | * Move "frequent" keywords over to the | ||
640 | * target ksk uri, free the counters. | ||
641 | * | ||
642 | */ | ||
643 | static int | ||
644 | migrate_and_drop (void *cls, const GNUNET_HashCode * key, void *value) | ||
645 | { | ||
646 | struct KeywordProcessContext *kpc = cls; | ||
647 | struct KeywordCounter *counter = value; | ||
648 | |||
649 | if (counter->count >= kpc->threshold && counter->count > 1) | ||
650 | { | ||
651 | GNUNET_FS_uri_ksk_add_keyword (kpc->ksk, counter->value, GNUNET_NO); | ||
652 | } | ||
653 | GNUNET_free (counter); | ||
654 | return GNUNET_YES; | ||
655 | } | ||
656 | /** | ||
657 | * Copy "frequent" metadata items over to the | ||
658 | * target metadata container, free the counters. | ||
659 | * | ||
660 | */ | ||
661 | static int | ||
662 | migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value) | ||
663 | { | ||
664 | struct MetaProcessContext *mpc = cls; | ||
665 | struct MetaCounter *counter = value; | ||
666 | |||
667 | if (counter->count >= mpc->threshold && counter->count > 1) | ||
668 | { | ||
669 | GNUNET_CONTAINER_meta_data_insert (mpc->meta, | ||
670 | counter->plugin_name, | ||
671 | counter->type, | ||
672 | counter->format, | ||
673 | counter->data_mime_type, counter->data, | ||
674 | counter->data_size); | ||
675 | } | ||
676 | GNUNET_free (counter); | ||
677 | return GNUNET_YES; | ||
678 | } | ||
679 | |||
680 | /** | ||
681 | * Go over the collected keywords from all entries in the | ||
682 | * directory and push common keywords up one level (by | ||
683 | * adding it to the returned struct). Do the same for metadata. | ||
684 | * Destroys keywordcounter and metacoutner for current directory. | ||
685 | * | ||
686 | * @param adc collection of child meta data | ||
687 | * @param exclude_ksk pointer to where moveable keywords will be stored | ||
688 | * @param copy_meta pointer to where copyable metadata will be stored | ||
689 | */ | ||
690 | static void | ||
691 | process_keywords_and_metadata (struct ProcessMetadataStackItem *stack, | ||
692 | struct GNUNET_FS_Uri **exclude_ksk, | ||
693 | struct GNUNET_CONTAINER_MetaData **copy_meta) | ||
694 | { | ||
695 | struct KeywordProcessContext kpc; | ||
696 | struct MetaProcessContext mpc; | ||
697 | struct GNUNET_CONTAINER_MetaData *tmp; | ||
698 | |||
699 | /* Surprisingly, it's impossible to create a ksk with 0 keywords directly. | ||
700 | * But we can create one from an empty metadata set | ||
701 | */ | ||
702 | tmp = GNUNET_CONTAINER_meta_data_create (); | ||
703 | kpc.ksk = GNUNET_FS_uri_ksk_create_from_meta_data (tmp); | ||
704 | GNUNET_CONTAINER_meta_data_destroy (tmp); | ||
705 | mpc.meta = GNUNET_CONTAINER_meta_data_create (); | ||
706 | |||
707 | kpc.threshold = mpc.threshold = (stack->dir_entry_count + 1) / 2; /* 50% */ | ||
708 | |||
709 | GNUNET_CONTAINER_multihashmap_iterate (stack->keywordcounter, | ||
710 | &migrate_and_drop, &kpc); | ||
711 | GNUNET_CONTAINER_multihashmap_iterate (stack->metacounter, | ||
712 | &migrate_and_drop_metadata, &mpc); | ||
713 | |||
714 | GNUNET_CONTAINER_multihashmap_destroy (stack->keywordcounter); | ||
715 | GNUNET_CONTAINER_multihashmap_destroy (stack->metacounter); | ||
716 | *exclude_ksk = kpc.ksk; | ||
717 | *copy_meta = mpc.meta; | ||
718 | } | ||
719 | |||
720 | /** | ||
721 | * Function called by the directory iterator to | ||
722 | * (recursively) add all of the files in the | ||
723 | * directory to the tree. | ||
724 | * Called by the directory scanner to initiate the | ||
725 | * scan. | ||
726 | * TODO: find a way to make it non-recursive. | ||
727 | * | ||
728 | * @param cls the 'struct AddDirContext*' we're in | ||
729 | * @param filename file or directory to scan | ||
730 | */ | ||
731 | static int | ||
732 | scan_directory (void *cls, const char *filename) | ||
733 | { | ||
734 | struct AddDirContext *adc = cls, recurse_adc; | ||
735 | struct stat sbuf; | ||
736 | struct ShareTreeItem *item; | ||
737 | const char *short_fn; | ||
738 | int do_stop = 0; | ||
739 | |||
740 | /* Wrap up fast */ | ||
741 | if (adc->do_stop) | ||
742 | return GNUNET_SYSERR; | ||
743 | |||
744 | /* If the file doesn't exist (or is not statable for any other reason, | ||
745 | * skip it, and report it. | ||
746 | */ | ||
747 | if (0 != STAT (filename, &sbuf)) | ||
748 | { | ||
749 | do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode), | ||
750 | GNUNET_DIR_SCANNER_DOES_NOT_EXIST); | ||
751 | return GNUNET_OK; | ||
752 | } | ||
753 | |||
754 | /* Report the progress */ | ||
755 | do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode), | ||
756 | GNUNET_DIR_SCANNER_NEW_FILE); | ||
757 | if (do_stop) | ||
758 | { | ||
759 | /* We were asked to stop, acknowledge that and return */ | ||
760 | do_stop = write_progress (adc, filename, S_ISDIR (sbuf.st_mode), | ||
761 | GNUNET_DIR_SCANNER_ASKED_TO_STOP); | ||
762 | return GNUNET_SYSERR; | ||
763 | } | ||
764 | |||
765 | if (!S_ISDIR (sbuf.st_mode)) | ||
766 | extract_file (adc, filename); | ||
767 | else | ||
768 | { | ||
769 | item = make_item (adc->parent); | ||
770 | item->meta = GNUNET_CONTAINER_meta_data_create (); | ||
771 | |||
772 | item->is_directory = GNUNET_YES; | ||
773 | |||
774 | /* copy fields from adc */ | ||
775 | recurse_adc = *adc; | ||
776 | /* replace recurse_adc contents with the ones for this directory */ | ||
777 | recurse_adc.parent = item; | ||
778 | |||
779 | /* recurse into directory */ | ||
780 | GNUNET_DISK_directory_scan (filename, &scan_directory, &recurse_adc); | ||
781 | |||
782 | short_fn = GNUNET_STRINGS_get_short_name (filename); | ||
783 | |||
784 | item->filename = GNUNET_strdup (filename); | ||
785 | item->short_filename = GNUNET_strdup (short_fn); | ||
786 | |||
787 | if (adc->parent == NULL) | ||
788 | { | ||
789 | /* we're finished with the scan, make sure caller gets the top-level | ||
790 | * directory pointer | ||
791 | */ | ||
792 | adc->parent = item; | ||
793 | } | ||
794 | } | ||
795 | return GNUNET_OK; | ||
796 | } | ||
797 | |||
798 | /** | ||
799 | * Signals the scanner to finish the scan as fast as possible. | ||
800 | * Does not block. | ||
801 | * Can close the pipe if asked to, but that is only used by the | ||
802 | * internal call to this function during cleanup. The client | ||
803 | * must understand the consequences of closing the pipe too early. | ||
804 | * | ||
805 | * @param ds directory scanner structure | ||
806 | * @param close_pipe GNUNET_YES to close | ||
807 | */ | ||
808 | void | ||
809 | GNUNET_FS_directory_scan_finish (struct GNUNET_FS_DirScanner *ds, | ||
810 | int close_pipe) | ||
811 | { | ||
812 | #if WINDOWS | ||
813 | SetEvent (ds->stop); | ||
814 | #else | ||
815 | sem_post (&ds->stop); | ||
816 | #endif | ||
817 | if (close_pipe) | ||
818 | { | ||
819 | if (ds->progress_read_task != GNUNET_SCHEDULER_NO_TASK) | ||
820 | { | ||
821 | GNUNET_SCHEDULER_cancel (ds->progress_read_task); | ||
822 | ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; | ||
823 | } | ||
824 | GNUNET_DISK_pipe_close_end (ds->progress_pipe, GNUNET_DISK_PIPE_END_READ); | ||
825 | ds->progress_read = NULL; | ||
826 | } | ||
827 | } | ||
828 | |||
829 | /** | ||
830 | * Signals the scanner thread to finish (in case it isn't finishing | ||
831 | * already) and joins the scanner thread. Closes the pipes, frees the | ||
832 | * scanner contexts (both of them), returns the results of the scan. | ||
833 | * Results are valid (and have to be freed) even if the scanner had | ||
834 | * an error or was rushed to finish prematurely. | ||
835 | * Blocks until the scanner is finished. | ||
836 | * | ||
837 | * @param ds directory scanner structure | ||
838 | * @return the results of the scan (a directory tree) | ||
839 | */ | ||
840 | struct ShareTreeItem * | ||
841 | GNUNET_FS_directory_scan_cleanup (struct GNUNET_FS_DirScanner *ds) | ||
842 | { | ||
843 | struct ShareTreeItem *result; | ||
844 | |||
845 | GNUNET_FS_directory_scan_finish (ds, GNUNET_YES); | ||
846 | #if WINDOWS | ||
847 | WaitForSingleObject (ds->thread, INFINITE); | ||
848 | CloseHandle (ds->stop); | ||
849 | CloseHandle (ds->thread); | ||
850 | #else | ||
851 | pthread_join (ds->thread, NULL); | ||
852 | sem_destroy (&ds->stop); | ||
853 | pthread_detach (ds->thread); | ||
854 | #endif | ||
855 | |||
856 | GNUNET_DISK_pipe_close (ds->progress_pipe); | ||
857 | result = ds->adc->parent; | ||
858 | GNUNET_free (ds->adc); | ||
859 | GNUNET_free (ds); | ||
860 | return result; | ||
861 | } | ||
862 | |||
863 | /** | ||
864 | * The function from which the scanner thread starts | ||
865 | */ | ||
866 | #if WINDOWS | ||
867 | static DWORD | ||
868 | #else | ||
869 | static int | ||
870 | #endif | ||
871 | run_directory_scan_thread (struct AddDirContext *adc) | ||
872 | { | ||
873 | scan_directory (adc, adc->filename_expanded); | ||
874 | GNUNET_free (adc->filename_expanded); | ||
875 | if (adc->plugins != NULL) | ||
876 | EXTRACTOR_plugin_remove_all (adc->plugins); | ||
877 | /* Tell the initiator that we're finished, it can now join the thread */ | ||
878 | write_progress (adc, NULL, 0, GNUNET_DIR_SCANNER_FINISHED); | ||
879 | return 0; | ||
880 | } | ||
881 | |||
882 | /** | ||
883 | * Called every time there is data to read from the scanner. | ||
884 | * Calls the scanner progress handler. | ||
885 | * | ||
886 | * @param cls the closure (directory scanner object) | ||
887 | * @param tc task context in which the task is running | ||
888 | */ | ||
889 | static void | ||
890 | read_progress_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
891 | { | ||
892 | struct GNUNET_FS_DirScanner *ds; | ||
893 | int end_it = 0; | ||
894 | enum GNUNET_DirScannerProgressUpdateReason reason; | ||
895 | ssize_t rd; | ||
896 | ssize_t total_read; | ||
897 | |||
898 | size_t filename_len; | ||
899 | char is_directory; | ||
900 | char *filename; | ||
901 | |||
902 | ds = cls; | ||
903 | |||
904 | ds->progress_read_task = GNUNET_SCHEDULER_NO_TASK; | ||
905 | |||
906 | if (!(tc->reason & GNUNET_SCHEDULER_REASON_READ_READY)) | ||
907 | { | ||
908 | ds->progress_callback (ds->cls, ds, NULL, 0, GNUNET_DIR_SCANNER_SHUTDOWN); | ||
909 | return; | ||
910 | } | ||
911 | |||
912 | /* Read one message. If message is malformed or can't be read, end the scanner */ | ||
913 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &reason, sizeof (reason)); | ||
914 | while (rd > 0 && total_read < sizeof (reason)) | ||
915 | { | ||
916 | rd = GNUNET_DISK_file_read (ds->progress_read, | ||
917 | &((char *) &reason)[total_read], | ||
918 | sizeof (reason) - total_read); | ||
919 | if (rd > 0) | ||
920 | total_read += rd; | ||
921 | } | ||
922 | if (total_read != sizeof (reason) | ||
923 | || reason <= GNUNET_DIR_SCANNER_FIRST | ||
924 | || reason >= GNUNET_DIR_SCANNER_LAST) | ||
925 | { | ||
926 | end_it = 1; | ||
927 | reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR; | ||
928 | } | ||
929 | |||
930 | if (!end_it) | ||
931 | { | ||
932 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &filename_len, | ||
933 | sizeof (size_t)); | ||
934 | while (rd > 0 && total_read < sizeof (size_t)) | ||
935 | { | ||
936 | rd = GNUNET_DISK_file_read (ds->progress_read, | ||
937 | &((char *) &filename_len)[total_read], | ||
938 | sizeof (size_t) - total_read); | ||
939 | if (rd > 0) | ||
940 | total_read += rd; | ||
941 | } | ||
942 | if (rd != sizeof (size_t)) | ||
943 | { | ||
944 | end_it = 1; | ||
945 | reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR; | ||
946 | } | ||
947 | } | ||
948 | if (!end_it) | ||
949 | { | ||
950 | if (filename_len == 0) | ||
951 | end_it = 1; | ||
952 | else if (filename_len > MAX_PATH) | ||
953 | { | ||
954 | end_it = 1; | ||
955 | reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR; | ||
956 | } | ||
957 | } | ||
958 | if (!end_it) | ||
959 | { | ||
960 | filename = GNUNET_malloc (filename_len); | ||
961 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, filename, | ||
962 | filename_len); | ||
963 | while (rd > 0 && total_read < filename_len) | ||
964 | { | ||
965 | rd = GNUNET_DISK_file_read (ds->progress_read, &filename[total_read], | ||
966 | filename_len - total_read); | ||
967 | if (rd > 0) | ||
968 | total_read += rd; | ||
969 | } | ||
970 | if (rd != filename_len) | ||
971 | { | ||
972 | GNUNET_free (filename); | ||
973 | reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR; | ||
974 | end_it = 1; | ||
975 | } | ||
976 | } | ||
977 | if (!end_it && filename_len > 0) | ||
978 | { | ||
979 | total_read = rd = GNUNET_DISK_file_read (ds->progress_read, &is_directory, | ||
980 | sizeof (char)); | ||
981 | while (rd > 0 && total_read < sizeof (char)) | ||
982 | { | ||
983 | rd = GNUNET_DISK_file_read (ds->progress_read, &(&is_directory)[total_read], | ||
984 | sizeof (char) - total_read); | ||
985 | if (rd > 0) | ||
986 | total_read += rd; | ||
987 | } | ||
988 | if (rd != sizeof (char)) | ||
989 | { | ||
990 | GNUNET_free (filename); | ||
991 | reason = GNUNET_DIR_SCANNER_PROTOCOL_ERROR; | ||
992 | end_it = 1; | ||
993 | } | ||
994 | } | ||
995 | if (!end_it) | ||
996 | { | ||
997 | end_it = ds->progress_callback (ds->cls, ds, (const char *) filename, is_directory, reason); | ||
998 | GNUNET_free (filename); | ||
999 | if (!end_it) | ||
1000 | { | ||
1001 | ds->progress_read_task = GNUNET_SCHEDULER_add_read_file ( | ||
1002 | GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task, | ||
1003 | cls); | ||
1004 | } | ||
1005 | } | ||
1006 | else | ||
1007 | { | ||
1008 | ds->progress_callback (ds->cls, ds, NULL, 0, reason); | ||
1009 | } | ||
1010 | } | ||
1011 | |||
1012 | |||
1013 | /** | ||
1014 | * Start a directory scanner thread. | ||
1015 | * | ||
1016 | * @param filename name of the directory to scan | ||
1017 | * @param GNUNET_YES to not to run libextractor on files (only build a tree) | ||
1018 | * @param ex if not NULL, must be a list of extra plugins for extractor | ||
1019 | * @param cb the callback to call when there are scanning progress messages | ||
1020 | * @param cls closure for 'cb' | ||
1021 | * @return directory scanner object to be used for controlling the scanner | ||
1022 | */ | ||
1023 | struct GNUNET_FS_DirScanner * | ||
1024 | GNUNET_FS_directory_scan_start (const char *filename, | ||
1025 | int disable_extractor, const char *ex, | ||
1026 | GNUNET_FS_DirScannerProgressCallback cb, void *cls) | ||
1027 | { | ||
1028 | struct stat sbuf; | ||
1029 | struct AddDirContext *adc; | ||
1030 | char *filename_expanded; | ||
1031 | struct GNUNET_FS_DirScanner *ds; | ||
1032 | struct GNUNET_DISK_PipeHandle *progress_pipe; | ||
1033 | int ok; | ||
1034 | |||
1035 | if (0 != STAT (filename, &sbuf)) | ||
1036 | return NULL; | ||
1037 | /* TODO: consider generalizing this for files too! */ | ||
1038 | if (!S_ISDIR (sbuf.st_mode)) | ||
1039 | { | ||
1040 | GNUNET_break (0); | ||
1041 | return NULL; | ||
1042 | } | ||
1043 | /* scan_directory() is guaranteed to be given expanded filenames, | ||
1044 | * so expand we will! | ||
1045 | */ | ||
1046 | filename_expanded = GNUNET_STRINGS_filename_expand (filename); | ||
1047 | if (filename_expanded == NULL) | ||
1048 | return NULL; | ||
1049 | |||
1050 | progress_pipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, GNUNET_NO); | ||
1051 | if (progress_pipe == NULL) | ||
1052 | { | ||
1053 | GNUNET_free (filename_expanded); | ||
1054 | return NULL; | ||
1055 | } | ||
1056 | |||
1057 | adc = GNUNET_malloc (sizeof (struct AddDirContext)); | ||
1058 | |||
1059 | ds = GNUNET_malloc (sizeof (struct GNUNET_FS_DirScanner)); | ||
1060 | |||
1061 | ds->adc = adc; | ||
1062 | |||
1063 | #if WINDOWS | ||
1064 | ds->stop = CreateEvent (NULL, TRUE, FALSE, NULL); | ||
1065 | adc->stop = ds->stop; | ||
1066 | ok = ds->stop != INVALID_HANDLE_VALUE; | ||
1067 | #else | ||
1068 | ok = !sem_init (&ds->stop, 0, 0); | ||
1069 | adc = &ds->stop; | ||
1070 | #endif | ||
1071 | if (!ok) | ||
1072 | { | ||
1073 | GNUNET_free (adc); | ||
1074 | GNUNET_free (ds); | ||
1075 | GNUNET_free (filename_expanded); | ||
1076 | GNUNET_DISK_pipe_close (progress_pipe); | ||
1077 | return NULL; | ||
1078 | } | ||
1079 | |||
1080 | adc->plugins = NULL; | ||
1081 | if (!disable_extractor) | ||
1082 | { | ||
1083 | adc->plugins = EXTRACTOR_plugin_add_defaults ( | ||
1084 | EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
1085 | if (ex && strlen (ex) > 0) | ||
1086 | adc->plugins = EXTRACTOR_plugin_add_config (adc->plugins, ex, | ||
1087 | EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
1088 | } | ||
1089 | |||
1090 | adc->filename_expanded = filename_expanded; | ||
1091 | adc->progress_write = GNUNET_DISK_pipe_handle (progress_pipe, | ||
1092 | GNUNET_DISK_PIPE_END_WRITE); | ||
1093 | |||
1094 | |||
1095 | ds->progress_read = GNUNET_DISK_pipe_handle (progress_pipe, | ||
1096 | GNUNET_DISK_PIPE_END_READ); | ||
1097 | |||
1098 | #if WINDOWS | ||
1099 | ds->thread = CreateThread (NULL, 0, | ||
1100 | (LPTHREAD_START_ROUTINE) &run_directory_scan_thread, (LPVOID) adc, | ||
1101 | 0, NULL); | ||
1102 | ok = ds->thread != NULL; | ||
1103 | #else | ||
1104 | ok = !pthread_create (&ds->thread, NULL, &run_directory_scan_thread, | ||
1105 | (void *) adc); | ||
1106 | #endif | ||
1107 | if (!ok) | ||
1108 | { | ||
1109 | GNUNET_free (adc); | ||
1110 | GNUNET_free (filename_expanded); | ||
1111 | GNUNET_DISK_pipe_close (progress_pipe); | ||
1112 | GNUNET_free (ds); | ||
1113 | return NULL; | ||
1114 | } | ||
1115 | |||
1116 | ds->progress_callback = cb; | ||
1117 | ds->cls = cls; | ||
1118 | ds->adc = adc; | ||
1119 | ds->progress_pipe = progress_pipe; | ||
1120 | |||
1121 | ds->progress_read_task = GNUNET_SCHEDULER_add_read_file ( | ||
1122 | GNUNET_TIME_UNIT_FOREVER_REL, ds->progress_read, &read_progress_task, | ||
1123 | ds); | ||
1124 | |||
1125 | return ds; | ||
1126 | } | ||
1127 | |||
1128 | /** | ||
1129 | * Task that post-processes the share item tree. | ||
1130 | * This processing has to be done in the main thread, because | ||
1131 | * it requires access to libgcrypt's hashing functions, and | ||
1132 | * libgcrypt is not thread-safe without some special magic. | ||
1133 | * | ||
1134 | * @param cls top of the stack | ||
1135 | * @param tc task context | ||
1136 | */ | ||
1137 | static void | ||
1138 | trim_share_tree_task (void *cls, | ||
1139 | const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
1140 | { | ||
1141 | struct ProcessMetadataStackItem *stack = cls; | ||
1142 | struct ProcessMetadataStackItem *next = stack; | ||
1143 | /* FIXME: figure out what to do when tc says we're shutting down */ | ||
1144 | |||
1145 | /* item == NULL means that we've just finished going over the children of | ||
1146 | * current directory. | ||
1147 | */ | ||
1148 | if (stack->item == NULL) | ||
1149 | { | ||
1150 | if (stack->parent->item != NULL) | ||
1151 | { | ||
1152 | /* end of a directory */ | ||
1153 | struct GNUNET_FS_Uri *ksk; | ||
1154 | |||
1155 | /* use keyword and metadata counters to create lists of keywords to move | ||
1156 | * and metadata to copy. | ||
1157 | */ | ||
1158 | process_keywords_and_metadata (stack, &stack->parent->exclude_ksk, &stack->parent->item->meta); | ||
1159 | |||
1160 | /* create keywords from metadata (copies all text-metadata as keywords, | ||
1161 | * AND parses the directory name we've just added, producing even more | ||
1162 | * keywords. | ||
1163 | * then merge these keywords with the ones moved from children. | ||
1164 | */ | ||
1165 | ksk = GNUNET_FS_uri_ksk_create_from_meta_data (stack->parent->item->meta); | ||
1166 | stack->parent->item->ksk_uri = GNUNET_FS_uri_ksk_merge (ksk, stack->parent->exclude_ksk); | ||
1167 | GNUNET_FS_uri_destroy (ksk); | ||
1168 | |||
1169 | /* remove moved keywords from children (complete the move) */ | ||
1170 | remove_keywords (stack->parent, stack->parent->item); | ||
1171 | GNUNET_FS_uri_destroy (stack->parent->exclude_ksk); | ||
1172 | |||
1173 | /* go up the stack */ | ||
1174 | next = stack->parent; | ||
1175 | GNUNET_free (stack); | ||
1176 | next->end_directory = GNUNET_YES; | ||
1177 | } | ||
1178 | else | ||
1179 | { | ||
1180 | /* we've just finished processing the toplevel directory */ | ||
1181 | struct ProcessMetadataContext *ctx = stack->ctx; | ||
1182 | next = NULL; | ||
1183 | GNUNET_SCHEDULER_add_continuation (ctx->cb, ctx->cls, | ||
1184 | GNUNET_SCHEDULER_REASON_PREREQ_DONE); | ||
1185 | GNUNET_free (stack->parent); | ||
1186 | GNUNET_free (stack); | ||
1187 | GNUNET_free (ctx); | ||
1188 | } | ||
1189 | } | ||
1190 | else if (stack->item->is_directory | ||
1191 | && !stack->end_directory | ||
1192 | && stack->item->children_head != NULL) | ||
1193 | { | ||
1194 | /* recurse into subdirectory */ | ||
1195 | next = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); | ||
1196 | next->ctx = stack->ctx; | ||
1197 | next->item = stack->item->children_head; | ||
1198 | next->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1199 | next->metacounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1200 | next->dir_entry_count = 0; | ||
1201 | next->parent = stack; | ||
1202 | } | ||
1203 | else | ||
1204 | { | ||
1205 | /* process a child entry (a file or a directory) and move to the next one*/ | ||
1206 | if (stack->item->is_directory) | ||
1207 | stack->end_directory = GNUNET_NO; | ||
1208 | stack->dir_entry_count++; | ||
1209 | GNUNET_CONTAINER_meta_data_iterate (stack->item->meta, &add_to_meta_counter, stack->metacounter); | ||
1210 | |||
1211 | if (stack->item->is_directory) | ||
1212 | { | ||
1213 | char *user = getenv ("USER"); | ||
1214 | if ((user == NULL) || (0 != strncasecmp (user, stack->item->short_filename, strlen(user)))) | ||
1215 | { | ||
1216 | /* only use filename if it doesn't match $USER */ | ||
1217 | GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>", | ||
1218 | EXTRACTOR_METATYPE_FILENAME, | ||
1219 | EXTRACTOR_METAFORMAT_UTF8, | ||
1220 | "text/plain", stack->item->short_filename, | ||
1221 | strlen (stack->item->short_filename) + 1); | ||
1222 | GNUNET_CONTAINER_meta_data_insert (stack->item->meta, "<libgnunetfs>", | ||
1223 | EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, | ||
1224 | EXTRACTOR_METAFORMAT_UTF8, | ||
1225 | "text/plain", stack->item->short_filename, | ||
1226 | strlen (stack->item->short_filename) + 1); | ||
1227 | } | ||
1228 | } | ||
1229 | |||
1230 | stack->item->ksk_uri = GNUNET_FS_uri_ksk_create_from_meta_data (stack->item->meta); | ||
1231 | GNUNET_FS_uri_ksk_get_keywords (stack->item->ksk_uri, &add_to_keyword_counter, stack->keywordcounter); | ||
1232 | stack->item = stack->item->next; | ||
1233 | } | ||
1234 | /* Call this task again later, if there are more entries to process */ | ||
1235 | if (next) | ||
1236 | GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, next, | ||
1237 | GNUNET_SCHEDULER_REASON_PREREQ_DONE); | ||
1238 | } | ||
1239 | |||
1240 | /** | ||
1241 | * Process a share item tree, moving frequent keywords up and | ||
1242 | * copying frequent metadata up. | ||
1243 | * | ||
1244 | * @param toplevel toplevel directory in the tree, returned by the scanner | ||
1245 | * @param cb called after processing is done | ||
1246 | * @param cls closure for 'cb' | ||
1247 | */ | ||
1248 | struct ProcessMetadataContext * | ||
1249 | GNUNET_FS_trim_share_tree (struct ShareTreeItem *toplevel, | ||
1250 | GNUNET_SCHEDULER_Task cb, void *cls) | ||
1251 | { | ||
1252 | struct ProcessMetadataContext *ret; | ||
1253 | |||
1254 | if (toplevel == NULL) | ||
1255 | { | ||
1256 | struct GNUNET_SCHEDULER_TaskContext tc; | ||
1257 | tc.reason = GNUNET_SCHEDULER_REASON_PREREQ_DONE; | ||
1258 | cb (cls, &tc); | ||
1259 | return NULL; | ||
1260 | } | ||
1261 | |||
1262 | ret = GNUNET_malloc (sizeof (struct ProcessMetadataContext)); | ||
1263 | ret->toplevel = toplevel; | ||
1264 | ret->stack = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); | ||
1265 | ret->stack->ctx = ret; | ||
1266 | ret->stack->item = toplevel; | ||
1267 | ret->stack->keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1268 | ret->stack->metacounter = GNUNET_CONTAINER_multihashmap_create (1024); | ||
1269 | ret->stack->dir_entry_count = 0; | ||
1270 | ret->stack->end_directory = GNUNET_NO; | ||
1271 | |||
1272 | /* dummy stack entry that tells us we're at the top of the stack */ | ||
1273 | ret->stack->parent = GNUNET_malloc (sizeof (struct ProcessMetadataStackItem)); | ||
1274 | ret->stack->parent->ctx = ret; | ||
1275 | |||
1276 | ret->cb = cb; | ||
1277 | ret->cls = cls; | ||
1278 | |||
1279 | GNUNET_SCHEDULER_add_continuation (&trim_share_tree_task, ret->stack, | ||
1280 | GNUNET_SCHEDULER_REASON_PREREQ_DONE); | ||
1281 | return ret; | ||
1282 | } \ No newline at end of file | ||