diff options
Diffstat (limited to 'src/service/fs/fs_sharetree.c')
-rw-r--r-- | src/service/fs/fs_sharetree.c | 457 |
1 files changed, 457 insertions, 0 deletions
diff --git a/src/service/fs/fs_sharetree.c b/src/service/fs/fs_sharetree.c new file mode 100644 index 000000000..6c246a3ad --- /dev/null +++ b/src/service/fs/fs_sharetree.c | |||
@@ -0,0 +1,457 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2005-2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file fs/fs_sharetree.c | ||
23 | * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree | ||
24 | * @author LRN | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | |||
29 | #include "gnunet_fs_service.h" | ||
30 | #include "gnunet_scheduler_lib.h" | ||
31 | #include <pthread.h> | ||
32 | |||
33 | |||
34 | /** | ||
35 | * Entry for each unique keyword to track how often | ||
36 | * it occurred. Contains the keyword and the counter. | ||
37 | */ | ||
38 | struct KeywordCounter | ||
39 | { | ||
40 | /** | ||
41 | * This is a doubly-linked list | ||
42 | */ | ||
43 | struct KeywordCounter *prev; | ||
44 | |||
45 | /** | ||
46 | * This is a doubly-linked list | ||
47 | */ | ||
48 | struct KeywordCounter *next; | ||
49 | |||
50 | /** | ||
51 | * Keyword that was found. | ||
52 | */ | ||
53 | const char *value; | ||
54 | |||
55 | /** | ||
56 | * How many files have this keyword? | ||
57 | */ | ||
58 | unsigned int count; | ||
59 | }; | ||
60 | |||
61 | |||
62 | /** | ||
63 | * Aggregate information we keep for meta data in each directory. | ||
64 | */ | ||
65 | struct MetaCounter | ||
66 | { | ||
67 | /** | ||
68 | * This is a doubly-linked list | ||
69 | */ | ||
70 | struct MetaCounter *prev; | ||
71 | |||
72 | /** | ||
73 | * This is a doubly-linked list | ||
74 | */ | ||
75 | struct MetaCounter *next; | ||
76 | |||
77 | /** | ||
78 | * Name of the plugin that provided that piece of metadata | ||
79 | */ | ||
80 | const char *plugin_name; | ||
81 | |||
82 | /** | ||
83 | * MIME-type of the metadata itself | ||
84 | */ | ||
85 | const char *data_mime_type; | ||
86 | |||
87 | /** | ||
88 | * The actual meta data. | ||
89 | */ | ||
90 | const char *data; | ||
91 | |||
92 | /** | ||
93 | * Number of bytes in 'data'. | ||
94 | */ | ||
95 | size_t data_size; | ||
96 | |||
97 | /** | ||
98 | * Type of the data | ||
99 | */ | ||
100 | enum EXTRACTOR_MetaType type; | ||
101 | |||
102 | /** | ||
103 | * Format of the data | ||
104 | */ | ||
105 | enum EXTRACTOR_MetaFormat format; | ||
106 | |||
107 | /** | ||
108 | * How many files have meta entries matching this value? | ||
109 | * (type and format do not have to match). | ||
110 | */ | ||
111 | unsigned int count; | ||
112 | }; | ||
113 | |||
114 | |||
115 | /** | ||
116 | * A structure that forms a singly-linked list that serves as a stack | ||
117 | * for metadata-processing function. | ||
118 | */ | ||
119 | struct TrimContext | ||
120 | { | ||
121 | /** | ||
122 | * Map from the hash over the keyword to an 'struct KeywordCounter *' | ||
123 | * counter that says how often this keyword was | ||
124 | * encountered in the current directory. | ||
125 | */ | ||
126 | struct GNUNET_CONTAINER_MultiHashMap *keywordcounter; | ||
127 | |||
128 | /** | ||
129 | * Map from the hash over the metadata to an 'struct MetaCounter *' | ||
130 | * counter that says how often this metadata was | ||
131 | * encountered in the current directory. | ||
132 | */ | ||
133 | struct GNUNET_CONTAINER_MultiHashMap *metacounter; | ||
134 | |||
135 | /** | ||
136 | * Position we are currently manipulating. | ||
137 | */ | ||
138 | struct GNUNET_FS_ShareTreeItem *pos; | ||
139 | |||
140 | /** | ||
141 | * Number of times an item has to be found to be moved to the parent. | ||
142 | */ | ||
143 | unsigned int move_threshold; | ||
144 | }; | ||
145 | |||
146 | |||
147 | /** | ||
148 | * Add the given keyword to the keyword statistics tracker. | ||
149 | * | ||
150 | * @param cls the multihashmap we store the keyword counters in | ||
151 | * @param keyword the keyword to count | ||
152 | * @param is_mandatory ignored | ||
153 | * @return always GNUNET_OK | ||
154 | */ | ||
155 | static int | ||
156 | add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory) | ||
157 | { | ||
158 | struct GNUNET_CONTAINER_MultiHashMap *mcm = cls; | ||
159 | struct KeywordCounter *cnt; | ||
160 | struct GNUNET_HashCode hc; | ||
161 | size_t klen; | ||
162 | |||
163 | klen = strlen (keyword) + 1; | ||
164 | GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); | ||
165 | cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc); | ||
166 | if (cnt == NULL) | ||
167 | { | ||
168 | cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen); | ||
169 | cnt->value = (const char *) &cnt[1]; | ||
170 | GNUNET_memcpy (&cnt[1], keyword, klen); | ||
171 | GNUNET_assert (GNUNET_OK == | ||
172 | GNUNET_CONTAINER_multihashmap_put (mcm, | ||
173 | &hc, cnt, | ||
174 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY)); | ||
175 | } | ||
176 | cnt->count++; | ||
177 | return GNUNET_OK; | ||
178 | } | ||
179 | |||
180 | |||
181 | /** | ||
182 | * Function called on each meta data item. Increments the | ||
183 | * respective counter. | ||
184 | * | ||
185 | * @param cls the container multihashmap to update | ||
186 | * @param plugin_name name of the plugin that produced this value; | ||
187 | * special values can be used (e.g. '<zlib>' for zlib being | ||
188 | * used in the main libextractor library and yielding | ||
189 | * meta data). | ||
190 | * @param type libextractor-type describing the meta data | ||
191 | * @param format basic format information about data | ||
192 | * @param data_mime_type mime-type of data (not of the original file); | ||
193 | * can be NULL (if mime-type is not known) | ||
194 | * @param data actual meta-data found | ||
195 | * @param data_len number of bytes in data | ||
196 | * @return 0 to continue extracting / iterating | ||
197 | */ | ||
198 | static int | ||
199 | add_to_meta_counter (void *cls, const char *plugin_name, | ||
200 | enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat | ||
201 | format, | ||
202 | const char *data_mime_type, const char *data, size_t | ||
203 | data_len) | ||
204 | { | ||
205 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; | ||
206 | struct GNUNET_HashCode key; | ||
207 | struct MetaCounter *cnt; | ||
208 | |||
209 | GNUNET_CRYPTO_hash (data, data_len, &key); | ||
210 | cnt = GNUNET_CONTAINER_multihashmap_get (map, &key); | ||
211 | if (NULL == cnt) | ||
212 | { | ||
213 | cnt = GNUNET_new (struct MetaCounter); | ||
214 | cnt->data = data; | ||
215 | cnt->data_size = data_len; | ||
216 | cnt->plugin_name = plugin_name; | ||
217 | cnt->type = type; | ||
218 | cnt->format = format; | ||
219 | cnt->data_mime_type = data_mime_type; | ||
220 | GNUNET_assert (GNUNET_OK == | ||
221 | GNUNET_CONTAINER_multihashmap_put (map, | ||
222 | &key, cnt, | ||
223 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY)); | ||
224 | } | ||
225 | cnt->count++; | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | |||
230 | /** | ||
231 | * Remove keywords above the threshold. | ||
232 | * | ||
233 | * @param cls the 'struct TrimContext' with the pos to remove the keywords from | ||
234 | * @param keyword the keyword to check | ||
235 | * @param is_mandatory ignored | ||
236 | * @return always GNUNET_OK | ||
237 | */ | ||
238 | static int | ||
239 | remove_high_frequency_keywords (void *cls, const char *keyword, int | ||
240 | is_mandatory) | ||
241 | { | ||
242 | struct TrimContext *tc = cls; | ||
243 | struct KeywordCounter *counter; | ||
244 | struct GNUNET_HashCode hc; | ||
245 | size_t klen; | ||
246 | |||
247 | klen = strlen (keyword) + 1; | ||
248 | GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); | ||
249 | counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc); | ||
250 | GNUNET_assert (NULL != counter); | ||
251 | if (counter->count < tc->move_threshold) | ||
252 | return GNUNET_OK; | ||
253 | GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri, | ||
254 | counter->value); | ||
255 | return GNUNET_OK; | ||
256 | } | ||
257 | |||
258 | |||
259 | /** | ||
260 | * Move "frequent" keywords over to the target ksk uri, free the | ||
261 | * counters. | ||
262 | * | ||
263 | * @param cls the 'struct TrimContext' | ||
264 | * @param key key of the entry | ||
265 | * @param value the 'struct KeywordCounter' | ||
266 | * @return GNUNET_YES (always) | ||
267 | */ | ||
268 | static int | ||
269 | migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode *key, | ||
270 | void *value) | ||
271 | { | ||
272 | struct TrimContext *tc = cls; | ||
273 | struct KeywordCounter *counter = value; | ||
274 | |||
275 | if (counter->count >= tc->move_threshold) | ||
276 | { | ||
277 | if (NULL == tc->pos->ksk_uri) | ||
278 | tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, | ||
279 | &counter->value); | ||
280 | else | ||
281 | GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, | ||
282 | GNUNET_NO); | ||
283 | } | ||
284 | GNUNET_assert (GNUNET_YES == | ||
285 | GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter, | ||
286 | key, | ||
287 | counter)); | ||
288 | GNUNET_free (counter); | ||
289 | return GNUNET_YES; | ||
290 | } | ||
291 | |||
292 | |||
293 | /** | ||
294 | * Copy "frequent" metadata items over to the | ||
295 | * target metadata container, free the counters. | ||
296 | * | ||
297 | * @param cls the 'struct TrimContext' | ||
298 | * @param key key of the entry | ||
299 | * @param value the 'struct KeywordCounter' | ||
300 | * @return GNUNET_YES (always) | ||
301 | */ | ||
302 | static int | ||
303 | migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode *key, | ||
304 | void *value) | ||
305 | { | ||
306 | struct TrimContext *tc = cls; | ||
307 | struct MetaCounter *counter = value; | ||
308 | |||
309 | if (counter->count >= tc->move_threshold) | ||
310 | { | ||
311 | if (NULL == tc->pos->meta) | ||
312 | tc->pos->meta = GNUNET_FS_meta_data_create (); | ||
313 | GNUNET_FS_meta_data_insert (tc->pos->meta, | ||
314 | counter->plugin_name, | ||
315 | counter->type, | ||
316 | counter->format, | ||
317 | counter->data_mime_type, counter->data, | ||
318 | counter->data_size); | ||
319 | } | ||
320 | GNUNET_assert (GNUNET_YES == | ||
321 | GNUNET_CONTAINER_multihashmap_remove (tc->metacounter, | ||
322 | key, | ||
323 | counter)); | ||
324 | GNUNET_free (counter); | ||
325 | return GNUNET_YES; | ||
326 | } | ||
327 | |||
328 | |||
329 | /** | ||
330 | * Process a share item tree, moving frequent keywords up and | ||
331 | * copying frequent metadata up. | ||
332 | * | ||
333 | * @param tc trim context with hash maps to use | ||
334 | * @param tree tree to trim | ||
335 | */ | ||
336 | static void | ||
337 | share_tree_trim (struct TrimContext *tc, | ||
338 | struct GNUNET_FS_ShareTreeItem *tree) | ||
339 | { | ||
340 | struct GNUNET_FS_ShareTreeItem *pos; | ||
341 | unsigned int num_children; | ||
342 | |||
343 | /* first, trim all children */ | ||
344 | num_children = 0; | ||
345 | for (pos = tree->children_head; NULL != pos; pos = pos->next) | ||
346 | { | ||
347 | share_tree_trim (tc, pos); | ||
348 | num_children++; | ||
349 | } | ||
350 | |||
351 | /* consider adding filename to directory meta data */ | ||
352 | if (tree->is_directory == GNUNET_YES) | ||
353 | { | ||
354 | const char *user = getenv ("USER"); | ||
355 | if ((user == NULL) || | ||
356 | (0 != strncasecmp (user, tree->short_filename, strlen (user)))) | ||
357 | { | ||
358 | /* only use filename if it doesn't match $USER */ | ||
359 | if (NULL == tree->meta) | ||
360 | tree->meta = GNUNET_FS_meta_data_create (); | ||
361 | GNUNET_FS_meta_data_insert (tree->meta, "<libgnunetfs>", | ||
362 | EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, | ||
363 | EXTRACTOR_METAFORMAT_UTF8, | ||
364 | "text/plain", tree->short_filename, | ||
365 | strlen (tree->short_filename) + 1); | ||
366 | } | ||
367 | } | ||
368 | |||
369 | if (1 >= num_children) | ||
370 | return; /* nothing to trim */ | ||
371 | |||
372 | /* now, count keywords and meta data in children */ | ||
373 | for (pos = tree->children_head; NULL != pos; pos = pos->next) | ||
374 | { | ||
375 | if (NULL != pos->meta) | ||
376 | GNUNET_FS_meta_data_iterate (pos->meta, &add_to_meta_counter, | ||
377 | tc->metacounter); | ||
378 | if (NULL != pos->ksk_uri) | ||
379 | GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, | ||
380 | tc->keywordcounter); | ||
381 | } | ||
382 | |||
383 | /* calculate threshold for moving keywords / meta data */ | ||
384 | tc->move_threshold = 1 + (num_children / 2); | ||
385 | |||
386 | /* remove high-frequency keywords from children */ | ||
387 | for (pos = tree->children_head; NULL != pos; pos = pos->next) | ||
388 | { | ||
389 | tc->pos = pos; | ||
390 | if (NULL != pos->ksk_uri) | ||
391 | { | ||
392 | struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri); | ||
393 | GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy, | ||
394 | &remove_high_frequency_keywords, tc); | ||
395 | GNUNET_FS_uri_destroy (ksk_uri_copy); | ||
396 | } | ||
397 | } | ||
398 | |||
399 | /* add high-frequency meta data and keywords to parent */ | ||
400 | tc->pos = tree; | ||
401 | GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter, | ||
402 | &migrate_and_drop_keywords, | ||
403 | tc); | ||
404 | GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter, | ||
405 | &migrate_and_drop_metadata, | ||
406 | tc); | ||
407 | } | ||
408 | |||
409 | |||
410 | /** | ||
411 | * Process a share item tree, moving frequent keywords up and | ||
412 | * copying frequent metadata up. | ||
413 | * | ||
414 | * @param toplevel toplevel directory in the tree, returned by the scanner | ||
415 | */ | ||
416 | void | ||
417 | GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel) | ||
418 | { | ||
419 | struct TrimContext tc; | ||
420 | |||
421 | if (toplevel == NULL) | ||
422 | return; | ||
423 | tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO); | ||
424 | tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO); | ||
425 | share_tree_trim (&tc, toplevel); | ||
426 | GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter); | ||
427 | GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter); | ||
428 | } | ||
429 | |||
430 | |||
431 | /** | ||
432 | * Release memory of a share item tree. | ||
433 | * | ||
434 | * @param toplevel toplevel of the tree to be freed | ||
435 | */ | ||
436 | void | ||
437 | GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel) | ||
438 | { | ||
439 | struct GNUNET_FS_ShareTreeItem *pos; | ||
440 | |||
441 | while (NULL != (pos = toplevel->children_head)) | ||
442 | GNUNET_FS_share_tree_free (pos); | ||
443 | if (NULL != toplevel->parent) | ||
444 | GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head, | ||
445 | toplevel->parent->children_tail, | ||
446 | toplevel); | ||
447 | if (NULL != toplevel->meta) | ||
448 | GNUNET_FS_meta_data_destroy (toplevel->meta); | ||
449 | if (NULL != toplevel->ksk_uri) | ||
450 | GNUNET_FS_uri_destroy (toplevel->ksk_uri); | ||
451 | GNUNET_free (toplevel->filename); | ||
452 | GNUNET_free (toplevel->short_filename); | ||
453 | GNUNET_free (toplevel); | ||
454 | } | ||
455 | |||
456 | |||
457 | /* end fs_sharetree.c */ | ||