diff options
Diffstat (limited to 'src/fs/fs_sharetree.c')
-rw-r--r-- | src/fs/fs_sharetree.c | 456 |
1 files changed, 0 insertions, 456 deletions
diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c deleted file mode 100644 index 3610b202e..000000000 --- a/src/fs/fs_sharetree.c +++ /dev/null | |||
@@ -1,456 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2005-2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file fs/fs_sharetree.c | ||
23 | * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree | ||
24 | * @author LRN | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_fs_service.h" | ||
29 | #include "gnunet_scheduler_lib.h" | ||
30 | #include <pthread.h> | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Entry for each unique keyword to track how often | ||
35 | * it occurred. Contains the keyword and the counter. | ||
36 | */ | ||
37 | struct KeywordCounter | ||
38 | { | ||
39 | /** | ||
40 | * This is a doubly-linked list | ||
41 | */ | ||
42 | struct KeywordCounter *prev; | ||
43 | |||
44 | /** | ||
45 | * This is a doubly-linked list | ||
46 | */ | ||
47 | struct KeywordCounter *next; | ||
48 | |||
49 | /** | ||
50 | * Keyword that was found. | ||
51 | */ | ||
52 | const char *value; | ||
53 | |||
54 | /** | ||
55 | * How many files have this keyword? | ||
56 | */ | ||
57 | unsigned int count; | ||
58 | }; | ||
59 | |||
60 | |||
61 | /** | ||
62 | * Aggregate information we keep for meta data in each directory. | ||
63 | */ | ||
64 | struct MetaCounter | ||
65 | { | ||
66 | /** | ||
67 | * This is a doubly-linked list | ||
68 | */ | ||
69 | struct MetaCounter *prev; | ||
70 | |||
71 | /** | ||
72 | * This is a doubly-linked list | ||
73 | */ | ||
74 | struct MetaCounter *next; | ||
75 | |||
76 | /** | ||
77 | * Name of the plugin that provided that piece of metadata | ||
78 | */ | ||
79 | const char *plugin_name; | ||
80 | |||
81 | /** | ||
82 | * MIME-type of the metadata itself | ||
83 | */ | ||
84 | const char *data_mime_type; | ||
85 | |||
86 | /** | ||
87 | * The actual meta data. | ||
88 | */ | ||
89 | const char *data; | ||
90 | |||
91 | /** | ||
92 | * Number of bytes in 'data'. | ||
93 | */ | ||
94 | size_t data_size; | ||
95 | |||
96 | /** | ||
97 | * Type of the data | ||
98 | */ | ||
99 | enum EXTRACTOR_MetaType type; | ||
100 | |||
101 | /** | ||
102 | * Format of the data | ||
103 | */ | ||
104 | enum EXTRACTOR_MetaFormat format; | ||
105 | |||
106 | /** | ||
107 | * How many files have meta entries matching this value? | ||
108 | * (type and format do not have to match). | ||
109 | */ | ||
110 | unsigned int count; | ||
111 | }; | ||
112 | |||
113 | |||
114 | /** | ||
115 | * A structure that forms a singly-linked list that serves as a stack | ||
116 | * for metadata-processing function. | ||
117 | */ | ||
118 | struct TrimContext | ||
119 | { | ||
120 | /** | ||
121 | * Map from the hash over the keyword to an 'struct KeywordCounter *' | ||
122 | * counter that says how often this keyword was | ||
123 | * encountered in the current directory. | ||
124 | */ | ||
125 | struct GNUNET_CONTAINER_MultiHashMap *keywordcounter; | ||
126 | |||
127 | /** | ||
128 | * Map from the hash over the metadata to an 'struct MetaCounter *' | ||
129 | * counter that says how often this metadata was | ||
130 | * encountered in the current directory. | ||
131 | */ | ||
132 | struct GNUNET_CONTAINER_MultiHashMap *metacounter; | ||
133 | |||
134 | /** | ||
135 | * Position we are currently manipulating. | ||
136 | */ | ||
137 | struct GNUNET_FS_ShareTreeItem *pos; | ||
138 | |||
139 | /** | ||
140 | * Number of times an item has to be found to be moved to the parent. | ||
141 | */ | ||
142 | unsigned int move_threshold; | ||
143 | }; | ||
144 | |||
145 | |||
146 | /** | ||
147 | * Add the given keyword to the keyword statistics tracker. | ||
148 | * | ||
149 | * @param cls the multihashmap we store the keyword counters in | ||
150 | * @param keyword the keyword to count | ||
151 | * @param is_mandatory ignored | ||
152 | * @return always GNUNET_OK | ||
153 | */ | ||
154 | static int | ||
155 | add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory) | ||
156 | { | ||
157 | struct GNUNET_CONTAINER_MultiHashMap *mcm = cls; | ||
158 | struct KeywordCounter *cnt; | ||
159 | struct GNUNET_HashCode hc; | ||
160 | size_t klen; | ||
161 | |||
162 | klen = strlen (keyword) + 1; | ||
163 | GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); | ||
164 | cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc); | ||
165 | if (cnt == NULL) | ||
166 | { | ||
167 | cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen); | ||
168 | cnt->value = (const char *) &cnt[1]; | ||
169 | GNUNET_memcpy (&cnt[1], keyword, klen); | ||
170 | GNUNET_assert (GNUNET_OK == | ||
171 | GNUNET_CONTAINER_multihashmap_put (mcm, | ||
172 | &hc, cnt, | ||
173 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY)); | ||
174 | } | ||
175 | cnt->count++; | ||
176 | return GNUNET_OK; | ||
177 | } | ||
178 | |||
179 | |||
180 | /** | ||
181 | * Function called on each meta data item. Increments the | ||
182 | * respective counter. | ||
183 | * | ||
184 | * @param cls the container multihashmap to update | ||
185 | * @param plugin_name name of the plugin that produced this value; | ||
186 | * special values can be used (e.g. '<zlib>' for zlib being | ||
187 | * used in the main libextractor library and yielding | ||
188 | * meta data). | ||
189 | * @param type libextractor-type describing the meta data | ||
190 | * @param format basic format information about data | ||
191 | * @param data_mime_type mime-type of data (not of the original file); | ||
192 | * can be NULL (if mime-type is not known) | ||
193 | * @param data actual meta-data found | ||
194 | * @param data_len number of bytes in data | ||
195 | * @return 0 to continue extracting / iterating | ||
196 | */ | ||
197 | static int | ||
198 | add_to_meta_counter (void *cls, const char *plugin_name, | ||
199 | enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat | ||
200 | format, | ||
201 | const char *data_mime_type, const char *data, size_t | ||
202 | data_len) | ||
203 | { | ||
204 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; | ||
205 | struct GNUNET_HashCode key; | ||
206 | struct MetaCounter *cnt; | ||
207 | |||
208 | GNUNET_CRYPTO_hash (data, data_len, &key); | ||
209 | cnt = GNUNET_CONTAINER_multihashmap_get (map, &key); | ||
210 | if (NULL == cnt) | ||
211 | { | ||
212 | cnt = GNUNET_new (struct MetaCounter); | ||
213 | cnt->data = data; | ||
214 | cnt->data_size = data_len; | ||
215 | cnt->plugin_name = plugin_name; | ||
216 | cnt->type = type; | ||
217 | cnt->format = format; | ||
218 | cnt->data_mime_type = data_mime_type; | ||
219 | GNUNET_assert (GNUNET_OK == | ||
220 | GNUNET_CONTAINER_multihashmap_put (map, | ||
221 | &key, cnt, | ||
222 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY)); | ||
223 | } | ||
224 | cnt->count++; | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | |||
229 | /** | ||
230 | * Remove keywords above the threshold. | ||
231 | * | ||
232 | * @param cls the 'struct TrimContext' with the pos to remove the keywords from | ||
233 | * @param keyword the keyword to check | ||
234 | * @param is_mandatory ignored | ||
235 | * @return always GNUNET_OK | ||
236 | */ | ||
237 | static int | ||
238 | remove_high_frequency_keywords (void *cls, const char *keyword, int | ||
239 | is_mandatory) | ||
240 | { | ||
241 | struct TrimContext *tc = cls; | ||
242 | struct KeywordCounter *counter; | ||
243 | struct GNUNET_HashCode hc; | ||
244 | size_t klen; | ||
245 | |||
246 | klen = strlen (keyword) + 1; | ||
247 | GNUNET_CRYPTO_hash (keyword, klen - 1, &hc); | ||
248 | counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc); | ||
249 | GNUNET_assert (NULL != counter); | ||
250 | if (counter->count < tc->move_threshold) | ||
251 | return GNUNET_OK; | ||
252 | GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri, | ||
253 | counter->value); | ||
254 | return GNUNET_OK; | ||
255 | } | ||
256 | |||
257 | |||
258 | /** | ||
259 | * Move "frequent" keywords over to the target ksk uri, free the | ||
260 | * counters. | ||
261 | * | ||
262 | * @param cls the 'struct TrimContext' | ||
263 | * @param key key of the entry | ||
264 | * @param value the 'struct KeywordCounter' | ||
265 | * @return GNUNET_YES (always) | ||
266 | */ | ||
267 | static int | ||
268 | migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode *key, | ||
269 | void *value) | ||
270 | { | ||
271 | struct TrimContext *tc = cls; | ||
272 | struct KeywordCounter *counter = value; | ||
273 | |||
274 | if (counter->count >= tc->move_threshold) | ||
275 | { | ||
276 | if (NULL == tc->pos->ksk_uri) | ||
277 | tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, | ||
278 | &counter->value); | ||
279 | else | ||
280 | GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, | ||
281 | GNUNET_NO); | ||
282 | } | ||
283 | GNUNET_assert (GNUNET_YES == | ||
284 | GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter, | ||
285 | key, | ||
286 | counter)); | ||
287 | GNUNET_free (counter); | ||
288 | return GNUNET_YES; | ||
289 | } | ||
290 | |||
291 | |||
292 | /** | ||
293 | * Copy "frequent" metadata items over to the | ||
294 | * target metadata container, free the counters. | ||
295 | * | ||
296 | * @param cls the 'struct TrimContext' | ||
297 | * @param key key of the entry | ||
298 | * @param value the 'struct KeywordCounter' | ||
299 | * @return GNUNET_YES (always) | ||
300 | */ | ||
301 | static int | ||
302 | migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode *key, | ||
303 | void *value) | ||
304 | { | ||
305 | struct TrimContext *tc = cls; | ||
306 | struct MetaCounter *counter = value; | ||
307 | |||
308 | if (counter->count >= tc->move_threshold) | ||
309 | { | ||
310 | if (NULL == tc->pos->meta) | ||
311 | tc->pos->meta = GNUNET_CONTAINER_meta_data_create (); | ||
312 | GNUNET_CONTAINER_meta_data_insert (tc->pos->meta, | ||
313 | counter->plugin_name, | ||
314 | counter->type, | ||
315 | counter->format, | ||
316 | counter->data_mime_type, counter->data, | ||
317 | counter->data_size); | ||
318 | } | ||
319 | GNUNET_assert (GNUNET_YES == | ||
320 | GNUNET_CONTAINER_multihashmap_remove (tc->metacounter, | ||
321 | key, | ||
322 | counter)); | ||
323 | GNUNET_free (counter); | ||
324 | return GNUNET_YES; | ||
325 | } | ||
326 | |||
327 | |||
328 | /** | ||
329 | * Process a share item tree, moving frequent keywords up and | ||
330 | * copying frequent metadata up. | ||
331 | * | ||
332 | * @param tc trim context with hash maps to use | ||
333 | * @param tree tree to trim | ||
334 | */ | ||
335 | static void | ||
336 | share_tree_trim (struct TrimContext *tc, | ||
337 | struct GNUNET_FS_ShareTreeItem *tree) | ||
338 | { | ||
339 | struct GNUNET_FS_ShareTreeItem *pos; | ||
340 | unsigned int num_children; | ||
341 | |||
342 | /* first, trim all children */ | ||
343 | num_children = 0; | ||
344 | for (pos = tree->children_head; NULL != pos; pos = pos->next) | ||
345 | { | ||
346 | share_tree_trim (tc, pos); | ||
347 | num_children++; | ||
348 | } | ||
349 | |||
350 | /* consider adding filename to directory meta data */ | ||
351 | if (tree->is_directory == GNUNET_YES) | ||
352 | { | ||
353 | const char *user = getenv ("USER"); | ||
354 | if ((user == NULL) || | ||
355 | (0 != strncasecmp (user, tree->short_filename, strlen (user)))) | ||
356 | { | ||
357 | /* only use filename if it doesn't match $USER */ | ||
358 | if (NULL == tree->meta) | ||
359 | tree->meta = GNUNET_CONTAINER_meta_data_create (); | ||
360 | GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>", | ||
361 | EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, | ||
362 | EXTRACTOR_METAFORMAT_UTF8, | ||
363 | "text/plain", tree->short_filename, | ||
364 | strlen (tree->short_filename) + 1); | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if (1 >= num_children) | ||
369 | return; /* nothing to trim */ | ||
370 | |||
371 | /* now, count keywords and meta data in children */ | ||
372 | for (pos = tree->children_head; NULL != pos; pos = pos->next) | ||
373 | { | ||
374 | if (NULL != pos->meta) | ||
375 | GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter, | ||
376 | tc->metacounter); | ||
377 | if (NULL != pos->ksk_uri) | ||
378 | GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, | ||
379 | tc->keywordcounter); | ||
380 | } | ||
381 | |||
382 | /* calculate threshold for moving keywords / meta data */ | ||
383 | tc->move_threshold = 1 + (num_children / 2); | ||
384 | |||
385 | /* remove high-frequency keywords from children */ | ||
386 | for (pos = tree->children_head; NULL != pos; pos = pos->next) | ||
387 | { | ||
388 | tc->pos = pos; | ||
389 | if (NULL != pos->ksk_uri) | ||
390 | { | ||
391 | struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri); | ||
392 | GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy, | ||
393 | &remove_high_frequency_keywords, tc); | ||
394 | GNUNET_FS_uri_destroy (ksk_uri_copy); | ||
395 | } | ||
396 | } | ||
397 | |||
398 | /* add high-frequency meta data and keywords to parent */ | ||
399 | tc->pos = tree; | ||
400 | GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter, | ||
401 | &migrate_and_drop_keywords, | ||
402 | tc); | ||
403 | GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter, | ||
404 | &migrate_and_drop_metadata, | ||
405 | tc); | ||
406 | } | ||
407 | |||
408 | |||
409 | /** | ||
410 | * Process a share item tree, moving frequent keywords up and | ||
411 | * copying frequent metadata up. | ||
412 | * | ||
413 | * @param toplevel toplevel directory in the tree, returned by the scanner | ||
414 | */ | ||
415 | void | ||
416 | GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel) | ||
417 | { | ||
418 | struct TrimContext tc; | ||
419 | |||
420 | if (toplevel == NULL) | ||
421 | return; | ||
422 | tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO); | ||
423 | tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO); | ||
424 | share_tree_trim (&tc, toplevel); | ||
425 | GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter); | ||
426 | GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter); | ||
427 | } | ||
428 | |||
429 | |||
430 | /** | ||
431 | * Release memory of a share item tree. | ||
432 | * | ||
433 | * @param toplevel toplevel of the tree to be freed | ||
434 | */ | ||
435 | void | ||
436 | GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel) | ||
437 | { | ||
438 | struct GNUNET_FS_ShareTreeItem *pos; | ||
439 | |||
440 | while (NULL != (pos = toplevel->children_head)) | ||
441 | GNUNET_FS_share_tree_free (pos); | ||
442 | if (NULL != toplevel->parent) | ||
443 | GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head, | ||
444 | toplevel->parent->children_tail, | ||
445 | toplevel); | ||
446 | if (NULL != toplevel->meta) | ||
447 | GNUNET_CONTAINER_meta_data_destroy (toplevel->meta); | ||
448 | if (NULL != toplevel->ksk_uri) | ||
449 | GNUNET_FS_uri_destroy (toplevel->ksk_uri); | ||
450 | GNUNET_free (toplevel->filename); | ||
451 | GNUNET_free (toplevel->short_filename); | ||
452 | GNUNET_free (toplevel); | ||
453 | } | ||
454 | |||
455 | |||
456 | /* end fs_sharetree.c */ | ||