aboutsummaryrefslogtreecommitdiff
path: root/src/fs/fs_sharetree.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fs/fs_sharetree.c')
-rw-r--r--src/fs/fs_sharetree.c456
1 files changed, 0 insertions, 456 deletions
diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c
deleted file mode 100644
index 3610b202e..000000000
--- a/src/fs/fs_sharetree.c
+++ /dev/null
@@ -1,456 +0,0 @@
1/*
2 This file is part of GNUnet
3 Copyright (C) 2005-2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
20
21/**
22 * @file fs/fs_sharetree.c
23 * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
24 * @author LRN
25 * @author Christian Grothoff
26 */
27#include "platform.h"
28#include "gnunet_fs_service.h"
29#include "gnunet_scheduler_lib.h"
30#include <pthread.h>
31
32
33/**
34 * Entry for each unique keyword to track how often
35 * it occurred. Contains the keyword and the counter.
36 */
37struct KeywordCounter
38{
39 /**
40 * This is a doubly-linked list
41 */
42 struct KeywordCounter *prev;
43
44 /**
45 * This is a doubly-linked list
46 */
47 struct KeywordCounter *next;
48
49 /**
50 * Keyword that was found.
51 */
52 const char *value;
53
54 /**
55 * How many files have this keyword?
56 */
57 unsigned int count;
58};
59
60
61/**
62 * Aggregate information we keep for meta data in each directory.
63 */
64struct MetaCounter
65{
66 /**
67 * This is a doubly-linked list
68 */
69 struct MetaCounter *prev;
70
71 /**
72 * This is a doubly-linked list
73 */
74 struct MetaCounter *next;
75
76 /**
77 * Name of the plugin that provided that piece of metadata
78 */
79 const char *plugin_name;
80
81 /**
82 * MIME-type of the metadata itself
83 */
84 const char *data_mime_type;
85
86 /**
87 * The actual meta data.
88 */
89 const char *data;
90
91 /**
92 * Number of bytes in 'data'.
93 */
94 size_t data_size;
95
96 /**
97 * Type of the data
98 */
99 enum EXTRACTOR_MetaType type;
100
101 /**
102 * Format of the data
103 */
104 enum EXTRACTOR_MetaFormat format;
105
106 /**
107 * How many files have meta entries matching this value?
108 * (type and format do not have to match).
109 */
110 unsigned int count;
111};
112
113
114/**
115 * A structure that forms a singly-linked list that serves as a stack
116 * for metadata-processing function.
117 */
118struct TrimContext
119{
120 /**
121 * Map from the hash over the keyword to an 'struct KeywordCounter *'
122 * counter that says how often this keyword was
123 * encountered in the current directory.
124 */
125 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
126
127 /**
128 * Map from the hash over the metadata to an 'struct MetaCounter *'
129 * counter that says how often this metadata was
130 * encountered in the current directory.
131 */
132 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
133
134 /**
135 * Position we are currently manipulating.
136 */
137 struct GNUNET_FS_ShareTreeItem *pos;
138
139 /**
140 * Number of times an item has to be found to be moved to the parent.
141 */
142 unsigned int move_threshold;
143};
144
145
146/**
147 * Add the given keyword to the keyword statistics tracker.
148 *
149 * @param cls the multihashmap we store the keyword counters in
150 * @param keyword the keyword to count
151 * @param is_mandatory ignored
152 * @return always GNUNET_OK
153 */
154static int
155add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
156{
157 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
158 struct KeywordCounter *cnt;
159 struct GNUNET_HashCode hc;
160 size_t klen;
161
162 klen = strlen (keyword) + 1;
163 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
164 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
165 if (cnt == NULL)
166 {
167 cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
168 cnt->value = (const char *) &cnt[1];
169 GNUNET_memcpy (&cnt[1], keyword, klen);
170 GNUNET_assert (GNUNET_OK ==
171 GNUNET_CONTAINER_multihashmap_put (mcm,
172 &hc, cnt,
173 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
174 }
175 cnt->count++;
176 return GNUNET_OK;
177}
178
179
180/**
181 * Function called on each meta data item. Increments the
182 * respective counter.
183 *
184 * @param cls the container multihashmap to update
185 * @param plugin_name name of the plugin that produced this value;
186 * special values can be used (e.g. '&lt;zlib&gt;' for zlib being
187 * used in the main libextractor library and yielding
188 * meta data).
189 * @param type libextractor-type describing the meta data
190 * @param format basic format information about data
191 * @param data_mime_type mime-type of data (not of the original file);
192 * can be NULL (if mime-type is not known)
193 * @param data actual meta-data found
194 * @param data_len number of bytes in data
195 * @return 0 to continue extracting / iterating
196 */
197static int
198add_to_meta_counter (void *cls, const char *plugin_name,
199 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat
200 format,
201 const char *data_mime_type, const char *data, size_t
202 data_len)
203{
204 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
205 struct GNUNET_HashCode key;
206 struct MetaCounter *cnt;
207
208 GNUNET_CRYPTO_hash (data, data_len, &key);
209 cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
210 if (NULL == cnt)
211 {
212 cnt = GNUNET_new (struct MetaCounter);
213 cnt->data = data;
214 cnt->data_size = data_len;
215 cnt->plugin_name = plugin_name;
216 cnt->type = type;
217 cnt->format = format;
218 cnt->data_mime_type = data_mime_type;
219 GNUNET_assert (GNUNET_OK ==
220 GNUNET_CONTAINER_multihashmap_put (map,
221 &key, cnt,
222 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
223 }
224 cnt->count++;
225 return 0;
226}
227
228
229/**
230 * Remove keywords above the threshold.
231 *
232 * @param cls the 'struct TrimContext' with the pos to remove the keywords from
233 * @param keyword the keyword to check
234 * @param is_mandatory ignored
235 * @return always GNUNET_OK
236 */
237static int
238remove_high_frequency_keywords (void *cls, const char *keyword, int
239 is_mandatory)
240{
241 struct TrimContext *tc = cls;
242 struct KeywordCounter *counter;
243 struct GNUNET_HashCode hc;
244 size_t klen;
245
246 klen = strlen (keyword) + 1;
247 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
248 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
249 GNUNET_assert (NULL != counter);
250 if (counter->count < tc->move_threshold)
251 return GNUNET_OK;
252 GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
253 counter->value);
254 return GNUNET_OK;
255}
256
257
258/**
259 * Move "frequent" keywords over to the target ksk uri, free the
260 * counters.
261 *
262 * @param cls the 'struct TrimContext'
263 * @param key key of the entry
264 * @param value the 'struct KeywordCounter'
265 * @return GNUNET_YES (always)
266 */
267static int
268migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode *key,
269 void *value)
270{
271 struct TrimContext *tc = cls;
272 struct KeywordCounter *counter = value;
273
274 if (counter->count >= tc->move_threshold)
275 {
276 if (NULL == tc->pos->ksk_uri)
277 tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1,
278 &counter->value);
279 else
280 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value,
281 GNUNET_NO);
282 }
283 GNUNET_assert (GNUNET_YES ==
284 GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
285 key,
286 counter));
287 GNUNET_free (counter);
288 return GNUNET_YES;
289}
290
291
292/**
293 * Copy "frequent" metadata items over to the
294 * target metadata container, free the counters.
295 *
296 * @param cls the 'struct TrimContext'
297 * @param key key of the entry
298 * @param value the 'struct KeywordCounter'
299 * @return GNUNET_YES (always)
300 */
301static int
302migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode *key,
303 void *value)
304{
305 struct TrimContext *tc = cls;
306 struct MetaCounter *counter = value;
307
308 if (counter->count >= tc->move_threshold)
309 {
310 if (NULL == tc->pos->meta)
311 tc->pos->meta = GNUNET_CONTAINER_meta_data_create ();
312 GNUNET_CONTAINER_meta_data_insert (tc->pos->meta,
313 counter->plugin_name,
314 counter->type,
315 counter->format,
316 counter->data_mime_type, counter->data,
317 counter->data_size);
318 }
319 GNUNET_assert (GNUNET_YES ==
320 GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
321 key,
322 counter));
323 GNUNET_free (counter);
324 return GNUNET_YES;
325}
326
327
328/**
329 * Process a share item tree, moving frequent keywords up and
330 * copying frequent metadata up.
331 *
332 * @param tc trim context with hash maps to use
333 * @param tree tree to trim
334 */
335static void
336share_tree_trim (struct TrimContext *tc,
337 struct GNUNET_FS_ShareTreeItem *tree)
338{
339 struct GNUNET_FS_ShareTreeItem *pos;
340 unsigned int num_children;
341
342 /* first, trim all children */
343 num_children = 0;
344 for (pos = tree->children_head; NULL != pos; pos = pos->next)
345 {
346 share_tree_trim (tc, pos);
347 num_children++;
348 }
349
350 /* consider adding filename to directory meta data */
351 if (tree->is_directory == GNUNET_YES)
352 {
353 const char *user = getenv ("USER");
354 if ((user == NULL) ||
355 (0 != strncasecmp (user, tree->short_filename, strlen (user))))
356 {
357 /* only use filename if it doesn't match $USER */
358 if (NULL == tree->meta)
359 tree->meta = GNUNET_CONTAINER_meta_data_create ();
360 GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
361 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
362 EXTRACTOR_METAFORMAT_UTF8,
363 "text/plain", tree->short_filename,
364 strlen (tree->short_filename) + 1);
365 }
366 }
367
368 if (1 >= num_children)
369 return; /* nothing to trim */
370
371 /* now, count keywords and meta data in children */
372 for (pos = tree->children_head; NULL != pos; pos = pos->next)
373 {
374 if (NULL != pos->meta)
375 GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter,
376 tc->metacounter);
377 if (NULL != pos->ksk_uri)
378 GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter,
379 tc->keywordcounter);
380 }
381
382 /* calculate threshold for moving keywords / meta data */
383 tc->move_threshold = 1 + (num_children / 2);
384
385 /* remove high-frequency keywords from children */
386 for (pos = tree->children_head; NULL != pos; pos = pos->next)
387 {
388 tc->pos = pos;
389 if (NULL != pos->ksk_uri)
390 {
391 struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
392 GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
393 &remove_high_frequency_keywords, tc);
394 GNUNET_FS_uri_destroy (ksk_uri_copy);
395 }
396 }
397
398 /* add high-frequency meta data and keywords to parent */
399 tc->pos = tree;
400 GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
401 &migrate_and_drop_keywords,
402 tc);
403 GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
404 &migrate_and_drop_metadata,
405 tc);
406}
407
408
409/**
410 * Process a share item tree, moving frequent keywords up and
411 * copying frequent metadata up.
412 *
413 * @param toplevel toplevel directory in the tree, returned by the scanner
414 */
415void
416GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
417{
418 struct TrimContext tc;
419
420 if (toplevel == NULL)
421 return;
422 tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
423 tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
424 share_tree_trim (&tc, toplevel);
425 GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
426 GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
427}
428
429
430/**
431 * Release memory of a share item tree.
432 *
433 * @param toplevel toplevel of the tree to be freed
434 */
435void
436GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
437{
438 struct GNUNET_FS_ShareTreeItem *pos;
439
440 while (NULL != (pos = toplevel->children_head))
441 GNUNET_FS_share_tree_free (pos);
442 if (NULL != toplevel->parent)
443 GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
444 toplevel->parent->children_tail,
445 toplevel);
446 if (NULL != toplevel->meta)
447 GNUNET_CONTAINER_meta_data_destroy (toplevel->meta);
448 if (NULL != toplevel->ksk_uri)
449 GNUNET_FS_uri_destroy (toplevel->ksk_uri);
450 GNUNET_free (toplevel->filename);
451 GNUNET_free (toplevel->short_filename);
452 GNUNET_free (toplevel);
453}
454
455
456/* end fs_sharetree.c */