aboutsummaryrefslogtreecommitdiff
path: root/src/service/fs/fs_sharetree.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/service/fs/fs_sharetree.c')
-rw-r--r--src/service/fs/fs_sharetree.c457
1 files changed, 457 insertions, 0 deletions
diff --git a/src/service/fs/fs_sharetree.c b/src/service/fs/fs_sharetree.c
new file mode 100644
index 000000000..6c246a3ad
--- /dev/null
+++ b/src/service/fs/fs_sharetree.c
@@ -0,0 +1,457 @@
1/*
2 This file is part of GNUnet
3 Copyright (C) 2005-2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
20
21/**
22 * @file fs/fs_sharetree.c
23 * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
24 * @author LRN
25 * @author Christian Grothoff
26 */
27#include "platform.h"
28
29#include "gnunet_fs_service.h"
30#include "gnunet_scheduler_lib.h"
31#include <pthread.h>
32
33
34/**
35 * Entry for each unique keyword to track how often
36 * it occurred. Contains the keyword and the counter.
37 */
38struct KeywordCounter
39{
40 /**
41 * This is a doubly-linked list
42 */
43 struct KeywordCounter *prev;
44
45 /**
46 * This is a doubly-linked list
47 */
48 struct KeywordCounter *next;
49
50 /**
51 * Keyword that was found.
52 */
53 const char *value;
54
55 /**
56 * How many files have this keyword?
57 */
58 unsigned int count;
59};
60
61
62/**
63 * Aggregate information we keep for meta data in each directory.
64 */
65struct MetaCounter
66{
67 /**
68 * This is a doubly-linked list
69 */
70 struct MetaCounter *prev;
71
72 /**
73 * This is a doubly-linked list
74 */
75 struct MetaCounter *next;
76
77 /**
78 * Name of the plugin that provided that piece of metadata
79 */
80 const char *plugin_name;
81
82 /**
83 * MIME-type of the metadata itself
84 */
85 const char *data_mime_type;
86
87 /**
88 * The actual meta data.
89 */
90 const char *data;
91
92 /**
93 * Number of bytes in 'data'.
94 */
95 size_t data_size;
96
97 /**
98 * Type of the data
99 */
100 enum EXTRACTOR_MetaType type;
101
102 /**
103 * Format of the data
104 */
105 enum EXTRACTOR_MetaFormat format;
106
107 /**
108 * How many files have meta entries matching this value?
109 * (type and format do not have to match).
110 */
111 unsigned int count;
112};
113
114
115/**
116 * A structure that forms a singly-linked list that serves as a stack
117 * for metadata-processing function.
118 */
119struct TrimContext
120{
121 /**
122 * Map from the hash over the keyword to an 'struct KeywordCounter *'
123 * counter that says how often this keyword was
124 * encountered in the current directory.
125 */
126 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
127
128 /**
129 * Map from the hash over the metadata to an 'struct MetaCounter *'
130 * counter that says how often this metadata was
131 * encountered in the current directory.
132 */
133 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
134
135 /**
136 * Position we are currently manipulating.
137 */
138 struct GNUNET_FS_ShareTreeItem *pos;
139
140 /**
141 * Number of times an item has to be found to be moved to the parent.
142 */
143 unsigned int move_threshold;
144};
145
146
147/**
148 * Add the given keyword to the keyword statistics tracker.
149 *
150 * @param cls the multihashmap we store the keyword counters in
151 * @param keyword the keyword to count
152 * @param is_mandatory ignored
153 * @return always GNUNET_OK
154 */
155static int
156add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
157{
158 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
159 struct KeywordCounter *cnt;
160 struct GNUNET_HashCode hc;
161 size_t klen;
162
163 klen = strlen (keyword) + 1;
164 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
165 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
166 if (cnt == NULL)
167 {
168 cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
169 cnt->value = (const char *) &cnt[1];
170 GNUNET_memcpy (&cnt[1], keyword, klen);
171 GNUNET_assert (GNUNET_OK ==
172 GNUNET_CONTAINER_multihashmap_put (mcm,
173 &hc, cnt,
174 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
175 }
176 cnt->count++;
177 return GNUNET_OK;
178}
179
180
181/**
182 * Function called on each meta data item. Increments the
183 * respective counter.
184 *
185 * @param cls the container multihashmap to update
186 * @param plugin_name name of the plugin that produced this value;
187 * special values can be used (e.g. '&lt;zlib&gt;' for zlib being
188 * used in the main libextractor library and yielding
189 * meta data).
190 * @param type libextractor-type describing the meta data
191 * @param format basic format information about data
192 * @param data_mime_type mime-type of data (not of the original file);
193 * can be NULL (if mime-type is not known)
194 * @param data actual meta-data found
195 * @param data_len number of bytes in data
196 * @return 0 to continue extracting / iterating
197 */
198static int
199add_to_meta_counter (void *cls, const char *plugin_name,
200 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat
201 format,
202 const char *data_mime_type, const char *data, size_t
203 data_len)
204{
205 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
206 struct GNUNET_HashCode key;
207 struct MetaCounter *cnt;
208
209 GNUNET_CRYPTO_hash (data, data_len, &key);
210 cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
211 if (NULL == cnt)
212 {
213 cnt = GNUNET_new (struct MetaCounter);
214 cnt->data = data;
215 cnt->data_size = data_len;
216 cnt->plugin_name = plugin_name;
217 cnt->type = type;
218 cnt->format = format;
219 cnt->data_mime_type = data_mime_type;
220 GNUNET_assert (GNUNET_OK ==
221 GNUNET_CONTAINER_multihashmap_put (map,
222 &key, cnt,
223 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
224 }
225 cnt->count++;
226 return 0;
227}
228
229
230/**
231 * Remove keywords above the threshold.
232 *
233 * @param cls the 'struct TrimContext' with the pos to remove the keywords from
234 * @param keyword the keyword to check
235 * @param is_mandatory ignored
236 * @return always GNUNET_OK
237 */
238static int
239remove_high_frequency_keywords (void *cls, const char *keyword, int
240 is_mandatory)
241{
242 struct TrimContext *tc = cls;
243 struct KeywordCounter *counter;
244 struct GNUNET_HashCode hc;
245 size_t klen;
246
247 klen = strlen (keyword) + 1;
248 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
249 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
250 GNUNET_assert (NULL != counter);
251 if (counter->count < tc->move_threshold)
252 return GNUNET_OK;
253 GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
254 counter->value);
255 return GNUNET_OK;
256}
257
258
259/**
260 * Move "frequent" keywords over to the target ksk uri, free the
261 * counters.
262 *
263 * @param cls the 'struct TrimContext'
264 * @param key key of the entry
265 * @param value the 'struct KeywordCounter'
266 * @return GNUNET_YES (always)
267 */
268static int
269migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode *key,
270 void *value)
271{
272 struct TrimContext *tc = cls;
273 struct KeywordCounter *counter = value;
274
275 if (counter->count >= tc->move_threshold)
276 {
277 if (NULL == tc->pos->ksk_uri)
278 tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1,
279 &counter->value);
280 else
281 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value,
282 GNUNET_NO);
283 }
284 GNUNET_assert (GNUNET_YES ==
285 GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
286 key,
287 counter));
288 GNUNET_free (counter);
289 return GNUNET_YES;
290}
291
292
293/**
294 * Copy "frequent" metadata items over to the
295 * target metadata container, free the counters.
296 *
297 * @param cls the 'struct TrimContext'
298 * @param key key of the entry
299 * @param value the 'struct KeywordCounter'
300 * @return GNUNET_YES (always)
301 */
302static int
303migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode *key,
304 void *value)
305{
306 struct TrimContext *tc = cls;
307 struct MetaCounter *counter = value;
308
309 if (counter->count >= tc->move_threshold)
310 {
311 if (NULL == tc->pos->meta)
312 tc->pos->meta = GNUNET_FS_meta_data_create ();
313 GNUNET_FS_meta_data_insert (tc->pos->meta,
314 counter->plugin_name,
315 counter->type,
316 counter->format,
317 counter->data_mime_type, counter->data,
318 counter->data_size);
319 }
320 GNUNET_assert (GNUNET_YES ==
321 GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
322 key,
323 counter));
324 GNUNET_free (counter);
325 return GNUNET_YES;
326}
327
328
329/**
330 * Process a share item tree, moving frequent keywords up and
331 * copying frequent metadata up.
332 *
333 * @param tc trim context with hash maps to use
334 * @param tree tree to trim
335 */
336static void
337share_tree_trim (struct TrimContext *tc,
338 struct GNUNET_FS_ShareTreeItem *tree)
339{
340 struct GNUNET_FS_ShareTreeItem *pos;
341 unsigned int num_children;
342
343 /* first, trim all children */
344 num_children = 0;
345 for (pos = tree->children_head; NULL != pos; pos = pos->next)
346 {
347 share_tree_trim (tc, pos);
348 num_children++;
349 }
350
351 /* consider adding filename to directory meta data */
352 if (tree->is_directory == GNUNET_YES)
353 {
354 const char *user = getenv ("USER");
355 if ((user == NULL) ||
356 (0 != strncasecmp (user, tree->short_filename, strlen (user))))
357 {
358 /* only use filename if it doesn't match $USER */
359 if (NULL == tree->meta)
360 tree->meta = GNUNET_FS_meta_data_create ();
361 GNUNET_FS_meta_data_insert (tree->meta, "<libgnunetfs>",
362 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
363 EXTRACTOR_METAFORMAT_UTF8,
364 "text/plain", tree->short_filename,
365 strlen (tree->short_filename) + 1);
366 }
367 }
368
369 if (1 >= num_children)
370 return; /* nothing to trim */
371
372 /* now, count keywords and meta data in children */
373 for (pos = tree->children_head; NULL != pos; pos = pos->next)
374 {
375 if (NULL != pos->meta)
376 GNUNET_FS_meta_data_iterate (pos->meta, &add_to_meta_counter,
377 tc->metacounter);
378 if (NULL != pos->ksk_uri)
379 GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter,
380 tc->keywordcounter);
381 }
382
383 /* calculate threshold for moving keywords / meta data */
384 tc->move_threshold = 1 + (num_children / 2);
385
386 /* remove high-frequency keywords from children */
387 for (pos = tree->children_head; NULL != pos; pos = pos->next)
388 {
389 tc->pos = pos;
390 if (NULL != pos->ksk_uri)
391 {
392 struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
393 GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
394 &remove_high_frequency_keywords, tc);
395 GNUNET_FS_uri_destroy (ksk_uri_copy);
396 }
397 }
398
399 /* add high-frequency meta data and keywords to parent */
400 tc->pos = tree;
401 GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
402 &migrate_and_drop_keywords,
403 tc);
404 GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
405 &migrate_and_drop_metadata,
406 tc);
407}
408
409
410/**
411 * Process a share item tree, moving frequent keywords up and
412 * copying frequent metadata up.
413 *
414 * @param toplevel toplevel directory in the tree, returned by the scanner
415 */
416void
417GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
418{
419 struct TrimContext tc;
420
421 if (toplevel == NULL)
422 return;
423 tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
424 tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
425 share_tree_trim (&tc, toplevel);
426 GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
427 GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
428}
429
430
431/**
432 * Release memory of a share item tree.
433 *
434 * @param toplevel toplevel of the tree to be freed
435 */
436void
437GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
438{
439 struct GNUNET_FS_ShareTreeItem *pos;
440
441 while (NULL != (pos = toplevel->children_head))
442 GNUNET_FS_share_tree_free (pos);
443 if (NULL != toplevel->parent)
444 GNUNET_CONTAINER_DLL_remove (toplevel->parent->children_head,
445 toplevel->parent->children_tail,
446 toplevel);
447 if (NULL != toplevel->meta)
448 GNUNET_FS_meta_data_destroy (toplevel->meta);
449 if (NULL != toplevel->ksk_uri)
450 GNUNET_FS_uri_destroy (toplevel->ksk_uri);
451 GNUNET_free (toplevel->filename);
452 GNUNET_free (toplevel->short_filename);
453 GNUNET_free (toplevel);
454}
455
456
457/* end fs_sharetree.c */