aboutsummaryrefslogtreecommitdiff
path: root/src/fs
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-01-28 21:08:10 +0000
committerChristian Grothoff <christian@grothoff.org>2012-01-28 21:08:10 +0000
commit6535dad120517e6572f93ffc28b800801bf2781d (patch)
tree4c4afc385e23e0234c8443f77eb91a2a0e01a335 /src/fs
parent4b9e92b8f0c33dd655abff1cda89b33aaf90ef16 (diff)
downloadgnunet-6535dad120517e6572f93ffc28b800801bf2781d.tar.gz
gnunet-6535dad120517e6572f93ffc28b800801bf2781d.zip
-cleaner version of sharetree trim code; should be fast enough to run synchrnously
Diffstat (limited to 'src/fs')
-rw-r--r--src/fs/fs_sharetree.c408
1 files changed, 408 insertions, 0 deletions
diff --git a/src/fs/fs_sharetree.c b/src/fs/fs_sharetree.c
new file mode 100644
index 000000000..864be58bf
--- /dev/null
+++ b/src/fs/fs_sharetree.c
@@ -0,0 +1,408 @@
1/*
2 This file is part of GNUnet
3 (C) 2005-2012 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 2, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20
21/**
22 * @file fs/fs_sharetree.c
23 * @brief code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree
24 * @author LRN
25 * @author Christian Grothoff
26 */
27#include "platform.h"
28#include "gnunet_fs_service.h"
29#include "gnunet_scheduler_lib.h"
30#include <pthread.h>
31
32
33/**
34 * Entry for each unique keyword to track how often
35 * it occured. Contains the keyword and the counter.
36 */
37struct KeywordCounter
38{
39
40 /**
41 * This is a doubly-linked list
42 */
43 struct KeywordCounter *prev;
44
45 /**
46 * This is a doubly-linked list
47 */
48 struct KeywordCounter *next;
49
50 /**
51 * Keyword that was found.
52 */
53 const char *value;
54
55 /**
56 * How many files have this keyword?
57 */
58 unsigned int count;
59
60};
61
62
63/**
64 * Aggregate information we keep for meta data in each directory.
65 */
66struct MetaCounter
67{
68
69 /**
70 * This is a doubly-linked list
71 */
72 struct MetaCounter *prev;
73
74 /**
75 * This is a doubly-linked list
76 */
77 struct MetaCounter *next;
78
79 /**
80 * Name of the plugin that provided that piece of metadata
81 */
82 const char *plugin_name;
83
84 /**
85 * MIME-type of the metadata itself
86 */
87 const char *data_mime_type;
88
89 /**
90 * The actual meta data.
91 */
92 const char *data;
93
94 /**
95 * Number of bytes in 'data'.
96 */
97 size_t data_size;
98
99 /**
100 * Type of the data
101 */
102 enum EXTRACTOR_MetaType type;
103
104 /**
105 * Format of the data
106 */
107 enum EXTRACTOR_MetaFormat format;
108
109 /**
110 * How many files have meta entries matching this value?
111 * (type and format do not have to match).
112 */
113 unsigned int count;
114
115};
116
117
118/**
119 * A structure that forms a singly-linked list that serves as a stack
120 * for metadata-processing function.
121 */
122struct TrimContext
123{
124
125 /**
126 * Map from the hash over the keyword to an 'struct KeywordCounter *'
127 * counter that says how often this keyword was
128 * encountered in the current directory.
129 */
130 struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;
131
132 /**
133 * Map from the hash over the metadata to an 'struct MetaCounter *'
134 * counter that says how often this metadata was
135 * encountered in the current directory.
136 */
137 struct GNUNET_CONTAINER_MultiHashMap *metacounter;
138
139 /**
140 * Position we are currently manipulating.
141 */
142 struct GNUNET_FS_ShareTreeItem *pos;
143
144 /**
145 * Number of times an item has to be found to be moved to the parent.
146 */
147 unsigned int move_threshold;
148
149};
150
151
152/**
153 * Add the given keyword to the keyword statistics tracker.
154 *
155 * @param cls the multihashmap we store the keyword counters in
156 * @param keyword the keyword to count
157 * @param is_mandatory ignored
158 * @return always GNUNET_OK
159 */
160static int
161add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
162{
163 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
164 struct KeywordCounter *cnt;
165 GNUNET_HashCode hc;
166 size_t klen;
167
168 klen = strlen (keyword) + 1;
169 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
170 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
171 if (cnt == NULL)
172 {
173 cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
174 cnt->value = (const char *) &cnt[1];
175 memcpy (&cnt[1], keyword, klen);
176 GNUNET_assert (GNUNET_OK ==
177 GNUNET_CONTAINER_multihashmap_put (mcm,
178 &hc, cnt,
179 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
180 }
181 cnt->count++;
182 return GNUNET_OK;
183}
184
185
186/**
187 * Function called on each meta data item. Increments the
188 * respective counter.
189 *
190 * @param cls the container multihashmap to update
191 * @param plugin_name name of the plugin that produced this value;
192 * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
193 * used in the main libextractor library and yielding
194 * meta data).
195 * @param type libextractor-type describing the meta data
196 * @param format basic format information about data
197 * @param data_mime_type mime-type of data (not of the original file);
198 * can be NULL (if mime-type is not known)
199 * @param data actual meta-data found
200 * @param data_len number of bytes in data
201 * @return GNUNET_OK to continue extracting / iterating
202 */
203static int
204add_to_meta_counter (void *cls, const char *plugin_name,
205 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
206 const char *data_mime_type, const char *data, size_t data_len)
207{
208 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
209 GNUNET_HashCode key;
210 struct MetaCounter *cnt;
211
212 GNUNET_CRYPTO_hash (data, data_len, &key);
213 cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
214 if (cnt == NULL)
215 {
216 cnt = GNUNET_malloc (sizeof (struct MetaCounter));
217 cnt->data = data;
218 cnt->data_size = data_len;
219 cnt->plugin_name = plugin_name;
220 cnt->type = type;
221 cnt->format = format;
222 cnt->data_mime_type = data_mime_type;
223 GNUNET_assert (GNUNET_OK ==
224 GNUNET_CONTAINER_multihashmap_put (map,
225 &key, cnt,
226 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY));
227 }
228 cnt->count++;
229 return 0;
230}
231
232
233/**
234 * Remove keywords above the threshold.
235 *
236 * @param cls the 'struct TrimContext' with the pos to remove the keywords from
237 * @param keyword the keyword to check
238 * @param is_mandatory ignored
239 * @return always GNUNET_OK
240 */
241static int
242remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory)
243{
244 struct TrimContext *tc = cls;
245 struct KeywordCounter *counter;
246 GNUNET_HashCode hc;
247 size_t klen;
248
249 klen = strlen (keyword) + 1;
250 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
251 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
252 GNUNET_assert (NULL != counter);
253 if (counter->count < tc->move_threshold)
254 return GNUNET_OK;
255 GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
256 counter->value);
257 return GNUNET_OK;
258}
259
260
261/**
262 * Move "frequent" keywords over to the target ksk uri, free the
263 * counters.
264 *
265 * @param cls the 'struct TrimContext'
266 * @param key key of the entry
267 * @param value the 'struct KeywordCounter'
268 * @return GNUNET_YES (always)
269 */
270static int
271migrate_and_drop_keywords (void *cls, const GNUNET_HashCode * key, void *value)
272{
273 struct TrimContext *tc = cls;
274 struct KeywordCounter *counter = value;
275
276 if (counter->count >= tc->move_threshold)
277 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value, GNUNET_NO);
278 GNUNET_assert (GNUNET_YES ==
279 GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
280 key,
281 counter));
282 GNUNET_free (counter);
283 return GNUNET_YES;
284}
285
286
287/**
288 * Copy "frequent" metadata items over to the
289 * target metadata container, free the counters.
290 *
291 * @param cls the 'struct TrimContext'
292 * @param key key of the entry
293 * @param value the 'struct KeywordCounter'
294 * @return GNUNET_YES (always)
295 */
296static int
297migrate_and_drop_metadata (void *cls, const GNUNET_HashCode * key, void *value)
298{
299 struct TrimContext *tc = cls;
300 struct MetaCounter *counter = value;
301
302 if (counter->count >= tc->move_threshold)
303 GNUNET_CONTAINER_meta_data_insert (tc->pos->meta,
304 counter->plugin_name,
305 counter->type,
306 counter->format,
307 counter->data_mime_type, counter->data,
308 counter->data_size);
309 GNUNET_assert (GNUNET_YES ==
310 GNUNET_CONTAINER_multihashmap_remove (tc->metacounter,
311 key,
312 counter));
313 GNUNET_free (counter);
314 return GNUNET_YES;
315}
316
317
318/**
319 * Process a share item tree, moving frequent keywords up and
320 * copying frequent metadata up.
321 *
322 * @param tc trim context with hash maps to use
323 * @param tree tree to trim
324 */
325static void
326share_tree_trim (struct TrimContext *tc,
327 struct GNUNET_FS_ShareTreeItem *tree)
328{
329 struct GNUNET_FS_ShareTreeItem *pos;
330 unsigned int num_children;
331
332 /* first, trim all children */
333 num_children = 0;
334 for (pos = tree->children_head; NULL != pos; pos = pos->next)
335 {
336 share_tree_trim (tc, pos);
337 num_children++;
338 }
339
340 /* consider adding filename to directory meta data */
341 if (tree->is_directory)
342 {
343 const char *user = getenv ("USER");
344 if ( (user == NULL) ||
345 (0 != strncasecmp (user, tree->short_filename, strlen(user))))
346 {
347 /* only use filename if it doesn't match $USER */
348 GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
349 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
350 EXTRACTOR_METAFORMAT_UTF8,
351 "text/plain", tree->short_filename,
352 strlen (tree->short_filename) + 1);
353 }
354 }
355
356 if (1 >= num_children)
357 return; /* nothing to trim */
358
359 /* now, count keywords and meta data in children */
360 for (pos = tree->children_head; NULL != pos; pos = pos->next)
361 {
362 GNUNET_CONTAINER_meta_data_iterate (pos->meta, &add_to_meta_counter, tc->metacounter);
363 GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &add_to_keyword_counter, tc->keywordcounter);
364 }
365
366 /* calculate threshold for moving keywords / meta data */
367 tc->move_threshold = 1 + (num_children / 2);
368
369 /* remove high-frequency keywords from children */
370 for (pos = tree->children_head; NULL != pos; pos = pos->next)
371 {
372 tc->pos = pos;
373 GNUNET_FS_uri_ksk_get_keywords (pos->ksk_uri, &remove_high_frequency_keywords, tc);
374 }
375
376 /* add high-frequency meta data and keywords to parent */
377 tc->pos = tree;
378 GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
379 &migrate_and_drop_keywords,
380 tc);
381 GNUNET_CONTAINER_multihashmap_iterate (tc->metacounter,
382 &migrate_and_drop_metadata,
383 tc);
384}
385
386
387/**
388 * Process a share item tree, moving frequent keywords up and
389 * copying frequent metadata up.
390 *
391 * @param toplevel toplevel directory in the tree, returned by the scanner
392 */
393void
394GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
395{
396 struct TrimContext tc;
397
398 if (toplevel == NULL)
399 return;
400 tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024);
401 tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024);
402 share_tree_trim (&tc, toplevel);
403 GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
404 GNUNET_CONTAINER_multihashmap_destroy (tc.metacounter);
405}
406
407/* end fs_sharetree.c */
408