diff options
Diffstat (limited to 'src/block/bg_bf.c')
-rw-r--r-- | src/block/bg_bf.c | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/src/block/bg_bf.c b/src/block/bg_bf.c new file mode 100644 index 000000000..3e7d38892 --- /dev/null +++ b/src/block/bg_bf.c | |||
@@ -0,0 +1,268 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2017 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file block/bg_bf.c | ||
22 | * @brief implementation of a block group using a Bloom filter | ||
23 | * to drop duplicate blocks | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_util_lib.h" | ||
28 | #include "gnunet_block_group_lib.h" | ||
29 | #include "gnunet_block_plugin.h" | ||
30 | |||
31 | |||
32 | /** | ||
33 | * Internal data structure for a block group. | ||
34 | */ | ||
35 | struct BfGroupInternals | ||
36 | { | ||
37 | /** | ||
38 | * A Bloom filter to weed out duplicate replies probabilistically. | ||
39 | */ | ||
40 | struct GNUNET_CONTAINER_BloomFilter *bf; | ||
41 | |||
42 | /** | ||
43 | * Set from the nonce to mingle the hashes before going into the @e bf. | ||
44 | */ | ||
45 | uint32_t bf_mutator; | ||
46 | |||
47 | /** | ||
48 | * Size of @a bf. | ||
49 | */ | ||
50 | uint32_t bf_size; | ||
51 | |||
52 | }; | ||
53 | |||
54 | |||
55 | /** | ||
56 | * Serialize state of a block group. | ||
57 | * | ||
58 | * @param bg group to serialize | ||
59 | * @param[out] nonce set to the nonce of the @a bg | ||
60 | * @param[out] raw_data set to the serialized state | ||
61 | * @param[out] raw_data_size set to the number of bytes in @a raw_data | ||
62 | * @return #GNUNET_OK on success, #GNUNET_NO if serialization is not | ||
63 | * supported, #GNUNET_SYSERR on error | ||
64 | */ | ||
65 | static int | ||
66 | bf_group_serialize_cb (struct GNUNET_BLOCK_Group *bg, | ||
67 | uint32_t *nonce, | ||
68 | void **raw_data, | ||
69 | size_t *raw_data_size) | ||
70 | { | ||
71 | struct BfGroupInternals *gi = bg->internal_cls; | ||
72 | char *raw; | ||
73 | |||
74 | raw = GNUNET_malloc (gi->bf_size); | ||
75 | if (GNUNET_OK != | ||
76 | GNUNET_CONTAINER_bloomfilter_get_raw_data (gi->bf, | ||
77 | raw, | ||
78 | gi->bf_size)) | ||
79 | { | ||
80 | GNUNET_free (raw); | ||
81 | GNUNET_break (0); | ||
82 | return GNUNET_SYSERR; | ||
83 | } | ||
84 | *nonce = gi->bf_mutator; | ||
85 | *raw_data = raw; | ||
86 | *raw_data_size = gi->bf_size; | ||
87 | return GNUNET_OK; | ||
88 | } | ||
89 | |||
90 | |||
91 | /** | ||
92 | * Mark elements as "seen" using a hash of the element. Not supported | ||
93 | * by all block plugins. | ||
94 | * | ||
95 | * @param bg group to update | ||
96 | * @param seen_results results already seen | ||
97 | * @param seen_results_count number of entries in @a seen_results | ||
98 | */ | ||
99 | static void | ||
100 | bf_group_mark_seen_cb (struct GNUNET_BLOCK_Group *bg, | ||
101 | const struct GNUNET_HashCode *seen_results, | ||
102 | unsigned int seen_results_count) | ||
103 | { | ||
104 | struct BfGroupInternals *gi = bg->internal_cls; | ||
105 | |||
106 | for (unsigned int i=0;i<seen_results_count;i++) | ||
107 | { | ||
108 | struct GNUNET_HashCode mhash; | ||
109 | |||
110 | GNUNET_BLOCK_mingle_hash (&seen_results[i], | ||
111 | gi->bf_mutator, | ||
112 | &mhash); | ||
113 | GNUNET_CONTAINER_bloomfilter_add (gi->bf, | ||
114 | &mhash); | ||
115 | } | ||
116 | } | ||
117 | |||
118 | |||
119 | /** | ||
120 | * Merge two groups, if possible. Not supported by all block plugins, | ||
121 | * can also fail if the nonces were different. | ||
122 | * | ||
123 | * @param bg1 group to update | ||
124 | * @param bg2 group to merge into @a bg1 | ||
125 | * @return #GNUNET_OK on success, #GNUNET_NO if the nonces were different and thus | ||
126 | * we failed. | ||
127 | */ | ||
128 | static int | ||
129 | bf_group_merge_cb (struct GNUNET_BLOCK_Group *bg1, | ||
130 | const struct GNUNET_BLOCK_Group *bg2) | ||
131 | { | ||
132 | struct BfGroupInternals *gi1 = bg1->internal_cls; | ||
133 | struct BfGroupInternals *gi2 = bg2->internal_cls; | ||
134 | |||
135 | if (gi1->bf_mutator != gi2->bf_mutator) | ||
136 | return GNUNET_NO; | ||
137 | if (gi1->bf_size != gi2->bf_size) | ||
138 | return GNUNET_NO; | ||
139 | GNUNET_CONTAINER_bloomfilter_or2 (gi1->bf, | ||
140 | gi2->bf); | ||
141 | return GNUNET_OK; | ||
142 | } | ||
143 | |||
144 | |||
145 | /** | ||
146 | * Destroy resources used by a block group. | ||
147 | * | ||
148 | * @param bg group to destroy, NULL is allowed | ||
149 | */ | ||
150 | static void | ||
151 | bf_group_destroy_cb (struct GNUNET_BLOCK_Group *bg) | ||
152 | { | ||
153 | struct BfGroupInternals *gi = bg->internal_cls; | ||
154 | |||
155 | GNUNET_CONTAINER_bloomfilter_free (gi->bf); | ||
156 | GNUNET_free (gi); | ||
157 | GNUNET_free (bg); | ||
158 | } | ||
159 | |||
160 | |||
161 | /** | ||
162 | * Create a new block group that filters duplicates using a Bloom filter. | ||
163 | * | ||
164 | * @param ctx block context in which the block group is created | ||
165 | * @param bf_size size of the Bloom filter | ||
166 | * @param bf_k K-value for the Bloom filter | ||
167 | * @param type block type | ||
168 | * @param nonce random value used to seed the group creation | ||
169 | * @param raw_data optional serialized prior state of the group, NULL if unavailable/fresh | ||
170 | * @param raw_data_size number of bytes in @a raw_data, 0 if unavailable/fresh | ||
171 | * @return block group handle, NULL if block groups are not supported | ||
172 | * by this @a type of block (this is not an error) | ||
173 | */ | ||
174 | struct GNUNET_BLOCK_Group * | ||
175 | GNUNET_BLOCK_GROUP_bf_create (void *cls, | ||
176 | size_t bf_size, | ||
177 | unsigned int bf_k, | ||
178 | enum GNUNET_BLOCK_Type type, | ||
179 | uint32_t nonce, | ||
180 | const void *raw_data, | ||
181 | size_t raw_data_size) | ||
182 | { | ||
183 | struct BfGroupInternals *gi; | ||
184 | struct GNUNET_BLOCK_Group *bg; | ||
185 | |||
186 | gi = GNUNET_new (struct BfGroupInternals); | ||
187 | gi->bf = GNUNET_CONTAINER_bloomfilter_init ((bf_size != raw_data_size) ? NULL : raw_data, | ||
188 | bf_size, | ||
189 | bf_k); | ||
190 | gi->bf_mutator = nonce; | ||
191 | gi->bf_size = bf_size; | ||
192 | bg = GNUNET_new (struct GNUNET_BLOCK_Group); | ||
193 | bg->type = type; | ||
194 | bg->serialize_cb = &bf_group_serialize_cb; | ||
195 | bg->mark_seen_cb = &bf_group_mark_seen_cb; | ||
196 | bg->merge_cb = &bf_group_merge_cb; | ||
197 | bg->destroy_cb = &bf_group_destroy_cb; | ||
198 | bg->internal_cls = gi; | ||
199 | return bg; | ||
200 | } | ||
201 | |||
202 | |||
203 | /** | ||
204 | * Test if @a hc is contained in the Bloom filter of @a bg. If so, | ||
205 | * return #GNUNET_YES. If not, add @a hc to the Bloom filter and | ||
206 | * return #GNUNET_NO. | ||
207 | * | ||
208 | * @param bg block group to use for testing | ||
209 | * @param hc hash of element to evaluate | ||
210 | * @return #GNUNET_YES if @a hc is (likely) a duplicate | ||
211 | * #GNUNET_NO if @a hc was definitively not in @bg (but now is) | ||
212 | */ | ||
213 | int | ||
214 | GNUNET_BLOCK_GROUP_bf_test_and_set (struct GNUNET_BLOCK_Group *bg, | ||
215 | const struct GNUNET_HashCode *hc) | ||
216 | { | ||
217 | struct BfGroupInternals *gi; | ||
218 | struct GNUNET_HashCode mhash; | ||
219 | |||
220 | if (NULL == bg) | ||
221 | return GNUNET_NO; | ||
222 | gi = bg->internal_cls; | ||
223 | GNUNET_BLOCK_mingle_hash (hc, | ||
224 | gi->bf_mutator, | ||
225 | &mhash); | ||
226 | if (GNUNET_YES == | ||
227 | GNUNET_CONTAINER_bloomfilter_test (gi->bf, | ||
228 | &mhash)) | ||
229 | return GNUNET_YES; | ||
230 | GNUNET_CONTAINER_bloomfilter_add (gi->bf, | ||
231 | &mhash); | ||
232 | return GNUNET_NO; | ||
233 | } | ||
234 | |||
235 | |||
236 | /** | ||
237 | * How many bytes should a bloomfilter be if we have already seen | ||
238 | * entry_count responses? Sized so that do not have to | ||
239 | * re-size the filter too often (to keep it cheap). | ||
240 | * | ||
241 | * Since other peers will also add entries but not resize the filter, | ||
242 | * we should generally pick a slightly larger size than what the | ||
243 | * strict math would suggest. | ||
244 | * | ||
245 | * @param entry_count expected number of entries in the Bloom filter | ||
246 | * @param k number of bits set per entry | ||
247 | * @return must be a power of two and smaller or equal to 2^15. | ||
248 | */ | ||
249 | size_t | ||
250 | GNUNET_BLOCK_GROUP_compute_bloomfilter_size (unsigned int entry_count, | ||
251 | unsigned int k) | ||
252 | { | ||
253 | size_t size; | ||
254 | unsigned int ideal = (entry_count * k) / 4; | ||
255 | uint16_t max = 1 << 15; | ||
256 | |||
257 | if (entry_count > max) | ||
258 | return max; | ||
259 | size = 8; | ||
260 | while ((size < max) && (size < ideal)) | ||
261 | size *= 2; | ||
262 | if (size > max) | ||
263 | return max; | ||
264 | return size; | ||
265 | } | ||
266 | |||
267 | |||
268 | /* end of bg_bf.c */ | ||