diff options
Diffstat (limited to 'src/lib/block/bg_bf.c')
-rw-r--r-- | src/lib/block/bg_bf.c | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/src/lib/block/bg_bf.c b/src/lib/block/bg_bf.c new file mode 100644 index 000000000..c8269498e --- /dev/null +++ b/src/lib/block/bg_bf.c | |||
@@ -0,0 +1,286 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2017 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file block/bg_bf.c | ||
22 | * @brief implementation of a block group using a Bloom filter | ||
23 | * to drop duplicate blocks | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_util_lib.h" | ||
28 | #include "gnunet_block_group_lib.h" | ||
29 | #include "gnunet_block_plugin.h" | ||
30 | |||
31 | |||
32 | /** | ||
33 | * Internal data structure for a block group. | ||
34 | */ | ||
35 | struct BfGroupInternals | ||
36 | { | ||
37 | /** | ||
38 | * A Bloom filter to weed out duplicate replies probabilistically. | ||
39 | */ | ||
40 | struct GNUNET_CONTAINER_BloomFilter *bf; | ||
41 | |||
42 | /** | ||
43 | * Set from the nonce to mingle the hashes before going into the @e bf. | ||
44 | */ | ||
45 | uint32_t bf_mutator; | ||
46 | |||
47 | /** | ||
48 | * Size of @a bf. | ||
49 | */ | ||
50 | uint32_t bf_size; | ||
51 | }; | ||
52 | |||
53 | |||
54 | /** | ||
55 | * Serialize state of a block group. | ||
56 | * | ||
57 | * @param bg group to serialize | ||
58 | * @param[out] raw_data set to the serialized state | ||
59 | * @param[out] raw_data_size set to the number of bytes in @a raw_data | ||
60 | * @return #GNUNET_OK on success, #GNUNET_NO if serialization is not | ||
61 | * supported, #GNUNET_SYSERR on error | ||
62 | */ | ||
63 | static enum GNUNET_GenericReturnValue | ||
64 | bf_group_serialize_cb (struct GNUNET_BLOCK_Group *bg, | ||
65 | void **raw_data, | ||
66 | size_t *raw_data_size) | ||
67 | { | ||
68 | struct BfGroupInternals *gi = bg->internal_cls; | ||
69 | void *raw; | ||
70 | |||
71 | raw = GNUNET_malloc (gi->bf_size + sizeof (uint32_t)); | ||
72 | if (GNUNET_OK != | ||
73 | GNUNET_CONTAINER_bloomfilter_get_raw_data (gi->bf, | ||
74 | raw + sizeof (uint32_t), | ||
75 | gi->bf_size)) | ||
76 | { | ||
77 | GNUNET_free (raw); | ||
78 | GNUNET_break (0); | ||
79 | return GNUNET_SYSERR; | ||
80 | } | ||
81 | memcpy (raw, | ||
82 | &gi->bf_mutator, | ||
83 | sizeof (uint32_t)); | ||
84 | *raw_data = raw; | ||
85 | *raw_data_size = gi->bf_size + sizeof (uint32_t); | ||
86 | return GNUNET_OK; | ||
87 | } | ||
88 | |||
89 | |||
90 | /** | ||
91 | * Mark elements as "seen" using a hash of the element. Not supported | ||
92 | * by all block plugins. | ||
93 | * | ||
94 | * @param bg group to update | ||
95 | * @param seen_results results already seen | ||
96 | * @param seen_results_count number of entries in @a seen_results | ||
97 | */ | ||
98 | static void | ||
99 | bf_group_mark_seen_cb (struct GNUNET_BLOCK_Group *bg, | ||
100 | const struct GNUNET_HashCode *seen_results, | ||
101 | unsigned int seen_results_count) | ||
102 | { | ||
103 | struct BfGroupInternals *gi = bg->internal_cls; | ||
104 | |||
105 | for (unsigned int i = 0; i < seen_results_count; i++) | ||
106 | { | ||
107 | struct GNUNET_HashCode mhash; | ||
108 | |||
109 | GNUNET_BLOCK_mingle_hash (&seen_results[i], | ||
110 | gi->bf_mutator, | ||
111 | &mhash); | ||
112 | GNUNET_CONTAINER_bloomfilter_add (gi->bf, | ||
113 | &mhash); | ||
114 | } | ||
115 | } | ||
116 | |||
117 | |||
118 | /** | ||
119 | * Merge two groups, if possible. Not supported by all block plugins, | ||
120 | * can also fail if the nonces were different. | ||
121 | * | ||
122 | * @param bg1 group to update | ||
123 | * @param bg2 group to merge into @a bg1 | ||
124 | * @return #GNUNET_OK on success, #GNUNET_NO if the nonces were different and thus | ||
125 | * we failed. | ||
126 | */ | ||
127 | static enum GNUNET_GenericReturnValue | ||
128 | bf_group_merge_cb (struct GNUNET_BLOCK_Group *bg1, | ||
129 | const struct GNUNET_BLOCK_Group *bg2) | ||
130 | { | ||
131 | struct BfGroupInternals *gi1 = bg1->internal_cls; | ||
132 | struct BfGroupInternals *gi2 = bg2->internal_cls; | ||
133 | |||
134 | if (gi1->bf_mutator != gi2->bf_mutator) | ||
135 | return GNUNET_NO; | ||
136 | if (gi1->bf_size != gi2->bf_size) | ||
137 | return GNUNET_NO; | ||
138 | GNUNET_CONTAINER_bloomfilter_or2 (gi1->bf, | ||
139 | gi2->bf); | ||
140 | return GNUNET_OK; | ||
141 | } | ||
142 | |||
143 | |||
144 | /** | ||
145 | * Destroy resources used by a block group. | ||
146 | * | ||
147 | * @param bg group to destroy, NULL is allowed | ||
148 | */ | ||
149 | static void | ||
150 | bf_group_destroy_cb (struct GNUNET_BLOCK_Group *bg) | ||
151 | { | ||
152 | struct BfGroupInternals *gi = bg->internal_cls; | ||
153 | |||
154 | GNUNET_CONTAINER_bloomfilter_free (gi->bf); | ||
155 | GNUNET_free (gi); | ||
156 | GNUNET_free (bg); | ||
157 | } | ||
158 | |||
159 | |||
160 | /** | ||
161 | * Create a new block group that filters duplicates using a Bloom filter. | ||
162 | * | ||
163 | * @param ctx block context in which the block group is created | ||
164 | * @param bf_size size of the Bloom filter | ||
165 | * @param bf_k K-value for the Bloom filter | ||
166 | * @param type block type | ||
167 | * @param raw_data optional serialized prior state of the group, NULL if unavailable/fresh | ||
168 | * @param raw_data_size number of bytes in @a raw_data, 0 if unavailable/fresh | ||
169 | * @return block group handle, NULL if block groups are not supported | ||
170 | * by this @a type of block (this is not an error) | ||
171 | */ | ||
172 | struct GNUNET_BLOCK_Group * | ||
173 | GNUNET_BLOCK_GROUP_bf_create (void *cls, | ||
174 | size_t bf_size, | ||
175 | unsigned int bf_k, | ||
176 | enum GNUNET_BLOCK_Type type, | ||
177 | const void *raw_data, | ||
178 | size_t raw_data_size) | ||
179 | { | ||
180 | struct BfGroupInternals *gi; | ||
181 | struct GNUNET_BLOCK_Group *bg; | ||
182 | uint32_t nonce; | ||
183 | |||
184 | if ( (NULL != raw_data) && | ||
185 | (raw_data_size < sizeof (nonce)) ) | ||
186 | { | ||
187 | GNUNET_break_op (0); | ||
188 | return NULL; | ||
189 | } | ||
190 | if (NULL != raw_data) | ||
191 | { | ||
192 | memcpy (&nonce, | ||
193 | raw_data, | ||
194 | sizeof (nonce)); | ||
195 | raw_data += sizeof (nonce); | ||
196 | raw_data_size -= sizeof (nonce); | ||
197 | } | ||
198 | else | ||
199 | { | ||
200 | nonce = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_NONCE, | ||
201 | UINT32_MAX); | ||
202 | } | ||
203 | gi = GNUNET_new (struct BfGroupInternals); | ||
204 | gi->bf = GNUNET_CONTAINER_bloomfilter_init ((bf_size != raw_data_size) ? | ||
205 | NULL : raw_data, | ||
206 | bf_size, | ||
207 | bf_k); | ||
208 | gi->bf_mutator = nonce; | ||
209 | gi->bf_size = bf_size; | ||
210 | bg = GNUNET_new (struct GNUNET_BLOCK_Group); | ||
211 | bg->type = type; | ||
212 | bg->serialize_cb = &bf_group_serialize_cb; | ||
213 | bg->mark_seen_cb = &bf_group_mark_seen_cb; | ||
214 | bg->merge_cb = &bf_group_merge_cb; | ||
215 | bg->destroy_cb = &bf_group_destroy_cb; | ||
216 | bg->internal_cls = gi; | ||
217 | return bg; | ||
218 | } | ||
219 | |||
220 | |||
221 | /** | ||
222 | * Test if @a hc is contained in the Bloom filter of @a bg. If so, | ||
223 | * return #GNUNET_YES. If not, add @a hc to the Bloom filter and | ||
224 | * return #GNUNET_NO. | ||
225 | * | ||
226 | * @param bg block group to use for testing | ||
227 | * @param hc hash of element to evaluate | ||
228 | * @return #GNUNET_YES if @a hc is (likely) a duplicate | ||
229 | * #GNUNET_NO if @a hc was definitively not in @a bg (but now is) | ||
230 | */ | ||
231 | enum GNUNET_GenericReturnValue | ||
232 | GNUNET_BLOCK_GROUP_bf_test_and_set (struct GNUNET_BLOCK_Group *bg, | ||
233 | const struct GNUNET_HashCode *hc) | ||
234 | { | ||
235 | struct BfGroupInternals *gi; | ||
236 | struct GNUNET_HashCode mhash; | ||
237 | |||
238 | if (NULL == bg) | ||
239 | return GNUNET_NO; | ||
240 | gi = bg->internal_cls; | ||
241 | GNUNET_BLOCK_mingle_hash (hc, | ||
242 | gi->bf_mutator, | ||
243 | &mhash); | ||
244 | if (GNUNET_YES == | ||
245 | GNUNET_CONTAINER_bloomfilter_test (gi->bf, | ||
246 | &mhash)) | ||
247 | return GNUNET_YES; | ||
248 | GNUNET_CONTAINER_bloomfilter_add (gi->bf, | ||
249 | &mhash); | ||
250 | return GNUNET_NO; | ||
251 | } | ||
252 | |||
253 | |||
254 | /** | ||
255 | * How many bytes should a bloomfilter be if we have already seen | ||
256 | * entry_count responses? Sized so that do not have to | ||
257 | * re-size the filter too often (to keep it cheap). | ||
258 | * | ||
259 | * Since other peers will also add entries but not resize the filter, | ||
260 | * we should generally pick a slightly larger size than what the | ||
261 | * strict math would suggest. | ||
262 | * | ||
263 | * @param entry_count expected number of entries in the Bloom filter | ||
264 | * @param k number of bits set per entry | ||
265 | * @return must be a power of two and smaller or equal to 2^15. | ||
266 | */ | ||
267 | size_t | ||
268 | GNUNET_BLOCK_GROUP_compute_bloomfilter_size (unsigned int entry_count, | ||
269 | unsigned int k) | ||
270 | { | ||
271 | size_t size; | ||
272 | unsigned int ideal = (entry_count * k) / 4; | ||
273 | uint16_t max = 1 << 15; | ||
274 | |||
275 | if (entry_count > max) | ||
276 | return max; | ||
277 | size = 8; | ||
278 | while ((size < max) && (size < ideal)) | ||
279 | size *= 2; | ||
280 | if (size > max) | ||
281 | return max; | ||
282 | return size; | ||
283 | } | ||
284 | |||
285 | |||
286 | /* end of bg_bf.c */ | ||