diff options
Diffstat (limited to 'src/contrib/service/set/ibf.c')
-rw-r--r-- | src/contrib/service/set/ibf.c | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/src/contrib/service/set/ibf.c b/src/contrib/service/set/ibf.c new file mode 100644 index 000000000..b6fb52b6b --- /dev/null +++ b/src/contrib/service/set/ibf.c | |||
@@ -0,0 +1,410 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file set/ibf.c | ||
23 | * @brief implementation of the invertible bloom filter | ||
24 | * @author Florian Dold | ||
25 | */ | ||
26 | |||
27 | #include "platform.h" | ||
28 | #include "ibf.h" | ||
29 | |||
30 | /** | ||
31 | * Compute the key's hash from the key. | ||
32 | * Redefine to use a different hash function. | ||
33 | */ | ||
34 | #define IBF_KEY_HASH_VAL(k) (GNUNET_CRYPTO_crc32_n (&(k), sizeof(struct \ | ||
35 | IBF_KeyHash))) | ||
36 | |||
37 | /** | ||
38 | * Create a key from a hashcode. | ||
39 | * | ||
40 | * @param hash the hashcode | ||
41 | * @return a key | ||
42 | */ | ||
43 | struct IBF_Key | ||
44 | ibf_key_from_hashcode (const struct GNUNET_HashCode *hash) | ||
45 | { | ||
46 | return *(struct IBF_Key *) hash; | ||
47 | } | ||
48 | |||
49 | |||
50 | /** | ||
51 | * Create a hashcode from a key, by replicating the key | ||
52 | * until the hascode is filled | ||
53 | * | ||
54 | * @param key the key | ||
55 | * @param dst hashcode to store the result in | ||
56 | */ | ||
57 | void | ||
58 | ibf_hashcode_from_key (struct IBF_Key key, | ||
59 | struct GNUNET_HashCode *dst) | ||
60 | { | ||
61 | struct IBF_Key *p; | ||
62 | unsigned int i; | ||
63 | const unsigned int keys_per_hashcode = sizeof(struct GNUNET_HashCode) | ||
64 | / sizeof(struct IBF_Key); | ||
65 | |||
66 | p = (struct IBF_Key *) dst; | ||
67 | for (i = 0; i < keys_per_hashcode; i++) | ||
68 | *p++ = key; | ||
69 | } | ||
70 | |||
71 | |||
72 | /** | ||
73 | * Create an invertible bloom filter. | ||
74 | * | ||
75 | * @param size number of IBF buckets | ||
76 | * @param hash_num number of buckets one element is hashed in | ||
77 | * @return the newly created invertible bloom filter, NULL on error | ||
78 | */ | ||
79 | struct InvertibleBloomFilter * | ||
80 | ibf_create (uint32_t size, uint8_t hash_num) | ||
81 | { | ||
82 | struct InvertibleBloomFilter *ibf; | ||
83 | |||
84 | GNUNET_assert (0 != size); | ||
85 | |||
86 | ibf = GNUNET_new (struct InvertibleBloomFilter); | ||
87 | ibf->count = GNUNET_malloc_large (size * sizeof(uint8_t)); | ||
88 | if (NULL == ibf->count) | ||
89 | { | ||
90 | GNUNET_free (ibf); | ||
91 | return NULL; | ||
92 | } | ||
93 | ibf->key_sum = GNUNET_malloc_large (size * sizeof(struct IBF_Key)); | ||
94 | if (NULL == ibf->key_sum) | ||
95 | { | ||
96 | GNUNET_free (ibf->count); | ||
97 | GNUNET_free (ibf); | ||
98 | return NULL; | ||
99 | } | ||
100 | ibf->key_hash_sum = GNUNET_malloc_large (size * sizeof(struct IBF_KeyHash)); | ||
101 | if (NULL == ibf->key_hash_sum) | ||
102 | { | ||
103 | GNUNET_free (ibf->key_sum); | ||
104 | GNUNET_free (ibf->count); | ||
105 | GNUNET_free (ibf); | ||
106 | return NULL; | ||
107 | } | ||
108 | ibf->size = size; | ||
109 | ibf->hash_num = hash_num; | ||
110 | |||
111 | return ibf; | ||
112 | } | ||
113 | |||
114 | |||
115 | /** | ||
116 | * Store unique bucket indices for the specified key in dst. | ||
117 | */ | ||
118 | static void | ||
119 | ibf_get_indices (const struct InvertibleBloomFilter *ibf, | ||
120 | struct IBF_Key key, | ||
121 | int *dst) | ||
122 | { | ||
123 | uint32_t filled; | ||
124 | uint32_t i; | ||
125 | uint32_t bucket; | ||
126 | |||
127 | bucket = GNUNET_CRYPTO_crc32_n (&key, sizeof key); | ||
128 | for (i = 0, filled = 0; filled < ibf->hash_num; i++) | ||
129 | { | ||
130 | unsigned int j; | ||
131 | uint64_t x; | ||
132 | for (j = 0; j < filled; j++) | ||
133 | if (dst[j] == bucket) | ||
134 | goto try_next; | ||
135 | dst[filled++] = bucket % ibf->size; | ||
136 | try_next:; | ||
137 | x = ((uint64_t) bucket << 32) | i; | ||
138 | bucket = GNUNET_CRYPTO_crc32_n (&x, sizeof x); | ||
139 | } | ||
140 | } | ||
141 | |||
142 | |||
143 | static void | ||
144 | ibf_insert_into (struct InvertibleBloomFilter *ibf, | ||
145 | struct IBF_Key key, | ||
146 | const int *buckets, int side) | ||
147 | { | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < ibf->hash_num; i++) | ||
151 | { | ||
152 | const int bucket = buckets[i]; | ||
153 | ibf->count[bucket].count_val += side; | ||
154 | ibf->key_sum[bucket].key_val ^= key.key_val; | ||
155 | ibf->key_hash_sum[bucket].key_hash_val | ||
156 | ^= IBF_KEY_HASH_VAL (key); | ||
157 | } | ||
158 | } | ||
159 | |||
160 | |||
161 | /** | ||
162 | * Insert a key into an IBF. | ||
163 | * | ||
164 | * @param ibf the IBF | ||
165 | * @param key the element's hash code | ||
166 | */ | ||
167 | void | ||
168 | ibf_insert (struct InvertibleBloomFilter *ibf, struct IBF_Key key) | ||
169 | { | ||
170 | int buckets[ibf->hash_num]; | ||
171 | |||
172 | GNUNET_assert (ibf->hash_num <= ibf->size); | ||
173 | ibf_get_indices (ibf, key, buckets); | ||
174 | ibf_insert_into (ibf, key, buckets, 1); | ||
175 | } | ||
176 | |||
177 | |||
178 | /** | ||
179 | * Remove a key from an IBF. | ||
180 | * | ||
181 | * @param ibf the IBF | ||
182 | * @param key the element's hash code | ||
183 | */ | ||
184 | void | ||
185 | ibf_remove (struct InvertibleBloomFilter *ibf, struct IBF_Key key) | ||
186 | { | ||
187 | int buckets[ibf->hash_num]; | ||
188 | |||
189 | GNUNET_assert (ibf->hash_num <= ibf->size); | ||
190 | ibf_get_indices (ibf, key, buckets); | ||
191 | ibf_insert_into (ibf, key, buckets, -1); | ||
192 | } | ||
193 | |||
194 | |||
195 | /** | ||
196 | * Test is the IBF is empty, i.e. all counts, keys and key hashes are zero. | ||
197 | */ | ||
198 | static int | ||
199 | ibf_is_empty (struct InvertibleBloomFilter *ibf) | ||
200 | { | ||
201 | int i; | ||
202 | |||
203 | for (i = 0; i < ibf->size; i++) | ||
204 | { | ||
205 | if (0 != ibf->count[i].count_val) | ||
206 | return GNUNET_NO; | ||
207 | if (0 != ibf->key_hash_sum[i].key_hash_val) | ||
208 | return GNUNET_NO; | ||
209 | if (0 != ibf->key_sum[i].key_val) | ||
210 | return GNUNET_NO; | ||
211 | } | ||
212 | return GNUNET_YES; | ||
213 | } | ||
214 | |||
215 | |||
216 | /** | ||
217 | * Decode and remove an element from the IBF, if possible. | ||
218 | * | ||
219 | * @param ibf the invertible bloom filter to decode | ||
220 | * @param ret_side sign of the cell's count where the decoded element came from. | ||
221 | * A negative sign indicates that the element was recovered | ||
222 | * resides in an IBF that was previously subtracted from. | ||
223 | * @param ret_id receives the hash code of the decoded element, if successful | ||
224 | * @return GNUNET_YES if decoding an element was successful, | ||
225 | * GNUNET_NO if the IBF is empty, | ||
226 | * GNUNET_SYSERR if the decoding has failed | ||
227 | */ | ||
228 | int | ||
229 | ibf_decode (struct InvertibleBloomFilter *ibf, | ||
230 | int *ret_side, struct IBF_Key *ret_id) | ||
231 | { | ||
232 | struct IBF_KeyHash hash; | ||
233 | int i; | ||
234 | int buckets[ibf->hash_num]; | ||
235 | |||
236 | GNUNET_assert (NULL != ibf); | ||
237 | |||
238 | for (i = 0; i < ibf->size; i++) | ||
239 | { | ||
240 | int j; | ||
241 | int hit; | ||
242 | |||
243 | /* we can only decode from pure buckets */ | ||
244 | if ((1 != ibf->count[i].count_val) && (-1 != ibf->count[i].count_val)) | ||
245 | continue; | ||
246 | |||
247 | hash.key_hash_val = IBF_KEY_HASH_VAL (ibf->key_sum[i]); | ||
248 | |||
249 | /* test if the hash matches the key */ | ||
250 | if (hash.key_hash_val != ibf->key_hash_sum[i].key_hash_val) | ||
251 | continue; | ||
252 | |||
253 | /* test if key in bucket hits its own location, | ||
254 | * if not, the key hash was subject to collision */ | ||
255 | hit = GNUNET_NO; | ||
256 | ibf_get_indices (ibf, ibf->key_sum[i], buckets); | ||
257 | for (j = 0; j < ibf->hash_num; j++) | ||
258 | if (buckets[j] == i) | ||
259 | hit = GNUNET_YES; | ||
260 | |||
261 | if (GNUNET_NO == hit) | ||
262 | continue; | ||
263 | |||
264 | if (NULL != ret_side) | ||
265 | *ret_side = ibf->count[i].count_val; | ||
266 | if (NULL != ret_id) | ||
267 | *ret_id = ibf->key_sum[i]; | ||
268 | |||
269 | /* insert on the opposite side, effectively removing the element */ | ||
270 | ibf_insert_into (ibf, ibf->key_sum[i], buckets, -ibf->count[i].count_val); | ||
271 | |||
272 | return GNUNET_YES; | ||
273 | } | ||
274 | |||
275 | if (GNUNET_YES == ibf_is_empty (ibf)) | ||
276 | return GNUNET_NO; | ||
277 | return GNUNET_SYSERR; | ||
278 | } | ||
279 | |||
280 | |||
281 | /** | ||
282 | * Write buckets from an ibf to a buffer. | ||
283 | * Exactly (IBF_BUCKET_SIZE*ibf->size) bytes are written to buf. | ||
284 | * | ||
285 | * @param ibf the ibf to write | ||
286 | * @param start with which bucket to start | ||
287 | * @param count how many buckets to write | ||
288 | * @param buf buffer to write the data to | ||
289 | */ | ||
290 | void | ||
291 | ibf_write_slice (const struct InvertibleBloomFilter *ibf, uint32_t start, | ||
292 | uint32_t count, void *buf) | ||
293 | { | ||
294 | struct IBF_Key *key_dst; | ||
295 | struct IBF_KeyHash *key_hash_dst; | ||
296 | struct IBF_Count *count_dst; | ||
297 | |||
298 | GNUNET_assert (start + count <= ibf->size); | ||
299 | |||
300 | /* copy keys */ | ||
301 | key_dst = (struct IBF_Key *) buf; | ||
302 | GNUNET_memcpy (key_dst, ibf->key_sum + start, count * sizeof *key_dst); | ||
303 | key_dst += count; | ||
304 | /* copy key hashes */ | ||
305 | key_hash_dst = (struct IBF_KeyHash *) key_dst; | ||
306 | GNUNET_memcpy (key_hash_dst, ibf->key_hash_sum + start, count | ||
307 | * sizeof *key_hash_dst); | ||
308 | key_hash_dst += count; | ||
309 | /* copy counts */ | ||
310 | count_dst = (struct IBF_Count *) key_hash_dst; | ||
311 | GNUNET_memcpy (count_dst, ibf->count + start, count * sizeof *count_dst); | ||
312 | } | ||
313 | |||
314 | |||
315 | /** | ||
316 | * Read buckets from a buffer into an ibf. | ||
317 | * | ||
318 | * @param buf pointer to the buffer to read from | ||
319 | * @param start which bucket to start at | ||
320 | * @param count how many buckets to read | ||
321 | * @param ibf the ibf to read from | ||
322 | */ | ||
323 | void | ||
324 | ibf_read_slice (const void *buf, uint32_t start, uint32_t count, struct | ||
325 | InvertibleBloomFilter *ibf) | ||
326 | { | ||
327 | struct IBF_Key *key_src; | ||
328 | struct IBF_KeyHash *key_hash_src; | ||
329 | struct IBF_Count *count_src; | ||
330 | |||
331 | GNUNET_assert (count > 0); | ||
332 | GNUNET_assert (start + count <= ibf->size); | ||
333 | |||
334 | /* copy keys */ | ||
335 | key_src = (struct IBF_Key *) buf; | ||
336 | GNUNET_memcpy (ibf->key_sum + start, key_src, count * sizeof *key_src); | ||
337 | key_src += count; | ||
338 | /* copy key hashes */ | ||
339 | key_hash_src = (struct IBF_KeyHash *) key_src; | ||
340 | GNUNET_memcpy (ibf->key_hash_sum + start, key_hash_src, count | ||
341 | * sizeof *key_hash_src); | ||
342 | key_hash_src += count; | ||
343 | /* copy counts */ | ||
344 | count_src = (struct IBF_Count *) key_hash_src; | ||
345 | GNUNET_memcpy (ibf->count + start, count_src, count * sizeof *count_src); | ||
346 | } | ||
347 | |||
348 | |||
349 | /** | ||
350 | * Subtract ibf2 from ibf1, storing the result in ibf1. | ||
351 | * The two IBF's must have the same parameters size and hash_num. | ||
352 | * | ||
353 | * @param ibf1 IBF that is subtracted from | ||
354 | * @param ibf2 IBF that will be subtracted from ibf1 | ||
355 | */ | ||
356 | void | ||
357 | ibf_subtract (struct InvertibleBloomFilter *ibf1, const struct | ||
358 | InvertibleBloomFilter *ibf2) | ||
359 | { | ||
360 | int i; | ||
361 | |||
362 | GNUNET_assert (ibf1->size == ibf2->size); | ||
363 | GNUNET_assert (ibf1->hash_num == ibf2->hash_num); | ||
364 | |||
365 | for (i = 0; i < ibf1->size; i++) | ||
366 | { | ||
367 | ibf1->count[i].count_val -= ibf2->count[i].count_val; | ||
368 | ibf1->key_hash_sum[i].key_hash_val ^= ibf2->key_hash_sum[i].key_hash_val; | ||
369 | ibf1->key_sum[i].key_val ^= ibf2->key_sum[i].key_val; | ||
370 | } | ||
371 | } | ||
372 | |||
373 | |||
374 | /** | ||
375 | * Create a copy of an IBF, the copy has to be destroyed properly. | ||
376 | * | ||
377 | * @param ibf the IBF to copy | ||
378 | */ | ||
379 | struct InvertibleBloomFilter * | ||
380 | ibf_dup (const struct InvertibleBloomFilter *ibf) | ||
381 | { | ||
382 | struct InvertibleBloomFilter *copy; | ||
383 | |||
384 | copy = GNUNET_malloc (sizeof *copy); | ||
385 | copy->hash_num = ibf->hash_num; | ||
386 | copy->size = ibf->size; | ||
387 | copy->key_hash_sum = GNUNET_memdup (ibf->key_hash_sum, ibf->size | ||
388 | * sizeof(struct IBF_KeyHash)); | ||
389 | copy->key_sum = GNUNET_memdup (ibf->key_sum, ibf->size * sizeof(struct | ||
390 | IBF_Key)); | ||
391 | copy->count = GNUNET_memdup (ibf->count, ibf->size * sizeof(struct | ||
392 | IBF_Count)); | ||
393 | return copy; | ||
394 | } | ||
395 | |||
396 | |||
397 | /** | ||
398 | * Destroy all resources associated with the invertible bloom filter. | ||
399 | * No more ibf_*-functions may be called on ibf after calling destroy. | ||
400 | * | ||
401 | * @param ibf the intertible bloom filter to destroy | ||
402 | */ | ||
403 | void | ||
404 | ibf_destroy (struct InvertibleBloomFilter *ibf) | ||
405 | { | ||
406 | GNUNET_free (ibf->key_sum); | ||
407 | GNUNET_free (ibf->key_hash_sum); | ||
408 | GNUNET_free (ibf->count); | ||
409 | GNUNET_free (ibf); | ||
410 | } | ||