diff options
Diffstat (limited to 'src/util/container_bloomfilter.c')
-rw-r--r-- | src/util/container_bloomfilter.c | 902 |
1 files changed, 0 insertions, 902 deletions
diff --git a/src/util/container_bloomfilter.c b/src/util/container_bloomfilter.c deleted file mode 100644 index d7460043d..000000000 --- a/src/util/container_bloomfilter.c +++ /dev/null | |||
@@ -1,902 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file util/container_bloomfilter.c | ||
22 | * @brief data structure used to reduce disk accesses. | ||
23 | * | ||
24 | * The idea basically: Create a signature for each element in the | ||
25 | * database. Add those signatures to a bit array. When doing a lookup, | ||
26 | * check if the bit array matches the signature of the requested | ||
27 | * element. If yes, address the disk, otherwise return 'not found'. | ||
28 | * | ||
29 | * A property of the bloom filter is that sometimes we will have | ||
30 | * a match even if the element is not on the disk (then we do | ||
31 | * an unnecessary disk access), but what's most important is that | ||
32 | * we never get a single "false negative". | ||
33 | * | ||
34 | * To be able to delete entries from the bloom filter, we maintain | ||
35 | * a 4 bit counter in the file on the drive (we still use only one | ||
36 | * bit in memory). | ||
37 | * | ||
38 | * @author Igor Wronsky | ||
39 | * @author Christian Grothoff | ||
40 | */ | ||
41 | |||
42 | #include "platform.h" | ||
43 | #include "gnunet_util_lib.h" | ||
44 | |||
45 | #define LOG(kind, ...) \ | ||
46 | GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__) | ||
47 | |||
48 | #define LOG_STRERROR(kind, syscall) \ | ||
49 | GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall) | ||
50 | |||
51 | #define LOG_STRERROR_FILE(kind, syscall, filename) \ | ||
52 | GNUNET_log_from_strerror_file (kind, \ | ||
53 | "util-container-bloomfilter", \ | ||
54 | syscall, \ | ||
55 | filename) | ||
56 | |||
57 | struct GNUNET_CONTAINER_BloomFilter | ||
58 | { | ||
59 | /** | ||
60 | * The actual bloomfilter bit array | ||
61 | */ | ||
62 | char *bitArray; | ||
63 | |||
64 | /** | ||
65 | * Filename of the filter | ||
66 | */ | ||
67 | char *filename; | ||
68 | |||
69 | /** | ||
70 | * The bit counter file on disk | ||
71 | */ | ||
72 | struct GNUNET_DISK_FileHandle *fh; | ||
73 | |||
74 | /** | ||
75 | * How many bits we set for each stored element | ||
76 | */ | ||
77 | unsigned int addressesPerElement; | ||
78 | |||
79 | /** | ||
80 | * Size of bitArray in bytes | ||
81 | */ | ||
82 | size_t bitArraySize; | ||
83 | }; | ||
84 | |||
85 | |||
86 | /** | ||
87 | * Get the number of the addresses set per element in the bloom filter. | ||
88 | * | ||
89 | * @param bf the filter | ||
90 | * @return addresses set per element in the bf | ||
91 | */ | ||
92 | size_t | ||
93 | GNUNET_CONTAINER_bloomfilter_get_element_addresses ( | ||
94 | const struct GNUNET_CONTAINER_BloomFilter *bf) | ||
95 | { | ||
96 | if (bf == NULL) | ||
97 | return 0; | ||
98 | return bf->addressesPerElement; | ||
99 | } | ||
100 | |||
101 | |||
102 | /** | ||
103 | * Get size of the bloom filter. | ||
104 | * | ||
105 | * @param bf the filter | ||
106 | * @return number of bytes used for the data of the bloom filter | ||
107 | */ | ||
108 | size_t | ||
109 | GNUNET_CONTAINER_bloomfilter_get_size ( | ||
110 | const struct GNUNET_CONTAINER_BloomFilter *bf) | ||
111 | { | ||
112 | if (bf == NULL) | ||
113 | return 0; | ||
114 | return bf->bitArraySize; | ||
115 | } | ||
116 | |||
117 | |||
118 | /** | ||
119 | * Copy an existing memory. Any association with a file | ||
120 | * on-disk will be lost in the process. | ||
121 | * @param bf the filter to copy | ||
122 | * @return copy of the bf | ||
123 | */ | ||
124 | struct GNUNET_CONTAINER_BloomFilter * | ||
125 | GNUNET_CONTAINER_bloomfilter_copy ( | ||
126 | const struct GNUNET_CONTAINER_BloomFilter *bf) | ||
127 | { | ||
128 | return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, | ||
129 | bf->bitArraySize, | ||
130 | bf->addressesPerElement); | ||
131 | } | ||
132 | |||
133 | |||
134 | /** | ||
135 | * Sets a bit active in the bitArray. Increment bit-specific | ||
136 | * usage counter on disk only if below 4bit max (==15). | ||
137 | * | ||
138 | * @param bitArray memory area to set the bit in | ||
139 | * @param bitIdx which bit to set | ||
140 | */ | ||
141 | static void | ||
142 | setBit (char *bitArray, unsigned int bitIdx) | ||
143 | { | ||
144 | size_t arraySlot; | ||
145 | unsigned int targetBit; | ||
146 | |||
147 | arraySlot = bitIdx / 8; | ||
148 | targetBit = (1L << (bitIdx % 8)); | ||
149 | bitArray[arraySlot] |= targetBit; | ||
150 | } | ||
151 | |||
152 | |||
153 | /** | ||
154 | * Clears a bit from bitArray. Bit is cleared from the array | ||
155 | * only if the respective usage counter on the disk hits/is zero. | ||
156 | * | ||
157 | * @param bitArray memory area to set the bit in | ||
158 | * @param bitIdx which bit to unset | ||
159 | */ | ||
160 | static void | ||
161 | clearBit (char *bitArray, unsigned int bitIdx) | ||
162 | { | ||
163 | size_t slot; | ||
164 | unsigned int targetBit; | ||
165 | |||
166 | slot = bitIdx / 8; | ||
167 | targetBit = (1L << (bitIdx % 8)); | ||
168 | bitArray[slot] = bitArray[slot] & (~targetBit); | ||
169 | } | ||
170 | |||
171 | |||
172 | /** | ||
173 | * Checks if a bit is active in the bitArray | ||
174 | * | ||
175 | * @param bitArray memory area to set the bit in | ||
176 | * @param bitIdx which bit to test | ||
177 | * @return GNUNET_YES if the bit is set, GNUNET_NO if not. | ||
178 | */ | ||
179 | static int | ||
180 | testBit (char *bitArray, unsigned int bitIdx) | ||
181 | { | ||
182 | size_t slot; | ||
183 | unsigned int targetBit; | ||
184 | |||
185 | slot = bitIdx / 8; | ||
186 | targetBit = (1L << (bitIdx % 8)); | ||
187 | if (bitArray[slot] & targetBit) | ||
188 | return GNUNET_YES; | ||
189 | else | ||
190 | return GNUNET_NO; | ||
191 | } | ||
192 | |||
193 | |||
194 | /** | ||
195 | * Sets a bit active in the bitArray and increments | ||
196 | * bit-specific usage counter on disk (but only if | ||
197 | * the counter was below 4 bit max (==15)). | ||
198 | * | ||
199 | * @param bitArray memory area to set the bit in | ||
200 | * @param bitIdx which bit to test | ||
201 | * @param fh A file to keep the 4 bit address usage counters in | ||
202 | */ | ||
203 | static void | ||
204 | incrementBit (char *bitArray, | ||
205 | unsigned int bitIdx, | ||
206 | const struct GNUNET_DISK_FileHandle *fh) | ||
207 | { | ||
208 | off_t fileSlot; | ||
209 | unsigned char value; | ||
210 | unsigned int high; | ||
211 | unsigned int low; | ||
212 | unsigned int targetLoc; | ||
213 | |||
214 | setBit (bitArray, bitIdx); | ||
215 | if (GNUNET_DISK_handle_invalid (fh)) | ||
216 | return; | ||
217 | /* Update the counter file on disk */ | ||
218 | fileSlot = bitIdx / 2; | ||
219 | targetLoc = bitIdx % 2; | ||
220 | |||
221 | GNUNET_assert (fileSlot == | ||
222 | GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET)); | ||
223 | if (1 != GNUNET_DISK_file_read (fh, &value, 1)) | ||
224 | value = 0; | ||
225 | low = value & 0xF; | ||
226 | high = (value & (~0xF)) >> 4; | ||
227 | |||
228 | if (targetLoc == 0) | ||
229 | { | ||
230 | if (low < 0xF) | ||
231 | low++; | ||
232 | } | ||
233 | else | ||
234 | { | ||
235 | if (high < 0xF) | ||
236 | high++; | ||
237 | } | ||
238 | value = ((high << 4) | low); | ||
239 | GNUNET_assert (fileSlot == | ||
240 | GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET)); | ||
241 | GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1)); | ||
242 | } | ||
243 | |||
244 | |||
245 | /** | ||
246 | * Clears a bit from bitArray if the respective usage | ||
247 | * counter on the disk hits/is zero. | ||
248 | * | ||
249 | * @param bitArray memory area to set the bit in | ||
250 | * @param bitIdx which bit to test | ||
251 | * @param fh A file to keep the 4bit address usage counters in | ||
252 | */ | ||
253 | static void | ||
254 | decrementBit (char *bitArray, | ||
255 | unsigned int bitIdx, | ||
256 | const struct GNUNET_DISK_FileHandle *fh) | ||
257 | { | ||
258 | off_t fileslot; | ||
259 | unsigned char value; | ||
260 | unsigned int high; | ||
261 | unsigned int low; | ||
262 | unsigned int targetLoc; | ||
263 | |||
264 | if (GNUNET_DISK_handle_invalid (fh)) | ||
265 | return; /* cannot decrement! */ | ||
266 | /* Each char slot in the counter file holds two 4 bit counters */ | ||
267 | fileslot = bitIdx / 2; | ||
268 | targetLoc = bitIdx % 2; | ||
269 | if (GNUNET_SYSERR == | ||
270 | GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET)) | ||
271 | { | ||
272 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek"); | ||
273 | return; | ||
274 | } | ||
275 | if (1 != GNUNET_DISK_file_read (fh, &value, 1)) | ||
276 | value = 0; | ||
277 | low = value & 0xF; | ||
278 | high = (value & 0xF0) >> 4; | ||
279 | |||
280 | /* decrement, but once we have reached the max, never go back! */ | ||
281 | if (targetLoc == 0) | ||
282 | { | ||
283 | if ((low > 0) && (low < 0xF)) | ||
284 | low--; | ||
285 | if (low == 0) | ||
286 | { | ||
287 | clearBit (bitArray, bitIdx); | ||
288 | } | ||
289 | } | ||
290 | else | ||
291 | { | ||
292 | if ((high > 0) && (high < 0xF)) | ||
293 | high--; | ||
294 | if (high == 0) | ||
295 | { | ||
296 | clearBit (bitArray, bitIdx); | ||
297 | } | ||
298 | } | ||
299 | value = ((high << 4) | low); | ||
300 | if (GNUNET_SYSERR == | ||
301 | GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET)) | ||
302 | { | ||
303 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek"); | ||
304 | return; | ||
305 | } | ||
306 | GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1)); | ||
307 | } | ||
308 | |||
309 | |||
310 | #define BUFFSIZE 65536 | ||
311 | |||
312 | /** | ||
313 | * Creates a file filled with zeroes | ||
314 | * | ||
315 | * @param fh the file handle | ||
316 | * @param size the size of the file | ||
317 | * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise | ||
318 | */ | ||
319 | static int | ||
320 | make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size) | ||
321 | { | ||
322 | char buffer[BUFFSIZE]; | ||
323 | size_t bytesleft = size; | ||
324 | int res = 0; | ||
325 | |||
326 | if (GNUNET_DISK_handle_invalid (fh)) | ||
327 | return GNUNET_SYSERR; | ||
328 | memset (buffer, 0, sizeof(buffer)); | ||
329 | GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET); | ||
330 | while (bytesleft > 0) | ||
331 | { | ||
332 | if (bytesleft > sizeof(buffer)) | ||
333 | { | ||
334 | res = GNUNET_DISK_file_write (fh, buffer, sizeof(buffer)); | ||
335 | if (res >= 0) | ||
336 | bytesleft -= res; | ||
337 | } | ||
338 | else | ||
339 | { | ||
340 | res = GNUNET_DISK_file_write (fh, buffer, bytesleft); | ||
341 | if (res >= 0) | ||
342 | bytesleft -= res; | ||
343 | } | ||
344 | if (GNUNET_SYSERR == res) | ||
345 | return GNUNET_SYSERR; | ||
346 | } | ||
347 | return GNUNET_OK; | ||
348 | } | ||
349 | |||
350 | |||
351 | /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */ | ||
352 | |||
353 | /** | ||
354 | * Iterator (callback) method to be called by the | ||
355 | * bloomfilter iterator on each bit that is to be | ||
356 | * set or tested for the key. | ||
357 | * | ||
358 | * @param cls closure | ||
359 | * @param bf the filter to manipulate | ||
360 | * @param bit the current bit | ||
361 | * @return GNUNET_YES to continue, GNUNET_NO to stop early | ||
362 | */ | ||
363 | typedef int (*BitIterator) (void *cls, | ||
364 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
365 | unsigned int bit); | ||
366 | |||
367 | |||
368 | /** | ||
369 | * Call an iterator for each bit that the bloomfilter | ||
370 | * must test or set for this element. | ||
371 | * | ||
372 | * @param bf the filter | ||
373 | * @param callback the method to call | ||
374 | * @param arg extra argument to callback | ||
375 | * @param key the key for which we iterate over the BF bits | ||
376 | */ | ||
377 | static void | ||
378 | iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
379 | BitIterator callback, | ||
380 | void *arg, | ||
381 | const struct GNUNET_HashCode *key) | ||
382 | { | ||
383 | struct GNUNET_HashCode tmp[2]; | ||
384 | int bitCount; | ||
385 | unsigned int round; | ||
386 | unsigned int slot = 0; | ||
387 | |||
388 | bitCount = bf->addressesPerElement; | ||
389 | tmp[0] = *key; | ||
390 | round = 0; | ||
391 | GNUNET_assert (bf->bitArraySize > 0); | ||
392 | GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize); | ||
393 | while (bitCount > 0) | ||
394 | { | ||
395 | while (slot < (sizeof(struct GNUNET_HashCode) / sizeof(uint32_t))) | ||
396 | { | ||
397 | if (GNUNET_YES != | ||
398 | callback (arg, | ||
399 | bf, | ||
400 | ntohl ((((uint32_t *) &tmp[round & 1])[slot])) | ||
401 | % ((bf->bitArraySize * 8LL)))) | ||
402 | return; | ||
403 | slot++; | ||
404 | bitCount--; | ||
405 | if (bitCount == 0) | ||
406 | break; | ||
407 | } | ||
408 | if (bitCount > 0) | ||
409 | { | ||
410 | GNUNET_CRYPTO_hash (&tmp[round & 1], | ||
411 | sizeof(struct GNUNET_HashCode), | ||
412 | &tmp[(round + 1) & 1]); | ||
413 | round++; | ||
414 | slot = 0; | ||
415 | } | ||
416 | } | ||
417 | } | ||
418 | |||
419 | |||
420 | /** | ||
421 | * Callback: increment bit | ||
422 | * | ||
423 | * @param cls pointer to writeable form of bf | ||
424 | * @param bf the filter to manipulate | ||
425 | * @param bit the bit to increment | ||
426 | * @return GNUNET_YES | ||
427 | */ | ||
428 | static int | ||
429 | incrementBitCallback (void *cls, | ||
430 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
431 | unsigned int bit) | ||
432 | { | ||
433 | struct GNUNET_CONTAINER_BloomFilter *b = cls; | ||
434 | |||
435 | incrementBit (b->bitArray, bit, bf->fh); | ||
436 | return GNUNET_YES; | ||
437 | } | ||
438 | |||
439 | |||
440 | /** | ||
441 | * Callback: decrement bit | ||
442 | * | ||
443 | * @param cls pointer to writeable form of bf | ||
444 | * @param bf the filter to manipulate | ||
445 | * @param bit the bit to decrement | ||
446 | * @return GNUNET_YES | ||
447 | */ | ||
448 | static int | ||
449 | decrementBitCallback (void *cls, | ||
450 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
451 | unsigned int bit) | ||
452 | { | ||
453 | struct GNUNET_CONTAINER_BloomFilter *b = cls; | ||
454 | |||
455 | decrementBit (b->bitArray, bit, bf->fh); | ||
456 | return GNUNET_YES; | ||
457 | } | ||
458 | |||
459 | |||
460 | /** | ||
461 | * Callback: test if all bits are set | ||
462 | * | ||
463 | * @param cls pointer set to GNUNET_NO if bit is not set | ||
464 | * @param bf the filter | ||
465 | * @param bit the bit to test | ||
466 | * @return YES if the bit is set, NO if not | ||
467 | */ | ||
468 | static int | ||
469 | testBitCallback (void *cls, | ||
470 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
471 | unsigned int bit) | ||
472 | { | ||
473 | int *arg = cls; | ||
474 | |||
475 | if (GNUNET_NO == testBit (bf->bitArray, bit)) | ||
476 | { | ||
477 | *arg = GNUNET_NO; | ||
478 | return GNUNET_NO; | ||
479 | } | ||
480 | return GNUNET_YES; | ||
481 | } | ||
482 | |||
483 | |||
484 | /* *********************** INTERFACE **************** */ | ||
485 | |||
486 | /** | ||
487 | * Load a bloom-filter from a file. | ||
488 | * | ||
489 | * @param filename the name of the file (or the prefix) | ||
490 | * @param size the size of the bloom-filter (number of | ||
491 | * bytes of storage space to use); will be rounded up | ||
492 | * to next power of 2 | ||
493 | * @param k the number of GNUNET_CRYPTO_hash-functions to apply per | ||
494 | * element (number of bits set per element in the set) | ||
495 | * @return the bloomfilter | ||
496 | */ | ||
497 | struct GNUNET_CONTAINER_BloomFilter * | ||
498 | GNUNET_CONTAINER_bloomfilter_load (const char *filename, | ||
499 | size_t size, | ||
500 | unsigned int k) | ||
501 | { | ||
502 | struct GNUNET_CONTAINER_BloomFilter *bf; | ||
503 | char *rbuff; | ||
504 | off_t pos; | ||
505 | int i; | ||
506 | size_t ui; | ||
507 | off_t fsize; | ||
508 | int must_read; | ||
509 | |||
510 | GNUNET_assert (NULL != filename); | ||
511 | if ((k == 0) || (size == 0)) | ||
512 | return NULL; | ||
513 | if (size < BUFFSIZE) | ||
514 | size = BUFFSIZE; | ||
515 | ui = 1; | ||
516 | while ((ui < size) && (ui * 2 > ui)) | ||
517 | ui *= 2; | ||
518 | size = ui; /* make sure it's a power of 2 */ | ||
519 | |||
520 | bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter); | ||
521 | /* Try to open a bloomfilter file */ | ||
522 | if (GNUNET_YES == GNUNET_DISK_file_test (filename)) | ||
523 | bf->fh = GNUNET_DISK_file_open (filename, | ||
524 | GNUNET_DISK_OPEN_READWRITE, | ||
525 | GNUNET_DISK_PERM_USER_READ | ||
526 | | GNUNET_DISK_PERM_USER_WRITE); | ||
527 | if (NULL != bf->fh) | ||
528 | { | ||
529 | /* file existed, try to read it! */ | ||
530 | must_read = GNUNET_YES; | ||
531 | if (GNUNET_OK != GNUNET_DISK_file_handle_size (bf->fh, &fsize)) | ||
532 | { | ||
533 | GNUNET_DISK_file_close (bf->fh); | ||
534 | GNUNET_free (bf); | ||
535 | return NULL; | ||
536 | } | ||
537 | if (0 == fsize) | ||
538 | { | ||
539 | /* found existing empty file, just overwrite */ | ||
540 | if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL)) | ||
541 | { | ||
542 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write"); | ||
543 | GNUNET_DISK_file_close (bf->fh); | ||
544 | GNUNET_free (bf); | ||
545 | return NULL; | ||
546 | } | ||
547 | } | ||
548 | else if (fsize != ((off_t) size) * 4LL) | ||
549 | { | ||
550 | GNUNET_log ( | ||
551 | GNUNET_ERROR_TYPE_ERROR, | ||
552 | _ ( | ||
553 | "Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"), | ||
554 | (unsigned long long) (size * 4LL), | ||
555 | (unsigned long long) fsize); | ||
556 | GNUNET_DISK_file_close (bf->fh); | ||
557 | GNUNET_free (bf); | ||
558 | return NULL; | ||
559 | } | ||
560 | } | ||
561 | else | ||
562 | { | ||
563 | /* file did not exist, don't read, just create */ | ||
564 | must_read = GNUNET_NO; | ||
565 | bf->fh = GNUNET_DISK_file_open (filename, | ||
566 | GNUNET_DISK_OPEN_CREATE | ||
567 | | GNUNET_DISK_OPEN_READWRITE, | ||
568 | GNUNET_DISK_PERM_USER_READ | ||
569 | | GNUNET_DISK_PERM_USER_WRITE); | ||
570 | if (NULL == bf->fh) | ||
571 | { | ||
572 | GNUNET_free (bf); | ||
573 | return NULL; | ||
574 | } | ||
575 | if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL)) | ||
576 | { | ||
577 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write"); | ||
578 | GNUNET_DISK_file_close (bf->fh); | ||
579 | GNUNET_free (bf); | ||
580 | return NULL; | ||
581 | } | ||
582 | } | ||
583 | bf->filename = GNUNET_strdup (filename); | ||
584 | /* Alloc block */ | ||
585 | bf->bitArray = GNUNET_malloc_large (size); | ||
586 | if (NULL == bf->bitArray) | ||
587 | { | ||
588 | if (NULL != bf->fh) | ||
589 | GNUNET_DISK_file_close (bf->fh); | ||
590 | GNUNET_free (bf->filename); | ||
591 | GNUNET_free (bf); | ||
592 | return NULL; | ||
593 | } | ||
594 | bf->bitArraySize = size; | ||
595 | bf->addressesPerElement = k; | ||
596 | if (GNUNET_YES != must_read) | ||
597 | return bf; /* already done! */ | ||
598 | /* Read from the file what bits we can */ | ||
599 | rbuff = GNUNET_malloc (BUFFSIZE); | ||
600 | pos = 0; | ||
601 | while (pos < ((off_t) size) * 8LL) | ||
602 | { | ||
603 | int res; | ||
604 | |||
605 | res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE); | ||
606 | if (res == -1) | ||
607 | { | ||
608 | LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING, "read", bf->filename); | ||
609 | GNUNET_free (rbuff); | ||
610 | GNUNET_free (bf->filename); | ||
611 | GNUNET_DISK_file_close (bf->fh); | ||
612 | GNUNET_free (bf); | ||
613 | return NULL; | ||
614 | } | ||
615 | if (res == 0) | ||
616 | break; /* is ok! we just did not use that many bits yet */ | ||
617 | for (i = 0; i < res; i++) | ||
618 | { | ||
619 | if ((rbuff[i] & 0x0F) != 0) | ||
620 | setBit (bf->bitArray, pos + i * 2); | ||
621 | if ((rbuff[i] & 0xF0) != 0) | ||
622 | setBit (bf->bitArray, pos + i * 2 + 1); | ||
623 | } | ||
624 | if (res < BUFFSIZE) | ||
625 | break; | ||
626 | pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */ | ||
627 | } | ||
628 | GNUNET_free (rbuff); | ||
629 | return bf; | ||
630 | } | ||
631 | |||
632 | |||
633 | /** | ||
634 | * Create a bloom filter from raw bits. | ||
635 | * | ||
636 | * @param data the raw bits in memory (maybe NULL, | ||
637 | * in which case all bits should be considered | ||
638 | * to be zero). | ||
639 | * @param size the size of the bloom-filter (number of | ||
640 | * bytes of storage space to use); also size of data | ||
641 | * -- unless data is NULL | ||
642 | * @param k the number of GNUNET_CRYPTO_hash-functions to apply per | ||
643 | * element (number of bits set per element in the set) | ||
644 | * @return the bloomfilter | ||
645 | */ | ||
646 | struct GNUNET_CONTAINER_BloomFilter * | ||
647 | GNUNET_CONTAINER_bloomfilter_init (const char *data, | ||
648 | size_t size, | ||
649 | unsigned int k) | ||
650 | { | ||
651 | struct GNUNET_CONTAINER_BloomFilter *bf; | ||
652 | |||
653 | if ((0 == k) || (0 == size)) | ||
654 | return NULL; | ||
655 | bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter); | ||
656 | bf->filename = NULL; | ||
657 | bf->fh = NULL; | ||
658 | bf->bitArray = GNUNET_malloc_large (size); | ||
659 | if (NULL == bf->bitArray) | ||
660 | { | ||
661 | GNUNET_free (bf); | ||
662 | return NULL; | ||
663 | } | ||
664 | bf->bitArraySize = size; | ||
665 | bf->addressesPerElement = k; | ||
666 | if (NULL != data) | ||
667 | GNUNET_memcpy (bf->bitArray, data, size); | ||
668 | return bf; | ||
669 | } | ||
670 | |||
671 | |||
672 | /** | ||
673 | * Copy the raw data of this bloomfilter into | ||
674 | * the given data array. | ||
675 | * | ||
676 | * @param bf bloomfilter to take the raw data from | ||
677 | * @param data where to write the data | ||
678 | * @param size the size of the given data array | ||
679 | * @return #GNUNET_SYSERR if the data array is not big enough | ||
680 | */ | ||
681 | int | ||
682 | GNUNET_CONTAINER_bloomfilter_get_raw_data ( | ||
683 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
684 | char *data, | ||
685 | size_t size) | ||
686 | { | ||
687 | if (NULL == bf) | ||
688 | return GNUNET_SYSERR; | ||
689 | if (bf->bitArraySize != size) | ||
690 | return GNUNET_SYSERR; | ||
691 | GNUNET_memcpy (data, bf->bitArray, size); | ||
692 | return GNUNET_OK; | ||
693 | } | ||
694 | |||
695 | |||
696 | /** | ||
697 | * Free the space associated with a filter | ||
698 | * in memory, flush to drive if needed (do not | ||
699 | * free the space on the drive) | ||
700 | * | ||
701 | * @param bf the filter | ||
702 | */ | ||
703 | void | ||
704 | GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf) | ||
705 | { | ||
706 | if (NULL == bf) | ||
707 | return; | ||
708 | if (bf->fh != NULL) | ||
709 | GNUNET_DISK_file_close (bf->fh); | ||
710 | GNUNET_free (bf->filename); | ||
711 | GNUNET_free (bf->bitArray); | ||
712 | GNUNET_free (bf); | ||
713 | } | ||
714 | |||
715 | |||
716 | /** | ||
717 | * Reset a bloom filter to empty. Clears the file on disk. | ||
718 | * | ||
719 | * @param bf the filter | ||
720 | */ | ||
721 | void | ||
722 | GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf) | ||
723 | { | ||
724 | if (NULL == bf) | ||
725 | return; | ||
726 | |||
727 | memset (bf->bitArray, 0, bf->bitArraySize); | ||
728 | if (bf->filename != NULL) | ||
729 | make_empty_file (bf->fh, bf->bitArraySize * 4LL); | ||
730 | } | ||
731 | |||
732 | |||
733 | /** | ||
734 | * Test if an element is in the filter. | ||
735 | * | ||
736 | * @param e the element | ||
737 | * @param bf the filter | ||
738 | * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not | ||
739 | */ | ||
740 | int | ||
741 | GNUNET_CONTAINER_bloomfilter_test ( | ||
742 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
743 | const struct GNUNET_HashCode *e) | ||
744 | { | ||
745 | int res; | ||
746 | |||
747 | if (NULL == bf) | ||
748 | return GNUNET_YES; | ||
749 | res = GNUNET_YES; | ||
750 | iterateBits (bf, &testBitCallback, &res, e); | ||
751 | return res; | ||
752 | } | ||
753 | |||
754 | |||
755 | /** | ||
756 | * Add an element to the filter | ||
757 | * | ||
758 | * @param bf the filter | ||
759 | * @param e the element | ||
760 | */ | ||
761 | void | ||
762 | GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
763 | const struct GNUNET_HashCode *e) | ||
764 | { | ||
765 | if (NULL == bf) | ||
766 | return; | ||
767 | iterateBits (bf, &incrementBitCallback, bf, e); | ||
768 | } | ||
769 | |||
770 | |||
771 | /** | ||
772 | * Or the entries of the given raw data array with the | ||
773 | * data of the given bloom filter. Assumes that | ||
774 | * the size of the data array and the current filter | ||
775 | * match. | ||
776 | * | ||
777 | * @param bf the filter | ||
778 | * @param data the data to or-in | ||
779 | * @param size number of bytes in data | ||
780 | */ | ||
781 | int | ||
782 | GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
783 | const char *data, | ||
784 | size_t size) | ||
785 | { | ||
786 | unsigned int i; | ||
787 | unsigned int n; | ||
788 | unsigned long long *fc; | ||
789 | const unsigned long long *dc; | ||
790 | |||
791 | if (NULL == bf) | ||
792 | return GNUNET_YES; | ||
793 | if (bf->bitArraySize != size) | ||
794 | return GNUNET_SYSERR; | ||
795 | fc = (unsigned long long *) bf->bitArray; | ||
796 | dc = (const unsigned long long *) data; | ||
797 | n = size / sizeof(unsigned long long); | ||
798 | |||
799 | for (i = 0; i < n; i++) | ||
800 | fc[i] |= dc[i]; | ||
801 | for (i = n * sizeof(unsigned long long); i < size; i++) | ||
802 | bf->bitArray[i] |= data[i]; | ||
803 | return GNUNET_OK; | ||
804 | } | ||
805 | |||
806 | |||
807 | /** | ||
808 | * Or the entries of the given raw data array with the | ||
809 | * data of the given bloom filter. Assumes that | ||
810 | * the size of the data array and the current filter | ||
811 | * match. | ||
812 | * | ||
813 | * @param bf the filter | ||
814 | * @param to_or the bloomfilter to or-in | ||
815 | * @return #GNUNET_OK on success | ||
816 | */ | ||
817 | int | ||
818 | GNUNET_CONTAINER_bloomfilter_or2 ( | ||
819 | struct GNUNET_CONTAINER_BloomFilter *bf, | ||
820 | const struct GNUNET_CONTAINER_BloomFilter *to_or) | ||
821 | { | ||
822 | unsigned int i; | ||
823 | unsigned int n; | ||
824 | unsigned long long *fc; | ||
825 | const unsigned long long *dc; | ||
826 | size_t size; | ||
827 | |||
828 | if (NULL == bf) | ||
829 | return GNUNET_OK; | ||
830 | if (bf->bitArraySize != to_or->bitArraySize) | ||
831 | { | ||
832 | GNUNET_break (0); | ||
833 | return GNUNET_SYSERR; | ||
834 | } | ||
835 | size = bf->bitArraySize; | ||
836 | fc = (unsigned long long *) bf->bitArray; | ||
837 | dc = (const unsigned long long *) to_or->bitArray; | ||
838 | n = size / sizeof(unsigned long long); | ||
839 | |||
840 | for (i = 0; i < n; i++) | ||
841 | fc[i] |= dc[i]; | ||
842 | for (i = n * sizeof(unsigned long long); i < size; i++) | ||
843 | bf->bitArray[i] |= to_or->bitArray[i]; | ||
844 | return GNUNET_OK; | ||
845 | } | ||
846 | |||
847 | |||
848 | /** | ||
849 | * Remove an element from the filter. | ||
850 | * | ||
851 | * @param bf the filter | ||
852 | * @param e the element to remove | ||
853 | */ | ||
854 | void | ||
855 | GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
856 | const struct GNUNET_HashCode *e) | ||
857 | { | ||
858 | if (NULL == bf) | ||
859 | return; | ||
860 | if (NULL == bf->filename) | ||
861 | return; | ||
862 | iterateBits (bf, &decrementBitCallback, bf, e); | ||
863 | } | ||
864 | |||
865 | |||
866 | /** | ||
867 | * Resize a bloom filter. Note that this operation | ||
868 | * is pretty costly. Essentially, the bloom filter | ||
869 | * needs to be completely re-build. | ||
870 | * | ||
871 | * @param bf the filter | ||
872 | * @param iterator an iterator over all elements stored in the BF | ||
873 | * @param iterator_cls argument to the iterator function | ||
874 | * @param size the new size for the filter | ||
875 | * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element | ||
876 | */ | ||
877 | void | ||
878 | GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
879 | GNUNET_CONTAINER_HashCodeIterator iterator, | ||
880 | void *iterator_cls, | ||
881 | size_t size, | ||
882 | unsigned int k) | ||
883 | { | ||
884 | struct GNUNET_HashCode hc; | ||
885 | unsigned int i; | ||
886 | |||
887 | GNUNET_free (bf->bitArray); | ||
888 | i = 1; | ||
889 | while (i < size) | ||
890 | i *= 2; | ||
891 | size = i; /* make sure it's a power of 2 */ | ||
892 | bf->addressesPerElement = k; | ||
893 | bf->bitArraySize = size; | ||
894 | bf->bitArray = GNUNET_malloc (size); | ||
895 | if (NULL != bf->filename) | ||
896 | make_empty_file (bf->fh, bf->bitArraySize * 4LL); | ||
897 | while (GNUNET_YES == iterator (iterator_cls, &hc)) | ||
898 | GNUNET_CONTAINER_bloomfilter_add (bf, &hc); | ||
899 | } | ||
900 | |||
901 | |||
902 | /* end of container_bloomfilter.c */ | ||