diff options
Diffstat (limited to 'src/util/container_bloomfilter.c')
-rw-r--r-- | src/util/container_bloomfilter.c | 897 |
1 files changed, 0 insertions, 897 deletions
diff --git a/src/util/container_bloomfilter.c b/src/util/container_bloomfilter.c deleted file mode 100644 index 8a0487e04..000000000 --- a/src/util/container_bloomfilter.c +++ /dev/null | |||
@@ -1,897 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2011, 2012, 2018 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file util/container_bloomfilter.c | ||
22 | * @brief data structure used to reduce disk accesses. | ||
23 | * | ||
24 | * The idea basically: Create a signature for each element in the | ||
25 | * database. Add those signatures to a bit array. When doing a lookup, | ||
26 | * check if the bit array matches the signature of the requested | ||
27 | * element. If yes, address the disk, otherwise return 'not found'. | ||
28 | * | ||
29 | * A property of the bloom filter is that sometimes we will have | ||
30 | * a match even if the element is not on the disk (then we do | ||
31 | * an unnecessary disk access), but what's most important is that | ||
32 | * we never get a single "false negative". | ||
33 | * | ||
34 | * To be able to delete entries from the bloom filter, we maintain | ||
35 | * a 4 bit counter in the file on the drive (we still use only one | ||
36 | * bit in memory). | ||
37 | * | ||
38 | * @author Igor Wronsky | ||
39 | * @author Christian Grothoff | ||
40 | */ | ||
41 | |||
42 | #include "platform.h" | ||
43 | #include "gnunet_util_lib.h" | ||
44 | |||
45 | #define LOG(kind, ...) \ | ||
46 | GNUNET_log_from (kind, "util-container-bloomfilter", __VA_ARGS__) | ||
47 | |||
48 | #define LOG_STRERROR(kind, syscall) \ | ||
49 | GNUNET_log_from_strerror (kind, "util-container-bloomfilter", syscall) | ||
50 | |||
51 | #define LOG_STRERROR_FILE(kind, syscall, filename) \ | ||
52 | GNUNET_log_from_strerror_file (kind, \ | ||
53 | "util-container-bloomfilter", \ | ||
54 | syscall, \ | ||
55 | filename) | ||
56 | |||
57 | struct GNUNET_CONTAINER_BloomFilter | ||
58 | { | ||
59 | /** | ||
60 | * The actual bloomfilter bit array | ||
61 | */ | ||
62 | char *bitArray; | ||
63 | |||
64 | /** | ||
65 | * Filename of the filter | ||
66 | */ | ||
67 | char *filename; | ||
68 | |||
69 | /** | ||
70 | * The bit counter file on disk | ||
71 | */ | ||
72 | struct GNUNET_DISK_FileHandle *fh; | ||
73 | |||
74 | /** | ||
75 | * How many bits we set for each stored element | ||
76 | */ | ||
77 | unsigned int addressesPerElement; | ||
78 | |||
79 | /** | ||
80 | * Size of bitArray in bytes | ||
81 | */ | ||
82 | size_t bitArraySize; | ||
83 | }; | ||
84 | |||
85 | |||
86 | /** | ||
87 | * Get the number of the addresses set per element in the bloom filter. | ||
88 | * | ||
89 | * @param bf the filter | ||
90 | * @return addresses set per element in the bf | ||
91 | */ | ||
92 | size_t | ||
93 | GNUNET_CONTAINER_bloomfilter_get_element_addresses ( | ||
94 | const struct GNUNET_CONTAINER_BloomFilter *bf) | ||
95 | { | ||
96 | if (bf == NULL) | ||
97 | return 0; | ||
98 | return bf->addressesPerElement; | ||
99 | } | ||
100 | |||
101 | |||
102 | /** | ||
103 | * Get size of the bloom filter. | ||
104 | * | ||
105 | * @param bf the filter | ||
106 | * @return number of bytes used for the data of the bloom filter | ||
107 | */ | ||
108 | size_t | ||
109 | GNUNET_CONTAINER_bloomfilter_get_size ( | ||
110 | const struct GNUNET_CONTAINER_BloomFilter *bf) | ||
111 | { | ||
112 | if (bf == NULL) | ||
113 | return 0; | ||
114 | return bf->bitArraySize; | ||
115 | } | ||
116 | |||
117 | |||
118 | /** | ||
119 | * Copy an existing memory. Any association with a file | ||
120 | * on-disk will be lost in the process. | ||
121 | * @param bf the filter to copy | ||
122 | * @return copy of the bf | ||
123 | */ | ||
124 | struct GNUNET_CONTAINER_BloomFilter * | ||
125 | GNUNET_CONTAINER_bloomfilter_copy ( | ||
126 | const struct GNUNET_CONTAINER_BloomFilter *bf) | ||
127 | { | ||
128 | return GNUNET_CONTAINER_bloomfilter_init (bf->bitArray, | ||
129 | bf->bitArraySize, | ||
130 | bf->addressesPerElement); | ||
131 | } | ||
132 | |||
133 | |||
134 | /** | ||
135 | * Sets a bit active in the bitArray. Increment bit-specific | ||
136 | * usage counter on disk only if below 4bit max (==15). | ||
137 | * | ||
138 | * @param bitArray memory area to set the bit in | ||
139 | * @param bitIdx which bit to set | ||
140 | */ | ||
141 | static void | ||
142 | setBit (char *bitArray, unsigned int bitIdx) | ||
143 | { | ||
144 | size_t arraySlot; | ||
145 | unsigned int targetBit; | ||
146 | |||
147 | arraySlot = bitIdx / 8; | ||
148 | targetBit = (1L << (bitIdx % 8)); | ||
149 | bitArray[arraySlot] |= targetBit; | ||
150 | } | ||
151 | |||
152 | |||
153 | /** | ||
154 | * Clears a bit from bitArray. Bit is cleared from the array | ||
155 | * only if the respective usage counter on the disk hits/is zero. | ||
156 | * | ||
157 | * @param bitArray memory area to set the bit in | ||
158 | * @param bitIdx which bit to unset | ||
159 | */ | ||
160 | static void | ||
161 | clearBit (char *bitArray, unsigned int bitIdx) | ||
162 | { | ||
163 | size_t slot; | ||
164 | unsigned int targetBit; | ||
165 | |||
166 | slot = bitIdx / 8; | ||
167 | targetBit = (1L << (bitIdx % 8)); | ||
168 | bitArray[slot] = bitArray[slot] & (~targetBit); | ||
169 | } | ||
170 | |||
171 | |||
172 | /** | ||
173 | * Checks if a bit is active in the bitArray | ||
174 | * | ||
175 | * @param bitArray memory area to set the bit in | ||
176 | * @param bitIdx which bit to test | ||
177 | * @return GNUNET_YES if the bit is set, GNUNET_NO if not. | ||
178 | */ | ||
179 | static int | ||
180 | testBit (char *bitArray, unsigned int bitIdx) | ||
181 | { | ||
182 | size_t slot; | ||
183 | unsigned int targetBit; | ||
184 | |||
185 | slot = bitIdx / 8; | ||
186 | targetBit = (1L << (bitIdx % 8)); | ||
187 | if (bitArray[slot] & targetBit) | ||
188 | return GNUNET_YES; | ||
189 | else | ||
190 | return GNUNET_NO; | ||
191 | } | ||
192 | |||
193 | |||
194 | /** | ||
195 | * Sets a bit active in the bitArray and increments | ||
196 | * bit-specific usage counter on disk (but only if | ||
197 | * the counter was below 4 bit max (==15)). | ||
198 | * | ||
199 | * @param bitArray memory area to set the bit in | ||
200 | * @param bitIdx which bit to test | ||
201 | * @param fh A file to keep the 4 bit address usage counters in | ||
202 | */ | ||
203 | static void | ||
204 | incrementBit (char *bitArray, | ||
205 | unsigned int bitIdx, | ||
206 | const struct GNUNET_DISK_FileHandle *fh) | ||
207 | { | ||
208 | off_t fileSlot; | ||
209 | unsigned char value; | ||
210 | unsigned int high; | ||
211 | unsigned int low; | ||
212 | unsigned int targetLoc; | ||
213 | |||
214 | setBit (bitArray, bitIdx); | ||
215 | if (GNUNET_DISK_handle_invalid (fh)) | ||
216 | return; | ||
217 | /* Update the counter file on disk */ | ||
218 | fileSlot = bitIdx / 2; | ||
219 | targetLoc = bitIdx % 2; | ||
220 | |||
221 | GNUNET_assert (fileSlot == | ||
222 | GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET)); | ||
223 | if (1 != GNUNET_DISK_file_read (fh, &value, 1)) | ||
224 | value = 0; | ||
225 | low = value & 0xF; | ||
226 | high = (value & (~0xF)) >> 4; | ||
227 | |||
228 | if (targetLoc == 0) | ||
229 | { | ||
230 | if (low < 0xF) | ||
231 | low++; | ||
232 | } | ||
233 | else | ||
234 | { | ||
235 | if (high < 0xF) | ||
236 | high++; | ||
237 | } | ||
238 | value = ((high << 4) | low); | ||
239 | GNUNET_assert (fileSlot == | ||
240 | GNUNET_DISK_file_seek (fh, fileSlot, GNUNET_DISK_SEEK_SET)); | ||
241 | GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1)); | ||
242 | } | ||
243 | |||
244 | |||
245 | /** | ||
246 | * Clears a bit from bitArray if the respective usage | ||
247 | * counter on the disk hits/is zero. | ||
248 | * | ||
249 | * @param bitArray memory area to set the bit in | ||
250 | * @param bitIdx which bit to test | ||
251 | * @param fh A file to keep the 4bit address usage counters in | ||
252 | */ | ||
253 | static void | ||
254 | decrementBit (char *bitArray, | ||
255 | unsigned int bitIdx, | ||
256 | const struct GNUNET_DISK_FileHandle *fh) | ||
257 | { | ||
258 | off_t fileslot; | ||
259 | unsigned char value; | ||
260 | unsigned int high; | ||
261 | unsigned int low; | ||
262 | unsigned int targetLoc; | ||
263 | |||
264 | if (GNUNET_DISK_handle_invalid (fh)) | ||
265 | return; /* cannot decrement! */ | ||
266 | /* Each char slot in the counter file holds two 4 bit counters */ | ||
267 | fileslot = bitIdx / 2; | ||
268 | targetLoc = bitIdx % 2; | ||
269 | if (GNUNET_SYSERR == | ||
270 | GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET)) | ||
271 | { | ||
272 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek"); | ||
273 | return; | ||
274 | } | ||
275 | if (1 != GNUNET_DISK_file_read (fh, &value, 1)) | ||
276 | value = 0; | ||
277 | low = value & 0xF; | ||
278 | high = (value & 0xF0) >> 4; | ||
279 | |||
280 | /* decrement, but once we have reached the max, never go back! */ | ||
281 | if (targetLoc == 0) | ||
282 | { | ||
283 | if ((low > 0) && (low < 0xF)) | ||
284 | low--; | ||
285 | if (low == 0) | ||
286 | { | ||
287 | clearBit (bitArray, bitIdx); | ||
288 | } | ||
289 | } | ||
290 | else | ||
291 | { | ||
292 | if ((high > 0) && (high < 0xF)) | ||
293 | high--; | ||
294 | if (high == 0) | ||
295 | { | ||
296 | clearBit (bitArray, bitIdx); | ||
297 | } | ||
298 | } | ||
299 | value = ((high << 4) | low); | ||
300 | if (GNUNET_SYSERR == | ||
301 | GNUNET_DISK_file_seek (fh, fileslot, GNUNET_DISK_SEEK_SET)) | ||
302 | { | ||
303 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "seek"); | ||
304 | return; | ||
305 | } | ||
306 | GNUNET_assert (1 == GNUNET_DISK_file_write (fh, &value, 1)); | ||
307 | } | ||
308 | |||
309 | |||
310 | #define BUFFSIZE 65536 | ||
311 | |||
312 | /** | ||
313 | * Creates a file filled with zeroes | ||
314 | * | ||
315 | * @param fh the file handle | ||
316 | * @param size the size of the file | ||
317 | * @return GNUNET_OK if created ok, GNUNET_SYSERR otherwise | ||
318 | */ | ||
319 | static int | ||
320 | make_empty_file (const struct GNUNET_DISK_FileHandle *fh, size_t size) | ||
321 | { | ||
322 | char buffer[BUFFSIZE]; | ||
323 | size_t bytesleft = size; | ||
324 | int res = 0; | ||
325 | |||
326 | if (GNUNET_DISK_handle_invalid (fh)) | ||
327 | return GNUNET_SYSERR; | ||
328 | memset (buffer, 0, sizeof(buffer)); | ||
329 | GNUNET_DISK_file_seek (fh, 0, GNUNET_DISK_SEEK_SET); | ||
330 | while (bytesleft > 0) | ||
331 | { | ||
332 | if (bytesleft > sizeof(buffer)) | ||
333 | { | ||
334 | res = GNUNET_DISK_file_write (fh, buffer, sizeof(buffer)); | ||
335 | if (res >= 0) | ||
336 | bytesleft -= res; | ||
337 | } | ||
338 | else | ||
339 | { | ||
340 | res = GNUNET_DISK_file_write (fh, buffer, bytesleft); | ||
341 | if (res >= 0) | ||
342 | bytesleft -= res; | ||
343 | } | ||
344 | if (GNUNET_SYSERR == res) | ||
345 | return GNUNET_SYSERR; | ||
346 | } | ||
347 | return GNUNET_OK; | ||
348 | } | ||
349 | |||
350 | |||
351 | /* ************** GNUNET_CONTAINER_BloomFilter iterator ********* */ | ||
352 | |||
353 | /** | ||
354 | * Iterator (callback) method to be called by the | ||
355 | * bloomfilter iterator on each bit that is to be | ||
356 | * set or tested for the key. | ||
357 | * | ||
358 | * @param cls closure | ||
359 | * @param bf the filter to manipulate | ||
360 | * @param bit the current bit | ||
361 | * @return #GNUNET_YES to continue, #GNUNET_NO to stop early | ||
362 | */ | ||
363 | typedef int | ||
364 | (*BitIterator) (void *cls, | ||
365 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
366 | unsigned int bit); | ||
367 | |||
368 | |||
369 | /** | ||
370 | * Call an iterator for each bit that the bloomfilter | ||
371 | * must test or set for this element. | ||
372 | * | ||
373 | * @param bf the filter | ||
374 | * @param callback the method to call | ||
375 | * @param arg extra argument to callback | ||
376 | * @param key the key for which we iterate over the BF bits | ||
377 | */ | ||
378 | static void | ||
379 | iterateBits (const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
380 | BitIterator callback, | ||
381 | void *arg, | ||
382 | const struct GNUNET_HashCode *key) | ||
383 | { | ||
384 | struct GNUNET_HashCode tmp = *key; | ||
385 | int bitCount; | ||
386 | unsigned int slot = 0; | ||
387 | |||
388 | bitCount = bf->addressesPerElement; | ||
389 | GNUNET_assert (bf->bitArraySize > 0); | ||
390 | GNUNET_assert (bf->bitArraySize * 8LL > bf->bitArraySize); | ||
391 | while (bitCount > 0) | ||
392 | { | ||
393 | while ( (0 != bitCount) && | ||
394 | (slot < (sizeof(struct GNUNET_HashCode) / sizeof(uint32_t))) ) | ||
395 | { | ||
396 | if (GNUNET_YES != | ||
397 | callback (arg, | ||
398 | bf, | ||
399 | ntohl ((((uint32_t *) &tmp)[slot])) | ||
400 | % ((bf->bitArraySize * 8LL)))) | ||
401 | return; | ||
402 | slot++; | ||
403 | bitCount--; | ||
404 | } | ||
405 | if (0 == bitCount) | ||
406 | break; | ||
407 | GNUNET_CRYPTO_hash (&tmp, | ||
408 | sizeof(tmp), | ||
409 | &tmp); | ||
410 | slot = 0; | ||
411 | } | ||
412 | } | ||
413 | |||
414 | |||
415 | /** | ||
416 | * Callback: increment bit | ||
417 | * | ||
418 | * @param cls pointer to writeable form of bf | ||
419 | * @param bf the filter to manipulate | ||
420 | * @param bit the bit to increment | ||
421 | * @return GNUNET_YES | ||
422 | */ | ||
423 | static int | ||
424 | incrementBitCallback (void *cls, | ||
425 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
426 | unsigned int bit) | ||
427 | { | ||
428 | struct GNUNET_CONTAINER_BloomFilter *b = cls; | ||
429 | |||
430 | incrementBit (b->bitArray, bit, bf->fh); | ||
431 | return GNUNET_YES; | ||
432 | } | ||
433 | |||
434 | |||
435 | /** | ||
436 | * Callback: decrement bit | ||
437 | * | ||
438 | * @param cls pointer to writeable form of bf | ||
439 | * @param bf the filter to manipulate | ||
440 | * @param bit the bit to decrement | ||
441 | * @return GNUNET_YES | ||
442 | */ | ||
443 | static int | ||
444 | decrementBitCallback (void *cls, | ||
445 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
446 | unsigned int bit) | ||
447 | { | ||
448 | struct GNUNET_CONTAINER_BloomFilter *b = cls; | ||
449 | |||
450 | decrementBit (b->bitArray, bit, bf->fh); | ||
451 | return GNUNET_YES; | ||
452 | } | ||
453 | |||
454 | |||
455 | /** | ||
456 | * Callback: test if all bits are set | ||
457 | * | ||
458 | * @param cls pointer set to GNUNET_NO if bit is not set | ||
459 | * @param bf the filter | ||
460 | * @param bit the bit to test | ||
461 | * @return YES if the bit is set, NO if not | ||
462 | */ | ||
463 | static int | ||
464 | testBitCallback (void *cls, | ||
465 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
466 | unsigned int bit) | ||
467 | { | ||
468 | int *arg = cls; | ||
469 | |||
470 | if (GNUNET_NO == testBit (bf->bitArray, bit)) | ||
471 | { | ||
472 | *arg = GNUNET_NO; | ||
473 | return GNUNET_NO; | ||
474 | } | ||
475 | return GNUNET_YES; | ||
476 | } | ||
477 | |||
478 | |||
479 | /* *********************** INTERFACE **************** */ | ||
480 | |||
481 | /** | ||
482 | * Load a bloom-filter from a file. | ||
483 | * | ||
484 | * @param filename the name of the file (or the prefix) | ||
485 | * @param size the size of the bloom-filter (number of | ||
486 | * bytes of storage space to use); will be rounded up | ||
487 | * to next power of 2 | ||
488 | * @param k the number of GNUNET_CRYPTO_hash-functions to apply per | ||
489 | * element (number of bits set per element in the set) | ||
490 | * @return the bloomfilter | ||
491 | */ | ||
492 | struct GNUNET_CONTAINER_BloomFilter * | ||
493 | GNUNET_CONTAINER_bloomfilter_load (const char *filename, | ||
494 | size_t size, | ||
495 | unsigned int k) | ||
496 | { | ||
497 | struct GNUNET_CONTAINER_BloomFilter *bf; | ||
498 | char *rbuff; | ||
499 | off_t pos; | ||
500 | int i; | ||
501 | size_t ui; | ||
502 | off_t fsize; | ||
503 | int must_read; | ||
504 | |||
505 | GNUNET_assert (NULL != filename); | ||
506 | if ((k == 0) || (size == 0)) | ||
507 | return NULL; | ||
508 | if (size < BUFFSIZE) | ||
509 | size = BUFFSIZE; | ||
510 | ui = 1; | ||
511 | while ((ui < size) && (ui * 2 > ui)) | ||
512 | ui *= 2; | ||
513 | size = ui; /* make sure it's a power of 2 */ | ||
514 | |||
515 | bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter); | ||
516 | /* Try to open a bloomfilter file */ | ||
517 | if (GNUNET_YES == GNUNET_DISK_file_test (filename)) | ||
518 | bf->fh = GNUNET_DISK_file_open (filename, | ||
519 | GNUNET_DISK_OPEN_READWRITE, | ||
520 | GNUNET_DISK_PERM_USER_READ | ||
521 | | GNUNET_DISK_PERM_USER_WRITE); | ||
522 | if (NULL != bf->fh) | ||
523 | { | ||
524 | /* file existed, try to read it! */ | ||
525 | must_read = GNUNET_YES; | ||
526 | if (GNUNET_OK != GNUNET_DISK_file_handle_size (bf->fh, &fsize)) | ||
527 | { | ||
528 | GNUNET_DISK_file_close (bf->fh); | ||
529 | GNUNET_free (bf); | ||
530 | return NULL; | ||
531 | } | ||
532 | if (0 == fsize) | ||
533 | { | ||
534 | /* found existing empty file, just overwrite */ | ||
535 | if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL)) | ||
536 | { | ||
537 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write"); | ||
538 | GNUNET_DISK_file_close (bf->fh); | ||
539 | GNUNET_free (bf); | ||
540 | return NULL; | ||
541 | } | ||
542 | } | ||
543 | else if (fsize != ((off_t) size) * 4LL) | ||
544 | { | ||
545 | GNUNET_log ( | ||
546 | GNUNET_ERROR_TYPE_ERROR, | ||
547 | _ ( | ||
548 | "Size of file on disk is incorrect for this Bloom filter (want %llu, have %llu)\n"), | ||
549 | (unsigned long long) (size * 4LL), | ||
550 | (unsigned long long) fsize); | ||
551 | GNUNET_DISK_file_close (bf->fh); | ||
552 | GNUNET_free (bf); | ||
553 | return NULL; | ||
554 | } | ||
555 | } | ||
556 | else | ||
557 | { | ||
558 | /* file did not exist, don't read, just create */ | ||
559 | must_read = GNUNET_NO; | ||
560 | bf->fh = GNUNET_DISK_file_open (filename, | ||
561 | GNUNET_DISK_OPEN_CREATE | ||
562 | | GNUNET_DISK_OPEN_READWRITE, | ||
563 | GNUNET_DISK_PERM_USER_READ | ||
564 | | GNUNET_DISK_PERM_USER_WRITE); | ||
565 | if (NULL == bf->fh) | ||
566 | { | ||
567 | GNUNET_free (bf); | ||
568 | return NULL; | ||
569 | } | ||
570 | if (GNUNET_OK != make_empty_file (bf->fh, size * 4LL)) | ||
571 | { | ||
572 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "write"); | ||
573 | GNUNET_DISK_file_close (bf->fh); | ||
574 | GNUNET_free (bf); | ||
575 | return NULL; | ||
576 | } | ||
577 | } | ||
578 | bf->filename = GNUNET_strdup (filename); | ||
579 | /* Alloc block */ | ||
580 | bf->bitArray = GNUNET_malloc_large (size); | ||
581 | if (NULL == bf->bitArray) | ||
582 | { | ||
583 | if (NULL != bf->fh) | ||
584 | GNUNET_DISK_file_close (bf->fh); | ||
585 | GNUNET_free (bf->filename); | ||
586 | GNUNET_free (bf); | ||
587 | return NULL; | ||
588 | } | ||
589 | bf->bitArraySize = size; | ||
590 | bf->addressesPerElement = k; | ||
591 | if (GNUNET_YES != must_read) | ||
592 | return bf; /* already done! */ | ||
593 | /* Read from the file what bits we can */ | ||
594 | rbuff = GNUNET_malloc (BUFFSIZE); | ||
595 | pos = 0; | ||
596 | while (pos < ((off_t) size) * 8LL) | ||
597 | { | ||
598 | int res; | ||
599 | |||
600 | res = GNUNET_DISK_file_read (bf->fh, rbuff, BUFFSIZE); | ||
601 | if (res == -1) | ||
602 | { | ||
603 | LOG_STRERROR_FILE (GNUNET_ERROR_TYPE_WARNING, "read", bf->filename); | ||
604 | GNUNET_free (rbuff); | ||
605 | GNUNET_free (bf->filename); | ||
606 | GNUNET_DISK_file_close (bf->fh); | ||
607 | GNUNET_free (bf); | ||
608 | return NULL; | ||
609 | } | ||
610 | if (res == 0) | ||
611 | break; /* is ok! we just did not use that many bits yet */ | ||
612 | for (i = 0; i < res; i++) | ||
613 | { | ||
614 | if ((rbuff[i] & 0x0F) != 0) | ||
615 | setBit (bf->bitArray, pos + i * 2); | ||
616 | if ((rbuff[i] & 0xF0) != 0) | ||
617 | setBit (bf->bitArray, pos + i * 2 + 1); | ||
618 | } | ||
619 | if (res < BUFFSIZE) | ||
620 | break; | ||
621 | pos += BUFFSIZE * 2; /* 2 bits per byte in the buffer */ | ||
622 | } | ||
623 | GNUNET_free (rbuff); | ||
624 | return bf; | ||
625 | } | ||
626 | |||
627 | |||
628 | /** | ||
629 | * Create a bloom filter from raw bits. | ||
630 | * | ||
631 | * @param data the raw bits in memory (maybe NULL, | ||
632 | * in which case all bits should be considered | ||
633 | * to be zero). | ||
634 | * @param size the size of the bloom-filter (number of | ||
635 | * bytes of storage space to use); also size of data | ||
636 | * -- unless data is NULL | ||
637 | * @param k the number of GNUNET_CRYPTO_hash-functions to apply per | ||
638 | * element (number of bits set per element in the set) | ||
639 | * @return the bloomfilter | ||
640 | */ | ||
641 | struct GNUNET_CONTAINER_BloomFilter * | ||
642 | GNUNET_CONTAINER_bloomfilter_init (const char *data, | ||
643 | size_t size, | ||
644 | unsigned int k) | ||
645 | { | ||
646 | struct GNUNET_CONTAINER_BloomFilter *bf; | ||
647 | |||
648 | if ((0 == k) || (0 == size)) | ||
649 | return NULL; | ||
650 | bf = GNUNET_new (struct GNUNET_CONTAINER_BloomFilter); | ||
651 | bf->filename = NULL; | ||
652 | bf->fh = NULL; | ||
653 | bf->bitArray = GNUNET_malloc_large (size); | ||
654 | if (NULL == bf->bitArray) | ||
655 | { | ||
656 | GNUNET_free (bf); | ||
657 | return NULL; | ||
658 | } | ||
659 | bf->bitArraySize = size; | ||
660 | bf->addressesPerElement = k; | ||
661 | if (NULL != data) | ||
662 | GNUNET_memcpy (bf->bitArray, data, size); | ||
663 | return bf; | ||
664 | } | ||
665 | |||
666 | |||
667 | /** | ||
668 | * Copy the raw data of this bloomfilter into | ||
669 | * the given data array. | ||
670 | * | ||
671 | * @param bf bloomfilter to take the raw data from | ||
672 | * @param data where to write the data | ||
673 | * @param size the size of the given data array | ||
674 | * @return #GNUNET_SYSERR if the data array is not big enough | ||
675 | */ | ||
676 | int | ||
677 | GNUNET_CONTAINER_bloomfilter_get_raw_data ( | ||
678 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
679 | char *data, | ||
680 | size_t size) | ||
681 | { | ||
682 | if (NULL == bf) | ||
683 | return GNUNET_SYSERR; | ||
684 | if (bf->bitArraySize != size) | ||
685 | return GNUNET_SYSERR; | ||
686 | GNUNET_memcpy (data, bf->bitArray, size); | ||
687 | return GNUNET_OK; | ||
688 | } | ||
689 | |||
690 | |||
691 | /** | ||
692 | * Free the space associated with a filter | ||
693 | * in memory, flush to drive if needed (do not | ||
694 | * free the space on the drive) | ||
695 | * | ||
696 | * @param bf the filter | ||
697 | */ | ||
698 | void | ||
699 | GNUNET_CONTAINER_bloomfilter_free (struct GNUNET_CONTAINER_BloomFilter *bf) | ||
700 | { | ||
701 | if (NULL == bf) | ||
702 | return; | ||
703 | if (bf->fh != NULL) | ||
704 | GNUNET_DISK_file_close (bf->fh); | ||
705 | GNUNET_free (bf->filename); | ||
706 | GNUNET_free (bf->bitArray); | ||
707 | GNUNET_free (bf); | ||
708 | } | ||
709 | |||
710 | |||
711 | /** | ||
712 | * Reset a bloom filter to empty. Clears the file on disk. | ||
713 | * | ||
714 | * @param bf the filter | ||
715 | */ | ||
716 | void | ||
717 | GNUNET_CONTAINER_bloomfilter_clear (struct GNUNET_CONTAINER_BloomFilter *bf) | ||
718 | { | ||
719 | if (NULL == bf) | ||
720 | return; | ||
721 | |||
722 | memset (bf->bitArray, 0, bf->bitArraySize); | ||
723 | if (bf->filename != NULL) | ||
724 | make_empty_file (bf->fh, bf->bitArraySize * 4LL); | ||
725 | } | ||
726 | |||
727 | |||
728 | /** | ||
729 | * Test if an element is in the filter. | ||
730 | * | ||
731 | * @param e the element | ||
732 | * @param bf the filter | ||
733 | * @return #GNUNET_YES if the element is in the filter, #GNUNET_NO if not | ||
734 | */ | ||
735 | int | ||
736 | GNUNET_CONTAINER_bloomfilter_test ( | ||
737 | const struct GNUNET_CONTAINER_BloomFilter *bf, | ||
738 | const struct GNUNET_HashCode *e) | ||
739 | { | ||
740 | int res; | ||
741 | |||
742 | if (NULL == bf) | ||
743 | return GNUNET_YES; | ||
744 | res = GNUNET_YES; | ||
745 | iterateBits (bf, &testBitCallback, &res, e); | ||
746 | return res; | ||
747 | } | ||
748 | |||
749 | |||
750 | /** | ||
751 | * Add an element to the filter | ||
752 | * | ||
753 | * @param bf the filter | ||
754 | * @param e the element | ||
755 | */ | ||
756 | void | ||
757 | GNUNET_CONTAINER_bloomfilter_add (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
758 | const struct GNUNET_HashCode *e) | ||
759 | { | ||
760 | if (NULL == bf) | ||
761 | return; | ||
762 | iterateBits (bf, &incrementBitCallback, bf, e); | ||
763 | } | ||
764 | |||
765 | |||
766 | /** | ||
767 | * Or the entries of the given raw data array with the | ||
768 | * data of the given bloom filter. Assumes that | ||
769 | * the size of the data array and the current filter | ||
770 | * match. | ||
771 | * | ||
772 | * @param bf the filter | ||
773 | * @param data the data to or-in | ||
774 | * @param size number of bytes in data | ||
775 | */ | ||
776 | int | ||
777 | GNUNET_CONTAINER_bloomfilter_or (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
778 | const char *data, | ||
779 | size_t size) | ||
780 | { | ||
781 | unsigned int i; | ||
782 | unsigned int n; | ||
783 | unsigned long long *fc; | ||
784 | const unsigned long long *dc; | ||
785 | |||
786 | if (NULL == bf) | ||
787 | return GNUNET_YES; | ||
788 | if (bf->bitArraySize != size) | ||
789 | return GNUNET_SYSERR; | ||
790 | fc = (unsigned long long *) bf->bitArray; | ||
791 | dc = (const unsigned long long *) data; | ||
792 | n = size / sizeof(unsigned long long); | ||
793 | |||
794 | for (i = 0; i < n; i++) | ||
795 | fc[i] |= dc[i]; | ||
796 | for (i = n * sizeof(unsigned long long); i < size; i++) | ||
797 | bf->bitArray[i] |= data[i]; | ||
798 | return GNUNET_OK; | ||
799 | } | ||
800 | |||
801 | |||
802 | /** | ||
803 | * Or the entries of the given raw data array with the | ||
804 | * data of the given bloom filter. Assumes that | ||
805 | * the size of the data array and the current filter | ||
806 | * match. | ||
807 | * | ||
808 | * @param bf the filter | ||
809 | * @param to_or the bloomfilter to or-in | ||
810 | * @return #GNUNET_OK on success | ||
811 | */ | ||
812 | int | ||
813 | GNUNET_CONTAINER_bloomfilter_or2 ( | ||
814 | struct GNUNET_CONTAINER_BloomFilter *bf, | ||
815 | const struct GNUNET_CONTAINER_BloomFilter *to_or) | ||
816 | { | ||
817 | unsigned int i; | ||
818 | unsigned int n; | ||
819 | unsigned long long *fc; | ||
820 | const unsigned long long *dc; | ||
821 | size_t size; | ||
822 | |||
823 | if (NULL == bf) | ||
824 | return GNUNET_OK; | ||
825 | if (bf->bitArraySize != to_or->bitArraySize) | ||
826 | { | ||
827 | GNUNET_break (0); | ||
828 | return GNUNET_SYSERR; | ||
829 | } | ||
830 | size = bf->bitArraySize; | ||
831 | fc = (unsigned long long *) bf->bitArray; | ||
832 | dc = (const unsigned long long *) to_or->bitArray; | ||
833 | n = size / sizeof(unsigned long long); | ||
834 | |||
835 | for (i = 0; i < n; i++) | ||
836 | fc[i] |= dc[i]; | ||
837 | for (i = n * sizeof(unsigned long long); i < size; i++) | ||
838 | bf->bitArray[i] |= to_or->bitArray[i]; | ||
839 | return GNUNET_OK; | ||
840 | } | ||
841 | |||
842 | |||
843 | /** | ||
844 | * Remove an element from the filter. | ||
845 | * | ||
846 | * @param bf the filter | ||
847 | * @param e the element to remove | ||
848 | */ | ||
849 | void | ||
850 | GNUNET_CONTAINER_bloomfilter_remove (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
851 | const struct GNUNET_HashCode *e) | ||
852 | { | ||
853 | if (NULL == bf) | ||
854 | return; | ||
855 | if (NULL == bf->filename) | ||
856 | return; | ||
857 | iterateBits (bf, &decrementBitCallback, bf, e); | ||
858 | } | ||
859 | |||
860 | |||
861 | /** | ||
862 | * Resize a bloom filter. Note that this operation | ||
863 | * is pretty costly. Essentially, the bloom filter | ||
864 | * needs to be completely re-build. | ||
865 | * | ||
866 | * @param bf the filter | ||
867 | * @param iterator an iterator over all elements stored in the BF | ||
868 | * @param iterator_cls argument to the iterator function | ||
869 | * @param size the new size for the filter | ||
870 | * @param k the new number of GNUNET_CRYPTO_hash-function to apply per element | ||
871 | */ | ||
872 | void | ||
873 | GNUNET_CONTAINER_bloomfilter_resize (struct GNUNET_CONTAINER_BloomFilter *bf, | ||
874 | GNUNET_CONTAINER_HashCodeIterator iterator, | ||
875 | void *iterator_cls, | ||
876 | size_t size, | ||
877 | unsigned int k) | ||
878 | { | ||
879 | struct GNUNET_HashCode hc; | ||
880 | unsigned int i; | ||
881 | |||
882 | GNUNET_free (bf->bitArray); | ||
883 | i = 1; | ||
884 | while (i < size) | ||
885 | i *= 2; | ||
886 | size = i; /* make sure it's a power of 2 */ | ||
887 | bf->addressesPerElement = k; | ||
888 | bf->bitArraySize = size; | ||
889 | bf->bitArray = GNUNET_malloc (size); | ||
890 | if (NULL != bf->filename) | ||
891 | make_empty_file (bf->fh, bf->bitArraySize * 4LL); | ||
892 | while (GNUNET_YES == iterator (iterator_cls, &hc)) | ||
893 | GNUNET_CONTAINER_bloomfilter_add (bf, &hc); | ||
894 | } | ||
895 | |||
896 | |||
897 | /* end of container_bloomfilter.c */ | ||