/* This file is part of GNUnet Copyright (C) 2013 GNUnet e.V. GNUnet is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. GNUnet is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . SPDX-License-Identifier: AGPL3.0-or-later */ /** * @file set/ibf_sim.c * @brief implementation of simulation for invertible bloom filter * @author Florian Dold * * This code was used for some internal experiments, it is not * build or shipped as part of the GNUnet system. */ #include #include #include #define MAX_IBF_DECODE 16 /* report average over how many rounds? */ #define ROUNDS 100000 /* enable one of the three below */ // simple fix #define FIX1 0 // possibly slightly better fix for large IBF_DECODE values #define FIX2 1 // SIGCOMM algorithm #define STRATA 0 // print each value? #define VERBOSE 0 // avoid assembly? (ASM is about 50% faster) #define SLOW 0 int main (int argc, char **argv) { unsigned int round; unsigned int buckets[31]; // max is 2^31 as 'random' returns only between 0 and 2^31 unsigned int i; int j; unsigned int r; unsigned int ret; unsigned long long total; unsigned int want; double predict; srandom (time (NULL)); total = 0; want = atoi (argv[1]); for (round = 0; round < ROUNDS; round++) { memset (buckets, 0, sizeof(buckets)); for (i = 0; i < want; i++) { /* FIXME: might want to use 'better' PRNG to avoid PRNG-induced biases */ r = random (); if (0 == r) continue; #if SLOW for (j = 0; (j < 31) && (0 == (r & (1 << j))); j++) ; #else /* use assembly / gcc */ j = __builtin_ffs (r) - 1; #endif buckets[j]++; } ret = 0; predict = 0.0; for (j = 31; j >= 0; j--) { #if FIX1 /* improved algorithm, for 1000 elements with IBF-DECODE 8, I get 990/1000 elements on average over 1 million runs; key idea being to stop short of the 'last' possible IBF as otherwise a "lowball" per-chance would unduely influence the result */if ((j > 0) && (buckets[j - 1] > MAX_IBF_DECODE)) { ret *= (1 << (j + 1)); break; } #endif #if FIX2 /* another improvement: don't just always cut off the last one, but rather try to predict based on all previous values where that "last" one is; additional prediction can only really work if MAX_IBF_DECODE is sufficiently high */ if ((j > 0) && ((buckets[j - 1] > MAX_IBF_DECODE) || (predict > MAX_IBF_DECODE))) { ret *= (1 << (j + 1)); break; } #endif #if STRATA /* original algorithm, for 1000 elements with IBF-DECODE 8, I get 920/1000 elements on average over 1 million runs */ if (buckets[j] > MAX_IBF_DECODE) { ret *= (1 << (j + 1)); break; } #endif ret += buckets[j]; predict = (buckets[j] + 2.0 * predict) / 2.0; } #if VERBOSE fprintf (stderr, "%u ", ret); #endif total += ret; } fprintf (stderr, "\n"); fprintf (stdout, "average %llu\n", total / ROUNDS); return 0; } /* TODO: should calculate stddev of the results to also be able to say something about the stability of the results, outside of large-scale averages -- gaining 8% precision at the expense of 50% additional variance might not be worth it... */