diff options
Diffstat (limited to 'src/setu/ibf_sim.c')
-rw-r--r-- | src/setu/ibf_sim.c | 142 |
1 files changed, 142 insertions, 0 deletions
diff --git a/src/setu/ibf_sim.c b/src/setu/ibf_sim.c new file mode 100644 index 000000000..6415d00e1 --- /dev/null +++ b/src/setu/ibf_sim.c | |||
@@ -0,0 +1,142 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file set/ibf_sim.c | ||
23 | * @brief implementation of simulation for invertible bloom filter | ||
24 | * @author Florian Dold | ||
25 | * | ||
26 | * This code was used for some internal experiments, it is not | ||
27 | * build or shipped as part of the GNUnet system. | ||
28 | */ | ||
29 | #include <stdlib.h> | ||
30 | #include <stdio.h> | ||
31 | #include <string.h> | ||
32 | |||
33 | #define MAX_IBF_DECODE 16 | ||
34 | |||
35 | /* report average over how many rounds? */ | ||
36 | #define ROUNDS 100000 | ||
37 | |||
38 | /* enable one of the three below */ | ||
39 | // simple fix | ||
40 | #define FIX1 0 | ||
41 | // possibly slightly better fix for large IBF_DECODE values | ||
42 | #define FIX2 1 | ||
43 | |||
44 | // SIGCOMM algorithm | ||
45 | #define STRATA 0 | ||
46 | |||
47 | // print each value? | ||
48 | #define VERBOSE 0 | ||
49 | // avoid assembly? (ASM is about 50% faster) | ||
50 | #define SLOW 0 | ||
51 | |||
52 | int | ||
53 | main (int argc, char **argv) | ||
54 | { | ||
55 | unsigned int round; | ||
56 | unsigned int buckets[31]; // max is 2^31 as 'random' returns only between 0 and 2^31 | ||
57 | unsigned int i; | ||
58 | int j; | ||
59 | unsigned int r; | ||
60 | unsigned int ret; | ||
61 | unsigned long long total; | ||
62 | unsigned int want; | ||
63 | double predict; | ||
64 | |||
65 | srandom (time (NULL)); | ||
66 | total = 0; | ||
67 | want = atoi (argv[1]); | ||
68 | for (round = 0; round < ROUNDS; round++) | ||
69 | { | ||
70 | memset (buckets, 0, sizeof(buckets)); | ||
71 | for (i = 0; i < want; i++) | ||
72 | { | ||
73 | /* FIXME: might want to use 'better' PRNG to avoid | ||
74 | PRNG-induced biases */ | ||
75 | r = random (); | ||
76 | if (0 == r) | ||
77 | continue; | ||
78 | #if SLOW | ||
79 | for (j = 0; (j < 31) && (0 == (r & (1 << j))); j++) | ||
80 | ; | ||
81 | #else | ||
82 | /* use assembly / gcc */ | ||
83 | j = __builtin_ffs (r) - 1; | ||
84 | #endif | ||
85 | buckets[j]++; | ||
86 | } | ||
87 | ret = 0; | ||
88 | predict = 0.0; | ||
89 | for (j = 31; j >= 0; j--) | ||
90 | { | ||
91 | #if FIX1 | ||
92 | /* improved algorithm, for 1000 elements with IBF-DECODE 8, I | ||
93 | get 990/1000 elements on average over 1 million runs; key | ||
94 | idea being to stop short of the 'last' possible IBF as | ||
95 | otherwise a "lowball" per-chance would unduely influence the | ||
96 | result */if ((j > 0) && | ||
97 | (buckets[j - 1] > MAX_IBF_DECODE)) | ||
98 | { | ||
99 | ret *= (1 << (j + 1)); | ||
100 | break; | ||
101 | } | ||
102 | #endif | ||
103 | #if FIX2 | ||
104 | /* another improvement: don't just always cut off the last one, | ||
105 | but rather try to predict based on all previous values where | ||
106 | that "last" one is; additional prediction can only really | ||
107 | work if MAX_IBF_DECODE is sufficiently high */ | ||
108 | if ((j > 0) && | ||
109 | ((buckets[j - 1] > MAX_IBF_DECODE) || | ||
110 | (predict > MAX_IBF_DECODE))) | ||
111 | { | ||
112 | ret *= (1 << (j + 1)); | ||
113 | break; | ||
114 | } | ||
115 | #endif | ||
116 | #if STRATA | ||
117 | /* original algorithm, for 1000 elements with IBF-DECODE 8, | ||
118 | I get 920/1000 elements on average over 1 million runs */ | ||
119 | if (buckets[j] > MAX_IBF_DECODE) | ||
120 | { | ||
121 | ret *= (1 << (j + 1)); | ||
122 | break; | ||
123 | } | ||
124 | #endif | ||
125 | ret += buckets[j]; | ||
126 | predict = (buckets[j] + 2.0 * predict) / 2.0; | ||
127 | } | ||
128 | #if VERBOSE | ||
129 | fprintf (stderr, "%u ", ret); | ||
130 | #endif | ||
131 | total += ret; | ||
132 | } | ||
133 | fprintf (stderr, "\n"); | ||
134 | fprintf (stdout, "average %llu\n", total / ROUNDS); | ||
135 | return 0; | ||
136 | } | ||
137 | |||
138 | |||
139 | /* TODO: should calculate stddev of the results to also be able to | ||
140 | say something about the stability of the results, outside of | ||
141 | large-scale averages -- gaining 8% precision at the expense of | ||
142 | 50% additional variance might not be worth it... */ | ||