diff options
author | Christian Grothoff <christian@grothoff.org> | 2015-10-24 13:37:48 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2015-10-24 13:37:48 +0000 |
commit | 2c8b0facf033e0e8b6704795de6ac13b4ffd7a07 (patch) | |
tree | c98a152ae8ea5b7ee3227284e18ec2e2dfee10dc | |
parent | 1eae99cc19c77db34811d235cbf7d235526dc523 (diff) | |
download | gnunet-2c8b0facf033e0e8b6704795de6ac13b4ffd7a07.tar.gz gnunet-2c8b0facf033e0e8b6704795de6ac13b4ffd7a07.zip |
-adding Florian's IBF sim code
-rw-r--r-- | src/set/ibf_sim.c | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/src/set/ibf_sim.c b/src/set/ibf_sim.c new file mode 100644 index 000000000..61c98dc52 --- /dev/null +++ b/src/set/ibf_sim.c | |||
@@ -0,0 +1,141 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2013 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file set/ibf_sim.c | ||
23 | * @brief implementation of simulation for invertible bloom filter | ||
24 | * @author Florian Dold | ||
25 | * | ||
26 | * This code was used for some internal experiments, it is not | ||
27 | * build or shipped as part of the GNUnet system. | ||
28 | */ | ||
29 | #include <stdlib.h> | ||
30 | #include <stdio.h> | ||
31 | #include <string.h> | ||
32 | |||
33 | #define MAX_IBF_DECODE 16 | ||
34 | |||
35 | /* report average over how many rounds? */ | ||
36 | #define ROUNDS 100000 | ||
37 | |||
38 | /* enable one of the three below */ | ||
39 | // simple fix | ||
40 | #define FIX1 0 | ||
41 | // possibly slightly better fix for large IBF_DECODE values | ||
42 | #define FIX2 1 | ||
43 | |||
44 | // SIGCOMM algorithm | ||
45 | #define STRATA 0 | ||
46 | |||
47 | // print each value? | ||
48 | #define VERBOSE 0 | ||
49 | // avoid assembly? (ASM is about 50% faster) | ||
50 | #define SLOW 0 | ||
51 | |||
52 | int | ||
53 | main(int argc, char **argv) | ||
54 | { | ||
55 | unsigned int round; | ||
56 | unsigned int buckets[31]; // max is 2^31 as 'random' returns only between 0 and 2^31 | ||
57 | unsigned int i; | ||
58 | int j; | ||
59 | unsigned int r; | ||
60 | unsigned int ret; | ||
61 | unsigned long long total; | ||
62 | unsigned int want; | ||
63 | double predict; | ||
64 | |||
65 | srandom (time (NULL)); | ||
66 | total = 0; | ||
67 | want = atoi (argv[1]); | ||
68 | for (round=0;round<ROUNDS;round++) | ||
69 | { | ||
70 | memset (buckets, 0, sizeof (buckets)); | ||
71 | for (i=0;i<want;i++) | ||
72 | { | ||
73 | /* FIXME: might want to use 'better' PRNG to avoid | ||
74 | PRNG-induced biases */ | ||
75 | r = random (); | ||
76 | if (0 == r) | ||
77 | continue; | ||
78 | #if SLOW | ||
79 | for (j=0;(j < 31) && (0 == (r & (1 << j)));j++) ; | ||
80 | #else | ||
81 | /* use assembly / gcc */ | ||
82 | j = __builtin_ffs (r) - 1; | ||
83 | #endif | ||
84 | buckets[j]++; | ||
85 | } | ||
86 | ret = 0; | ||
87 | predict = 0.0; | ||
88 | for (j=31;j >= 0; j--) | ||
89 | { | ||
90 | #if FIX1 | ||
91 | /* improved algorithm, for 1000 elements with IBF-DECODE 8, I | ||
92 | get 990/1000 elements on average over 1 million runs; key | ||
93 | idea being to stop short of the 'last' possible IBF as | ||
94 | otherwise a "lowball" per-chance would unduely influence the | ||
95 | result */ | ||
96 | if ( (j > 0) && | ||
97 | (buckets[j - 1] > MAX_IBF_DECODE) ) | ||
98 | { | ||
99 | ret *= (1 << (j + 1)); | ||
100 | break; | ||
101 | } | ||
102 | #endif | ||
103 | #if FIX2 | ||
104 | /* another improvement: don't just always cut off the last one, | ||
105 | but rather try to predict based on all previous values where | ||
106 | that "last" one is; additional prediction can only really | ||
107 | work if MAX_IBF_DECODE is sufficiently high */ | ||
108 | if ( (j > 0) && | ||
109 | ( (buckets[j - 1] > MAX_IBF_DECODE) || | ||
110 | (predict > MAX_IBF_DECODE) ) ) | ||
111 | { | ||
112 | ret *= (1 << (j + 1)); | ||
113 | break; | ||
114 | } | ||
115 | #endif | ||
116 | #if STRATA | ||
117 | /* original algorithm, for 1000 elements with IBF-DECODE 8, | ||
118 | I get 920/1000 elements on average over 1 million runs */ | ||
119 | if (buckets[j] > MAX_IBF_DECODE) | ||
120 | { | ||
121 | ret *= (1 << (j+1)); | ||
122 | break; | ||
123 | } | ||
124 | #endif | ||
125 | ret += buckets[j]; | ||
126 | predict = (buckets[j] + 2.0 * predict) / 2.0; | ||
127 | } | ||
128 | #if VERBOSE | ||
129 | fprintf (stderr, "%u ", ret); | ||
130 | #endif | ||
131 | total += ret; | ||
132 | } | ||
133 | fprintf (stderr, "\n"); | ||
134 | fprintf (stdout, "average %llu\n", total / ROUNDS); | ||
135 | return 0; | ||
136 | } | ||
137 | |||
138 | /* TODO: should calculate stddev of the results to also be able to | ||
139 | say something about the stability of the results, outside of | ||
140 | large-scale averages -- gaining 8% precision at the expense of | ||
141 | 50% additional variance might not be worth it... */ | ||