Added some text - lsd0003 - LSD0003: Set Union

commit a3d1a875aa5114ae4fbe192a302dbcbddcc30e0c
parent 7254645f5040df95cccc84d8f0350cd88be22845
Author: Elias Summermatter <elias.summermatter@seccom.ch>
Date:   Thu,  3 Dec 2020 11:50:04 +0100

Added some text

Diffstat:
M draft-summermatter-set-union.xml  | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------

1 file changed, 104 insertions(+), 16 deletions(-)
diff --git a/draft-summermatter-set-union.xml b/draft-summermatter-set-union.xml
@@ -91,16 +91,113 @@
             </t>
         </section>
 
+        <section anchor="ibv" numbered="true" toc="default">
+        <name>Invertible Bloom Filter</name>
+            <section anchor="ibv_description" numbered="true" toc="default">
+                <name>Description</name>
+                <t>
+                    A Invertible Bloom Filter (IBF) is a probabilistic data structure that allows to determinate efficiently
+                    but not with absolut certainty that a given element is presence or absence in a set.
+                    The IBF knows tree basic operations add element, remove element and decode.
+
+                    IBF are useful when there is the need to check a set of elements for the presence or absence of some
+                    elements in a big set efficiently.
+                </t>
+            </section>
+            <section anchor="ibv_format" numbered="true" toc="default">
+                <name>Format</name>
+                <t>
+                    The storage format of an IBF is a "two-component data structure" that stores a value
+                    and a count in a bucket. For every bit of the defined output length of the used hash functions
+                    there is a value and a count stored.
+                    The count component is increased by one if on the given bit has been hit by the
+                    Add operation and is decreased by one if an bit has been hit by the delete operation.
+                    If the bucket is pure (counter = 1) the value contains the hash value of an element otherwise in the
+                    value field is empty (counter= 0) or a XOR product of all previously written hashes of elements
+                    (counter > 1)
+                    ### SOME GRAPHIC OF THE FORMAT ###
+                </t>
+            </section>
+            <section anchor="ibv_operations" numbered="true" toc="default">
+                <name>Operations</name>
+                <section anchor="ibv_operations_insert" numbered="true" toc="default">
+                    <name>Insert Element</name>
+                    <t>
+                        To add an element to a IBF the element is hashed with different hash functions the output of
+                        the hash functions is mapped on the two-component data structure. At the
+                        bit positions with a one of the hash output the counter is increased by one and the value
+                        field is updated with the XOR product of the new hash and the previously stored value.
+                    </t>
+                </section>
+                <section anchor="ibv_operations_remove" numbered="true" toc="default">
+                    <name>Remove Element</name>
+                    <t>
+                        To remove an element from the IBF the element is hashed with the same hash functions that it was
+                        hashed to insert and at the bit positions where the hashes result in a one the counter in the
+                        two-component data structure is reduced by one and the resulting hash is XORed with the value field
+                        and is written to the value field again.
+                    </t>
+                </section>
+                <section anchor="ibv_operations_decode" numbered="true" toc="default">
+                    <name>Decode IBF</name>
+                    <t>
+                        To decode a IBF there are pure buckets needed, pure buckets are buckets where which contain only
+                        one element (counter = 1). If there is no pure element in the IBF does not decode and there is the
+                        need to choose a bigger IBF or different hash function to create the IBF.
+                        If there are pure buckets its possible to decode the IBF by removing elements as described
+                        in the section "Remove Element" from the pure buckets from the filter creating new pure buckets
+                        until the IBF is completely empty and all elements have been decoded.
+                    </t>
+                    <t>
+                        In case there are two different sets for example from two different clients (A and B). The two
+                        sets can easily be compared by XORing the IBF of client A with the IBF of client B which results in
+                        a new IBF A/B. The IBF A/B can then be decoded as described above. The IBF A/B can have two types
+                        of pure buckets with counter set to 1 or -1. If the counter is set to 1 the element is missing
+                        in the set of client B and if the counter is set to -1 the element is missing in the set of
+                        client A.
+                    </t>
+                </section>
+            </section>
+        </section>
+
+        <section anchor="se" numbered="true" toc="default">
+            <name>Strata Estimator</name>
+            <section anchor="se_description" numbered="true" toc="default">
+                <name>Description</name>
+                <t>
+                    Strata Estimators should help to estimate the difference between two different set of elements.
+                    That is necessary to efficiently determinate the needed size of an IBF.
+                </t>
+                <t>
+                    Basically an Strata Estimator (SE) is an IBF with the difference that only a certain share of
+                    the elements is added to to the SE. The size of the share is described by the metric stratum which
+                    means "Stratum n" contains 1/(2^n) (stratum factor) of all elements.
+                </t>
+                <t>
+                    The trick is to decode the SE with the biggest stratum first and calculate the difference of
+                    the set if the SE decodes successfully decode the next smaller strata estimator repeat
+                    this until the decoding of the SE fails or Stratum 0 is reached. Then its possible to estimate
+                    how big the difference between two sets is by calculating the count of the
+                    decoded elements divided by the stratum factor. If no of the SE decoded choose a smaller stratum or
+                    try a other hash function.
+                </t>
+            </section>
+        </section>
+
+
         <section anchor="modeofoperation" numbered="true" toc="default">
             <name>Mode of operation</name>
             <section anchor="modeofoperation_full-sync-client-with-bigger-set" numbered="true" toc="default">
                 <name>Full sync mode</name>
+                <t>--- TEXT HERE ---</t>
             </section>
             <section anchor="modeofoperation_individual-elements" numbered="true" toc="default">
                 <name>Individual element sync mode</name>
+                <t>--- TEXT HERE ---</t>
             </section>
             <section anchor="modeofoperation_combined-mode" numbered="true" toc="default">
                 <name>Combined mode</name>
+                <t>--- TEXT HERE ---</t>
             </section>
         </section>
 
@@ -229,22 +326,6 @@
             </section>
         </section>
 
-        <section anchor="states" numbered="true" toc="default">
-            <name>States</name>
-            <section anchor="state_expect-se" numbered="true" toc="default">
-                <name>Expect Strata Estimator</name>
-                <section anchor="state_expect-se_description" numbered="true" toc="default">
-                    <name>Description</name>
-                    <t>
-                        Some description what this state does
-                    </t>
-                </section>
-                <section anchor="state_expect-se_supported-messages" numbered="true" toc="default">
-                    <name>Supported/Unsupported Messages</name>
-                </section>
-            </section>
-        </section>
-
         <section anchor="security" numbered="true" toc="default">
             <name>Security Considerations</name>
             <section anchor="security_crypto" numbered="true" toc="default">
@@ -255,6 +336,13 @@
             </section>
         </section>
 
+        <section anchor="performance" numbered="true" toc="default">
+            <name>Performance</name>
+            <t>
+                --- TEXT HERE ---
+            </t>
+        </section>
+
         <section anchor="gana" numbered="true" toc="default">
             <name>GANA Considerations</name>
             <t>

	lsd0003 LSD0003: Set Union
	Log \| Files \| Refs \| README