Added some code to the security/perfomance section - lsd0003

commit e97391fb44c69274ba2c2e6ac276383f20779e24
parent 413fcd9a22b4f6ff01bc1a315c7003d2d836bb50
Author: Elias Summermatter <elias.summermatter@seccom.ch>
Date:   Sun, 14 Mar 2021 16:49:41 +0100

Added some code to the security/perfomance section

Diffstat:
M draft-summermatter-set-union.xml  | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----

1 file changed, 313 insertions(+), 15 deletions(-)
diff --git a/draft-summermatter-set-union.xml b/draft-summermatter-set-union.xml
@@ -109,7 +109,7 @@
               suitable for a wide range of applicaitons. As a result,
               the internal structure of the elements in the sets must
               be defined and verified by the application using the
-              protocol.  This document thus does not cover the elemtn
+              protocol.  This document thus does not cover the element
               structure, except for imposing a limit on the maximum
               size of an element.
             </t>
@@ -1818,12 +1818,100 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
                     </dl>
                 </section>
             </section>
-
         </section>
 
 
         <section anchor="performance" numbered="true" toc="default">
             <name>Performance Considerations</name>
+            <section anchor="performance_formulas" numbered="true" toc="default">
+                <name>Formulas</name>
+                <section anchor="performance_formulas_operationmode" numbered="true" toc="default">
+                    <name>Operation Mode</name>
+                    <t>
+                        The decision which mode of operations is used is described by the following code:
+                    </t>
+                    <t>
+                        The function takes as input the initial the local setsize, the remote setsize, the
+                        by the strata estimator calculated difference, a static boolean that enforces full
+                        synchronisation mode of operation and the bandwith/roundtrips tradeoff.
+                        As output the function returns "FULL" if the full synchronisation
+                        mode should be used and "DIFFERENTIAL" if the differential mode should be used.
+                    </t>
+                    <figure anchor="performance_formulas_operationmode_code">
+                        <artwork name="" type="" align="left" alt=""><![CDATA[
+
+# INPUTS:
+# initial_local_setsize: The initial local setsize
+# remote_setsize: The remote setsize
+# set_diff: the set difference calculated by the strata estimator
+# force_full: boolean to enforce FULL
+# ba_rtt_tradeoff: the tradeoff between round trips and bandwidth defined by the use case
+# OUTPUTS:
+# returns: the decision (FULL or DIFFERENTIAL)
+
+FUNCTION decide_operation_mode(initial_local_setsize, remote_setsize, set_diff, force_full, ba_rtt_tradeoff)
+    IF set_diff > 200
+        set_diff = set_diff * 3 / 2
+    ENDIF
+    IF force_full || ( set_diff > initial_local_setsize / 4 ) || remote_setsize = 0
+        return "FULL"
+    ENDIF
+    return "DIFFERENTIAL"
+
+                                    ]]></artwork>
+                    </figure>
+                </section>
+                <section anchor="performance_formulas_full_sending_dec_first_send" numbered="true" toc="default">
+                    <name>Full Synchronisation: Decision witch peer sends elements first</name>
+                    <t>
+                        The following function determinate which peer starts sending its full set in full synchronisation
+                        mode of operation.
+                    </t>
+                    <t>
+                        The function takes as input the initial local setsize (set size of the first iteration) and
+                        the remote setsize and returns as output the decision "REMOTE" or "LOCAL" to determinate if the
+                        remote or the local peer starts sending the full set.
+                    </t>
+                    <figure anchor="performance_formulas_full_sending_dec_first_send_code">
+                        <artwork name="" type="" align="left" alt=""><![CDATA[
+# INPUTS:
+# initial_local_setsize: The initial local setsize
+# remote_setsize: The remote setsize
+# OUTPUTS:
+# returns: the decision (LOCAL or REMOTE)
+
+FUNCTION decide_full_sending(initial_local_size, remote_setsize)
+    IF ( initial_local_size <= remote_setsize ) || ( remote_setsize = 0 )
+        return LOCAL
+    ELIF
+        return REMOTE
+
+                                      ]]></artwork>
+                    </figure>
+                </section>
+                <section anchor="performance_formula_ibf_parameters" numbered="true" toc="default">
+                    <name>IBF Parameters</name>
+                    <t>
+                        The following function calculate the required parameter to create an optimal sized IBF. These
+                        parameter are the number of buckets and the number of buckets a single element is mapped to.
+                    </t>
+                    <t>
+                        The function takes as input the setsize and returns a array with two numbers the total number of buckets
+                        and the number of buckets a single element is mapped to.
+                    </t>
+                    <figure anchor="performance_formula_ibf_parameters_code">
+                        <artwork name="" type="" align="left" alt=""><![CDATA[
+FUNCTION  (setsize):
+    number_of_bucket_per_element = 4
+    total_number_of_buckets = setsize
+    return [ total_number_of_buckets, number_of_bucket_per_element ]
+
+                        ]]></artwork>
+                    </figure>
+                </section>
+            </section>
+        </section>
+
         <!--
         <t>
             - TEXT HERE -
@@ -1832,7 +1920,6 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
             be detailed here, but the full details likely need a separate section on the algorithms.
         </t>
           -->
-    </section>
 
     <section anchor="security" numbered="true" toc="default">
         <name>Security Considerations</name>
@@ -1874,6 +1961,63 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
             <!-- IMPLEMENT: How ist this handheld right now? -->
         </t>
 
+        <section anchor="security_generic_functions" numbered="true" toc="default">
+            <name>Generic functions</name>
+            <t>
+                Some functions are used in most of the messages described in the State
+                section.
+            </t>
+            <section anchor="security_generic_functions_missing_message" numbered="true" toc="default">
+                <name>Duplicated or Missing Message detection</name>
+                <t>
+                    Most of the messages received need to be checked that they are not
+                    received multiple times this is solved with a global store (message)
+                    and the following code
+                </t>
+                <figure anchor="security_generic_functions_missing_message_code">
+                    <artwork name="" type="" align="left" alt=""><![CDATA[
+# Initially creates message store
+FUNCTION createStore()
+    store = {}
+    return store
+
+# Returns adds a message to the store
+FUNCTION addMessageToStore(store, message)
+    key = hash(sha512, message)
+    IF store.get(key) != NULL
+        return FALSE
+    store.set(key) = 1
+    return TRUE
+
+# Returns the count of message received
+FUNCTION getNumberOfMessage(store)
+    return store.size()
+                    ]]></artwork>
+                </figure>
+            </section>
+
+        <section anchor="security_generic_functions_element_nr" numbered="true" toc="default">
+            <name>Store Remote Peers Element Number</name>
+            <t>
+                To prevent an other peer from requesting the same set multiple times
+                its important to memorize the number of elements a peer had in previous
+                reconciliation sessions.
+            </t>
+            <figure anchor="security_generic_functions_element_nr_code">
+                <artwork name="" type="" align="left" alt=""><![CDATA[
+FUNCTION number_elements_last_sync(client_id)
+    IF number_store.get(clientID)
+        return number_store.get(client_id)
+    ENDIF
+    return 0
+
+FUNCTION saveNumberOfElementsLastSync(client_id, remote_setsize)
+    number_store.update(clientID, remote_setsize)
+                 ]]></artwork>
+            </figure>
+        </section>
+    </section>
+
 
 
         <section anchor="security_states" numbered="true" toc="default">
@@ -1891,6 +2035,7 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
                 <dl>
                     <dt><xref target="messages_request_full" format="title" /></dt>
                     <dd>
+                        <t>
                         It needs to be checked that the full synchronisation is
                         plausible according to the formula deciding which operation mode
                         is applicable this is achieved by calculating the upper and lower
@@ -1900,24 +2045,112 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
                         boundary can be estimated with prior knowledge of the maximal
                         plausible increase of element since the last reconciliation and
                         the maximal plausible number of elements.
+                        <!-- Entscheindungsfindung: myset fulltransimtion or epstein
+                         5.3 ist zu verckürtzt fall: nur 2 cases Es fehlt traidof nach paramer  in perfomance section
+                         -->
+                        <!-- Erlaubt perfomance section formel für mode choose: das sind die inputs klar definitert:
+                          formel invertierte aus
+                          7.1/7.2: INPUT/OUTPUT Forcefull:-->
                         <!-- IMPLEMENT: Check if this two checks already exists -->
+                        <!-- Christian: Should we implement a check for max increase over time? -->
+                        </t>
+                        <figure anchor="security_states_expecting_ibf_request_full_code">
+                            <artwork name="" type="" align="left" alt=""><![CDATA[
+# INPUTS:
+# client_id: The initial local setsize
+# remote_setsize: The remote setsize
+# local_setsize: The local setsize
+# initial_local_size: The initial local setsize
+# set_diff: the set difference calculated by the strata estimator
+# OUTPUTS:
+# returns: the decision
+
+FUNCTION validate_messages_request_full(client_id, remote_setsize, local_setsize, initial_local_size, set_diff)
+
+    last_setsize = getNumberOfElementsLastSync(clientId)
+    IF remote_setsize > last_setsize
+        return FALSE
+    ENDIF
+
+    # Update number of elements in store
+    saveNumberOfElementsLastSync(client_id, remote_setsize)
+
+    # Check for max plausible set size as defined on use case basis (can be infinite)
+    plausible_setsize = getMaxPlausibleSetSize()
+    IF remote_setsize > plausible_setsize
+        return FALSE
+    ENDIF
+
+    # Check for correct operation mode operation_mode function is described in performance section
+    IF decide_operation_mode(initial_local_size, remote_setsize, set_diff) != "FULL"
+        return FALSE
+    ENDIF
+
+    # Check that the other peer is honest and we should send our set
+    IF decide_full_sending(local_size, initial_remote_setsize ) != "LOCAL"
+        return FALSE
+    ENDIF
+
+    return TRUE
+                 ]]></artwork>
+                        </figure>
                     </dd>
                     <dt><xref target="messages_ibf" format="title" /></dt>
                     <dd>
-                        Its important do define a threshold to limit
-                        the maximal count of IBFs that are expected from the other peer.
-                        This maximal plausible size can be calculated with the known inputs:
-                        number of elements in my set and the pre defined applications upper
-                        limit as described in the performance section.
-                        <!-- IMPLEMENT: Is this already checked?-->
-                        <!-- TODO: Link performance section -->
-                        That the other peer chooses the correct mode of operation MUST
-                        be checked as described in the section above.
-                        <!-- IMPLEMENT: Is this already checked?-->
+                        <t>
+                            Its important do define a threshold to limit
+                            the maximal number of IBFs that are expected from the other peer.
+                            <!-- change count to number full section -->
+                            This maximal plausible size can be calculated with the known inputs:
+                            number of elements in my set and the pre defined applications upper
+                            limit as described in the performance section.
+                            <!-- IMPLEMENT: Is this already checked?-->
+                            <!-- TODO: Link performance section -->
+                            That the other peer chooses the correct mode of operation MUST
+                            be checked as described in the section above.
+                            <!-- IMPLEMENT: Is this already checked?-->
+                        </t>
+                        <figure anchor="security_states_expecting_ibf_message_ibf_code">
+                            <artwork name="" type="" align="left" alt=""><![CDATA[
+
+FUNCTION validate_messages_ibf(remote_setsize, local_setsize, initial_local_size, set_diff, ibf_msg)
+    IF is_undefined(number_buckets_left)
+        number_buckets_left = get_bucket_number(remote_setsize, local_setsize, initial_local_size, set_diff, ibf_msg)
+    ENDIF
+    number_buckets_left --
+    IF number_buckets_left < 0
+        return FALSE
+    return TRUE
+
+
+# Security check executed when first ibf message is received
+FUNCTION get_bucket_number(remote_setsize, local_setsize, initial_local_size, set_diff, ibf_msg)
+
+    # Check for max plausible set size as defined on use case basis (can be infinite)
+    max_plausible_setsize = getMaxPlausibleSetSize()
+    IF remote_setsize > max_plausible_setsize
+        return 0
+    ENDIF
+
+    # Check for correct operation mode operation_mode function is described in performance section
+    IF decide_operation_mode(initial_local_size, remote_setsize, set_diff) != "DIFFERENTIAL"
+        return 0
+    ENDIF
+
+    ibf_params = calculate_optimal_IBF_params(local_setsize)
+    total_number_of_buckets = ibf_params[0]
+    number_of_bucket_per_element = ibf_params[0]
+    IF  ( 2^(ibf.order) != total_number_of_buckets ) ||
+            (ibf.number_of_bucket_per_element != number_of_bucket_per_element)
+        return 0
 
+    return total_number_of_buckets
+                        ]]></artwork>
+                    </figure>
                     </dd>
                     <dt><xref target="messages_full_element" format="title" /></dt>
                     <dd>
+                        <t>
                         If a full element is received  the set of the other peer
                         is smaller than the set of the peer in the <strong>Expecting IBF</strong>
                         state and the set difference is smaller than threshold for
@@ -1927,6 +2160,71 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
                         of the number of elements in the other peers set as described in
                         the first section.
                         <!-- if valid ok otherwise cancel connection! -->
+                        </t>
+                    <figure anchor="security_states_expecting_ibf_full_element_code">
+                        <artwork name="" type="" align="left" alt=""><![CDATA[
+
+FUNCTION validate_messages_full_element(client_id, remote_setsize, local_setsize, initial_local_size, set_diff, message)
+
+    # On first run create store and make initial checks
+    IF is_undefined(store)
+        store = createStore()
+        IF ! validate_messages_full_element_init(client_id, remote_setsize, local_setsize, initial_local_size, set_diff)
+           return FALSE
+    ENDIF
+
+    # Prevent duplication of received message
+    IF ! addMessageToStore(store, message)
+        return FALSE
+    ENDIF
+
+    # Prevent to receive more elements than the remote peer has
+    number_received_messages = getNumberOfMessage(store)
+    IF ( number_received_messages > remote_setsize )
+        return FALSE
+
+    return TRUE
+
+
+# INPUTS:
+# client_id: The initial local setsize
+# remote_setsize: The remote setsize
+# local_setsize: The local setsize
+# initial_local_size: The initial local setsize
+# set_diff: the set difference calculated by the strata estimator
+# OUTPUTS:
+# returns: the decision
+
+FUNCTION validate_messages_full_element_init(client_id, remote_setsize, local_setsize, initial_local_size, set_diff)
+
+    last_setsize = getNumberOfElementsLastSync(clientId)
+    IF remote_setsize < last_setsize
+        return FALSE
+    ENDIF
+
+    # Update number of elements in store
+    saveNumberOfElementsLastSync(client_id, remote_setsize)
+
+    # Check for max plausible set size as defined on use case basis (can be infinite)
+    plausible_setsize = getMaxPlausibleSetSize()
+    IF remote_setsize > plausible_setsize
+        return FALSE
+    ENDIF
+
+    # Check for correct operation mode operation_mode function is described in performance section
+    IF decide_operation_mode(initial_local_size, remote_setsize, set_diff) != "FULL"
+        return FALSE
+    ENDIF
+
+    # Check that the other peer is honest and he should send us his set
+    IF decide_full_sending(local_size, initial_remote_setsize ) != "REMOTE"
+        return FALSE
+    ENDIF
+
+    return TRUE
+
+                            ]]></artwork>
+                        </figure>
                     </dd>
                 </dl>
             </section>
@@ -1940,7 +2238,8 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
                         When receiving full elements there needs to be checked that every
                         element is a valid element, no element is resized more than once and
                         not more or less elements are received as the other peer has committed
-                        to in the beginning of the operation.
+                        to in the beginning of the operation. Detail pseudocode implementation
+                        can be found in <xref target="security_states_expecting_ibf" format="title" />
                         <!-- IMPLEMENT: Is this check already implemented?-->
                     </dd>
                     <dt><xref target="messages_full_done" format="title" /></dt>
@@ -2175,7 +2474,6 @@ FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size)
                 </dl>
             </section>
 
-
         </section>
 
         <!--

	lsd0003 LSD0003: Set Union
	Log \| Files \| Refs \| README