lsd0003

LSD0003: Set Union
Log | Files | Refs | README

commit f350b79cbc925784ba5b7e80c829639f37a86c0a
parent 11a1c66370b81978f913edad4d6ee8128442e6bd
Author: Elias Summermatter <elias.summermatter@seccom.ch>
Date:   Tue, 23 Feb 2021 09:12:31 +0100

Added some security considerations

Diffstat:
Ddraft-summermatter-set-union-00.xml | 2202-------------------------------------------------------------------------------
1 file changed, 0 insertions(+), 2202 deletions(-)

diff --git a/draft-summermatter-set-union-00.xml b/draft-summermatter-set-union-00.xml @@ -1,2202 +0,0 @@ -<?xml version='1.0' encoding='utf-8'?> -<!DOCTYPE rfc SYSTEM "rfc2629-xhtml.ent" [ - <!ENTITY RFC1034 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1034.xml"> - <!ENTITY RFC1035 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1035.xml"> - <!ENTITY RFC2119 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"> - <!ENTITY RFC2782 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2782.xml"> - <!ENTITY RFC3629 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3629.xml"> - <!ENTITY RFC3686 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3686.xml"> - <!ENTITY RFC3826 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3826.xml"> - <!ENTITY RFC3912 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3912.xml"> - <!ENTITY RFC5869 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5869.xml"> - <!ENTITY RFC5890 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5890.xml"> - <!ENTITY RFC5891 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5891.xml"> - <!ENTITY RFC6781 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6781.xml"> - <!ENTITY RFC6895 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6895.xml"> - <!ENTITY RFC6979 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6979.xml"> - <!ENTITY RFC7748 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7748.xml"> - <!ENTITY RFC8032 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8032.xml"> - <!ENTITY RFC8126 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8126.xml"> - ]> -<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?> -<?rfc strict="yes" ?> -<?rfc toc="yes" ?> -<?rfc symrefs="yes"?> -<?rfc sortrefs="yes" ?> -<?rfc compact="yes" ?> -<?rfc subcompact="no" ?> -<rfc category="info" docName="draft-schanzen-gns-01" ipr="trust200902" - obsoletes="" updates="" submissionType="IETF" xml:lang="en" version="3"> - <!-- xml2rfc v2v3 conversion 2.26.0 --> - <front> - <title abbrev="Set Union"> - Byzantine Fault Tolerant Set Reconciliation - </title> - <seriesInfo name="Internet-Draft" value="draft-summermatter-set-union-01"/> - <author fullname="Elias Summermatter" initials="E." surname="Summermatter"> - <organization>Seccom GmbH</organization> - <address> - <postal> - <street>Brunnmattstrasse 44</street> - <city>Bern</city> - <code>3007</code> - <country>CH</country> - </postal> - <email>elias.summermatter@seccom.ch</email> - </address> - </author> - <author fullname="Christian Grothoff" initials="C." surname="Grothoff"> - <organization>Berner Fachhochschule</organization> - <address> - <postal> - <street>Hoeheweg 80</street> - <city>Biel/Bienne</city> - <code>2501</code> - <country>CH</country> - </postal> - <email>grothoff@gnunet.org</email> - </address> - </author> - - <!-- Meta-data Declarations --> - <area>General</area> - <workgroup>Independent Stream</workgroup> - <keyword>name systems</keyword> - <abstract> - <t>This document contains a protocol specification for Byzantine fault-tolerant - Set Reconciliation. - </t> - </abstract> - </front> - <middle> - <section anchor="introduction" numbered="true" toc="default"> - <name>Introduction</name> - <t> - This document describes a Byzantine fault-tolerant set reconciliation protocol used to efficient and securely - synchronize two sets of elements between two peers. - </t> - <t> - This Byzantine fault-tolerant set reconciliation - protocol can be used in a variety of applications. - - Our primary envisioned application domain is the - distribution of revocation messages in the GNU Name - System (GNS) <xref target="GNUNET" format="default" />. In GNS, - key revocation messages are usually flooded across the - peer-to-peer overlay network to all connected peers - whenever a key is revoked. However, as peers may be - offline or the network might have been partitioned, - there is a need to reconcile revocation lists whenever - network partitions are healed or peers go online. The - GNU Name System uses the protocol described in this - specification to efficiently distribute revocation - messages whenever network partitions are healed. - - Another application domain for the protocol described - in this specification are Byzantine fault-tolerant - bulletin boards, like those required in some secure - multiparty computations. A well-known example for - secure multiparty computations are various E-voting - protocols <xref target="CryptographicallySecureVoting" format="default"/> which - use a bulletin board to share the votes and intermediate - computational results. We note that for such systems, - the set reconciliation protocol is merely a component of - a multiparty consensus protocol, such as the one - described in (FIXME-CITE: DOLD MS Thesis! Which paper is his MS thesis on fdold.eu). - </t> - <t> - The protocol described in this report is generic and - suitable for a wide range of applicaitons. As a result, - the internal structure of the elements in the sets must - be defined and verified by the application using the - protocol. This document thus does not cover the elemtn - structure, except for imposing a limit on the maximum - size of an element. - </t> - <t> - The protocol faces an inherent trade-off between minimizing - the number of network round-trips and the number of bytes - sent over the network. Thus, for the protocol to choose - the right parameters for a given situation, applications - using the protocol must provide a parameter that specifies - the cost-ratio of round-trips vs. bandwidth usage. Given - this trade-off factor, the protocol will then choose parameters - that minimize the total execution cost. In particular, there - is one major choice to be made, which is between sending the - full set of elements, or just sending the elements that differ. - In the latter case, our design is basically a concrete - implementation of a proposal by Eppstein.<xref target="Eppstein" format="default" /> - </t> - - <t> - We say that our set reconciliation protocol is Byzantine - fault-tolerant because it provides cryptographic and - probabilistic methods to discover if the other peer - is dishonest or misbehaving. - </t> - <t> - The objective here is to limit resources wasted on - malicious actors. Malicious actors could send malformed - messages, including malformed set elements, claim to - have much larger numbers of valid set elements than the - actually hold, or request the retransmission of elements - that they have already received in previous - interactions. Bounding resources consumed by malicous - actors is important to ensure that higher-level protocols - can use set reconciliation and still meet their resource - targets. This can be particularly critical in multi-round - synchronous consensus protocols where peers that cannot - answer in a timely fashion would have to be treated as - failed or malicious. - </t> - <t> - To defend against some of these attacks, applications - need to remember the number of elements previously - shared with a peer, and offer a means to check that - elements are well-formed. Applications may also be able - to provide an upper bound on the total number of valid - elements that may exist. For example, in E-voting, the - number of eligible voters could be used to provide such - an upper bound. - </t> - - <t> - This document defines the normative wire format of resource records, resolution processes, - cryptographic routines and security considerations for use by implementors. - SETU requires a bidirectional secure communication channel between the two parties. - Specification of the communication channel is out of scope of this document. - </t> - <t> - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL - NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and - "OPTIONAL" in this document are to be interpreted as described - in<xref target="RFC2119"/>. - </t> - </section> - - <section anchor="background" numbered="true" toc="default"> - <name>Background</name> - <section anchor="bf" numbered="true" toc="default"> - <name>Bloom Filters</name> - <t> - A Bloom filter (BF) is a space-efficient datastructure to test if am element is part of a set of elements. - Elements are identified by an element ID. - Since a BF is a probabilistic datastructure, it is possible to have false-positives: when asked - if an element is in the set, the answer from a BF is either "no" or "maybe". - </t> - <t> - A BF consists of L buckets. Every bucket is a binary value that can be either 0 or 1. All buckets are initialized - to 0. A mapping function M is used to map each the ID of each element from the set to a subset of k buckets. M is non-injective - and can thus map the same element multiple times to the same bucket. - The type of the mapping function can thus be described by the following mathematical notation: - </t> - <figure anchor="bf_mapping_function_math"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - ------------------------------------ - # M: E->B^k - ------------------------------------ - # L = Number of buckets - # B = 0,1,2,3,4,...L-1 (the buckets) - # k = Number of buckets per element - # E = Set of elements - ------------------------------------ - Example: L=256, k=3 - M('element-data') = {4,6,255} - - ]]></artwork> - </figure> - <t> - A typical mapping function is constructed by hashing the element, for example - using the well-known <relref section="2" target="RFC5869" displayFormat="of">HKDF construction</relref>. - </t> - <t> - To add an element to the BF, the corresponding buckets under the map M are set to 1. - To check if an element may be in the set, one tests if all buckets under the map M are set to 1. - </t> - <t> - Further in this document a bitstream outputted by the mapping function is represented by - a set of numeric values for example (0101) = (2,4). - In the BF the buckets are set to 1 if the corresponding bit in the bitstream is 1. - If there is a collision and a bucket is already set to 1, the bucket stays 1. - </t> - <t> - In the following example the element M(element) = (1,3) has been added: - </t> - <figure anchor="figure_bf_insert_0"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - | 0 | 1 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - Is easy to see that the M(element) = (0,3) could be in the BF bellow and M(element) = (0,2) can't be - in the BF bellow: - </t> - - <figure anchor="figure_bf_contains"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - | 1 | 0 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - The parameters L and k depend on the set size and must be - chosen carefully to ensure that the BF does not return too - many false-positives. - </t> - <t> - It is not possible to remove an element from the BF because buckets can only be set to 1 or 0. Hence it is impossible to - differentiate between buckets containing one or more elements. To remove elements from the BF a <xref target="cbf" format="title" /> - is required. - </t> - </section> - - <section anchor="cbf" numbered="true" toc="default"> - <name>Counting Bloom Filter</name> - <t> - A Counting Bloom Filter (CBF) is an extension of the <xref target="bf" format="title" />. In the CBF, buckets are - unsigned numbers instead of binary values. This allows the removal of an elements from the CBF. - </t> - <t> - Adding an element to the CBF is similar to the adding operation of the BF. However, instead of setting the bucket on hit to 1 the - numeric value stored in the bucket is increased by 1. For example if two colliding elements M(element1) = (1,3) and - M(element2) = (0,3) are added to the CBF, bucket 0 and 1 are set to 1 and bucket 3 (the colliding bucket) is set - to 2: - </t> - <figure anchor="figure_cbf_insert_0"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - | 1 | 1 | 0 | 2 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - The counter stored in the bucket is also called the order of the bucket. - </t> - <t> - To remove an element form the CBF the counters of all buckets the element is mapped to are decreased by 1. - </t> - <t> - Removing M(element2) = (1,3) from the CBF above: - </t> - <figure anchor="figure_cbf_remove_0"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - | 1 | 0 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - In practice, the number of bits available for the counters is usually finite. For example, given a 4-bit - counter, a CBF bucket would overflow once 16 elements are mapped to the same bucket. To efficiently - handle this case, the maximum value (15 in our example) is considered to represent "infinity". Once the - order of a bucket reaches "infinity", it is no longer incremented or decremented. - </t> - <t> - The parameters L and k and the number of bits allocated to the counters should depend on the set size. - An IBF will degenerate when subjected to insert and remove iterations of different elements, and eventually all - buckets will reach "infinity". The speed of the degradation will depend on the choice of L and k in - relation to the number of elements stored in the IBF. - </t> - </section> - </section> - - <section anchor="ibv" numbered="true" toc="default"> - <name>Invertible Bloom Filter</name> - <t> - An Invertible Bloom Filter (IBF) is a further extension of the <xref target="cbf" format="title" />. - An IBF extends the <xref target="cbf" format="title" /> with two more operations: - decode and set difference. This two extra operations are useful to efficiently extract - small differences between large sets. - </t> - <section anchor="ibf_structure" numbered="true" toc="default"> - <name>Structure</name> - <t> - An IBF consists of a mapping function M and - L buckets that each store a signed - counter and an XHASH. An XHASH is the XOR of various - hash values. As before, the - values used for k, L and the number of bits used - for the signed counter and the XHASH depend - on the set size and various other trade-offs, - including the CPU architecture. - </t> - <t> - If the IBF size is to small or the mapping - function does not spread out the elements - uniformly, the signed counter can overflow or - underflow. As with the CBF, the "maximum" value is - thus used to represent "infinite". As there is no - need to distinguish between overflow and - underflow, the most canonical representation of - "infinite" would be the minimum value of the - counter in the canonical 2-complement - interpretation. For example, given a 4-bit - counter a value of -8 would be used to represent - "infinity". - </t> - <figure anchor="figure_ibf_structure"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+------- - count | COUNTER | COUNTER | COUNTER | COUNTER | C... - +-------------+-------------+-------------+-------------+------ - idSum | IDSUM | IDSUM | IDSUM | IDSUM | I... - +-------------+-------------+-------------+-------------+------ -hashSum | HASHSUM | HASHSUM | HASHSUM | HASHSUM | H.. - +-------------+-------------+-------------+-------------+------- - ]]></artwork> - </figure> - - </section> - <section anchor="ibf_operations" numbered="true" toc="default"> - <name>Operations</name> - <t> - When an IBF is created, all counters and IDSUM and HASHSUM values of - all buckets are initialized to zero. - </t> - <section anchor="ibv_operations_insert" numbered="true" toc="default"> - <name>Insert Element</name> - <t> - To add an element to a IBF, the element is mapped to a subset of k buckets using - the mapping function M as described in the <xref target="bf" format="title" /> section introducing - BFs. For the buckets selected by the mapping function, the counter is increased by one and the - IDSUM field is set to the XOR of the element ID and the previously stored IDSUM. Furthermore, - the HASHSUM is set to the XOR of the hash of the element ID and the previously stored HASHSUM. - </t> - <t> - In the following example, the insert operation is illustrated using an element with the - ID 0x0102 and a hash of 0x4242, and a second element with the ID 0x0304 and - a hash of 0x0101. - </t> - <t>Empty IBF:</t> - <figure anchor="figure_ibf_insert_0"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 0 | 0 | 0 | 0 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0000 | 0x0000 | 0x0000 | 0x0000 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0000 | 0x0000 | 0x0000 | 0x0000 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t>Insert first element: [0101] with ID 0x0102 and hash 0x4242:</t> - <figure anchor="figure_ibf_insert_1"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 0 | 1 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0000 | 0x0102 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0000 | 0x4242 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t>Insert second element: [1100] with ID 0x0304 and hash 0101:</t> - <figure anchor="figure_ibf_insert_2"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 1 | 2 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0304 | 0x0206 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0101 | 0x4343 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - </section> - <section anchor="ibf_operations_remove" numbered="true" toc="default"> - <name>Remove Element</name> - <t> - To remove an element from the IBF the element is again mapped to a subset of the buckets using M. - Then all the counters of the buckets selected by M are reduced by one, the IDSUM is - replaced by the XOR of the old IDSUM and the ID of the element being removed, and the - HASHSUM is similarly replaced with the XOR of the old HASHSUM and the hash of the ID. - </t> - <t> - In the following example the remove operation for the element [1100] with the hash 0x0101 is demonstrated. - </t> - <t>IBF with encoded elements:</t> - <figure anchor="figure_ibf_remove_0"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 1 | 2 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0304 | 0x0206 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0101 | 0x4343 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t>Remove element [1100] with ID 0x0304 and hash 0x0101 from the IBF:</t> - <figure anchor="figure_ibf_remove_1"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 0 | 1 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0000 | 0x0102 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0000 | 0x4242 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - Note that it is possible to "remove" elements from an IBF that were never present - in the IBF in the first place. A negative counter value is thus indicative of - elements that were removed without having been added. Note that an IBF bucket - counter of zero no longer warrants that an element mapped to that bucket is not - present in the set: a bucket with a counter of zero can be the result of one - element being added and a different element (mapped to the same bucket) being removed. - To check that an element is not present requires a counter of zero and an - IDSUM and HASHSUM of zero --- and some assurance that there was no collision due - to the limited number of bits in IDSUM and HASHSUM. Thus, - IBFs are not suitable to replace BFs or IBFs. - </t> - <t> - Buckets in an IBF with a counter of 1 or -1 are crucial for decoding an IBF, as - they might represent only a single element, with the IDSUM being the ID of that element. - Following Eppstein (CITE), we will call buckets that only represent a single - element pure buckets. - Note that due to the possibility of multiple insertion and removal operations - affecting the same bucket, not all buckets with a counter of 1 or -1 are - actually pure buckets. Sometimes a counter can be 1 or -1 because N elements - mapped to that bucket were added while N-1 or N+1 different elements also - mapped to that bucket were removed. - </t> - </section> - - <section anchor="ibf_operations_decode" numbered="true" toc="default"> - <name>Decode IBF</name> - <t> - Decoding an IBF yields the HASH of an element from the IBF, or failure. - </t> - <t> - A decode operation requires a pure bucket, that is a bucket to which M - only mapped a single element, to succeed. Thus, if there is no bucket with - a counter of 1 or -1, decoding fails. However, as a counter of 1 or -1 is - not a guarantee that the bucket is pure, there is also a chance that the - decoder returns an IDSUM value that is actually the XOR of several IDSUMs. - This is primarily detected by checking that the HASHSUM is the hash of the IDSUM. - Only if the HASHSUM also matches, the bucket could be pure. Additionally, - one should check that the IDSUM value actually would be mapped by M to - the respective bucket. If not, there was a hash collision. - </t> - <t> - The very rare case that after all these checks a bucket is still - falsely identified as pure must be detected (say by determining that - extracted element IDs do not match any actual elements), and addressed - at a higher level in the protocol. As these failures are probabilistic - and depend on element IDs and the IBF construction, they can typically - be avoided by retrying with different parameters, such as a different - way to assign element IDs to elements, using a larger value for L, or - a different mapping function M. - A more common scenario (especially if L was too small) is that - IBF decoding fails because there is no pure bucket. In this case, the - higher-level protocol also should retry using different parameters. - </t> - <t> - Suppose the IBF contains a pure bucket. In this case, the IDSUM in the - bucket identifies a single element. Furthermore, it is then possible - to remove that element from the IBF (by inserting it if the counter - was negative, and by removing it if the counter was positive). This - is likely to cause other buckets to become pure, allowing further - elements to be decoded. Eventually, decoding should succeed with - all counters and IDSUM and HASHSUM values reaching zero. However, it is also - possible that an IBF only partly decodes and then decoding fails after - yielding some elements. - </t> - <t> - In the following example the successful decoding of an IBF containing - the two elements previously added in our running example. - </t> - <t> - IBF with the two encoded elements: - </t> - <figure anchor="figure_ibf_decode_0"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 1 | 2 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0304 | 0x0206 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0101 | 0x4343 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - In the IBF are two pure buckets to decode (bit-1 and bit-4) we choose to start with decoding bucket 1, - we decode the element with the hash 1010 and we see that there is a new pure bucket created (bit-2) - </t> - <figure anchor="figure_ibf_decode_1"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 0 | 1 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0000 | 0x0102 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0000 | 0x4242 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t> - In the IBF only pure buckets are left, we choose to continue decoding bucket 2 and decode element - with the hash 0x4242. Now the IBF is empty (all buckets have count 0) that means the IBF has successfully - decoded. - </t> - <figure anchor="figure_ibf_decode_2"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 0 | 0 | 0 | 0 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0000 | 0x0000 | 0x0000 | 0x0000 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0000 | 0x0000 | 0x0000 | 0x0000 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - </section> - - <section anchor="ibv_operations_setdiff" numbered="true" toc="default"> - <name>Set Difference</name> - <t> - Given addition and removal as defined above, it is possible to define an operation on IBFs - that computes an IBF representing the set difference. Suppose IBF1 represents set A, and - IBF2 represents set B. Then this set difference operation will compute IBF3 which - represents the set A - B --- without needing elements from set A or B. - - To calculate the IBF representing this set difference, both IBFs must have the same - length L, the same number of buckets per element k and use the same map M. Given this, - one can compute the IBF representing the set difference by taking the XOR of the IDSUM and HASHSUM values - of the respective buckets and subtracting the respective counters. Care should be taken - to handle overflows and underflows by setting the counter to "infinity" as necessary. - The result is a new IBF with the same number of buckets representing the set difference. - </t> - <t> - This new IBF can be decoded as described in section <xref target="ibf_operations_decode" format="counter" />. - The new IBF can have two types of pure buckets with counter set to 1 or -1. If the counter is set to 1 - the element is missing in the secondary set, and if the counter is set to -1 the element is missing in - the primary set. - </t> - <t> - To demonstrate the set difference operation we compare IBF-A with IBF-B and generate as described - IBF-AB - </t> - <t>IBF-A containing elements with hashes 0x0101 and 0x4242:</t> - <figure anchor="figure_ibf_setdiff_A"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 1 | 2 | 0 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0304 | 0x0206 | 0x0000 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0101 | 0x4343 | 0x0000 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t>IBF-B containing elements with hashes 0x4242 and 0x5050</t> - <figure anchor="figure_ibf_setdiff_B"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 0 | 1 | 1 | 1 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0000 | 0x0102 | 0x1345 | 0x0102 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0000 | 0x4242 | 0x5050 | 0x4242 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t>IBF-AB XOR value and subtract count:</t> - <figure anchor="figure_ibf_setdiff_AB"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - bucket-0 bucket-1 bucket-2 bucket-3 - +-------------+-------------+-------------+-------------+ - count | 1 | 1 | -1 | 0 | - +-------------+-------------+-------------+-------------+ - idSum | 0x0304 | 0x0304 | 0x1345 | 0x0000 | - +-------------+-------------+-------------+-------------+ -hashSum | 0x0101 | 0x0101 | 0x5050 | 0x0000 | - +-------------+-------------+-------------+-------------+ - ]]></artwork> - </figure> - <t>After calculating and decoding the IBF-AB its clear that in IBF-A the element with the hash 0x5050 - is missing (-1 in bit-3) while in IBF-B the element with the hash 0101 is missing - (1 in bit-1 and bit-2). The element with hash 0x4242 is present in IBF-A and IBF-B and is - removed by the set difference operation (bit-4). - </t> - </section> - </section> - - <section anchor="ibf_format" numbered="true" toc="default"> - <name>Wire format</name> - <t> - To facilitate a reasonably CPU-efficient - implementation, this specification requires the - IBF counter to always use 8 bits. Fewer bits - would result in a paritcularly inefficient - implementation, while more bits are rarely useful - as sets with so many elements should likely be - represented using a larger number of buckets. This - means the counter of this design can reach a - minimum of -127 and a maximum of 127 before the - counter reaches "infinity" (-128). - </t> - <t> - For the "IDSUM", we always use a 64-bit representation. - The IDSUM value must have sufficient entropy for the - mapping function M to yield reasonably random buckets - even for very large values of L. With a 32 bit - value the chance that multiple elements may be mapped - to the same ID would be quite high, even for moderately - large sets. Using more than 64 bits would at best make - sense for very large sets, but then it is likely always - better to simply afford additional round trips to handle - the occasional collision. 64 bits are also a reasonable - size for many CPU architectures. - </t> - <t> - For the "HASHSUM", we always use a 32-bit - representation. Here, it is mostly important to - avoid collisions, where different elements are - mapped to the same hash. However, we note that - by design only a few elements (certainly less than - 127) should ever be mapped - to the same bucket, so a small number of bits - should suffice. Furthermore, our protocol is designed - to handle occasional collisions, so while with - 32-bits there remains a chance of - accidental collisions, at 32 bit the chance is - generally believed to be sufficiently small enough - for the protocol to handle those cases efficiently - for a wide range of use-cases. Smaller hash - values would safe bandwidth, but also drastically - increase the chance of collisions. 32 bits are - also again a reasonable size for many CPU - architectures. - </t> - <section anchor="ibf_format_id_generation" numbered="true" toc="default"> - <name>ID Calculation</name> - <t> - The ID is generated as 64-bit output from a <relref section="2" target="RFC5869" displayFormat="of">HKDF construction</relref> - with HMAC-SHA512 as XTR and HMAC-SHA256 as PRF and salt is set to the unsigned 64-bit equivalent of 0. - The output is then truncated to 64-bit. - Its important that the elements can be redistributed over the buckets in case the IBF does not - decode, that's why the ID is salted with a random salt given in the SALT field of this message. - Salting is done by calculation the a random salt modulo 64 (using only the lowest 6-bits of the salt) - and do a bitwise right rotation of output of KDF by the 6-bit salts numeric representation. - </t> - <t> - Representation in pseudocode: - </t> - <figure anchor="ibf_format_id_generation_pseudo_code"> - <artwork name="" type="" align="left" alt=""><![CDATA[ -# INPUTS: -# key: Pre calculated and truncated key from id_calculation function -# ibf_salt: Salt of the IBF -# OUTPUT: -# value: salted key -FUNCTION salt_key(key,ibf_salt): - s = ibf_salt % 64; - k = key - - /* rotate ibf key */ - k = (k >> s) | (k << (64 - k)) - return key - - -# INPUTS: -# element: Element to calculated id from. -# salt: Salt of the IBF -# OUTPUT: -# value: the ID of the element - -FUNCTION id_calculation (element,ibf_salt): - salt = 0 - XTR=HMAC-SHA256 - PRF=HMAC-SHA256 - key = HKDF(XTR, PRF, salt, element) - key = key modulo 2^64 // Truncate - return salt_key(key,ibf_salt) - - - ]]></artwork> - </figure> - </section> - <section anchor="ibf_format_bucket_identification" numbered="true" toc="default"> - <name>Mapping Function</name> - <t> - The mapping function M as described above in the figure <xref target="bf_mapping_function_math" format="default" /> - decides in which buckets the ID and HASH have to be binary XORed to. In practice - there the following algorithm is used: - </t> - <t> - The first index is simply the HASH modulo the IBF size. The second - index is calculated by creating a new 64-bit value by shifting the 32-bit - value left and setting the lower 32-bit to the number of indexes already processed. From the - resulting 64-bit value a CRC32 checksum is created the second index is now the modulo of the - CRC32 output this is repeated until the predefined amount indexes is generated. - In the case a index is hit twice, which would mean this bucket could not get pure again, - the second hit is just skipped and the next iteration is used as. - </t> - <figure anchor="ibf_format_bucket_identification_pseudo_code"> - <artwork name="" type="" align="left" alt=""><![CDATA[ -# INPUTS: -# key: Is the ID of the element calculated in the id_calculation function above. -# number_of_buckets_per_element: Pre-defined count of buckets elements are inserted into -# ibf_size: the size of the ibf (count of buckets) -# OUTPUT: -# dst: Array with bucket IDs to insert ID and HASH - -FUNCTION get_bucket_id (key, number_of_buckets_per_element, ibf_size) - bucket = CRC32(key) - - i = 0 - filled = 0 - WHILE filled < number_of_buckets_per_element - - element_already_in_bucket = false - j = 0 - WHILE j < filled - IF dst[j] == bucket modulo ibf_size THEN - element_already_in_bucket = true - ENDIF - j++ - ENDWHILE - - IF !element_already_in_bucket THEN - dst[filled++] = bucket modulo ibf_size - ENDIF - - x = (bucket << 32) | i - bucket = CRC32(x) - - i++ - ENDWHILE - return dst - ]]></artwork> - </figure> - </section> - <section anchor="ibf_format_HASH_calculation" numbered="true" toc="default"> - <name>HASH calculation</name> - <t> - The HASH is calculated by calculating the CRC32 checksum of the 64-bit ID value - which returns a 32-bit value. - </t> - </section> - </section> - </section> - - <section anchor="se" numbered="true" toc="default"> - <name>Strata Estimator</name> - <section anchor="se_description" numbered="true" toc="default"> - <name>Description</name> - <t> - Strata Estimators help estimate the size of the set difference between two set of elements. - This is necessary to efficiently determinate the tuning parameters for an IBF, in particular - a good value for L. - </t> - <t> - Basically a Strata Estimator (SE) is a series of IBFs (with a rather small value of L) - in which increasingly large subsets of the full set - of elements are added to each IBF. For the n-th IBF, the function selecting the - subset of elements should sample to select (probabilistically) 1/(2^n) of all - elements. This can be done by counting the number of trailing bits set to "1" - in an element ID, and then inserting the element into the IBF identified by that - counter. As a result, all elements will be mapped to one IBF, with the n-th - IBF being statistically expected to contain 1/(2^n) elements. - </t> - <t> - Given two SEs, the set size difference can be estimated by trying to decode all of the - IBFs. Given that L was set to a rather small value, IBFs containing large strata - will likely fail to decode. For those IBFs that failed to decode, one simply - extrapolates the number of elements by scaling the numbers obtained from the - other IBFs that did decode. If none of the IBFs of the SE decoded (which given - a reasonable choice of L should be highly unlikely), one can retry using a different - mapping function M. - </t> - </section> - </section> - - - <section anchor="modeofoperation" numbered="true" toc="default"> - <name>Mode of operation</name> - <t> - The set union protocol uses IBFs and SEs as primitives. - Depending on the state of the two sets there are different strategies or operation modes how to efficiently - determinate missing elements between the two sets. - </t> - - <t> - The simplest mode is the "full" synchronization mode. The idea is that if the difference between the sets of the two - peers exceeds a certain threshold, the overhead to determine which elements are different outweighs - the overhead of sending the complete set. In this case, the most efficient method can be to just - exchange the full sets. - </t> - <t> - <!-- TODO: Add smaller version --> - <eref target="https://git.gnunet.org/lsd0003.git/plain/statemaschine/full_state_maschine.jpg">Link to statemachine diagram</eref> - </t> - <t> - The second possibility is that the difference of the sets is small compared to the set size. - Here, an efficient "delta" synchronization mode is more efficient. Given these two possibilities, - the first steps of the protocol are used to determine which mode should be used. - </t> - - <t> - Thus, the set synchronization protocol always begins with the following operation mode independent steps. - </t> - - <t> - The initiating peer begins in the <strong>Initiating Connection</strong> state and the receiving peer in the <strong>Expecting Connection</strong> - state. The first step for the initiating peer in the protocol is to send an <em><xref target="messages_operation_request" format="title" /></em> to the receiving peer and - transition into the <strong>Expect SE</strong> state. After receiving the <em><xref target="messages_operation_request" format="title" /></em> the receiving peer - transitions to the <strong>Expecting IBF</strong> state and answers with the - <em><xref target="messages_se" format="title" /></em> message. When the initiating peer receives the <em><xref target="messages_se" format="title" /></em> message, - it decides with some heuristics which operation mode is likely more suitable for the estimated set difference and the application-provided latency-bandwidth tradeoff. - The detailed tradeoff between the <xref target="modeofoperation_full-sync" format="title" /> and the <xref target="modeofoperation_individual-elements" format="title" /> - is explained in the section <xref target="modeofoperation_combined-mode" format="title" />. - </t> - <section anchor="modeofoperation_full-sync" numbered="true" toc="default"> - <name>Full Synchronisation Mode</name> - - <t> - When the initiating peer decides to use the full synchronisation mode and the set of the initiating peer is bigger than the set of the receiving peer, the initiating - peer sends a <em><xref target="messages_request_full" format="title" /></em> message, and transitions from <strong>Expecting SE</strong> to the <strong>Full Receiving</strong> state. - If the set of the initiating peer is smaller, it sends all set elements to the other peer followed by the <em><xref target="messages_full_done" format="title" /></em> message, and - transitions into the <strong>Full Sending</strong> state. - </t> - <t> - <!-- TODO: Add smaller version --> - <eref target="https://git.gnunet.org/lsd0003.git/plain/statemaschine/full_state_maschine.jpg">Link to statemachine diagram</eref> - </t> - <t><strong>The behavior of the participants the different state is described below:</strong></t> - <dl> - <dt><strong>Expecting IBF:</strong></dt> - <dd> - If a peer in the <strong>Expecting IBF</strong> state receives a <em><xref target="messages_request_full" format="title" /></em> message from the other peer, the - peer sends all the elements of its set followed by a <em><xref target="messages_full_done" format="title" /></em> message to the other peer, and transitions to the - <strong>Full Sending</strong> state. If the peer receives an <em><xref target="messages_full_element" format="title" /></em> message, it processes the element and transitions to the <strong>Full Receiving</strong> state. - </dd> - <dt><strong>Full Sending:</strong></dt> - <dd> - While a peer is in <strong>Full Sending</strong> state the peer expects to continuously receive elements from the other - peer. As soon as a the <em><xref target="messages_full_done" format="title" /></em> message is received, the peer transitions into - the <strong>Finished</strong> state. - </dd> - <dt><strong>Full Receiving (In code: Expecting IBF): </strong></dt> - <dd> - While a peer is in the <strong>Full Receiving</strong> state, it expects to continuously receive elements from the other - peer. As soon as a the <em><xref target="messages_full_done" format="title" /></em> message is received, it sends - the remaining elements (those it did not receive) from its set to the other - peer, followed by a <em><xref target="messages_full_done" format="title" /></em>. - After sending the last message, the peer transitions into the <strong>Finished</strong> state. - </dd> - </dl> - </section> - <section anchor="modeofoperation_individual-elements" numbered="true" toc="default"> - <name>Delta Synchronisation Mode</name> - <t> - When the initiating peer in the <strong>Expected SE</strong> state decides to use the delta synchronisation mode, it - sends a <em><xref target="messages_ibf" format="title" /></em> to the receiving peer and transitions into the <strong>Passive Decoding</strong> state. - </t> - <t> - The receiving peer in the <strong>Expecting IBF</strong> state receives the - <em><xref target="messages_ibf" format="title" /></em> message from - the initiating peer and transitions into the <strong>Expecting IBF Last</strong> state when there - are multiple <em><xref target="messages_ibf" format="title" /></em> messages to sent, - when there is just a single <em><xref target="messages_ibf" format="title" /></em> message the reviving peer - transitions directly to the <strong>Active Decoding</strong> state. - </t> - <t> - The peer that is in the <strong>Active Decoding</strong>, <strong>Finish Closing</strong> or in the <strong>Expecting IBF Last</strong> - state is called the active peer and the peer that is in either the <strong>Passive Decoding</strong> or the <strong>Finish Waiting</strong> state - is called the passive peer. - </t> - <t> - <!-- TODO: Add smaler version --> - <eref target="https://git.gnunet.org/lsd0003.git/plain/statemaschine/full_state_maschine.jpg">Link to statemachine diagram</eref> - </t> - <t><strong>The behavior of the participants the different states is described below:</strong></t> - <dl> - <dt><strong>Passive Decoding:</strong></dt> - <dd> - <t> - In the <strong>Passive Decoding</strong> state the passive peer reacts to requests from the active peer. - The action the passive peer executes depends on the message the passive peer receives in the <strong>Passive Decoding</strong> state from the active peer - and is described below on a per message basis. - </t> - - <dl> - <dt><em><xref target="messages_inquiry" format="title" /></em> message:</dt> - <dd> - The <em><xref target="messages_inquiry" format="title" /></em> message - is received if the active peer requests the SHA-512 hash of one or more elements (by sending the 64 bit element ID) - that are missing from the active peer's set. - In this case the passive peer answers with <em><xref target="messages_offer" format="title" /></em> messages - which contain the SHA-512 hash of the requested element. If the passive peer does not have an element with - a matching element ID, it MUST ignore the inquiry. If multiple elements match the 64 bit element ID, the passive - peer MUST send offers for all of the matching elements. - </dd> - <dt><em><xref target="messages_demand" format="title" /></em> message:</dt> - <dd> - The <em><xref target="messages_demand" format="title" /></em> message - is received if the active peer requests a complete element that is missing in the active peers set. If the requested element is valid - the passive peer answers with an <em><xref target="messages_elements" format="title" /></em> message which contains the full, - application-dependent data of the requested element. If the passive peer receives a demand for a SHA-512 hash for which - it has no element, a protocol violation is detected and the protocol MUST be aborted. - Implementations MAY strengthen this and forbid demands without previous matching offers. - </dd> - <dt><em><xref target="messages_offer" format="title" /></em> message:</dt> - <dd> - The <em><xref target="messages_offer" format="title" /></em> message - is received if the active peer has decoded an element that is present in the active peers set and may be missing in the - set of the passive peer. If the SHA-512 hash of the offer is indeed not a hash of any of the elements from the set of - the passive peer, the passive peer MUST answer with a <em><xref target="messages_demand" format="title" /></em> message - for that SHA-512 hash and remember that it issued this demand. The send demand need to be added to a list with unsatisfied demands. - </dd> - <dt><em><xref target="messages_elements" format="title" /></em> message:</dt> - <dd> - When a new element message has been received the peer checks if a corresponding - <em><xref target="messages_demand" format="title" /></em> for the element has been sent - and the demand is still unsatisfied. - If the element has been demanded the peer checks the element for validity, removed it from the list - of pending demands and then then saves the element to the the set otherwise the peer - rejects the element. - </dd> - <dt><em><xref target="messages_ibf" format="title" /></em> message:</dt> - <dd> - If an <em><xref target="messages_ibf" format="title" /></em> message is received, this - indicates that decoding of the IBF on the active site has failed and roles should be swapped. - The receiving passive peer transitions into the <strong>Expecting IBF Last</strong> state, - and waits for more <em><xref target="messages_ibf" format="title" /></em> messages - or the final <em><xref target="messages_ibf_last" format="title" /></em> message to be received. - </dd> - <dt><em><xref target="messages_ibf_last" format="title" /></em> message:</dt> - <dd> - If an <em><xref target="messages_ibf_last" format="title" /></em> message is received this - indicates that the there is just one IBF slice and a direct state and role transition from - <strong>Passive Decoding</strong> to <strong>Active Decoding</strong> is initiated. - </dd> - <dt><em><xref target="messages_done" format="title" /></em> message:</dt> - <dd> - Receiving the <em><xref target="messages_done" format="title" /></em> message signals - the passive peer that all demands of the active peer have been satisfied. Alas, the - active peer will continue to process demands from the passive peer. - Upon receiving this message, the passive peer transitions into the - <strong>Finish Waiting</strong> state. - </dd> - </dl> - </dd> - <dt><strong>Active Decoding:</strong></dt> - <dd> - <t> - In the <strong>Active Decoding</strong> state the active peer decodes the IBFs and evaluates the set difference - between the active and passive peer. Whenever an element ID is obtained by decoding the IBF, the active peer - sends either an offer or an inquiry to the passive peer, depending on which site the decoded element is missing. - </t> - <t> - If the IBF decodes a positive (1) pure bucket, the element is missing on the passive peers site. - Thus the active peer sends an <em><xref target="messages_offer" format="title" /></em> to the passive peer. - A negative (-1) pure bucket indicates that a element is missing in the active peers set, so the active peer - sends a <em><xref target="messages_inquiry" format="title" /></em> to the passive peer. - </t> - <t> - In case the IBF does not successfully decode anymore, the active peer sends a new IBF to the passive client - and changes into <strong>Passive Decoding</strong> state. This initiates a role swap. - To reduce overhead and prevent double transmission of offers and elements the new IBF is created - on the new complete set after all demands and inquiries have been satisfied. - - </t> - <t> - As soon as the active peer successfully finished decoding the IBF, the active peer sends a - <em><xref target="messages_done" format="title" /></em> message to the passive peer. - </t> - <t> - All other actions taken by the active peer depend on the message the active peer receives from - the passive peer. The actions are described below on a per message basis: - </t> - <dl> - <dt><em><xref target="messages_offer" format="title" /></em> message:</dt> - <dd> - The <em><xref target="messages_offer" format="title" /></em> message indicates that the - passive peer received a <em><xref target="messages_inquiry" format="title" /></em> message from - the active peer. If a Inquiry has been sent and <!-- FIXME: is this indeed a condition that is checked? --> - the offered element is missing in the active peers set, - the active peer sends a <em><xref target="messages_demand" format="title" /></em> message to the - passive peer. The send demand need to be added to a list with unsatisfied demands. - In the case the received offer is for an element that is already in the set of the peer the offer is ignored. - <!-- FIXME: what happens if the offer is for an element that is not missing? I think then we just ignore it, right? --> - </dd> - <dt><em><xref target="messages_demand" format="title" /></em> message:</dt> - <dd> - The <em><xref target="messages_demand" format="title" /></em> message indicates that the - passive peer received a <em><xref target="messages_offer" format="title" /></em> from - the active peer. The active peer satisfies the demand of the passive peer by sending - <em><xref target="messages_elements" format="title" /></em> message if a offer request - for the element has been sent. - <!-- IMPLEMENT: This is not implemented in code // Change --> - In the case the demanded element does not exist in the - set there was probably a bucket decoded that was not really pure so potentially all <em><xref target="messages_offer" format="title" /></em> - and <em><xref target="messages_demand" format="title" /></em> messages sent after are invalid - in this case a role change active -> passive with a new IBF is easiest. - If a demand for the same element is received multiple times the demands should be - discarded. - <!-- IMPLEMENT: This is not implemented in code // Change --> - <!--FIXME: Do we really check that we first made an offer?--> - </dd> - <dt><em><xref target="messages_elements" format="title" /></em> message:</dt> - <dd> - A element that is received is marked in the list of demanded elements as satisfied, validated and - saved and not further action is taken. - Elements that are not demanded or already known are discarded. - </dd> - <dt><em><xref target="messages_done" format="title" /></em> message:</dt> - <dd> - Receiving the message <em><xref target="messages_done" format="title" /></em> indicates - that all demands of the passive peer have been satisfied. The active peer then changes into the - state <strong>Finish Closing</strong> state. - <!-- IMPLEMENT: This is not implemented in code // Change --> - If the IBF is not finished decoding and the <em><xref target="messages_done" format="title" /></em> - is received the other peer is not in compliance with the protocol and the set reconciliation MUST be aborted. - <!-- IMPLEMENT: This is not implemented in code // Change --> - </dd> - </dl> - </dd> - <dt><strong>Expecing IBF Last</strong></dt> - <dd> - <t> - In the <strong>Expecing IBF Last</strong> state the active peer continuously receives <em><xref target="messages_ibf" format="title" /></em> - messages from the passive peer. When the last <em><xref target="messages_ibf_last" format="title" /></em> message is received - the active peer changes into <strong>Active Decoding</strong> state. - </t> - </dd> - <dt><strong>Finish Closing</strong> / <strong>Finish Waiting</strong></dt> - <dd> - <t> - In this states the peers are waiting for all demands to be satisfied and for the synchronisation - to be completed. When all demands are satisfied the peer changes into state <strong>Finished</strong>. - </t> - </dd> - </dl> - </section> - <section anchor="modeofoperation_combined-mode" numbered="true" toc="default"> - <name>Combined Mode</name> - <t> - In the combined mode the <xref target="modeofoperation_full-sync" format="title" /> and - the <xref target="modeofoperation_individual-elements" format="title" /> - are combined to minimize resource consumption. - </t> - <t> - The <xref target="modeofoperation_individual-elements" format="title" /> is only efficient on small set - differences or if the byte-size of the elements is large. Is the set difference is estimated to be large - the <xref target="modeofoperation_full-sync" format="title" /> is - more efficient. The exact heuristics and parameters on which the protocol decides which mode - should be used are described in the <xref target="performance" format="title" /> section of this document. - </t> - <t> - There are two main cases when a <xref target="modeofoperation_full-sync" format="title" /> - is always used. - The first case is when one of the peers announces having an empty set. This is announced by setting - the SETSIZE field in the <em><xref target="messages_se" format="title" /></em> to 0. - The second case is if the application requested full synchronization explicitly. - This is useful for testing and should not be used in production. - </t> - <!-- - <t> - ############# NOTE ############ - To ensure that ...... the difference is multiplied by 1.5 if there are more than 200 elements differences between the sets (WHY? line 1398). - The Full Synchronisation Mode is used if the flag to force full sync is set, the estimated difference between the two sets is bigger - than 25% or the set size of the receiving peer is zero. Otherwise the delta synchronisation mode is used. - ############# NOTE END############ - </t> - --> - </section> - </section> - - - <section anchor="messages" numbered="true" toc="default"> - <name>Messages</name> - - <section anchor="messages_operation_request" numbered="true" toc="default"> - <name>Operation Request</name> - - <section anchor="messages_operation_request_description" numbered="true" toc="default"> - <name>Description</name> - <t> - This message is the first message of the protocol and it is sent to signal to the receiving peer - that the initiating peer wants to initialize a new connection. - </t> - <t> - This message is sent in the transition between the <strong>Initiating Connection</strong> state and the <strong>Expect SE</strong> state. - </t> - <t> - If a peer receives this message and is willing to run the protocol, it answers by sending back a <em><xref target="messages_se" format="title" /></em> message. - Otherwise it simply closes the connection. - </t> - </section> - <section anchor="messages_operation_request_structure" numbered="true" toc="default"> - <name>Structure</name> - - <figure anchor="figure_operation_request"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | ELEMENT COUNT | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | APX - +-----+-----+-----+-----+-----+-----+-----+-----+ / - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_OPERATION_REQUEST as registered in <xref target="gana" format="title" />, in network byte order. - </dd> - <!-- dt>OPERATION TYPE</dt> - <dd> - is a 32-bit unsigned integer which describes the type of operation that should be initiated on the set. The filed can have three - different value NONE, INTERSECTION and UNION, numeric represented by 0,1 and 2. - @Christian can you check?: Right, alas we - here only do UNION and INTERSECTION is a completely different protocol => we shall simply REMOVE this field. Hence commented out here: - reminder to _remove_ in implementation! - NONE should never occur and signals the set supports no operation and is just for local use. - INTERSECTION returns only elements that are in both sets and the default case UNION, return all - elements that are in at least one of the sets. - </dd --> - <dt>ELEMENT COUNT</dt> - <dd> - is the number of the elements the requesting party has in its set, as a 32-bit unsigned integer in network byte order. - </dd> - <dt>APX</dt> - <dd> - is a SHA-512 hash that identifies the application. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_ibf" numbered="true" toc="default"> - <name>IBF</name> - - <section anchor="messages_ibf_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The IBF message contains a slice of the IBF. - </t> - <t> - The <em>IBF</em> message is sent at the start of the protocol from the initiating peer in the transaction - between <strong>Expect SE</strong> -> <strong>Expecting IBF Last</strong> or when the IBF does not - decode and there is a role change in the transition between <strong>Active Decoding</strong> -> <strong>Expecting IBF Last</strong>. - This message is only sent if there are more than one IBF slice to sent, in the case there is just - one slice the <xref target="messages_ibf_last" format="title" /> message is sent. - </t> - </section> - <section anchor="messages_ibf_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_ibf"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE |ORDER| PAD | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | OFFSET | SALT | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | IBF-SLICE - + / - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_REQUEST_IBF as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>ORDER</dt> - <dd> - is a 8-bit unsigned integer which signals the order of the IBF. The order of the IBF - is defined as the logarithm of the number of buckets of the IBF. - </dd> - <dt>PAD</dt> - <dd> - is 24-bit always set to zero - </dd> - <dt>OFFSET</dt> - <dd> - is a 32-bit unsigned integer which signals the offset to the following ibf slices in the original. - </dd> - <dt>SALT</dt> - <dd> - is a 32-bit unsigned integer that contains the salt which was used to create the - IBF. - </dd> - <dt>IBF-SLICE</dt> - <dd> - <t> - are variable count of slices in an array. A single slice contains out multiple 64-bit IDSUMS, - 32-bit HASHSUMS and 8-bit COUNTERS. In the network order the array of IDSUMS is first, followed - by an array of HASHSUMS and ended with an array of COUNTERS. Length of the array is defined - by MIN( 2^ORDER - OFFSET, MAX_BUCKETS_PER_MESSAGE). MAX_BUCKETS_PER_MESSAGE is defined as - 32768 divided by the BUCKET_SIZE which is 13-byte (104-bit). - </t> - <t> - To get the IDSUM field, all IDs who hit a bucket are added up with a binary XOR operation. - See <xref target="ibf_format_id_generation" format="title" /> for details about ID generation. - </t> - <t> - The calculation of the HASHSUM field is done accordingly to the calculation of the IDSUM field: - all HASHes are added up with a binary XOR operation. - The HASH value is calculated as described in detail in section <xref target="ibf_format_HASH_calculation" format="title" />. - </t> - <t> - The algorithm to find the correct bucket in which the ID and the HASH have to be added - is described in detail in section <xref target="ibf_format_bucket_identification" format="title" />. - </t> - - <!-- - FIXME: this is not sufficiently precise! How are the element IDs (and IDSUMS) computed? - How are the HASHes (and HASHSUMS) computed? Which byte order is used? What role does - the SALT have in these computations? Definitively needs DETAILED algorithm(s) and - test vectors.--> - </dd> - </dl> - <figure anchor="figure_ibf_slice"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - IBF-SLICE - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | IDSUMS | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | IDSUMS | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | HASHSUMS | HASHSUMS | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | COUNTERS | COUNTERS | - +-----+-----+-----+-----+-----+-----+-----+-----+ - / / - / / - ]]></artwork> - </figure> - </section> - </section> - - <section anchor="messages_ibf_last" numbered="true" toc="default"> - <name>IBF</name> - - <section anchor="messages_ibf_last_description" numbered="true" toc="default"> - <name>Description</name> - <t> - This message indicates to the remote peer that all slices of the bloom filter have been sent. - The binary structure is exactly the same as the <xref target="messages_ibf_structure" format="title" /> of - the message <xref target="messages_ibf" format="title" /> with a different "MSG TYPE" - which is defined in <xref target="gana" format="title" /> "SETU_P2P_IBF_LAST". - </t> - <t> - Receiving this message initiates the state transmissions - <strong>Expecting IBF Last</strong> -> <strong>Active Decoding</strong>, - <strong>Expecting IBF</strong> -> <strong>Active Decoding</strong> and - <strong>Passive Decoding</strong> -> <strong>Active Decoding</strong>. This message - can initiate a peer the roll change from <strong>Active Decoding</strong> to - <strong>Passive Decoding</strong>. - </t> - </section> - </section> - - <section anchor="messages_elements" numbered="true" toc="default"> - <name>Elements</name> - - <section anchor="messages_elements_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The Element message contains an element that is synchronized in the <xref target="modeofoperation_individual-elements" format="title" /> - and transmits a full element between the peers. - </t> - <t> - This message is sent in the state <strong>Active Decoding</strong> and <strong>Passive Decoding</strong> - as answer to a <em><xref target="messages_demand" format="title" /></em> message from the remote peer. - The Element message can also be received in the <strong>Finish Closing</strong> or <strong>Finish Waiting</strong> - state after receiving a <em><xref target="messages_done" format="title" /></em> message from the remote peer, in this - case the client changes to the <strong>Finished</strong> state as soon as all demands for elements have been satisfied. - </t> - <t> - This message is exclusively sent in the <xref target="modeofoperation_individual-elements" format="title" />. - </t> - </section> - <section anchor="messages_elements_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_elements"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | E TYPE | PADDING | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | E SIZE | AE TYPE | DATA - +-----+-----+-----+-----+ / - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_ELEMENTS as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>E TYPE</dt> - <dd> - element type is a 16-bit unsigned integer witch defines the element type for - the application. - </dd> - <dt>PADDING</dt> - <dd> - is 16-bit always set to zero - </dd> - <dt>E SIZE</dt> - <dd> - element size is 16-bit unsigned integer that signals the size of the elements data part. - </dd> - <dt>AE TYPE</dt> - <dd> - application specific element type is a 16-bit unsigned integer that is needed to identify - the type of element that is in the data field - </dd> - <dt>DATA</dt> - <dd> - is a field with variable length that contains the data of the element. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_offer" numbered="true" toc="default"> - <name>Offer</name> - - <section anchor="messages_offer_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The offer message is an answer to an <em><xref target="messages_inquiry" format="title" /></em> message - and transmits the full hash of an element that has been requested by the other peer. - This full hash enables the other peer to check if the element is really missing in its set and - eventually sends a <em><xref target="messages_demand" format="title" /></em> message for that a element. - </t> - <t> - The offer is sent and received only in the <strong>Active Decoding</strong> and in the <strong>Passive Decoding</strong> - state. - </t> - <t> - This message is exclusively sent in the <xref target="modeofoperation_individual-elements" format="title" />. - </t> - </section> - <section anchor="messages_offer_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_offer"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | HASH - +-----+-----+-----+-----+ - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_OFFER as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>HASH</dt> - <dd> - is a SHA 512-bit hash of the element that is requested with a inquiry message. - </dd> - </dl> - </section> - </section> - - - <section anchor="messages_inquiry" numbered="true" toc="default"> - <name>Inquiry</name> - - <section anchor="messages_inquiry_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The Inquiry message is exclusively sent by the active peer in <strong>Active Decoding</strong> state - to request the full hash of an element that is missing in the active peers set. This is normally answered - by the passive peer with <em><xref target="messages_offer" format="title" /></em> message. - </t> - <t> - This message is exclusively sent in the <xref target="modeofoperation_individual-elements" format="title" />. - </t> - <t> - NOTE: HERE IS AN IMPLEMENTATION BUG UNNECESSARY 32-BIT PADDING! - </t> - </section> - <section anchor="messages_inquiry_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_inquiry"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | SALT | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | IBF KEY | - +-----+-----+-----+-----+-----+-----+-----+-----+ - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_INQUIRY as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>IBF KEY</dt> - <dd> - is a 64-bit unsigned integer that contains the key for which the inquiry is sent. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_demand" numbered="true" toc="default"> - <name>Demand</name> - - <section anchor="messages_demand_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The demand message is sent in the <strong>Active Decoding</strong> and in the <strong>Passive Decoding</strong> - state. It is a answer to a received <em><xref target="messages_offer" format="title" /></em> message - and is sent if the element described in the <em><xref target="messages_offer" format="title" /></em> message - is missing in the peers set. In the normal workflow the answer to the demand message is an - <em><xref target="messages_elements" format="title" /></em> message. - </t> - <t> - This message is exclusively sent in the <xref target="modeofoperation_individual-elements" format="title" />. - </t> - </section> - <section anchor="messages_demand_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_demand"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | HASH - +-----+-----+-----+-----+ - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_DEMAND as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>HASH</dt> - <dd> - is a 512-bit Hash of the element that is demanded. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_done" numbered="true" toc="default"> - <name>Done</name> - - <section anchor="messages_done_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The done message is sent when all <em><xref target="messages_demand" format="title" /></em> messages - have been successfully satisfied and the set is complete synchronized. - <!-- IMPLEMENT: This is not implemented in code // Change --> - A final checksum (XOR SHA-512 hash) over all elements of the set is added to the message - to allow the other peer to make sure that the sets are equal. - <!-- IMPLEMENT: This is not implemented in code // Change --> - - </t> - <t> - This message is exclusively sent in the <xref target="modeofoperation_individual-elements" format="title" />. - </t> - </section> - <section anchor="messages_done_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_done"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 - +-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | - +-----+-----+-----+-----+ - | HASH - +-----+-----+-----+-----+ - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_DONE as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>HASH</dt> - <dd> - is a 512-bit hash of the set to allow a final equality check. - </dd> - - </dl> - </section> - </section> - - <section anchor="messages_full_done" numbered="true" toc="default"> - <name>Full Done</name> - - <section anchor="messages_full_done_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The full done message is sent in the <xref target="modeofoperation_full-sync" format="title" /> - to signal that all remaining elements of the set have been sent. The message is received and sent in in the - <strong>Full Sending</strong> and in the <strong>Full Receiving</strong> state. When the full done message is received - in <strong>Full Sending</strong> state the peer changes directly into <strong>Finished</strong> state. In - <strong>Full Receiving</strong> state receiving a full done message initiates the sending of - the remaining elements that are missing in the set of the other peer. - </t> - </section> - <section anchor="messages_full_done_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_full_done"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 - +-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | - +-----+-----+-----+-----+ - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_FULL_DONE as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_request_full" numbered="true" toc="default"> - <name>Request Full</name> - - <section anchor="messages_request_full_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The request full message is sent by the initiating peer in <strong>Expect SE</strong> state to the receiving peer if - the operation mode "<xref target="modeofoperation_full-sync" format="title" />" is - determined as the better <xref target="modeofoperation" format="title" /> and the set size of the initiating peer is smaller - than the set size of the receiving peer. The initiating peer changes after sending the request full message into - <strong>Full Receiving</strong> state. - </t> - <t> - The receiving peer receives the Request Full message in the <strong>Expecting IBF</strong>, afterwards the receiving peer - starts sending its complete set in <xref target="messages_full_element" format="title" /> messages to the initiating peer. - </t> - </section> - <section anchor="messages_request_full_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_request_full"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 - +-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | - +-----+-----+-----+-----+ - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_REQUEST_FULL as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_se" numbered="true" toc="default"> - <name>Strata Estimator</name> - - <section anchor="messages_se_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The strata estimator is sent by the receiving peer at the start of the protocol right after the - <xref target="messages_operation_request" format="title" /> message has been received. - </t> - <t> - The strata estimator is used to estimate the difference between the two sets as described in section <xref target="se" format="counter" />. - </t> - <t> - When the initiating peer receives the strata estimator the peer decides which <xref target="modeofoperation" format="title" /> to use - for the synchronization. Depending on the size of the set difference and the <xref target="modeofoperation" format="title" /> the initiating peer - changes into <strong>Full Sending</strong>, <strong>Full Receiving</strong> or <strong>Passive Decoding</strong> state. - </t> - </section> - <section anchor="messages_se_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_se"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | SETSIZE - +-----+-----+-----+-----+-----+-----+-----+-----+ - SETSIZE | SE-SLICES - +-----+-----+-----+-----+ - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_SE as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>SETSIZE</dt> - <dd> - is a 64-bit unsigned integer that is defined by the size of the set the SE is <!--IMPLEMENT: Mögliche optimierung wäre wäre hier eine 32bit padding einzuführen damit es aligned --> - </dd> - <dt>SE-SLICES</dt> - <dd> - is variable in size and contains the same structure as the IBF-SLICES field in the IBF message. - </dd> - </dl> - </section> - </section> - - <section anchor="messages_sec" numbered="true" toc="default"> - <name>Strata Estimator Compressed</name> - - <section anchor="messages_sec_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The Strata estimator can be compressed with gzip to improve performance. For - details see section <xref target="performance" format="title" />. - </t> - <t> - Since the content of the message is the same as the uncompressed Strata Estimator, the details - aren't repeated here for details see section <xref target="messages_se" format="counter" />. - </t> - </section> - </section> - - - <section anchor="messages_full_element" numbered="true" toc="default"> - <name>Full Element</name> - - <section anchor="messages_full_element_description" numbered="true" toc="default"> - <name>Description</name> - <t> - The full element message is the equivalent of the <xref target="messages_elements" format="title" /> message in - the <xref target="modeofoperation_full-sync" format="title" />. It contains a complete element that is missing - in the set of the peer that receives this message. - </t> - <t> - The full element message is exclusively sent in the transitions <strong>Expecting IBF</strong> -> <strong>Full Receiving</strong> and - <strong>Full Receiving</strong> -> <strong>Finished</strong>. The message is only received in the <strong> Full Sending</strong> and - <strong>Full Receiving</strong> state. - </t> - <t> - After the last full element messages has been sent the <xref target="messages_full_done" format="title" /> message - is sent to conclude the full synchronisation of the element sending peer. - </t> - </section> - <section anchor="messages_full_element_structure" numbered="true" toc="default"> - <name>Structure</name> - <figure anchor="figure_full_element"> - <artwork name="" type="" align="left" alt=""><![CDATA[ - 0 8 16 24 32 40 48 56 - +-----+-----+-----+-----+-----+-----+-----+-----+ - | MSG SIZE | MSG TYPE | E TYPE | PADDING | - +-----+-----+-----+-----+-----+-----+-----+-----+ - | SIZE | AE TYPE | DATA - +-----+-----+-----+-----+ - / / - / / - ]]></artwork> - </figure> - <t>where:</t> - <dl> - <dt>MSG SIZE</dt> - <dd> - is 16-bit unsigned integer in network byte order witch describes the message size in bytes and the header is included. - </dd> - <dt>MSG TYPE</dt> - <dd> - the type of SETU_P2P_REQUEST_FULL_ELEMENT as registered in <xref target="gana" format="title" /> in network byte order. - </dd> - <dt>E TYPE</dt> - <dd> - element type is a 16-bit unsigned integer witch defines the element type for - the application. - </dd> - <dt>PADDING</dt> - <dd> - is 16-bit always set to zero - </dd> - <dt>E SIZE</dt> - <dd> - element size is 16-bit unsigned integer that signals the size of the elements data part. - </dd> - <dt>AE TYPE</dt> - <dd> - application specific element type is a 16-bit unsigned integer that is needed to identify - the type of element that is in the data field - </dd> - <dt>DATA</dt> - <dd> - is a field with variable length that contains the data of the element. - </dd> - </dl> - </section> - </section> - - </section> - - - <section anchor="performance" numbered="true" toc="default"> - <name>Performance Considerations</name> - <!-- - <t> - - TEXT HERE - - On what basis is the new IBF constructed? Specifically, which set is used? Do we - wait for the completion of pending demands first? How do L/k/M change? Some of this should - be detailed here, but the full details likely need a separate section on the algorithms. - </t> - --> - </section> - - <section anchor="security" numbered="true" toc="default"> - <name>Security Considerations</name> - - <section anchor="security_states" numbered="true" toc="default"> - <name>states</name> - - <section anchor="security_states_expecting_ibf" numbered="true" toc="default"> - <name>Expecting IBF</name> - <t> - Bla Bla - </t> - </section> - - <section anchor="security_states_full_sending" numbered="true" toc="default"> - <name>Full Sending</name> - <t> - Bla Bla - </t> - </section> - - <section anchor="security_states_expecting_ibf_last" numbered="true" toc="default"> - <name>Expecting IBF Last</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_active_decoding" numbered="true" toc="default"> - <name>Active Decoding</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_finish_closing" numbered="true" toc="default"> - <name>Finish Closing</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_finished" numbered="true" toc="default"> - <name>Finished</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_expect_se" numbered="true" toc="default"> - <name>Expect SE</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_full_receiving" numbered="true" toc="default"> - <name>Full Receiving</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_passive_decoding" numbered="true" toc="default"> - <name>Passive Decoding</name> - <t> - Bla Bla - </t> - </section> - <section anchor="security_states_finish_waiting" numbered="true" toc="default"> - <name>Finish Waiting</name> - <t> - Bla Bla - </t> - </section> - - - </section> - - <!-- - <section anchor="security_crypto" numbered="true" toc="default"> - <name>BLAH</name> - <t> - Bulub. - </t> - <t> - Another probabilistic approach to discover bad behaving peers is sampling, in this approach the proving peer needs - to prove that he is in possession of the elements he claimed to be. This is achieved by the following procedure: - </t> - <t> - The verifying peer chooses some - random salt and sends the salt to the proving peer. The proving peer salts the hash of elements with the given - salt from the verifying peer. Then the proving peer calculates the new hashes modulo a number depending on the set sized difference and - sends all the elements where the modulo calculation equals 0 to the verifying peer. - As soon as the verifying peer receives the elements the verifying peer can verify that all the elements - are valid and the modulo calculation equals 0 then the verifying peer can be assured with a high probability - that the peer is honest about his remaining set size and difference. - </t> - </section> - --> - </section> - - <section anchor="gana" numbered="true" toc="default"> - <name>GANA Considerations</name> - <t> - GANA is requested to amend the "GNUnet Message Type" registry - as follows: - </t> - <figure anchor="figure_purposenums"> - <artwork name="" type="" align="left" alt=""><![CDATA[ -Type | Name | References | Description ---------+----------------------------+------------+-------------------------- - 559 | SETU_P2P_REQUEST_FULL | [This.I-D] | Request the full set of the other peer - 560 | SETU_P2P_DEMAND | [This.I-D] | Demand the whole element from the other peer, given only the hash code. - 561 | SETU_P2P_INQUIRY | [This.I-D] | Tell the other peer to send us a list of hashes that match an IBF key. - 562 | SETU_P2P_OFFER | [This.I-D] | Tell the other peer which hashes match a given IBF key. - 563 | SETU_P2P_OPERATION_REQUEST | [This.I-D] | Request a set union operation from a remote peer. - 564 | SETU_P2P_SE | [This.I-D] | Strata Estimator uncompressed - 565 | SETU_P2P_IBF | [This.I-D] | Invertible Bloom Filter Slice. - 566 | SETU_P2P_ELEMENTS | [This.I-D] | Actual set elements. - 567 | SETU_P2P_IBF_LAST | [This.I-D] | Invertible Bloom Filter Last Slice. - 568 | SETU_P2P_DONE | [This.I-D] | Set operation is done. - 569 | SETU_P2P_SEC | [This.I-D] | Strata Estimator compressed - 570 | SETU_P2P_FULL_DONE | [This.I-D] | All elements in full synchronization mode have been send is done. - 571 | SETU_P2P_FULL_ELEMENT | [This.I-D] | Send an actual element in full synchronization mode. - - ]]></artwork> - </figure> - </section> - <!-- gana --> - <section anchor="contributors" numbered="true" toc="default"> - <name>Contributors</name> - <t> - The original GNUnet implementation of the Byzantine Fault Tolerant Set Reconciliation - protocol has mainly been - written by Florian Dold and Christian Grothoff. - </t> - </section> - </middle> - <back> - <references> - <name>Normative References</name> - &RFC5869; - &RFC1034; - &RFC1035; - &RFC2782; - &RFC2119; - &RFC3629; - &RFC3686; - &RFC3826; - &RFC3912; - &RFC5890; - &RFC5891; - &RFC6781; - &RFC6895; - &RFC6979; - &RFC7748; - &RFC8032; - &RFC8126; - - <reference anchor="GANA" target="https://gana.gnunet.org/"> - <front> - <title>GNUnet Assigned Numbers Authority (GANA)</title> - <author> - <organization>GNUnet e.V.</organization> - </author> - <date month="April" year="2020"/> - </front> - </reference> - - <reference anchor="CryptographicallySecureVoting" target="https://git.gnunet.org/bibliography.git/plain/docs/ba_dold_voting_24aug2014.pdf"> - <front> - <title>Cryptographically Secure, DistributedElectronic Voting</title> - <author initials="F." surname="Dold" fullname="Florian Dold"> - <organization>Technische Universität München</organization> - </author> - </front> - </reference> - - - <reference anchor="GNUNET" target="https://git.gnunet.org/bibliography.git/plain/docs/gns2014wachs.pdf"> - <front> - <title>A Censorship-Resistant, Privacy-Enhancing andFully Decentralized Name System</title> - <author initials="M." surname="Wachs" fullname="Matthias Wachs"> - <organization>Technische Universität München</organization> - </author> - <author initials="M." surname="Schanzenbach" fullname="Martin Schanzenbach"> - <organization>Technische Universität München</organization> - </author> - <author initials="C." surname="Grothoff" fullname="Christian Grothoff"> - <organization>Technische Universität München</organization> - </author> - </front> - </reference> - - <reference anchor="Eppstein" target="https://doi.org/10.1145/2018436.2018462"> - <front> - <title>What’s the Difference? Efficient Set Reconciliation without Prior Context</title> - <author initials="D." surname="Eppstein" fullname="David Eppstein"> - <organization>U.C. Irvine</organization> - </author> - <author initials="M." surname="Goodrich" fullname="Michael T. Goodrich"> - <organization>U.C. Irvine</organization> - </author> - <author initials="F." surname="Uyeda" fullname="Frank Uyeda"> - <organization>U.C. San Diego</organization> - </author> - <author initials="G." surname="Varghese" fullname="George Varghese"> - <organization>U.C. San Diego</organization> - </author> - </front> - </reference> - - <reference anchor="GNS" target="https://doi.org/10.1007/978-3-319-12280-9_9"> - <front> - <title>A Censorship-Resistant, Privacy-Enhancing and Fully Decentralized Name System</title> - <author initials="M." surname="Wachs" fullname="Matthias Wachs"> - <organization>Technische Universitaet Muenchen</organization> - </author> - - <author initials="M." surname="Schanzenbach" fullname="Martin Schanzenbach"> - <organization>Technische Universitaet Muenchen</organization> - </author> - - <author initials="C." surname="Grothoff" - fullname="Christian Grothoff"> - <organization>Technische Universitaet Muenchen</organization> - </author> - <date year="2014"/> - </front> - </reference> - <reference anchor="R5N" target="https://doi.org/10.1109/ICNSS.2011.6060022"> - <front> - <title>R5N: Randomized recursive routing for restricted-route networks</title> - <author initials="N. S." surname="Evans" fullname="Nathan S. Evans"> - <organization>Technische Universitaet Muenchen</organization> - </author> - - <author initials="C." surname="Grothoff" - fullname="Christian Grothoff"> - <organization>Technische Universitaet Muenchen</organization> - </author> - <date year="2011"/> - </front> - </reference> - - - <reference anchor="Argon2" target="https://datatracker.ietf.org/doc/draft-irtf-cfrg-argon2/"> - <front> - <title>The memory-hard Argon2 password hash and proof-of-work function</title> - <author initials="A." surname="Biryukov" fullname="Alex Biryukov"> - <organization>University of Luxembourg</organization> - </author> - - <author initials="D." surname="Dinu" fullname="Daniel Dinu"> - <organization>University of Luxembourg</organization> - </author> - - <author initials="D." surname="Khovratovich" - fullname="Dmitry Khovratovich"> - <organization>ABDK Consulting</organization> - </author> - <author initials="S." surname="Josefsson" - fullname="Simon Josefsson"> - <organization>SJD AB</organization> - </author> - <date year="2020" month="March"/> - <abstract> - <t> - This document describes the Argon2 memory-hard function for - password hashing and proof-of-work applications. We provide an - implementer-oriented description with - test vectors. The purpose is to simplify adoption of Argon2 for - Internet protocols. This document is a product of the Crypto Forum Research Group (CFRG) - in the IRTF. - </t> - </abstract> - </front> - </reference> - <reference anchor="MODES" target="https://doi.org/10.6028/NIST.SP.800-38A"> - <front> - <title>Recommendation for Block Cipher Modes of Operation: Methods and Techniques</title> - <author initials="M." surname="Dworkin" fullname="Morris Dworkin"> - <organization>NIST</organization> - </author> - - <date year="2001" month="December"/> - <abstract> - <t> - This recommendation defines five confidentiality modes of operation for use with an - underlying symmetric key block cipher algorithm: Electronic Codebook (ECB), Cipher Block - Chaining (CBC), Cipher Feedback (CFB), Output Feedback (OFB), and Counter (CTR). Used with - an underlying block cipher algorithm that is approved in a Federal Information Processing - Standard (FIPS), these modes can provide cryptographic protection for sensitive, but - unclassified, computer data. - </t> - </abstract> - </front> - </reference> - <reference anchor="ed25519" target="http://link.springer.com/chapter/10.1007/978-3-642-23951-9_9"> - <front> - <title>High-Speed High-Security Signatures</title> - <author initials="D." surname="Bernstein" fullname="Daniel Bernstein"> - <organization>University of Illinois at Chicago</organization> - </author> - - <author initials="N." surname="Duif" - fullname="Niels Duif"> - <organization>Technische Universiteit Eindhoven</organization> - - </author> - <author initials="T." surname="Lange" - fullname="Tanja Lange"> - <organization>Technische Universiteit Eindhoven</organization> - - </author> - <author initials="P." surname="Schwabe" - fullname="Peter Schwabe"> - <organization>National Taiwan University</organization> - - </author> - <author initials="B." surname="Yang" - fullname="Bo-Yin Yang"> - <organization>Academia Sinica</organization> - - </author> - <date year="2011"/> - </front> - </reference> - - <!-- <reference anchor="ISO20022"> - <front> - <title>ISO 20022 Financial Services - Universal financial industry message scheme</title> - <author> - <organization>International Organization for Standardization</organization> - <address> - <uri>http://www.iso.ch</uri> - </address> - </author> - <date month="May" year="2013"/> - </front> - </reference>--> - </references> - <section anchor="test_vectors" numbered="true" toc="default"> - <name>Test Vectors</name> - <section anchor="test_vectors_map_function" numbered="true" toc="default"> - <name>Map Function</name> - <t> - INPUTS: - </t> - <t> - key: 0xFFFFFFFFFFFFFFFF (64-bit) - number_of_buckets_per_element: 3 - ibf_size: 300 - </t> - <t> - OUTPUT: - </t> - <t> - ["222","32","10"] - </t> - </section> - <section anchor="test_vectors_id_function" numbered="true" toc="default"> - <name>ID Calculation Function</name> - <t> - INPUTS: - </t> - <t> - element: 0xadadadadadadadad - ibf_salt 0x3F (6-bit) - </t> - <t> - OUTPUT: - </t> - <t> - 0xFFFFFFFFFFFFFFFF - </t> - </section> - </section> - </back> -</rfc>