aboutsummaryrefslogtreecommitdiff
path: root/contrib/scripts/gnunet-chk.py.in
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/scripts/gnunet-chk.py.in')
-rwxr-xr-xcontrib/scripts/gnunet-chk.py.in383
1 files changed, 383 insertions, 0 deletions
diff --git a/contrib/scripts/gnunet-chk.py.in b/contrib/scripts/gnunet-chk.py.in
new file mode 100755
index 000000000..c976b2143
--- /dev/null
+++ b/contrib/scripts/gnunet-chk.py.in
@@ -0,0 +1,383 @@
1#!@PYTHON@
2# This file is part of GNUnet.
3# (C) 2013, 2018 Christian Grothoff (and other contributing authors)
4#
5# GNUnet is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published
7# by the Free Software Foundation; either version 3, or (at your
8# option) any later version.
9#
10# GNUnet is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with GNUnet; see the file COPYING. If not, write to the
17# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18# Boston, MA 02110-1301, USA.
19#
20# File: gnunet-chk.py
21# Brief: Computes GNUNET style Content Hash Key for a given file
22# Author: Sree Harsha Totakura
23
24from hashlib import sha512
25import logging
26import os
27import getopt
28import sys
29from Crypto.Cipher import AES
30from functools import reduce
31
32
33# Defaults
34DBLOCK_SIZE = (32 * 1024) # Data block size
35
36# Pick a multiple of 2 here to achive 8-byte alignment! We also
37# probably want DBlocks to have (roughly) the same size as IBlocks.
38# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
39# byte = 2 * 512 bits). DO NOT CHANGE!
40CHK_PER_INODE = 256
41
42CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes
43
44CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash
45
46GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix
47
48GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix
49
50
51def encode_data_to_string(data):
52 """Returns an ASCII encoding of the given data block like
53 GNUNET_STRINGS_data_to_string() function.
54
55 data: A bytearray representing the block of data which has to be encoded
56 """
57 echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
58 assert (None != data)
59 assert (bytearray == type(data))
60 size = len(data)
61 assert (0 != size)
62 vbit = 0
63 wpos = 0
64 rpos = 0
65 bits = 0
66 out = ""
67 while (rpos < size) or (vbit > 0):
68 if (rpos < size) and (vbit < 5):
69 bits = (bits << 8) | data[rpos] # eat 8 more bits
70 rpos += 1
71 vbit += 8
72 if (vbit < 5):
73 bits <<= (5 - vbit) # zero-padding
74 assert (vbit == ((size * 8) % 5))
75 vbit = 5
76 out += echart[(bits >> (vbit - 5)) & 31]
77 wpos += 1
78 vbit -= 5
79 assert (0 == vbit)
80 return out
81
82
83def sha512_hash(data):
84 """ Returns the sha512 hash of the given data.
85
86 data: string to hash
87 """
88 hash_obj = sha512()
89 hash_obj.update(data)
90 return hash_obj.digest()
91
92
93class AESKey:
94 """Class for AES Keys. Contains the main key and the initialization
95 vector. """
96
97 key = None # The actual AES key
98 iv = None # The initialization vector
99 cipher = None # The cipher object
100 KEY_SIZE = 32 # AES 256-bit key = 32 bytes
101 IV_SIZE = AES.block_size # Initialization vector size (= AES block size)
102
103 def __init__(self, passphrase):
104 """Creates a new AES key.
105
106 passphrase: string containing the passphrase to get the AES key and
107 initialization vector
108 """
109 passphrase = bytearray(passphrase)
110 self.key = bytearray(self.KEY_SIZE)
111 self.iv = bytearray(self.IV_SIZE)
112 if (len(passphrase) > self.KEY_SIZE):
113 self.key = passphrase[:self.KEY_SIZE]
114 passphrase = passphrase[self.KEY_SIZE:]
115 if (len(passphrase) > self.IV_SIZE):
116 self.iv = passphrase[:self.IV_SIZE]
117 else:
118 self.iv[0:len(passphrase)] = passphrase
119 else:
120 self.key[0:len(passphrase)] = passphrase
121 self.key = str(self.key)
122 self.iv = str(self.iv)
123 assert (len(self.key) == self.KEY_SIZE)
124 assert (len(self.iv) == self.IV_SIZE)
125
126
127def setup_aes_cipher_(aes_key):
128 """Initializes the AES object with settings similar to those in GNUnet.
129
130 aes_key: the AESKey object
131 Returns the newly initialized AES object
132 """
133 return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
134
135
136def aes_pad_(data):
137 """Adds padding to the data such that the size of the data is a multiple of
138 16 bytes
139
140 data: the data string
141 Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
142 as padding; data is the new data string with padded bytes at the end
143 """
144 pad_len = len(data) % 16
145 if (0 != pad_len):
146 pad_len = 16 - pad_len
147 pad_bytes = bytearray(15)
148 data += str(pad_bytes[:pad_len])
149 return (pad_len, data)
150
151
152def aes_encrypt(aes_key, data):
153 """Encrypts the given data using AES.
154
155 aes_key: the AESKey object to use for AES encryption
156 data: the data string to encrypt
157 """
158 (pad_len, data) = aes_pad_(data)
159 cipher = setup_aes_cipher_(aes_key)
160 enc_data = cipher.encrypt(data)
161 if (0 != pad_len):
162 enc_data = enc_data[:-pad_len]
163 return enc_data
164
165
166def aes_decrypt(aes_key, data):
167 """Decrypts the given data using AES
168
169 aes_key: the AESKey object to use for AES decryption
170 data: the data string to decrypt
171 """
172 (pad_len, data) = aes_pad_(data)
173 cipher = setup_aes_cipher_(aes_key)
174 ptext = cipher.decrypt(data)
175 if (0 != pad_len):
176 ptext = ptext[:-pad_len]
177 return ptext
178
179
180class Chk:
181 """Class for the content hash key."""
182 key = None
183 query = None
184 fsize = None
185
186 def __init__(self, key, query):
187 assert (len(key) == CHK_HASH_SIZE)
188 assert (len(query) == CHK_QUERY_SIZE)
189 self.key = key
190 self.query = query
191
192 def setSize(self, size):
193 self.fsize = size
194
195 def uri(self):
196 sizestr = repr(self.fsize)
197 if isinstance(self.fsize, int):
198 sizestr = sizestr[:-1]
199 return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
200 encode_data_to_string(bytearray(self.key)) + "." + \
201 encode_data_to_string(bytearray(self.query)) + "." + \
202 sizestr
203
204
205def compute_depth_(size):
206 """Computes the depth of the hash tree.
207
208 size: the size of the file whose tree's depth has to be computed
209 Returns the depth of the tree. Always > 0.
210 """
211 depth = 1
212 fl = DBLOCK_SIZE
213 while (fl < size):
214 depth += 1
215 if ((fl * CHK_PER_INODE) < fl):
216 return depth
217 fl = fl * CHK_PER_INODE
218 return depth
219
220
221def compute_tree_size_(depth):
222 """Calculate how many bytes of payload a block tree of the given depth MAY
223 correspond to at most (this function ignores the fact that some blocks will
224 only be present partially due to the total file size cutting some blocks
225 off at the end).
226
227 depth: depth of the block. depth==0 is a DBLOCK.
228 Returns the number of bytes of payload a subtree of this depth may
229 correspond to.
230 """
231 rsize = DBLOCK_SIZE
232 for cnt in range(0, depth):
233 rsize *= CHK_PER_INODE
234 return rsize
235
236
237def compute_chk_offset_(depth, end_offset):
238 """Compute the offset of the CHK for the current block in the IBlock
239 above
240
241 depth: depth of the IBlock in the tree (aka overall number of tree levels
242 minus depth); 0 == DBLOCK
243 end_offset: current offset in the overall file, at the *beginning* of the
244 block for DBLOCK (depth == 0), otherwise at the *end* of the
245 block (exclusive)
246 Returns the offset in the list of CHKs in the above IBlock
247 """
248 bds = compute_tree_size_(depth)
249 if (depth > 0):
250 end_offset -= 1
251 ret = end_offset / bds
252 return ret % CHK_PER_INODE
253
254
255def compute_iblock_size_(depth, offset):
256 """Compute the size of the current IBLOCK. The encoder is triggering the
257 calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
258 construction. The IBLOCK maybe a full or a partial IBLOCK, and this
259 function is to calculate how long it should be.
260
261 depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
262 (this function is for IBLOCKs only!)
263 offset: current offset in the payload (!) of the overall file, must be > 0
264 (since this function is called at the end of a block).
265 Returns the number of elements to be in the corresponding IBlock
266 """
267 assert (depth > 0)
268 assert (offset > 0)
269 bds = compute_tree_size_(depth)
270 mod = offset % bds
271 if mod is 0:
272 ret = CHK_PER_INODE
273 else:
274 bds /= CHK_PER_INODE
275 ret = mod / bds
276 if (mod % bds) is not 0:
277 ret += 1
278 return ret
279
280
281def compute_rootchk(readin, size):
282 """Returns the content hash key after generating the hash tree for the given
283 input stream.
284
285 readin: the stream where to read data from
286 size: the size of data to be read
287 """
288 depth = compute_depth_(size)
289 current_depth = 0
290 chks = [None] * (depth * CHK_PER_INODE) # list buffer
291 read_offset = 0
292 logging.debug("Begining to calculate tree hash with depth: " + repr(depth))
293 while True:
294 if (depth == current_depth):
295 off = CHK_PER_INODE * (depth - 1)
296 assert (chks[off] is not None)
297 logging.debug("Encoding done, reading CHK `" + chks[off].query + \
298 "' from " + repr(off) + "\n")
299 uri_chk = chks[off]
300 assert (size == read_offset)
301 uri_chk.setSize(size)
302 return uri_chk
303 if (0 == current_depth):
304 pt_size = min(DBLOCK_SIZE, size - read_offset)
305 try:
306 pt_block = readin.read(pt_size)
307 except IOError:
308 logging.warning("Error reading input file stream")
309 return None
310 else:
311 pt_elements = compute_iblock_size_(current_depth, read_offset)
312 pt_block = ""
313 pt_block = \
314 reduce((lambda ba, chk:
315 ba + (chk.key + chk.query)),
316 chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements],
317 pt_block)
318 pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
319 assert (len(pt_block) == pt_size)
320 assert (pt_size <= DBLOCK_SIZE)
321 off = compute_chk_offset_(current_depth, read_offset)
322 logging.debug("Encoding data at offset " + repr(read_offset) + \
323 " and depth " + repr(current_depth) + " with block " \
324 "size " + repr(pt_size) + " and target CHK offset " + \
325 repr(current_depth * CHK_PER_INODE))
326 pt_hash = sha512_hash(pt_block)
327 pt_aes_key = AESKey(pt_hash)
328 pt_enc = aes_encrypt(pt_aes_key, pt_block)
329 pt_enc_hash = sha512_hash(pt_enc)
330 chk = Chk(pt_hash, pt_enc_hash)
331 chks[(current_depth * CHK_PER_INODE) + off] = chk
332 if (0 == current_depth):
333 read_offset += pt_size
334 if (read_offset == size) or \
335 (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
336 current_depth += 1
337 else:
338 if (CHK_PER_INODE == off) or (read_offset == size):
339 current_depth += 1
340 else:
341 current_depth = 0
342
343
344def chkuri_from_path(path):
345 """Returns the CHK URI of the file at the given path.
346
347 path: the path of the file whose CHK has to be calculated
348 """
349 size = os.path.getsize(path)
350 readin = open(path, "rb")
351 chk = compute_rootchk(readin, size)
352 readin.close()
353 return chk.uri()
354
355
356def usage():
357 """Prints help about using this script."""
358 print("""
359Usage: gnunet-chk.py [options] file
360Prints the Content Hash Key of given file in GNUNET-style URI.
361
362Options:
363 -h, --help : prints this message
364""")
365
366
367if '__main__' == __name__:
368 try:
369 opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
370 except getopt.GetoptError as err:
371 print(err)
372 print("Exception occured")
373 usage()
374 sys.exit(2)
375 for option, value in opts:
376 if option in("-h", "--help"):
377 usage()
378 sys.exit(0)
379 if len(args) != 1:
380 print("Incorrect number of arguments passed")
381 usage()
382 sys.exit(1)
383 print(chkuri_from_path(args[0]))