aboutsummaryrefslogtreecommitdiff
path: root/contrib/scripts/gnunet-chk.py.in
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/scripts/gnunet-chk.py.in')
-rwxr-xr-xcontrib/scripts/gnunet-chk.py.in381
1 files changed, 381 insertions, 0 deletions
diff --git a/contrib/scripts/gnunet-chk.py.in b/contrib/scripts/gnunet-chk.py.in
new file mode 100755
index 000000000..cabaef6c4
--- /dev/null
+++ b/contrib/scripts/gnunet-chk.py.in
@@ -0,0 +1,381 @@
1#!@PYTHON@
2# This file is part of GNUnet.
3# (C) 2013, 2018 Christian Grothoff (and other contributing authors)
4#
5# GNUnet is free software: you can redistribute it and/or modify it
6# under the terms of the GNU Affero General Public License as published
7# by the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9#
10# GNUnet is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# Affero General Public License for more details.
14#
15# You should have received a copy of the GNU Affero General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17#
18# File: gnunet-chk.py
19# Brief: Computes GNUNET style Content Hash Key for a given file
20# Author: Sree Harsha Totakura
21
22from hashlib import sha512
23import logging
24import os
25import getopt
26import sys
27from Crypto.Cipher import AES
28from functools import reduce
29
30
# Defaults
DBLOCK_SIZE = (32 * 1024)  # Data block size in bytes

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64  # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
47
48
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like the
    GNUNET_STRINGS_data_to_string() function.

    Input bits are consumed most-significant first and emitted five at a
    time as one character of the 32-symbol alphabet "0-9A-V".  A trailing
    group of fewer than five bits is zero-padded on the right.

    data: A non-empty bytearray representing the block of data to encode
    Returns the encoded string.
    """
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    assert data is not None
    assert type(data) is bytearray
    total = len(data)
    assert total != 0
    symbols = []
    acc = 0      # bit accumulator; new bytes are shifted in at the low end
    pending = 0  # number of low bits of `acc' not yet emitted
    idx = 0      # index of the next input byte
    while idx < total or pending > 0:
        if idx < total and pending < 5:
            acc = (acc << 8) | data[idx]  # eat 8 more bits
            idx += 1
            pending += 8
        if pending < 5:
            # Input exhausted with a partial group left: zero-pad it.
            acc <<= 5 - pending
            assert pending == (total * 8) % 5
            pending = 5
        symbols.append(alphabet[(acc >> (pending - 5)) & 31])
        pending -= 5
    assert pending == 0
    return "".join(symbols)
79
80
def sha512_hash(data):
    """Returns the raw SHA-512 digest of the given data.

    data: the bytes to hash
    Returns the digest as produced by hashlib's digest() method.
    """
    return sha512(data).digest()
89
90
class AESKey:
    """Class for AES Keys. Contains the main key and the initialization
    vector.

    The key is the first KEY_SIZE bytes of the passphrase and the
    initialization vector is the following IV_SIZE bytes; whichever of the
    two comes up short is zero-padded on the right.
    """

    key = None  # The actual AES key (bytes, KEY_SIZE long)
    iv = None  # The initialization vector (bytes, IV_SIZE long)
    cipher = None  # The cipher object
    KEY_SIZE = 32  # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size  # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        passphrase: byte string containing the passphrase to get the AES key
                    and initialization vector
        """
        passphrase = bytearray(passphrase)
        key_part = passphrase[:self.KEY_SIZE]
        iv_part = passphrase[self.KEY_SIZE:self.KEY_SIZE + self.IV_SIZE]
        # bytes() rather than str(): on Python 3 str(bytearray) returns the
        # repr ("bytearray(b'...')"), which is neither the right length nor
        # a type the cipher accepts.
        self.key = bytes(key_part).ljust(self.KEY_SIZE, b"\0")
        self.iv = bytes(iv_part).ljust(self.IV_SIZE, b"\0")
        assert len(self.key) == self.KEY_SIZE
        assert len(self.iv) == self.IV_SIZE
123
124
def setup_aes_cipher_(aes_key):
    """Initializes the AES object with settings similar to those in GNUnet:
    CFB mode with a segment size of 128 bits.

    aes_key: the AESKey object supplying the key and initialization vector
    Returns the newly initialized AES object
    """
    secret, init_vector = aes_key.key, aes_key.iv
    return AES.new(secret, AES.MODE_CFB, init_vector, segment_size=128)
132
133
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes

    data: the data byte string
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data byte string with pad_len zero bytes
    appended at the end
    """
    pad_len = -len(data) % 16
    if pad_len:
        # Build a new object instead of using +=, so a bytearray passed in
        # by the caller is never modified in place.  A bytes literal is used
        # because str(bytearray) is the repr on Python 3, not the raw bytes.
        data = data + b"\0" * pad_len
    return (pad_len, data)
148
149
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    The input is zero-padded to a multiple of 16 bytes before encryption
    and the output is cut back to the original length afterwards.

    aes_key: the AESKey object to use for AES encryption
    data: the data string to encrypt
    Returns the ciphertext, the same length as data.
    """
    pad_len, padded = aes_pad_(data)
    ciphertext = setup_aes_cipher_(aes_key).encrypt(padded)
    return ciphertext[:-pad_len] if pad_len != 0 else ciphertext
162
163
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES

    The input is zero-padded to a multiple of 16 bytes before decryption
    and the output is cut back to the original length afterwards.

    aes_key: the AESKey object to use for AES decryption
    data: the data string to decrypt
    Returns the plaintext, the same length as data.
    """
    pad_len, padded = aes_pad_(data)
    plaintext = setup_aes_cipher_(aes_key).decrypt(padded)
    return plaintext[:-pad_len] if pad_len != 0 else plaintext
176
177
class Chk:
    """Class for the content hash key.

    Holds the key and query hashes of a block and, once setSize() has been
    called, the size that is printed in the URI.
    """
    key = None  # CHK_HASH_SIZE bytes
    query = None  # CHK_QUERY_SIZE bytes
    fsize = None  # set through setSize(); None until then

    def __init__(self, key, query):
        """Creates a content hash key.

        key: byte string of length CHK_HASH_SIZE
        query: byte string of length CHK_QUERY_SIZE
        """
        assert (len(key) == CHK_HASH_SIZE)
        assert (len(query) == CHK_QUERY_SIZE)
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size to be reported by uri().

        size: the size value to store
        """
        self.fsize = size

    def uri(self):
        """Returns the URI string for this key:
        prefix + infix + encoded key + "." + encoded query + "." + size.
        """
        # str() instead of repr()-and-trim: the old code cut the last
        # character off the repr of an int (a leftover from stripping the
        # "L" suffix of Python 2 longs), which on Python 3 drops the final
        # digit of the size.
        sizestr = str(self.fsize)
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            sizestr
201
202
def compute_depth_(size):
    """Computes the depth of the hash tree.

    Starting from one level covering DBLOCK_SIZE bytes, a level is added
    and the covered size multiplied by CHK_PER_INODE until `size' fits.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    capacity = DBLOCK_SIZE
    while capacity < size:
        depth += 1
        grown = capacity * CHK_PER_INODE
        # Wrap-around guard kept from the original code: stop as soon as
        # multiplying no longer increases the capacity.
        if grown < capacity:
            return depth
        capacity = grown
    return depth
217
218
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    off at the end).

    depth: depth of the block.  depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to, i.e. DBLOCK_SIZE multiplied by CHK_PER_INODE once per
    level.
    """
    return reduce(lambda payload, _level: payload * CHK_PER_INODE,
                  range(0, depth),
                  DBLOCK_SIZE)
233
234
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock, as an int in
    the range [0, CHK_PER_INODE).
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        end_offset -= 1  # the end offset is exclusive; step back into the block
    # Floor division: the result is used as a list index, and "/" would
    # produce a float on Python 3.
    return (end_offset // bds) % CHK_PER_INODE
251
252
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock, as an
    int between 1 and CHK_PER_INODE.
    """
    assert (depth > 0)
    assert (offset > 0)
    bds = compute_tree_size_(depth)
    mod = offset % bds
    # Compare by value; "is 0" tests object identity and is not a reliable
    # integer comparison.
    if mod == 0:
        return CHK_PER_INODE
    # Payload covered by one child of this IBlock.  Integer arithmetic is
    # required because the result is used as a slice bound.
    child_size = bds // CHK_PER_INODE
    full_children, remainder = divmod(mod, child_size)
    if remainder != 0:
        return full_children + 1  # the last child is only partially filled
    return full_children
277
278
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    Data blocks of up to DBLOCK_SIZE bytes are read from the stream; every
    block (data or index) is hashed, encrypted with a key derived from that
    hash, and the hash of the ciphertext becomes its query.  The key/query
    pairs of finished blocks are concatenated to form the index block one
    level up.

    readin: the stream where to read data from; read() must return bytes
    size: the size of data to be read
    Returns the Chk of the top-level block with its size set, or None if the
    stream could not be read or ended before `size' bytes were delivered.
    """
    depth = compute_depth_(size)
    current_depth = 0
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer, one row per level
    read_offset = 0
    logging.debug("Begining to calculate tree hash with depth: %r", depth)
    while True:
        if depth == current_depth:
            off = CHK_PER_INODE * (depth - 1)
            assert chks[off] is not None
            # Lazy %-formatting: the query is a byte string and cannot be
            # concatenated with str on Python 3.
            logging.debug("Encoding done, reading CHK `%r' from %r\n",
                          chks[off].query, off)
            uri_chk = chks[off]
            assert size == read_offset
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
            if len(pt_block) != pt_size:
                # A short read would hash fewer bytes than read_offset is
                # advanced by; treat it like a read error.
                logging.warning("Error reading input file stream")
                return None
        else:
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            first = (current_depth - 1) * CHK_PER_INODE
            # The index block is the concatenation of (key, query) pairs of
            # the blocks one level below; these are byte strings.
            pt_block = b"".join(child.key + child.query
                                for child in chks[first:first + pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
            assert len(pt_block) == pt_size
            assert pt_size <= DBLOCK_SIZE
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset %r and depth %r with block "
                      "size %r and target CHK offset %r",
                      read_offset, current_depth, pt_size,
                      current_depth * CHK_PER_INODE)
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if 0 == current_depth:
            read_offset += pt_size
            # Move up once the input is exhausted or a level-1 index block
            # has been filled.
            if (read_offset == size) or \
               (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # NOTE(review): compute_chk_offset_() reduces its result modulo
            # CHK_PER_INODE, so the first comparison looks like it can never
            # be true; left unchanged -- confirm against the reference
            # encoder before touching it.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
340
341
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Raises IOError if the file's content could not be read.
    """
    size = os.path.getsize(path)
    # The context manager closes the file even if hashing raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    if chk is None:
        # compute_rootchk() reports read failures by returning None.
        raise IOError("Error reading input file `%s'" % path)
    return chk.uri()
352
353
def usage():
    """Prints help about using this script.

    Writes the usage text to standard output; takes no arguments and
    returns nothing.
    """
    print("""
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
 -h, --help : prints this message
""")
363
364
# Script entry point: parse the command line, then print the CHK URI of the
# single file argument.  Exit status: 0 after --help, 1 for a wrong number
# of arguments, 2 for an unrecognised option.
if __name__ == '__main__':
    try:
        parsed_opts, positional = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occured")
        usage()
        sys.exit(2)
    if any(flag in ("-h", "--help") for flag, _ in parsed_opts):
        usage()
        sys.exit(0)
    if len(positional) != 1:
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(positional[0]))