diff options
Diffstat (limited to 'contrib/scripts/gnunet-chk.py.in')
-rwxr-xr-x | contrib/scripts/gnunet-chk.py.in | 383 |
1 files changed, 383 insertions, 0 deletions
diff --git a/contrib/scripts/gnunet-chk.py.in b/contrib/scripts/gnunet-chk.py.in new file mode 100755 index 000000000..c976b2143 --- /dev/null +++ b/contrib/scripts/gnunet-chk.py.in | |||
@@ -0,0 +1,383 @@ | |||
1 | #!@PYTHON@ | ||
2 | # This file is part of GNUnet. | ||
3 | # (C) 2013, 2018 Christian Grothoff (and other contributing authors) | ||
4 | # | ||
5 | # GNUnet is free software; you can redistribute it and/or modify | ||
6 | # it under the terms of the GNU General Public License as published | ||
7 | # by the Free Software Foundation; either version 3, or (at your | ||
8 | # option) any later version. | ||
9 | # | ||
10 | # GNUnet is distributed in the hope that it will be useful, but | ||
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | # General Public License for more details. | ||
14 | # | ||
15 | # You should have received a copy of the GNU General Public License | ||
16 | # along with GNUnet; see the file COPYING. If not, write to the | ||
17 | # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | # Boston, MA 02110-1301, USA. | ||
19 | # | ||
20 | # File: gnunet-chk.py | ||
21 | # Brief: Computes GNUNET style Content Hash Key for a given file | ||
22 | # Author: Sree Harsha Totakura | ||
23 | |||
24 | from hashlib import sha512 | ||
25 | import logging | ||
26 | import os | ||
27 | import getopt | ||
28 | import sys | ||
29 | from Crypto.Cipher import AES | ||
30 | from functools import reduce | ||
31 | |||
32 | |||
33 | # Defaults | ||
34 | DBLOCK_SIZE = (32 * 1024) # Data block size | ||
35 | |||
36 | # Pick a multiple of 2 here to achive 8-byte alignment! We also | ||
37 | # probably want DBlocks to have (roughly) the same size as IBlocks. | ||
38 | # With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128 | ||
39 | # byte = 2 * 512 bits). DO NOT CHANGE! | ||
40 | CHK_PER_INODE = 256 | ||
41 | |||
42 | CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes | ||
43 | |||
44 | CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash | ||
45 | |||
46 | GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix | ||
47 | |||
48 | GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix | ||
49 | |||
50 | |||
51 | def encode_data_to_string(data): | ||
52 | """Returns an ASCII encoding of the given data block like | ||
53 | GNUNET_STRINGS_data_to_string() function. | ||
54 | |||
55 | data: A bytearray representing the block of data which has to be encoded | ||
56 | """ | ||
57 | echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV" | ||
58 | assert (None != data) | ||
59 | assert (bytearray == type(data)) | ||
60 | size = len(data) | ||
61 | assert (0 != size) | ||
62 | vbit = 0 | ||
63 | wpos = 0 | ||
64 | rpos = 0 | ||
65 | bits = 0 | ||
66 | out = "" | ||
67 | while (rpos < size) or (vbit > 0): | ||
68 | if (rpos < size) and (vbit < 5): | ||
69 | bits = (bits << 8) | data[rpos] # eat 8 more bits | ||
70 | rpos += 1 | ||
71 | vbit += 8 | ||
72 | if (vbit < 5): | ||
73 | bits <<= (5 - vbit) # zero-padding | ||
74 | assert (vbit == ((size * 8) % 5)) | ||
75 | vbit = 5 | ||
76 | out += echart[(bits >> (vbit - 5)) & 31] | ||
77 | wpos += 1 | ||
78 | vbit -= 5 | ||
79 | assert (0 == vbit) | ||
80 | return out | ||
81 | |||
82 | |||
83 | def sha512_hash(data): | ||
84 | """ Returns the sha512 hash of the given data. | ||
85 | |||
86 | data: string to hash | ||
87 | """ | ||
88 | hash_obj = sha512() | ||
89 | hash_obj.update(data) | ||
90 | return hash_obj.digest() | ||
91 | |||
92 | |||
93 | class AESKey: | ||
94 | """Class for AES Keys. Contains the main key and the initialization | ||
95 | vector. """ | ||
96 | |||
97 | key = None # The actual AES key | ||
98 | iv = None # The initialization vector | ||
99 | cipher = None # The cipher object | ||
100 | KEY_SIZE = 32 # AES 256-bit key = 32 bytes | ||
101 | IV_SIZE = AES.block_size # Initialization vector size (= AES block size) | ||
102 | |||
103 | def __init__(self, passphrase): | ||
104 | """Creates a new AES key. | ||
105 | |||
106 | passphrase: string containing the passphrase to get the AES key and | ||
107 | initialization vector | ||
108 | """ | ||
109 | passphrase = bytearray(passphrase) | ||
110 | self.key = bytearray(self.KEY_SIZE) | ||
111 | self.iv = bytearray(self.IV_SIZE) | ||
112 | if (len(passphrase) > self.KEY_SIZE): | ||
113 | self.key = passphrase[:self.KEY_SIZE] | ||
114 | passphrase = passphrase[self.KEY_SIZE:] | ||
115 | if (len(passphrase) > self.IV_SIZE): | ||
116 | self.iv = passphrase[:self.IV_SIZE] | ||
117 | else: | ||
118 | self.iv[0:len(passphrase)] = passphrase | ||
119 | else: | ||
120 | self.key[0:len(passphrase)] = passphrase | ||
121 | self.key = str(self.key) | ||
122 | self.iv = str(self.iv) | ||
123 | assert (len(self.key) == self.KEY_SIZE) | ||
124 | assert (len(self.iv) == self.IV_SIZE) | ||
125 | |||
126 | |||
127 | def setup_aes_cipher_(aes_key): | ||
128 | """Initializes the AES object with settings similar to those in GNUnet. | ||
129 | |||
130 | aes_key: the AESKey object | ||
131 | Returns the newly initialized AES object | ||
132 | """ | ||
133 | return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128) | ||
134 | |||
135 | |||
136 | def aes_pad_(data): | ||
137 | """Adds padding to the data such that the size of the data is a multiple of | ||
138 | 16 bytes | ||
139 | |||
140 | data: the data string | ||
141 | Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added | ||
142 | as padding; data is the new data string with padded bytes at the end | ||
143 | """ | ||
144 | pad_len = len(data) % 16 | ||
145 | if (0 != pad_len): | ||
146 | pad_len = 16 - pad_len | ||
147 | pad_bytes = bytearray(15) | ||
148 | data += str(pad_bytes[:pad_len]) | ||
149 | return (pad_len, data) | ||
150 | |||
151 | |||
152 | def aes_encrypt(aes_key, data): | ||
153 | """Encrypts the given data using AES. | ||
154 | |||
155 | aes_key: the AESKey object to use for AES encryption | ||
156 | data: the data string to encrypt | ||
157 | """ | ||
158 | (pad_len, data) = aes_pad_(data) | ||
159 | cipher = setup_aes_cipher_(aes_key) | ||
160 | enc_data = cipher.encrypt(data) | ||
161 | if (0 != pad_len): | ||
162 | enc_data = enc_data[:-pad_len] | ||
163 | return enc_data | ||
164 | |||
165 | |||
166 | def aes_decrypt(aes_key, data): | ||
167 | """Decrypts the given data using AES | ||
168 | |||
169 | aes_key: the AESKey object to use for AES decryption | ||
170 | data: the data string to decrypt | ||
171 | """ | ||
172 | (pad_len, data) = aes_pad_(data) | ||
173 | cipher = setup_aes_cipher_(aes_key) | ||
174 | ptext = cipher.decrypt(data) | ||
175 | if (0 != pad_len): | ||
176 | ptext = ptext[:-pad_len] | ||
177 | return ptext | ||
178 | |||
179 | |||
180 | class Chk: | ||
181 | """Class for the content hash key.""" | ||
182 | key = None | ||
183 | query = None | ||
184 | fsize = None | ||
185 | |||
186 | def __init__(self, key, query): | ||
187 | assert (len(key) == CHK_HASH_SIZE) | ||
188 | assert (len(query) == CHK_QUERY_SIZE) | ||
189 | self.key = key | ||
190 | self.query = query | ||
191 | |||
192 | def setSize(self, size): | ||
193 | self.fsize = size | ||
194 | |||
195 | def uri(self): | ||
196 | sizestr = repr(self.fsize) | ||
197 | if isinstance(self.fsize, int): | ||
198 | sizestr = sizestr[:-1] | ||
199 | return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \ | ||
200 | encode_data_to_string(bytearray(self.key)) + "." + \ | ||
201 | encode_data_to_string(bytearray(self.query)) + "." + \ | ||
202 | sizestr | ||
203 | |||
204 | |||
205 | def compute_depth_(size): | ||
206 | """Computes the depth of the hash tree. | ||
207 | |||
208 | size: the size of the file whose tree's depth has to be computed | ||
209 | Returns the depth of the tree. Always > 0. | ||
210 | """ | ||
211 | depth = 1 | ||
212 | fl = DBLOCK_SIZE | ||
213 | while (fl < size): | ||
214 | depth += 1 | ||
215 | if ((fl * CHK_PER_INODE) < fl): | ||
216 | return depth | ||
217 | fl = fl * CHK_PER_INODE | ||
218 | return depth | ||
219 | |||
220 | |||
221 | def compute_tree_size_(depth): | ||
222 | """Calculate how many bytes of payload a block tree of the given depth MAY | ||
223 | correspond to at most (this function ignores the fact that some blocks will | ||
224 | only be present partially due to the total file size cutting some blocks | ||
225 | off at the end). | ||
226 | |||
227 | depth: depth of the block. depth==0 is a DBLOCK. | ||
228 | Returns the number of bytes of payload a subtree of this depth may | ||
229 | correspond to. | ||
230 | """ | ||
231 | rsize = DBLOCK_SIZE | ||
232 | for cnt in range(0, depth): | ||
233 | rsize *= CHK_PER_INODE | ||
234 | return rsize | ||
235 | |||
236 | |||
237 | def compute_chk_offset_(depth, end_offset): | ||
238 | """Compute the offset of the CHK for the current block in the IBlock | ||
239 | above | ||
240 | |||
241 | depth: depth of the IBlock in the tree (aka overall number of tree levels | ||
242 | minus depth); 0 == DBLOCK | ||
243 | end_offset: current offset in the overall file, at the *beginning* of the | ||
244 | block for DBLOCK (depth == 0), otherwise at the *end* of the | ||
245 | block (exclusive) | ||
246 | Returns the offset in the list of CHKs in the above IBlock | ||
247 | """ | ||
248 | bds = compute_tree_size_(depth) | ||
249 | if (depth > 0): | ||
250 | end_offset -= 1 | ||
251 | ret = end_offset / bds | ||
252 | return ret % CHK_PER_INODE | ||
253 | |||
254 | |||
255 | def compute_iblock_size_(depth, offset): | ||
256 | """Compute the size of the current IBLOCK. The encoder is triggering the | ||
257 | calculation of the size of an IBLOCK at the *end* (hence end_offset) of its | ||
258 | construction. The IBLOCK maybe a full or a partial IBLOCK, and this | ||
259 | function is to calculate how long it should be. | ||
260 | |||
261 | depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0 | ||
262 | (this function is for IBLOCKs only!) | ||
263 | offset: current offset in the payload (!) of the overall file, must be > 0 | ||
264 | (since this function is called at the end of a block). | ||
265 | Returns the number of elements to be in the corresponding IBlock | ||
266 | """ | ||
267 | assert (depth > 0) | ||
268 | assert (offset > 0) | ||
269 | bds = compute_tree_size_(depth) | ||
270 | mod = offset % bds | ||
271 | if mod is 0: | ||
272 | ret = CHK_PER_INODE | ||
273 | else: | ||
274 | bds /= CHK_PER_INODE | ||
275 | ret = mod / bds | ||
276 | if (mod % bds) is not 0: | ||
277 | ret += 1 | ||
278 | return ret | ||
279 | |||
280 | |||
281 | def compute_rootchk(readin, size): | ||
282 | """Returns the content hash key after generating the hash tree for the given | ||
283 | input stream. | ||
284 | |||
285 | readin: the stream where to read data from | ||
286 | size: the size of data to be read | ||
287 | """ | ||
288 | depth = compute_depth_(size) | ||
289 | current_depth = 0 | ||
290 | chks = [None] * (depth * CHK_PER_INODE) # list buffer | ||
291 | read_offset = 0 | ||
292 | logging.debug("Begining to calculate tree hash with depth: " + repr(depth)) | ||
293 | while True: | ||
294 | if (depth == current_depth): | ||
295 | off = CHK_PER_INODE * (depth - 1) | ||
296 | assert (chks[off] is not None) | ||
297 | logging.debug("Encoding done, reading CHK `" + chks[off].query + \ | ||
298 | "' from " + repr(off) + "\n") | ||
299 | uri_chk = chks[off] | ||
300 | assert (size == read_offset) | ||
301 | uri_chk.setSize(size) | ||
302 | return uri_chk | ||
303 | if (0 == current_depth): | ||
304 | pt_size = min(DBLOCK_SIZE, size - read_offset) | ||
305 | try: | ||
306 | pt_block = readin.read(pt_size) | ||
307 | except IOError: | ||
308 | logging.warning("Error reading input file stream") | ||
309 | return None | ||
310 | else: | ||
311 | pt_elements = compute_iblock_size_(current_depth, read_offset) | ||
312 | pt_block = "" | ||
313 | pt_block = \ | ||
314 | reduce((lambda ba, chk: | ||
315 | ba + (chk.key + chk.query)), | ||
316 | chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements], | ||
317 | pt_block) | ||
318 | pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE) | ||
319 | assert (len(pt_block) == pt_size) | ||
320 | assert (pt_size <= DBLOCK_SIZE) | ||
321 | off = compute_chk_offset_(current_depth, read_offset) | ||
322 | logging.debug("Encoding data at offset " + repr(read_offset) + \ | ||
323 | " and depth " + repr(current_depth) + " with block " \ | ||
324 | "size " + repr(pt_size) + " and target CHK offset " + \ | ||
325 | repr(current_depth * CHK_PER_INODE)) | ||
326 | pt_hash = sha512_hash(pt_block) | ||
327 | pt_aes_key = AESKey(pt_hash) | ||
328 | pt_enc = aes_encrypt(pt_aes_key, pt_block) | ||
329 | pt_enc_hash = sha512_hash(pt_enc) | ||
330 | chk = Chk(pt_hash, pt_enc_hash) | ||
331 | chks[(current_depth * CHK_PER_INODE) + off] = chk | ||
332 | if (0 == current_depth): | ||
333 | read_offset += pt_size | ||
334 | if (read_offset == size) or \ | ||
335 | (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))): | ||
336 | current_depth += 1 | ||
337 | else: | ||
338 | if (CHK_PER_INODE == off) or (read_offset == size): | ||
339 | current_depth += 1 | ||
340 | else: | ||
341 | current_depth = 0 | ||
342 | |||
343 | |||
344 | def chkuri_from_path(path): | ||
345 | """Returns the CHK URI of the file at the given path. | ||
346 | |||
347 | path: the path of the file whose CHK has to be calculated | ||
348 | """ | ||
349 | size = os.path.getsize(path) | ||
350 | readin = open(path, "rb") | ||
351 | chk = compute_rootchk(readin, size) | ||
352 | readin.close() | ||
353 | return chk.uri() | ||
354 | |||
355 | |||
356 | def usage(): | ||
357 | """Prints help about using this script.""" | ||
358 | print(""" | ||
359 | Usage: gnunet-chk.py [options] file | ||
360 | Prints the Content Hash Key of given file in GNUNET-style URI. | ||
361 | |||
362 | Options: | ||
363 | -h, --help : prints this message | ||
364 | """) | ||
365 | |||
366 | |||
367 | if '__main__' == __name__: | ||
368 | try: | ||
369 | opts, args = getopt.getopt(sys.argv[1:], "h", ["help"]) | ||
370 | except getopt.GetoptError as err: | ||
371 | print(err) | ||
372 | print("Exception occured") | ||
373 | usage() | ||
374 | sys.exit(2) | ||
375 | for option, value in opts: | ||
376 | if option in("-h", "--help"): | ||
377 | usage() | ||
378 | sys.exit(0) | ||
379 | if len(args) != 1: | ||
380 | print("Incorrect number of arguments passed") | ||
381 | usage() | ||
382 | sys.exit(1) | ||
383 | print(chkuri_from_path(args[0])) | ||