diff options
Diffstat (limited to 'contrib/scripts/gnunet-chk.py.in')
-rwxr-xr-x | contrib/scripts/gnunet-chk.py.in | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/contrib/scripts/gnunet-chk.py.in b/contrib/scripts/gnunet-chk.py.in new file mode 100755 index 000000000..cabaef6c4 --- /dev/null +++ b/contrib/scripts/gnunet-chk.py.in | |||
@@ -0,0 +1,381 @@ | |||
1 | #!@PYTHON@ | ||
2 | # This file is part of GNUnet. | ||
3 | # (C) 2013, 2018 Christian Grothoff (and other contributing authors) | ||
4 | # | ||
5 | # GNUnet is free software: you can redistribute it and/or modify it | ||
6 | # under the terms of the GNU Affero General Public License as published | ||
7 | # by the Free Software Foundation, either version 3 of the License, or | ||
8 | # (at your option) any later version. | ||
9 | # | ||
10 | # GNUnet is distributed in the hope that it will be useful, but | ||
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | # Affero General Public License for more details. | ||
14 | # | ||
15 | # You should have received a copy of the GNU Affero General Public License | ||
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | # | ||
18 | # File: gnunet-chk.py | ||
19 | # Brief: Computes GNUNET style Content Hash Key for a given file | ||
20 | # Author: Sree Harsha Totakura | ||
21 | |||
22 | from hashlib import sha512 | ||
23 | import logging | ||
24 | import os | ||
25 | import getopt | ||
26 | import sys | ||
27 | from Crypto.Cipher import AES | ||
28 | from functools import reduce | ||
29 | |||
30 | |||
# Defaults
DBLOCK_SIZE = (32 * 1024)   # Data block size in bytes (32 KiB)

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64              # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"       # FS CHK URI infix
47 | |||
48 | |||
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    The input is consumed as a stream of bits, most significant bit first,
    five bits at a time.  Each 5-bit group selects one character of a
    32-symbol alphabet (the digits followed by the letters A-V).  When the
    number of input bits is not a multiple of five, the last group is
    zero-padded on the right.

    data: A non-empty bytearray (plain bytes are accepted as well)
          representing the block of data which has to be encoded
    Returns the encoded text as a string
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    assert data is not None
    assert isinstance(data, (bytes, bytearray))
    # Normalise to a bytearray so that indexing always yields integers.
    data = bytearray(data)
    size = len(data)
    assert 0 != size
    vbit = 0    # number of bits in `bits` that were not emitted yet
    rpos = 0    # index of the next input byte to consume
    bits = 0    # bit accumulator; only its low `vbit` bits are meaningful
    out = []
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if vbit < 5:
            bits <<= (5 - vbit)  # zero-padding
            assert vbit == ((size * 8) % 5)
            vbit = 5
        out.append(echart[(bits >> (vbit - 5)) & 31])
        vbit -= 5
        # Drop the bits that were just emitted so the accumulator stays small.
        bits &= (1 << vbit) - 1
    assert 0 == vbit
    return "".join(out)
79 | |||
80 | |||
def sha512_hash(data):
    """Computes the SHA-512 digest of a block of data.

    data: the byte string to hash
    Returns the raw (binary, not hex-encoded) digest
    """
    return sha512(data).digest()
89 | |||
90 | |||
class AESKey:
    """Class for AES Keys. Contains the main key and the initialization
    vector. """

    key = None      # The actual AES key
    iv = None       # The initialization vector
    cipher = None   # The cipher object
    KEY_SIZE = 32   # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size  # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        The first KEY_SIZE bytes of the passphrase become the key and the
        following IV_SIZE bytes become the initialization vector.  Whatever
        the passphrase is too short to provide is filled with zero bytes;
        anything beyond KEY_SIZE + IV_SIZE bytes is ignored.

        passphrase: bytes-like object containing the passphrase to get the
                    AES key and initialization vector
        """
        passphrase = bytearray(passphrase)
        key = bytearray(self.KEY_SIZE)
        iv = bytearray(self.IV_SIZE)
        key_part = passphrase[:self.KEY_SIZE]
        iv_part = passphrase[self.KEY_SIZE:self.KEY_SIZE + self.IV_SIZE]
        key[:len(key_part)] = key_part
        iv[:len(iv_part)] = iv_part
        # bytes(), not str(): on Python 3, str() of a bytearray returns its
        # repr text ("bytearray(b'...')") instead of the raw bytes.
        self.key = bytes(key)
        self.iv = bytes(iv)
        assert len(self.key) == self.KEY_SIZE
        assert len(self.iv) == self.IV_SIZE
123 | |||
124 | |||
def setup_aes_cipher_(aes_key):
    """Builds a fresh AES cipher object for the given key material.

    The cipher is created in CFB mode with a segment size of 128 bits,
    using the key and initialization vector stored in aes_key.

    aes_key: the AESKey object
    Returns the newly initialized AES object
    """
    cipher = AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
    return cipher
132 | |||
133 | |||
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes

    The padding consists of zero bytes.  The argument itself is never
    modified; a new object is returned when padding is required.

    data: the data (byte string)
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data string with padded bytes at the end
    """
    # Number of bytes missing to reach the next multiple of 16 (0 if aligned).
    pad_len = -len(data) % 16
    if pad_len:
        # Use real zero bytes: str() of a bytearray is its repr on Python 3.
        data = data + b"\0" * pad_len
    return (pad_len, data)
148 | |||
149 | |||
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    The data is padded to a multiple of the padding unit before encryption
    and the ciphertext is cut back to the original length afterwards.

    aes_key: the AESKey object to use for AES encryption
    data: the data string to encrypt
    Returns the encrypted data, the same length as the input
    """
    padding, padded = aes_pad_(data)
    ciphertext = setup_aes_cipher_(aes_key).encrypt(padded)
    # Strip the ciphertext bytes that only exist because of the padding.
    return ciphertext[:-padding] if 0 != padding else ciphertext
162 | |||
163 | |||
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES

    The data is padded to a multiple of the padding unit before decryption
    and the plaintext is cut back to the original length afterwards.

    aes_key: the AESKey object to use for AES decryption
    data: the data string to decrypt
    Returns the decrypted data, the same length as the input
    """
    padding, padded = aes_pad_(data)
    plaintext = setup_aes_cipher_(aes_key).decrypt(padded)
    # Strip the plaintext bytes that only exist because of the padding.
    return plaintext[:-padding] if 0 != padding else plaintext
176 | |||
177 | |||
class Chk:
    """Class for the content hash key."""
    key = None      # hash used as the key part of the CHK
    query = None    # hash used as the query part of the CHK
    fsize = None    # size recorded through setSize(); None until then

    def __init__(self, key, query):
        """Creates a content hash key from its two hashes.

        key: the key hash; must be CHK_HASH_SIZE bytes long
        query: the query hash; must be CHK_QUERY_SIZE bytes long
        """
        assert len(key) == CHK_HASH_SIZE
        assert len(query) == CHK_QUERY_SIZE
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size that uri() appends to the URI.

        size: the size to record
        """
        self.fsize = size

    def uri(self):
        """Returns the URI string for this content hash key.

        The URI is the FS prefix and CHK infix followed by the encoded key,
        the encoded query and the recorded size, separated by dots.
        """
        # str() and not repr()[:-1]: chopping the last character was only
        # right for Python 2 longs ("123L"); for a plain int it removes the
        # last digit of the size.
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            str(self.fsize)
201 | |||
202 | |||
def compute_depth_(size):
    """Computes the depth of the hash tree.

    Starting from the capacity of a single data block, the capacity is
    multiplied by CHK_PER_INODE for every additional level until it is no
    longer smaller than the requested size.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    capacity = DBLOCK_SIZE
    # Python integers never wrap around, so no overflow guard is needed
    # when the capacity is multiplied.
    while capacity < size:
        depth += 1
        capacity *= CHK_PER_INODE
    return depth
217 | |||
218 | |||
def compute_tree_size_(depth):
    """Upper bound on the payload covered by a block tree of a given depth.

    The bound ignores that the last blocks of a file may be only partially
    filled because the total file size cuts them off.

    depth: depth of the block. depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to.
    """
    payload = DBLOCK_SIZE
    # Every level above the data blocks multiplies the reachable payload.
    for _level in range(depth):
        payload = payload * CHK_PER_INODE
    return payload
233 | |||
234 | |||
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock (an integer
    in the range 0 .. CHK_PER_INODE - 1)
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        # For depth > 0 the offset points just past the block; step back one
        # byte so the division below lands inside the block.
        end_offset -= 1
    # Floor division: the result is used as a list index and must be an int.
    return (end_offset // bds) % CHK_PER_INODE
251 | |||
252 | |||
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock
    (an integer in the range 1 .. CHK_PER_INODE)
    """
    assert depth > 0
    assert offset > 0
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # The offset sits exactly on a subtree boundary: the IBlock is full.
        return CHK_PER_INODE
    # Payload covered by one child of this IBlock.
    child_size = bds // CHK_PER_INODE
    # Number of children needed for `mod` bytes, rounded up so that a
    # partially filled last child is counted as well.
    return (mod + child_size - 1) // child_size
277 | |||
278 | |||
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    Data blocks are read from the stream one at a time.  Each block is
    hashed, encrypted with a key derived from that hash, and the hash of the
    ciphertext is stored together with the plaintext hash as the block's CHK.
    The CHKs of one level are concatenated to form the blocks of the level
    above, until the top of the tree is reached.

    readin: the stream where to read data from (must yield bytes)
    size: the size of data to be read
    Returns the Chk object of the root block with its size set, or None if
    reading from the stream fails with an IOError
    """
    depth = compute_depth_(size)
    current_depth = 0
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer, one row per level
    read_offset = 0
    logging.debug("Begining to calculate tree hash with depth: %r", depth)
    while True:
        if depth == current_depth:
            # The top of the tree was encoded; its CHK is the first entry
            # of the last row.
            off = CHK_PER_INODE * (depth - 1)
            assert chks[off] is not None
            # The query is binary data; log its ASCII encoding rather than
            # concatenating bytes to a str.
            logging.debug("Encoding done, reading CHK `%s' from %r\n",
                          encode_data_to_string(bytearray(chks[off].query)),
                          off)
            uri_chk = chks[off]
            assert size == read_offset
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            # Data block: take the payload from the input stream.
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            # Index block: concatenate the CHKs collected one level below.
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            first = (current_depth - 1) * CHK_PER_INODE
            pt_block = b"".join(chk.key + chk.query
                                for chk in chks[first:first + pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        assert len(pt_block) == pt_size
        assert pt_size <= DBLOCK_SIZE
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset %r and depth %r with block "
                      "size %r and target CHK offset %r",
                      read_offset, current_depth, pt_size,
                      current_depth * CHK_PER_INODE)
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if 0 == current_depth:
            read_offset += pt_size
            # Move up once the input is exhausted or a full index block
            # worth of data blocks has been processed.
            if (read_offset == size) or \
               (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # NOTE(review): `off` comes from compute_chk_offset_(), which
            # reduces its result modulo CHK_PER_INODE, so the first
            # comparison looks like it can never be true - confirm against
            # the reference implementation before relying on trees deeper
            # than two levels.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
340 | |||
341 | |||
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Raises IOError if the file content could not be read while the hash
    tree was being computed.
    """
    size = os.path.getsize(path)
    # The context manager closes the file even if hashing raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    if chk is None:
        # compute_rootchk() reports a failed read by returning None.
        raise IOError("Error reading input file: " + repr(path))
    return chk.uri()
352 | |||
353 | |||
def usage():
    """Prints help about using this script.

    Writes a short usage summary (invocation syntax, a one-line description
    and the supported options) to standard output.  Takes no arguments and
    returns None.
    """
    print("""
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
-h, --help : prints this message
""")
363 | |||
364 | |||
if __name__ == '__main__':
    # Command-line entry point: parse the options, then print the CHK URI
    # of the single file named on the command line.
    try:
        parsed_opts, positional = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.GetoptError as parse_error:
        # Unknown option: report it, show the help text, exit with status 2.
        print(parse_error)
        print("Exception occured")
        usage()
        sys.exit(2)
    # A help flag anywhere among the options prints the help and stops.
    if any(flag in ("-h", "--help") for flag, _ in parsed_opts):
        usage()
        sys.exit(0)
    # Exactly one positional argument (the file) is expected.
    if 1 != len(positional):
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(positional[0]))