diff options
author | Nils Gillmann <ng0@n0.is> | 2018-05-17 12:30:37 +0000 |
---|---|---|
committer | Nils Gillmann <ng0@n0.is> | 2018-05-17 12:30:37 +0000 |
commit | 42399e29836ef338bd6452391f3fc62348e6e4fa (patch) | |
tree | e97f663c2c6fd73855f82969566f364391cd277b /contrib/gnunet-chk.py | |
parent | 8a24a03937a8e528ba5933c4c1bd724369b84474 (diff) | |
download | gnunet-42399e29836ef338bd6452391f3fc62348e6e4fa.tar.gz gnunet-42399e29836ef338bd6452391f3fc62348e6e4fa.zip |
Opportunisticly rename gnunet-chk.py to let its python invokation line be generated (py.in), and assume that it is not supposed to be installed
Signed-off-by: Nils Gillmann <ng0@n0.is>
Diffstat (limited to 'contrib/gnunet-chk.py')
-rwxr-xr-x | contrib/gnunet-chk.py | 382 |
1 files changed, 0 insertions, 382 deletions
diff --git a/contrib/gnunet-chk.py b/contrib/gnunet-chk.py deleted file mode 100755 index dba694c34..000000000 --- a/contrib/gnunet-chk.py +++ /dev/null | |||
@@ -1,382 +0,0 @@ | |||
1 | #!/usr/bin/python | ||
2 | # This file is part of GNUnet. | ||
3 | # (C) 2013 Christian Grothoff (and other contributing authors) | ||
4 | # | ||
5 | # GNUnet is free software; you can redistribute it and/or modify | ||
6 | # it under the terms of the GNU General Public License as published | ||
7 | # by the Free Software Foundation; either version 3, or (at your | ||
8 | # option) any later version. | ||
9 | # | ||
10 | # GNUnet is distributed in the hope that it will be useful, but | ||
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | # General Public License for more details. | ||
14 | # | ||
15 | # You should have received a copy of the GNU General Public License | ||
16 | # along with GNUnet; see the file COPYING. If not, write to the | ||
17 | # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | ||
18 | # Boston, MA 02110-1301, USA. | ||
19 | # | ||
20 | # File: gnunet-chk.py | ||
21 | # Brief: Computes GNUNET style Content Hash Key for a given file | ||
22 | # Author: Sree Harsha Totakura | ||
23 | |||
24 | from hashlib import sha512 | ||
25 | import logging | ||
26 | import os | ||
27 | import getopt | ||
28 | import sys | ||
29 | from Crypto.Cipher import AES | ||
30 | |||
31 | |||
32 | # Defaults | ||
33 | DBLOCK_SIZE = (32 * 1024) # Data block size | ||
34 | |||
35 | # Pick a multiple of 2 here to achive 8-byte alignment! We also | ||
36 | # probably want DBlocks to have (roughly) the same size as IBlocks. | ||
37 | # With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128 | ||
38 | # byte = 2 * 512 bits). DO NOT CHANGE! | ||
39 | CHK_PER_INODE = 256 | ||
40 | |||
41 | CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes | ||
42 | |||
43 | CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash | ||
44 | |||
45 | GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix | ||
46 | |||
47 | GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix | ||
48 | |||
49 | |||
50 | def encode_data_to_string(data): | ||
51 | """Returns an ASCII encoding of the given data block like | ||
52 | GNUNET_STRINGS_data_to_string() function. | ||
53 | |||
54 | data: A bytearray representing the block of data which has to be encoded | ||
55 | """ | ||
56 | echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV" | ||
57 | assert (None != data) | ||
58 | assert (bytearray == type(data)) | ||
59 | size = len(data) | ||
60 | assert (0 != size) | ||
61 | vbit = 0 | ||
62 | wpos = 0 | ||
63 | rpos = 0 | ||
64 | bits = 0 | ||
65 | out = "" | ||
66 | while (rpos < size) or (vbit > 0): | ||
67 | if (rpos < size) and (vbit < 5): | ||
68 | bits = (bits << 8) | data[rpos] # eat 8 more bits | ||
69 | rpos += 1 | ||
70 | vbit += 8 | ||
71 | if (vbit < 5): | ||
72 | bits <<= (5 - vbit) # zero-padding | ||
73 | assert (vbit == ((size * 8) % 5)) | ||
74 | vbit = 5 | ||
75 | out += echart[(bits >> (vbit - 5)) & 31] | ||
76 | wpos += 1 | ||
77 | vbit -= 5 | ||
78 | assert (0 == vbit) | ||
79 | return out; | ||
80 | |||
81 | |||
82 | def sha512_hash(data): | ||
83 | """ Returns the sha512 hash of the given data. | ||
84 | |||
85 | data: string to hash | ||
86 | """ | ||
87 | hash_obj = sha512() | ||
88 | hash_obj.update(data) | ||
89 | return hash_obj.digest() | ||
90 | |||
91 | |||
92 | class AESKey: | ||
93 | """Class for AES Keys. Contains the main key and the initialization | ||
94 | vector. """ | ||
95 | |||
96 | key = None # The actual AES key | ||
97 | iv = None # The initialization vector | ||
98 | cipher = None # The cipher object | ||
99 | KEY_SIZE = 32 # AES 256-bit key = 32 bytes | ||
100 | IV_SIZE = AES.block_size # Initialization vector size (= AES block size) | ||
101 | |||
102 | def __init__(self, passphrase): | ||
103 | """Creates a new AES key. | ||
104 | |||
105 | passphrase: string containing the passphrase to get the AES key and | ||
106 | initialization vector | ||
107 | """ | ||
108 | passphrase = bytearray(passphrase); | ||
109 | self.key = bytearray(self.KEY_SIZE) | ||
110 | self.iv = bytearray(self.IV_SIZE) | ||
111 | if (len(passphrase) > self.KEY_SIZE): | ||
112 | self.key = passphrase[:self.KEY_SIZE] | ||
113 | passphrase = passphrase[self.KEY_SIZE:] | ||
114 | if (len(passphrase) > self.IV_SIZE): | ||
115 | self.iv = passphrase[:self.IV_SIZE] | ||
116 | else: | ||
117 | self.iv[0:len(passphrase)] = passphrase | ||
118 | else: | ||
119 | self.key[0:len(passphrase)] = passphrase | ||
120 | self.key = str(self.key) | ||
121 | self.iv = str(self.iv) | ||
122 | assert (len(self.key) == self.KEY_SIZE) | ||
123 | assert (len(self.iv) == self.IV_SIZE) | ||
124 | |||
125 | |||
126 | def setup_aes_cipher_(aes_key): | ||
127 | """Initializes the AES object with settings similar to those in GNUnet. | ||
128 | |||
129 | aes_key: the AESKey object | ||
130 | Returns the newly initialized AES object | ||
131 | """ | ||
132 | return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128) | ||
133 | |||
134 | |||
135 | def aes_pad_(data): | ||
136 | """Adds padding to the data such that the size of the data is a multiple of | ||
137 | 16 bytes | ||
138 | |||
139 | data: the data string | ||
140 | Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added | ||
141 | as padding; data is the new data string with padded bytes at the end | ||
142 | """ | ||
143 | pad_len = len(data) % 16 | ||
144 | if (0 != pad_len): | ||
145 | pad_len = 16 - pad_len | ||
146 | pad_bytes = bytearray(15) | ||
147 | data += str(pad_bytes[:pad_len]) | ||
148 | return (pad_len, data) | ||
149 | |||
150 | |||
151 | def aes_encrypt(aes_key, data): | ||
152 | """Encrypts the given data using AES. | ||
153 | |||
154 | aes_key: the AESKey object to use for AES encryption | ||
155 | data: the data string to encrypt | ||
156 | """ | ||
157 | (pad_len, data) = aes_pad_(data) | ||
158 | cipher = setup_aes_cipher_(aes_key) | ||
159 | enc_data = cipher.encrypt(data) | ||
160 | if (0 != pad_len): | ||
161 | enc_data = enc_data[:-pad_len] | ||
162 | return enc_data | ||
163 | |||
164 | |||
165 | def aes_decrypt(aes_key, data): | ||
166 | """Decrypts the given data using AES | ||
167 | |||
168 | aes_key: the AESKey object to use for AES decryption | ||
169 | data: the data string to decrypt | ||
170 | """ | ||
171 | (pad_len, data) = aes_pad_(data) | ||
172 | cipher = setup_aes_cipher_(aes_key) | ||
173 | ptext = cipher.decrypt(data) | ||
174 | if (0 != pad_len): | ||
175 | ptext = ptext[:-pad_len] | ||
176 | return ptext | ||
177 | |||
178 | |||
179 | class Chk: | ||
180 | """Class for the content hash key.""" | ||
181 | key = None | ||
182 | query = None | ||
183 | fsize = None | ||
184 | |||
185 | def __init__(self, key, query): | ||
186 | assert (len(key) == CHK_HASH_SIZE) | ||
187 | assert (len(query) == CHK_QUERY_SIZE) | ||
188 | self.key = key | ||
189 | self.query = query | ||
190 | |||
191 | def setSize(self, size): | ||
192 | self.fsize = size | ||
193 | |||
194 | def uri(self): | ||
195 | sizestr = repr(self.fsize) | ||
196 | if isinstance(self.fsize, long): | ||
197 | sizestr = sizestr[:-1] | ||
198 | return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \ | ||
199 | encode_data_to_string(bytearray(self.key)) + "." + \ | ||
200 | encode_data_to_string(bytearray(self.query)) + "." + \ | ||
201 | sizestr | ||
202 | |||
203 | |||
204 | def compute_depth_(size): | ||
205 | """Computes the depth of the hash tree. | ||
206 | |||
207 | size: the size of the file whose tree's depth has to be computed | ||
208 | Returns the depth of the tree. Always > 0. | ||
209 | """ | ||
210 | depth = 1 | ||
211 | fl = DBLOCK_SIZE | ||
212 | while (fl < size): | ||
213 | depth += 1 | ||
214 | if ((fl * CHK_PER_INODE) < fl): | ||
215 | return depth | ||
216 | fl = fl * CHK_PER_INODE | ||
217 | return depth | ||
218 | |||
219 | |||
220 | def compute_tree_size_(depth): | ||
221 | """Calculate how many bytes of payload a block tree of the given depth MAY | ||
222 | correspond to at most (this function ignores the fact that some blocks will | ||
223 | only be present partially due to the total file size cutting some blocks | ||
224 | off at the end). | ||
225 | |||
226 | depth: depth of the block. depth==0 is a DBLOCK. | ||
227 | Returns the number of bytes of payload a subtree of this depth may | ||
228 | correspond to. | ||
229 | """ | ||
230 | rsize = DBLOCK_SIZE | ||
231 | for cnt in range(0, depth): | ||
232 | rsize *= CHK_PER_INODE | ||
233 | return rsize | ||
234 | |||
235 | |||
236 | def compute_chk_offset_(depth, end_offset): | ||
237 | """Compute the offset of the CHK for the current block in the IBlock | ||
238 | above | ||
239 | |||
240 | depth: depth of the IBlock in the tree (aka overall number of tree levels | ||
241 | minus depth); 0 == DBLOCK | ||
242 | end_offset: current offset in the overall file, at the *beginning* of the | ||
243 | block for DBLOCK (depth == 0), otherwise at the *end* of the | ||
244 | block (exclusive) | ||
245 | Returns the offset in the list of CHKs in the above IBlock | ||
246 | """ | ||
247 | bds = compute_tree_size_(depth) | ||
248 | if (depth > 0): | ||
249 | end_offset -= 1 | ||
250 | ret = end_offset / bds | ||
251 | return ret % CHK_PER_INODE | ||
252 | |||
253 | |||
254 | def compute_iblock_size_(depth, offset): | ||
255 | """Compute the size of the current IBLOCK. The encoder is triggering the | ||
256 | calculation of the size of an IBLOCK at the *end* (hence end_offset) of its | ||
257 | construction. The IBLOCK maybe a full or a partial IBLOCK, and this | ||
258 | function is to calculate how long it should be. | ||
259 | |||
260 | depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0 | ||
261 | (this function is for IBLOCKs only!) | ||
262 | offset: current offset in the payload (!) of the overall file, must be > 0 | ||
263 | (since this function is called at the end of a block). | ||
264 | Returns the number of elements to be in the corresponding IBlock | ||
265 | """ | ||
266 | assert (depth > 0) | ||
267 | assert (offset > 0) | ||
268 | bds = compute_tree_size_(depth) | ||
269 | mod = offset % bds | ||
270 | if mod is 0: | ||
271 | ret = CHK_PER_INODE | ||
272 | else: | ||
273 | bds /= CHK_PER_INODE | ||
274 | ret = mod / bds | ||
275 | if (mod % bds) is not 0: | ||
276 | ret += 1 | ||
277 | return ret | ||
278 | |||
279 | |||
280 | def compute_rootchk(readin, size): | ||
281 | """Returns the content hash key after generating the hash tree for the given | ||
282 | input stream. | ||
283 | |||
284 | readin: the stream where to read data from | ||
285 | size: the size of data to be read | ||
286 | """ | ||
287 | depth = compute_depth_(size); | ||
288 | current_depth = 0 | ||
289 | chks = [None] * (depth * CHK_PER_INODE) # list buffer | ||
290 | read_offset = 0 | ||
291 | logging.debug("Begining to calculate tree hash with depth: " + repr(depth)) | ||
292 | while True: | ||
293 | if (depth == current_depth): | ||
294 | off = CHK_PER_INODE * (depth - 1) | ||
295 | assert (chks[off] is not None) | ||
296 | logging.debug("Encoding done, reading CHK `" + chks[off].query + \ | ||
297 | "' from " + repr(off) + "\n") | ||
298 | uri_chk = chks[off] | ||
299 | assert (size == read_offset) | ||
300 | uri_chk.setSize(size) | ||
301 | return uri_chk | ||
302 | if (0 == current_depth): | ||
303 | pt_size = min(DBLOCK_SIZE, size - read_offset); | ||
304 | try: | ||
305 | pt_block = readin.read(pt_size) | ||
306 | except IOError: | ||
307 | logging.warning("Error reading input file stream") | ||
308 | return None | ||
309 | else: | ||
310 | pt_elements = compute_iblock_size_(current_depth, read_offset) | ||
311 | pt_block = "" | ||
312 | pt_block = \ | ||
313 | reduce((lambda ba, chk: | ||
314 | ba + (chk.key + chk.query)), | ||
315 | chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements], | ||
316 | pt_block) | ||
317 | pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE) | ||
318 | assert (len(pt_block) == pt_size) | ||
319 | assert (pt_size <= DBLOCK_SIZE) | ||
320 | off = compute_chk_offset_(current_depth, read_offset) | ||
321 | logging.debug("Encoding data at offset " + repr(read_offset) + \ | ||
322 | " and depth " + repr(current_depth) + " with block " \ | ||
323 | "size " + repr(pt_size) + " and target CHK offset " + \ | ||
324 | repr(current_depth * CHK_PER_INODE)) | ||
325 | pt_hash = sha512_hash(pt_block) | ||
326 | pt_aes_key = AESKey(pt_hash) | ||
327 | pt_enc = aes_encrypt(pt_aes_key, pt_block) | ||
328 | pt_enc_hash = sha512_hash(pt_enc) | ||
329 | chk = Chk(pt_hash, pt_enc_hash) | ||
330 | chks[(current_depth * CHK_PER_INODE) + off] = chk | ||
331 | if (0 == current_depth): | ||
332 | read_offset += pt_size | ||
333 | if (read_offset == size) or \ | ||
334 | (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))): | ||
335 | current_depth += 1 | ||
336 | else: | ||
337 | if (CHK_PER_INODE == off) or (read_offset == size): | ||
338 | current_depth += 1 | ||
339 | else: | ||
340 | current_depth = 0 | ||
341 | |||
342 | |||
343 | def chkuri_from_path(path): | ||
344 | """Returns the CHK URI of the file at the given path. | ||
345 | |||
346 | path: the path of the file whose CHK has to be calculated | ||
347 | """ | ||
348 | size = os.path.getsize(path) | ||
349 | readin = open(path, "rb") | ||
350 | chk = compute_rootchk(readin, size) | ||
351 | readin.close() | ||
352 | return chk.uri() | ||
353 | |||
354 | |||
355 | def usage(): | ||
356 | """Prints help about using this script.""" | ||
357 | print(""" | ||
358 | Usage: gnunet-chk.py [options] file | ||
359 | Prints the Content Hash Key of given file in GNUNET-style URI. | ||
360 | |||
361 | Options: | ||
362 | -h, --help : prints this message | ||
363 | """) | ||
364 | |||
365 | |||
366 | if '__main__' == __name__: | ||
367 | try: | ||
368 | opts, args = getopt.getopt(sys.argv[1:], "h", ["help"]) | ||
369 | except getopt.GetoptError, err: | ||
370 | print(err) | ||
371 | print("Exception occured") | ||
372 | usage() | ||
373 | sys.exit(2) | ||
374 | for option, value in opts: | ||
375 | if option in("-h", "--help"): | ||
376 | usage() | ||
377 | sys.exit(0) | ||
378 | if len(args) != 1: | ||
379 | print("Incorrect number of arguments passed") | ||
380 | usage() | ||
381 | sys.exit(1) | ||
382 | print chkuri_from_path(args[0]) | ||