diff options
author | Christian Grothoff <christian@grothoff.org> | 2013-01-21 13:18:58 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2013-01-21 13:18:58 +0000 |
commit | 6dafc0d03567eacb4569ecdb93afaa2a803369e5 (patch) | |
tree | 8d2696f22b9eb32d9298110a1b04fb2309d8fae5 /contrib/gnunet-chk.py | |
parent | e5a04492ab3e988a391f88d87f3bf7fcdf80d66b (diff) | |
download | gnunet-6dafc0d03567eacb4569ecdb93afaa2a803369e5.tar.gz gnunet-6dafc0d03567eacb4569ecdb93afaa2a803369e5.zip |
-python script to compute GNUnet CHK URIs
Diffstat (limited to 'contrib/gnunet-chk.py')
-rwxr-xr-x | contrib/gnunet-chk.py | 375 |
1 files changed, 375 insertions, 0 deletions
diff --git a/contrib/gnunet-chk.py b/contrib/gnunet-chk.py new file mode 100755 index 000000000..e34b065b5 --- /dev/null +++ b/contrib/gnunet-chk.py | |||
@@ -0,0 +1,375 @@ | |||
1 | #!/usr/bin/python | ||
2 | # This file is part of GNUnet. | ||
3 | # (C) 2013 Christian Grothoff (and other contributing authors) | ||
4 | # | ||
5 | # GNUnet is free software; you can redistribute it and/or modify | ||
6 | # it under the terms of the GNU General Public License as published | ||
7 | # by the Free Software Foundation; either version 2, or (at your | ||
8 | # option) any later version. | ||
9 | # | ||
10 | # GNUnet is distributed in the hope that it will be useful, but | ||
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | # General Public License for more details. | ||
14 | # | ||
15 | # You should have received a copy of the GNU General Public License | ||
16 | # along with GNUnet; see the file COPYING. If not, write to the | ||
17 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | # Boston, MA 02111-1307, USA. | ||
19 | # | ||
20 | # File: gnunet-chk.py | ||
21 | # Brief: Computes GNUNET style Content Hash Key for a given file | ||
22 | # Author: Sree Harsha Totakura | ||
23 | |||
24 | from hashlib import sha512 | ||
25 | import logging | ||
26 | import os | ||
27 | import getopt | ||
28 | import sys | ||
29 | from Crypto.Cipher import AES | ||
30 | |||
31 | # Defaults | ||
32 | DBLOCK_SIZE = (32 * 1024) # Data block size | ||
33 | |||
34 | # Pick a multiple of 2 here to achive 8-byte alignment! We also | ||
35 | # probably want DBlocks to have (roughly) the same size as IBlocks. | ||
36 | # With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128 | ||
37 | # byte = 2 * 512 bits). DO NOT CHANGE! | ||
38 | CHK_PER_INODE = 256 | ||
39 | |||
40 | CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes | ||
41 | |||
42 | CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash | ||
43 | |||
44 | GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix | ||
45 | |||
46 | GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix | ||
47 | |||
48 | |||
49 | def encode_data_to_string (data): | ||
50 | """Returns an ASCII encoding of the given data block like | ||
51 | GNUNET_STRINGS_data_to_string() function. | ||
52 | |||
53 | data: A bytearray representing the block of data which has to be encoded | ||
54 | """ | ||
55 | echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV" | ||
56 | assert (None != data) | ||
57 | assert (bytearray == type(data)) | ||
58 | size = len(data) | ||
59 | assert (0 != size) | ||
60 | vbit = 0 | ||
61 | wpos = 0 | ||
62 | rpos = 0 | ||
63 | bits = 0 | ||
64 | out = "" | ||
65 | while (rpos < size) or (vbit > 0): | ||
66 | if (rpos < size) and (vbit < 5): | ||
67 | bits = (bits << 8) | data[rpos] # eat 8 more bits | ||
68 | rpos += 1 | ||
69 | vbit += 8 | ||
70 | if (vbit < 5): | ||
71 | bits <<= (5 - vbit) # zero-padding | ||
72 | assert (vbit == ((size * 8) % 5)) | ||
73 | vbit = 5 | ||
74 | out += echart[(bits >> (vbit - 5)) & 31] | ||
75 | wpos += 1 | ||
76 | vbit -= 5 | ||
77 | assert (0 == vbit) | ||
78 | return out; | ||
79 | |||
80 | |||
81 | def sha512_hash (data): | ||
82 | """ Returns the sha512 hash of the given data. | ||
83 | |||
84 | data: string to hash | ||
85 | """ | ||
86 | hash_obj = sha512() | ||
87 | hash_obj.update (data) | ||
88 | return hash_obj.digest() | ||
89 | |||
90 | |||
91 | class AESKey: | ||
92 | """Class for AES Keys. Contains the main key and the initialization | ||
93 | vector. """ | ||
94 | |||
95 | key = None # The actual AES key | ||
96 | iv = None # The initialization vector | ||
97 | cipher = None # The cipher object | ||
98 | KEY_SIZE = 32 # AES 256-bit key = 32 bytes | ||
99 | IV_SIZE = AES.block_size # Initialization vector size (= AES block size) | ||
100 | |||
101 | def __init__ (self, passphrase): | ||
102 | """Creates a new AES key. | ||
103 | |||
104 | passphrase: string containing the passphrase to get the AES key and | ||
105 | initialization vector | ||
106 | """ | ||
107 | passphrase = bytearray (passphrase); | ||
108 | self.key = bytearray (self.KEY_SIZE) | ||
109 | self.iv = bytearray (self.IV_SIZE) | ||
110 | if (len (passphrase) > self.KEY_SIZE): | ||
111 | self.key = passphrase[:self.KEY_SIZE] | ||
112 | passphrase = passphrase[self.KEY_SIZE:] | ||
113 | if (len (passphrase) > self.IV_SIZE): | ||
114 | self.iv = passphrase[:self.IV_SIZE] | ||
115 | else: | ||
116 | self.iv[0:len (passphrase)] = passphrase | ||
117 | else: | ||
118 | self.key[0:len (passphrase)] = passphrase | ||
119 | self.key = str (self.key) | ||
120 | self.iv = str (self.iv) | ||
121 | assert (len(self.key) == self.KEY_SIZE) | ||
122 | assert (len(self.iv) == self.IV_SIZE) | ||
123 | |||
124 | def setup_aes_cipher_ (aes_key): | ||
125 | """Initializes the AES object with settings similar to those in GNUnet. | ||
126 | |||
127 | aes_key: the AESKey object | ||
128 | Returns the newly initialized AES object | ||
129 | """ | ||
130 | return AES.new (aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128) | ||
131 | |||
132 | def aes_pad_ (data): | ||
133 | """Adds padding to the data such that the size of the data is a multiple of | ||
134 | 16 bytes | ||
135 | |||
136 | data: the data string | ||
137 | Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added | ||
138 | as padding; data is the new data string with padded bytes at the end | ||
139 | """ | ||
140 | pad_len = len(data) % 16 | ||
141 | if (0 != pad_len): | ||
142 | pad_len = 16 - pad_len | ||
143 | pad_bytes = bytearray (15) | ||
144 | data += str(pad_bytes[:pad_len]) | ||
145 | return (pad_len, data) | ||
146 | |||
147 | def aes_encrypt (aes_key, data): | ||
148 | """Encrypts the given data using AES. | ||
149 | |||
150 | aes_key: the AESKey object to use for AES encryption | ||
151 | data: the data string to encrypt | ||
152 | """ | ||
153 | (pad_len, data) = aes_pad_ (data) | ||
154 | cipher = setup_aes_cipher_ (aes_key) | ||
155 | enc_data = cipher.encrypt (data) | ||
156 | if (0 != pad_len): | ||
157 | enc_data = enc_data[:-pad_len] | ||
158 | return enc_data | ||
159 | |||
160 | def aes_decrypt (aes_key, data): | ||
161 | """Decrypts the given data using AES | ||
162 | |||
163 | aes_key: the AESKey object to use for AES decryption | ||
164 | data: the data string to decrypt | ||
165 | """ | ||
166 | (pad_len, data) = aes_pad_ (data) | ||
167 | cipher = setup_aes_cipher_ (aes_key) | ||
168 | ptext = cipher.decrypt (data) | ||
169 | if (0 != pad_len): | ||
170 | ptext = ptext[:-pad_len] | ||
171 | return ptext | ||
172 | |||
173 | |||
174 | class Chk: | ||
175 | """Class for the content hash key.""" | ||
176 | key = None | ||
177 | query = None | ||
178 | fsize = None | ||
179 | |||
180 | def __init__(self, key, query): | ||
181 | assert (len(key) == CHK_HASH_SIZE) | ||
182 | assert (len(query) == CHK_QUERY_SIZE) | ||
183 | self.key = key | ||
184 | self.query = query | ||
185 | |||
186 | def setSize(self, size): | ||
187 | self.fsize = size | ||
188 | |||
189 | def uri(self): | ||
190 | sizestr = repr (self.fsize) | ||
191 | if isinstance (self.fsize, long): | ||
192 | sizestr = sizestr[:-1] | ||
193 | return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \ | ||
194 | encode_data_to_string(bytearray(self.key)) + "." + \ | ||
195 | encode_data_to_string(bytearray(self.query)) + "." + \ | ||
196 | sizestr | ||
197 | |||
198 | |||
199 | def compute_depth_(size): | ||
200 | """Computes the depth of the hash tree. | ||
201 | |||
202 | size: the size of the file whose tree's depth has to be computed | ||
203 | Returns the depth of the tree. Always > 0. | ||
204 | """ | ||
205 | depth = 1 | ||
206 | fl = DBLOCK_SIZE | ||
207 | while (fl < size): | ||
208 | depth += 1 | ||
209 | if ((fl * CHK_PER_INODE) < fl): | ||
210 | return depth | ||
211 | fl = fl * CHK_PER_INODE | ||
212 | return depth | ||
213 | |||
214 | def compute_tree_size_(depth): | ||
215 | """Calculate how many bytes of payload a block tree of the given depth MAY | ||
216 | correspond to at most (this function ignores the fact that some blocks will | ||
217 | only be present partially due to the total file size cutting some blocks | ||
218 | off at the end). | ||
219 | |||
220 | depth: depth of the block. depth==0 is a DBLOCK. | ||
221 | Returns the number of bytes of payload a subtree of this depth may | ||
222 | correspond to. | ||
223 | """ | ||
224 | rsize = DBLOCK_SIZE | ||
225 | for cnt in range(0, depth): | ||
226 | rsize *= CHK_PER_INODE | ||
227 | return rsize | ||
228 | |||
229 | def compute_chk_offset_(depth, end_offset): | ||
230 | """Compute the offset of the CHK for the current block in the IBlock | ||
231 | above | ||
232 | |||
233 | depth: depth of the IBlock in the tree (aka overall number of tree levels | ||
234 | minus depth); 0 == DBLOCK | ||
235 | end_offset: current offset in the overall file, at the *beginning* of the | ||
236 | block for DBLOCK (depth == 0), otherwise at the *end* of the | ||
237 | block (exclusive) | ||
238 | Returns the offset in the list of CHKs in the above IBlock | ||
239 | """ | ||
240 | bds = compute_tree_size_(depth) | ||
241 | if (depth > 0): | ||
242 | end_offset -= 1 | ||
243 | ret = end_offset / bds | ||
244 | return ret % CHK_PER_INODE | ||
245 | |||
246 | def compute_iblock_size_(depth, offset): | ||
247 | """Compute the size of the current IBLOCK. The encoder is triggering the | ||
248 | calculation of the size of an IBLOCK at the *end* (hence end_offset) of its | ||
249 | construction. The IBLOCK maybe a full or a partial IBLOCK, and this | ||
250 | function is to calculate how long it should be. | ||
251 | |||
252 | depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0 | ||
253 | (this function is for IBLOCKs only!) | ||
254 | offset: current offset in the payload (!) of the overall file, must be > 0 | ||
255 | (since this function is called at the end of a block). | ||
256 | Returns the number of elements to be in the corresponding IBlock | ||
257 | """ | ||
258 | assert (depth > 0) | ||
259 | assert (offset > 0) | ||
260 | bds = compute_tree_size_ (depth) | ||
261 | mod = offset % bds | ||
262 | if mod is 0: | ||
263 | ret = CHK_PER_INODE | ||
264 | else: | ||
265 | bds /= CHK_PER_INODE | ||
266 | ret = mod / bds | ||
267 | if (mod % bds) is not 0: | ||
268 | ret += 1 | ||
269 | return ret | ||
270 | |||
271 | |||
272 | def compute_rootchk(readin, size): | ||
273 | """Returns the content hash key after generating the hash tree for the given | ||
274 | input stream. | ||
275 | |||
276 | readin: the stream where to read data from | ||
277 | size: the size of data to be read | ||
278 | """ | ||
279 | depth = compute_depth_(size); | ||
280 | current_depth = 0 | ||
281 | chks = [None] * (depth * CHK_PER_INODE) # list buffer | ||
282 | read_offset = 0 | ||
283 | logging.debug("Begining to calculate tree hash with depth: "+ repr(depth)) | ||
284 | while True: | ||
285 | if (depth == current_depth): | ||
286 | off = CHK_PER_INODE * (depth - 1) | ||
287 | assert (chks[off] is not None) | ||
288 | logging.debug("Encoding done, reading CHK `"+ chks[off].query + \ | ||
289 | "' from "+ repr(off) +"\n") | ||
290 | uri_chk = chks[off] | ||
291 | assert (size == read_offset) | ||
292 | uri_chk.setSize (size) | ||
293 | return uri_chk | ||
294 | if (0 == current_depth): | ||
295 | pt_size = min(DBLOCK_SIZE, size - read_offset); | ||
296 | try: | ||
297 | pt_block = readin.read(pt_size) | ||
298 | except IOError: | ||
299 | logging.warning ("Error reading input file stream") | ||
300 | return None | ||
301 | else: | ||
302 | pt_elements = compute_iblock_size_(current_depth, read_offset) | ||
303 | pt_block = "" | ||
304 | pt_block = \ | ||
305 | reduce ((lambda ba, chk: | ||
306 | ba + (chk.key + chk.query)), | ||
307 | chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements], | ||
308 | pt_block) | ||
309 | pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE) | ||
310 | assert (len(pt_block) == pt_size) | ||
311 | assert (pt_size <= DBLOCK_SIZE) | ||
312 | off = compute_chk_offset_ (current_depth, read_offset) | ||
313 | logging.debug ("Encoding data at offset "+ repr(read_offset) + \ | ||
314 | " and depth "+ repr(current_depth) +" with block " \ | ||
315 | "size "+ repr(pt_size) +" and target CHK offset "+ \ | ||
316 | repr(current_depth * CHK_PER_INODE)) | ||
317 | pt_hash = sha512_hash (pt_block) | ||
318 | pt_aes_key = AESKey (pt_hash) | ||
319 | pt_enc = aes_encrypt (pt_aes_key, pt_block) | ||
320 | pt_enc_hash = sha512_hash (pt_enc) | ||
321 | chk = Chk(pt_hash, pt_enc_hash) | ||
322 | chks[(current_depth * CHK_PER_INODE) + off] = chk | ||
323 | if (0 == current_depth): | ||
324 | read_offset += pt_size | ||
325 | if (read_offset == size) or \ | ||
326 | (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))): | ||
327 | current_depth += 1 | ||
328 | else: | ||
329 | if (CHK_PER_INODE == off) or (read_offset == size): | ||
330 | current_depth += 1 | ||
331 | else: | ||
332 | current_depth = 0 | ||
333 | |||
334 | |||
335 | def chkuri_from_path (path): | ||
336 | """Returns the CHK URI of the file at the given path. | ||
337 | |||
338 | path: the path of the file whose CHK has to be calculated | ||
339 | """ | ||
340 | size = os.path.getsize (path) | ||
341 | readin = open (path, "rb") | ||
342 | chk = compute_rootchk (readin, size) | ||
343 | readin.close() | ||
344 | return chk.uri() | ||
345 | |||
346 | def usage (): | ||
347 | """Prints help about using this script.""" | ||
348 | print """ | ||
349 | Usage: gnunet-chk.py [options] file | ||
350 | Prints the Content Hash Key of given file in GNUNET-style URI. | ||
351 | |||
352 | Options: | ||
353 | -h, --help : prints this message | ||
354 | """ | ||
355 | |||
356 | |||
357 | if '__main__' == __name__: | ||
358 | try: | ||
359 | opts, args = getopt.getopt(sys.argv[1:], | ||
360 | "h", | ||
361 | ["help"]) | ||
362 | except getopt.GetoptError, err: | ||
363 | print err | ||
364 | print "Exception occured" | ||
365 | usage() | ||
366 | sys.exit(2) | ||
367 | for option, value in opts: | ||
368 | if option in ("-h", "--help"): | ||
369 | usage() | ||
370 | sys.exit(0) | ||
371 | if len(args) != 1: | ||
372 | print "Incorrect number of arguments passed" | ||
373 | usage() | ||
374 | sys.exit(1) | ||
375 | print chkuri_from_path (args[0]) | ||