aboutsummaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib')
-rwxr-xr-xcontrib/gnunet-chk.py375
1 files changed, 375 insertions, 0 deletions
diff --git a/contrib/gnunet-chk.py b/contrib/gnunet-chk.py
new file mode 100755
index 000000000..e34b065b5
--- /dev/null
+++ b/contrib/gnunet-chk.py
@@ -0,0 +1,375 @@
1#!/usr/bin/python
2# This file is part of GNUnet.
3# (C) 2013 Christian Grothoff (and other contributing authors)
4#
5# GNUnet is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published
7# by the Free Software Foundation; either version 2, or (at your
8# option) any later version.
9#
10# GNUnet is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with GNUnet; see the file COPYING. If not, write to the
17# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18# Boston, MA 02111-1307, USA.
19#
20# File: gnunet-chk.py
21# Brief: Computes GNUNET style Content Hash Key for a given file
22# Author: Sree Harsha Totakura
23
24from hashlib import sha512
25import logging
26import os
27import getopt
28import sys
29from Crypto.Cipher import AES
30
31# Defaults
32DBLOCK_SIZE = (32 * 1024) # Data block size
33
34# Pick a multiple of 2 here to achive 8-byte alignment! We also
35# probably want DBlocks to have (roughly) the same size as IBlocks.
36# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
37# byte = 2 * 512 bits). DO NOT CHANGE!
38CHK_PER_INODE = 256
39
40CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes
41
42CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash
43
44GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix
45
46GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix
47
48
49def encode_data_to_string (data):
50 """Returns an ASCII encoding of the given data block like
51 GNUNET_STRINGS_data_to_string() function.
52
53 data: A bytearray representing the block of data which has to be encoded
54 """
55 echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
56 assert (None != data)
57 assert (bytearray == type(data))
58 size = len(data)
59 assert (0 != size)
60 vbit = 0
61 wpos = 0
62 rpos = 0
63 bits = 0
64 out = ""
65 while (rpos < size) or (vbit > 0):
66 if (rpos < size) and (vbit < 5):
67 bits = (bits << 8) | data[rpos] # eat 8 more bits
68 rpos += 1
69 vbit += 8
70 if (vbit < 5):
71 bits <<= (5 - vbit) # zero-padding
72 assert (vbit == ((size * 8) % 5))
73 vbit = 5
74 out += echart[(bits >> (vbit - 5)) & 31]
75 wpos += 1
76 vbit -= 5
77 assert (0 == vbit)
78 return out;
79
80
81def sha512_hash (data):
82 """ Returns the sha512 hash of the given data.
83
84 data: string to hash
85 """
86 hash_obj = sha512()
87 hash_obj.update (data)
88 return hash_obj.digest()
89
90
91class AESKey:
92 """Class for AES Keys. Contains the main key and the initialization
93 vector. """
94
95 key = None # The actual AES key
96 iv = None # The initialization vector
97 cipher = None # The cipher object
98 KEY_SIZE = 32 # AES 256-bit key = 32 bytes
99 IV_SIZE = AES.block_size # Initialization vector size (= AES block size)
100
101 def __init__ (self, passphrase):
102 """Creates a new AES key.
103
104 passphrase: string containing the passphrase to get the AES key and
105 initialization vector
106 """
107 passphrase = bytearray (passphrase);
108 self.key = bytearray (self.KEY_SIZE)
109 self.iv = bytearray (self.IV_SIZE)
110 if (len (passphrase) > self.KEY_SIZE):
111 self.key = passphrase[:self.KEY_SIZE]
112 passphrase = passphrase[self.KEY_SIZE:]
113 if (len (passphrase) > self.IV_SIZE):
114 self.iv = passphrase[:self.IV_SIZE]
115 else:
116 self.iv[0:len (passphrase)] = passphrase
117 else:
118 self.key[0:len (passphrase)] = passphrase
119 self.key = str (self.key)
120 self.iv = str (self.iv)
121 assert (len(self.key) == self.KEY_SIZE)
122 assert (len(self.iv) == self.IV_SIZE)
123
124def setup_aes_cipher_ (aes_key):
125 """Initializes the AES object with settings similar to those in GNUnet.
126
127 aes_key: the AESKey object
128 Returns the newly initialized AES object
129 """
130 return AES.new (aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
131
132def aes_pad_ (data):
133 """Adds padding to the data such that the size of the data is a multiple of
134 16 bytes
135
136 data: the data string
137 Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
138 as padding; data is the new data string with padded bytes at the end
139 """
140 pad_len = len(data) % 16
141 if (0 != pad_len):
142 pad_len = 16 - pad_len
143 pad_bytes = bytearray (15)
144 data += str(pad_bytes[:pad_len])
145 return (pad_len, data)
146
147def aes_encrypt (aes_key, data):
148 """Encrypts the given data using AES.
149
150 aes_key: the AESKey object to use for AES encryption
151 data: the data string to encrypt
152 """
153 (pad_len, data) = aes_pad_ (data)
154 cipher = setup_aes_cipher_ (aes_key)
155 enc_data = cipher.encrypt (data)
156 if (0 != pad_len):
157 enc_data = enc_data[:-pad_len]
158 return enc_data
159
160def aes_decrypt (aes_key, data):
161 """Decrypts the given data using AES
162
163 aes_key: the AESKey object to use for AES decryption
164 data: the data string to decrypt
165 """
166 (pad_len, data) = aes_pad_ (data)
167 cipher = setup_aes_cipher_ (aes_key)
168 ptext = cipher.decrypt (data)
169 if (0 != pad_len):
170 ptext = ptext[:-pad_len]
171 return ptext
172
173
174class Chk:
175 """Class for the content hash key."""
176 key = None
177 query = None
178 fsize = None
179
180 def __init__(self, key, query):
181 assert (len(key) == CHK_HASH_SIZE)
182 assert (len(query) == CHK_QUERY_SIZE)
183 self.key = key
184 self.query = query
185
186 def setSize(self, size):
187 self.fsize = size
188
189 def uri(self):
190 sizestr = repr (self.fsize)
191 if isinstance (self.fsize, long):
192 sizestr = sizestr[:-1]
193 return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
194 encode_data_to_string(bytearray(self.key)) + "." + \
195 encode_data_to_string(bytearray(self.query)) + "." + \
196 sizestr
197
198
199def compute_depth_(size):
200 """Computes the depth of the hash tree.
201
202 size: the size of the file whose tree's depth has to be computed
203 Returns the depth of the tree. Always > 0.
204 """
205 depth = 1
206 fl = DBLOCK_SIZE
207 while (fl < size):
208 depth += 1
209 if ((fl * CHK_PER_INODE) < fl):
210 return depth
211 fl = fl * CHK_PER_INODE
212 return depth
213
214def compute_tree_size_(depth):
215 """Calculate how many bytes of payload a block tree of the given depth MAY
216 correspond to at most (this function ignores the fact that some blocks will
217 only be present partially due to the total file size cutting some blocks
218 off at the end).
219
220 depth: depth of the block. depth==0 is a DBLOCK.
221 Returns the number of bytes of payload a subtree of this depth may
222 correspond to.
223 """
224 rsize = DBLOCK_SIZE
225 for cnt in range(0, depth):
226 rsize *= CHK_PER_INODE
227 return rsize
228
229def compute_chk_offset_(depth, end_offset):
230 """Compute the offset of the CHK for the current block in the IBlock
231 above
232
233 depth: depth of the IBlock in the tree (aka overall number of tree levels
234 minus depth); 0 == DBLOCK
235 end_offset: current offset in the overall file, at the *beginning* of the
236 block for DBLOCK (depth == 0), otherwise at the *end* of the
237 block (exclusive)
238 Returns the offset in the list of CHKs in the above IBlock
239 """
240 bds = compute_tree_size_(depth)
241 if (depth > 0):
242 end_offset -= 1
243 ret = end_offset / bds
244 return ret % CHK_PER_INODE
245
246def compute_iblock_size_(depth, offset):
247 """Compute the size of the current IBLOCK. The encoder is triggering the
248 calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
249 construction. The IBLOCK maybe a full or a partial IBLOCK, and this
250 function is to calculate how long it should be.
251
252 depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
253 (this function is for IBLOCKs only!)
254 offset: current offset in the payload (!) of the overall file, must be > 0
255 (since this function is called at the end of a block).
256 Returns the number of elements to be in the corresponding IBlock
257 """
258 assert (depth > 0)
259 assert (offset > 0)
260 bds = compute_tree_size_ (depth)
261 mod = offset % bds
262 if mod is 0:
263 ret = CHK_PER_INODE
264 else:
265 bds /= CHK_PER_INODE
266 ret = mod / bds
267 if (mod % bds) is not 0:
268 ret += 1
269 return ret
270
271
272def compute_rootchk(readin, size):
273 """Returns the content hash key after generating the hash tree for the given
274 input stream.
275
276 readin: the stream where to read data from
277 size: the size of data to be read
278 """
279 depth = compute_depth_(size);
280 current_depth = 0
281 chks = [None] * (depth * CHK_PER_INODE) # list buffer
282 read_offset = 0
283 logging.debug("Begining to calculate tree hash with depth: "+ repr(depth))
284 while True:
285 if (depth == current_depth):
286 off = CHK_PER_INODE * (depth - 1)
287 assert (chks[off] is not None)
288 logging.debug("Encoding done, reading CHK `"+ chks[off].query + \
289 "' from "+ repr(off) +"\n")
290 uri_chk = chks[off]
291 assert (size == read_offset)
292 uri_chk.setSize (size)
293 return uri_chk
294 if (0 == current_depth):
295 pt_size = min(DBLOCK_SIZE, size - read_offset);
296 try:
297 pt_block = readin.read(pt_size)
298 except IOError:
299 logging.warning ("Error reading input file stream")
300 return None
301 else:
302 pt_elements = compute_iblock_size_(current_depth, read_offset)
303 pt_block = ""
304 pt_block = \
305 reduce ((lambda ba, chk:
306 ba + (chk.key + chk.query)),
307 chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements],
308 pt_block)
309 pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
310 assert (len(pt_block) == pt_size)
311 assert (pt_size <= DBLOCK_SIZE)
312 off = compute_chk_offset_ (current_depth, read_offset)
313 logging.debug ("Encoding data at offset "+ repr(read_offset) + \
314 " and depth "+ repr(current_depth) +" with block " \
315 "size "+ repr(pt_size) +" and target CHK offset "+ \
316 repr(current_depth * CHK_PER_INODE))
317 pt_hash = sha512_hash (pt_block)
318 pt_aes_key = AESKey (pt_hash)
319 pt_enc = aes_encrypt (pt_aes_key, pt_block)
320 pt_enc_hash = sha512_hash (pt_enc)
321 chk = Chk(pt_hash, pt_enc_hash)
322 chks[(current_depth * CHK_PER_INODE) + off] = chk
323 if (0 == current_depth):
324 read_offset += pt_size
325 if (read_offset == size) or \
326 (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
327 current_depth += 1
328 else:
329 if (CHK_PER_INODE == off) or (read_offset == size):
330 current_depth += 1
331 else:
332 current_depth = 0
333
334
335def chkuri_from_path (path):
336 """Returns the CHK URI of the file at the given path.
337
338 path: the path of the file whose CHK has to be calculated
339 """
340 size = os.path.getsize (path)
341 readin = open (path, "rb")
342 chk = compute_rootchk (readin, size)
343 readin.close()
344 return chk.uri()
345
346def usage ():
347 """Prints help about using this script."""
348 print """
349Usage: gnunet-chk.py [options] file
350Prints the Content Hash Key of given file in GNUNET-style URI.
351
352Options:
353 -h, --help : prints this message
354"""
355
356
357if '__main__' == __name__:
358 try:
359 opts, args = getopt.getopt(sys.argv[1:],
360 "h",
361 ["help"])
362 except getopt.GetoptError, err:
363 print err
364 print "Exception occured"
365 usage()
366 sys.exit(2)
367 for option, value in opts:
368 if option in ("-h", "--help"):
369 usage()
370 sys.exit(0)
371 if len(args) != 1:
372 print "Incorrect number of arguments passed"
373 usage()
374 sys.exit(1)
375 print chkuri_from_path (args[0])