diff options
Diffstat (limited to 'src/plugins/hash/md5extractor.c')
-rw-r--r-- | src/plugins/hash/md5extractor.c | 455 |
1 files changed, 0 insertions, 455 deletions
diff --git a/src/plugins/hash/md5extractor.c b/src/plugins/hash/md5extractor.c deleted file mode 100644 index f96beac..0000000 --- a/src/plugins/hash/md5extractor.c +++ /dev/null | |||
@@ -1,455 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of libextractor. | ||
3 | (C) 2004, 2005 Vidyut Samanta and Christian Grothoff | ||
4 | |||
5 | Copyright (C) 1995, 1996, 1999, 2000, 2003 Free Software Foundation, Inc. | ||
6 | NOTE: The canonical source of the MD5 code from this file is maintained | ||
7 | with the GNU C Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu. | ||
8 | |||
9 | libextractor is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published | ||
11 | by the Free Software Foundation; either version 2, or (at your | ||
12 | option) any later version. | ||
13 | |||
14 | libextractor is distributed in the hope that it will be useful, but | ||
15 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with libextractor; see the file COPYING. If not, write to the | ||
21 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
22 | Boston, MA 02111-1307, USA. */ | ||
23 | |||
24 | #include "platform.h" | ||
25 | #include "extractor.h" | ||
26 | #include <stdio.h> | ||
27 | #include <limits.h> | ||
28 | |||
/* Pick the integer types used by the MD5 code below: md5_uint32 has to be
   an unsigned type whose maximum value is 2^32 - 1, md5_uintptr is the
   integer type pointers are converted to.  */
#ifdef _LIBC
# include <stdint.h>
typedef uint32_t md5_uint32;
typedef uintptr_t md5_uintptr;
#else
# define UINT_MAX_32_BITS 4294967295U

/* Try unsigned int, unsigned short and unsigned long, in that order.  */
# if UINT_MAX == UINT_MAX_32_BITS
typedef unsigned int md5_uint32;
# elif USHRT_MAX == UINT_MAX_32_BITS
typedef unsigned short md5_uint32;
# elif ULONG_MAX == UINT_MAX_32_BITS
typedef unsigned long md5_uint32;
# else
  /* None of the candidates fits.  The stray string literal below is
     there on purpose: it makes the translation unit fail to compile.  */
  "Cannot determine unsigned 32-bit data type."
# endif

/* Without <stdint.h> the integer type matching the size of a pointer
   has to be guessed; unsigned long is the guess made here.  */
typedef unsigned long int md5_uintptr;
#endif
55 | |||
56 | /* Structure to save state of computation between the single steps. */ | ||
57 | struct md5_ctx | ||
58 | { | ||
59 | md5_uint32 A; | ||
60 | md5_uint32 B; | ||
61 | md5_uint32 C; | ||
62 | md5_uint32 D; | ||
63 | |||
64 | md5_uint32 total[2]; | ||
65 | md5_uint32 buflen; | ||
66 | char buffer[128]; | ||
67 | }; | ||
68 | |||
/* Rotate the 32-bit value X left by N bits.  N must lie in 1..31 so that
   neither shift count reaches the width of the operand.  */
#define rol(x,n) ( ((x) >> (32-(n))) | ((x) << (n)) )
70 | |||
/* MD5 is defined on 32-bit words in little-endian byte order, so hosts
   that store words big-endian have to swap the bytes of every word they
   load from or store to a byte buffer.

   The byte-order macros are only compared once they are known to be
   defined: inside #if an undefined identifier evaluates to 0, so a bare
   `__BYTE_ORDER == __BIG_ENDIAN' would be true (0 == 0) on any build that
   did not pull in <endian.h>, and a little-endian host would then swap
   bytes and produce wrong digests.  The compiler-predefined
   __BYTE_ORDER__ / __ORDER_BIG_ENDIAN__ pair is accepted as well.  */
#ifndef WORDS_BIGENDIAN
# if (defined __BYTE_ORDER && defined __BIG_ENDIAN \
      && __BYTE_ORDER == __BIG_ENDIAN) \
     || (defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ \
         && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#  define WORDS_BIGENDIAN 1
# endif
#endif

/* SWAP (N) converts the 32-bit value N between host byte order and
   little-endian byte order.  N is evaluated more than once in the
   big-endian variant, so it must not have side effects.  */
#ifdef WORDS_BIGENDIAN
# define SWAP(n)							\
    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
#else
# define SWAP(n) (n)
#endif
81 | |||
/* BLOCKSIZE has to be a whole number of 64-byte units; the check below
   rejects any other value at compile time.  */
#define BLOCKSIZE 4096
#if (BLOCKSIZE % 64) != 0
# error "invalid BLOCKSIZE"
#endif
87 | |||
/* Padding bytes appended to the message to reach the next 64-byte
   boundary: a single 0x80 byte followed by zeros (RFC 1321, 3.1: Step 1).
   The elements that are not listed are zero-initialized.  */
static const unsigned char fillbuf[64] = { 0x80 };
91 | |||
92 | |||
93 | |||
94 | |||
/* The four auxiliary functions used in the four rounds of the MD5
   algorithm, as defined in RFC 1321.  The first one is written in a
   slightly optimized form (as found in Colin Plumb's public domain
   implementation); the textbook form is ((b & c) | (~b & d)).

   Every argument is parenthesized in the expansion so that the macros
   also give the right result when they are passed an expression such as
   `x | y' rather than a plain identifier.  Arguments may be evaluated
   more than once and therefore must not have side effects.  */
#define FF(b, c, d) ((d) ^ ((b) & ((c) ^ (d))))
#define FG(b, c, d) FF ((d), (b), (c))
#define FH(b, c, d) ((b) ^ (c) ^ (d))
#define FI(b, c, d) ((c) ^ ((b) | ~(d)))
103 | |||
104 | /* Process LEN bytes of BUFFER, accumulating context into CTX. | ||
105 | It is assumed that LEN % 64 == 0. */ | ||
106 | |||
107 | static void | ||
108 | md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx) | ||
109 | { | ||
110 | md5_uint32 correct_words[16]; | ||
111 | const md5_uint32 *words = buffer; | ||
112 | size_t nwords = len / sizeof (md5_uint32); | ||
113 | const md5_uint32 *endp = words + nwords; | ||
114 | md5_uint32 A = ctx->A; | ||
115 | md5_uint32 B = ctx->B; | ||
116 | md5_uint32 C = ctx->C; | ||
117 | md5_uint32 D = ctx->D; | ||
118 | |||
119 | /* First increment the byte count. RFC 1321 specifies the possible | ||
120 | length of the file up to 2^64 bits. Here we only compute the | ||
121 | number of bytes. Do a double word increment. */ | ||
122 | ctx->total[0] += len; | ||
123 | if (ctx->total[0] < len) | ||
124 | ++ctx->total[1]; | ||
125 | |||
126 | /* Process all bytes in the buffer with 64 bytes in each round of | ||
127 | the loop. */ | ||
128 | while (words < endp) | ||
129 | { | ||
130 | md5_uint32 *cwp = correct_words; | ||
131 | md5_uint32 A_save = A; | ||
132 | md5_uint32 B_save = B; | ||
133 | md5_uint32 C_save = C; | ||
134 | md5_uint32 D_save = D; | ||
135 | |||
136 | /* First round: using the given function, the context and a constant | ||
137 | the next context is computed. Because the algorithms processing | ||
138 | unit is a 32-bit word and it is determined to work on words in | ||
139 | little endian byte order we perhaps have to change the byte order | ||
140 | before the computation. To reduce the work for the next steps | ||
141 | we store the swapped words in the array CORRECT_WORDS. */ | ||
142 | |||
143 | #define OP(a, b, c, d, s, T) \ | ||
144 | do \ | ||
145 | { \ | ||
146 | a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \ | ||
147 | ++words; \ | ||
148 | a = rol (a, s); \ | ||
149 | a += b; \ | ||
150 | } \ | ||
151 | while (0) | ||
152 | |||
153 | /* Before we start, one word to the strange constants. | ||
154 | They are defined in RFC 1321 as | ||
155 | |||
156 | T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64, or | ||
157 | perl -e 'foreach(1..64){printf "0x%08x\n", int (4294967296 * abs (sin $_))}' | ||
158 | */ | ||
159 | |||
160 | /* Round 1. */ | ||
161 | OP (A, B, C, D, 7, 0xd76aa478); | ||
162 | OP (D, A, B, C, 12, 0xe8c7b756); | ||
163 | OP (C, D, A, B, 17, 0x242070db); | ||
164 | OP (B, C, D, A, 22, 0xc1bdceee); | ||
165 | OP (A, B, C, D, 7, 0xf57c0faf); | ||
166 | OP (D, A, B, C, 12, 0x4787c62a); | ||
167 | OP (C, D, A, B, 17, 0xa8304613); | ||
168 | OP (B, C, D, A, 22, 0xfd469501); | ||
169 | OP (A, B, C, D, 7, 0x698098d8); | ||
170 | OP (D, A, B, C, 12, 0x8b44f7af); | ||
171 | OP (C, D, A, B, 17, 0xffff5bb1); | ||
172 | OP (B, C, D, A, 22, 0x895cd7be); | ||
173 | OP (A, B, C, D, 7, 0x6b901122); | ||
174 | OP (D, A, B, C, 12, 0xfd987193); | ||
175 | OP (C, D, A, B, 17, 0xa679438e); | ||
176 | OP (B, C, D, A, 22, 0x49b40821); | ||
177 | |||
178 | /* For the second to fourth round we have the possibly swapped words | ||
179 | in CORRECT_WORDS. Redefine the macro to take an additional first | ||
180 | argument specifying the function to use. */ | ||
181 | #undef OP | ||
182 | #define OP(f, a, b, c, d, k, s, T) \ | ||
183 | do \ | ||
184 | { \ | ||
185 | a += f (b, c, d) + correct_words[k] + T; \ | ||
186 | a = rol (a, s); \ | ||
187 | a += b; \ | ||
188 | } \ | ||
189 | while (0) | ||
190 | |||
191 | /* Round 2. */ | ||
192 | OP (FG, A, B, C, D, 1, 5, 0xf61e2562); | ||
193 | OP (FG, D, A, B, C, 6, 9, 0xc040b340); | ||
194 | OP (FG, C, D, A, B, 11, 14, 0x265e5a51); | ||
195 | OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); | ||
196 | OP (FG, A, B, C, D, 5, 5, 0xd62f105d); | ||
197 | OP (FG, D, A, B, C, 10, 9, 0x02441453); | ||
198 | OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); | ||
199 | OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); | ||
200 | OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); | ||
201 | OP (FG, D, A, B, C, 14, 9, 0xc33707d6); | ||
202 | OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); | ||
203 | OP (FG, B, C, D, A, 8, 20, 0x455a14ed); | ||
204 | OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); | ||
205 | OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); | ||
206 | OP (FG, C, D, A, B, 7, 14, 0x676f02d9); | ||
207 | OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); | ||
208 | |||
209 | /* Round 3. */ | ||
210 | OP (FH, A, B, C, D, 5, 4, 0xfffa3942); | ||
211 | OP (FH, D, A, B, C, 8, 11, 0x8771f681); | ||
212 | OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); | ||
213 | OP (FH, B, C, D, A, 14, 23, 0xfde5380c); | ||
214 | OP (FH, A, B, C, D, 1, 4, 0xa4beea44); | ||
215 | OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); | ||
216 | OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); | ||
217 | OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); | ||
218 | OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); | ||
219 | OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); | ||
220 | OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); | ||
221 | OP (FH, B, C, D, A, 6, 23, 0x04881d05); | ||
222 | OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); | ||
223 | OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); | ||
224 | OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); | ||
225 | OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); | ||
226 | |||
227 | /* Round 4. */ | ||
228 | OP (FI, A, B, C, D, 0, 6, 0xf4292244); | ||
229 | OP (FI, D, A, B, C, 7, 10, 0x432aff97); | ||
230 | OP (FI, C, D, A, B, 14, 15, 0xab9423a7); | ||
231 | OP (FI, B, C, D, A, 5, 21, 0xfc93a039); | ||
232 | OP (FI, A, B, C, D, 12, 6, 0x655b59c3); | ||
233 | OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); | ||
234 | OP (FI, C, D, A, B, 10, 15, 0xffeff47d); | ||
235 | OP (FI, B, C, D, A, 1, 21, 0x85845dd1); | ||
236 | OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); | ||
237 | OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); | ||
238 | OP (FI, C, D, A, B, 6, 15, 0xa3014314); | ||
239 | OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); | ||
240 | OP (FI, A, B, C, D, 4, 6, 0xf7537e82); | ||
241 | OP (FI, D, A, B, C, 11, 10, 0xbd3af235); | ||
242 | OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); | ||
243 | OP (FI, B, C, D, A, 9, 21, 0xeb86d391); | ||
244 | |||
245 | /* Add the starting values of the context. */ | ||
246 | A += A_save; | ||
247 | B += B_save; | ||
248 | C += C_save; | ||
249 | D += D_save; | ||
250 | } | ||
251 | |||
252 | /* Put checksum in context given as argument. */ | ||
253 | ctx->A = A; | ||
254 | ctx->B = B; | ||
255 | ctx->C = C; | ||
256 | ctx->D = D; | ||
257 | } | ||
258 | |||
259 | |||
260 | static void | ||
261 | md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx) | ||
262 | { | ||
263 | /* When we already have some bits in our internal buffer concatenate | ||
264 | both inputs first. */ | ||
265 | if (ctx->buflen != 0) | ||
266 | { | ||
267 | size_t left_over = ctx->buflen; | ||
268 | size_t add = 128 - left_over > len ? len : 128 - left_over; | ||
269 | |||
270 | memcpy (&ctx->buffer[left_over], buffer, add); | ||
271 | ctx->buflen += add; | ||
272 | |||
273 | if (ctx->buflen > 64) | ||
274 | { | ||
275 | md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx); | ||
276 | |||
277 | ctx->buflen &= 63; | ||
278 | /* The regions in the following copy operation cannot overlap. */ | ||
279 | memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63], | ||
280 | ctx->buflen); | ||
281 | } | ||
282 | |||
283 | buffer = (const char *) buffer + add; | ||
284 | len -= add; | ||
285 | } | ||
286 | |||
287 | /* Process available complete blocks. */ | ||
288 | if (len >= 64) | ||
289 | { | ||
290 | #if !_STRING_ARCH_unaligned | ||
291 | /* To check alignment gcc has an appropriate operator. Other | ||
292 | compilers don't. */ | ||
293 | # if __GNUC__ >= 2 | ||
294 | # define UNALIGNED_P(p) (((md5_uintptr) p) % __alignof__ (md5_uint32) != 0) | ||
295 | # else | ||
296 | # define UNALIGNED_P(p) (((md5_uintptr) p) % sizeof (md5_uint32) != 0) | ||
297 | # endif | ||
298 | if (UNALIGNED_P (buffer)) | ||
299 | while (len > 64) | ||
300 | { | ||
301 | md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); | ||
302 | buffer = (const char *) buffer + 64; | ||
303 | len -= 64; | ||
304 | } | ||
305 | else | ||
306 | #endif | ||
307 | { | ||
308 | md5_process_block (buffer, len & ~63, ctx); | ||
309 | buffer = (const char *) buffer + (len & ~63); | ||
310 | len &= 63; | ||
311 | } | ||
312 | } | ||
313 | |||
314 | /* Move remaining bytes in internal buffer. */ | ||
315 | if (len > 0) | ||
316 | { | ||
317 | size_t left_over = ctx->buflen; | ||
318 | |||
319 | memcpy (&ctx->buffer[left_over], buffer, len); | ||
320 | left_over += len; | ||
321 | if (left_over >= 64) | ||
322 | { | ||
323 | md5_process_block (ctx->buffer, 64, ctx); | ||
324 | left_over -= 64; | ||
325 | memcpy (ctx->buffer, &ctx->buffer[64], left_over); | ||
326 | } | ||
327 | ctx->buflen = left_over; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | |||
332 | /* Initialize structure containing state of computation. | ||
333 | (RFC 1321, 3.3: Step 3) */ | ||
334 | static void | ||
335 | md5_init_ctx (struct md5_ctx *ctx) | ||
336 | { | ||
337 | ctx->A = 0x67452301; | ||
338 | ctx->B = 0xefcdab89; | ||
339 | ctx->C = 0x98badcfe; | ||
340 | ctx->D = 0x10325476; | ||
341 | |||
342 | ctx->total[0] = ctx->total[1] = 0; | ||
343 | ctx->buflen = 0; | ||
344 | } | ||
345 | |||
346 | /* Put result from CTX in first 16 bytes following RESBUF. The result | ||
347 | must be in little endian byte order. | ||
348 | |||
349 | IMPORTANT: On some systems it is required that RESBUF is correctly | ||
350 | aligned for a 32 bits value. */ | ||
351 | static void * | ||
352 | md5_read_ctx (const struct md5_ctx *ctx, void *resbuf) | ||
353 | { | ||
354 | ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A); | ||
355 | ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B); | ||
356 | ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C); | ||
357 | ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D); | ||
358 | |||
359 | return resbuf; | ||
360 | } | ||
361 | |||
362 | /* Process the remaining bytes in the internal buffer and the usual | ||
363 | prolog according to the standard and write the result to RESBUF. | ||
364 | |||
365 | IMPORTANT: On some systems it is required that RESBUF is correctly | ||
366 | aligned for a 32 bits value. */ | ||
367 | static void * | ||
368 | md5_finish_ctx (struct md5_ctx *ctx, void *resbuf) | ||
369 | { | ||
370 | /* Take yet unprocessed bytes into account. */ | ||
371 | md5_uint32 bytes = ctx->buflen; | ||
372 | size_t pad; | ||
373 | |||
374 | /* Now count remaining bytes. */ | ||
375 | ctx->total[0] += bytes; | ||
376 | if (ctx->total[0] < bytes) | ||
377 | ++ctx->total[1]; | ||
378 | |||
379 | pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes; | ||
380 | memcpy (&ctx->buffer[bytes], fillbuf, pad); | ||
381 | |||
382 | /* Put the 64-bit file length in *bits* at the end of the buffer. */ | ||
383 | *(md5_uint32 *) & ctx->buffer[bytes + pad] = SWAP (ctx->total[0] << 3); | ||
384 | *(md5_uint32 *) & ctx->buffer[bytes + pad + 4] = | ||
385 | SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29)); | ||
386 | |||
387 | /* Process last bytes. */ | ||
388 | md5_process_block (ctx->buffer, bytes + pad + 8, ctx); | ||
389 | |||
390 | return md5_read_ctx (ctx, resbuf); | ||
391 | } | ||
392 | |||
393 | /* Compute MD5 message digest for LEN bytes beginning at BUFFER. The | ||
394 | result is always in little endian byte order, so that a byte-wise | ||
395 | output yields to the wanted ASCII representation of the message | ||
396 | digest. */ | ||
397 | static void * | ||
398 | md5_buffer (const char *buffer, size_t len, void *resblock) | ||
399 | { | ||
400 | struct md5_ctx ctx; | ||
401 | |||
402 | /* Initialize the computation context. */ | ||
403 | md5_init_ctx (&ctx); | ||
404 | |||
405 | /* Process whole buffer but last len % 64 bytes. */ | ||
406 | md5_process_bytes (buffer, len, &ctx); | ||
407 | |||
408 | /* Put result in desired memory area. */ | ||
409 | return md5_finish_ctx (&ctx, resblock); | ||
410 | } | ||
411 | |||
412 | |||
413 | |||
414 | |||
415 | static struct EXTRACTOR_Keywords * | ||
416 | addKeyword (EXTRACTOR_KeywordList * oldhead, | ||
417 | const char *phrase, EXTRACTOR_KeywordType type) | ||
418 | { | ||
419 | |||
420 | EXTRACTOR_KeywordList *keyword; | ||
421 | keyword = (EXTRACTOR_KeywordList *) malloc (sizeof (EXTRACTOR_KeywordList)); | ||
422 | keyword->next = oldhead; | ||
423 | keyword->keyword = strdup (phrase); | ||
424 | keyword->keywordType = type; | ||
425 | return keyword; | ||
426 | } | ||
427 | |||
428 | |||
429 | |||
/* Sizes of the digest handled below: its width in bits, its length in
   raw bytes, and its length in hexadecimal digits (two per byte).  */
#define DIGEST_BITS 128
#define DIGEST_BIN_BYTES (DIGEST_BITS / 8)
#define DIGEST_HEX_BYTES (DIGEST_BITS / 4)
#define MAX_DIGEST_BIN_BYTES DIGEST_BIN_BYTES
434 | |||
435 | struct EXTRACTOR_Keywords * | ||
436 | libextractor_hash_md5_extract (const char *filename, | ||
437 | const char *data, | ||
438 | size_t size, struct EXTRACTOR_Keywords *prev) | ||
439 | { | ||
440 | unsigned char bin_buffer[MAX_DIGEST_BIN_BYTES]; | ||
441 | char hash[8 * MAX_DIGEST_BIN_BYTES]; | ||
442 | char buf[16]; | ||
443 | int i; | ||
444 | |||
445 | md5_buffer (data, size, bin_buffer); | ||
446 | hash[0] = '\0'; | ||
447 | for (i = 0; i < DIGEST_HEX_BYTES / 2; i++) | ||
448 | { | ||
449 | snprintf (buf, 16, "%02x", bin_buffer[i]); | ||
450 | strcat (hash, buf); | ||
451 | } | ||
452 | prev = addKeyword (prev, hash, EXTRACTOR_HASH_MD5); | ||
453 | |||
454 | return prev; | ||
455 | } | ||