aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/hash/md5extractor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/hash/md5extractor.c')
-rw-r--r--src/plugins/hash/md5extractor.c455
1 files changed, 0 insertions, 455 deletions
diff --git a/src/plugins/hash/md5extractor.c b/src/plugins/hash/md5extractor.c
deleted file mode 100644
index f96beac..0000000
--- a/src/plugins/hash/md5extractor.c
+++ /dev/null
@@ -1,455 +0,0 @@
1/*
2 This file is part of libextractor.
3 (C) 2004, 2005 Vidyut Samanta and Christian Grothoff
4
5 Copyright (C) 1995, 1996, 1999, 2000, 2003 Free Software Foundation, Inc.
6 NOTE: The canonical source of the MD5 code from this file is maintained
7 with the GNU C Library. Bugs can be reported to bug-glibc@prep.ai.mit.edu.
8
9 libextractor is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 2, or (at your
12 option) any later version.
13
14 libextractor is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with libextractor; see the file COPYING. If not, write to the
21 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
#include "platform.h"
#include "extractor.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
28
/* Pick the integer types used by the MD5 code.

   md5_uint32  - an unsigned type that is exactly 32 bits wide.
   md5_uintptr - an unsigned type used to inspect pointer values
                 (only for alignment checks).

   Inside the C library <stdint.h> supplies both.  Elsewhere the 32-bit
   type is selected by comparing the <limits.h> maxima against 2^32 - 1,
   and compilation stops if no candidate matches.  */
#ifdef _LIBC
# include <stdint.h>
typedef uint32_t md5_uint32;
typedef uintptr_t md5_uintptr;
#else
# define UINT_MAX_32_BITS 4294967295U

# if UINT_MAX == UINT_MAX_32_BITS
typedef unsigned int md5_uint32;
# elif USHRT_MAX == UINT_MAX_32_BITS
typedef unsigned short md5_uint32;
# elif ULONG_MAX == UINT_MAX_32_BITS
typedef unsigned long md5_uint32;
# else
   /* Same mechanism this file already uses for the BLOCKSIZE check.  */
#  error "Cannot determine unsigned 32-bit data type."
# endif

/* We have to make a guess about the integer type equivalent in size
   to pointers.  NOTE(review): unsigned long is narrower than a pointer
   on LLP64 targets; only the low bits are used here -- confirm if this
   type is ever used for more than alignment tests.  */
typedef unsigned long int md5_uintptr;
#endif
55
56/* Structure to save state of computation between the single steps. */
57struct md5_ctx
58{
59 md5_uint32 A;
60 md5_uint32 B;
61 md5_uint32 C;
62 md5_uint32 D;
63
64 md5_uint32 total[2];
65 md5_uint32 buflen;
66 char buffer[128];
67};
68
/* Rotate the 32-bit value X left by N bits (0 < N < 32).  */
#define rol(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) )

/* Detect big-endian hosts.  The comparison is only made when the
   macros are actually defined: in #if an undefined identifier is 0, so
   an unguarded `__BYTE_ORDER == __BIG_ENDIAN' would be `0 == 0' and
   wrongly select the big-endian path.  A WORDS_BIGENDIAN supplied by
   the build configuration is left untouched.  */
#ifndef WORDS_BIGENDIAN
# if defined (__BYTE_ORDER) && defined (__BIG_ENDIAN)
#  if __BYTE_ORDER == __BIG_ENDIAN
#   define WORDS_BIGENDIAN 1
#  endif
# elif defined (__BYTE_ORDER__) && defined (__ORDER_BIG_ENDIAN__)
#  if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#   define WORDS_BIGENDIAN 1
#  endif
# endif
#endif

/* MD5 works on little-endian 32-bit words; SWAP converts between host
   order and that order (a no-op on little-endian hosts).  */
#ifdef WORDS_BIGENDIAN
# define SWAP(n)							\
    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
#else
# define SWAP(n) (n)
#endif

#define BLOCKSIZE 4096
/* Ensure that BLOCKSIZE is a multiple of 64.  */
#if BLOCKSIZE % 64 != 0
#error "invalid BLOCKSIZE"
#endif

/* This array contains the bytes used to pad the buffer to the next
   64-byte boundary.  (RFC 1321, 3.1: Step 1)  */
static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */  };
91
92
93
94
/* These are the four functions used in the four steps of the MD5 algorithm
   and defined in the RFC 1321.  The first function is a little bit optimized
   (as found in Colin Plumb's public domain implementation).

   Every argument is parenthesized so that the macros stay correct when
   they are handed expressions rather than plain identifiers.  */
/* #define FF(b, c, d) ((b & c) | (~b & d)) */
#define FF(b, c, d) ((d) ^ ((b) & ((c) ^ (d))))
#define FG(b, c, d) FF ((d), (b), (c))
#define FH(b, c, d) ((b) ^ (c) ^ (d))
#define FI(b, c, d) ((c) ^ ((b) | ~(d)))
103
104/* Process LEN bytes of BUFFER, accumulating context into CTX.
105 It is assumed that LEN % 64 == 0. */
106
107static void
108md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx)
109{
110 md5_uint32 correct_words[16];
111 const md5_uint32 *words = buffer;
112 size_t nwords = len / sizeof (md5_uint32);
113 const md5_uint32 *endp = words + nwords;
114 md5_uint32 A = ctx->A;
115 md5_uint32 B = ctx->B;
116 md5_uint32 C = ctx->C;
117 md5_uint32 D = ctx->D;
118
119 /* First increment the byte count. RFC 1321 specifies the possible
120 length of the file up to 2^64 bits. Here we only compute the
121 number of bytes. Do a double word increment. */
122 ctx->total[0] += len;
123 if (ctx->total[0] < len)
124 ++ctx->total[1];
125
126 /* Process all bytes in the buffer with 64 bytes in each round of
127 the loop. */
128 while (words < endp)
129 {
130 md5_uint32 *cwp = correct_words;
131 md5_uint32 A_save = A;
132 md5_uint32 B_save = B;
133 md5_uint32 C_save = C;
134 md5_uint32 D_save = D;
135
136 /* First round: using the given function, the context and a constant
137 the next context is computed. Because the algorithms processing
138 unit is a 32-bit word and it is determined to work on words in
139 little endian byte order we perhaps have to change the byte order
140 before the computation. To reduce the work for the next steps
141 we store the swapped words in the array CORRECT_WORDS. */
142
143#define OP(a, b, c, d, s, T) \
144 do \
145 { \
146 a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \
147 ++words; \
148 a = rol (a, s); \
149 a += b; \
150 } \
151 while (0)
152
153 /* Before we start, one word to the strange constants.
154 They are defined in RFC 1321 as
155
156 T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64, or
157 perl -e 'foreach(1..64){printf "0x%08x\n", int (4294967296 * abs (sin $_))}'
158 */
159
160 /* Round 1. */
161 OP (A, B, C, D, 7, 0xd76aa478);
162 OP (D, A, B, C, 12, 0xe8c7b756);
163 OP (C, D, A, B, 17, 0x242070db);
164 OP (B, C, D, A, 22, 0xc1bdceee);
165 OP (A, B, C, D, 7, 0xf57c0faf);
166 OP (D, A, B, C, 12, 0x4787c62a);
167 OP (C, D, A, B, 17, 0xa8304613);
168 OP (B, C, D, A, 22, 0xfd469501);
169 OP (A, B, C, D, 7, 0x698098d8);
170 OP (D, A, B, C, 12, 0x8b44f7af);
171 OP (C, D, A, B, 17, 0xffff5bb1);
172 OP (B, C, D, A, 22, 0x895cd7be);
173 OP (A, B, C, D, 7, 0x6b901122);
174 OP (D, A, B, C, 12, 0xfd987193);
175 OP (C, D, A, B, 17, 0xa679438e);
176 OP (B, C, D, A, 22, 0x49b40821);
177
178 /* For the second to fourth round we have the possibly swapped words
179 in CORRECT_WORDS. Redefine the macro to take an additional first
180 argument specifying the function to use. */
181#undef OP
182#define OP(f, a, b, c, d, k, s, T) \
183 do \
184 { \
185 a += f (b, c, d) + correct_words[k] + T; \
186 a = rol (a, s); \
187 a += b; \
188 } \
189 while (0)
190
191 /* Round 2. */
192 OP (FG, A, B, C, D, 1, 5, 0xf61e2562);
193 OP (FG, D, A, B, C, 6, 9, 0xc040b340);
194 OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
195 OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
196 OP (FG, A, B, C, D, 5, 5, 0xd62f105d);
197 OP (FG, D, A, B, C, 10, 9, 0x02441453);
198 OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
199 OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
200 OP (FG, A, B, C, D, 9, 5, 0x21e1cde6);
201 OP (FG, D, A, B, C, 14, 9, 0xc33707d6);
202 OP (FG, C, D, A, B, 3, 14, 0xf4d50d87);
203 OP (FG, B, C, D, A, 8, 20, 0x455a14ed);
204 OP (FG, A, B, C, D, 13, 5, 0xa9e3e905);
205 OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8);
206 OP (FG, C, D, A, B, 7, 14, 0x676f02d9);
207 OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
208
209 /* Round 3. */
210 OP (FH, A, B, C, D, 5, 4, 0xfffa3942);
211 OP (FH, D, A, B, C, 8, 11, 0x8771f681);
212 OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
213 OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
214 OP (FH, A, B, C, D, 1, 4, 0xa4beea44);
215 OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9);
216 OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60);
217 OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
218 OP (FH, A, B, C, D, 13, 4, 0x289b7ec6);
219 OP (FH, D, A, B, C, 0, 11, 0xeaa127fa);
220 OP (FH, C, D, A, B, 3, 16, 0xd4ef3085);
221 OP (FH, B, C, D, A, 6, 23, 0x04881d05);
222 OP (FH, A, B, C, D, 9, 4, 0xd9d4d039);
223 OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
224 OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
225 OP (FH, B, C, D, A, 2, 23, 0xc4ac5665);
226
227 /* Round 4. */
228 OP (FI, A, B, C, D, 0, 6, 0xf4292244);
229 OP (FI, D, A, B, C, 7, 10, 0x432aff97);
230 OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
231 OP (FI, B, C, D, A, 5, 21, 0xfc93a039);
232 OP (FI, A, B, C, D, 12, 6, 0x655b59c3);
233 OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92);
234 OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
235 OP (FI, B, C, D, A, 1, 21, 0x85845dd1);
236 OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f);
237 OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
238 OP (FI, C, D, A, B, 6, 15, 0xa3014314);
239 OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
240 OP (FI, A, B, C, D, 4, 6, 0xf7537e82);
241 OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
242 OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
243 OP (FI, B, C, D, A, 9, 21, 0xeb86d391);
244
245 /* Add the starting values of the context. */
246 A += A_save;
247 B += B_save;
248 C += C_save;
249 D += D_save;
250 }
251
252 /* Put checksum in context given as argument. */
253 ctx->A = A;
254 ctx->B = B;
255 ctx->C = C;
256 ctx->D = D;
257}
258
259
260static void
261md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx)
262{
263 /* When we already have some bits in our internal buffer concatenate
264 both inputs first. */
265 if (ctx->buflen != 0)
266 {
267 size_t left_over = ctx->buflen;
268 size_t add = 128 - left_over > len ? len : 128 - left_over;
269
270 memcpy (&ctx->buffer[left_over], buffer, add);
271 ctx->buflen += add;
272
273 if (ctx->buflen > 64)
274 {
275 md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
276
277 ctx->buflen &= 63;
278 /* The regions in the following copy operation cannot overlap. */
279 memcpy (ctx->buffer, &ctx->buffer[(left_over + add) & ~63],
280 ctx->buflen);
281 }
282
283 buffer = (const char *) buffer + add;
284 len -= add;
285 }
286
287 /* Process available complete blocks. */
288 if (len >= 64)
289 {
290#if !_STRING_ARCH_unaligned
291/* To check alignment gcc has an appropriate operator. Other
292 compilers don't. */
293# if __GNUC__ >= 2
294# define UNALIGNED_P(p) (((md5_uintptr) p) % __alignof__ (md5_uint32) != 0)
295# else
296# define UNALIGNED_P(p) (((md5_uintptr) p) % sizeof (md5_uint32) != 0)
297# endif
298 if (UNALIGNED_P (buffer))
299 while (len > 64)
300 {
301 md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
302 buffer = (const char *) buffer + 64;
303 len -= 64;
304 }
305 else
306#endif
307 {
308 md5_process_block (buffer, len & ~63, ctx);
309 buffer = (const char *) buffer + (len & ~63);
310 len &= 63;
311 }
312 }
313
314 /* Move remaining bytes in internal buffer. */
315 if (len > 0)
316 {
317 size_t left_over = ctx->buflen;
318
319 memcpy (&ctx->buffer[left_over], buffer, len);
320 left_over += len;
321 if (left_over >= 64)
322 {
323 md5_process_block (ctx->buffer, 64, ctx);
324 left_over -= 64;
325 memcpy (ctx->buffer, &ctx->buffer[64], left_over);
326 }
327 ctx->buflen = left_over;
328 }
329}
330
331
332/* Initialize structure containing state of computation.
333 (RFC 1321, 3.3: Step 3) */
334static void
335md5_init_ctx (struct md5_ctx *ctx)
336{
337 ctx->A = 0x67452301;
338 ctx->B = 0xefcdab89;
339 ctx->C = 0x98badcfe;
340 ctx->D = 0x10325476;
341
342 ctx->total[0] = ctx->total[1] = 0;
343 ctx->buflen = 0;
344}
345
346/* Put result from CTX in first 16 bytes following RESBUF. The result
347 must be in little endian byte order.
348
349 IMPORTANT: On some systems it is required that RESBUF is correctly
350 aligned for a 32 bits value. */
351static void *
352md5_read_ctx (const struct md5_ctx *ctx, void *resbuf)
353{
354 ((md5_uint32 *) resbuf)[0] = SWAP (ctx->A);
355 ((md5_uint32 *) resbuf)[1] = SWAP (ctx->B);
356 ((md5_uint32 *) resbuf)[2] = SWAP (ctx->C);
357 ((md5_uint32 *) resbuf)[3] = SWAP (ctx->D);
358
359 return resbuf;
360}
361
362/* Process the remaining bytes in the internal buffer and the usual
363 prolog according to the standard and write the result to RESBUF.
364
365 IMPORTANT: On some systems it is required that RESBUF is correctly
366 aligned for a 32 bits value. */
367static void *
368md5_finish_ctx (struct md5_ctx *ctx, void *resbuf)
369{
370 /* Take yet unprocessed bytes into account. */
371 md5_uint32 bytes = ctx->buflen;
372 size_t pad;
373
374 /* Now count remaining bytes. */
375 ctx->total[0] += bytes;
376 if (ctx->total[0] < bytes)
377 ++ctx->total[1];
378
379 pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes;
380 memcpy (&ctx->buffer[bytes], fillbuf, pad);
381
382 /* Put the 64-bit file length in *bits* at the end of the buffer. */
383 *(md5_uint32 *) & ctx->buffer[bytes + pad] = SWAP (ctx->total[0] << 3);
384 *(md5_uint32 *) & ctx->buffer[bytes + pad + 4] =
385 SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
386
387 /* Process last bytes. */
388 md5_process_block (ctx->buffer, bytes + pad + 8, ctx);
389
390 return md5_read_ctx (ctx, resbuf);
391}
392
393/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The
394 result is always in little endian byte order, so that a byte-wise
395 output yields to the wanted ASCII representation of the message
396 digest. */
397static void *
398md5_buffer (const char *buffer, size_t len, void *resblock)
399{
400 struct md5_ctx ctx;
401
402 /* Initialize the computation context. */
403 md5_init_ctx (&ctx);
404
405 /* Process whole buffer but last len % 64 bytes. */
406 md5_process_bytes (buffer, len, &ctx);
407
408 /* Put result in desired memory area. */
409 return md5_finish_ctx (&ctx, resblock);
410}
411
412
413
414
415static struct EXTRACTOR_Keywords *
416addKeyword (EXTRACTOR_KeywordList * oldhead,
417 const char *phrase, EXTRACTOR_KeywordType type)
418{
419
420 EXTRACTOR_KeywordList *keyword;
421 keyword = (EXTRACTOR_KeywordList *) malloc (sizeof (EXTRACTOR_KeywordList));
422 keyword->next = oldhead;
423 keyword->keyword = strdup (phrase);
424 keyword->keywordType = type;
425 return keyword;
426}
427
428
429
430#define DIGEST_BITS 128
431#define DIGEST_HEX_BYTES (DIGEST_BITS / 4)
432#define DIGEST_BIN_BYTES (DIGEST_BITS / 8)
433#define MAX_DIGEST_BIN_BYTES DIGEST_BIN_BYTES
434
435struct EXTRACTOR_Keywords *
436libextractor_hash_md5_extract (const char *filename,
437 const char *data,
438 size_t size, struct EXTRACTOR_Keywords *prev)
439{
440 unsigned char bin_buffer[MAX_DIGEST_BIN_BYTES];
441 char hash[8 * MAX_DIGEST_BIN_BYTES];
442 char buf[16];
443 int i;
444
445 md5_buffer (data, size, bin_buffer);
446 hash[0] = '\0';
447 for (i = 0; i < DIGEST_HEX_BYTES / 2; i++)
448 {
449 snprintf (buf, 16, "%02x", bin_buffer[i]);
450 strcat (hash, buf);
451 }
452 prev = addKeyword (prev, hash, EXTRACTOR_HASH_MD5);
453
454 return prev;
455}