/* MD5ASM.C - MD5 with inline assembly speed-ups * derived from the RSA Data Security, Inc. * MD5 Message-Digest Algorithm * * The speed ups are as follows: * * 1. ROTATE_LEFT has been replaced with a series of macros that * interpolate ROL.L instructions that each do a maximum rotation * of 8 bits; * * 2. MD5Transform replaces the only function calls, Decode and * MD5_memset, with inline assembly implementations; * * 3. MD5_memcpy and MD5_memset copy THINK C 5.0's assembly * implementations. * * Validate with mddriver.c */ /* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. License to copy and use this software is granted provided that it is identified as the "RSA Data Security, Inc. MD5 Message-Digest Algorithm" in all material mentioning or referencing this software or this function. License is also granted to make and use derivative works provided that such works are identified as "derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm" in all material mentioning or referencing the derived work. RSA Data Security, Inc. makes no representations concerning either the merchantability of this software or the suitability of this software for any particular purpose. It is provided "as is" without express or implied warranty of any kind. These notices must be retained in any copies of any part of this documentation and/or software. Assembly speed-ups ©2024 mrw */ #include "global.h" #include "md5.h" /* Constants for MD5Transform routine. */ #define S11 7 #define S12 12 #define S13 17 #define S14 22 #define S21 5 #define S22 9 #define S23 14 #define S24 20 #define S31 4 #define S32 11 #define S33 16 #define S34 23 #define S41 6 #define S42 10 #define S43 15 #define S44 21 static void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64])); static void Encode PROTO_LIST ((unsigned char *, UINT4 *, unsigned int)); static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned long)); static void MD5_memset PROTO_LIST ((POINTER, int, unsigned long)); #ifdef _MD5_EXPLICIT_INIT static unsigned char *PADDING; #else static unsigned char PADDING[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #endif /* F, G, H and I are basic MD5 functions. */ #define F(x, y, z) (((x) & (y)) | ((~x) & (z))) #define G(x, y, z) (((x) & (z)) | ((y) & (~z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) #define I(x, y, z) ((y) ^ ((x) | (~z))) /* ROTATE_LEFT rotates x left n bits. * Displacement is a literal to reduce register pressure; * that requires it be <= 8, thus the 3 variants. */ #define LITERAL # #define ROTATE_LEFT_A0(x, n) asm { rol.l LITERAL n,x } #define ROTATE_LEFT_A8(x, n) asm { rol.l #8,x } \ asm { rol.l LITERAL n-8,x } #define ROTATE_LEFT_AA(x, n) asm { rol.l #8,x } \ asm { rol.l #8,x} \ asm { rol.l LITERAL n-16,x } /* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. Rotation is separate from addition to prevent recomputation. A0,A8,AA variants correspond to rotates above. */ #define FFA0(a, b, c, d, x, s, ac) { \ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A0 (a, s); \ (a) += (b); \ } #define FFA8(a, b, c, d, x, s, ac) { \ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A8 (a, s); \ (a) += (b); \ } #define FFAA(a, b, c, d, x, s, ac) { \ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_AA (a, s); \ (a) += (b); \ } #define GGA0(a, b, c, d, x, s, ac) { \ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A0 (a, s); \ (a) += (b); \ } #define GGA8(a, b, c, d, x, s, ac) { \ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A8 (a, s); \ (a) += (b); \ } #define GGAA(a, b, c, d, x, s, ac) { \ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_AA (a, s); \ (a) += (b); \ } #define HHA0(a, b, c, d, x, s, ac) { \ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A0 (a, s); \ (a) += (b); \ } #define HHA8(a, b, c, d, x, s, ac) { \ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A8 (a, s); \ (a) += (b); \ } #define HHAA(a, b, c, d, x, s, ac) { \ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_AA (a, s); \ (a) += (b); \ } #define IIA0(a, b, c, d, x, s, ac) { \ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A0 (a, s); \ (a) += (b); \ } #define IIA8(a, b, c, d, x, s, ac) { \ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_A8 (a, s); \ (a) += (b); \ } #define IIAA(a, b, c, d, x, s, ac) { \ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ ROTATE_LEFT_AA (a, s); \ (a) += (b); \ } /* MD5 initialization. Begins an MD5 operation, writing a new context. */ void MD5Init (context) MD5_CTX *context; /* context */ { context->count[0] = context->count[1] = 0; /* Load magic initialization constants. */ context->state[0] = 0x67452301; context->state[1] = 0xefcdab89; context->state[2] = 0x98badcfe; context->state[3] = 0x10325476; } /* MD5 block update operation. Continues an MD5 message-digest operation, processing another message block, and updating the context. */ void MD5Update (context, input, inputLen) MD5_CTX *context; /* context */ unsigned char *input; /* input block */ unsigned int inputLen; /* length of input block */ { unsigned int i, index, partLen; /* Compute number of bytes mod 64 */ index = (unsigned int)((context->count[0] >> 3) & 0x3F); /* Update number of bits */ if ((context->count[0] += ((UINT4)inputLen << 3)) < ((UINT4)inputLen << 3)) context->count[1]++; context->count[1] += ((UINT4)inputLen >> 29); partLen = 64 - index; /* Transform as many times as possible. */ if (inputLen >= partLen) { MD5_memcpy ((POINTER)&context->buffer[index], (POINTER)input, partLen); MD5Transform (context->state, context->buffer); for (i = partLen; i + 63 < inputLen; i += 64) MD5Transform (context->state, &input[i]); index = 0; } else i = 0; /* Buffer remaining input */ MD5_memcpy ((POINTER)&context->buffer[index], (POINTER)&input[i], inputLen-i); } /* MD5 finalization. Ends an MD5 message-digest operation, writing the the message digest and zeroizing the context. */ void MD5Final (digest, context) unsigned char digest[16]; /* message digest */ MD5_CTX *context; /* context */ { unsigned char bits[8]; unsigned int index, padLen; /* Save number of bits */ Encode (bits, context->count, 8); /* Pad out to 56 mod 64. */ index = (unsigned int)((context->count[0] >> 3) & 0x3f); padLen = (index < 56) ? (56 - index) : (120 - index); MD5Update (context, PADDING, padLen); /* Append length (before padding) */ MD5Update (context, bits, 8); /* Store state in digest */ Encode (digest, context->state, 16); /* Zeroize sensitive information. */ MD5_memset ((POINTER)context, 0, sizeof (*context)); } /* MD5 basic transformation. Transforms state based on block. */ static void MD5Transform (state, block) UINT4 state[4]; unsigned char block[64]; { register unsigned long a, b, c, d; unsigned long x[16]; // BUG - the assembler wants to copy x[0] instead of x into a0 // alias to a pointer to force it to copy the address unsigned long *xp = x; /* Inline Decode - translate block to 16 little endian integers */ asm { movea.l block,a0 ;input addr movea.l xp,a1 ;output addr move.w #3,d0 ;loop counter ;dbeq stops branching on -1 @dloop: movem.l (a0),a/b/c/d ;input -> 4 reg ;reverse a rol.w #8,a ;1234 -> 1243 swap a ;1243 -> 4312 rol.w #8,a ;4312 -> 4321 ;reverse b rol.w #8,b ;1234 -> 1243 swap b ;1243 -> 4312 rol.w #8,b ;4312 -> 4321 ;reverse c rol.w #8,c ;1234 -> 1243 swap c ;1243 -> 4312 rol.w #8,c ;4312 -> 4321 ;reverse d rol.w #8,d ;1234 -> 1243 swap d ;1243 -> 4312 rol.w #8,d ;4312 -> 4321 movem.l a/b/c/d,(a1) ;4 reg -> output addi.l #16,a0 ;next input addr addi.l #16,a1 ;next output addr dbra d0,@dloop ;loop if > -1 } a = state[0], b = state[1], c = state[2], d = state[3]; /* Round 1 */ FFA0 (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FFA8 (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FFAA (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FFAA (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FFA0 (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FFA8 (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FFAA (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FFAA (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FFA0 (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FFA8 (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FFAA (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FFAA (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FFA0 (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FFA8 (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FFAA (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FFAA (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ /* Round 2 */ GGA0 (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GGA8 (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GGA8 (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GGAA (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GGA0 (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GGA8 (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GGA8 (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GGAA (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GGA0 (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GGA8 (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GGA8 (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GGAA (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GGA0 (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GGA8 (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GGA8 (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GGAA (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ /* Round 3 */ HHA0 (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HHA8 (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HHA8 (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HHAA (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HHA0 (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HHA8 (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HHA8 (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HHAA (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HHA0 (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HHA8 (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HHA8 (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HHAA (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HHA0 (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HHA8 (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HHA8 (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HHAA (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ /* Round 4 */ IIA0 (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ IIA8 (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ IIA8 (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ IIAA (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ IIA0 (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ IIA8 (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ IIA8 (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ IIAA (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ IIA0 (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ IIA8 (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ IIA8 (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ IIAA (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ IIA0 (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ IIA8 (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ IIA8 (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ IIAA (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; /* Zeroize sensitive information. * Inline for speed. */ asm { movea.l xp,a0 ; A0 = output move.l #15,a ; D2.L = n @mloop move.b 0,(a0)+ dbra a,@mloop } } /* Encodes input (UINT4) into output (unsigned char). Assumes len is a multiple of 4. */ static void Encode (output, input, len) unsigned char *output; UINT4 *input; unsigned int len; { unsigned int i, j; for (i = 0, j = 0; j < len; i++, j += 4) { output[j] = (unsigned char)(input[i] & 0xff); output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); } } /* Adapted from THINK C 5.0 mem.c */ static void MD5_memcpy (output, input, len) POINTER output; POINTER input; unsigned long len; { asm { movea.l output,a0 ; A0 = output movea.l input,a1 ; A1 = input move.l len,d1 ; D1.L = n bra.s @2 @1 move.b (a1)+,(a0)+ subq.l #1,d1 @2 bne.s @1 } } /* Adapted from THINK C 5.0 mem.c */ static void MD5_memset (output, value, len) POINTER output; int value; unsigned long len; { asm { movea.l output,a0 ; A0 = output move.b value,d1 ; D1.B = (unsigned char) c move.l len,d2 ; D2.L = n bra.s @2 @1 move.b d1,(a0)+ subq.l #1,d2 @2 bne.s @1 } }