AmendHub

Download

plus

/

MD5

/

md5asm.c

 

(View History)

mrw   Implement 68000 assembly speed ups in md5asm.c Latest amendment: 2 on 2024-10-22

1
2 /* MD5ASM.C - MD5 with inline assembly speed-ups
3 * derived from the RSA Data Security, Inc.
4 * MD5 Message-Digest Algorithm
5 *
6 * The speed ups are as follows:
7 *
8 * 1. ROTATE_LEFT has been replaced with a series of macros that
9 * emit ROL.L instructions, each of which rotates by at most
10 * 8 bits;
11 *
12 * 2. MD5Transform replaces the only function calls, Decode and
13 * MD5_memset, with inline assembly implementations;
14 *
15 * 3. MD5_memcpy and MD5_memset copy THINK C 5.0's assembly
16 * implementations.
17 *
18 * Validate with mddriver.c
19 */
20
21 /* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
22 rights reserved.
23
24 License to copy and use this software is granted provided that it
25 is identified as the "RSA Data Security, Inc. MD5 Message-Digest
26 Algorithm" in all material mentioning or referencing this software
27 or this function.
28
29 License is also granted to make and use derivative works provided
30 that such works are identified as "derived from the RSA Data
31 Security, Inc. MD5 Message-Digest Algorithm" in all material
32 mentioning or referencing the derived work.
33
34 RSA Data Security, Inc. makes no representations concerning either
35 the merchantability of this software or the suitability of this
36 software for any particular purpose. It is provided "as is"
37 without express or implied warranty of any kind.
38
39 These notices must be retained in any copies of any part of this
40 documentation and/or software.
41
42 Assembly speed-ups ©2024 mrw <plus@m0001a.org>
43 */
44
45 #include "global.h"
46 #include "md5.h"
47
/* Left-rotation amounts for the MD5Transform routine.
 * Sxy is the y-th rotation amount of round x; every round cycles through
 * its four amounts four times.
 *
 * These stay plain integer literals: the ROTATE_LEFT_* macros paste them
 * into the immediate operand of an inline rotate instruction.
 */

/* Round 1 */
#define S11  7
#define S12 12
#define S13 17
#define S14 22

/* Round 2 */
#define S21  5
#define S22  9
#define S23 14
#define S24 20

/* Round 3 */
#define S31  4
#define S32 11
#define S33 16
#define S34 23

/* Round 4 */
#define S41  6
#define S42 10
#define S43 15
#define S44 21
66
67 static void MD5Transform PROTO_LIST ((UINT4 [4], unsigned char [64]));
68 static void Encode PROTO_LIST
69 ((unsigned char *, UINT4 *, unsigned int));
70 static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned long));
71 static void MD5_memset PROTO_LIST ((POINTER, int, unsigned long));
72
/* Padding source used by MD5Final: one 0x80 byte followed by zero bytes.
 *
 * NOTE(review): when _MD5_EXPLICIT_INIT is defined only a pointer is
 * declared here, and nothing visible in this file assigns it -- confirm
 * where it is meant to be set up before MD5Final is called.
 */
#ifdef _MD5_EXPLICIT_INIT
static unsigned char *PADDING;
#else
static unsigned char PADDING[64] = {
  0x80                  /* the remaining 63 elements are zero-initialized */
};
#endif
82
/* F, G, H and I are the basic MD5 functions, one per round.
 * Each combines three 32-bit words bitwise.  Every use of an argument is
 * parenthesized -- including the operand of '~' -- so that a compound
 * expression passed as an argument is complemented as a whole.
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))
89
90 /* ROTATE_LEFT rotates x left n bits.
91 * Displacement is a literal to reduce register pressure;
92 * that requires it be <= 8, thus the 3 variants.
93 */
94 #define LITERAL #
95 #define ROTATE_LEFT_A0(x, n) asm { rol.l LITERAL n,x }
96 #define ROTATE_LEFT_A8(x, n) asm { rol.l #8,x } \
97 asm { rol.l LITERAL n-8,x }
98 #define ROTATE_LEFT_AA(x, n) asm { rol.l #8,x } \
99 asm { rol.l #8,x} \
100 asm { rol.l LITERAL n-16,x }
101
/* FF, GG, HH and II step macros for rounds 1, 2, 3 and 4.
 *
 * Every step performs
 *     a = b + rotate_left (a + f (b, c, d) + x + ac, s)
 * with f being F, G, H or I respectively.  The sum is accumulated into
 * (a) first and rotated in place afterwards, so it is computed only once.
 *
 * The A0 / A8 / AA suffix selects ROTATE_LEFT_A0 / _A8 / _AA and must be
 * chosen to suit the rotation amount s.  Because (a) is handed to the
 * inline assembler by the rotate macro, it must be a plain variable name.
 *
 * MD5_STEP is the shared body; f and rot are the names of the round
 * function macro and the rotate macro to apply.
 */
#define MD5_STEP(f, rot, a, b, c, d, x, s, ac) { \
  (a) += f ((b), (c), (d)) + (x) + (UINT4)(ac); \
  rot (a, s); \
  (a) += (b); \
}

/* Round 1 */
#define FFA0(a, b, c, d, x, s, ac) \
  MD5_STEP (F, ROTATE_LEFT_A0, a, b, c, d, x, s, ac)
#define FFA8(a, b, c, d, x, s, ac) \
  MD5_STEP (F, ROTATE_LEFT_A8, a, b, c, d, x, s, ac)
#define FFAA(a, b, c, d, x, s, ac) \
  MD5_STEP (F, ROTATE_LEFT_AA, a, b, c, d, x, s, ac)

/* Round 2 */
#define GGA0(a, b, c, d, x, s, ac) \
  MD5_STEP (G, ROTATE_LEFT_A0, a, b, c, d, x, s, ac)
#define GGA8(a, b, c, d, x, s, ac) \
  MD5_STEP (G, ROTATE_LEFT_A8, a, b, c, d, x, s, ac)
#define GGAA(a, b, c, d, x, s, ac) \
  MD5_STEP (G, ROTATE_LEFT_AA, a, b, c, d, x, s, ac)

/* Round 3 */
#define HHA0(a, b, c, d, x, s, ac) \
  MD5_STEP (H, ROTATE_LEFT_A0, a, b, c, d, x, s, ac)
#define HHA8(a, b, c, d, x, s, ac) \
  MD5_STEP (H, ROTATE_LEFT_A8, a, b, c, d, x, s, ac)
#define HHAA(a, b, c, d, x, s, ac) \
  MD5_STEP (H, ROTATE_LEFT_AA, a, b, c, d, x, s, ac)

/* Round 4 */
#define IIA0(a, b, c, d, x, s, ac) \
  MD5_STEP (I, ROTATE_LEFT_A0, a, b, c, d, x, s, ac)
#define IIA8(a, b, c, d, x, s, ac) \
  MD5_STEP (I, ROTATE_LEFT_A8, a, b, c, d, x, s, ac)
#define IIAA(a, b, c, d, x, s, ac) \
  MD5_STEP (I, ROTATE_LEFT_AA, a, b, c, d, x, s, ac)
169
170 /* MD5 initialization. Begins an MD5 operation, writing a new context.
171 */
172 void MD5Init (context)
173 MD5_CTX *context; /* context */
174 {
175 context->count[0] = context->count[1] = 0;
176 /* Load magic initialization constants.
177 */
178 context->state[0] = 0x67452301;
179 context->state[1] = 0xefcdab89;
180 context->state[2] = 0x98badcfe;
181 context->state[3] = 0x10325476;
182 }
183
184 /* MD5 block update operation. Continues an MD5 message-digest
185 operation, processing another message block, and updating the
186 context.
187 */
188 void MD5Update (context, input, inputLen)
189 MD5_CTX *context; /* context */
190 unsigned char *input; /* input block */
191 unsigned int inputLen; /* length of input block */
192 {
193 unsigned int i, index, partLen;
194
195 /* Compute number of bytes mod 64 */
196 index = (unsigned int)((context->count[0] >> 3) & 0x3F);
197
198 /* Update number of bits */
199 if ((context->count[0] += ((UINT4)inputLen << 3))
200 < ((UINT4)inputLen << 3))
201 context->count[1]++;
202 context->count[1] += ((UINT4)inputLen >> 29);
203
204 partLen = 64 - index;
205
206 /* Transform as many times as possible.
207 */
208 if (inputLen >= partLen) {
209 MD5_memcpy
210 ((POINTER)&context->buffer[index], (POINTER)input, partLen);
211 MD5Transform (context->state, context->buffer);
212
213 for (i = partLen; i + 63 < inputLen; i += 64)
214 MD5Transform (context->state, &input[i]);
215
216 index = 0;
217 }
218 else
219 i = 0;
220
221 /* Buffer remaining input */
222 MD5_memcpy
223 ((POINTER)&context->buffer[index], (POINTER)&input[i],
224 inputLen-i);
225 }
226
227 /* MD5 finalization. Ends an MD5 message-digest operation, writing the
228 the message digest and zeroizing the context.
229 */
230 void MD5Final (digest, context)
231 unsigned char digest[16]; /* message digest */
232 MD5_CTX *context; /* context */
233 {
234 unsigned char bits[8];
235 unsigned int index, padLen;
236
237 /* Save number of bits */
238 Encode (bits, context->count, 8);
239
240 /* Pad out to 56 mod 64.
241 */
242 index = (unsigned int)((context->count[0] >> 3) & 0x3f);
243 padLen = (index < 56) ? (56 - index) : (120 - index);
244 MD5Update (context, PADDING, padLen);
245
246 /* Append length (before padding) */
247 MD5Update (context, bits, 8);
248 /* Store state in digest */
249 Encode (digest, context->state, 16);
250
251 /* Zeroize sensitive information.
252 */
253 MD5_memset ((POINTER)context, 0, sizeof (*context));
254 }
255
256
257 /* MD5 basic transformation. Transforms state based on block.
258 */
259 static void MD5Transform (state, block)
260 UINT4 state[4];
261 unsigned char block[64];
262 {
263 register unsigned long a, b, c, d;
264 unsigned long x[16];
265
266 // BUG - the assembler wants to copy x[0] instead of x into a0
267 // alias to a pointer to force it to copy the address
268 unsigned long *xp = x;
269
270 /* Inline Decode - translate block to 16 little endian integers */
271 asm {
272 movea.l block,a0 ;input addr
273 movea.l xp,a1 ;output addr
274 move.w #3,d0 ;loop counter
275 ;dbeq stops branching on -1
276 @dloop:
277 movem.l (a0),a/b/c/d ;input -> 4 reg
278
279 ;reverse a
280 rol.w #8,a ;1234 -> 1243
281 swap a ;1243 -> 4312
282 rol.w #8,a ;4312 -> 4321
283
284 ;reverse b
285 rol.w #8,b ;1234 -> 1243
286 swap b ;1243 -> 4312
287 rol.w #8,b ;4312 -> 4321
288
289 ;reverse c
290 rol.w #8,c ;1234 -> 1243
291 swap c ;1243 -> 4312
292 rol.w #8,c ;4312 -> 4321
293
294 ;reverse d
295 rol.w #8,d ;1234 -> 1243
296 swap d ;1243 -> 4312
297 rol.w #8,d ;4312 -> 4321
298
299 movem.l a/b/c/d,(a1) ;4 reg -> output
300 addi.l #16,a0 ;next input addr
301 addi.l #16,a1 ;next output addr
302 dbra d0,@dloop ;loop if > -1
303 }
304
305 a = state[0], b = state[1], c = state[2], d = state[3];
306
307 /* Round 1 */
308 FFA0 (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
309 FFA8 (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
310 FFAA (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
311 FFAA (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
312 FFA0 (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
313 FFA8 (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
314 FFAA (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
315 FFAA (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
316 FFA0 (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
317 FFA8 (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
318 FFAA (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
319 FFAA (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
320 FFA0 (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
321 FFA8 (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
322 FFAA (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
323 FFAA (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
324
325 /* Round 2 */
326 GGA0 (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
327 GGA8 (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
328 GGA8 (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
329 GGAA (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
330 GGA0 (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
331 GGA8 (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
332 GGA8 (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
333 GGAA (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
334 GGA0 (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
335 GGA8 (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
336 GGA8 (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
337 GGAA (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
338 GGA0 (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
339 GGA8 (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
340 GGA8 (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
341 GGAA (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
342
343 /* Round 3 */
344 HHA0 (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
345 HHA8 (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
346 HHA8 (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
347 HHAA (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
348 HHA0 (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
349 HHA8 (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
350 HHA8 (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
351 HHAA (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
352 HHA0 (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
353 HHA8 (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
354 HHA8 (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
355 HHAA (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
356 HHA0 (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
357 HHA8 (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
358 HHA8 (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
359 HHAA (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
360
361 /* Round 4 */
362 IIA0 (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
363 IIA8 (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
364 IIA8 (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
365 IIAA (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
366 IIA0 (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
367 IIA8 (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
368 IIA8 (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
369 IIAA (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
370 IIA0 (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
371 IIA8 (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
372 IIA8 (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
373 IIAA (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
374 IIA0 (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
375 IIA8 (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
376 IIA8 (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
377 IIAA (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
378
379 state[0] += a;
380 state[1] += b;
381 state[2] += c;
382 state[3] += d;
383
384 /* Zeroize sensitive information.
385 * Inline for speed.
386 */
387 asm {
388 movea.l xp,a0 ; A0 = output
389 move.l #15,a ; D2.L = n
390 @mloop move.b 0,(a0)+
391 dbra a,@mloop
392 }
393 }
394
395 /* Encodes input (UINT4) into output (unsigned char). Assumes len is
396 a multiple of 4.
397 */
398 static void Encode (output, input, len)
399 unsigned char *output;
400 UINT4 *input;
401 unsigned int len;
402 {
403 unsigned int i, j;
404
405 for (i = 0, j = 0; j < len; i++, j += 4) {
406 output[j] = (unsigned char)(input[i] & 0xff);
407 output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
408 output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
409 output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
410 }
411 }
412
413 /* Adapted from THINK C 5.0 mem.c
414 */
415 static void MD5_memcpy (output, input, len)
416 POINTER output;
417 POINTER input;
418 unsigned long len;
419 {
420 asm {
421 movea.l output,a0 ; A0 = output
422 movea.l input,a1 ; A1 = input
423 move.l len,d1 ; D1.L = n
424 bra.s @2
425 @1 move.b (a1)+,(a0)+
426 subq.l #1,d1
427 @2 bne.s @1
428 }
429 }
430
431 /* Adapted from THINK C 5.0 mem.c
432 */
433 static void MD5_memset (output, value, len)
434 POINTER output;
435 int value;
436 unsigned long len;
437 {
438 asm {
439 movea.l output,a0 ; A0 = output
440 move.b value,d1 ; D1.B = (unsigned char) c
441 move.l len,d2 ; D2.L = n
442 bra.s @2
443 @1 move.b d1,(a0)+
444 subq.l #1,d2
445 @2 bne.s @1
446 }
447 }