dnl -*- mode: m4; comment-start: "%" -*-
include(`macros.m4')divert(-1)
% $Id: sha.m4,v 1.9 1999/02/14 02:19:16 geoffk Exp $

dnl  v_W must start on a 64-byte boundary.  wcode must follow v_H.
dnl  v_A through v_E must be contiguous.
%  Fixed addresses of the working storage, all below 0x100.
%  v_H is five 4-byte words (20 bytes) and wcode is the 9 bytes right
%  after it.  v_A through v_E and v_u are one 4-byte word each.
%  v_iteration holds twice the round number: it steps 0, 2, ... 158.
%  v_A through v_E are the per-block working variables; they start as a
%  copy of v_H and are added back into v_H at the end of the block.
define(v_W,0x80)		The W array (64 bytes)
define(v_H,0xc0)		The hash
define(wcode,0xd4)		Some self-modifying code to help out
define(v_iteration,0xdd)	The iteration number
define(v_A,0xde)		The hash of this block
define(v_B,0xe2)
define(v_C,0xe6)
define(v_D,0xea)
define(v_E,0xee)
define(v_u,0xf2)		A temporary value (4 bytes)

divert`'dnl
%  SHA-1 round constants, one 4-byte entry per group of 20 rounds.
%  The last constant is stored twice on purpose: the table offset that
%  sha_transform computes comes out as 16 instead of 12 for round 79,
%  so that one round reads the fifth entry.
K_table:
	const4(0x5a827999)
	const4(0x6ed9eba1)
	const4(0x8f1bbcdc)
	const4(0xca62c1d6)
	const4(0xca62c1d6)

%  sha_transform: run the 80 SHA-1 rounds over the 64-byte block in v_W
%  and add the resulting A through E into the 20-byte hash at v_H.
%  Words are stored most significant byte first (byte 3 of each word is
%  the low byte, which is why every multi-byte carry chain starts at 3).
%  In:       v_W = block to hash, v_H = running hash, wcode = the stub
%            that sha_first copies in behind v_H.
%  Out:      v_H updated.  v_W is overwritten by the expanded schedule.
%  Scratch:  A, X, v_iteration, v_A through v_E, v_u.
%  Many sequences below depend on the carry flag surviving from one
%  instruction to a later one; recheck the carry before reordering.
sha_transform:
%  Start the working variables A through E from the current hash.
	ldx	#19
sha_hcopy_loop:
	lda	v_H,X
	sta	v_A,X
	decx
	bpl	sha_hcopy_loop	

%  v_iteration counts in steps of two: it holds 2*t for round t.
	lda	#0
sha_transform_loop:
	sta	v_iteration
%  Rounds 0 through 15 use W as supplied, so skip the schedule update.
%  NOTE(review): this relies on bcs being taken when A is below the
%  operand (borrow-style carry) -- confirm against the CPU manual.
	cmp	#16*2
	bcs	sha_no_polynomial

% Handle the polynomial in the bits of W
%  A becomes 4*(t mod 16), the byte offset of W[t mod 16] inside v_W.
%  wcode+1, wcode+3 and wcode+5 are taken to be the offset bytes of the
%  three indexed loads in the wcode stub (13*4, 8*4 and 2*4).  With bit 6
%  set an offset reaches 64 bytes further on.  The bit is set for all
%  three at word 0 and cleared at words 3, 8 and 14, the points where
%  word+13, word+8 and word+2 reach 16 and must wrap to the start of W.
	asla
	and	#0x3C
	cmp	#8*4
	bmi	poly_fix_1
	bne	poly_fix_2
%  word 8: the 8*4 source wraps from here on
	bclr6	wcode+3
	bra	poly_fix_done
poly_fix_1:
	cmp	#3*4
	bhi	poly_fix_done
	bne	poly_fix_1a
%  word 3: the 13*4 source wraps from here on
	bclr6	wcode+1
	bra	poly_fix_done
poly_fix_1a:
	cmp	#0
	bne	poly_fix_done
%  word 0: start of a 16-word pass, no source wraps yet
	bset6	wcode+5
	bset6	wcode+3
	bset6	wcode+1
	bra	poly_fix_done
poly_fix_2:
	cmp	#14*4
	bne	poly_fix_done
%  word 14: the 2*4 source wraps from here on
	bclr6	wcode+5
poly_fix_done:

%  X = one past the low byte of the target word, biased by -0x40 so that
%  0x40,X addresses the target word itself.
	add	#v_W-0x40+4
	tax
%  The carry is clear here (from the add).
sha_poly_loop:
%  Work from the low byte up: wcode returns the XOR of the three source
%  bytes in A, the old byte of this word is folded in, and the result is
%  rotated left through the carry.  The loop ends when X is a multiple
%  of 4, that is, after the high byte has been done.
	decx
	jmp	wcode
sha_wcode_ret:
	eor	0x40,X
	rola
	sta	0x40,X
	txa
	bit	#3
	bne	sha_poly_loop
%  The bit rotated out of the high byte is still in the carry; adding it
%  into the low byte completes the rotate left by one.
	lda	0x40+3,X
	adc	#0
	sta	0x40+3,X
sha_no_polynomial:
	
%  copy A into v_u, shifting it left by 7 and then complete the
%  rotate of v_u left by 5, by rotating right by 2
%  The bytes are stored one position round (a rotate left by 8) while
%  each one is rotated right by one bit through the carry.
	lda	v_A+3
	sta	v_u+2
	lsra
	lda	v_A
	rora
	sta	v_u+3
	lda	v_A+1
	rora
	sta	v_u
	lda	v_A+2
	rora
	sta	v_u+1
	ror	v_u+2

%  note that the accumulator now contains v_u+1
%  Each lsra below only serves to put the low bit of v_u+1 into the
%  carry so that the four ror instructions form a full 32-bit rotate.
	lsra
	ror	v_u+2
	ror	v_u+3
	ror	v_u
	ror	v_u+1
	lsra
	ror	v_u+2
	ror	v_u+3
	ror	v_u
	ror	v_u+1

%  add E to it
	add4(v_u,v_E,v_u)
%  and add W[s]
%  X = 4*(t mod 16), the byte offset of the schedule word for this round.
	lda	v_iteration
	asla
	and	#0x3C
	tax
	add4(v_u,v_u,`v_W,X')

%  and add K_t
	ldx	v_iteration
%  26 is close enough to 4*256/(20*2)  (except that it gets 79 wrong)
%  The code takes X after the mul as the scaled quotient (presumably the
%  high byte of the product).  Masking with 0x1C then yields 0, 4, 8
%  or 12 for the four groups of 20 rounds, and 16 for round 79, which is
%  why K_table and f_offsets each carry a fifth entry.
	lda	#26
	mul
	txa
	and	#0x1C
	tax
	add4(v_u,v_u,`K_table,X')

%  now add f_t(B,C,D)
%  f_0 is equivalent to ((C xor D) & B) xor D
%  f_2 is equivalent to (B & (C | D)) | (C & D)
%  X goes from a byte offset into K_table to a group number 0 through 4.
	lsrx
	lsrx
%  The carry will be clear here, from the lsrx.
%  f_offsets holds one-byte distances measured from f_offsets itself; the
%  distance is loaded into X and used for an indexed jump.
	ldx	f_offsets,X
	jmp	f_offsets,X
f_offsets:
	byte	f_0-f_offsets
	byte	f_1-f_offsets
	byte	f_2-f_offsets
	byte	f_1-f_offsets
	byte	f_1-f_offsets

%  Each f loop adds its function of B, C and D into v_u one byte at a
%  time, low byte first, chaining the carry through adc.
f_0:	ldx	#3
f_0_loop:
	lda	v_C,X
	eor	v_D,X
	and	v_B,X
	eor	v_D,X
	adc	v_u,X
	sta	v_u,X
	decx
	bpl	f_0_loop

	bra	donef
%  Not reached: the bra above is unconditional.
	stop
`%'  We can here use v_E for temporary.
%  Only byte 0 of v_E is used as scratch.  E has already been added into
%  v_u and is overwritten by the word rotate further down.
f_2:	ldx	#3
f_2_loop:
	lda	v_C,X
	ora	v_D,X
	and	v_B,X
	sta	v_E
	lda	v_C,X
	and	v_D,X
	ora	v_E
	adc	v_u,X
	sta	v_u,X
	decx
	bpl	f_2_loop
	bra	donef
%  f_1 is the plain XOR of B, C and D; it serves groups 1, 3 and 4.
f_1:	ldx	#3
f_1_loop:
	lda	v_C,X
	eor	v_D,X
	eor	v_B,X
	adc	v_u,X
	sta	v_u,X
	decx
	bpl	f_1_loop
donef:
	
%  Rotate B right 2 bits
	rotr4(v_B,2)

%  Rotate the words around by one word
%  E takes D, D takes C, C takes the rotated B, B takes A, A takes v_u.
	ldx	#3
sha_word_rotate_loop:
	lda	v_D,X
	sta	v_E,X
	lda	v_C,X
	sta	v_D,X
	lda	v_B,X
	sta	v_C,X
	lda	v_A,X
	sta	v_B,X
	lda	v_u,X
	sta	v_A,X
	decx
	bpl	sha_word_rotate_loop

%  do the next round...
%  A is carried into sha_transform_loop, which stores it in v_iteration.
	lda	v_iteration
	add	#2
	cmp	#2*80
	beq	sha_transform_loop_done
	jmp	sha_transform_loop
sha_transform_loop_done:

%  now, add A-E back in to H.
%  The carry is clear here, from the cmp.
%  X walks all 20 bytes from the last one down.  A gets the position just
%  processed (txa comes before decx).  While its low two bits are not
%  zero the loop stays inside one word and the carry chains on.  At the
%  high byte of a word the cmp against 0 both tests for the end (position
%  0) and, since it cannot borrow, leaves the carry clear for the next
%  word.
	ldx	#5*4-1
sha_add_h_loop:
	lda	v_A,X
	adc	v_H,X
	sta	v_H,X
	txa
	decx
	bit	#3
	bne	sha_add_h_loop
	cmp	#0
	bne	sha_add_h_loop
	rts

%  Initial image for v_H and wcode, copied as one 29-byte run by
%  sha_first: the five 4-byte initial hash words followed directly by
%  the 9-byte wcode stub.  Nothing may be inserted between the two.
initialH:
	const4(0x67452301)
	const4(0xefcdab89)
	const4(0x98badcfe)
	const4(0x10325476)
	const4(0xc3d2e1f0)
dnl  This is exactly 9 bytes long.
%  Template of the wcode stub.  It runs from its copy at address wcode,
%  where sha_transform sets and clears bit 6 of the three load offsets,
%  and it hands control back to sha_transform at sha_wcode_ret.
	lda	13*4,X
	eor	8*4,X
	eor	2*4,X
	jmp	sha_wcode_ret
%  sha_first: prepare for the first block of a message.
%  Copies 29 bytes starting at initialH to v_H: the 5 initial hash words
%  (5*4 bytes) plus the 9-byte stub that lands at wcode.
%  Scratch: A, X.
sha_first:
	ldx	#5*4+9-1
sha_first_copy:
	lda	initialH,X
	sta	v_H,X
	decx
	bpl	sha_first_copy
	rts

%  testprogram: self-test entered from the reset vector.
%  Builds the single padded block for the 3-byte message 0x61 0x62 0x63,
%  hashes it, and compares v_H with the 20 expected bytes in testdata.
%  Stops after a full match; on a mismatch it reports through dbga and
%  dbg first (presumably debug hooks from macros.m4) and then stops.
testprogram:
%  Zero the whole 64-byte block.
	ldx	#64-1
test_clear_w:
	clr	v_W,X
	decx
	bpl	test_clear_w
%  Message bytes, then bit 7 of the next (zeroed) byte as end marker.
	lda	#0x61
	sta	v_W
	lda	#0x62
	sta	v_W+1
	lda	#0x63
	sta	v_W+2
	bset7	v_W+3
%  Message length in bits (24) in the last byte of the block.
	lda	#0x18
	sta	v_W+63

	dbga
	jsr	sha_first
	dbga
	jsr	sha_transform
	dbga

%  Compare all 20 hash bytes, last byte first.
	ldx	#5*4-1
test_compare:
	lda	testdata,X
	cmp	v_H,X
	bne	test_mismatch
	decx
	bpl	test_compare
	stop
%  X still holds the position of the byte that differed.
test_mismatch:
	dbga
	dbg	v_H,X
	stop

%  Expected hash for the message above.
testdata: xbytes(a9993e364706816aba3e25717850c26c9cd0d89d)

%  Start execution at testprogram after reset.
	org	m_RESETv
	word	testprogram
