dnl -*- mode: m4; comment-start: "%" -*-
include(`macros.m4')divert(-1)
% $Id: rijndael-h.m4,v 1.2 1999/03/17 13:04:05 geoffk Exp $

% Memory locations used by this file.
define(v_K, 0x80)	The key, stored big-endian.
define(v_state, 0xA0)	The block to encrypt
define(v_W, 0xC0)	The current round keys.

The locations
define(`v_chosen_sbox',`0xe3,0xe5,0xe6,0xe9,0xea,0xec,0xf1,0xf2')
are used for the S-box lookups.  They all have Hamming weight 5.

% Scratch cells.  Three of the names below share address 0xE1, and the XOR
% temporary shares 0xe3 with the first S-box location, so none of them may
% be kept live across a step that uses one of the others.
% The S-box temporary is reread after the S-box locations, the S-box
% result and the counter have been written, so it has a cell of its own.
define(v_round, 0xE0)	The round number.
define(v_xor_cnt, 0xE1)	Counter for the key XOR.
define(v_sub_cnt, 0xE1)	Counter for the ByteSub operation
define(v_sbox_r, 0xE2)	Result of S-box lookup
define(v_xor_tmp, 0xE3)	Temporary for XOR.
define(v_sbox_tmp, 0xE4)	Temporary for the S-box lookup.
define(v_mixc_tmp, 0xE1)	Temporary for MixColumn
define(test_ram, 0xF3)	Memory for the test program.

define(NUMROUNDS,10)

% xorH: XOR two encoded bytes nibble by nibble, using table lookups only.
% XOR the value at $1+n with that at $2+n and put the result in $1+n,
% where n is the offset held in memory location $3.
%   $1  base of the destination operand (also the first source)
%   $2  base of the second source operand
%   $3  address of the cell holding the offset n
% Expects n in X on entry (the first load is indexed by X before $3 is
% read), and leaves n in X on exit.
% The low nibbles are combined first: the low nibble of the first operand
% is joined with the low nibble of the second (moved up by t_lowtohigh),
% looked up in t_xor, masked and parked in v_xor_tmp.  The high nibbles
% are then combined the same way through t_hightolow, and the two halves
% are merged before the store.
% Uses A, X and v_xor_tmp.
define(xorH,`dnl
	lda	$1,X
	and	#0x0F
	ldx	$2,X
	ora	t_lowtohigh,X
	tax
	lda	t_xor,X
	and	#0x0F
	sta	v_xor_tmp
	ldx	$3
	lda	$2,X
	and	#0xF0
	ldx	$1,X
	ora	t_hightolow,X
	tax
	lda	t_xor,X
	and	#0xF0
	ora	v_xor_tmp
	ldx	$3
	sta	$1,X
')dnl

% Perform an S-box lookup of the byte whose two encoded halves are stored
% at $1+n and $1+n+16, where n is the offset held in memory location $2.
% The result is put in A (high nibble) and in v_sbox_r (low nibble).
% X is loaded from $2 by the macro itself; on exit X, v_sbox_tmp and the
% v_chosen_sbox locations have been overwritten.
%
% Both halves have to be fetched while X still holds the offset n: the top
% two bits of the second half are taken first, and only then is X replaced
% by the first half so that it can go through t_to_07_2C.
% The combined 5-bit selector is kept in v_sbox_tmp and used twice, once
% for the eight low-nibble tables and once for the eight high-nibble
% tables.  Each time the eight candidates are written to the v_chosen_sbox
% locations and the remaining six bits of the second half pick one of them
% through t_to_E3_F2.
define(sboxH,`dnl
	ldx	$2
	lda	$1+16,X
	and	#0xC0
	ldx	$1,X
	ora	t_to_07_2C,X
	sta	v_sbox_tmp
	tax
	ldx	t_5_to_43_C2,X
forloop(`i',0,7,`dnl
	lda	t_sbox_low_`'i,X
	sta	choosei(i,`v_chosen_sbox')
')dnl
	ldx	$2
	lda	$1+16,X
	and	#0x3F
	tax
	ldx	t_to_E3_F2,X
	lda	,X
	sta	v_sbox_r
	ldx	v_sbox_tmp
	ldx	t_5_to_3D_BC,X
forloop(`i',0,7,`dnl
	lda	t_sbox_hi_`'i,X
	sta	choosei(i,`v_chosen_sbox')
')dnl
	ldx	$2
	lda	$1+16,X
	and	#0x3F
	tax
	ldx	t_to_E3_F2,X
	lda	,X
')

% Data format: the usual format is that 4 bits are put into each byte,
% interleaved with their complements, so A B C D goes to 
% A ~A B ~B C ~C D ~D .  In this format, each byte has Hamming weight 4,
% the minimum value is 0x55, the maximum value is 0xAA.

% However, some other forms are used.  These are expressed as
% XX_YY; in such a form the values are mapped, in increasing order, to
% the successive bytes between XX and YY (inclusive) that have the same
% Hamming weight as XX and YY.
% So, for instance, in the form 07_2C (Hamming weight 3) the values
% 0, 1, 2, 3 are represented as 07, 0B, 0D, 0E.

% There is also a form which contains 5 bits of information,
% in which the low 4 bits are encoded in the low 6 bits of the byte,
% in form 07_2C, and the high bit is encoded in the usual format in the
% high two bits of the byte.
% The minimum value of this form is 0x47, the maximum is 0xAC.  This form
% has Hamming weight 4.

% Names of tables:
% t_lowtohigh:	A ~A B ~B C ~C D ~D -> C ~C D ~D 0  0 0  0
% t_hightolow:	A ~A B ~B C ~C D ~D -> 0  0 0  0 A ~A B ~B
% t_xor:	A ~A B ~B C ~C D ~D -> (A^C) ~(A^C) (B^D) ~(B^D)
%					 (A^C) ~(A^C) (B^D) ~(B^D)
% t_to_07_2C:	A ~A B ~B C ~C D ~D -> 07_2C[ABCD]
% t_to_E3_F2:	0 0 B ~B C ~C D ~D  -> E3_F2[BCD]
% t_5_to_43_C2:	A ~A 07_2C[BCDE]    -> 43_C2[ABCDE]
% t_5_to_3D_BC:	A ~A 07_2C[BCDE]    -> 3D_BC[ABCDE]
% t_xtime_high:	A ~A 07_2C[BCDE]    -> C ~C D ~D E ~E (A^B) ~(A^B)
% t_xtime_low:	A ~A 07_2C[BCDE]    -> (C^A) ~(C^A) D ~D 
%					(E^A) ~(E^A) A ~A
% t_sbox_low_*: 43_C2[ABCDE]	    -> low 4 bits of S-box entry A*BCDE
%		(there are 8 of these tables)
% t_sbox_hi_*:	3D_BC[ABCDE]	    -> high 4 bits of S-box entry A*BCDE
%		(there are 8 of these tables)

divert`'dnl

% Round constants as plain bytes, listed from the last round to the first
% so that a round counter running downwards can be used as the offset.
% NOTE(review): the key schedule visible in this file reads const_RC_high
% and const_RC_low rather than this table; confirm where those two tables
% are defined and whether this one is still needed.
const_RC:
	bytes(0x36,0x1B,0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01)

% Subroutine wrapper around the S-box lookup macro, so that the long
% expansion is emitted only once.
% Input:   v_sub_cnt holds the address of the first half of the byte to
%          look up; the second half is read 16 bytes further on.
%          The original note says to expect v_sub_cnt in X as well.
% Output:  A holds the high nibble of the result, v_sbox_r the low nibble.
% X is not preserved; callers reload it from v_sub_cnt.
do_sbox:
	sboxH(0,v_sub_cnt)
	rts

% The main Rijndael routine.
% Inputs:  plaintext in v_state, key in v_K
% Outputs: ciphertext in v_state.
% Working storage: v_W receives a copy of the key and is updated in place
% every round; v_round counts the remaining rounds downwards.
rijndael_e:
% Copy the 0x20 bytes at v_K to v_W, last byte first.
	ldx	#0x20-1
e_key_copy_loop:
	lda	v_K,X
	sta	v_W,X
	decx
	bpl	e_key_copy_loop
	
	lda	#NUMROUNDS
	sta	v_round
e_round_loop:
% First, perform the key XOR
% X walks over the 32 bytes of v_state; each one is combined with the byte
% at the same offset in v_W.  The address is parked in v_xor_cnt because
% the XOR macro reloads X from there.
	ldx	#v_state
e_key_loop:
	stx	v_xor_cnt
	xorH(0,v_W-v_state,v_xor_cnt)
	incx
	cpx	#v_state+32
	bne	e_key_loop

% Perhaps that was the last round.
% The counter goes negative only after the key XOR that follows the final
% round, so the key XOR runs once more than the round body does.
	dec	v_round
	bpl	e_more_round_loop
	
% We're done!
	rts

e_more_round_loop:
% Update the key schedule
% Layout used here: the first half of round-key byte n is at v_W+n and the
% second half at v_W+16+n, matching what the S-box lookup reads.
%
% Step 1: bytes 0..3 of the round key are combined with the S-box images
% of bytes 13, 14, 15, 12.  The lookup returns the high nibble in A and
% the low nibble in v_sbox_r; A is parked in a scratch cell that the
% lookup and the XOR macro do not use, and both halves are then folded
% into the round key with the table-driven XOR.
define(`v_ks_tmp', 0xE7)dnl
forloop(`i',0,3,`dnl
	ldx	#v_W+eval(3*4)+eval((i+1)&3)
	stx	v_sub_cnt
	jsr	do_sbox
	sta	v_ks_tmp
	ldx	#v_W+i
	stx	v_xor_cnt
	xorH(0,eval(v_ks_tmp-v_W-i),v_xor_cnt)
	ldx	#v_W+16+i
	stx	v_xor_cnt
	xorH(0,eval(v_sbox_r-v_W-16-i),v_xor_cnt)
')dnl

% Step 2: fold the round constant into byte 0, high half at v_W and low
% half at v_W+16.
% NOTE(review): a plain exclusive-or keeps the bit/complement pairing only
% if the two constant tables store every bit twice; they are not defined
% in the part of the file visible here, so confirm their encoding.
	ldx	v_round
	lda	v_W
	eor	const_RC_high,X
	sta	v_W
	lda	v_W+16
	eor	const_RC_low,X
	sta	v_W+16

% Step 3: each of bytes 4..15 is combined with the byte four places
% before it, in ascending order, first for the first halves and then,
% after skipping from v_W+12 to v_W+16, for the second halves.
	ldx	#v_W
e_keysched_loop:
	stx	v_xor_cnt
	xorH(4,0,v_xor_cnt)
	incx
	cpx	#v_W+12
	bne	e_keysched_noskip
	ldx	#v_W+16
e_keysched_noskip:
	cpx	#v_W+16+12
	bne	e_keysched_loop

% Do the ByteSub transformation
% For each of the 16 state bytes the S-box subroutine reads the first half
% at X and the second half at X+16, and returns the high nibble in A and
% the low nibble in v_sbox_r.  The results are written back to the same
% two places, so the bytes still waiting to be substituted are untouched.
	ldx	#v_state
e_bytesub_loop:
	stx	v_sub_cnt
	jsr	do_sbox
	ldx	v_sub_cnt
	sta	,X
	lda	v_sbox_r
	sta	16,X

	incx
	cpx	#v_state+16
	bne	e_bytesub_loop

% Do the ShiftRow transformation
% Rows 1, 2 and 3 are rotated by 1, 2 and 3 places; the first three lines
% handle the first halves of the state bytes and the last three the second
% halves, which start 16 bytes further on.
% NOTE(review): the third argument appends an X-indexed suffix to every
% operand, and X equals v_state+16 when the ByteSub loop above falls
% through.  Confirm that the rotate macro sets X itself or that the
% suffix is intended.
	rotbl4(0*4+v_state+1,1,`,X')
	rotbl4(0*4+v_state+2,2,`,X')
	rotbl4(0*4+v_state+3,3,`,X')
	rotbl4(0*4+v_state+1+16,1,`,X')
	rotbl4(0*4+v_state+2+16,2,`,X')
	rotbl4(0*4+v_state+3+16,3,`,X')

% Do the MixColumn transformation, except during the final round
% The round counter has already been decremented, so it reads zero while
% the final round is being processed.
% NOTE(review): this step works on the state bytes with plain exclusive-or
% and shift instructions and visits offsets 0..15 only, whereas the other
% steps in this file go through lookup tables on two encoded halves per
% byte.  It looks as if it has not been converted yet; confirm.
	tst	v_round
	beq	e_no_mixcolumn
	
	ldx	#v_state
e_mixcolumn_loop:
% v_mixc_tmp receives the exclusive-or of the four bytes of the column
% that starts at X.
	lda	,X
	eor	1,X
	eor	2,X
	eor	3,X
	sta	v_mixc_tmp

% xtime shifts A left by one bit and, when a bit was shifted out, folds in
% 0x1B.  Each use gets fresh labels.  The nop sits on the path where no
% bit was shifted out, presumably to even out the timing of the two paths.
define(`xtime',`lsla
newlabel`'dnl
	bcs	Tlbl`'_0
	nop
Tlbl`'_0:
	bcc	Tlbl`'_1
	eor	#0x1B
Tlbl`'_1:')dnl
forloop(`i',0,2,`dnl
	lda	i,X
	eor	i+1,X
	xtime
	eor	v_mixc_tmp
	eor	i,X
	sta	i,X
')dnl

% Do the last word specially
% Here, the accumulator contains the appropriate byte of the previous word.
% That is, A still holds the value just stored at offset 2.  Combining it
% with the new bytes at offsets 0 and 1 and with the saved column sum
% gives the byte stored at offset 3.
	eor	,X
	eor	1,X
	eor	v_mixc_tmp
	sta	3,X

% Advance X by four bytes to the next column.
	txa
	add	#4
	tax
	cpx	#v_state+16
	bne	e_mixcolumn_loop
	
e_no_mixcolumn:
	jmp	e_round_loop

% These are the rijndael S-boxes.
% The table is produced while the macros are being expanded:
%  - a list of 256 successive powers of 3 in GF(2^8) is built, starting
%    from 1; each step computes x XOR 2x, folding in 0x1b when the top bit
%    of x was set, and a matching discrete-log macro is recorded for each
%    of the first 255 powers;
%  - entry 0 is emitted directly as 0x63;
%  - for every other entry the multiplicative inverse is picked from the
%    power list at position 255 minus the log, and the byte emitted is
%    0x63 XOR the inverse XOR its left rotations by 1, 2, 3 and 4 bits.
% The emitted operands are XOR expressions left for the assembler to fold.
% NOTE(review): nothing in the part of the file visible here reads this
% table; the S-box lookup uses the t_sbox tables.  Confirm it is needed.
rijndael_sbox_e:
define(`pow_tab',quote(define(`p',1)`'dnl
forloop(`i',0,254,`p, dnl
define(`log_tab_'p,i)dnl
define(`p',eval(p ^ ((p << 1) & 0xFF) ifelse(eval(p&0x80),0,`',^ 0x1b)))dnl
')p))dnl
	byte 0x63
forloop(`i',1,255,`dnl
define(`p',choosei(eval(255-first(`log_tab_'i)), `pow_tab'))dnl
	byte 0x63^p`'dnl
forloop(`j',1,4,`^eval(((p >> (8-j)) | (p << j)) & 0xFF)')
')dnl

% Test harness: the harness macro is given test_ram as its work area, the
% key and state addresses with a length of 16 each, and the call to the
% encryption routine as the code under test.
% NOTE(review): the lengths passed are 16 bytes while the routine copies
% and processes 32 bytes per block; confirm where the conversion to and
% from the encoded form is meant to happen.
test_program(test_ram,v_K,16,v_state,v_state,16,`',jsr rijndael_e)

% Each entry below holds three 16-byte fields, presumably key, plaintext
% and expected ciphertext in that order; confirm against the harness.
test_data:
xbytes(00000000000000000000000000000000
	00000000000000000000000000000000 66e94bd4ef8a2c3b884cfa59ca342b2e)
xbytes(00000000000000000000000000000000
	66e94bd4ef8a2c3b884cfa59ca342b2e f795bd4a52e29ed713d313fa20e98dbc)
xbytes(00000000000000000000000000000000
	f795bd4a52e29ed713d313fa20e98dbc a10cf66d0fddf3405370b4bf8df5bfb3)
xbytes(a10cf66d0fddf3405370b4bf8df5bfb3
	00000000000000000000000000000000 d6f6a9c7e08242fc7e0c6eacd7257837)
xbytes(a10cf66d0fddf3405370b4bf8df5bfb3
	d6f6a9c7e08242fc7e0c6eacd7257837 6e3187c0e66f5bf72554093c6f4a03f4)
test_data_end:
