dnl -*- mode: m4; comment-start: "%" -*-
include(`macros.m4')divert(-1)
% $Id: e2.m4,v 1.6 1999/03/17 13:04:05 geoffk Exp $

Locations of the working variables, and the cipher parameters.

define(`v_K', 0x80)		The key.
define(`v_kdata', 0x90)		The key schedule, 64 bytes.
define(`SCHEDSIZE', 2)		Number of rounds scheduled at once
				(should be power of 2)
define(`v_U', 0xC8)
define(`v_X0', 0xD0)
define(`v_X1', 0xD8)
define(`v_M', 0xE0)		The block to encrypt, MSB-first
define(`v_ftmp', v_kdata)	Temporary used during the f function
				(note that it overlaps v_kdata)
define(`v_round', 0xF6)		The current round number.
define(`v_bytenum', 0xF5)	Counter for various loops in the F function
define(`v_exchtmp', 0xF4)	Byte used during swaps

define(`test_ram', 0xF7)	Some RAM for the test program.

define(`NUMROUNDS', 12)		Number of rounds run

changecom(`')
Comment handling is off from here until it is switched back on further
down, because the percent sign is needed as the modulo operator while
the tables are computed.

The E2 sbox.
pow_tab expands to the successive powers of 3 in GF(2^8), starting
from 1 and reduced with 0x1b whenever the top bit was set.  While it is
being expanded, log_tab_N is set to the position of the value N within
that sequence.
define(`pow_tab',quote(define(`p',1)`'
forloop(`i',0,254,`p, 
  define(`log_tab_'p,i)
  define(`p',eval(p ^ ((p << 1) & 0xFF) ifelse(eval(p&0x80),0,`',^ 0x1b)))
')p))
e2_sbox_N is the sbox output for input N.  Input 0 maps to 225.  For any
other input the logarithm is scaled by 127 modulo 255, mapped back
through the power table, multiplied by 97 and offset by 225, keeping the
low 8 bits.
define(`e2_sbox_0',225)
forloop(`i',1,255,`
  define(`p',choosei(eval((127*first(`log_tab_'i))%255), `pow_tab'))
  define(`e2_sbox_'i,eval(((p*97)+225)&0xFF))
')

M4 versions of the S, P, and f functions.  They work on lists of byte
values while the file is being processed, and are used below to
compute constants.
S_func_on replaces every value in its list with the sbox entry for
that value.
P_func_on XORs all the values of its list together and then, for each
output byte, XORs the listed input positions back out.
f_func_on applies S_func_on and then P_func_on.
define(`S_func_on',`foreach_c(`t',`$1',`e2_sbox_`'eval(t)')')
define(`P_func_on',`dnl
  pushdef(`P_f_tmp',eval(patsubst(`$1',`,',`^')0))dnl
  foreach_c(`t',``0,7',`1,4',`2,5',`3,6',`2,6,7',`3,7,4',`0,4,5',`1,5,6'',
    eval(P_f_tmp foreach(`u',`t',^ choosei(u,`$1'))))dnl
  popdef(`P_f_tmp')')
define(`f_func_on',`P_func_on(S_func_on(`$1'))')

The value V_{-1}:
define(`const_vm1',`0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef')

The constant values set on K3 and K4 for short keys.
K3 is S applied three times to V_{-1}, and K4 is S applied to K3.
define(`const_k3',S_func_on(S_func_on(S_func_on(`const_vm1'))))
define(`const_k4',S_func_on(`const_k3'))

changecom(`%')

% Now, a definition of the real f function.  Expanding it emits the
% assembly for the S function followed by the P function on 8 bytes.
% The generated code uses both the A and X registers.

% The first argument must be a suitable argument for `pushdef'.  When
% it is expanded with a byte number (0 to 7) as its own $1, it must
% load that byte of the input into the X register (for instance,
% `ldx myvariable+$1').  For byte numbers >= 4 it must not change the
% A register, or at least must load it back from its own $2.

% The second argument, expanded the same way, must store the A register
% back out to the given byte of the output.  For byte numbers 4 to 6 it
% must not change the A register, and can use its own $2 as a temporary
% to help.

% The third argument must be an 8-byte temporary.  The e2 routine
% passes the same buffer as input, output and temporary.

% The two helper definitions are pushed on entry and popped again at
% the end, so nothing is left defined after the expansion.
define(`f_function',`dnl
pushdef(`f_tmp_input',`$1')dnl
pushdef(`f_tmp_output',`$2')dnl
% do the S function and the first 8 xors of the P function
foreach(`i',`3,2,1,0',`dnl
	f_tmp_input(i,`')
	lda	e2_sbox,X
	sta	i+$3
	f_tmp_input(i+4,i+$3)
	eor	e2_sbox,X
	sta	$3+4+i
ifelse(eval(i <= 1),1,`dnl
	eor	$3+2+i
	sta	$3+2+i
	lda	$3+i
	eor	$3+6+i
	sta	$3+i
')')dnl
% now, do the next 8 xors of the P function
foreach(`i',`3,2,1,0',`dnl
	lda	$3+4+i
	eor	$3+eval((i+3)&3)
	f_tmp_output(i+4,$3+i+4)
ifelse(i,3,`',`dnl
	eor	$3+i
	f_tmp_output(i)
')')dnl
% output byte 3 comes last because it needs the finished byte 7
	lda	$3+3
	eor	$3+7
	f_tmp_output(3)
popdef(`f_tmp_input')dnl
popdef(`f_tmp_output')dnl
')

divert`'dnl
% The E2 sbox: 256 bytes, one per input value, emitted from the
% e2_sbox_N definitions computed earlier in this file.
e2_sbox:
forloop(`i',0,255,`dnl
	byte	first(e2_sbox_`'i)
')dnl

% Table containing values for the BRL function: entry i is (i+7) mod 8.
% The round code in e2 uses entry i as the byte position into which the
% result computed for byte i is combined.
const_brl:
forloop(`i',0,7,`dnl
	byte	eval((i+7)&7)
')dnl

% E2 key-schedule-as-you-go
% Inputs: The key in v_K, the round number in v_round
% Output: The scheduled key in v_kdata.
% NOTE(review): this header describes the e2_sch routine that e2 calls;
% the routine itself does not follow the header here.

% Multiply v_kdata+0 by v_kdata+16, treating each as a 4-element vector
% of 32-bit big-endian words.  Put result in v_M.
% The multiplication itself is generated by the mul4 macro, which is not
% defined in this file section (presumably it comes from macros.m4).
% e2 calls this routine twice, once before and once after the rounds.
key_mul:
	mul4(v_M,v_kdata,v_kdata+16)
	rts

% E2 encryption
% Input: The key in v_K, the block to encrypt in v_M
% Output: The encrypted block in v_M
% Scratch: v_round, v_bytenum, v_exchtmp and v_kdata (including the
% part aliased as v_ftmp).  A and X are not preserved.
e2:
	clr	v_round
	jsr	e2_sch

% Initial transformation, step 1: XOR the block with schedule bytes
% 0-15.  The result is left in v_kdata, not in v_M.
	ldx	#15
initial_key_loop:
	lda	v_M,X
	eor	v_kdata,X
	sta	v_kdata,X
	decx
	bpl	initial_key_loop

% Step 2: set bit 0 of the last byte of each 32-bit word of schedule
% bytes 16-31, then multiply.  key_mul leaves the product in v_M.
forloop(`i',0,3,`dnl
	bset0	i*4+3+v_kdata+16
')dnl
	jsr	key_mul

% v_round advances by 16 per round, so that its masked value can be
% used directly as a byte offset into v_kdata.  The rounds start at 16.
% (This assumes e2_sch left v_round at 0 - TODO confirm.)
% NOTE(review): with the rounds starting at 16, the first round reads
% schedule bytes 16-31, which the initial transformation has just used
% and modified.  The exit constant was previously NUMROUNDS*16+32, which
% suggests the rounds may have been meant to start at 32.  Confirm
% against e2_sch; if so, move the start and the exit value together.
	bset4	v_round
round_loop:

% Compute the first key XOR of the F function: RHS of the message
% XOR the first 8 bytes of this round's subkey, into v_ftmp.
% v_ftmp aliases the start of v_kdata, so schedule bytes 0-7 are
% overwritten from here on.
	lda	#7
	sta	v_bytenum
first_half_loop:
	lda	v_round
	and	#eval((SCHEDSIZE-1)*16)
	add	v_bytenum
	tax
	lda	v_kdata,X
	ldx	v_bytenum
	eor	v_M+8,X
	sta	v_ftmp,X
	dec	v_bytenum
	bpl	first_half_loop

% Compute the f function, in place on v_ftmp.
	f_function(`ldx v_ftmp+$1',`sta v_ftmp+$1',v_ftmp)

% Compute the last key XOR of the F function, and
% XOR the result with the LHS of the message.
% Also swap the RHS and LHS of the message.
% For each byte: XOR with the second 8 bytes of the subkey, look the
% result up in the sbox, then use const_brl to choose the message
% position that receives it.
	lda	#7
	sta	v_bytenum
second_half_loop:
	lda	v_round
	and	#eval((SCHEDSIZE-1)*16)
	add	v_bytenum
	tax
	lda	v_kdata+8,X
	ldx	v_bytenum
	eor	v_ftmp,X
	tax
	lda	e2_sbox,X
	ldx	v_bytenum
	ldx	const_brl,X
	eor	v_M,X
	sta	v_exchtmp
	lda	v_M+8,X
	sta	v_M,X
	lda	v_exchtmp
	sta	v_M+8,X
	dec	v_bytenum
	bpl	second_half_loop

% Advance to the next round.  v_round started at 16, so it equals
% 16+NUMROUNDS*16 exactly when NUMROUNDS rounds have been run.  The exit
% test is made before the schedule test so that it is reached whatever
% the value of SCHEDSIZE.  Neither cmp nor bit changes A.
	lda	v_round
	add	#16
	sta	v_round
	cmp	#eval(NUMROUNDS*16+16)
	beq	rounds_done
% A new batch of subkeys is needed whenever the masked round number
% wraps back to zero.
	bit	#eval((SCHEDSIZE-1)*16)
	bne	next_round
	jsr	e2_sch
next_round:
	jmp	round_loop
rounds_done:

% compute (v_kdata+32)^-1 componentwise as a vector of 4 32-bit msb-first
% words, put result in v_kdata+16
% NOTE(review): no code here computes that inverse; key_mul below
% multiplies by whatever v_kdata+16 currently holds.

% Undo the last swap: copy the block, with its halves exchanged, into
% schedule bytes 0-15 as the first operand of key_mul.
	ldx	#7
final_copy_loop:
	lda	v_M+8,X
	sta	v_kdata,X
	lda	v_M,X
	sta	v_kdata+8,X
	decx
	bpl	final_copy_loop
	jsr	key_mul

% XOR the product with the last 16 bytes of the current schedule batch.
% NOTE(review): with SCHEDSIZE set to 2 this is v_kdata+16, the same
% bytes key_mul has just used as its multiplier.
	ldx	#15
final_key_loop:
	lda	v_M,X
	eor	v_kdata+eval(SCHEDSIZE*16-16),X
	sta	v_M,X
	decx
	bpl	final_key_loop

	rts
