dnl -*- mode: m4; comment-start: "%" -*-
include(`macros.m4')divert(-1)
% $Id: crypton.m4,v 1.6 1999/02/14 02:19:14 geoffk Exp $

define(v_E, 0xA0)	The `expanded round keys', 32 bytes.
define(v_K, 0xC0)	The current round keys, 32 bytes.
define(v_U, 0xC0)	The key, pre-zero-padded.  Same address as the round keys.
define(v_A, 0xE0)	The block to encrypt, LSB-first
define(v_round, 0xF0)	The round number.
define(v_RC, 0xF1)	The current RC_i
define(v_pi_tmp2,0xF2)	Temporary for the pi permutation, 1 byte
define(v_pi_sel, 0xF3)	Do pi_o if bit 0 of this byte is set.
define(v_pi_tmp, 0xF4)	Temporary for the pi permutation, 4 bytes
define(v_ks_tmp, 0xF2)	Temporary for the key schedule, 1 byte
define(v_ks_tmp2, 0xF3)	Temporary for the key schedule, 1 byte
define(v_gma_sel, 0xF2)	The current byte being operated on by the gamma 
			permutation
define(v_rotvar, 0xF2)	Temporary for rotate.
define(v_sch_T0, 0xF0)	T_0 in the scheduler.
define(v_sch_T1, 0xF1)	T_1 in the scheduler.
define(test_ram, 0x90)	Memory for the test program.
% NOTE: several of the names above share an address, so two sharers must
% never be needed at the same moment.

define(NUMROUNDS,12)	The round number at which the main loop stops.

divert`'dnl

% Bit masks used by the pi permutation.  Each mask clears one pair of bits:
% 0xfc bits 0-1, 0xf3 bits 2-3, 0xcf bits 4-5, 0x3f bits 6-7.
% The four-byte pattern is stored twice so that crypton_pi can index it with
% a fixed offset of 0 to 3 plus X of 0 to 3 without wrapping.
const_M:
	bytes(0xfc, 0xf3, 0xcf, 0x3f, 0xfc, 0xf3, 0xcf, 0x3f)

% This is the bit permutation, pi, applied in place to the 16 bytes at v_A.
% There are even and odd versions; they differ only in which word of v_A is
% loaded into which temporary slot.
% Entry points:
%   crypton_pi_o  sets bit 0 of v_pi_sel, then runs the odd version
%   crypton_pi_e  clears bit 0 of v_pi_sel, then runs the even version
%   crypton_pi    runs whichever version bit 0 of v_pi_sel already selects
% Clobbers A, X, v_pi_tmp and v_pi_tmp2; X is 0xFF on return.
crypton_pi_o:
	bset0	v_pi_sel
	bra	crypton_pi

crypton_pi_e:
	bclr0	v_pi_sel
	% and fall through.

% The generalised routine will do pi_e if bit 0 of v_pi_sel is clear,
% pi_o otherwise.
crypton_pi:
% the outer loop operates on the byte within the word.
	ldx	#3
pi_loop:

% Gather byte X of each of the four words into the four slots of v_pi_tmp.
% The odd version loads words 0,1,2,3 in order; the even version loads
% words 3,0,1,2.
	brclr0	v_pi_sel,pi_is_even
forloop(`i',0,3,`dnl
	lda	v_A+eval(4*i),X
	sta	v_pi_tmp+i
')dnl
	bra	pi_done_evenodd
pi_is_even:
forloop(`i',0,3,`dnl
	lda	v_A+eval(4*((i+3)&3)),X
	sta	v_pi_tmp+i
')dnl
pi_done_evenodd:

% Byte X of output word w becomes the XOR, over the four slots s, of
% slot s ANDed with the const_M entry at offset ((w+s) mod 4) + X.
% v_pi_tmp2 holds the running XOR between terms.
forloop(`i',0,3,`dnl
	lda	v_pi_tmp
	and	const_M+i,X
forloop(`j',1,3,`dnl
	sta	v_pi_tmp2
	lda	v_pi_tmp+j
	and	const_M+eval((i+j)&3),X
	eor	v_pi_tmp2
')dnl
	sta	v_A+eval(4*i),X
')dnl
% Loop until X goes negative.  The backward jump is a jmp rather than a
% branch, presumably because the unrolled body is out of short-branch range.
	decx
	bmi	pi_done
	jmp	pi_loop
pi_done:
	rts

% tau, the byte transposition: treat the 16 bytes at v_A as a 4x4 matrix
% stored row by row and transpose it in place.
% Each off-diagonal pair is exchanged through X and A; the pairs do not
% overlap, so only the final exchange decides what A, X and the flags hold
% on return.
crypton_tau:
define(`do_tau_swap',`ldx	$1
	lda	$2
	sta	$1
	stx	$2')dnl
	do_tau_swap(v_A+7,v_A+13)
	do_tau_swap(v_A+6,v_A+9)
	do_tau_swap(v_A+3,v_A+12)
	do_tau_swap(v_A+2,v_A+8)
	do_tau_swap(v_A+1,v_A+4)
	do_tau_swap(v_A+11,v_A+14)
	rts

% Rotate the 4-byte variable whose address is in A.
% Inputs:  A = address of the variable
%          X = selector; rottab has 13 entries, so X may be 0 to 12
% X first indexes rottab to fetch the distance from rottab to the handler,
% and the jmp then uses that distance as its index.  As the selector goes
% 0, 1, 2 modulo 3 the handler is rotate_r8, rotate_16, rotate_l8.
% Each handler moves the address from A into X and expands the 4-byte
% rotate macro, defined outside this file, with a count of 3, 2 or 1 and
% with v_rotvar, presumably as scratch.
% NOTE(review): this comment used to read -- rotate left by X%3+1 bytes,
% for X < 12 -- which cannot be checked here without the macro definition.
rotate:
	ldx	rottab,X
	jmp	rottab,X

% One-byte offsets from rottab, so every handler must stay within 255 bytes
% after this label.
rottab:	bytes(rotate_r8-rottab, rotate_16-rottab, rotate_l8-rottab)
	bytes(rotate_r8-rottab, rotate_16-rottab, rotate_l8-rottab)
	bytes(rotate_r8-rottab, rotate_16-rottab, rotate_l8-rottab)
	bytes(rotate_r8-rottab, rotate_16-rottab, rotate_l8-rottab)
	bytes(rotate_r8-rottab)

rotate_l8:
	tax
	rotbl4(`0,X',1,v_rotvar)
	rts

rotate_r8:
	tax
	rotbl4(`0,X',3,v_rotvar)
	rts

rotate_16:
	tax
	rotbl4(`0,X',2,v_rotvar)
	rts

% This is the s-box lookup, gamma, applied to 4 bytes from X.
% It comes in two flavours, gamma_0start and gamma_1start.
% gamma_0start substitutes the bytes at offsets 0 and 2 through crypton_S0
% and the bytes at offsets 1 and 3 through crypton_S1.
% Inputs:  X = address of the 4 bytes
% Outputs: the 4 bytes replaced in place
% X is needed both as the pointer and as the table index, so the pointer is
% parked in v_gma_sel and reloaded after every lookup.  On return X holds
% the entry address again and A holds the last substituted byte.
gamma_0start:
	stx	v_gma_sel
	ldx	,X
	lda	crypton_S0,X
	ldx	v_gma_sel
	sta	,X
	ldx	1,X
	lda	crypton_S1,X
	ldx	v_gma_sel
	sta	1,X
	ldx	2,X
	lda	crypton_S0,X
	ldx	v_gma_sel
	sta	2,X
	ldx	3,X
	lda	crypton_S1,X
	ldx	v_gma_sel
	sta	3,X
	rts
% The other flavour of the s-box lookup: the bytes at offsets 0 and 2 go
% through crypton_S1 and the bytes at offsets 1 and 3 through crypton_S0.
% Inputs:  X = address of the 4 bytes
% Outputs: the 4 bytes replaced in place
% The pointer is parked in v_gma_sel while X serves as the table index.
% On return X holds the entry address again and A the last substituted byte.
gamma_1start:
	stx	v_gma_sel
	ldx	,X
	lda	crypton_S1,X
	ldx	v_gma_sel
	sta	,X
	ldx	1,X
	lda	crypton_S0,X
	ldx	v_gma_sel
	sta	1,X
	ldx	2,X
	lda	crypton_S1,X
	ldx	v_gma_sel
	sta	2,X
	ldx	3,X
	lda	crypton_S0,X
	ldx	v_gma_sel
	sta	3,X
	rts

% The main crypton routine.
% Inputs:  plaintext in v_A, scheduled key in v_E
% Outputs: ciphertext in v_A.
% Side effects: overwrites the 32 bytes at v_K, which is the same memory as
% v_U, so the raw key does not survive a call.  Also uses v_round, v_RC and
% the shared temporaries.  v_E is only read, never written.
crypton:
% Copy the 32-byte scheduled key into the working area v_K.
	ldx	#0x20-1
key_copy_loop:
	lda	v_E,X
	sta	v_K,X
	decx
	bpl	key_copy_loop
	
% The round constant starts at 1 and the round counter at 0.
	lda	#0x01
	sta	v_RC
	clr	v_round
round_loop:
% We are now doing the next round.
	inc	v_round
% First, perform the key XOR and exchange the current and next keys.
% Afterwards v_K holds the key for the following round and v_K+0x10 holds
% the key that was just used.
	ldx	#0x10-1
key_loop:
	lda	v_K,X
	sta	v_ks_tmp
	eor	v_A,X
	sta	v_A,X
	lda	v_K+0x10,X
	sta	v_K,X
	lda	v_ks_tmp
	sta	v_K+0x10,X
	decx
	bpl	key_loop

% Now update the just-used key, in v_K+0x10, to be the key after next.
% This is skipped once the round counter has reached NUMROUNDS.
	lda	v_round
	cmp	#NUMROUNDS
	beq	round_loop_no_sched

% Perform the constant additions (actually eors).
% Bit 1 of the round number selects the words: v_ks_tmp2 becomes 0 or 4,
% the byte offset of the words rotated further down, and X becomes the
% other offset.  The loop XORs v_RC into the four bytes at that offset in
% each 8-byte half of the key, stopping when X reaches a multiple of 4.
	lsla
	and	#4
	sta	v_ks_tmp2
	eor	#4
	tax
rc_loop:
	lda	v_K+0x10,X
	eor	v_RC
	sta	v_K+0x10,X
	lda	v_K+0x18,X
	eor	v_RC
	sta	v_K+0x18,X
	incx
	txa
	bit	#3
	bne	rc_loop

% Perform the rotations
% The word in the first half is rotated with selector round-1, the word in
% the second half with selector round.
	lda	v_ks_tmp2
	add	#v_K+0x10
	ldx	v_round
	decx
	jsr	rotate
	lda	v_ks_tmp2
	add	#v_K+0x18
	ldx	v_round
	jsr	rotate

% Update RC
% The shift happens only when the round number is even.
	brset0	v_round,no_RC_shift
	lsl	v_RC
no_RC_shift:
round_loop_no_sched:

% Do the S-boxes, this is the `gamma' operator.
% Odd rounds take the gamma_o path: words 0 and 2 start with S0, words 1
% and 3 start with S1.  Even rounds use the opposite assignment.
	ldx	#v_A
	brset0	v_round,gamma_o
	jsr	gamma_1start
	ldx	#v_A+4
	jsr	gamma_0start
	ldx	#v_A+8
	jsr	gamma_1start
	ldx	#v_A+12
	jsr	gamma_0start
	bra	gamma_done
gamma_o:
	jsr	gamma_0start
	ldx	#v_A+4
	jsr	gamma_1start
	ldx	#v_A+8
	jsr	gamma_0start
	ldx	#v_A+12
	jsr	gamma_1start
gamma_done:

% Do the permutations
% The round number is stored in v_pi_sel, so odd rounds run pi_o and even
% rounds run pi_e.  The last round takes the separate exit path instead.
	lda	v_round
	cmp	#NUMROUNDS
	beq	no_pi_last_round
	sta	v_pi_sel
	jsr	crypton_pi
	jsr	crypton_tau
	jmp	round_loop

no_pi_last_round:
% We here need to perform a tau, then a key XOR with the key permuted by
% phi_e.  We can combine these to save a tau.

% First, copy the state out, and copy the key in.
% The state is parked in v_K+0x10 and the final key moves from v_K to v_A,
% because the permutation routines only operate on v_A.
	ldx	#0x10-1
last_copy_loop:
	lda	v_A,X
	sta	v_K+0x10,X
	lda	v_K,X
	sta	v_A,X
	decx
	bpl	last_copy_loop
% Perform phi_e * tau^-1 on the key
	jsr	crypton_tau
	jsr	crypton_pi_e

% XOR the parked state with the transformed key, leaving the result in v_A.
	ldx	#0x10-1
last_key_loop:
	lda	v_K+0x10,X
	eor	v_A,X
	sta	v_A,X
	decx
	bpl	last_key_loop

% Perform the last tau and return.
% This is a tail call: the rts inside crypton_tau returns to our caller.
	jmp	crypton_tau

% These are the crypton S-boxes.
% Both 256-entry tables are computed at m4 time from the three 16-entry
% nibble tables below.  One entry is emitted per pair of loop values, outer
% value first, so the table index is 16 times the outer value plus the
% inner value.  Writing h for the outer value, l for the inner value and
% p0, p1, p2 for the nibble tables, each crypton_S0 entry is
%   t  = p0[l]
%   yr = l ^ p1[h ^ t]
%   yl = h ^ t ^ p2[yr]
%   entry = yl << 4 | yr
% assuming the list-lookup helper from macros.m4 returns the element at the
% given zero-based position.
% The intermediate nibbles are evaluated once, at definition time, exactly
% as the crypton_S1 generator does.
define(s_p0,`15,9,6,8,9,9,4,12,6,2,6,10,1,3,5,15')dnl
define(s_p1,`10,15,4,7,5,2,14,6,9,3,12,8,13,1,11,0')dnl
define(s_p2,`0,4,8,4,2,15,8,13,1,1,15,7,2,11,14,15')dnl
dnl
crypton_S0:
forloop(`i',0,15,`dnl
forloop(`j',0,15,`dnl
define(`s_p0xr',choosei(j,`s_p0'))dnl
define(`s_yr',eval(j ^ choosei(eval(i^s_p0xr), `s_p1')))dnl
define(`s_yl',eval(i ^ s_p0xr ^ choosei(s_yr, `s_p2')))dnl
	byte 0x`'eval(s_yl << 4 | s_yr, 16, 2)
')')dnl

% The second S-box, generated the same way as crypton_S0 but with the roles
% of the first and third nibble tables exchanged: the inner loop value is
% looked up in s_p2, and the final lookup uses s_p0.  The table index is 16
% times the outer loop value plus the inner loop value.
crypton_S1:
forloop(`i',0,15,`dnl
forloop(`j',0,15,`dnl
define(`s_p0xr',choosei(j,`s_p2'))dnl
define(`s_yr',eval(j ^ choosei(eval(i^s_p0xr), `s_p1')))dnl
define(`s_yl',eval(i ^ s_p0xr ^ choosei(s_yr, `s_p0')))dnl
	byte 0x`'eval(s_yl << 4 | s_yr, 16, 2)
')')dnl

% Key schedule entry point for 128-bit keys.
% Inputs:  16 key bytes at v_U
% Outputs: scheduled key in v_E
% Zero-fills v_U+16 up to v_U+31, turning the key into a padded 256-bit
% one, and then runs straight on into crypton_schedule, which must stay
% immediately below this routine.
crypton_sch_128:
	ldx	#15
	lda	#0
sch_128_loop:
	sta	v_U+16,X
	decx
	bpl	sch_128_loop
% no rts here: execution continues in the 256-bit schedule

% The crypton key schedule for encryption
% Inputs:  key in v_U, 256 bits.
% Outputs: scheduled key in v_E
% Side effects: uses v_A as the working block, so any plaintext must be
% loaded after this routine has run.  Also uses v_sch_T0, v_sch_T1 and the
% temporaries of the permutation and s-box routines.
crypton_schedule:
% Load words 0, 2, 4 and 6 of the key into the working block.
	ldx	#3
sch_copy0:
forloop(`i',0,3,`dnl
	lda	v_U+eval(i*2*4),X
	sta	v_A+eval(i*4),X
')dnl
	decx
	bpl	sch_copy0
% do pi_o, sigma_P, gamma_o, tau, in that order.
	jsr	crypton_pi_o
% sigma_P: XOR the first 16 bytes of const_PQ into the block.
	ldx	#15
sch_sigmaP_loop:
	lda	v_A,X
	eor	const_PQ,X
	sta	v_A,X
	decx
	bpl	sch_sigmaP_loop

	ldx	#v_A+0
	jsr	gamma_0start
	ldx	#v_A+4
	jsr	gamma_1start
	ldx	#v_A+8
	jsr	gamma_0start
	ldx	#v_A+12
	jsr	gamma_1start
	jsr	crypton_tau

% Park the first transformed half in v_E.
	ldx	#15
sch_copy0out:
	lda	v_A,X
	sta	v_E,X
	decx
	bpl	sch_copy0out
	
% Load words 1, 3, 5 and 7 of the key into the working block.
	ldx	#3
sch_copy1:
forloop(`i',0,3,`dnl
	lda	v_U+eval(4+i*2*4),X
	sta	v_A+eval(i*4),X
')dnl
	decx
	bpl	sch_copy1

% This half gets pi_e, then the second 16 bytes of const_PQ, then the
% s-boxes in the opposite assignment to the first half, then tau.
	jsr	crypton_pi_e
	ldx	#15
sch_sigmaQ_loop:
	lda	v_A,X
	eor	const_PQ+0x10,X
	sta	v_A,X
	decx
	bpl	sch_sigmaQ_loop
	ldx	#v_A+0
	jsr	gamma_1start
	ldx	#v_A+4
	jsr	gamma_0start
	ldx	#v_A+8
	jsr	gamma_1start
	ldx	#v_A+12
	jsr	gamma_0start
	jsr	crypton_tau
	
% Mix the two halves, one byte position X at a time.
% v_sch_T0 is the XOR of byte X of the four words parked in v_E, and
% v_sch_T1 the same for the four words in v_A.  The first 16 bytes of v_E
% are then XORed with v_sch_T1 in place, and the second 16 bytes of v_E
% receive the v_A words XORed with v_sch_T0.
	ldx	#3
sch_T_loop:
	lda	v_E,X
forloop(`i',1,3,`dnl
	eor	v_E+eval(i*4),X
')dnl
	sta	v_sch_T0
	lda	v_A,X
forloop(`i',1,3,`dnl
	eor	v_A+eval(i*4),X
')dnl
	sta	v_sch_T1
forloop(`i',0,3,`dnl
	lda	v_E+eval(i*4),X
	eor	v_sch_T1
	sta	v_E+eval(i*4),X
')dnl
forloop(`i',0,3,`dnl
	lda	v_A+eval(i*4),X
	eor	v_sch_T0
	sta	v_E+eval(4*4+i*4),X
')dnl
	decx
	bpl	sch_T_loop
	rts

% Key schedule constants, eight 32-bit values.  crypton_schedule XORs the
% first four into the block as sigma_P and the last four, at const_PQ+0x10,
% as the matching step for the second half of the key.
% NOTE(review): the byte order in memory is decided by the constant macro,
% defined outside this file; presumably least significant byte first to
% match the layout of v_A -- confirm.
const_PQ:
	const4r(0xbb67ae85)
	const4r(0x3c6ef372)
	const4r(0xa54ff53a)
	const4r(0x510e527f)
	const4r(0x9b05688c)
	const4r(0x1f83d9ab)
	const4r(0x5be0cd19)
	const4r(0xcbbb9d5d)

% Self-test driver, expanded from a macro defined outside this file.
% NOTE(review): the arguments suggest that, for every record in test_data,
% it loads 16 bytes into v_U and 16 bytes into v_A, runs the two jsr
% statements, and then compares 16 bytes of v_A -- confirm against the
% macro definition.
test_program(test_ram,v_U,16,v_A,v_A,16,jsr crypton_sch_128,jsr crypton)

% Note that these have to be written little-endian in memory words.
% Each record holds three 16-byte fields; presumably key, plaintext and
% expected ciphertext, in that order.
test_data:
xbytes(00000000000000000000000000000000
	00000000000000000000000000000000 ec62e539bb6bbc811a60c06faccb7ec8)
xbytes(00000000000000000000000000000000
	ec62e539bb6bbc811a60c06faccb7ec8 58ec6c6143775f808c96931a31f8ecf0)
xbytes(00000000000000000000000000000000
	58ec6c6143775f808c96931a31f8ecf0 3654b05642f19ef30a2b126f80e61771)
xbytes(00000000000000000000000000000000
	3654b05642f19ef30a2b126f80e61771 ab78d0f2a100ec772d0904b01870bb48)
xbytes(3654b05642f19ef30a2b126f80e61771
	00000000000000000000000000000000 fbe1510a4a33f227800d071ec8acb4ee)
xbytes(3654b05642f19ef30a2b126f80e61771
	fbe1510a4a33f227800d071ec8acb4ee 10502a72d9bce016187d8e26aae8830d)
test_data_end:
