;************************************************************************************************/
;* Noekeon_ARM7_DPA.s
;*
;* Last Modified: 00/08/30             Created: 00/08/30
;*
;* Project    : Nessie Proposal: NOEKEON
;*
;* Authors    : Joan Daemen, Michael Peeters, Vincent Rijmen, Gilles Van Assche
;*
;* Written by : Michael Peeters
;*
;* References : [NESSIE] see http://cryptonessie.org
;*              [FSE2000] Bitslice Ciphers and Power Analysis Attacks
;*
;* Description: implementation on an ARM7 processor of NOEKEON in DIRECT KEY MODE
;*              timing-attack resistant    
;*              DPA-attack resistant
;*              straightforward implementation
;*
;* Comments:
;*
;*   About Random bits --> we assume those bits already available in an external buffer
;*
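;*   Memory model is BIG ENDIAN: r0=0x12345678 
;*                               --> Write in memory at 0x1000:  0x1000-78 56 34 12
;*   NOTE(review): the byte sequence shown above (least significant byte 0x78 at the
;*                 lowest address) is the little-endian layout of 0x12345678; confirm
;*                 which memory model this code is actually meant to run under.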
;*   PC=R15, LR=R14, SP=R13
;*   Stack when interfacing with c: Decreasing before PUSH, Increasing after POP
;*
;************************************************************************************************/

  EXPORT NESSIEencryptDPA
  EXPORT NESSIEdecryptDPA
  EXPORT NESSIEkeysetupDPA
  IMPORT rndbits

  AREA ASM_Noekeon_Data, DATA, READWRITE

; Scratch buffer for the decryption working key.
; NESSIEdecryptDPA stores theta_no_add(first key vector) in thetakey1 and
; theta_no_add(second key vector) in thetakey2, then hands the address of
; thetakey1 to thetaDPA, which reads the second vector at offset +16.
; The two labels must therefore stay adjacent and in this order.
thetakey1		DCD	0x00000000, 0x00000000, 0x00000000, 0x00000000	; 4 words, rewritten by every decrypt call
thetakey2		DCD	0x00000000, 0x00000000, 0x00000000, 0x00000000	; 4 words, must directly follow thetakey1

  AREA ASM_Noekeon_Code, CODE, READONLY

NROUND			EQU 16				; Number of rounds executed by the main loops

; Literal words and tables read PC-relative by the routines below.
rhocstad		DCD	rhocst               	; address of the round-constant table
; Round constants, one byte per index 0..NROUND (17 values in total):
; the encrypt loop reads indices 0..15 and its final theta reads index 16,
; the decrypt loop reads indices 16..1 and its final theta reads index 0.
rhocst			DCB	0x80, 0x1b, 0x36, 0x6c	; indices 0..3
				DCB	0xd8, 0xab, 0x4d, 0x9a	; indices 4..7
				DCB	0x2f, 0x5e, 0xbc, 0x63	; indices 8..11
				DCB	0xc6, 0x97, 0x35, 0x6a	; indices 12..15
				DCB 0xd4, 0x00, 0x00, 0x00  ; index 16; the three 0x00 bytes are padding for word alignment

thetakeyad 		DCD thetakey1		; address of the decryption working-key buffer (data area)

state2			DCD	rndbits			; address of the imported random buffer used as the second share


;================================================================================================
; halftheta - one half of the LINEAR step (used for its first and third stage) - MACRO
; ------------------------------------------------------------------------------------
;
; With  sum = rin0 ^ rin1  and  F = sum ^ (sum ROR 8) ^ (sum ROR 24):
;
;   rout0 <- rout0 ^ F
;   rout1 <- rout1 ^ F
;
; Arguments (positional): rout0, rout1, rin0, rin1, then two scratch registers.
; On exit the first scratch register holds F and the second holds sum.
;
; Modified reg. : rout0, rout1 and both scratch registers
;================================================================================================
		MACRO
$label halftheta $rout0,$rout1,$rin0,$rin1,$fval,$sum
$label
		eor $sum,$rin0,$rin1				; sum  = rin0 ^ rin1
		eor $fval,$sum,$sum,ROR #8			; fval = sum ^ (sum ROR 8)
		eor $fval,$fval,$sum,ROR #24		; fval = F (ROR 24 is the same as ROL 8)
		eor $rout1,$rout1,$fval				; fold F into the second output word
		eor $rout0,$rout0,$fval				; fold F into the first output word
		MEND

;================================================================================================
; theta_no_add - LINEAR Step without key addition
; -----------------------------------------------
; DESCRIPTION
;   Applies the two halftheta stages to a 4-word vector with no key XOR in between.
;   NESSIEdecryptDPA calls it on each key vector to build the decryption working key.
; INPUT:
;   r5-r8 = vector
;   lr    = return address
; OUTPUT:
;   r5-r8 = new vector
;
; Reserved reg. : R0,R12-R15
; Modified reg. : R5-R10 (r9 and r10 are the halftheta scratch registers)
;------------------------------------------------------------------------------------------------
theta_no_add
  halftheta r6,r8,r5,r7,r9,r10		; r6,r8 ^= F(r5 ^ r7)
  halftheta r5,r7,r6,r8,r9,r10		; r5,r7 ^= F(r6 ^ r8), using the r6,r8 just updated
  
  mov pc,lr							; return to caller
;================================================================================================

;================================================================================================
; thetaDPA - LINEAR Step
; ----------------------
; DESCRIPTION
;   Performs theta with key addition on each of the two state shares independently:
;   state1 is combined with the 4-word vector at [r0], state2 with the vector at [r0+16].
;   The two shares are never mixed inside this routine.
; INPUT:
;   r0    = key1 pointer, r0+#16 = key2 pointer
;   r1-r4 = state1
;   r5-r8 = state2
;   lr    = return address
; OUTPUT:
;   r1-r4 = new state1
;   r5-r8 = new state2
;   r0    = unchanged (advanced by 16 internally, then restored)
;
; Reserved reg. : R0,R13-R15
; Modified reg. : R1-R12 (r9-r12 are used as scratch and to hold the key words,
;                 so a caller keeping a counter in r12 must reload it afterwards)
;------------------------------------------------------------------------------------------------
thetaDPA

;--- state1 ---
  halftheta r2,r4,r1,r3,r9,r10	; r2,r4 ^= F(r1 ^ r3)
  
  ldmia r0,{r9-r12}			; load key1 vector
  eor r1,r1,r9              ; add key1 word 0
  eor r2,r2,r10				; add key1 word 1
  eor r3,r3,r11				; add key1 word 2
  eor r4,r4,r12                           ; add key1 word 3
  
  halftheta r1,r3,r2,r4,r9,r10	; r1,r3 ^= F(r2 ^ r4)
 
;--- state2 ---
  add r0,r0,#16				;select key2

  halftheta r6,r8,r5,r7,r9,r10	; r6,r8 ^= F(r5 ^ r7)
  
  ldmia r0,{r9-r12}			; load key2 vector
  eor r5,r5,r9              ; add key2 word 0
  eor r6,r6,r10				; add key2 word 1
  eor r7,r7,r11				; add key2 word 2
  eor r8,r8,r12                           ; add key2 word 3
  
  halftheta r5,r7,r6,r8,r9,r10	; r5,r7 ^= F(r6 ^ r8)

  sub r0,r0,#16				; restore r0 to the key1 pointer
 
  mov pc,lr					; return to caller
;================================================================================================


;================================================================================================
; gammaDPA - NONLINEAR Step
; ---------------------
; DESCRIPTION
;   Performs gamma on a state held as two shares (the callers build state1 as
;   input ^ state2 and XOR the two shares together again at the end).
;   Every AND/OR of shared values is expanded into per-share products plus
;   cross products, and each result is accumulated into one share only, so the
;   two shares of a word are never XORed together in this routine:
;     (a1^a2).(b1^b2) -> share1 gets (a1.b1)^(a2.b1), share2 gets (a2.b2)^(a1.b2)
;     (a1^a2)v(b1^b2) -> share1 gets (a1 v b1)^(a1.b2), share2 gets (a2 v b2)^(a2.b1)
; INPUT:
;   r0    = key pointer (not referenced by this routine)
;   r1-r4 = state1
;   r5-r8 = state2
;   lr    = return address
; OUTPUT:
;   r1-r4 = new state1
;   r5-r8 = new state2
;
; Reserved reg. : R0,R13-R15
; Modified reg. : R1-R11 (r9-r11 are scratch)
;
; reduced definition of gamma is  
; ==>   r1 <- r1 ^ (r3 . r2)
;       r2 <- r2 ^ (r4 v r3)
;       r3 <- ~(r4 ^ r3 ^ r2 ^ r1)
;       r2 <- r2 ^ (r1 v r3)           
;       tmp<- r4 ^ (r3 . r2)
;       r4 <- r1
;       r1 <- tmp
;------------------------------------------------------------------------------------------------
gammaDPA

  ; --- 1 --- word0 ^= word2 . word1 (word1/word2 still hold their input values)
  and r9,r3,r2               
  eor r1,r1,r9				;r1 <- r1 ^ (r3 . r2)
  and r9,r7,r2
  eor r1,r1,r9				;   <- r1 ^ (r3 . r2) ^ (r7 . r2)
  and r9,r7,r6               
  eor r5,r5,r9				;r5 <- r5 ^ (r7 . r6)
  and r9,r3,r6
  eor r5,r5,r9				;   <- r5 ^ (r7 . r6) ^ (r3 . r6)

  ; --- 2 --- word1 ^= word3 v word2
  orr r9,r4,r3
  eor r2,r2,r9              ;r2 <- r2 ^ (r4 v r3)
  and r9,r4,r7
  eor r2,r2,r9              ;   <- r2 ^ (r4 v r3) ^ (r4 . r7)
  orr r9,r8,r7
  eor r6,r6,r9              ;r6 <- r6 ^ (r8 v r7)
  and r9,r8,r3
  eor r6,r6,r9              ;   <- r6 ^ (r8 v r7) ^ (r8 . r3)

  ; --- 3 --- word2 <- ~(word3 ^ word2 ^ word1 ^ word0); the complement is applied
  ;           to state1 only, which complements the XOR of the two shares
  eor r3,r3,r4
  eor r3,r3,r2
  eor r3,r3,r1
  mvn r3,r3					;r3 <- ~(r4 ^ r3 ^ r2 ^ r1)
  eor r7,r7,r8
  eor r7,r7,r6
  eor r7,r7,r5				;r7 <-   r8 ^ r7 ^ r6 ^ r5
  
  ; --- 4 --- word1 ^= word0 v word2 (with the new word2 from step 3)
  orr r9,r1,r3
  eor r2,r2,r9              ;r2 <- r2 ^ (r1 v r3)
  and r9,r1,r7
  eor r2,r2,r9              ;   <- r2 ^ (r1 v r3) ^ (r1 . r7)
  orr r9,r5,r7
  eor r6,r6,r9              ;r6 <- r6 ^ (r5 v r7)
  and r9,r5,r3
  eor r6,r6,r9              ;   <- r6 ^ (r5 v r7) ^ (r5 . r3)
  
  ; --- 5 --- tmp <- word3 ^ (word2 . word1); r11 is the product scratch,
  ;           r9/r10 collect the result for state1/state2
  and r11,r3,r2  
  eor r9,r4,r11              ;tmp1<- r4 ^ (r3 . r2)
  and r11,r7,r2  
  eor r9,r9,r11              ;    <- r4 ^ (r3 . r2) ^ (r7 . r2)
  and r11,r7,r6  
  eor r10,r8,r11             ;tmp2<- r8 ^ (r7 . r6)
  and r11,r3,r6  
  eor r10,r10,r11            ;    <- r8 ^ (r7 . r6) ^ (r3 . r6)

  ; --- 6 --- word3 <- word0 (both shares)
  mov r4,r1                 ;r4 <- r1
  mov r8,r5                 ;r8 <- r5
  
  ; --- 7 --- word0 <- tmp (both shares)
  mov r1,r9					;r1 <- tmp1
  mov r5,r10				;r5 <- tmp2	

  mov pc,lr					; return to caller
;================================================================================================

;================================================================================================
; pi1 - first DISPERSION step - MACRO
; -----------------------------------
; DESCRIPTION
;   Rotates three of the four state words left by fixed amounts:
;     word1 <- word1 ROL 1
;     word2 <- word2 ROL 5
;     word3 <- word3 ROL 2
; INPUT:
;   four registers (positional) = word0..word3 of one state
; OUTPUT:
;   the same four registers = new state
;
; Modified reg. : word1-word3 (word0 is accepted but never touched)
;------------------------------------------------------------------------------------------------
  MACRO
$label pi1 $w0,$w1,$w2,$w3
$label
  mov $w3,$w3,ROR #(32-2)			; word3 ROL 2, expressed as ROR (32-2)
  mov $w2,$w2,ROR #(32-5)			; word2 ROL 5
  mov $w1,$w1,ROR #(32-1)			; word1 ROL 1

  MEND

;================================================================================================


;================================================================================================
; pi2 - second DISPERSION step - MACRO
; ------------------------------------
; DESCRIPTION
;   Rotates three of the four state words right by fixed amounts
;   (the same amounts pi1 rotates left by):
;     word1 <- word1 ROR 1
;     word2 <- word2 ROR 5
;     word3 <- word3 ROR 2
; INPUT:
;   four registers (positional) = word0..word3 of one state
; OUTPUT:
;   the same four registers = new state
;
; Modified reg. : word1-word3 (word0 is accepted but never touched)
;------------------------------------------------------------------------------------------------
  MACRO
$label pi2 $w0,$w1,$w2,$w3
$label
  mov $w3,$w3,ROR #2				; word3 ROR 2
  mov $w2,$w2,ROR #5				; word2 ROR 5
  mov $w1,$w1,ROR #1				; word1 ROR 1

  MEND
;================================================================================================



;================================================================================================
; void NESSIEencryptDPA (NESSIEstruct * const structpointer,
;                        const unsigned char * const plaintext,
;                        const unsigned char * const ciphertext)
; DESCRIPTION:
;    encrypt the plaintext
; INPUT:
;   r0 = NESSIEstruct * const structpointer
;   r1 = const unsigned char * const plaintext
;   r2 = const unsigned char * const ciphertext
; OUTPUT:
;   cipher text written at r2
;
; Reserved reg. : R12-R15
; Modified reg. : none
;------------------------------------------------------------------------------------------------
		; sp-relative storage map of the 4-word local frame that both
		; NESSIEencryptDPA and NESSIEdecryptDPA push with stmdb sp!,{r0,r1,r2,r12}
		^ 0,sp
key			# 4		;key = [sp,#0]       (pushed r0)
plain		# 4		;plain = [sp,#4]     (pushed r1)
cipher		# 4		;cipher = [sp,#8]    (pushed r2, the output pointer)
rcounter	# 4		;rcounter = [sp,#12] (pushed r12, the round counter)

NESSIEencryptDPA
;-------------------------------------------------------------------------------------------------
; Entry code
;   r0 = key structure pointer (first key vector at +0, second key vector at +16)
;   r1 = input block pointer, r2 = output block pointer
;   All of r0-r12 are saved here and restored on exit.
;-------------------------------------------------------------------------------------------------
  stmdb sp!,{r0-r12,lr}			; save r0-r12 and the return address

  mov r12,#0					; round counter starts at 0
  stmdb sp!,{r0,r1,r2,r12}		; local frame: [sp]=key pointer, [sp+4]=plain pointer,
								;              [sp+8]=cipher pointer, [sp+12]=round counter

  ldmia r1,{r1-r4}				;Read state from memory
  								;      R1              R2              R3              R4
  								;w00-w01-w02-w03|w04-w05-w06-w07|w08-w09-w10-w11|w12-w13-w14-w15 |

;-------------------------------------------------------------------------------------------------
; preloop - pre-main loop
;-------------------------------------------------------------------------------------------------
; Split the input into two shares: state2 = random vector, state1 = input ^ state2
;-------------------------------------------------------------------------------------------------
  ldr r11,state2				; r11 = address of the external random buffer (rndbits)
  ldmia r11,{r5-r8}				; state2 = 4 random words
  eor r1,r1,r5					; state1 = input ^ state2
  eor r2,r2,r6
  eor r3,r3,r7
  eor r4,r4,r8

;-------------------------------------------------------------------------------------------------
;Pre: r12=round cntr, r0=key pointer, r1-r4=state1, r5-r8=state2
;-------------------------------------------------------------------------------------------------

encryptloop     					; Main Loop
								; Pre: r12=round cntr, r0=key1 pointer, r1-r4=state1, r5-r8=state2

;--- state1 & state2 theta+key+cst ---------------------------------------------------------------
  ldr r10,rhocstad		    	; r10 = address of the round-constant table
  ldrb r10,[r10,r12]        	; r10 = rhocst[round counter]
  eor r1,r1,r10					; add round constant to state1 only (one share is enough)
  bl thetaDPA					; theta + key addition on both shares (clobbers r9-r12)

;--- state1 & state2 pi1       -r1-r4: state1, r5-r8: state2 -------------------------------------
  pi1 r1,r2,r3,r4					; do pi1 on state1
  pi1 r5,r6,r7,r8					; do pi1 on state2

;--- state1 & state2 gamma     -r1-r4: state1, r5-r8: state2 -------------------------------------
  bl gammaDPA					; do gamma

;--- state1 & state2 pi2       -r1-r4: state1, r5-r8: state2 -------------------------------------
  pi2 r1,r2,r3,r4					; do pi2 on state1
  pi2 r5,r6,r7,r8					; do pi2 on state2

  ldr r12,rcounter				; reload round counter from [sp+12] (r12 was clobbered by thetaDPA)
  add r12,r12,#1				; increment round counter
  str r12,rcounter				; write it back to [sp+12]

  teq r12,#NROUND				; all NROUND rounds done?
  bne encryptloop

;--- end of loop ---------------------------------------------------------------------------------
;--- final theta: r12 = NROUND here, so the last table entry (index 16) is used -------------------
  ldr r10,rhocstad		    	; r10 = address of the round-constant table
  ldrb r10,[r10,r12]        	; r10 = rhocst[NROUND]
  eor r1,r1,r10					; add round constant to state1 only
  bl thetaDPA					; theta + key addition on both shares

;-------------------------------------------------------------------------------------------------
; Recombine the two shares: result = state1 ^ state2
;-------------------------------------------------------------------------------------------------
  eor r1,r1,r5
  eor r2,r2,r6
  eor r3,r3,r7
  eor r4,r4,r8

  ldr r12,cipher				; r12 = [sp+8] = output pointer
  stmia r12,{r1-r4}             ; write the result block

  add sp,sp,#16					; drop the 4-word local frame; its contents are no longer
								; needed and every register is reloaded just below
  ldmia sp!,{r0-r12,pc}			; restore r0-r12 and return
;================================================================================================



;================================================================================================
; void NESSIEdecryptDPA (NESSIEstruct * const structpointer,
;                        const unsigned char * const ciphertext,
;                        const unsigned char * const plaintext)
; DESCRIPTION:
;    decrypt the ciphertext
; INPUT:
;   r0 = NESSIEstruct * const structpointer
;   r1 = const unsigned char * const ciphertext
;   r2 = const unsigned char * const plaintext
; OUTPUT:
;   plaintext text written at r2
;
; Reserved reg. : R12-R15
; Modified reg. : none
;------------------------------------------------------------------------------------------------

NESSIEdecryptDPA
;-------------------------------------------------------------------------------------------------
; Entry code
;   r0 = key structure pointer (first key vector at +0, second key vector at +16)
;   r1 = input block pointer, r2 = output block pointer
;   All of r0-r12 are saved here and restored on exit.
;-------------------------------------------------------------------------------------------------
  stmdb sp!,{r0-r12,lr}			; save r0-r12 and the return address

  mov r12,#NROUND				; round counter starts at NROUND and counts down
  stmdb sp!,{r0,r1,r2,r12}		; local frame: [sp]=key pointer, [sp+4]=input pointer (slot "plain"),
								;              [sp+8]=output pointer (slot "cipher"), [sp+12]=round counter

;-------------------------------------------------------------------------------------------------
; Build the decryption working key: theta_no_add applied to each key vector, stored in the
; thetakey1/thetakey2 buffer of the data area.
; NOTE(review): that buffer is a single file-level area, so it is shared by every call.
;-------------------------------------------------------------------------------------------------
  ldmia r0!,{r5-r8}      		; load first key vector, r0 now points at the second one
  bl theta_no_add
  ldr r4,thetakeyad				; r4 = address of thetakey1
  stmia r4!,{r5-r8}				; thetakey1 = theta(first key vector), r4 now points at thetakey2
  ldmia r0,{r5-r8}      		; load second key vector
  bl theta_no_add
  stmia r4,{r5-r8}				; thetakey2 = theta(second key vector)
  sub r0,r4,#16					; r0 = address of thetakey1 = key pointer used by thetaDPA

  ldmia r1,{r1-r4}				;Read state from memory
  								;      R1              R2              R3              R4
  								;w00-w01-w02-w03|w04-w05-w06-w07|w08-w09-w10-w11|w12-w13-w14-w15 |

;-------------------------------------------------------------------------------------------------
; preloop - pre-main loop
;-------------------------------------------------------------------------------------------------
; Split the input into two shares: state2 = random vector, state1 = input ^ state2
;-------------------------------------------------------------------------------------------------
  ldr r11,state2				; r11 = address of the external random buffer (rndbits)
  ldmia r11,{r5-r8}				; state2 = 4 random words
  eor r1,r1,r5					; state1 = input ^ state2
  eor r2,r2,r6
  eor r3,r3,r7
  eor r4,r4,r8

;-------------------------------------------------------------------------------------------------
;Pre: r0=transformed key pointer, r1-r4=state1, r5-r8=state2, round counter in [sp+12]
;-------------------------------------------------------------------------------------------------

decryptloop    					; Main Loop
								; Pre: r0=key1 pointer, r1-r4=state1, r5-r8=state2

;--- state1 & state2 theta+key, then cst ---------------------------------------------------------
  bl thetaDPA					; theta + key addition on both shares (clobbers r9-r12)
  ldr r12,rcounter				; load r12 = [sp+12] = round counter
  ldr r10,rhocstad		    	; r10 = address of the round-constant table
  ldrb r10,[r10,r12]        	; r10 = rhocst[round counter]
  eor r1,r1,r10					; add round constant to state1 only (one share is enough)

;--- state1 & state2 pi1       -r1-r4: state1, r5-r8: state2 -------------------------------------
  pi1 r1,r2,r3,r4					; do pi1 on state1
  pi1 r5,r6,r7,r8					; do pi1 on state2

;--- state1 & state2 gamma     -r1-r4: state1, r5-r8: state2 -------------------------------------
  bl gammaDPA					; do gamma

;--- state1 & state2 pi2       -r1-r4: state1, r5-r8: state2 -------------------------------------
  pi2 r1,r2,r3,r4					; do pi2 on state1
  pi2 r5,r6,r7,r8					; do pi2 on state2

  ldr r12,rcounter				; load r12 = [sp+12] = round counter
  sub r12,r12,#1				; decrement round counter
  str r12,rcounter				; write it back to [sp+12]

  teq r12,#0					; counted down to zero?
  bne decryptloop

;--- end of loop ---------------------------------------------------------------------------------
;--- final theta: the round counter is 0 here, so the first table entry (index 0) is used ---------
  bl thetaDPA					; theta + key addition on both shares
  ldr r12,rcounter				; load r12 = [sp+12] = round counter
  ldr r10,rhocstad		    	; r10 = address of the round-constant table
  ldrb r10,[r10,r12]        	; r10 = rhocst[0]
  eor r1,r1,r10					; add round constant to state1 only

;-------------------------------------------------------------------------------------------------
; Recombine the two shares: result = state1 ^ state2
;-------------------------------------------------------------------------------------------------
  eor r1,r1,r5
  eor r2,r2,r6
  eor r3,r3,r7
  eor r4,r4,r8

  ldr r12,cipher				; r12 = [sp+8] = output pointer (third argument)
  stmia r12,{r1-r4}             ; write the result block

  add sp,sp,#16					; drop the 4-word local frame; its contents are no longer
								; needed and every register is reloaded just below
  ldmia sp!,{r0-r12,pc}			; restore r0-r12 and return
;================================================================================================







;================================================================================================
; void NESSIEkeysetupDPA (const unsigned char * const key,
;                         NESSIEstruct * const structpointer)
; DESCRIPTION:
;   Copy the two keys in the work buffer
; INPUT:
;   r0 = const unsigned char * const key
;   r1 = NESSIEstruct * const structpointer
; OUTPUT:
;   key written at r1
;
; Reserved reg. : R12-R15
; Modified reg. : none
;------------------------------------------------------------------------------------------------
; Copies 8 words (32 bytes) from [r0] to [r1] unchanged: the first 4 words are
; the key vector that thetaDPA later reads at offset 0, the next 4 words the
; vector it reads at offset 16. r0 and r1 are not modified.
NESSIEkeysetupDPA
  stmdb sp!,{r2-r9,lr} 		; save r2-r9 (used as the 8-word transfer buffer) and the return address

  ldmia r0,{r2-r9}		    ; read both key vectors; words are copied verbatim, no byte reordering
							;      R2              R3              R4              R5
							;k00-k01-k02-k03|k04-k05-k06-k07|k08-k09-k10-k11|k12-k13-k14-k15 |
							; (r6-r9 hold the following 16 bytes in the same way)

  stmia r1,{r2-r9}			; write both key vectors into the structure

  ldmia sp!,{r2-r9,pc}  	; Retrieve registers and return from subroutine
;================================================================================================

  END
