/*************************************************
* Square Source File                             *
* (C) 1999-2001 The OpenCL Project               *
*************************************************/

#include <opencl/square.h>

namespace OpenCL {

/*************************************************
* Square Encryption                              *
*************************************************/
/*
* Encrypt a single block: reads in[0..15] and writes out[0..15].
*
* The input is packed into four 32-bit words and XORed with EK[0..3].
* Seven table-driven rounds follow (TE0..TE3 combined with EK[4..31]),
* then a last round that substitutes bytes through SE and XORs in
* EK[32..35]. All input bytes are consumed before any output byte is
* written.
*/
void Square::encrypt(const byte in[BLOCKSIZE], byte out[BLOCKSIZE]) const
   {
   u32bit state[4];

   /* Load the block and apply the initial key addition */
   for(u32bit w = 0; w != 4; w++)
      state[w] = make_u32bit(in[4*w], in[4*w+1], in[4*w+2], in[4*w+3]) ^ EK[w];

   /* Rounds 1..7: output word w gathers byte w of every state word */
   for(u32bit round = 1; round != 8; round++)
      {
      u32bit mixed[4];
      for(u32bit w = 0; w != 4; w++)
         mixed[w] = TE0[get_byte(w, state[0])] ^ TE1[get_byte(w, state[1])] ^
                    TE2[get_byte(w, state[2])] ^ TE3[get_byte(w, state[3])] ^
                    EK[4*round + w];
      for(u32bit w = 0; w != 4; w++)
         state[w] = mixed[w];
      }

   /* Last round: S-box lookups only, then the final key addition */
   for(u32bit w = 0; w != 4; w++)
      {
      const u32bit word =
         make_u32bit(SE[get_byte(w, state[0])], SE[get_byte(w, state[1])],
                     SE[get_byte(w, state[2])], SE[get_byte(w, state[3])]) ^
         EK[32 + w];
      for(u32bit k = 0; k != 4; k++)
         out[4*w + k] = get_byte(k, word);
      }
   }

/*************************************************
* Square Decryption                              *
*************************************************/
/*
* Decrypt a single block: reads in[0..15] and writes out[0..15].
*
* Same structure as encryption, but driven by the decryption key
* schedule DK and the tables TD0..TD3 / SD: initial XOR with DK[0..3],
* seven table-driven rounds using DK[4..31], then a last round of SD
* lookups XORed with DK[32..35]. All input bytes are consumed before
* any output byte is written.
*/
void Square::decrypt(const byte in[BLOCKSIZE], byte out[BLOCKSIZE]) const
   {
   u32bit state[4];

   /* Load the block and apply the initial key addition */
   for(u32bit w = 0; w != 4; w++)
      state[w] = make_u32bit(in[4*w], in[4*w+1], in[4*w+2], in[4*w+3]) ^ DK[w];

   /* Rounds 1..7: output word w gathers byte w of every state word */
   for(u32bit round = 1; round != 8; round++)
      {
      u32bit mixed[4];
      for(u32bit w = 0; w != 4; w++)
         mixed[w] = TD0[get_byte(w, state[0])] ^ TD1[get_byte(w, state[1])] ^
                    TD2[get_byte(w, state[2])] ^ TD3[get_byte(w, state[3])] ^
                    DK[4*round + w];
      for(u32bit w = 0; w != 4; w++)
         state[w] = mixed[w];
      }

   /* Last round: inverse S-box lookups only, then the final key addition */
   for(u32bit w = 0; w != 4; w++)
      {
      const u32bit word =
         make_u32bit(SD[get_byte(w, state[0])], SD[get_byte(w, state[1])],
                     SD[get_byte(w, state[2])], SD[get_byte(w, state[3])]) ^
         DK[32 + w];
      for(u32bit k = 0; k != 4; k++)
         out[4*w + k] = get_byte(k, word);
      }
   }

/*************************************************
* Square Key Setup                               *
*************************************************/
void Square::set_key(const byte key[], u32bit length) throw(InvalidKeyLength)
   {
   if(!valid_keylength(length))
      throw InvalidKeyLength("Square", length);
   static const u32bit RC[8] = {
      0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, 0x10000000UL,
      0x20000000UL, 0x40000000UL, 0x80000000UL };
   for(u32bit j = 0; j != 4; j++)
      EK[j] = make_u32bit(key[4*j], key[4*j+1], key[4*j+2], key[4*j+3]);
   for(u32bit j = 1; j != 9; j++)
      {
      DK[32-4*j] = EK[4*j] = EK[4*j-4] ^ rotate_left(EK[4*j-1], 8) ^ RC[j-1];
      DK[33-4*j] = EK[4*j+1] = EK[4*j-3] ^ EK[4*j  ];
      DK[34-4*j] = EK[4*j+2] = EK[4*j-2] ^ EK[4*j+1];
      DK[35-4*j] = EK[4*j+3] = EK[4*j-1] ^ EK[4*j+2];
      transform(EK + 4*(j-1));
      }
   for(u32bit j = 0; j != 4; j++)
      DK[j+32] = EK[j];
   }

/*************************************************
* Square's Inverse Linear Transformation         *
*************************************************/
/*
* Transform four round key words in place.
*
* Each word is split into four bytes with get_byte(), treated as a row
* vector and multiplied by the matrix G, using mul() for the products
* and XOR for the sums. The four resulting bytes are packed back into
* the word with make_u32bit(). Words are processed independently.
*/
void Square::transform(u32bit round_key[4]) const
   {
   static const byte G[4][4] = {
      { 0x02, 0x01, 0x01, 0x03 },
      { 0x03, 0x02, 0x01, 0x01 },
      { 0x01, 0x03, 0x02, 0x01 },
      { 0x01, 0x01, 0x03, 0x02 } };

   SecureBuffer<byte, 4> src, dst;

   for(u32bit word = 0; word != 4; word++)
      {
      for(u32bit col = 0; col != 4; col++)
         src[col] = get_byte(col, round_key[word]);

      for(u32bit col = 0; col != 4; col++)
         {
         /* Dot product of the byte vector with column col of G */
         byte sum = 0;
         for(u32bit row = 0; row != 4; row++)
            sum ^= mul(src[row], G[row][col]);
         dst[col] = sum;
         }

      round_key[word] = make_u32bit(dst[0], dst[1], dst[2], dst[3]);
      }
   }

/*************************************************
* Multiply in GF(2^8)                            *
*************************************************/
/*
* Return the product of a and b, computed through the Log / ALog
* lookup tables. A zero operand is handled up front and yields zero
* without consulting the tables.
*/
byte Square::mul(byte a, byte b) const
   {
   if(a == 0 || b == 0)
      return 0;

   /* Add the two table logarithms, reduce modulo 255, look up in ALog */
   return ALog[(Log[a] + Log[b]) % 255];
   }

}
