// Patch summary (aes-tboxes/aes.cpp): replace the per-round
// subBytes/shiftRows/mixColumns sequence with four lookup tables ("T-boxes").
//
// What the patch changes:
//   * Adds four global tables T0..T3, each uint32_t[256].
//   * main() fills them before the timing loop: for every byte value i,
//     a1 = SBOX[i], a2 = xtime(a1), a3 = a2 ^ a1, packed with WORD() as
//     T0 = (a2,a1,a1,a3), T1 = (a3,a2,a1,a1), T2 = (a1,a3,a2,a1),
//     T3 = (a1,a1,a3,a2), listed from byte0 to byte3.
//   * aes(): rounds 1..9 build each output word j as
//     T0[byte 0 of state[j]] ^ T1[byte 1 of state[(j+1)%4]] ^
//     T2[byte 2 of state[(j+2)%4]] ^ T3[byte 3 of state[(j+3)%4]],
//     copy the temporary back into state, then call
//     addRoundKey(state, expKey, 4 * i).
//   * The last round is moved out of the loop: subBytes, shiftRows, then
//     addRoundKey(state, expKey, 40); no column mixing is applied there.
//   * mixColumn() and mixColumns() are removed.
//   * WBYTE() now casts `value` to uint32_t before shifting.
//
// NOTE(review): in this patch the tables are filled only inside main(); any
// caller of aes() that runs before that loop would read tables that have not
// been filled yet - confirm no such caller exists.
// NOTE(review): WBYTE() does not parenthesize its parameters, so the uint32_t
// cast binds only to the first operand of an expression argument. The call
// sites visible here pass array elements and plain names, which are fine.
// NOTE(review): the header names of the two added #include lines are not
// visible in this copy; memcpy is newly used, so presumably one of them is
// <cstring> - confirm.
// NOTE(review): the line breaks of the diff appear to have been lost in this
// copy; restore them from the original commit before applying the patch.
diff --git a/aes-tboxes/aes.cpp b/aes-tboxes/aes.cpp index 0e2cc68..81f4d2e 100644 --- a/aes-tboxes/aes.cpp +++ b/aes-tboxes/aes.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include #include #include #include @@ -14,7 +16,7 @@ http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf */ #define WORD(byte0, byte1, byte2, byte3) ((((((uint16_t)(byte3 << 8) | byte2) << 8) | byte1) << 8) | byte0) -#define WBYTE(value, position) ((value >> (position * 8)) & 0xFF) +#define WBYTE(value, position) (((uint32_t)value >> (position * 8)) & 0xFF) /* AES Constants */ // AES polynomial @@ -38,7 +40,13 @@ const uint8_t SBOX[256] = { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -}; +}; + +// T-boxes +uint32_t T0[256]; +uint32_t T1[256]; +uint32_t T2[256]; +uint32_t T3[256]; const uint8_t rCon[12] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, @@ -93,26 +101,6 @@ uint8_t xtime(uint8_t a) { return ((a << 1) ^ mask) & 0xFF; } -// not mandatory - mix a single column -uint32_t mixColumn(uint32_t c) { - uint32_t result = c; - uint8_t *source = (uint8_t*)(&c); - uint8_t *target = (uint8_t*)(&result); - uint8_t base = *source ^ *(source + 1) ^ *(source + 2) ^ *(source + 3); - *target ^= base ^ xtime(*source ^ *(source + 1)); - *(target + 1) ^= base ^ xtime(*(source + 1) ^ *(source + 2)); - *(target + 2) ^= base ^ xtime(*(source + 2) ^ *(source + 3)); - *(target + 3) ^= base ^ xtime(*(source + 3) ^ *source); - return result; -} - - -void mixColumns(t_state s) { - for (uint8_t i = 0; i < 4; i++) { - s[i] = mixColumn(s[i]); - } -} - /* * Key expansion from 128bits (4*32b) * to 11 round keys (11*4*32b) @@ -159,21 +147,27 @@ void aes(uint8_t *in, uint8_t *out, uint8_t *skey) uint32_t expKey[11 * 4]; expandKey(skey, expKey); - 
addRoundKey(state, expKey, 0); - for (int i = 1; i <= 10; i++) { - subBytes(state); + for (int i = 1; i < 10; i++) { + t_state tmp; - shiftRows(state); - - if (i < 10) { - mixColumns(state); + for (int j = 0; j < 4; j++) { + tmp[j] = + T0[WBYTE(state[j], 0)] ^ + T1[WBYTE(state[(j + 1) % 4], 1)] ^ + T2[WBYTE(state[(j + 2) % 4], 2)] ^ + T3[WBYTE(state[(j + 3) % 4], 3)]; } - addRoundKey(state, expKey, 4*i); + memcpy(state, tmp, sizeof(t_state)); + addRoundKey(state, expKey, 4 * i); } + subBytes(state); + shiftRows(state); + addRoundKey(state, expKey, 40); + for (int i = 0; i < 16; i++) { if (i < 4) out[i] = WBYTE(state[0], i % 4); else if (i < 8) out[i] = WBYTE(state[1], i % 4); @@ -198,6 +192,16 @@ int main(int argc, char* argv[]) cycles = std::atoi(argv[1]); } + for (int i = 0; i <= 0xFF; i++) { + uint8_t a1 = SBOX[i]; + uint8_t a2 = xtime(a1); + uint8_t a3 = a2 ^ a1; + T0[i] = WORD(a2, a1, a1, a3); + T1[i] = WORD(a3, a2, a1, a1); + T2[i] = WORD(a1, a3, a2, a1); + T3[i] = WORD(a1, a1, a3, a2); + } + const auto start{std::chrono::steady_clock::now()}; { for (int i = 0; i < cycles; i++) {