#include "cpusupport.h"
#ifdef CPUSUPPORT_X86_AESNI

#include <stdint.h>
#include <stdlib.h>
#include <wmmintrin.h>

#include "insecure_memzero.h"
#include "warnp.h"

#include "crypto_aes_aesni.h"
/* Expanded-key structure. */
struct crypto_aes_key_aesni {
	/*
	 * Storage for the round keys, with sizeof(__m128i) - 1 spare bytes
	 * so that a 16-byte-aligned position can always be found inside it.
	 */
	uint8_t rkeys_buf[15 * sizeof(__m128i) + (sizeof(__m128i) - 1)];
	__m128i * rkeys;	/* 16-byte-aligned pointer into rkeys_buf. */
	size_t nr;		/* Number of AES rounds: 10 or 14. */
};
/*
 * Compute an AES-128 round key.  'rcon' must be an immediate, which is why
 * this is a macro rather than a function.
 */
#define MKRKEY128(rkeys, i, rcon) do {				\
	__m128i _s = rkeys[i - 1];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, 0xff);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
/**
 * crypto_aes_key_expand_128_aesni(key, rkeys):
 * Expand the 128-bit AES key ${key} into the 11 round keys ${rkeys}.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
static void
crypto_aes_key_expand_128_aesni(const uint8_t key[16], __m128i rkeys[11])
{

	/* The first round key is just the key. */
	/**
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key[0]);

	/*
	 * Each of the remaining round keys are computed from the preceding
	 * round key: rotword+subword+rcon (provided as aeskeygenassist) to
	 * compute the 'temp' value, then xor with 1, 2, 3, or all 4 of the
	 * 32-bit words from the preceding round key.  Unfortunately, 'rcon'
	 * is encoded as an immediate value, so we need to write the loop out
	 * ourselves rather than allowing the compiler to expand it.
	 */
	MKRKEY128(rkeys, 1, 0x01);
	MKRKEY128(rkeys, 2, 0x02);
	MKRKEY128(rkeys, 3, 0x04);
	MKRKEY128(rkeys, 4, 0x08);
	MKRKEY128(rkeys, 5, 0x10);
	MKRKEY128(rkeys, 6, 0x20);
	MKRKEY128(rkeys, 7, 0x40);
	MKRKEY128(rkeys, 8, 0x80);
	MKRKEY128(rkeys, 9, 0x1b);
	MKRKEY128(rkeys, 10, 0x36);
}
/*
 * Compute an AES-256 round key.  'shuffle' and 'rcon' must be immediates,
 * which is why this is a macro rather than a function.
 */
#define MKRKEY256(rkeys, i, shuffle, rcon) do {			\
	__m128i _s = rkeys[i - 2];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, shuffle);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
/**
 * crypto_aes_key_expand_256_aesni(key, rkeys):
 * Expand the 256-bit AES key ${key} into the 15 round keys ${rkeys}.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
static void
crypto_aes_key_expand_256_aesni(const uint8_t key[32], __m128i rkeys[15])
{

	/* The first two round keys are just the key. */
	/**
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key[0]);
	rkeys[1] = _mm_loadu_si128((const __m128i *)&key[16]);

	/*
	 * Each of the remaining round keys are computed from the preceding
	 * pair of keys.  Even rounds use rotword+subword+rcon, while odd
	 * rounds just use subword; the aeskeygenassist instruction computes
	 * both, and we use 0xff or 0xaa to select the one we need.  The rcon
	 * value used is irrelevant for odd rounds since we ignore the value
	 * which it feeds into.  Unfortunately, the 'shuffle' and 'rcon'
	 * values are encoded into the instructions as immediates, so we need
	 * to write the loop out ourselves rather than allowing the compiler
	 * to expand it.
	 */
	MKRKEY256(rkeys, 2, 0xff, 0x01);
	MKRKEY256(rkeys, 3, 0xaa, 0x00);
	MKRKEY256(rkeys, 4, 0xff, 0x02);
	MKRKEY256(rkeys, 5, 0xaa, 0x00);
	MKRKEY256(rkeys, 6, 0xff, 0x04);
	MKRKEY256(rkeys, 7, 0xaa, 0x00);
	MKRKEY256(rkeys, 8, 0xff, 0x08);
	MKRKEY256(rkeys, 9, 0xaa, 0x00);
	MKRKEY256(rkeys, 10, 0xff, 0x10);
	MKRKEY256(rkeys, 11, 0xaa, 0x00);
	MKRKEY256(rkeys, 12, 0xff, 0x20);
	MKRKEY256(rkeys, 13, 0xaa, 0x00);
	MKRKEY256(rkeys, 14, 0xff, 0x40);
}
136 * crypto_aes_key_expand_aesni(key, len):
137 * Expand the ${len}-byte AES key ${key} into a structure which can be passed
138 * to crypto_aes_encrypt_block_aesni. The length must be 16 or 32. This
139 * implementation uses x86 AESNI instructions, and should only be used if
140 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
143 crypto_aes_key_expand_aesni(const uint8_t * key
, size_t len
)
145 struct crypto_aes_key_aesni
* kexp
;
148 /* Allocate structure. */
149 if ((kexp
= malloc(sizeof(struct crypto_aes_key_aesni
))) == NULL
)
152 /* Figure out where to put the round keys. */
153 rkey_offset
= (uintptr_t)(&kexp
->rkeys_buf
[0]) % sizeof(__m128i
);
154 rkey_offset
= (sizeof(__m128i
) - rkey_offset
) % sizeof(__m128i
);
155 kexp
->rkeys
= (void *)&kexp
->rkeys_buf
[rkey_offset
];
157 /* Compute round keys. */
160 crypto_aes_key_expand_128_aesni(key
, kexp
->rkeys
);
161 } else if (len
== 32) {
163 crypto_aes_key_expand_256_aesni(key
, kexp
->rkeys
);
165 warn0("Unsupported AES key length: %zu bytes", len
);
180 * crypto_aes_encrypt_block_aesni(in, out, key):
181 * Using the expanded AES key ${key}, encrypt the block ${in} and write the
182 * resulting ciphertext to ${out}. This implementation uses x86 AESNI
183 * instructions, and should only be used if CPUSUPPORT_X86_AESNI is defined
184 * and cpusupport_x86_aesni() returns nonzero.
187 crypto_aes_encrypt_block_aesni(const uint8_t * in
, uint8_t * out
,
190 const struct crypto_aes_key_aesni
* _key
= key
;
191 const __m128i
* aes_key
= _key
->rkeys
;
193 size_t nr
= _key
->nr
;
195 aes_state
= _mm_loadu_si128((const __m128i
*)in
);
196 aes_state
= _mm_xor_si128(aes_state
, aes_key
[0]);
197 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[1]);
198 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[2]);
199 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[3]);
200 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[4]);
201 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[5]);
202 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[6]);
203 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[7]);
204 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[8]);
205 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[9]);
207 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[10]);
208 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[11]);
211 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[12]);
212 aes_state
= _mm_aesenc_si128(aes_state
, aes_key
[13]);
216 aes_state
= _mm_aesenclast_si128(aes_state
, aes_key
[nr
]);
217 _mm_storeu_si128((__m128i
*)out
, aes_state
);
221 * crypto_aes_key_free_aesni(key):
222 * Free the expanded AES key ${key}.
225 crypto_aes_key_free_aesni(void * key
)
228 /* Behave consistently with free(NULL). */
232 /* Attempt to zero the expanded key. */
233 insecure_memzero(key
, sizeof(struct crypto_aes_key_aesni
));
239 #endif /* CPUSUPPORT_X86_AESNI */