/*-
 * Copyright 2009 Colin Percival
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file was originally written by Colin Percival as part of the Tarsnap
 * online backup system.
 */
#include "cpusupport.h"
#ifdef CPUSUPPORT_X86_SSE2

#include <emmintrin.h>
#include <stdint.h>

#include "sysendian.h"

#include "crypto_scrypt_smix_sse2.h"
/* Forward declarations for the static helpers defined below. */
static void blkcpy(void *, const void *, size_t);
static void blkxor(void *, const void *, size_t);
static void salsa20_8(__m128i *);
static void blockmix_salsa8(const __m128i *, __m128i *, __m128i *, size_t);
static uint64_t integerify(const void *, size_t);
/**
 * blkcpy(dest, src, len):
 * Copy len bytes from src to dest.  len must be a multiple of 16, and both
 * pointers must be 16-byte aligned, since the copy moves 128-bit words.
 */
static void
blkcpy(void * dest, const void * src, size_t len)
{
	__m128i * D = dest;
	const __m128i * S = src;
	size_t L = len / 16;
	size_t i;

	for (i = 0; i < L; i++)
		D[i] = S[i];
}
/**
 * blkxor(dest, src, len):
 * XOR len bytes of src into dest.  len must be a multiple of 16, and both
 * pointers must be 16-byte aligned, since the XOR is done 128 bits at a time.
 */
static void
blkxor(void * dest, const void * src, size_t len)
{
	__m128i * D = dest;
	const __m128i * S = src;
	size_t L = len / 16;
	size_t i;

	for (i = 0; i < L; i++)
		D[i] = _mm_xor_si128(D[i], S[i]);
}
/**
 * salsa20_8(B):
 * Apply the salsa20/8 core to the provided block.  B holds the 64-byte block
 * as four 128-bit words in the column-shuffled layout used by this SSE2
 * implementation (see the i * 5 % 16 permutation in crypto_scrypt_smix_sse2).
 */
static void
salsa20_8(__m128i B[4])
{
	__m128i X0, X1, X2, X3;
	__m128i T;
	size_t i;

	/* Load the input block; the final feed-forward adds it back in. */
	X0 = B[0];
	X1 = B[1];
	X2 = B[2];
	X3 = B[3];

	/* 8 rounds = 4 double-rounds (one column round + one row round each). */
	for (i = 0; i < 8; i += 2) {
		/* Operate on "columns". */
		T = _mm_add_epi32(X0, X3);
		X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 7));
		X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 25));
		T = _mm_add_epi32(X1, X0);
		X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
		X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
		T = _mm_add_epi32(X2, X1);
		X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 13));
		X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 19));
		T = _mm_add_epi32(X3, X2);
		X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
		X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));

		/* Rearrange data. */
		X1 = _mm_shuffle_epi32(X1, 0x93);
		X2 = _mm_shuffle_epi32(X2, 0x4E);
		X3 = _mm_shuffle_epi32(X3, 0x39);

		/* Operate on "rows". */
		T = _mm_add_epi32(X0, X1);
		X3 = _mm_xor_si128(X3, _mm_slli_epi32(T, 7));
		X3 = _mm_xor_si128(X3, _mm_srli_epi32(T, 25));
		T = _mm_add_epi32(X3, X0);
		X2 = _mm_xor_si128(X2, _mm_slli_epi32(T, 9));
		X2 = _mm_xor_si128(X2, _mm_srli_epi32(T, 23));
		T = _mm_add_epi32(X2, X3);
		X1 = _mm_xor_si128(X1, _mm_slli_epi32(T, 13));
		X1 = _mm_xor_si128(X1, _mm_srli_epi32(T, 19));
		T = _mm_add_epi32(X1, X2);
		X0 = _mm_xor_si128(X0, _mm_slli_epi32(T, 18));
		X0 = _mm_xor_si128(X0, _mm_srli_epi32(T, 14));

		/* Rearrange data. */
		X1 = _mm_shuffle_epi32(X1, 0x39);
		X2 = _mm_shuffle_epi32(X2, 0x4E);
		X3 = _mm_shuffle_epi32(X3, 0x93);
	}

	/* Feed-forward: add the original input back into the state. */
	B[0] = _mm_add_epi32(B[0], X0);
	B[1] = _mm_add_epi32(B[1], X1);
	B[2] = _mm_add_epi32(B[2], X2);
	B[3] = _mm_add_epi32(B[3], X3);
}
/**
 * blockmix_salsa8(Bin, Bout, X, r):
 * Compute Bout = BlockMix_{salsa20/8, r}(Bin).  The input Bin must be 128r
 * bytes in length; the output Bout must also be the same size.  The
 * temporary space X must be 64 bytes.
 */
static void
blockmix_salsa8(const __m128i * Bin, __m128i * Bout, __m128i * X, size_t r)
{
	size_t i;

	/* 1: X <-- B_{2r - 1} */
	blkcpy(X, &Bin[8 * r - 4], 64);

	/* 2: for i = 0 to 2r - 1 do */
	for (i = 0; i < r; i++) {
		/* 3: X <-- H(X \xor B_i) */
		blkxor(X, &Bin[i * 8], 64);
		salsa20_8(X);

		/* 4: Y_i <-- X */
		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
		blkcpy(&Bout[i * 4], X, 64);

		/* 3: X <-- H(X \xor B_i) */
		blkxor(X, &Bin[i * 8 + 4], 64);
		salsa20_8(X);

		/* 4: Y_i <-- X */
		/* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */
		blkcpy(&Bout[(r + i) * 4], X, 64);
	}
}
/**
 * integerify(B, r):
 * Return the result of parsing B_{2r-1} as a little-endian integer.
 * Note that B's layout is permuted compared to the generic implementation:
 * word 0 and word 13 of the shuffled block hold the low and high halves.
 */
static uint64_t
integerify(const void * B, size_t r)
{
	const uint32_t * X = (const void *)((uintptr_t)(B) + (2 * r - 1) * 64);

	return (((uint64_t)(X[13]) << 32) + X[0]);
}
/**
 * crypto_scrypt_smix_sse2(B, r, N, V, XY):
 * Compute B = SMix_r(B, N).  The input B must be 128r bytes in length;
 * the temporary storage V must be 128rN bytes in length; the temporary
 * storage XY must be 256r + 64 bytes in length.  The value N must be a
 * power of 2 greater than 1.  The arrays B, V, and XY must be aligned to a
 * multiple of 64 bytes.
 *
 * Use SSE2 instructions.
 */
void
crypto_scrypt_smix_sse2(uint8_t * B, size_t r, uint64_t N, void * V, void * XY)
{
	__m128i * X = XY;
	__m128i * Y = (void *)((uintptr_t)(XY) + 128 * r);
	__m128i * Z = (void *)((uintptr_t)(XY) + 256 * r);
	uint32_t * X32 = (void *)X;
	uint64_t i, j;
	size_t k;

	/* 1: X <-- B_{i * r} */
	/* The i * 5 % 16 permutation puts words into the column-shuffled
	 * layout that salsa20_8 above operates on. */
	for (k = 0; k < 2 * r; k++) {
		for (i = 0; i < 16; i++) {
			X32[k * 16 + i] =
			    le32dec(&B[(k * 16 + (i * 5 % 16)) * 4]);
		}
	}

	/* 2: for i = 0 to N - 1 do */
	for (i = 0; i < N; i += 2) {
		/* 3: V_i <-- X */
		blkcpy((void *)((uintptr_t)(V) + i * 128 * r), X, 128 * r);

		/* 4: X <-- H(X) */
		blockmix_salsa8(X, Y, Z, r);

		/* 3: V_i <-- X */
		blkcpy((void *)((uintptr_t)(V) + (i + 1) * 128 * r),
		    Y, 128 * r);

		/* 4: X <-- H(X) */
		blockmix_salsa8(Y, X, Z, r);
	}

	/* 6: for i = 0 to N - 1 do */
	for (i = 0; i < N; i += 2) {
		/* 7: j <-- Integerify(X) mod N */
		/* N is a power of 2, so the mask implements mod N. */
		j = integerify(X, r) & (N - 1);

		/* 8: X <-- H(X \xor V_j) */
		blkxor(X, (void *)((uintptr_t)(V) + j * 128 * r), 128 * r);
		blockmix_salsa8(X, Y, Z, r);

		/* 7: j <-- Integerify(X) mod N */
		j = integerify(Y, r) & (N - 1);

		/* 8: X <-- H(X \xor V_j) */
		blkxor(Y, (void *)((uintptr_t)(V) + j * 128 * r), 128 * r);
		blockmix_salsa8(Y, X, Z, r);
	}

	/* 10: B' <-- X */
	for (k = 0; k < 2 * r; k++) {
		for (i = 0; i < 16; i++) {
			le32enc(&B[(k * 16 + (i * 5 % 16)) * 4],
			    X32[k * 16 + i]);
		}
	}
}
#endif /* CPUSUPPORT_X86_SSE2 */