1 /*
2    BLAKE2 reference source code package - optimized C implementations
3 
4    Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
5    terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6    your option.  The terms of these licenses can be found at:
7 
8    - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9    - OpenSSL license   : https://www.openssl.org/source/license.html
10    - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
11 
12    More information about the BLAKE2 hash function can be found at
13    https://blake2.net.
14 */
15 #pragma once
16 #ifndef __BLAKE2B_LOAD_SSE41_H__
17 #define __BLAKE2B_LOAD_SSE41_H__
18 
/*
 * LOAD_MSG_0_1(b0, b1): assigns two vectors built from m0, m1, m2 and m3,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m0, low half of m1 }
 *   b1 = { low half of m2, low half of m3 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {0, 2} into b0 and {4, 6} into b1.
 */
#define LOAD_MSG_0_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m1); \
        (b1) = _mm_unpacklo_epi64(m2, m3); \
    } while (0)
25 
26 
/*
 * LOAD_MSG_0_2(b0, b1): assigns two vectors built from m0, m1, m2 and m3,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m0, high half of m1 }
 *   b1 = { high half of m2, high half of m3 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {1, 3} into b0 and {5, 7} into b1.
 */
#define LOAD_MSG_0_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m0, m1); \
        (b1) = _mm_unpackhi_epi64(m2, m3); \
    } while (0)
33 
34 
/*
 * LOAD_MSG_0_3(b0, b1): assigns two vectors built from m4, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m4, low half of m5 }
 *   b1 = { low half of m6, low half of m7 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {8, 10} into b0 and {12, 14} into b1.
 */
#define LOAD_MSG_0_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m5); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
41 
42 
/*
 * LOAD_MSG_0_4(b0, b1): assigns two vectors built from m4, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m4, high half of m5 }
 *   b1 = { high half of m6, high half of m7 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {9, 11} into b0 and {13, 15} into b1.
 */
#define LOAD_MSG_0_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m5); \
        (b1) = _mm_unpackhi_epi64(m6, m7); \
    } while (0)
49 
50 
/*
 * LOAD_MSG_1_1(b0, b1): assigns two vectors built from m2, m4, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m7, low half of m2 }
 *   b1 = { high half of m4, high half of m6 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {14, 4} into b0 and {9, 13} into b1.
 */
#define LOAD_MSG_1_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m2); \
        (b1) = _mm_unpackhi_epi64(m4, m6); \
    } while (0)
57 
58 
/*
 * LOAD_MSG_1_2(b0, b1): assigns two vectors built from m3, m4, m5 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m5, low half of m4 }
 *   b1 = { high half of m7, low half of m3 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {10, 8} into b0 and {15, 6} into b1.
 */
#define LOAD_MSG_1_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)
65 
66 
/*
 * LOAD_MSG_1_3(b0, b1): assigns two vectors built from m0, m2 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m0, low half of m0 }   (64-bit halves swapped)
 *   b1 = { high half of m5, high half of m2 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {1, 0} into b0 and {11, 5} into b1.
 */
#define LOAD_MSG_1_3(b0, b1) \
    do { \
        (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        (b1) = _mm_unpackhi_epi64(m5, m2); \
    } while (0)
73 
74 
/*
 * LOAD_MSG_1_4(b0, b1): assigns two vectors built from m1, m3 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m6, low half of m1 }
 *   b1 = { high half of m3, high half of m1 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {12, 2} into b0 and {7, 3} into b1.
 */
#define LOAD_MSG_1_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m1); \
        (b1) = _mm_unpackhi_epi64(m3, m1); \
    } while (0)
81 
82 
/*
 * LOAD_MSG_2_1(b0, b1): assigns two vectors built from m2, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m5, low half of m6 }   (alignr by 8 bytes)
 *   b1 = { high half of m2, high half of m7 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {11, 12} into b0 and {5, 15} into b1.
 */
#define LOAD_MSG_2_1(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m6, m5, 8); \
        (b1) = _mm_unpackhi_epi64(m2, m7); \
    } while (0)
89 
90 
/*
 * LOAD_MSG_2_2(b0, b1): assigns two vectors built from m0, m1, m4 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m4, low half of m0 }
 *   b1 = { low half of m1, high half of m6 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {8, 0} into b0 and {2, 13} into b1.
 */
#define LOAD_MSG_2_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m0); \
        (b1) = _mm_blend_epi16(m1, m6, 0xF0); \
    } while (0)
97 
98 
/*
 * LOAD_MSG_2_3(b0, b1): assigns two vectors built from m1, m3, m4 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m5, high half of m1 }   (blend mask 0xF0)
 *   b1 = { high half of m3, high half of m4 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {10, 3} into b0 and {7, 9} into b1.
 */
#define LOAD_MSG_2_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m5, m1, 0xF0); \
        (b1) = _mm_unpackhi_epi64(m3, m4); \
    } while (0)
105 
106 
/*
 * LOAD_MSG_2_4(b0, b1): assigns two vectors built from m0, m2, m3 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m7, low half of m3 }
 *   b1 = { high half of m0, low half of m2 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {14, 6} into b0 and {1, 4} into b1.
 */
#define LOAD_MSG_2_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m3); \
        (b1) = _mm_alignr_epi8(m2, m0, 8); \
    } while (0)
113 
114 
/*
 * LOAD_MSG_3_1(b0, b1): assigns two vectors built from m1, m3, m5 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m3, high half of m1 }
 *   b1 = { high half of m6, high half of m5 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {7, 3} into b0 and {13, 11} into b1.
 */
#define LOAD_MSG_3_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m3, m1); \
        (b1) = _mm_unpackhi_epi64(m6, m5); \
    } while (0)
121 
122 
/*
 * LOAD_MSG_3_2(b0, b1): assigns two vectors built from m0, m4, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m4, high half of m0 }
 *   b1 = { low half of m6, low half of m7 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {9, 1} into b0 and {12, 14} into b1.
 */
#define LOAD_MSG_3_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m0); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
129 
130 
/*
 * LOAD_MSG_3_3(b0, b1): assigns two vectors built from m1, m2 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m1, high half of m2 }   (blend mask 0xF0)
 *   b1 = { low half of m2, high half of m7 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {2, 5} into b0 and {4, 15} into b1.
 */
#define LOAD_MSG_3_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m1, m2, 0xF0); \
        (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)
137 
138 
/*
 * LOAD_MSG_3_4(b0, b1): assigns two vectors built from m0, m3, m4 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m3, low half of m5 }
 *   b1 = { low half of m0, low half of m4 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {6, 10} into b0 and {0, 8} into b1.
 */
#define LOAD_MSG_3_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m3, m5); \
        (b1) = _mm_unpacklo_epi64(m0, m4); \
    } while (0)
145 
146 
/*
 * LOAD_MSG_4_1(b0, b1): assigns two vectors built from m1, m2, m4 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m4, high half of m2 }
 *   b1 = { low half of m1, low half of m5 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {9, 5} into b0 and {2, 10} into b1.
 */
#define LOAD_MSG_4_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m2); \
        (b1) = _mm_unpacklo_epi64(m1, m5); \
    } while (0)
153 
154 
/*
 * LOAD_MSG_4_2(b0, b1): assigns two vectors built from m0, m2, m3 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m0, high half of m3 }   (blend mask 0xF0)
 *   b1 = { low half of m2, high half of m7 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {0, 7} into b0 and {4, 15} into b1.
 */
#define LOAD_MSG_4_2(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m0, m3, 0xF0); \
        (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)
161 
162 
/*
 * LOAD_MSG_4_3(b0, b1): assigns two vectors built from m1, m3, m5 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m7, high half of m5 }   (blend mask 0xF0)
 *   b1 = { low half of m3, high half of m1 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {14, 11} into b0 and {6, 3} into b1.
 */
#define LOAD_MSG_4_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m7, m5, 0xF0); \
        (b1) = _mm_blend_epi16(m3, m1, 0xF0); \
    } while (0)
169 
170 
/*
 * LOAD_MSG_4_4(b0, b1): assigns two vectors built from m0, m4 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m0, low half of m6 }   (alignr by 8 bytes)
 *   b1 = { low half of m4, high half of m6 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {1, 12} into b0 and {8, 13} into b1.
 */
#define LOAD_MSG_4_4(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m6, m0, 8); \
        (b1) = _mm_blend_epi16(m4, m6, 0xF0); \
    } while (0)
177 
178 
/*
 * LOAD_MSG_5_1(b0, b1): assigns two vectors built from m0, m1, m3 and m4,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m1, low half of m3 }
 *   b1 = { low half of m0, low half of m4 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {2, 6} into b0 and {0, 8} into b1.
 */
#define LOAD_MSG_5_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m1, m3); \
        (b1) = _mm_unpacklo_epi64(m0, m4); \
    } while (0)
185 
186 
/*
 * LOAD_MSG_5_2(b0, b1): assigns two vectors built from m1, m5 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m6, low half of m5 }
 *   b1 = { high half of m5, high half of m1 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {12, 10} into b0 and {11, 3} into b1.
 */
#define LOAD_MSG_5_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m5); \
        (b1) = _mm_unpackhi_epi64(m5, m1); \
    } while (0)
193 
194 
/*
 * LOAD_MSG_5_3(b0, b1): assigns two vectors built from m0, m2, m3 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m2, high half of m3 }   (blend mask 0xF0)
 *   b1 = { high half of m7, high half of m0 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {4, 7} into b0 and {15, 1} into b1.
 */
#define LOAD_MSG_5_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m2, m3, 0xF0); \
        (b1) = _mm_unpackhi_epi64(m7, m0); \
    } while (0)
201 
202 
/*
 * LOAD_MSG_5_4(b0, b1): assigns two vectors built from m2, m4, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m6, high half of m2 }
 *   b1 = { low half of m7, high half of m4 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {13, 5} into b0 and {14, 9} into b1.
 */
#define LOAD_MSG_5_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m6, m2); \
        (b1) = _mm_blend_epi16(m7, m4, 0xF0); \
    } while (0)
209 
210 
/*
 * LOAD_MSG_6_1(b0, b1): assigns two vectors built from m0, m2, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m6, high half of m0 }   (blend mask 0xF0)
 *   b1 = { low half of m7, low half of m2 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {12, 1} into b0 and {14, 4} into b1.
 */
#define LOAD_MSG_6_1(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m6, m0, 0xF0); \
        (b1) = _mm_unpacklo_epi64(m7, m2); \
    } while (0)
217 
218 
/*
 * LOAD_MSG_6_2(b0, b1): assigns two vectors built from m2, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m2, high half of m7 }
 *   b1 = { high half of m6, low half of m5 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {5, 15} into b0 and {13, 10} into b1.
 */
#define LOAD_MSG_6_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m2, m7); \
        (b1) = _mm_alignr_epi8(m5, m6, 8); \
    } while (0)
225 
226 
/*
 * LOAD_MSG_6_3(b0, b1): assigns two vectors built from m0, m3 and m4,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m0, low half of m3 }
 *   b1 = { high half of m4, low half of m4 }   (64-bit halves swapped)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {0, 6} into b0 and {9, 8} into b1.
 */
#define LOAD_MSG_6_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m3); \
        (b1) = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
    } while (0)
233 
234 
/*
 * LOAD_MSG_6_4(b0, b1): assigns two vectors built from m1, m3 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m3, high half of m1 }
 *   b1 = { low half of m1, high half of m5 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {7, 3} into b0 and {2, 11} into b1.
 */
#define LOAD_MSG_6_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m3, m1); \
        (b1) = _mm_blend_epi16(m1, m5, 0xF0); \
    } while (0)
241 
242 
/*
 * LOAD_MSG_7_1(b0, b1): assigns two vectors built from m1, m3 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m6, high half of m3 }
 *   b1 = { low half of m6, high half of m1 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {13, 7} into b0 and {12, 3} into b1.
 */
#define LOAD_MSG_7_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m6, m3); \
        (b1) = _mm_blend_epi16(m6, m1, 0xF0); \
    } while (0)
249 
250 
/*
 * LOAD_MSG_7_2(b0, b1): assigns two vectors built from m0, m4, m5 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m5, low half of m7 }   (alignr by 8 bytes)
 *   b1 = { high half of m0, high half of m4 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {11, 14} into b0 and {1, 9} into b1.
 */
#define LOAD_MSG_7_2(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m7, m5, 8); \
        (b1) = _mm_unpackhi_epi64(m0, m4); \
    } while (0)
257 
258 
/*
 * LOAD_MSG_7_3(b0, b1): assigns two vectors built from m1, m2, m4 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m2, high half of m7 }
 *   b1 = { low half of m4, low half of m1 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {5, 15} into b0 and {8, 2} into b1.
 */
#define LOAD_MSG_7_3(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m2, m7); \
        (b1) = _mm_unpacklo_epi64(m4, m1); \
    } while (0)
265 
266 
/*
 * LOAD_MSG_7_4(b0, b1): assigns two vectors built from m0, m2, m3 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m0, low half of m2 }
 *   b1 = { low half of m3, low half of m5 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {0, 4} into b0 and {6, 10} into b1.
 */
#define LOAD_MSG_7_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m2); \
        (b1) = _mm_unpacklo_epi64(m3, m5); \
    } while (0)
273 
274 
/*
 * LOAD_MSG_8_1(b0, b1): assigns two vectors built from m0, m3, m5 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m3, low half of m7 }
 *   b1 = { high half of m5, low half of m0 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {6, 14} into b0 and {11, 0} into b1.
 */
#define LOAD_MSG_8_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m3, m7); \
        (b1) = _mm_alignr_epi8(m0, m5, 8); \
    } while (0)
281 
282 
/*
 * LOAD_MSG_8_2(b0, b1): assigns two vectors built from m1, m4 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m7, high half of m4 }
 *   b1 = { high half of m1, low half of m4 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {15, 9} into b0 and {3, 8} into b1.
 */
#define LOAD_MSG_8_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m7, m4); \
        (b1) = _mm_alignr_epi8(m4, m1, 8); \
    } while (0)
289 
290 
/*
 * LOAD_MSG_8_3(b0, b1): assigns two vectors built from m0, m5 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = m6, copied unchanged
 *   b1 = { high half of m0, low half of m5 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {12, 13} into b0 and {1, 10} into b1.
 */
#define LOAD_MSG_8_3(b0, b1) \
    do { \
        (b0) = m6; \
        (b1) = _mm_alignr_epi8(m5, m0, 8); \
    } while (0)
297 
298 
/*
 * LOAD_MSG_8_4(b0, b1): assigns two vectors built from m1, m2 and m3,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m1, high half of m3 }   (blend mask 0xF0)
 *   b1 = m2, copied unchanged
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {2, 7} into b0 and {4, 5} into b1.
 */
#define LOAD_MSG_8_4(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m1, m3, 0xF0); \
        (b1) = m2; \
    } while (0)
305 
306 
/*
 * LOAD_MSG_9_1(b0, b1): assigns two vectors built from m0, m3, m4 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m5, low half of m4 }
 *   b1 = { high half of m3, high half of m0 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {10, 8} into b0 and {7, 1} into b1.
 */
#define LOAD_MSG_9_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_unpackhi_epi64(m3, m0); \
    } while (0)
313 
314 
/*
 * LOAD_MSG_9_2(b0, b1): assigns two vectors built from m1, m2 and m3,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { low half of m1, low half of m2 }
 *   b1 = { low half of m3, high half of m2 }   (blend mask 0xF0)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {2, 4} into b0 and {6, 5} into b1.
 */
#define LOAD_MSG_9_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m1, m2); \
        (b1) = _mm_blend_epi16(m3, m2, 0xF0); \
    } while (0)
321 
322 
/*
 * LOAD_MSG_9_3(b0, b1): assigns two vectors built from m1, m4, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m7, high half of m4 }
 *   b1 = { high half of m1, high half of m6 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {15, 9} into b0 and {3, 13} into b1.
 */
#define LOAD_MSG_9_3(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m7, m4); \
        (b1) = _mm_unpackhi_epi64(m1, m6); \
    } while (0)
329 
330 
/*
 * LOAD_MSG_9_4(b0, b1): assigns two vectors built from m0, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 *   b0 = { high half of m5, low half of m7 }   (alignr by 8 bytes)
 *   b1 = { low half of m6, low half of m0 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {11, 14} into b0 and {12, 0} into b1.
 */
#define LOAD_MSG_9_4(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m7, m5, 8); \
        (b1) = _mm_unpacklo_epi64(m6, m0); \
    } while (0)
337 
338 
/*
 * LOAD_MSG_10_1(b0, b1): assigns two vectors built from m0, m1, m2 and m3,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_0_1 performs.
 *   b0 = { low half of m0, low half of m1 }
 *   b1 = { low half of m2, low half of m3 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {0, 2} into b0 and {4, 6} into b1.
 */
#define LOAD_MSG_10_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m1); \
        (b1) = _mm_unpacklo_epi64(m2, m3); \
    } while (0)
345 
346 
/*
 * LOAD_MSG_10_2(b0, b1): assigns two vectors built from m0, m1, m2 and m3,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_0_2 performs.
 *   b0 = { high half of m0, high half of m1 }
 *   b1 = { high half of m2, high half of m3 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {1, 3} into b0 and {5, 7} into b1.
 */
#define LOAD_MSG_10_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m0, m1); \
        (b1) = _mm_unpackhi_epi64(m2, m3); \
    } while (0)
353 
354 
/*
 * LOAD_MSG_10_3(b0, b1): assigns two vectors built from m4, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_0_3 performs.
 *   b0 = { low half of m4, low half of m5 }
 *   b1 = { low half of m6, low half of m7 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {8, 10} into b0 and {12, 14} into b1.
 */
#define LOAD_MSG_10_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m5); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
361 
362 
/*
 * LOAD_MSG_10_4(b0, b1): assigns two vectors built from m4, m5, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_0_4 performs.
 *   b0 = { high half of m4, high half of m5 }
 *   b1 = { high half of m6, high half of m7 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {9, 11} into b0 and {13, 15} into b1.
 */
#define LOAD_MSG_10_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m5); \
        (b1) = _mm_unpackhi_epi64(m6, m7); \
    } while (0)
369 
370 
/*
 * LOAD_MSG_11_1(b0, b1): assigns two vectors built from m2, m4, m6 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_1_1 performs.
 *   b0 = { low half of m7, low half of m2 }
 *   b1 = { high half of m4, high half of m6 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {14, 4} into b0 and {9, 13} into b1.
 */
#define LOAD_MSG_11_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m2); \
        (b1) = _mm_unpackhi_epi64(m4, m6); \
    } while (0)
377 
378 
/*
 * LOAD_MSG_11_2(b0, b1): assigns two vectors built from m3, m4, m5 and m7,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_1_2 performs.
 *   b0 = { low half of m5, low half of m4 }
 *   b1 = { high half of m7, low half of m3 }   (alignr by 8 bytes)
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {10, 8} into b0 and {15, 6} into b1.
 */
#define LOAD_MSG_11_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)
385 
386 
/*
 * LOAD_MSG_11_3(b0, b1): assigns two vectors built from m0, m2 and m5,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_1_3 performs.
 *   b0 = { high half of m0, low half of m0 }   (64-bit halves swapped)
 *   b1 = { high half of m5, high half of m2 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {1, 0} into b0 and {11, 5} into b1.
 */
#define LOAD_MSG_11_3(b0, b1) \
    do { \
        (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        (b1) = _mm_unpackhi_epi64(m5, m2); \
    } while (0)
393 
394 
/*
 * LOAD_MSG_11_4(b0, b1): assigns two vectors built from m1, m3 and m6,
 * which are not parameters and must be visible where the macro is expanded.
 * The two assignments are the same ones LOAD_MSG_1_4 performs.
 *   b0 = { low half of m6, low half of m1 }
 *   b1 = { high half of m3, high half of m1 }
 * Lanes are listed low 64 bits first.  Assuming m<i> carries message words
 * 2i (low) and 2i+1 (high) -- confirm at the call site -- this selects
 * words {12, 2} into b0 and {7, 3} into b1.
 */
#define LOAD_MSG_11_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m1); \
        (b1) = _mm_unpackhi_epi64(m3, m1); \
    } while (0)
401 
402 
403 #endif
404 
405