/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */

#include <openssl/base.h>

#include <assert.h>
#include <string.h>

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"


#if !defined(OPENSSL_NO_ASM) &&                          \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) ||  \
     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
     defined(OPENSSL_PPC64LE))
#define GHASH_ASM
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
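/* REDUCE1BIT shifts the 128-bit value |V| right by one bit and, when a set
 * bit falls off the low end, reduces the result modulo the GCM polynomial by
 * XORing R = 0xe1 || 0^120 into the high word (NIST SP 800-38D, Algorithm 1).
 * The 32-bit branch computes the same mask with a 32-bit constant so that
 * 32-bit targets avoid a 64-bit AND. */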
#define REDUCE1BIT(V)                                                 \
  do {                                                                \
    if (sizeof(size_t) == 8) {                                        \
      uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ T;                                     \
    } else {                                                          \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1));        \
      (V).lo = ((V).hi << 63) | ((V).lo >> 1);                        \
      (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32);                   \
    }                                                                 \
  } while (0)

// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

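/* gcm_init_4bit expands the hash key |H| into a table with Htable[i] = i*H in
 * GF(2^128), so that the multiply routines below can process the input four
 * bits at a time with one table lookup per nibble. */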
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  for (int j = 0; j < 16; ++j) {
    V = Htable[j];
    Htable[j].hi = V.lo;
    Htable[j].lo = V.hi;
  }
#endif
}

#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
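/* rem_4bit[r] is the 16-bit reduction constant, pre-shifted into the top of a
 * size_t by PACK, that must be folded back into Z when the four bits |r| are
 * shifted out of the low end during the per-nibble reduction below. */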
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

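/* gcm_gmult_4bit computes Xi = Xi * H using the 4-bit table above: the bytes
 * of Xi are consumed from the last to the first, low nibble before high
 * nibble, with a rem_4bit reduction after every 4-bit shift. The result is
 * written back to Xi in big-endian order. */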
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  Xi[0] = CRYPTO_bswap8(Z.hi);
  Xi[1] = CRYPTO_bswap8(Z.lo);
}

/* gcm_ghash_4bit is the streamed counterpart of gcm_gmult_4bit; see
 * CRYPTO_gcm128_[en|de]crypt for how it is used. Compiler-generated code does
 * not seem to give any performance improvement, at least not on x86[_64], so
 * it is kept mostly as a reference and as a placeholder for possible future
 * non-trivial optimizations. */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    Xi[0] = CRYPTO_bswap8(Z.hi);
    Xi[1] = CRYPTO_bswap8(Z.lo);
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is a "stride parameter" intended to mitigate cache thrashing:
 * the idea is to hash data while it is still in the L1 cache after the
 * encryption pass. */
#define GHASH_CHUNK (3 * 1024)
#endif


#if defined(GHASH_ASM)

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
                   size_t len);
#define AESNI_GCM
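/* The stitched AES-NI + AVX code below is only used when the caller's CTR
 * implementation is aesni_ctr32_encrypt_blocks and GHASH was configured with
 * the AVX routines; aesni_gcm_enabled checks both conditions. */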
static int aesni_gcm_enabled(GCM128_CONTEXT *ctx, ctr128_f stream) {
  return stream == aesni_ctr32_encrypt_blocks &&
         ctx->ghash == gcm_ghash_avx;
}

size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                         const void *key, uint8_t ivec[16], uint64_t *Xi);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                        size_t len);
#endif

#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT

static int pmull_capable(void) {
  return CRYPTO_is_ARMv8_PMULL_capable();
}

void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);

#if defined(OPENSSL_ARM)
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
static int neon_capable(void) {
  return CRYPTO_is_NEON_capable();
}

void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#else
/* AArch64 only has the ARMv8 versions of functions. */
static int neon_capable(void) {
  return 0;
}
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
  abort();
}
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
  abort();
}
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  abort();
}
#endif

#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                  size_t len);
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
#endif
#endif

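/* CRYPTO_ghash_init converts the big-endian key block |gcm_key| into host
 * byte order and then selects, at run time, the fastest GHASH implementation
 * the current CPU supports, falling back to the portable 4-bit table code. */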
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                       u128 *out_key, u128 out_table[16],
                       const uint8_t *gcm_key) {
  union {
    uint64_t u[2];
    uint8_t c[16];
  } H;

  OPENSSL_memcpy(H.c, gcm_key, 16);

  /* H is stored in host byte order */
  H.u[0] = CRYPTO_bswap8(H.u[0]);
  H.u[1] = CRYPTO_bswap8(H.u[1]);

  OPENSSL_memcpy(out_key, H.c, 16);

#if defined(GHASH_ASM_X86_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(out_table, H.u);
      *out_mult = gcm_gmult_avx;
      *out_hash = gcm_ghash_avx;
      return;
    }
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_X86)
  if (crypto_gcm_clmul_enabled()) {
    gcm_init_clmul(out_table, H.u);
    *out_mult = gcm_gmult_clmul;
    *out_hash = gcm_ghash_clmul;
    return;
  }
#elif defined(GHASH_ASM_ARM)
  if (pmull_capable()) {
    gcm_init_v8(out_table, H.u);
    *out_mult = gcm_gmult_v8;
    *out_hash = gcm_ghash_v8;
    return;
  }

  if (neon_capable()) {
    gcm_init_neon(out_table, H.u);
    *out_mult = gcm_gmult_neon;
    *out_hash = gcm_ghash_neon;
    return;
  }
#elif defined(GHASH_ASM_PPC64LE)
  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
    gcm_init_p8(out_table, H.u);
    *out_mult = gcm_gmult_p8;
    *out_hash = gcm_ghash_p8;
    return;
  }
#endif

  gcm_init_4bit(out_table, H.u);
#if defined(GHASH_ASM_X86)
  *out_mult = gcm_gmult_4bit_mmx;
  *out_hash = gcm_ghash_4bit_mmx;
#else
  *out_mult = gcm_gmult_4bit;
  *out_hash = gcm_ghash_4bit;
#endif
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
                        block128_f block) {
  OPENSSL_memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;

  uint8_t gcm_key[16];
  OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
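  /* The hash subkey H is the block cipher applied to the all-zero block. */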
  (*block)(gcm_key, gcm_key, aes_key);

  CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, gcm_key);
}

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
                         const uint8_t *iv, size_t len) {
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

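  /* Per NIST SP 800-38D, a 96-bit IV is used directly as the pre-counter
   * block: Y0 = IV || 0^31 || 1. Any other IV length is hashed instead:
   * Y0 = GHASH(IV padded to a 128-bit boundary || [0]_64 || [len(IV)]_64). */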
  if (len == 12) {
    OPENSSL_memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    uint64_t len0 = len;

    while (len >= 16) {
      for (size_t i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (size_t i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);

    GCM_MUL(ctx, Yi);
    ctr = GETU32_aligned(ctx->Yi.c + 12);
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
  ++ctr;
  PUTU32_aligned(ctx->Yi.c + 12, ctr);
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
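  /* NIST SP 800-38D caps the AAD at 2^64 - 1 bits, i.e. just under 2^61
   * bytes; also reject additions that would overflow the 64-bit counter. */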
  if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

  /* Process a whole number of blocks. */
#ifdef GHASH
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, aad, len_blocks);
    aad += len_blocks;
    len -= len_blocks;
  }
#else
  while (len >= 16) {
    for (size_t i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  /* Process the remainder. */
  if (len != 0) {
    n = (unsigned int)len;
    for (size_t i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
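  /* NIST SP 800-38D limits the plaintext for a single invocation to
   * 2^39 - 256 bits, i.e. 2^36 - 32 bytes. */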
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = GETU32_aligned(ctx->Yi.c + 12);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        PUTU32_aligned(ctx->Yi.c + 12, ctr);
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
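  /* Encrypt in GHASH_CHUNK-sized pieces and hash the ciphertext while it is
   * still warm in the cache. */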
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - len_blocks, len_blocks);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
                          const unsigned char *in, unsigned char *out,
                          size_t len) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  ctr = GETU32_aligned(ctx->Yi.c + 12);

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }
  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (size_t i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        PUTU32_aligned(ctx->Yi.c + 12, ctr);
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }
#if defined(GHASH) && defined(GHASH_CHUNK)
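  /* For decryption, the ciphertext is hashed before it is decrypted, so GHASH
   * runs over |in| rather than |out|. */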
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  size_t len_blocks = len & kSizeTWithoutLower4Bits;
  if (len_blocks != 0) {
    GHASH(ctx, in, len_blocks);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      PUTU32_aligned(ctx->Yi.c + 12, ctr);
      for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    for (size_t i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (aesni_gcm_enabled(ctx, stream)) {
    /* |aesni_gcm_encrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = GETU32_aligned(ctx->Yi.c + 12);

#if defined(GHASH)
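  /* |stream| encrypts whole 16-byte blocks with a 32-bit big-endian counter
   * taken from the last four bytes of Yi; the local copy in |ctr| is advanced
   * here and written back to Yi after each call. */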
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
                                const uint8_t *in, uint8_t *out, size_t len,
                                ctr128_f stream) {
  unsigned int n, ctr;
  uint64_t mlen = ctx->len.u[1];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                      size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((UINT64_C(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(AESNI_GCM)
  if (aesni_gcm_enabled(ctx, stream)) {
    /* |aesni_gcm_decrypt| may not process all the input given to it. It may
     * not process *any* of its input if it is deemed too small. */
    size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
    in += bulk;
    out += bulk;
    len -= bulk;
  }
#endif

  ctr = GETU32_aligned(ctx->Yi.c + 12);

#if defined(GHASH)
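  /* As in CRYPTO_gcm128_decrypt, the ciphertext is hashed before it is handed
   * to |stream| for decryption. */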
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  size_t i = len & kSizeTWithoutLower4Bits;
  if (i != 0) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    PUTU32_aligned(ctx->Yi.c + 12, ctr);
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

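  /* Finish GHASH with the lengths block [len(A)]_64 || [len(C)]_64 (in bits),
   * then mask the result with E_K(Y0) to form the tag. */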
  alen = CRYPTO_bswap8(alen);
  clen = CRYPTO_bswap8(clen);

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  OPENSSL_memcpy(tag, ctx->Xi.c,
                 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif