1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Adiantum encryption mode
18 //
19 // Reference: "Adiantum: length-preserving encryption for entry-level
20 // processors" https://tosc.iacr.org/index.php/ToSC/article/view/7360
21 
22 #include <asm/byteorder.h>
23 #include <gtest/gtest.h>
24 #include <linux/types.h>
25 #include <openssl/aes.h>
26 #include <openssl/poly1305.h>
27 #include <string.h>
28 
29 #include "vts_kernel_encryption.h"
30 
31 namespace android {
32 namespace kernel {
33 
34 #define cpu_to_le32 __cpu_to_le32
35 #define cpu_to_le64 __cpu_to_le64
36 #define le32_to_cpu __le32_to_cpu
37 #define le64_to_cpu __le64_to_cpu
38 
// Loads a 32-bit little-endian value from a possibly unaligned address.
static uint32_t get_unaligned_le32(const void *p) {
  const uint8_t *b = (const uint8_t *)p;

  // Assemble byte-by-byte; well-defined regardless of alignment/endianness.
  return (uint32_t)b[0] | ((uint32_t)b[1] << 8) | ((uint32_t)b[2] << 16) |
         ((uint32_t)b[3] << 24);
}
45 
// Stores a 32-bit value in little-endian order at a possibly unaligned
// address.
static void put_unaligned_le32(uint32_t v, void *p) {
  uint8_t *b = (uint8_t *)p;

  b[0] = (uint8_t)v;
  b[1] = (uint8_t)(v >> 8);
  b[2] = (uint8_t)(v >> 16);
  b[3] = (uint8_t)(v >> 24);
}
51 
// Stores a 64-bit value in little-endian order at a possibly unaligned
// address.
static void put_unaligned_le64(uint64_t v, void *p) {
  uint8_t *b = (uint8_t *)p;

  for (int i = 0; i < 8; i++) {
    b[i] = (uint8_t)(v >> (8 * i));
  }
}
57 
// Rounds 'a' up to the next multiple of 'b'.  b must be nonzero.
//
// The previous formula (a + -a % b) relied on unsigned wraparound and is only
// correct when b divides 2^32 (i.e. b is a power of two); e.g. it returned 7
// for round_up(5, 3).  The remainder-based form below is correct for every b
// and gives identical results for the power-of-two block sizes used here.
static unsigned int round_up(unsigned int a, unsigned int b) {
  unsigned int r = a % b;

  return r == 0 ? a : a + (b - r);
}
61 
rol32(uint32_t v,int n)62 static uint32_t rol32(uint32_t v, int n) { return (v << n) | (v >> (32 - n)); }
63 
// res = a + b, where all three are 128-bit little-endian integers
// (16 bytes, least significant byte first).  Addition wraps mod 2^128.
static void le128_add(uint8_t res[16], const uint8_t a[16],
                      const uint8_t b[16]) {
  unsigned int acc = 0;

  for (int i = 0; i < 16; i++) {
    acc += (unsigned int)a[i] + b[i];
    res[i] = (uint8_t)acc;  // low byte of the running sum
    acc >>= 8;              // carry into the next byte
  }
}
74 
// res = a - b, where all three are 128-bit little-endian integers.
// Subtraction wraps mod 2^128.
static void le128_sub(uint8_t res[16], const uint8_t a[16],
                      const uint8_t b[16]) {
  int borrow = 0;

  for (int i = 0; i < 16; i++) {
    int diff = (int)a[i] - b[i] - borrow;

    res[i] = (uint8_t)diff;
    borrow = diff < 0;  // propagate the borrow to the next byte
  }
}
85 
86 constexpr int kChaChaKeySize = 32;
87 constexpr int kXChaChaKeySize = kChaChaKeySize;
88 constexpr int kXChaChaNonceSize = 24;
89 
// Initializes the 16-word ChaCha state: 4 constant words ("expand 32-byte k"),
// 8 key words, and 4 IV words, each loaded as little-endian 32-bit values.
static void ChaChaInitState(uint32_t state[16],
                            const uint8_t key[kChaChaKeySize],
                            const uint8_t iv[16]) {
  // The standard ChaCha "sigma" constant, as 16 raw bytes.
  static const uint8_t kSigma[16] = {'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
                                     '2', '-', 'b', 'y', 't', 'e', ' ', 'k'};

  for (int w = 0; w < 16; w++) {
    const uint8_t *src;

    if (w < 4) {
      src = &kSigma[w * 4];           // words 0..3: constants
    } else if (w < 12) {
      src = &key[(w - 4) * 4];        // words 4..11: key
    } else {
      src = &iv[(w - 12) * 4];        // words 12..15: counter/nonce
    }
    state[w] = get_unaligned_le32(src);
  }
}
103 
104 #define CHACHA_QUARTERROUND(a, b, c, d) \
105   do {                                  \
106     a += b;                             \
107     d = rol32(d ^ a, 16);               \
108     c += d;                             \
109     b = rol32(b ^ c, 12);               \
110     a += b;                             \
111     d = rol32(d ^ a, 8);                \
112     c += d;                             \
113     b = rol32(b ^ c, 7);                \
114   } while (0)
115 
ChaChaPermute(uint32_t x[16],int nrounds)116 static void ChaChaPermute(uint32_t x[16], int nrounds) {
117   do {
118     // column round
119     CHACHA_QUARTERROUND(x[0], x[4], x[8], x[12]);
120     CHACHA_QUARTERROUND(x[1], x[5], x[9], x[13]);
121     CHACHA_QUARTERROUND(x[2], x[6], x[10], x[14]);
122     CHACHA_QUARTERROUND(x[3], x[7], x[11], x[15]);
123 
124     // diagonal round
125     CHACHA_QUARTERROUND(x[0], x[5], x[10], x[15]);
126     CHACHA_QUARTERROUND(x[1], x[6], x[11], x[12]);
127     CHACHA_QUARTERROUND(x[2], x[7], x[8], x[13]);
128     CHACHA_QUARTERROUND(x[3], x[4], x[9], x[14]);
129   } while ((nrounds -= 2) != 0);
130 }
131 
// XChaCha stream cipher with a 192-bit nonce: first derives a subkey from
// the key and the first 128 nonce bits (the HChaCha construction), then runs
// regular ChaCha with that subkey and the remaining 64 nonce bits.
// Encrypts/decrypts nbytes from src into dst (src may equal dst).
static void XChaCha(const uint8_t key[kXChaChaKeySize],
                    const uint8_t nonce[kXChaChaNonceSize], const uint8_t *src,
                    uint8_t *dst, int nbytes, int nrounds) {
  uint32_t state[16];
  uint8_t real_key[kChaChaKeySize];
  uint8_t real_iv[16] = {0};
  int i, j;

  // Compute real key using original key and first 128 nonce bits.
  // This is HChaCha: permute the state, then extract words 0..3 and 12..15
  // (no feed-forward addition, unlike the ChaCha block function).
  ChaChaInitState(state, key, nonce);
  ChaChaPermute(state, nrounds);
  for (i = 0; i < 8; i++)  // state words 0..3, 12..15
    put_unaligned_le32(state[(i < 4 ? 0 : 8) + i],
                       &real_key[i * sizeof(__le32)]);

  // Now do regular ChaCha, using real key and remaining nonce bits.
  // real_iv[0..7] is the 64-bit block counter (starts at 0);
  // real_iv[8..15] holds the last 8 bytes of the 24-byte XChaCha nonce.
  memcpy(&real_iv[8], nonce + 16, 8);
  ChaChaInitState(state, real_key, real_iv);
  for (i = 0; i < nbytes; i += 64) {
    uint32_t x[16];
    union {
      __le32 words[16];
      uint8_t bytes[64];
    } keystream;

    // Generate one 64-byte keystream block: permute a copy of the state,
    // then add the input state back in (the ChaCha feed-forward).
    memcpy(x, state, 64);
    ChaChaPermute(x, nrounds);
    for (j = 0; j < 16; j++) keystream.words[j] = cpu_to_le32(x[j] + state[j]);
    // XOR the keystream into the (possibly partial) final block.
    for (j = 0; j < std::min(nbytes - i, 64); j++)
      dst[i + j] = src[i + j] ^ keystream.bytes[j];
    // Increment the 64-bit block counter held in state words 12 and 13.
    if (++state[12] == 0) state[13]++;
  }
}
165 
166 // XChaCha12 stream cipher
167 //
168 // References:
169 //   - "XChaCha: eXtended-nonce ChaCha and AEAD_XChaCha20_Poly1305"
170 //	https://tools.ietf.org/html/draft-arciszewski-xchacha-03
171 //
172 //   - "ChaCha, a variant of Salsa20"
173 //	https://cr.yp.to/chacha/chacha-20080128.pdf
174 //
175 //   - "Extending the Salsa20 nonce"
176 //	https://cr.yp.to/snuffle/xsalsa-20081128.pdf
// XChaCha with the reduced 12-round core, as used by Adiantum.
static void XChaCha12(const uint8_t key[kXChaChaKeySize],
                      const uint8_t nonce[kXChaChaNonceSize],
                      const uint8_t *src, uint8_t *dst, int nbytes) {
  XChaCha(key, nonce, src, dst, nbytes, /*nrounds=*/12);
}
182 
183 constexpr int kPoly1305BlockSize = 16;
184 constexpr int kPoly1305KeySize = 16;
185 constexpr int kPoly1305HashSize = 16;
186 
// Poly1305 as an ε-almost-∆-universal hash function (not the full MAC).
//
// BoringSSL only exposes the MAC, which is hash + s where s is the second
// half of the 32-byte MAC key.  Leaving that half all-zero makes the MAC
// output equal the bare Poly1305 hash that Adiantum needs.
static void Poly1305(const uint8_t key[kPoly1305KeySize], const uint8_t *msg,
                     int msglen, uint8_t out[kPoly1305HashSize]) {
  uint8_t mac_key[2 * kPoly1305KeySize] = {0};

  // First half: the real Poly1305 key.  Second half (s): stays zero.
  memcpy(mac_key, key, kPoly1305KeySize);

  poly1305_state ctx;
  CRYPTO_poly1305_init(&ctx, mac_key);
  CRYPTO_poly1305_update(&ctx, msg, msglen);
  CRYPTO_poly1305_finish(&ctx, out);
}
203 
204 constexpr int kNHBlockSize = 1024;
205 constexpr int kNHHashSize = 32;
206 constexpr int kNHKeySize = 1072;
207 constexpr int kNHKeyWords = kNHKeySize / sizeof(uint32_t);
208 constexpr int kNHMessageUnit = 16;
209 
NH_Add(const uint8_t * a,uint32_t b)210 static uint64_t NH_Add(const uint8_t *a, uint32_t b) {
211   return static_cast<uint32_t>(get_unaligned_le32(a) + b);
212 }
213 
NH_Pass(const uint32_t * key,const uint8_t * msg,int msglen)214 static uint64_t NH_Pass(const uint32_t *key, const uint8_t *msg, int msglen) {
215   uint64_t sum = 0;
216 
217   EXPECT_TRUE(msglen % kNHMessageUnit == 0);
218   while (msglen >= kNHMessageUnit) {
219     sum += NH_Add(msg + 0, key[0]) * NH_Add(msg + 8, key[2]);
220     sum += NH_Add(msg + 4, key[1]) * NH_Add(msg + 12, key[3]);
221     key += kNHMessageUnit / sizeof(key[0]);
222     msg += kNHMessageUnit;
223     msglen -= kNHMessageUnit;
224   }
225   return sum;
226 }
227 
228 // NH ε-almost-universal hash function
NH(const uint32_t * key,const uint8_t * msg,int msglen,uint8_t result[kNHHashSize])229 static void NH(const uint32_t *key, const uint8_t *msg, int msglen,
230                uint8_t result[kNHHashSize]) {
231   int i;
232 
233   for (i = 0; i < kNHHashSize; i += sizeof(__le64)) {
234     put_unaligned_le64(NH_Pass(key, msg, msglen), &result[i]);
235     key += kNHMessageUnit / sizeof(key[0]);
236   }
237 }
238 
239 constexpr int kAdiantumHashKeySize = (2 * kPoly1305KeySize) + kNHKeySize;
240 
241 // Adiantum's ε-almost-∆-universal hash function
AdiantumHash(const uint8_t key[kAdiantumHashKeySize],const uint8_t iv[kAdiantumIVSize],const uint8_t * msg,int msglen,uint8_t result[kPoly1305HashSize])242 static void AdiantumHash(const uint8_t key[kAdiantumHashKeySize],
243                          const uint8_t iv[kAdiantumIVSize], const uint8_t *msg,
244                          int msglen, uint8_t result[kPoly1305HashSize]) {
245   const uint8_t *header_poly_key = key;
246   const uint8_t *msg_poly_key = header_poly_key + kPoly1305KeySize;
247   const uint8_t *nh_key = msg_poly_key + kPoly1305KeySize;
248   uint32_t nh_key_words[kNHKeyWords];
249   uint8_t header[kPoly1305BlockSize + kAdiantumIVSize];
250   const int num_nh_blocks = (msglen + kNHBlockSize - 1) / kNHBlockSize;
251   std::unique_ptr<uint8_t> nh_hashes(new uint8_t[num_nh_blocks * kNHHashSize]);
252   const int padded_msglen = round_up(msglen, kNHMessageUnit);
253   std::unique_ptr<uint8_t> padded_msg(new uint8_t[padded_msglen]);
254   uint8_t hash1[kPoly1305HashSize], hash2[kPoly1305HashSize];
255   int i;
256 
257   for (i = 0; i < kNHKeyWords; i++)
258     nh_key_words[i] = get_unaligned_le32(&nh_key[i * sizeof(uint32_t)]);
259 
260   // Hash tweak and message length with first Poly1305 key
261   put_unaligned_le64(static_cast<uint64_t>(msglen) * 8, header);
262   put_unaligned_le64(0, &header[sizeof(__le64)]);
263   memcpy(&header[kPoly1305BlockSize], iv, kAdiantumIVSize);
264   Poly1305(header_poly_key, header, sizeof(header), hash1);
265 
266   // Hash NH hashes of message blocks using second Poly1305 key
267   // (using a super naive way of handling the padding)
268   memcpy(padded_msg.get(), msg, msglen);
269   memset(&padded_msg.get()[msglen], 0, padded_msglen - msglen);
270   for (i = 0; i < num_nh_blocks; i++) {
271     NH(nh_key_words, &padded_msg.get()[i * kNHBlockSize],
272        std::min(kNHBlockSize, padded_msglen - (i * kNHBlockSize)),
273        &nh_hashes.get()[i * kNHHashSize]);
274   }
275   Poly1305(msg_poly_key, nh_hashes.get(), num_nh_blocks * kNHHashSize, hash2);
276 
277   // Add the two hashes together to get the final hash
278   le128_add(result, hash1, hash2);
279 }
280 
// Encrypts nbytes of src into dst with Adiantum under the given key and IV
// (tweak).  The message is split into a bulk "left" part and a final
// 16-byte "right" block, per the Adiantum hash-encrypt-hash construction.
// Returns false (with a gtest failure) on bad input or key setup.
bool AdiantumCipher::DoEncrypt(const uint8_t key[kAdiantumKeySize],
                               const uint8_t iv[kAdiantumIVSize],
                               const uint8_t *src, uint8_t *dst,
                               int nbytes) const {
  // rbuf first serves as the XChaCha nonce {1, 0, ..., 0} for subkey
  // derivation; later it is reused to hold the 16-byte right block followed
  // by the nonce suffix byte.
  uint8_t rbuf[kXChaChaNonceSize] = {1};
  uint8_t hash[kPoly1305HashSize];

  static_assert(kAdiantumKeySize == kXChaChaKeySize);
  static_assert(kPoly1305HashSize == kAesBlockSize);
  static_assert(kXChaChaNonceSize > kAesBlockSize);

  // Adiantum needs at least one full AES block (the right part).
  if (nbytes < kAesBlockSize) {
    ADD_FAILURE() << "Bad input size";
    return false;
  }

  // Derive subkeys (AES-256 key, then the Adiantum hash key) by extracting
  // XChaCha12 keystream: encrypting all-zero bytes in place yields the raw
  // keystream.
  uint8_t subkeys[kAes256KeySize + kAdiantumHashKeySize] = {0};
  XChaCha12(key, rbuf, subkeys, subkeys, sizeof(subkeys));

  AES_KEY aes_key;
  if (AES_set_encrypt_key(subkeys, kAes256KeySize * 8, &aes_key) != 0) {
    ADD_FAILURE() << "Failed to set AES key";
    return false;
  }

  // Hash left part and add to right part: CR_in = PR + H(T, PL) mod 2^128.
  const int bulk_len = nbytes - kAesBlockSize;
  AdiantumHash(&subkeys[kAes256KeySize], iv, src, bulk_len, hash);
  le128_add(rbuf, &src[bulk_len], hash);

  // Encrypt right part with block cipher (in place in rbuf's first 16 bytes).
  AES_encrypt(rbuf, rbuf, &aes_key);

  // Encrypt left part with stream cipher, using the computed nonce:
  // the encrypted right block followed by the byte 1 (rest of rbuf is the
  // all-zero remainder of its initializer).
  rbuf[kAesBlockSize] = 1;
  XChaCha12(key, rbuf, src, dst, bulk_len);

  // Finalize right part by subtracting hash of left part (now ciphertext):
  // CR = E(CR_in) - H(T, CL) mod 2^128.
  AdiantumHash(&subkeys[kAes256KeySize], iv, dst, bulk_len, hash);
  le128_sub(&dst[bulk_len], rbuf, hash);
  return true;
}
324 
325 }  // namespace kernel
326 }  // namespace android
327