/*
 ---------------------------------------------------------------------------
 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The redistribution and use of this software (with or without changes)
 is allowed without the payment of fees or royalties provided that:

  1. source code distributions include the above copyright notice, this
     list of conditions and the following disclaimer;

  2. binary distributions include the above copyright notice, this list
     of conditions and the following disclaimer in their documentation;

  3. the name of the copyright holder is not used to endorse products
     built using this software without specific written permission.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue 09/09/2006

 This is an AES implementation that uses only 8-bit byte operations on the
 cipher state (there are options to use 32-bit types if available).

 The combination of mix columns and byte substitution used here is based on
 that developed by Karl Malbrain. His contribution is acknowledged.
 */

/* define if you have a fast memcpy function on your system */
#if 1
#define HAVE_MEMCPY
#include <string.h>
#if 0
#if defined(_MSC_VER)
#include <intrin.h>
#pragma intrinsic(memcpy)
#endif
#endif
#endif

#include <stdlib.h>

/* add the target configuration to allow using internal data types and
 * compilation options */
#include "bt_target.h"

/* define if you have fast 32-bit types on your system */
#if 1
#define HAVE_UINT_32T
#endif

/* define if you want to use precomputed tables */
#if 1
#define USE_TABLES
#endif

/*  On an Intel Core 2 Duo, VERSION_1 is faster */

/* alternative versions (test for performance on your system) */
#if 1
#define VERSION_1
#endif

#include "aes.h"

#if defined(HAVE_UINT_32T)
typedef uint32_t uint_32t;
#endif

/* functions for finite field multiplication in the AES Galois field    */

#define WPOLY 0x011b
#define BPOLY 0x1b
#define DPOLY 0x008d

#define f1(x) (x)
#define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
#define f4(x) \
  (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
#define f8(x)                                                             \
  (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ \
   ((((x) >> 5) & 4) * WPOLY))
#define d2(x) (((x) >> 1) ^ ((x)&1 ? DPOLY : 0))

#define f3(x) (f2(x) ^ (x))
#define f9(x) (f8(x) ^ (x))
#define fb(x) (f8(x) ^ f2(x) ^ (x))
#define fd(x) (f8(x) ^ f4(x) ^ (x))
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
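
/* Editorial note, not in the original source: a short worked example of the
 * field multiplication macros above. Multiplying by x (the f2 macro) is a
 * left shift with a conditional reduction by WPOLY when the top bit falls
 * out of the byte, and d2 is its inverse. The self-test below is an
 * illustrative sketch only; enable it by changing the '#if 0' if you want
 * to compile and run it.
 */
#if 0
#include <assert.h>
static void gf_macro_selftest(void) {
  assert(f2(0x57) == 0xae); /* top bit clear: plain shift, no reduction     */
  assert(f2(0x80) == 0x1b); /* 0x100 spills out, reduce: 0x100 ^ 0x11b      */
  assert(f2(0xca) == 0x8f); /* 0x194 ^ 0x11b = 0x8f                         */
  assert(f3(0x57) == (0xae ^ 0x57)); /* f3 = multiply by 2, then add x      */
  assert(d2(0xae) == 0x57); /* d2 halves a value, undoing f2                */
  assert(d2(0x8f) == 0xca); /* odd input: fold the low bit back with DPOLY  */
}
#endif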

#if defined(USE_TABLES)

#define sb_data(w)                                                          \
  { /* S Box data values */                                                 \
    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), \
        w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab),      \
        w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59),      \
        w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c),      \
        w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26),      \
        w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5),      \
        w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), w(0xc7),      \
        w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), w(0x07),      \
        w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),      \
        w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a),      \
        w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3),      \
        w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20),      \
        w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), w(0x39),      \
        w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), w(0xaa),      \
        w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9),      \
        w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), w(0x51),      \
        w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),      \
        w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3),      \
        w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97),      \
        w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64),      \
        w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), w(0xdc),      \
        w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), w(0xb8),      \
        w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32),      \
        w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), w(0xc2),      \
        w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),      \
        w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e),      \
        w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a),      \
        w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c),      \
        w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f),      \
        w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), w(0xb5),      \
        w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), w(0x35),      \
        w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1),      \
        w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),      \
        w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28),      \
        w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6),      \
        w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0),      \
        w(0x54), w(0xbb), w(0x16)                                           \
  }

#define isb_data(w)                                                         \
  { /* inverse S Box data values */                                         \
    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), \
        w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7),      \
        w(0xfb), w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f),      \
        w(0xff), w(0x87), w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4),      \
        w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32),      \
        w(0xa6), w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95),      \
        w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e), w(0x08), w(0x2e),      \
        w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2), w(0x76),      \
        w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),      \
        w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98),      \
        w(0x16), w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65),      \
        w(0xb6), w(0x92), w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd),      \
        w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), w(0x57),      \
        w(0xa7), w(0x8d), w(0x9d), w(0x84), w(0x90), w(0xd8), w(0xab),      \
        w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4),      \
        w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06), w(0xd0),      \
        w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),      \
        w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a),      \
        w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67),      \
        w(0xdc), w(0xea), w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0),      \
        w(0xb4), w(0xe6), w(0x73), w(0x96), w(0xac), w(0x74), w(0x22),      \
        w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), w(0x37),      \
        w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1),      \
        w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), w(0x6f),      \
        w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),      \
        w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79),      \
        w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd),      \
        w(0x5a), w(0xf4), w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88),      \
        w(0x07), w(0xc7), w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59),      \
        w(0x27), w(0x80), w(0xec), w(0x5f), w(0x60), w(0x51), w(0x7f),      \
        w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), w(0x2d), w(0xe5),      \
        w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0),      \
        w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),      \
        w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99),      \
        w(0x61), w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77),      \
        w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55),      \
        w(0x21), w(0x0c), w(0x7d)                                           \
  }

#define mm_data(w)                                                          \
  { /* basic data for forming finite field tables */                        \
    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), \
        w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e),      \
        w(0x0f), w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15),      \
        w(0x16), w(0x17), w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c),      \
        w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23),      \
        w(0x24), w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a),      \
        w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f), w(0x30), w(0x31),      \
        w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37), w(0x38),      \
        w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),      \
        w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46),      \
        w(0x47), w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d),      \
        w(0x4e), w(0x4f), w(0x50), w(0x51), w(0x52), w(0x53), w(0x54),      \
        w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), w(0x5b),      \
        w(0x5c), w(0x5d), w(0x5e), w(0x5f), w(0x60), w(0x61), w(0x62),      \
        w(0x63), w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69),      \
        w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f), w(0x70),      \
        w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),      \
        w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e),      \
        w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85),      \
        w(0x86), w(0x87), w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c),      \
        w(0x8d), w(0x8e), w(0x8f), w(0x90), w(0x91), w(0x92), w(0x93),      \
        w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), w(0x9a),      \
        w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1),      \
        w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), w(0xa8),      \
        w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),      \
        w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6),      \
        w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd),      \
        w(0xbe), w(0xbf), w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4),      \
        w(0xc5), w(0xc6), w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb),      \
        w(0xcc), w(0xcd), w(0xce), w(0xcf), w(0xd0), w(0xd1), w(0xd2),      \
        w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), w(0xd8), w(0xd9),      \
        w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0),      \
        w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),      \
        w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee),      \
        w(0xef), w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5),      \
        w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc),      \
        w(0xfd), w(0xfe), w(0xff)                                           \
  }

static const uint_8t sbox[256] = sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

#define s_box(x) sbox[(x)]
#define is_box(x) isbox[(x)]
#define gfm2_sb(x) gfm2_sbox[(x)]
#define gfm3_sb(x) gfm3_sbox[(x)]
#define gfm_9(x) gfmul_9[(x)]
#define gfm_b(x) gfmul_b[(x)]
#define gfm_d(x) gfmul_d[(x)]
#define gfm_e(x) gfmul_e[(x)]

#else

/* this is the high bit of x right shifted by 1 */
/* position. Since the starting polynomial has  */
/* 9 bits (0x11b), this right shift keeps the   */
/* values of all top bits within a byte         */

static uint_8t hibit(const uint_8t x) {
  uint_8t r = (uint_8t)((x >> 1) | (x >> 2));

  r |= (r >> 2);
  r |= (r >> 4);
  return (r + 1) >> 1;
}

/* return the inverse of the finite field element x */

static uint_8t gf_inv(const uint_8t x) {
  uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

  if (x < 2) return x;

  for (;;) {
    if (n1)
      while (n2 >= n1) /* divide polynomial p2 by p1    */
      {
        n2 /= n1;               /* shift smaller polynomial left */
        p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
        v2 ^= (v1 * n2);        /* shift accumulated value and   */
        n2 = hibit(p2);         /* add into result               */
      }
    else
      return v1;

    if (n2) /* repeat with values swapped    */
      while (n1 >= n2) {
        n1 /= n2;
        p1 ^= p2 * n1;
        v1 ^= v2 * n1;
        n1 = hibit(p1);
      }
    else
      return v2;
  }
}

/* The forward and inverse affine transformations used in the S-box */
uint_8t fwd_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
  uint_32t w = x;
  w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
  return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
#else
  return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^
         (x >> 6) ^ (x >> 5) ^ (x >> 4);
#endif
}

uint_8t inv_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
  uint_32t w = x;
  w = (w << 1) ^ (w << 3) ^ (w << 6);
  return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
#else
  return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}

#define s_box(x) fwd_affine(gf_inv(x))
#define is_box(x) gf_inv(inv_affine(x))
#define gfm2_sb(x) f2(s_box(x))
#define gfm3_sb(x) f3(s_box(x))
#define gfm_9(x) f9(x)
#define gfm_b(x) fb(x)
#define gfm_d(x) fd(x)
#define gfm_e(x) fe(x)

#endif
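
/* Editorial note, not in the original source: whichever branch above was
 * compiled, s_box/is_box should behave as the AES S-box and its inverse.
 * The sketch below checks the FIPS-197 example byte 0x53 (field inverse
 * 0xca, S-box output 0xed) and the full round trip; enable it by changing
 * the '#if 0' if you want to run it.
 */
#if 0
#include <assert.h>
static void sbox_selftest(void) {
  unsigned int v;
  assert(s_box(0x53) == 0xed);  /* affine(gf_inv(0x53)) = affine(0xca) */
  assert(is_box(0xed) == 0x53); /* the inverse box undoes the S-box    */
  for (v = 0; v < 256; ++v)     /* round trip over every byte value    */
    assert(is_box(s_box((uint_8t)v)) == v);
}
#endif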

#if defined(HAVE_MEMCPY)
#define block_copy_nn(d, s, l) memcpy(d, s, l)
#define block_copy(d, s) memcpy(d, s, N_BLOCK)
#else
#define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
#define block_copy(d, s) copy_block(d, s)
#endif

#if !defined(HAVE_MEMCPY)
static void copy_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] = ((uint_32t*)s)[0];
  ((uint_32t*)d)[1] = ((uint_32t*)s)[1];
  ((uint_32t*)d)[2] = ((uint_32t*)s)[2];
  ((uint_32t*)d)[3] = ((uint_32t*)s)[3];
#else
  ((uint_8t*)d)[0] = ((uint_8t*)s)[0];
  ((uint_8t*)d)[1] = ((uint_8t*)s)[1];
  ((uint_8t*)d)[2] = ((uint_8t*)s)[2];
  ((uint_8t*)d)[3] = ((uint_8t*)s)[3];
  ((uint_8t*)d)[4] = ((uint_8t*)s)[4];
  ((uint_8t*)d)[5] = ((uint_8t*)s)[5];
  ((uint_8t*)d)[6] = ((uint_8t*)s)[6];
  ((uint_8t*)d)[7] = ((uint_8t*)s)[7];
  ((uint_8t*)d)[8] = ((uint_8t*)s)[8];
  ((uint_8t*)d)[9] = ((uint_8t*)s)[9];
  ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
  ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
  ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
  ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
  ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
  ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
#endif
}

static void copy_block_nn(void* d, const void* s, uint_8t nn) {
  /* cast once: incrementing a cast expression is not valid standard C */
  uint_8t* dp = (uint_8t*)d;
  const uint_8t* sp = (const uint_8t*)s;
  while (nn--) *dp++ = *sp++;
}
#endif

static void xor_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] ^= ((uint_32t*)s)[0];
  ((uint_32t*)d)[1] ^= ((uint_32t*)s)[1];
  ((uint_32t*)d)[2] ^= ((uint_32t*)s)[2];
  ((uint_32t*)d)[3] ^= ((uint_32t*)s)[3];
#else
  ((uint_8t*)d)[0] ^= ((uint_8t*)s)[0];
  ((uint_8t*)d)[1] ^= ((uint_8t*)s)[1];
  ((uint_8t*)d)[2] ^= ((uint_8t*)s)[2];
  ((uint_8t*)d)[3] ^= ((uint_8t*)s)[3];
  ((uint_8t*)d)[4] ^= ((uint_8t*)s)[4];
  ((uint_8t*)d)[5] ^= ((uint_8t*)s)[5];
  ((uint_8t*)d)[6] ^= ((uint_8t*)s)[6];
  ((uint_8t*)d)[7] ^= ((uint_8t*)s)[7];
  ((uint_8t*)d)[8] ^= ((uint_8t*)s)[8];
  ((uint_8t*)d)[9] ^= ((uint_8t*)s)[9];
  ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
  ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
  ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
  ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
  ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
  ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
#endif
}

static void copy_and_key(void* d, const void* s, const void* k) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] = ((uint_32t*)s)[0] ^ ((uint_32t*)k)[0];
  ((uint_32t*)d)[1] = ((uint_32t*)s)[1] ^ ((uint_32t*)k)[1];
  ((uint_32t*)d)[2] = ((uint_32t*)s)[2] ^ ((uint_32t*)k)[2];
  ((uint_32t*)d)[3] = ((uint_32t*)s)[3] ^ ((uint_32t*)k)[3];
#elif 1
  ((uint_8t*)d)[0] = ((uint_8t*)s)[0] ^ ((uint_8t*)k)[0];
  ((uint_8t*)d)[1] = ((uint_8t*)s)[1] ^ ((uint_8t*)k)[1];
  ((uint_8t*)d)[2] = ((uint_8t*)s)[2] ^ ((uint_8t*)k)[2];
  ((uint_8t*)d)[3] = ((uint_8t*)s)[3] ^ ((uint_8t*)k)[3];
  ((uint_8t*)d)[4] = ((uint_8t*)s)[4] ^ ((uint_8t*)k)[4];
  ((uint_8t*)d)[5] = ((uint_8t*)s)[5] ^ ((uint_8t*)k)[5];
  ((uint_8t*)d)[6] = ((uint_8t*)s)[6] ^ ((uint_8t*)k)[6];
  ((uint_8t*)d)[7] = ((uint_8t*)s)[7] ^ ((uint_8t*)k)[7];
  ((uint_8t*)d)[8] = ((uint_8t*)s)[8] ^ ((uint_8t*)k)[8];
  ((uint_8t*)d)[9] = ((uint_8t*)s)[9] ^ ((uint_8t*)k)[9];
  ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
  ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
  ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
  ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
  ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
  ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
#else
  block_copy(d, s);
  xor_block(d, k);
#endif
}

static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) {
  xor_block(d, k);
}

static void shift_sub_rows(uint_8t st[N_BLOCK]) {
  uint_8t tt;

  st[0] = s_box(st[0]);
  st[4] = s_box(st[4]);
  st[8] = s_box(st[8]);
  st[12] = s_box(st[12]);

  tt = st[1];
  st[1] = s_box(st[5]);
  st[5] = s_box(st[9]);
  st[9] = s_box(st[13]);
  st[13] = s_box(tt);

  tt = st[2];
  st[2] = s_box(st[10]);
  st[10] = s_box(tt);
  tt = st[6];
  st[6] = s_box(st[14]);
  st[14] = s_box(tt);

  tt = st[15];
  st[15] = s_box(st[11]);
  st[11] = s_box(st[7]);
  st[7] = s_box(st[3]);
  st[3] = s_box(tt);
}

static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
  uint_8t tt;

  st[0] = is_box(st[0]);
  st[4] = is_box(st[4]);
  st[8] = is_box(st[8]);
  st[12] = is_box(st[12]);

  tt = st[13];
  st[13] = is_box(st[9]);
  st[9] = is_box(st[5]);
  st[5] = is_box(st[1]);
  st[1] = is_box(tt);

  tt = st[2];
  st[2] = is_box(st[10]);
  st[10] = is_box(tt);
  tt = st[6];
  st[6] = is_box(st[14]);
  st[14] = is_box(tt);

  tt = st[3];
  st[3] = is_box(st[7]);
  st[7] = is_box(st[11]);
  st[11] = is_box(st[15]);
  st[15] = is_box(tt);
}

#if defined(VERSION_1)
static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
  uint_8t st[N_BLOCK];
  block_copy(st, dt);
#else
static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
  dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
  dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
  dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
  dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

  dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
  dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
  dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
  dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

  dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
  dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
  dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
  dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

  dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
  dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
  dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
  dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
}

#if defined(VERSION_1)
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
  uint_8t st[N_BLOCK];
  block_copy(st, dt);
#else
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
  dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
  dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
  dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
  dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));

  dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
  dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
  dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
  dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));

  dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
  dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
  dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
  dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

  dt[12] =
      is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
  dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
  dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
  dt[11] =
      is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
}
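
/* Editorial note, not in the original source: mix_sub_columns performs
 * SubBytes, ShiftRows and MixColumns in one pass, and inv_mix_sub_columns
 * performs the three inverse steps, so applying one after the other should
 * give back the original state. The sketch below checks that round trip for
 * the in-place VERSION_1 signatures; it is illustrative only and can be
 * enabled by changing the '#if 0'.
 */
#if 0 && defined(VERSION_1)
#include <assert.h>
#include <string.h>
static void mix_columns_selftest(void) {
  uint_8t st[N_BLOCK], ref[N_BLOCK];
  int i;

  for (i = 0; i < N_BLOCK; ++i) st[i] = (uint_8t)(17 * i + 3); /* arbitrary */
  memcpy(ref, st, N_BLOCK);

  mix_sub_columns(st);     /* forward combined round step */
  inv_mix_sub_columns(st); /* inverse combined round step */
  assert(memcmp(st, ref, N_BLOCK) == 0);
}
#endif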

#if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)

/*  Set the cipher key for the pre-keyed version */
/*  NOTE: If the length_type used for the key length is an
    unsigned 8-bit character, a key length of 256 bits must
    be entered as a length in bytes (valid inputs are hence
    128, 192, 16, 24 and 32).
*/

return_type aes_set_key(const unsigned char key[], length_type keylen,
                        aes_context ctx[1]) {
  uint_8t cc, rc, hi;

  switch (keylen) {
    case 16:
    case 128: /* length in bits (128 = 8*16) */
      keylen = 16;
      break;
    case 24:
    case 192: /* length in bits (192 = 8*24) */
      keylen = 24;
      break;
    case 32:
      /*    case 256:           length in bits (256 = 8*32) */
      keylen = 32;
      break;
    default:
      ctx->rnd = 0;
      return (return_type)-1;
  }
  block_copy_nn(ctx->ksch, key, keylen);
  hi = (keylen + 28) << 2;
  ctx->rnd = (hi >> 4) - 1;
  for (cc = keylen, rc = 1; cc < hi; cc += 4) {
    uint_8t tt, t0, t1, t2, t3;

    t0 = ctx->ksch[cc - 4];
    t1 = ctx->ksch[cc - 3];
    t2 = ctx->ksch[cc - 2];
    t3 = ctx->ksch[cc - 1];
    if (cc % keylen == 0) {
      tt = t0;
      t0 = s_box(t1) ^ rc;
      t1 = s_box(t2);
      t2 = s_box(t3);
      t3 = s_box(tt);
      rc = f2(rc);
    } else if (keylen > 24 && cc % keylen == 16) {
      t0 = s_box(t0);
      t1 = s_box(t1);
      t2 = s_box(t2);
      t3 = s_box(t3);
    }
    tt = cc - keylen;
    ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
    ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
    ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
    ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
  }
  return 0;
}

#endif
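
/* Editorial sketch, not part of the original source: typical use of the
 * pre-keyed interface. As the NOTE above explains, the key length may be
 * given in bits or in bytes, so the two calls below build identical key
 * schedules. The key bytes are the well-known AES-128 sample key from
 * FIPS-197; enable the sketch by changing the '#if 0'.
 */
#if 0
static void set_key_example(void) {
  static const unsigned char key[16] = {0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae,
                                        0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88,
                                        0x09, 0xcf, 0x4f, 0x3c};
  aes_context ctx_bits[1], ctx_bytes[1];

  aes_set_key(key, 128, ctx_bits); /* length given in bits  */
  aes_set_key(key, 16, ctx_bytes); /* length given in bytes */
  /* both contexts now hold the same 176-byte schedule with ctx->rnd == 10 */
}
#endif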

#if defined(AES_ENC_PREKEYED)

/*  Encrypt a single block of 16 bytes */

return_type aes_encrypt(const unsigned char in[N_BLOCK],
                        unsigned char out[N_BLOCK], const aes_context ctx[1]) {
  if (ctx->rnd) {
    uint_8t s1[N_BLOCK], r;
    copy_and_key(s1, in, ctx->ksch);

    for (r = 1; r < ctx->rnd; ++r)
#if defined(VERSION_1)
    {
      mix_sub_columns(s1);
      add_round_key(s1, ctx->ksch + r * N_BLOCK);
    }
#else
    {
      uint_8t s2[N_BLOCK];
      mix_sub_columns(s2, s1);
      copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
    }
#endif
    shift_sub_rows(s1);
    copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
  } else
    return (return_type)-1;
  return 0;
}

/* CBC encrypt a number of blocks (input and return an IV) */

return_type aes_cbc_encrypt(const unsigned char* in, unsigned char* out,
                            int n_block, unsigned char iv[N_BLOCK],
                            const aes_context ctx[1]) {
  while (n_block--) {
    xor_block(iv, in);
    if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
    memcpy(out, iv, N_BLOCK);
    in += N_BLOCK;
    out += N_BLOCK;
  }
  return EXIT_SUCCESS;
}

#endif

#if defined(AES_DEC_PREKEYED)

/*  Decrypt a single block of 16 bytes */

return_type aes_decrypt(const unsigned char in[N_BLOCK],
                        unsigned char out[N_BLOCK], const aes_context ctx[1]) {
  if (ctx->rnd) {
    uint_8t s1[N_BLOCK], r;
    copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
    inv_shift_sub_rows(s1);

    for (r = ctx->rnd; --r;)
#if defined(VERSION_1)
    {
      add_round_key(s1, ctx->ksch + r * N_BLOCK);
      inv_mix_sub_columns(s1);
    }
#else
    {
      uint_8t s2[N_BLOCK];
      copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
      inv_mix_sub_columns(s1, s2);
    }
#endif
    copy_and_key(out, s1, ctx->ksch);
  } else
    return (return_type)-1;
  return 0;
}

/* CBC decrypt a number of blocks (input and return an IV) */

return_type aes_cbc_decrypt(const unsigned char* in, unsigned char* out,
                            int n_block, unsigned char iv[N_BLOCK],
                            const aes_context ctx[1]) {
  while (n_block--) {
    uint_8t tmp[N_BLOCK];

    memcpy(tmp, in, N_BLOCK);
    if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
    xor_block(out, iv);
    memcpy(iv, tmp, N_BLOCK);
    in += N_BLOCK;
    out += N_BLOCK;
  }
  return EXIT_SUCCESS;
}
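
/* Editorial sketch, not part of the original source: CBC usage with the two
 * helpers above. Both routines update the caller's IV buffer in place, so
 * decryption only needs a copy of the IV value that encryption started
 * from. The key and data below are placeholders; enable the sketch by
 * changing the '#if 0'.
 */
#if 0
static int cbc_round_trip_example(void) {
  unsigned char key[16] = {0}; /* placeholder key, not for real use */
  unsigned char iv_enc[N_BLOCK] = {0}, iv_dec[N_BLOCK] = {0};
  unsigned char pt[2 * N_BLOCK], ct[2 * N_BLOCK], rt[2 * N_BLOCK];
  aes_context ctx[1];
  int i;

  for (i = 0; i < (int)sizeof(pt); ++i) pt[i] = (unsigned char)i;

  if (aes_set_key(key, 16, ctx) != 0) return -1;
  /* the same IV value must be presented to both directions */
  if (aes_cbc_encrypt(pt, ct, 2, iv_enc, ctx) != EXIT_SUCCESS) return -1;
  if (aes_cbc_decrypt(ct, rt, 2, iv_dec, ctx) != EXIT_SUCCESS) return -1;
  return memcmp(pt, rt, sizeof(pt)) == 0 ? 0 : -1;
}
#endif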

#endif

#if defined(AES_ENC_128_OTFK)

/*  The 'on the fly' encryption key update for 128 bit keys */

static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  k[0] ^= s_box(k[13]) ^ *rc;
  k[1] ^= s_box(k[14]);
  k[2] ^= s_box(k[15]);
  k[3] ^= s_box(k[12]);
  *rc = f2(*rc);

  for (cc = 4; cc < 16; cc += 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }
}

/*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */

void aes_encrypt_128(const unsigned char in[N_BLOCK],
                     unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK],
                     unsigned char o_key[N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 1;

  if (o_key != key) block_copy(o_key, key);
  copy_and_key(s1, in, o_key);

  for (r = 1; r < 10; ++r)
#if defined(VERSION_1)
  {
    mix_sub_columns(s1);
    update_encrypt_key_128(o_key, &rc);
    add_round_key(s1, o_key);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    mix_sub_columns(s2, s1);
    update_encrypt_key_128(o_key, &rc);
    copy_and_key(s1, s2, o_key);
  }
#endif

  shift_sub_rows(s1);
  update_encrypt_key_128(o_key, &rc);
  copy_and_key(out, s1, o_key);
}

#endif
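
/* Editorial sketch, not part of the original source: with the 'on the fly'
 * 128-bit interface, encryption leaves the final round key in o_key, and,
 * as I read this code, aes_decrypt_128 (below) expects that final round key
 * as its 'key' argument and unwinds the schedule as it goes. Placeholders
 * throughout; enable the sketch by changing the '#if 0'.
 */
#if 0
static int otfk_128_round_trip(void) {
  unsigned char key[N_BLOCK] = {0}; /* placeholder cipher key            */
  unsigned char last_rk[N_BLOCK];   /* receives the final round key      */
  unsigned char scratch[N_BLOCK];   /* receives the recovered cipher key */
  unsigned char pt[N_BLOCK] = {0}, ct[N_BLOCK], rt[N_BLOCK];

  aes_encrypt_128(pt, ct, key, last_rk);
  aes_decrypt_128(ct, rt, last_rk, scratch);
  return memcmp(pt, rt, N_BLOCK) == 0 ? 0 : -1;
}
#endif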

#if defined(AES_DEC_128_OTFK)

/*  The 'on the fly' decryption key update for 128 bit keys */

static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  for (cc = 12; cc > 0; cc -= 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }
  *rc = d2(*rc);
  k[0] ^= s_box(k[13]) ^ *rc;
  k[1] ^= s_box(k[14]);
  k[2] ^= s_box(k[15]);
  k[3] ^= s_box(k[12]);
}

/*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */

void aes_decrypt_128(const unsigned char in[N_BLOCK],
                     unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK],
                     unsigned char o_key[N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 0x6c;
  if (o_key != key) block_copy(o_key, key);

  copy_and_key(s1, in, o_key);
  inv_shift_sub_rows(s1);

  for (r = 10; --r;)
#if defined(VERSION_1)
  {
    update_decrypt_key_128(o_key, &rc);
    add_round_key(s1, o_key);
    inv_mix_sub_columns(s1);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    update_decrypt_key_128(o_key, &rc);
    copy_and_key(s2, s1, o_key);
    inv_mix_sub_columns(s1, s2);
  }
#endif
  update_decrypt_key_128(o_key, &rc);
  copy_and_key(out, s1, o_key);
}

#endif

#if defined(AES_ENC_256_OTFK)

/*  The 'on the fly' encryption key update for 256 bit keys */

static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  k[0] ^= s_box(k[29]) ^ *rc;
  k[1] ^= s_box(k[30]);
  k[2] ^= s_box(k[31]);
  k[3] ^= s_box(k[28]);
  *rc = f2(*rc);

  for (cc = 4; cc < 16; cc += 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }

  k[16] ^= s_box(k[12]);
  k[17] ^= s_box(k[13]);
  k[18] ^= s_box(k[14]);
  k[19] ^= s_box(k[15]);

  for (cc = 20; cc < 32; cc += 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }
}

/*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */

void aes_encrypt_256(const unsigned char in[N_BLOCK],
                     unsigned char out[N_BLOCK],
                     const unsigned char key[2 * N_BLOCK],
                     unsigned char o_key[2 * N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 1;
  if (o_key != key) {
    block_copy(o_key, key);
    block_copy(o_key + 16, key + 16);
  }
  copy_and_key(s1, in, o_key);

  for (r = 1; r < 14; ++r)
#if defined(VERSION_1)
  {
    mix_sub_columns(s1);
    if (r & 1)
      add_round_key(s1, o_key + 16);
    else {
      update_encrypt_key_256(o_key, &rc);
      add_round_key(s1, o_key);
    }
  }
#else
  {
    uint_8t s2[N_BLOCK];
    mix_sub_columns(s2, s1);
    if (r & 1)
      copy_and_key(s1, s2, o_key + 16);
    else {
      update_encrypt_key_256(o_key, &rc);
      copy_and_key(s1, s2, o_key);
    }
  }
#endif

  shift_sub_rows(s1);
  update_encrypt_key_256(o_key, &rc);
  copy_and_key(out, s1, o_key);
}

#endif

#if defined(AES_DEC_256_OTFK)

/*  The 'on the fly' decryption key update for 256 bit keys */

static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
  uint_8t cc;

  for (cc = 28; cc > 16; cc -= 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }

  k[16] ^= s_box(k[12]);
  k[17] ^= s_box(k[13]);
  k[18] ^= s_box(k[14]);
  k[19] ^= s_box(k[15]);

  for (cc = 12; cc > 0; cc -= 4) {
    k[cc + 0] ^= k[cc - 4];
    k[cc + 1] ^= k[cc - 3];
    k[cc + 2] ^= k[cc - 2];
    k[cc + 3] ^= k[cc - 1];
  }

  *rc = d2(*rc);
  k[0] ^= s_box(k[29]) ^ *rc;
  k[1] ^= s_box(k[30]);
  k[2] ^= s_box(k[31]);
  k[3] ^= s_box(k[28]);
}

/*  Decrypt a single block of 16 bytes with 'on the fly'
    256 bit keying
*/
void aes_decrypt_256(const unsigned char in[N_BLOCK],
                     unsigned char out[N_BLOCK],
                     const unsigned char key[2 * N_BLOCK],
                     unsigned char o_key[2 * N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 0x80;

  if (o_key != key) {
    block_copy(o_key, key);
    block_copy(o_key + 16, key + 16);
  }

  copy_and_key(s1, in, o_key);
  inv_shift_sub_rows(s1);

  for (r = 14; --r;)
#if defined(VERSION_1)
  {
    if ((r & 1)) {
      update_decrypt_key_256(o_key, &rc);
      add_round_key(s1, o_key + 16);
    } else
      add_round_key(s1, o_key);
    inv_mix_sub_columns(s1);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    if ((r & 1)) {
      update_decrypt_key_256(o_key, &rc);
      copy_and_key(s2, s1, o_key + 16);
    } else
      copy_and_key(s2, s1, o_key);
    inv_mix_sub_columns(s1, s2);
  }
#endif
  copy_and_key(out, s1, o_key);
}

#endif