1 /*
2  ---------------------------------------------------------------------------
3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4 
5  LICENSE TERMS
6 
7  The redistribution and use of this software (with or without changes)
8  is allowed without the payment of fees or royalties provided that:
9 
10   1. source code distributions include the above copyright notice, this
11      list of conditions and the following disclaimer;
12 
13   2. binary distributions include the above copyright notice, this list
14      of conditions and the following disclaimer in their documentation;
15 
16   3. the name of the copyright holder is not used to endorse products
17      built using this software without specific written permission.
18 
19  DISCLAIMER
20 
21  This software is provided 'as is' with no explicit or implied warranties
22  in respect of its properties, including, but not limited to, correctness
23  and/or fitness for purpose.
24  ---------------------------------------------------------------------------
25  Issue 09/09/2006
26 
27  This is an AES implementation that uses only 8-bit byte operations on the
28  cipher state (there are options to use 32-bit types if available).
29 
30  The combination of mix columns and byte substitution used here is based on
31  that developed by Karl Malbrain. His contribution is acknowledged.
32  */
33 
34 /* define if you have a fast memcpy function on your system */
35 #if 1
36 #  define HAVE_MEMCPY
37 #  include <string.h>
38 #if 0
39 #  if defined( _MSC_VER )
40 #    include <intrin.h>
41 #    pragma intrinsic( memcpy )
42 #  endif
43 #endif
44 #endif
45 
46 #include <stdlib.h>
47 
48 /* add the target configuration to allow using internal data types and compilation options */
49 #include "bt_target.h"
50 
51 /* define if you have fast 32-bit types on your system */
52 #if 1
53 #  define HAVE_UINT_32T
54 #endif
55 
56 /* define if you don't want any tables */
57 #if 1
58 #  define USE_TABLES
59 #endif
60 
61 /*  On Intel Core 2 duo VERSION_1 is faster */
62 
63 /* alternative versions (test for performance on your system) */
64 #if 1
65 #  define VERSION_1
66 #endif
67 
68 #include "aes.h"
69 
70 #if defined( HAVE_UINT_32T )
71   typedef UINT32 uint_32t;
72 #endif
73 
74 /* functions for finite field multiplication in the AES Galois field    */
75 
76 #define WPOLY   0x011b
77 #define BPOLY     0x1b
78 #define DPOLY   0x008d
79 
/* fN(x) multiplies x by N in GF(2^8); the (x >> k) & m terms select the   */
/* bits shifted out of the byte so the reduction polynomial can be folded  */
/* back in (one WPOLY term per overflow bit)                               */
#define f1(x)   (x)
81 #define f2(x)   ((x << 1) ^ (((x >> 7) & 1) * WPOLY))
82 #define f4(x)   ((x << 2) ^ (((x >> 6) & 1) * WPOLY) ^ (((x >> 6) & 2) * WPOLY))
83 #define f8(x)   ((x << 3) ^ (((x >> 5) & 1) * WPOLY) ^ (((x >> 5) & 2) * WPOLY) \
84                           ^ (((x >> 5) & 4) * WPOLY))
/* d2(x) divides x by 2 in the field (multiply by the inverse of 2)        */
#define d2(x)   (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))
86 
/* remaining multipliers built from the power-of-two forms above           */
#define f3(x)   (f2(x) ^ x)
88 #define f9(x)   (f8(x) ^ x)
89 #define fb(x)   (f8(x) ^ f2(x) ^ x)
90 #define fd(x)   (f8(x) ^ f4(x) ^ x)
91 #define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
92 
93 #if defined( USE_TABLES )
94 
95 #define sb_data(w) {    /* S Box data values */                            \
96     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
97     w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
98     w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
99     w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
100     w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
101     w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
102     w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
103     w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
104     w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
105     w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
106     w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
107     w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
108     w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
109     w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
110     w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
111     w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
112     w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
113     w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
114     w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
115     w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
116     w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
117     w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
118     w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
119     w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
120     w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
121     w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
122     w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
123     w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
124     w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
125     w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
126     w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
127     w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
128 
129 #define isb_data(w) {   /* inverse S Box data values */                    \
130     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
131     w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
132     w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
133     w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
134     w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
135     w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
136     w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
137     w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
138     w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
139     w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
140     w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
141     w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
142     w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
143     w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
144     w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
145     w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
146     w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
147     w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
148     w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
149     w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
150     w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
151     w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
152     w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
153     w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
154     w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
155     w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
156     w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
157     w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
158     w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
159     w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
160     w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
161     w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
162 
163 #define mm_data(w) {    /* basic data for forming finite field tables */   \
164     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
165     w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
166     w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
167     w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
168     w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
169     w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
170     w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
171     w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
172     w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
173     w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
174     w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
175     w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
176     w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
177     w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
178     w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
179     w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
180     w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
181     w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
182     w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
183     w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
184     w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
185     w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
186     w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
187     w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
188     w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
189     w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
190     w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
191     w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
192     w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
193     w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
194     w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
195     w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
196 
/* pre-computed lookup tables: S-box, inverse S-box, the S-box pre-        */
/* multiplied by 2 and 3 (forward MixColumns), and plain field multiplies  */
/* by 9, 11, 13 and 14 (inverse MixColumns)                                */
static const uint_8t sbox[256]  =  sb_data(f1);
198 static const uint_8t isbox[256] = isb_data(f1);
199 
200 static const uint_8t gfm2_sbox[256] = sb_data(f2);
201 static const uint_8t gfm3_sbox[256] = sb_data(f3);
202 
203 static const uint_8t gfmul_9[256] = mm_data(f9);
204 static const uint_8t gfmul_b[256] = mm_data(fb);
205 static const uint_8t gfmul_d[256] = mm_data(fd);
206 static const uint_8t gfmul_e[256] = mm_data(fe);
207 
/* accessors used by the round functions; the no-table build below maps    */
/* the same names onto run-time computation                                */
#define s_box(x)     sbox[(x)]
209 #define is_box(x)    isbox[(x)]
210 #define gfm2_sb(x)   gfm2_sbox[(x)]
211 #define gfm3_sb(x)   gfm3_sbox[(x)]
212 #define gfm_9(x)     gfmul_9[(x)]
213 #define gfm_b(x)     gfmul_b[(x)]
214 #define gfm_d(x)     gfmul_d[(x)]
215 #define gfm_e(x)     gfmul_e[(x)]
216 
217 #else
218 
219 /* this is the high bit of x right shifted by 1 */
220 /* position. Since the starting polynomial has  */
221 /* 9 bits (0x11b), this right shift keeps the   */
222 /* values of all top bits within a byte         */
223 
hibit(const uint_8t x)224 static uint_8t hibit(const uint_8t x)
225 {   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
226 
227     r |= (r >> 2);
228     r |= (r >> 4);
229     return (r + 1) >> 1;
230 }
231 
232 /* return the inverse of the finite field element x */
233 
gf_inv(const uint_8t x)234 static uint_8t gf_inv(const uint_8t x)
235 {   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
236 
237     if(x < 2)
238         return x;
239 
240     for( ; ; )
241     {
242         if(n1)
243             while(n2 >= n1)             /* divide polynomial p2 by p1    */
244             {
245                 n2 /= n1;               /* shift smaller polynomial left */
246                 p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
247                 v2 ^= (v1 * n2);        /* shift accumulated value and   */
248                 n2 = hibit(p2);         /* add into result               */
249             }
250         else
251             return v1;
252 
253         if(n2)                          /* repeat with values swapped    */
254             while(n1 >= n2)
255             {
256                 n1 /= n2;
257                 p1 ^= p2 * n1;
258                 v1 ^= v2 * n1;
259                 n1 = hibit(p1);
260             }
261         else
262             return v2;
263     }
264 }
265 
266 /* The forward and inverse affine transformations used in the S-box */
/* The forward affine transformation of the AES S-box: xor of x with its
   four left-rotations, plus the constant 0x63. */
uint_8t fwd_affine(const uint_8t x)
{
#if defined( HAVE_UINT_32T )
    /* do all five terms in one 32-bit word, then fold the overflow
       byte back down to emulate the rotations */
    uint_32t t = x;
    t ^= (t << 1) ^ (t << 2) ^ (t << 3) ^ (t << 4);
    t ^= t >> 8;
    return (uint_8t)(0x63 ^ (t & 0xff));
#else
    return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4)
                    ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
#endif
}
278 
/* The inverse of the S-box affine transformation: xor of three rotations
   of x plus the constant 0x05. */
uint_8t inv_affine(const uint_8t x)
{
#if defined( HAVE_UINT_32T )
    uint_32t t = x;
    t = (t << 1) ^ (t << 3) ^ (t << 6);
    t ^= t >> 8;                /* fold the overflow byte back down */
    return (uint_8t)(0x05 ^ (t & 0xff));
#else
    return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6)
                ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}
290 
/* table-free build: compute the S-box (inverse then affine map) and the   */
/* field multiplies on the fly using the fN macros                         */
#define s_box(x)   fwd_affine(gf_inv(x))
292 #define is_box(x)  gf_inv(inv_affine(x))
293 #define gfm2_sb(x) f2(s_box(x))
294 #define gfm3_sb(x) f3(s_box(x))
295 #define gfm_9(x)   f9(x)
296 #define gfm_b(x)   fb(x)
297 #define gfm_d(x)   fd(x)
298 #define gfm_e(x)   fe(x)
299 
300 #endif
301 
302 #if defined( HAVE_MEMCPY )
303 #  define block_copy_nn(d, s, l)    memcpy(d, s, l)
304 #  define block_copy(d, s)          memcpy(d, s, N_BLOCK)
305 #else
306 #  define block_copy_nn(d, s, l)    copy_block_nn(d, s, l)
307 #  define block_copy(d, s)          copy_block(d, s)
308 #endif
309 
310 #if !defined( HAVE_MEMCPY )
copy_block(void * d,const void * s)311 static void copy_block( void *d, const void *s )
312 {
313 #if defined( HAVE_UINT_32T )
314     ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0];
315     ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1];
316     ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2];
317     ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3];
318 #else
319     ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0];
320     ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1];
321     ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2];
322     ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3];
323     ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4];
324     ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5];
325     ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6];
326     ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7];
327     ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8];
328     ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9];
329     ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
330     ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
331     ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
332     ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
333     ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
334     ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
335 #endif
336 }
337 
/* Copy nn bytes from s to d (regions must not overlap).
   Fix: the original wrote `*((uint_8t*)d)++`, incrementing the result of
   a cast.  A cast expression is not an lvalue in ISO C (C11 6.5.4), so
   that only ever compiled as a legacy compiler extension; use properly
   typed local pointers instead. */
static void copy_block_nn( void * d, const void *s, uint_8t nn )
{
    uint_8t       *dp = (uint_8t*)d;
    const uint_8t *sp = (const uint_8t*)s;

    while( nn-- )
        *dp++ = *sp++;
}
343 #endif
344 
xor_block(void * d,const void * s)345 static void xor_block( void *d, const void *s )
346 {
347 #if defined( HAVE_UINT_32T )
348     ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0];
349     ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1];
350     ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2];
351     ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3];
352 #else
353     ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0];
354     ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1];
355     ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2];
356     ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3];
357     ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4];
358     ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5];
359     ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6];
360     ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7];
361     ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8];
362     ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9];
363     ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
364     ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
365     ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
366     ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
367     ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
368     ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
369 #endif
370 }
371 
copy_and_key(void * d,const void * s,const void * k)372 static void copy_and_key( void *d, const void *s, const void *k )
373 {
374 #if defined( HAVE_UINT_32T )
375     ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0];
376     ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1];
377     ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2];
378     ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3];
379 #elif 1
380     ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0];
381     ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1];
382     ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2];
383     ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3];
384     ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4];
385     ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5];
386     ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6];
387     ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7];
388     ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8];
389     ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9];
390     ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
391     ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
392     ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
393     ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
394     ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
395     ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
396 #else
397     block_copy(d, s);
398     xor_block(d, k);
399 #endif
400 }
401 
/* AddRoundKey step: XOR the round key k into the state d in place. */
static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] )
{
    xor_block(d, k);
}
406 
/* Combined SubBytes + ShiftRows, in place.  The state is column-major:
   st[4*c + r] is row r of column c, so row r lives at indices r, r+4,
   r+8, r+12.  Row r is rotated left by r positions while each byte is
   pushed through the S-box. */
static void shift_sub_rows( uint_8t st[N_BLOCK] )
{
    uint_8t t;

    /* row 0: substitution only, no rotation */
    st[ 0] = s_box(st[ 0]);
    st[ 4] = s_box(st[ 4]);
    st[ 8] = s_box(st[ 8]);
    st[12] = s_box(st[12]);

    /* row 1: rotate left one column */
    t = st[ 1];
    st[ 1] = s_box(st[ 5]);
    st[ 5] = s_box(st[ 9]);
    st[ 9] = s_box(st[13]);
    st[13] = s_box( t );

    /* row 2: rotate two columns == two independent swaps */
    t = st[ 2]; st[ 2] = s_box(st[10]); st[10] = s_box( t );
    t = st[ 6]; st[ 6] = s_box(st[14]); st[14] = s_box( t );

    /* row 3: rotate left three columns (i.e. right one) */
    t = st[15];
    st[15] = s_box(st[11]);
    st[11] = s_box(st[ 7]);
    st[ 7] = s_box(st[ 3]);
    st[ 3] = s_box( t );
}
422 
/* Combined InvShiftRows + InvSubBytes, in place: exact inverse of
   shift_sub_rows (rows rotate the opposite way, bytes go through the
   inverse S-box). */
static void inv_shift_sub_rows( uint_8t st[N_BLOCK] )
{
    uint_8t t;

    /* row 0: substitution only */
    st[ 0] = is_box(st[ 0]);
    st[ 4] = is_box(st[ 4]);
    st[ 8] = is_box(st[ 8]);
    st[12] = is_box(st[12]);

    /* row 1: rotate right one column */
    t = st[13];
    st[13] = is_box(st[ 9]);
    st[ 9] = is_box(st[ 5]);
    st[ 5] = is_box(st[ 1]);
    st[ 1] = is_box( t );

    /* row 2: rotate two columns == two independent swaps */
    t = st[ 2]; st[ 2] = is_box(st[10]); st[10] = is_box( t );
    t = st[ 6]; st[ 6] = is_box(st[14]); st[14] = is_box( t );

    /* row 3: rotate right three columns (i.e. left one) */
    t = st[ 3];
    st[ 3] = is_box(st[ 7]);
    st[ 7] = is_box(st[11]);
    st[11] = is_box(st[15]);
    st[15] = is_box( t );
}
438 
439 #if defined( VERSION_1 )
/* combined SubBytes + ShiftRows + MixColumns, in place: the state is     */
/* copied to a scratch buffer first so the original bytes stay readable   */
mix_sub_columns(uint_8t dt[N_BLOCK])440   static void mix_sub_columns( uint_8t dt[N_BLOCK] )
441   { uint_8t st[N_BLOCK];
442     block_copy(st, dt);
443 #else
/* same transform, reading from st and writing to dt (distinct buffers)   */
444   static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
445   {
446 #endif
/* each output column c mixes the shifted input bytes through the matrix  */
/* (2 3 1 1 / 1 2 3 1 / 1 1 2 3 / 3 1 1 2) with the S-box folded in;      */
/* the source indices st[.] implement the ShiftRows offsets               */
447     dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
448     dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
449     dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
450     dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);
451 
452     dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
453     dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
454     dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
455     dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);
456 
457     dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
458     dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
459     dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
460     dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);
461 
462     dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
463     dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
464     dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
465     dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
466   }
467 
468 #if defined( VERSION_1 )
/* combined InvMixColumns + InvShiftRows + InvSubBytes, in place          */
469   static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] )
470   { uint_8t st[N_BLOCK];
471     block_copy(st, dt);
472 #else
/* same transform, reading from st and writing to dt (distinct buffers)   */
473   static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
474   {
475 #endif
/* each input column is un-mixed through the inverse matrix               */
/* (14 11 13 9 / 9 14 11 13 / 13 9 14 11 / 11 13 9 14), then the inverse  */
/* S-box is applied; the scattered dt[.] indices implement InvShiftRows   */
476     dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
477     dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
478     dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
479     dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));
480 
481     dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
482     dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
483     dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
484     dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));
485 
486     dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
487     dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
488     dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
489     dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));
490 
491     dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
492     dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
493     dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
494     dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
495   }
496 
497 #if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
498 
499 /*  Set the cipher key for the pre-keyed version */
500 /*  NOTE: If the length_type used for the key length is an
501     unsigned 8-bit character, a key length of 256 bits must
502     be entered as a length in bytes (valid inputs are hence
503     128, 192, 16, 24 and 32).
504 */
505 
/* Expand the cipher key into ctx->ksch and set ctx->rnd to the round
   count.  keylen may be given in bytes (16/24/32) or bits (128/192);
   256 bits must be passed as 32 because length_type may be only 8 bits
   wide.  Returns 0 on success, (return_type)-1 on a bad length (in
   which case ctx->rnd is zeroed so encrypt/decrypt will refuse to run). */
return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] )
{
    uint_8t n, rcon, end;

    /* normalise the key length to a byte count */
    switch( keylen )
    {
    case 128:                   /* bits  */
    case 16:                    /* bytes */
        keylen = 16;
        break;
    case 192:                   /* bits  */
    case 24:                    /* bytes */
        keylen = 24;
        break;
/*  case 256: */                /* unrepresentable when length_type is 8-bit */
    case 32:
        keylen = 32;
        break;
    default:
        ctx->rnd = 0;           /* mark the context as unusable */
        return (return_type)-1;
    }

    block_copy_nn(ctx->ksch, key, keylen);
    end = (keylen + 28) << 2;           /* key schedule length in bytes  */
    ctx->rnd = (end >> 4) - 1;          /* rounds = schedule blocks - 1  */

    for( n = keylen, rcon = 1; n < end; n += 4 )
    {
        uint_8t w0, w1, w2, w3, swp, prev;

        /* previous schedule word */
        w0 = ctx->ksch[n - 4];
        w1 = ctx->ksch[n - 3];
        w2 = ctx->ksch[n - 2];
        w3 = ctx->ksch[n - 1];

        if( n % keylen == 0 )
        {   /* start of a key-length group: RotWord, SubWord, add Rcon */
            swp = w0;
            w0 = s_box(w1) ^ rcon;
            w1 = s_box(w2);
            w2 = s_box(w3);
            w3 = s_box(swp);
            rcon = f2(rcon);            /* next round constant */
        }
        else if( keylen > 24 && n % keylen == 16 )
        {   /* 256-bit keys apply an extra SubWord mid-group */
            w0 = s_box(w0);
            w1 = s_box(w1);
            w2 = s_box(w2);
            w3 = s_box(w3);
        }

        prev = n - keylen;              /* word one key-length back */
        ctx->ksch[n + 0] = ctx->ksch[prev + 0] ^ w0;
        ctx->ksch[n + 1] = ctx->ksch[prev + 1] ^ w1;
        ctx->ksch[n + 2] = ctx->ksch[prev + 2] ^ w2;
        ctx->ksch[n + 3] = ctx->ksch[prev + 3] ^ w3;
    }
    return 0;
}
562 
563 #endif
564 
565 #if defined( AES_ENC_PREKEYED )
566 
567 /*  Encrypt a single block of 16 bytes */
568 
/* Encrypt one 16-byte block with a pre-expanded key.  Returns 0 on
   success, (return_type)-1 if the context was never keyed (rnd == 0).
   in and out may be the same buffer. */
return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char  out[N_BLOCK], const aes_context ctx[1] )
{
    uint_8t state[N_BLOCK], r;

    if( !ctx->rnd )                     /* unkeyed context */
        return (return_type)-1;

    copy_and_key( state, in, ctx->ksch );       /* initial AddRoundKey */

    for( r = 1 ; r < ctx->rnd ; ++r )
#if defined( VERSION_1 )
    {
        mix_sub_columns( state );
        add_round_key( state, ctx->ksch + r * N_BLOCK );
    }
#else
    {   uint_8t tmp[N_BLOCK];
        mix_sub_columns( tmp, state );
        copy_and_key( state, tmp, ctx->ksch + r * N_BLOCK );
    }
#endif

    /* final round has no MixColumns; r == ctx->rnd here */
    shift_sub_rows( state );
    copy_and_key( out, state, ctx->ksch + r * N_BLOCK );
    return 0;
}
595 
596 /* CBC encrypt a number of blocks (input and return an IV) */
597 
598 return_type aes_cbc_encrypt( const unsigned char *in, unsigned char *out,
599                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
600 {
601 
602     while(n_block--)
603     {
604         xor_block(iv, in);
605         if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
606 			return EXIT_FAILURE;
607         memcpy(out, iv, N_BLOCK);
608         in += N_BLOCK;
609         out += N_BLOCK;
610     }
611     return EXIT_SUCCESS;
612 }
613 
614 #endif
615 
616 #if defined( AES_DEC_PREKEYED )
617 
618 /*  Decrypt a single block of 16 bytes */
619 
/* Decrypt one 16-byte block with a pre-expanded key.  Returns 0 on
   success, (return_type)-1 if the context was never keyed (rnd == 0).
   in and out may be the same buffer. */
return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
{
    uint_8t state[N_BLOCK], r;

    if( !ctx->rnd )                     /* unkeyed context */
        return (return_type)-1;

    /* undo the final round first, walking the schedule backwards */
    copy_and_key( state, in, ctx->ksch + ctx->rnd * N_BLOCK );
    inv_shift_sub_rows( state );

    for( r = ctx->rnd ; --r ; )
#if defined( VERSION_1 )
    {
        add_round_key( state, ctx->ksch + r * N_BLOCK );
        inv_mix_sub_columns( state );
    }
#else
    {   uint_8t tmp[N_BLOCK];
        copy_and_key( tmp, state, ctx->ksch + r * N_BLOCK );
        inv_mix_sub_columns( state, tmp );
    }
#endif
    copy_and_key( out, state, ctx->ksch );      /* initial round key last */
    return 0;
}
646 
647 /* CBC decrypt a number of blocks (input and return an IV) */
648 
649 return_type aes_cbc_decrypt( const unsigned char *in, unsigned char *out,
650                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
651 {
652     while(n_block--)
653     {   uint_8t tmp[N_BLOCK];
654 
655         memcpy(tmp, in, N_BLOCK);
656         if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
657 			return EXIT_FAILURE;
658         xor_block(out, iv);
659         memcpy(iv, tmp, N_BLOCK);
660         in += N_BLOCK;
661         out += N_BLOCK;
662     }
663     return EXIT_SUCCESS;
664 }
665 
666 #endif
667 
668 #if defined( AES_ENC_128_OTFK )
669 
670 /*  The 'on the fly' encryption key update for for 128 bit keys */
671 
672 static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
673 {   uint_8t cc;
674 
675     k[0] ^= s_box(k[13]) ^ *rc;
676     k[1] ^= s_box(k[14]);
677     k[2] ^= s_box(k[15]);
678     k[3] ^= s_box(k[12]);
679     *rc = f2( *rc );
680 
681     for(cc = 4; cc < 16; cc += 4 )
682     {
683         k[cc + 0] ^= k[cc - 4];
684         k[cc + 1] ^= k[cc - 3];
685         k[cc + 2] ^= k[cc - 2];
686         k[cc + 3] ^= k[cc - 1];
687     }
688 }
689 
690 /*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
691 
/* Encrypt one block with a 128-bit key, expanding the key schedule round
   by round in o_key ('on the fly' keying).  o_key may equal key; on
   return it holds the final round key, not the original key. */
void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{
    uint_8t state[N_BLOCK], r, rc = 1;

    if( o_key != key )
        block_copy( o_key, key );
    copy_and_key( state, in, o_key );           /* initial AddRoundKey */

    for( r = 1 ; r < 10 ; ++r )
#if defined( VERSION_1 )
    {
        mix_sub_columns( state );
        update_encrypt_key_128( o_key, &rc );
        add_round_key( state, o_key );
    }
#else
    {   uint_8t tmp[N_BLOCK];
        mix_sub_columns( tmp, state );
        update_encrypt_key_128( o_key, &rc );
        copy_and_key( state, tmp, o_key );
    }
#endif

    /* final round: no MixColumns */
    shift_sub_rows( state );
    update_encrypt_key_128( o_key, &rc );
    copy_and_key( out, state, o_key );
}
719 
720 #endif
721 
722 #if defined( AES_DEC_128_OTFK )
723 
724 /*  The 'on the fly' decryption key update for for 128 bit keys */
725 
726 static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
727 {   uint_8t cc;
728 
729     for( cc = 12; cc > 0; cc -= 4 )
730     {
731         k[cc + 0] ^= k[cc - 4];
732         k[cc + 1] ^= k[cc - 3];
733         k[cc + 2] ^= k[cc - 2];
734         k[cc + 3] ^= k[cc - 1];
735     }
736     *rc = d2(*rc);
737     k[0] ^= s_box(k[13]) ^ *rc;
738     k[1] ^= s_box(k[14]);
739     k[2] ^= s_box(k[15]);
740     k[3] ^= s_box(k[12]);
741 }
742 
743 /*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
744 
/* Decrypt one block with 'on the fly' 128-bit keying.  key must hold the
   FINAL round key (as left behind by aes_encrypt_128); o_key may equal
   key and ends up holding the original cipher key. */
void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{
    /* 0x6c: round-constant value matching the last forward update —
       NOTE(review): taken on trust from the original; verify against
       ten applications of f2() starting from 1 */
    uint_8t state[N_BLOCK], r, rc = 0x6c;

    if( o_key != key )
        block_copy( o_key, key );

    copy_and_key( state, in, o_key );
    inv_shift_sub_rows( state );

    for( r = 10 ; --r ; )
#if defined( VERSION_1 )
    {
        update_decrypt_key_128( o_key, &rc );
        add_round_key( state, o_key );
        inv_mix_sub_columns( state );
    }
#else
    {   uint_8t tmp[N_BLOCK];
        update_decrypt_key_128( o_key, &rc );
        copy_and_key( tmp, state, o_key );
        inv_mix_sub_columns( state, tmp );
    }
#endif
    update_decrypt_key_128( o_key, &rc );
    copy_and_key( out, state, o_key );
}
772 
773 #endif
774 
775 #if defined( AES_ENC_256_OTFK )
776 
777 /*  The 'on the fly' encryption key update for for 256 bit keys */
778 
779 static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
780 {   uint_8t cc;
781 
782     k[0] ^= s_box(k[29]) ^ *rc;
783     k[1] ^= s_box(k[30]);
784     k[2] ^= s_box(k[31]);
785     k[3] ^= s_box(k[28]);
786     *rc = f2( *rc );
787 
788     for(cc = 4; cc < 16; cc += 4)
789     {
790         k[cc + 0] ^= k[cc - 4];
791         k[cc + 1] ^= k[cc - 3];
792         k[cc + 2] ^= k[cc - 2];
793         k[cc + 3] ^= k[cc - 1];
794     }
795 
796     k[16] ^= s_box(k[12]);
797     k[17] ^= s_box(k[13]);
798     k[18] ^= s_box(k[14]);
799     k[19] ^= s_box(k[15]);
800 
801     for( cc = 20; cc < 32; cc += 4 )
802     {
803         k[cc + 0] ^= k[cc - 4];
804         k[cc + 1] ^= k[cc - 3];
805         k[cc + 2] ^= k[cc - 2];
806         k[cc + 3] ^= k[cc - 1];
807     }
808 }
809 
810 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
811 
812 void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
813                       const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
814 {
815     uint_8t s1[N_BLOCK], r, rc = 1;
816     if(o_key != key)
817     {
818         block_copy( o_key, key );
819         block_copy( o_key + 16, key + 16 );
820     }
821     copy_and_key( s1, in, o_key );
822 
823     for( r = 1 ; r < 14 ; ++r )
824 #if defined( VERSION_1 )
825     {
826         mix_sub_columns(s1);
827         if( r & 1 )
828             add_round_key( s1, o_key + 16 );
829         else
830         {
831             update_encrypt_key_256( o_key, &rc );
832             add_round_key( s1, o_key );
833         }
834     }
835 #else
836     {   uint_8t s2[N_BLOCK];
837         mix_sub_columns( s2, s1 );
838         if( r & 1 )
839             copy_and_key( s1, s2, o_key + 16 );
840         else
841         {
842             update_encrypt_key_256( o_key, &rc );
843             copy_and_key( s1, s2, o_key );
844         }
845     }
846 #endif
847 
848     shift_sub_rows( s1 );
849     update_encrypt_key_256( o_key, &rc );
850     copy_and_key( out, s1, o_key );
851 }
852 
853 #endif
854 
855 #if defined( AES_DEC_256_OTFK )
856 
/*  The 'on the fly' decryption key update for 256 bit keys */
858 
859 static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
860 {   uint_8t cc;
861 
862     for(cc = 28; cc > 16; cc -= 4)
863     {
864         k[cc + 0] ^= k[cc - 4];
865         k[cc + 1] ^= k[cc - 3];
866         k[cc + 2] ^= k[cc - 2];
867         k[cc + 3] ^= k[cc - 1];
868     }
869 
870     k[16] ^= s_box(k[12]);
871     k[17] ^= s_box(k[13]);
872     k[18] ^= s_box(k[14]);
873     k[19] ^= s_box(k[15]);
874 
875     for(cc = 12; cc > 0; cc -= 4)
876     {
877         k[cc + 0] ^= k[cc - 4];
878         k[cc + 1] ^= k[cc - 3];
879         k[cc + 2] ^= k[cc - 2];
880         k[cc + 3] ^= k[cc - 1];
881     }
882 
883     *rc = d2(*rc);
884     k[0] ^= s_box(k[29]) ^ *rc;
885     k[1] ^= s_box(k[30]);
886     k[2] ^= s_box(k[31]);
887     k[3] ^= s_box(k[28]);
888 }
889 
890 /*  Decrypt a single block of 16 bytes with 'on the fly'
891     256 bit keying
892 */
893 void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
894                       const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
895 {
896     uint_8t s1[N_BLOCK], r, rc = 0x80;
897 
898     if(o_key != key)
899     {
900         block_copy( o_key, key );
901         block_copy( o_key + 16, key + 16 );
902     }
903 
904     copy_and_key( s1, in, o_key );
905     inv_shift_sub_rows( s1 );
906 
907     for( r = 14 ; --r ; )
908 #if defined( VERSION_1 )
909     {
910         if( ( r & 1 ) )
911         {
912             update_decrypt_key_256( o_key, &rc );
913             add_round_key( s1, o_key + 16 );
914         }
915         else
916             add_round_key( s1, o_key );
917         inv_mix_sub_columns( s1 );
918     }
919 #else
920     {   uint_8t s2[N_BLOCK];
921         if( ( r & 1 ) )
922         {
923             update_decrypt_key_256( o_key, &rc );
924             copy_and_key( s2, s1, o_key + 16 );
925         }
926         else
927             copy_and_key( s2, s1, o_key );
928         inv_mix_sub_columns( s1, s2 );
929     }
930 #endif
931     copy_and_key( out, s1, o_key );
932 }
933 
934 #endif
935