/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */

typedef unsigned char u8;
typedef unsigned int u32;

typedef struct
{
	u32 input[16]; /* could be compressed */
} chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])      ) | \
	 ((u32)((p)[1]) <<  8) | \
	 ((u32)((p)[2]) << 16) | \
	 ((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)      ); \
		(p)[1] = U8V((v) >>  8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

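/*
 * ChaCha quarter round: four add-xor-rotate steps on 32-bit words,
 * using the rotation distances 16, 12, 8 and 7.
 */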
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

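/*
 * Layout of input[16]: words 0-3 hold the constant ("expand 32-byte k"
 * for 256-bit keys, "expand 16-byte k" for 128-bit keys), words 4-11
 * the key, words 12-13 the 64-bit block counter, and words 14-15 the
 * 64-bit IV/nonce.
 */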
static void
chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

static void
chacha_ivsetup(chacha_ctx *x,const u8 *iv)
{
	x->input[12] = 0;
	x->input[13] = 0;
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

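/*
 * Encrypt (or, equivalently, decrypt) "bytes" bytes from m into c by
 * XORing them with the ChaCha20 keystream; with KEYSTREAM_ONLY defined,
 * the raw keystream is written to c instead.  A partial final block is
 * staged through a stack buffer, and the block counter is saved back
 * into the context so consecutive calls continue the same stream.
 */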
static void
chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;
	u8 tmp[64];
	u_int i;

	if (!bytes) return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
		if (bytes < 64) {
			/* fewer than 64 bytes left: work in the stack buffer
			 * and remember the real output pointer */
			for (i = 0;i < bytes;++i) tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		/* 20 rounds: 10 double rounds of column then diagonal quarter rounds */
		for (i = 20;i > 0;i -= 2) {
			QUARTERROUND( x0, x4, x8,x12)
			QUARTERROUND( x1, x5, x9,x13)
			QUARTERROUND( x2, x6,x10,x14)
			QUARTERROUND( x3, x7,x11,x15)
			QUARTERROUND( x0, x5,x10,x15)
			QUARTERROUND( x1, x6,x11,x12)
			QUARTERROUND( x2, x7, x8,x13)
			QUARTERROUND( x3, x4, x9,x14)
		}
		x0 = PLUS(x0,j0);
		x1 = PLUS(x1,j1);
		x2 = PLUS(x2,j2);
		x3 = PLUS(x3,j3);
		x4 = PLUS(x4,j4);
		x5 = PLUS(x5,j5);
		x6 = PLUS(x6,j6);
		x7 = PLUS(x7,j7);
		x8 = PLUS(x8,j8);
		x9 = PLUS(x9,j9);
		x10 = PLUS(x10,j10);
		x11 = PLUS(x11,j11);
		x12 = PLUS(x12,j12);
		x13 = PLUS(x13,j13);
		x14 = PLUS(x14,j14);
		x15 = PLUS(x15,j15);

#ifndef KEYSTREAM_ONLY
		x0 = XOR(x0,U8TO32_LITTLE(m + 0));
		x1 = XOR(x1,U8TO32_LITTLE(m + 4));
		x2 = XOR(x2,U8TO32_LITTLE(m + 8));
		x3 = XOR(x3,U8TO32_LITTLE(m + 12));
		x4 = XOR(x4,U8TO32_LITTLE(m + 16));
		x5 = XOR(x5,U8TO32_LITTLE(m + 20));
		x6 = XOR(x6,U8TO32_LITTLE(m + 24));
		x7 = XOR(x7,U8TO32_LITTLE(m + 28));
		x8 = XOR(x8,U8TO32_LITTLE(m + 32));
		x9 = XOR(x9,U8TO32_LITTLE(m + 36));
		x10 = XOR(x10,U8TO32_LITTLE(m + 40));
		x11 = XOR(x11,U8TO32_LITTLE(m + 44));
		x12 = XOR(x12,U8TO32_LITTLE(m + 48));
		x13 = XOR(x13,U8TO32_LITTLE(m + 52));
		x14 = XOR(x14,U8TO32_LITTLE(m + 56));
		x15 = XOR(x15,U8TO32_LITTLE(m + 60));
#endif

		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/* stopping at 2^70 bytes per nonce is user's responsibility */
		}

		U32TO8_LITTLE(c + 0,x0);
		U32TO8_LITTLE(c + 4,x1);
		U32TO8_LITTLE(c + 8,x2);
		U32TO8_LITTLE(c + 12,x3);
		U32TO8_LITTLE(c + 16,x4);
		U32TO8_LITTLE(c + 20,x5);
		U32TO8_LITTLE(c + 24,x6);
		U32TO8_LITTLE(c + 28,x7);
		U32TO8_LITTLE(c + 32,x8);
		U32TO8_LITTLE(c + 36,x9);
		U32TO8_LITTLE(c + 40,x10);
		U32TO8_LITTLE(c + 44,x11);
		U32TO8_LITTLE(c + 48,x12);
		U32TO8_LITTLE(c + 52,x13);
		U32TO8_LITTLE(c + 56,x14);
		U32TO8_LITTLE(c + 60,x15);

		if (bytes <= 64) {
			/* last block: copy out any partial block, save the
			 * counter into the context and return */
			if (bytes < 64) {
				for (i = 0;i < bytes;++i) ctarget[i] = c[i];
			}
			x->input[12] = j12;
			x->input[13] = j13;
			return;
		}
		bytes -= 64;
		c += 64;
#ifndef KEYSTREAM_ONLY
		m += 64;
#endif
	}
}
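
/*
 * Illustrative usage sketch (buffer names and sizes are placeholders):
 * encrypt a message with a 256-bit key and a 64-bit IV supplied as raw
 * byte arrays.  The trailing ivbits argument is not used by
 * chacha_keysetup above, so 0 is passed.
 *
 *	chacha_ctx ctx;
 *	u8 key[32], iv[8], plain[128], cipher[128];
 *
 *	chacha_keysetup(&ctx, key, 256, 0);
 *	chacha_ivsetup(&ctx, iv);
 *	chacha_encrypt_bytes(&ctx, plain, cipher, sizeof(plain));
 *
 * Decryption is the same call with the ciphertext as input, since the
 * cipher simply XORs the keystream with its input.
 */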