1 // Copyright 2019 The CryptoCorrosion Contributors
2 // Copyright 2020 Developers of the Rand project.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 //! The ChaCha random number generator.
11
12 use ppv_lite86::{dispatch, dispatch_light128};
13
14 pub use ppv_lite86::Machine;
15 use ppv_lite86::{vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4};
16
/// Size in bytes of a single ChaCha output block.
pub(crate) const BLOCK: usize = 64;
/// [`BLOCK`] as a `u64`.
pub(crate) const BLOCK64: u64 = BLOCK as u64;
/// Base-2 logarithm of the number of blocks held by one output buffer.
const LOG2_BUFBLOCKS: u64 = 2;
/// Number of blocks held by one output buffer (`2^LOG2_BUFBLOCKS`).
const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS;
/// Size in bytes of one output buffer, as a `u64`.
pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS;
/// Size in bytes of one output buffer; the length of the array filled by `refill4`.
pub(crate) const BUFSZ: usize = BUFSZ64 as usize;
23
24 #[derive(Clone, PartialEq, Eq)]
25 pub struct ChaCha {
26 pub(crate) b: vec128_storage,
27 pub(crate) c: vec128_storage,
28 pub(crate) d: vec128_storage,
29 }
30
/// Working ChaCha state made of four rows `a`, `b`, `c`, `d` of vector type `V`.
///
/// `V` may be a single 128-bit row (one block) or a wider vector that carries the
/// same row of several blocks at once, as `refill_wide_impl` does.
#[derive(Clone)]
pub struct State<V> {
    /// Row 0: initialised with the ChaCha constant by the refill functions.
    pub(crate) a: V,
    /// Row 1.
    pub(crate) b: V,
    /// Row 2.
    pub(crate) c: V,
    /// Row 3.
    pub(crate) d: V,
}
38
39 #[inline(always)]
round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V>40 pub(crate) fn round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V> {
41 x.a += x.b;
42 x.d = (x.d ^ x.a).rotate_each_word_right16();
43 x.c += x.d;
44 x.b = (x.b ^ x.c).rotate_each_word_right20();
45 x.a += x.b;
46 x.d = (x.d ^ x.a).rotate_each_word_right24();
47 x.c += x.d;
48 x.b = (x.b ^ x.c).rotate_each_word_right25();
49 x
50 }
51
52 #[inline(always)]
diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V>53 pub(crate) fn diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> {
54 x.b = x.b.shuffle_lane_words3012();
55 x.c = x.c.shuffle_lane_words2301();
56 x.d = x.d.shuffle_lane_words1230();
57 x
58 }
59 #[inline(always)]
undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V>60 pub(crate) fn undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> {
61 x.b = x.b.shuffle_lane_words1230();
62 x.c = x.c.shuffle_lane_words2301();
63 x.d = x.d.shuffle_lane_words3012();
64 x
65 }
66
67 impl ChaCha {
68 #[inline(always)]
new(key: &[u8; 32], nonce: &[u8]) -> Self69 pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self {
70 init_chacha(key, nonce)
71 }
72
73 #[inline(always)]
pos64<M: Machine>(&self, m: M) -> u6474 fn pos64<M: Machine>(&self, m: M) -> u64 {
75 let d: M::u32x4 = m.unpack(self.d);
76 ((d.extract(1) as u64) << 32) | d.extract(0) as u64
77 }
78
79 /// Produce 4 blocks of output, advancing the state
80 #[inline(always)]
refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ])81 pub fn refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ]) {
82 refill_wide(self, drounds, out)
83 }
84
85 #[inline(always)]
set_stream_param(&mut self, param: u32, value: u64)86 pub fn set_stream_param(&mut self, param: u32, value: u64) {
87 set_stream_param(self, param, value)
88 }
89
90 #[inline(always)]
get_stream_param(&self, param: u32) -> u6491 pub fn get_stream_param(&self, param: u32) -> u64 {
92 get_stream_param(self, param)
93 }
94
95 /// Return whether rhs is equal in all parameters except current 64-bit position.
96 #[inline]
stream64_eq(&self, rhs: &Self) -> bool97 pub fn stream64_eq(&self, rhs: &Self) -> bool {
98 let self_d: [u32; 4] = self.d.into();
99 let rhs_d: [u32; 4] = rhs.d.into();
100 self.b == rhs.b && self.c == rhs.c && self_d[3] == rhs_d[3] && self_d[2] == rhs_d[2]
101 }
102 }
103
104 #[allow(clippy::many_single_char_names)]
105 #[inline(always)]
refill_wide_impl<Mach: Machine>( m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ], )106 fn refill_wide_impl<Mach: Machine>(
107 m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ],
108 ) {
109 let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]);
110 let mut pos = state.pos64(m);
111 let d0: Mach::u32x4 = m.unpack(state.d);
112 pos = pos.wrapping_add(1);
113 let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
114 pos = pos.wrapping_add(1);
115 let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
116 pos = pos.wrapping_add(1);
117 let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
118
119 let b = m.unpack(state.b);
120 let c = m.unpack(state.c);
121 let mut x = State {
122 a: Mach::u32x4x4::from_lanes([k, k, k, k]),
123 b: Mach::u32x4x4::from_lanes([b, b, b, b]),
124 c: Mach::u32x4x4::from_lanes([c, c, c, c]),
125 d: m.unpack(Mach::u32x4x4::from_lanes([d0, d1, d2, d3]).into()),
126 };
127 for _ in 0..drounds {
128 x = round(x);
129 x = undiagonalize(round(diagonalize(x)));
130 }
131 let mut pos = state.pos64(m);
132 let d0: Mach::u32x4 = m.unpack(state.d);
133 pos = pos.wrapping_add(1);
134 let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
135 pos = pos.wrapping_add(1);
136 let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
137 pos = pos.wrapping_add(1);
138 let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
139 pos = pos.wrapping_add(1);
140 let d4 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0);
141
142 let (a, b, c, d) = (
143 x.a.to_lanes(),
144 x.b.to_lanes(),
145 x.c.to_lanes(),
146 x.d.to_lanes(),
147 );
148 let sb = m.unpack(state.b);
149 let sc = m.unpack(state.c);
150 let sd = [m.unpack(state.d), d1, d2, d3];
151 state.d = d4.into();
152 let mut words = out.chunks_exact_mut(16);
153 for ((((&a, &b), &c), &d), &sd) in a.iter().zip(&b).zip(&c).zip(&d).zip(&sd) {
154 (a + k).write_le(words.next().unwrap());
155 (b + sb).write_le(words.next().unwrap());
156 (c + sc).write_le(words.next().unwrap());
157 (d + sd).write_le(words.next().unwrap());
158 }
159 }
160
161 dispatch!(m, Mach, {
162 fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ]) {
163 refill_wide_impl(m, state, drounds, out);
164 }
165 });
166
167 // Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ
168 // and XChaCha's setup step.
169 dispatch!(m, Mach, {
170 fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State<vec128_storage> {
171 let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]);
172 let mut x = State {
173 a: k,
174 b: m.unpack(state.b),
175 c: m.unpack(state.c),
176 d: m.unpack(state.d),
177 };
178 for _ in 0..drounds {
179 x = round(x);
180 x = undiagonalize(round(diagonalize(x)));
181 }
182 State {
183 a: x.a.into(),
184 b: x.b.into(),
185 c: x.c.into(),
186 d: x.d.into(),
187 }
188 }
189 });
190
191 dispatch_light128!(m, Mach, {
192 fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) {
193 let d: Mach::u32x4 = m.unpack(state.d);
194 state.d = d
195 .insert((value >> 32) as u32, (param << 1) | 1)
196 .insert(value as u32, param << 1)
197 .into();
198 }
199 });
200
201 dispatch_light128!(m, Mach, {
202 fn get_stream_param(state: &ChaCha, param: u32) -> u64 {
203 let d: Mach::u32x4 = m.unpack(state.d);
204 ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64
205 }
206 });
207
/// Decodes a 4-byte slice as a little-endian `u32`.
///
/// # Panics
///
/// Panics if `xs` is not exactly 4 bytes long.
fn read_u32le(xs: &[u8]) -> u32 {
    assert_eq!(xs.len(), 4);
    u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]])
}
212
213 dispatch_light128!(m, Mach, {
214 fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha {
215 let ctr_nonce = [
216 0,
217 if nonce.len() == 12 {
218 read_u32le(&nonce[0..4])
219 } else {
220 0
221 },
222 read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]),
223 read_u32le(&nonce[nonce.len() - 4..]),
224 ];
225 let key0: Mach::u32x4 = m.read_le(&key[..16]);
226 let key1: Mach::u32x4 = m.read_le(&key[16..]);
227 ChaCha {
228 b: key0.into(),
229 c: key1.into(),
230 d: ctr_nonce.into(),
231 }
232 }
233 });
234
235 dispatch_light128!(m, Mach, {
236 fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha {
237 let key0: Mach::u32x4 = m.read_le(&key[..16]);
238 let key1: Mach::u32x4 = m.read_le(&key[16..]);
239 let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]);
240 let mut state = ChaCha {
241 b: key0.into(),
242 c: key1.into(),
243 d: nonce0.into(),
244 };
245 let x = refill_narrow_rounds(&mut state, rounds);
246 let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])];
247 state.b = x.a;
248 state.c = x.d;
249 state.d = ctr_nonce1.into();
250 state
251 }
252 });
253