1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/random/internal/randen_slow.h"
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <cstring>
20 
21 #include "absl/base/attributes.h"
22 #include "absl/random/internal/platform.h"
23 
24 #if ABSL_HAVE_ATTRIBUTE(always_inline) || \
25     (defined(__GNUC__) && !defined(__clang__))
26 #define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE \
27   __attribute__((always_inline))
28 #elif defined(_MSC_VER)
29 // We can achieve something similar to attribute((always_inline)) with MSVC by
30 // using the __forceinline keyword, however this is not perfect. MSVC is
31 // much less aggressive about inlining, and even with the __forceinline keyword.
32 #define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __forceinline
33 #else
34 #define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE
35 #endif
36 
37 namespace {
38 
39 // AES portions based on rijndael-alg-fst.c,
40 // https://fastcrypto.org/front/misc/rijndael-alg-fst.c
41 //
42 // Implementation of
43 // http://www.csrc.nist.gov/publications/fips/fips197/fips-197.pdf
44 constexpr uint32_t te0[256] = {
45     0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd,
46     0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
47     0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d,
48     0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
49     0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7,
50     0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
51     0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4,
52     0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
53     0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1,
54     0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
55     0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e,
56     0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
57     0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e,
58     0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
59     0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46,
60     0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
61     0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7,
62     0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
63     0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe,
64     0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
65     0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a,
66     0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
67     0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2,
68     0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
69     0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e,
70     0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
71     0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256,
72     0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
73     0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4,
74     0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
75     0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa,
76     0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
77     0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1,
78     0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
79     0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42,
80     0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
81     0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158,
82     0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
83     0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22,
84     0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
85     0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631,
86     0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
87     0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
88 };
89 
90 constexpr uint32_t te1[256] = {
91     0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b,
92     0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
93     0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282,
94     0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
95     0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4,
96     0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
97     0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5,
98     0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
99     0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696,
100     0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
101     0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383,
102     0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
103     0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3,
104     0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
105     0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb,
106     0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
107     0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d,
108     0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
109     0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3,
110     0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
111     0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff,
112     0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
113     0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7,
114     0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
115     0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a,
116     0xab3b9090, 0x830b8888, 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
117     0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, 0x3bdbe0e0, 0x56643232,
118     0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
119     0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595,
120     0x37d3e4e4, 0x8bf27979, 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
121     0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, 0xb4d86c6c, 0xfaac5656,
122     0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
123     0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6,
124     0xc773b4b4, 0x5197c6c6, 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
125     0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, 0x90e07070, 0x427c3e3e,
126     0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
127     0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1,
128     0x273a1d1d, 0xb9279e9e, 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
129     0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, 0xb62d9b9b, 0x223c1e1e,
130     0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
131     0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6,
132     0xc6844242, 0xb8d06868, 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
133     0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
134 };
135 
136 constexpr uint32_t te2[256] = {
137     0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b,
138     0x6fb1de6f, 0xc55491c5, 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
139     0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, 0xca458fca, 0x829d1f82,
140     0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
141     0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4,
142     0x7296e472, 0xc05b9bc0, 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
143     0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, 0x345c6834, 0xa5f451a5,
144     0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
145     0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796,
146     0x050f0a05, 0x9ab52f9a, 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
147     0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, 0x091b1209, 0x839e1d83,
148     0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
149     0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3,
150     0x2f715e2f, 0x84971384, 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
151     0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, 0x6abed46a, 0xcb468dcb,
152     0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
153     0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d,
154     0x33556633, 0x85941185, 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
155     0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, 0x51f3a251, 0xa3fe5da3,
156     0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
157     0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff,
158     0xf30efdf3, 0xd26dbfd2, 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
159     0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, 0xc45793c4, 0xa7f255a7,
160     0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
161     0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a,
162     0x90ab3b90, 0x88830b88, 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
163     0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, 0xe03bdbe0, 0x32566432,
164     0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
165     0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195,
166     0xe437d3e4, 0x798bf279, 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
167     0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, 0x6cb4d86c, 0x56faac56,
168     0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
169     0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6,
170     0xb4c773b4, 0xc65197c6, 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
171     0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, 0x7090e070, 0x3e427c3e,
172     0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
173     0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1,
174     0x1d273a1d, 0x9eb9279e, 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
175     0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, 0x9bb62d9b, 0x1e223c1e,
176     0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
177     0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6,
178     0x42c68442, 0x68b8d068, 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
179     0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
180 };
181 
182 constexpr uint32_t te3[256] = {
183     0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6,
184     0x6f6fb1de, 0xc5c55491, 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
185     0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, 0xcaca458f, 0x82829d1f,
186     0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
187     0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753,
188     0x727296e4, 0xc0c05b9b, 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
189     0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, 0x34345c68, 0xa5a5f451,
190     0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
191     0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137,
192     0x05050f0a, 0x9a9ab52f, 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
193     0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, 0x09091b12, 0x83839e1d,
194     0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
195     0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd,
196     0x2f2f715e, 0x84849713, 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
197     0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, 0x6a6abed4, 0xcbcb468d,
198     0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
199     0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a,
200     0x33335566, 0x85859411, 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
201     0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, 0x5151f3a2, 0xa3a3fe5d,
202     0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
203     0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5,
204     0xf3f30efd, 0xd2d26dbf, 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
205     0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, 0xc4c45793, 0xa7a7f255,
206     0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
207     0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54,
208     0x9090ab3b, 0x8888830b, 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
209     0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, 0xe0e03bdb, 0x32325664,
210     0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
211     0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431,
212     0xe4e437d3, 0x79798bf2, 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
213     0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, 0x6c6cb4d8, 0x5656faac,
214     0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
215     0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157,
216     0xb4b4c773, 0xc6c65197, 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
217     0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, 0x707090e0, 0x3e3e427c,
218     0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
219     0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899,
220     0x1d1d273a, 0x9e9eb927, 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
221     0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, 0x9b9bb62d, 0x1e1e223c,
222     0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
223     0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7,
224     0x4242c684, 0x6868b8d0, 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
225     0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
226 };
227 
228 struct alignas(16) u64x2 {
u64x2__anona7b1242c0111::u64x2229   constexpr u64x2() : v{0, 0} {};
u64x2__anona7b1242c0111::u64x2230   constexpr u64x2(uint64_t hi, uint64_t lo) : v{lo, hi} {}
231 
232   uint64_t v[2];
233 };
234 
235 // Software implementation of the Vector128 class, using uint32_t
236 // as an underlying vector register.
237 //
238 struct Vector128 {
operator ^=__anona7b1242c0111::Vector128239   inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=(
240       const Vector128& other) {
241     s[0] ^= other.s[0];
242     s[1] ^= other.s[1];
243     s[2] ^= other.s[2];
244     s[3] ^= other.s[3];
245     return *this;
246   }
247 
248   uint32_t s[4];
249 };
250 
251 inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
Vector128Load(const void * ABSL_RANDOM_INTERNAL_RESTRICT from)252 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) {
253   Vector128 result;
254   const uint8_t* ABSL_RANDOM_INTERNAL_RESTRICT src =
255       reinterpret_cast<const uint8_t*>(from);
256 
257   result.s[0] = static_cast<uint32_t>(src[0]) << 24 |
258                 static_cast<uint32_t>(src[1]) << 16 |
259                 static_cast<uint32_t>(src[2]) << 8 |
260                 static_cast<uint32_t>(src[3]);
261   result.s[1] = static_cast<uint32_t>(src[4]) << 24 |
262                 static_cast<uint32_t>(src[5]) << 16 |
263                 static_cast<uint32_t>(src[6]) << 8 |
264                 static_cast<uint32_t>(src[7]);
265   result.s[2] = static_cast<uint32_t>(src[8]) << 24 |
266                 static_cast<uint32_t>(src[9]) << 16 |
267                 static_cast<uint32_t>(src[10]) << 8 |
268                 static_cast<uint32_t>(src[11]);
269   result.s[3] = static_cast<uint32_t>(src[12]) << 24 |
270                 static_cast<uint32_t>(src[13]) << 16 |
271                 static_cast<uint32_t>(src[14]) << 8 |
272                 static_cast<uint32_t>(src[15]);
273   return result;
274 }
275 
Vector128Store(const Vector128 & v,void * ABSL_RANDOM_INTERNAL_RESTRICT to)276 inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store(
277     const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
278   uint8_t* dst = reinterpret_cast<uint8_t*>(to);
279   dst[0] = static_cast<uint8_t>(v.s[0] >> 24);
280   dst[1] = static_cast<uint8_t>(v.s[0] >> 16);
281   dst[2] = static_cast<uint8_t>(v.s[0] >> 8);
282   dst[3] = static_cast<uint8_t>(v.s[0]);
283   dst[4] = static_cast<uint8_t>(v.s[1] >> 24);
284   dst[5] = static_cast<uint8_t>(v.s[1] >> 16);
285   dst[6] = static_cast<uint8_t>(v.s[1] >> 8);
286   dst[7] = static_cast<uint8_t>(v.s[1]);
287   dst[8] = static_cast<uint8_t>(v.s[2] >> 24);
288   dst[9] = static_cast<uint8_t>(v.s[2] >> 16);
289   dst[10] = static_cast<uint8_t>(v.s[2] >> 8);
290   dst[11] = static_cast<uint8_t>(v.s[2]);
291   dst[12] = static_cast<uint8_t>(v.s[3] >> 24);
292   dst[13] = static_cast<uint8_t>(v.s[3] >> 16);
293   dst[14] = static_cast<uint8_t>(v.s[3] >> 8);
294   dst[15] = static_cast<uint8_t>(v.s[3]);
295 }
296 
297 // One round of AES. "round_key" is a public constant for breaking the
298 // symmetry of AES (ensures previously equal columns differ afterwards).
299 inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
AesRound(const Vector128 & state,const Vector128 & round_key)300 AesRound(const Vector128& state, const Vector128& round_key) {
301   // clang-format off
302   Vector128 result;
303   result.s[0] = round_key.s[0] ^
304                 te0[uint8_t(state.s[0] >> 24)] ^
305                 te1[uint8_t(state.s[1] >> 16)] ^
306                 te2[uint8_t(state.s[2] >> 8)] ^
307                 te3[uint8_t(state.s[3])];
308   result.s[1] = round_key.s[1] ^
309                 te0[uint8_t(state.s[1] >> 24)] ^
310                 te1[uint8_t(state.s[2] >> 16)] ^
311                 te2[uint8_t(state.s[3] >> 8)] ^
312                 te3[uint8_t(state.s[0])];
313   result.s[2] = round_key.s[2] ^
314                 te0[uint8_t(state.s[2] >> 24)] ^
315                 te1[uint8_t(state.s[3] >> 16)] ^
316                 te2[uint8_t(state.s[0] >> 8)] ^
317                 te3[uint8_t(state.s[1])];
318   result.s[3] = round_key.s[3] ^
319                 te0[uint8_t(state.s[3] >> 24)] ^
320                 te1[uint8_t(state.s[0] >> 16)] ^
321                 te2[uint8_t(state.s[1] >> 8)] ^
322                 te3[uint8_t(state.s[2])];
323   return result;
324   // clang-format on
325 }
326 
327 // RANDen = RANDom generator or beetroots in Swiss German.
328 // 'Strong' (well-distributed, unpredictable, backtracking-resistant) random
329 // generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32.
330 //
331 // High-level summary:
332 // 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is
333 //    a sponge-like random generator that requires a cryptographic permutation.
334 //    It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by
335 //    achieving backtracking resistance with only one Permute() per buffer.
336 //
337 // 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round
338 //    Function" constructs up to 1024-bit permutations using an improved
339 //    Generalized Feistel network with 2-round AES-128 functions. This Feistel
340 //    block shuffle achieves diffusion faster and is less vulnerable to
341 //    sliced-biclique attacks than the Type-2 cyclic shuffle.
342 //
343 // 3) "Improving the Generalized Feistel" and "New criterion for diffusion
344 //    property" extends the same kind of improved Feistel block shuffle to 16
345 //    branches, which enables a 2048-bit permutation.
346 //
347 // Combine these three ideas and also change Simpira's subround keys from
348 // structured/low-entropy counters to digits of Pi.
349 
350 // Randen constants.
351 constexpr size_t kFeistelBlocks = 16;
352 constexpr size_t kFeistelFunctions = kFeistelBlocks / 2;  // = 8
353 constexpr size_t kFeistelRounds = 16 + 1;  // > 4 * log2(kFeistelBlocks)
354 constexpr size_t kKeys = kFeistelRounds * kFeistelFunctions;
355 
356 // INCLUDE keys.
357 #include "absl/random/internal/randen-keys.inc"
358 
359 static_assert(kKeys == kRoundKeys, "kKeys and kRoundKeys must be equal");
360 
361 // 2 uint64_t lanes per Vector128
362 static constexpr size_t kLanes = 2;
363 
364 // The improved Feistel block shuffle function for 16 blocks.
BlockShuffle(uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT state_u64)365 inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle(
366     uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state_u64) {
367   static_assert(kFeistelBlocks == 16,
368                 "Feistel block shuffle only works for 16 blocks.");
369 
370   constexpr size_t shuffle[kFeistelBlocks] = {7,  2, 13, 4,  11, 8,  3, 6,
371                                               15, 0, 9,  10, 1,  14, 5, 12};
372 
373   u64x2* ABSL_RANDOM_INTERNAL_RESTRICT state =
374       reinterpret_cast<u64x2*>(state_u64);
375 
376   // The fully unrolled loop without the memcpy improves the speed by about
377   // 30% over the equivalent (leaving code here as a comment):
378   if (false) {
379     u64x2 source[kFeistelBlocks];
380     std::memcpy(source, state, sizeof(source));
381     for (size_t i = 0; i < kFeistelBlocks; i++) {
382       const u64x2 v0 = source[shuffle[i]];
383       state[i] = v0;
384     }
385   }
386 
387   const u64x2 v0 = state[shuffle[0]];
388   const u64x2 v1 = state[shuffle[1]];
389   const u64x2 v2 = state[shuffle[2]];
390   const u64x2 v3 = state[shuffle[3]];
391   const u64x2 v4 = state[shuffle[4]];
392   const u64x2 v5 = state[shuffle[5]];
393   const u64x2 v6 = state[shuffle[6]];
394   const u64x2 v7 = state[shuffle[7]];
395   const u64x2 w0 = state[shuffle[8]];
396   const u64x2 w1 = state[shuffle[9]];
397   const u64x2 w2 = state[shuffle[10]];
398   const u64x2 w3 = state[shuffle[11]];
399   const u64x2 w4 = state[shuffle[12]];
400   const u64x2 w5 = state[shuffle[13]];
401   const u64x2 w6 = state[shuffle[14]];
402   const u64x2 w7 = state[shuffle[15]];
403   state[0] = v0;
404   state[1] = v1;
405   state[2] = v2;
406   state[3] = v3;
407   state[4] = v4;
408   state[5] = v5;
409   state[6] = v6;
410   state[7] = v7;
411   state[8] = w0;
412   state[9] = w1;
413   state[10] = w2;
414   state[11] = w3;
415   state[12] = w4;
416   state[13] = w5;
417   state[14] = w6;
418   state[15] = w7;
419 }
420 
421 // Feistel round function using two AES subrounds. Very similar to F()
422 // from Simpira v2, but with independent subround keys. Uses 17 AES rounds
423 // per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in
424 // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel
425 // XORs are 'free' (included in the second AES instruction).
FeistelRound(uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT state,const u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT keys)426 inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound(
427     uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state,
428     const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
429   for (size_t branch = 0; branch < kFeistelBlocks; branch += 4) {
430     const Vector128 s0 = Vector128Load(state + kLanes * branch);
431     const Vector128 s1 = Vector128Load(state + kLanes * (branch + 1));
432     const Vector128 f0 = AesRound(s0, Vector128Load(keys));
433     keys++;
434     const Vector128 o1 = AesRound(f0, s1);
435     Vector128Store(o1, state + kLanes * (branch + 1));
436 
437     // Manually unroll this loop once. about 10% better than not unrolled.
438     const Vector128 s2 = Vector128Load(state + kLanes * (branch + 2));
439     const Vector128 s3 = Vector128Load(state + kLanes * (branch + 3));
440     const Vector128 f2 = AesRound(s2, Vector128Load(keys));
441     keys++;
442     const Vector128 o3 = AesRound(f2, s3);
443     Vector128Store(o3, state + kLanes * (branch + 3));
444   }
445   return keys;
446 }
447 
448 // Cryptographic permutation based via type-2 Generalized Feistel Network.
449 // Indistinguishable from ideal by chosen-ciphertext adversaries using less than
450 // 2^64 queries if the round function is a PRF. This is similar to the b=8 case
451 // of Simpira v2, but more efficient than its generic construction for b=16.
Permute(const void * keys,uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT state)452 inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Permute(
453     const void* keys, uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
454   const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 =
455       static_cast<const u64x2*>(keys);
456   for (size_t round = 0; round < kFeistelRounds; ++round) {
457     keys128 = FeistelRound(state, keys128);
458     BlockShuffle(state);
459   }
460 }
461 
462 }  // namespace
463 
464 namespace absl {
465 ABSL_NAMESPACE_BEGIN
466 namespace random_internal {
467 
GetKeys()468 const void* RandenSlow::GetKeys() {
469   // Round keys for one AES per Feistel round and branch.
470   // The canonical implementation uses first digits of Pi.
471   return round_keys;
472 }
473 
Absorb(const void * seed_void,void * state_void)474 void RandenSlow::Absorb(const void* seed_void, void* state_void) {
475   uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state =
476       reinterpret_cast<uint64_t*>(state_void);
477   const uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT seed =
478       reinterpret_cast<const uint64_t*>(seed_void);
479 
480   constexpr size_t kCapacityBlocks = kCapacityBytes / sizeof(uint64_t);
481   static_assert(kCapacityBlocks * sizeof(uint64_t) == kCapacityBytes,
482                 "Not i*V");
483   for (size_t i = kCapacityBlocks; i < kStateBytes / sizeof(uint64_t); ++i) {
484     state[i] ^= seed[i - kCapacityBlocks];
485   }
486 }
487 
Generate(const void * keys,void * state_void)488 void RandenSlow::Generate(const void* keys, void* state_void) {
489   static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch");
490 
491   uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state =
492       reinterpret_cast<uint64_t*>(state_void);
493 
494   const Vector128 prev_inner = Vector128Load(state);
495 
496   Permute(keys, state);
497 
498   // Ensure backtracking resistance.
499   Vector128 inner = Vector128Load(state);
500   inner ^= prev_inner;
501   Vector128Store(inner, state);
502 }
503 
504 }  // namespace random_internal
505 ABSL_NAMESPACE_END
506 }  // namespace absl
507