1 package org.bouncycastle.math.ec;
2 
3 import org.bouncycastle.util.Arrays;
4 
5 import java.math.BigInteger;
6 
7 class LongArray
8 {
9 //    private static long DEINTERLEAVE_MASK = 0x5555555555555555L;
10 
11     /*
12      * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits.
13      * In a binary field, this operation is the same as squaring an 8 bit number.
14      */
15     private static final int[] INTERLEAVE2_TABLE = new int[]
16     {
17         0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015,
18         0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
19         0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115,
20         0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
21         0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415,
22         0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
23         0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515,
24         0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
25         0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015,
26         0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
27         0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115,
28         0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
29         0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415,
30         0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
31         0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515,
32         0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
33         0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015,
34         0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
35         0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115,
36         0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
37         0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415,
38         0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
39         0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515,
40         0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
41         0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015,
42         0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
43         0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115,
44         0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
45         0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415,
46         0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
47         0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515,
48         0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
49     };
50 
51     /*
52      * This expands 7 bit indices into 21 bit contents (high bit 18), by inserting 0s between bits.
53      */
54     private static final int[] INTERLEAVE3_TABLE = new  int[]
55     {
56         0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049,
57         0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249,
58         0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049,
59         0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249,
60         0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049,
61         0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249,
62         0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049,
63         0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249,
64         0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049,
65         0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249,
66         0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049,
67         0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249,
68         0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049,
69         0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 0x48241, 0x48248, 0x48249,
70         0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049,
71         0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249
72     };
73 
74     /*
75      * This expands 8 bit indices into 32 bit contents (high bit 28), by inserting 0s between bits.
76      */
77     private static final int[] INTERLEAVE4_TABLE = new int[]
78     {
79         0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111,
80         0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111,
81         0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111,
82         0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111,
83         0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111,
84         0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111,
85         0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111,
86         0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111,
87         0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111,
88         0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111,
89         0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111,
90         0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111,
91         0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111,
92         0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111,
93         0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111,
94         0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111,
95         0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111,
96         0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111,
97         0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111,
98         0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111,
99         0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111,
100         0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111,
101         0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111,
102         0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111,
103         0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111,
104         0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111,
105         0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111,
106         0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111,
107         0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111,
108         0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111,
109         0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111,
110         0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111
111     };
112 
113     /*
114      * This expands 7 bit indices into 35 bit contents (high bit 30), by inserting 0s between bits.
115      */
116     private static final int[] INTERLEAVE5_TABLE = new int[] {
117         0x00000000, 0x00000001, 0x00000020, 0x00000021, 0x00000400, 0x00000401, 0x00000420, 0x00000421,
118         0x00008000, 0x00008001, 0x00008020, 0x00008021, 0x00008400, 0x00008401, 0x00008420, 0x00008421,
119         0x00100000, 0x00100001, 0x00100020, 0x00100021, 0x00100400, 0x00100401, 0x00100420, 0x00100421,
120         0x00108000, 0x00108001, 0x00108020, 0x00108021, 0x00108400, 0x00108401, 0x00108420, 0x00108421,
121         0x02000000, 0x02000001, 0x02000020, 0x02000021, 0x02000400, 0x02000401, 0x02000420, 0x02000421,
122         0x02008000, 0x02008001, 0x02008020, 0x02008021, 0x02008400, 0x02008401, 0x02008420, 0x02008421,
123         0x02100000, 0x02100001, 0x02100020, 0x02100021, 0x02100400, 0x02100401, 0x02100420, 0x02100421,
124         0x02108000, 0x02108001, 0x02108020, 0x02108021, 0x02108400, 0x02108401, 0x02108420, 0x02108421,
125         0x40000000, 0x40000001, 0x40000020, 0x40000021, 0x40000400, 0x40000401, 0x40000420, 0x40000421,
126         0x40008000, 0x40008001, 0x40008020, 0x40008021, 0x40008400, 0x40008401, 0x40008420, 0x40008421,
127         0x40100000, 0x40100001, 0x40100020, 0x40100021, 0x40100400, 0x40100401, 0x40100420, 0x40100421,
128         0x40108000, 0x40108001, 0x40108020, 0x40108021, 0x40108400, 0x40108401, 0x40108420, 0x40108421,
129         0x42000000, 0x42000001, 0x42000020, 0x42000021, 0x42000400, 0x42000401, 0x42000420, 0x42000421,
130         0x42008000, 0x42008001, 0x42008020, 0x42008021, 0x42008400, 0x42008401, 0x42008420, 0x42008421,
131         0x42100000, 0x42100001, 0x42100020, 0x42100021, 0x42100400, 0x42100401, 0x42100420, 0x42100421,
132         0x42108000, 0x42108001, 0x42108020, 0x42108021, 0x42108400, 0x42108401, 0x42108420, 0x42108421
133     };
134 
135     /*
136      * This expands 9 bit indices into 63 bit (long) contents (high bit 56), by inserting 0s between bits.
137      */
138     private static final long[] INTERLEAVE7_TABLE = new long[]
139     {
140         0x0000000000000000L, 0x0000000000000001L, 0x0000000000000080L, 0x0000000000000081L,
141         0x0000000000004000L, 0x0000000000004001L, 0x0000000000004080L, 0x0000000000004081L,
142         0x0000000000200000L, 0x0000000000200001L, 0x0000000000200080L, 0x0000000000200081L,
143         0x0000000000204000L, 0x0000000000204001L, 0x0000000000204080L, 0x0000000000204081L,
144         0x0000000010000000L, 0x0000000010000001L, 0x0000000010000080L, 0x0000000010000081L,
145         0x0000000010004000L, 0x0000000010004001L, 0x0000000010004080L, 0x0000000010004081L,
146         0x0000000010200000L, 0x0000000010200001L, 0x0000000010200080L, 0x0000000010200081L,
147         0x0000000010204000L, 0x0000000010204001L, 0x0000000010204080L, 0x0000000010204081L,
148         0x0000000800000000L, 0x0000000800000001L, 0x0000000800000080L, 0x0000000800000081L,
149         0x0000000800004000L, 0x0000000800004001L, 0x0000000800004080L, 0x0000000800004081L,
150         0x0000000800200000L, 0x0000000800200001L, 0x0000000800200080L, 0x0000000800200081L,
151         0x0000000800204000L, 0x0000000800204001L, 0x0000000800204080L, 0x0000000800204081L,
152         0x0000000810000000L, 0x0000000810000001L, 0x0000000810000080L, 0x0000000810000081L,
153         0x0000000810004000L, 0x0000000810004001L, 0x0000000810004080L, 0x0000000810004081L,
154         0x0000000810200000L, 0x0000000810200001L, 0x0000000810200080L, 0x0000000810200081L,
155         0x0000000810204000L, 0x0000000810204001L, 0x0000000810204080L, 0x0000000810204081L,
156         0x0000040000000000L, 0x0000040000000001L, 0x0000040000000080L, 0x0000040000000081L,
157         0x0000040000004000L, 0x0000040000004001L, 0x0000040000004080L, 0x0000040000004081L,
158         0x0000040000200000L, 0x0000040000200001L, 0x0000040000200080L, 0x0000040000200081L,
159         0x0000040000204000L, 0x0000040000204001L, 0x0000040000204080L, 0x0000040000204081L,
160         0x0000040010000000L, 0x0000040010000001L, 0x0000040010000080L, 0x0000040010000081L,
161         0x0000040010004000L, 0x0000040010004001L, 0x0000040010004080L, 0x0000040010004081L,
162         0x0000040010200000L, 0x0000040010200001L, 0x0000040010200080L, 0x0000040010200081L,
163         0x0000040010204000L, 0x0000040010204001L, 0x0000040010204080L, 0x0000040010204081L,
164         0x0000040800000000L, 0x0000040800000001L, 0x0000040800000080L, 0x0000040800000081L,
165         0x0000040800004000L, 0x0000040800004001L, 0x0000040800004080L, 0x0000040800004081L,
166         0x0000040800200000L, 0x0000040800200001L, 0x0000040800200080L, 0x0000040800200081L,
167         0x0000040800204000L, 0x0000040800204001L, 0x0000040800204080L, 0x0000040800204081L,
168         0x0000040810000000L, 0x0000040810000001L, 0x0000040810000080L, 0x0000040810000081L,
169         0x0000040810004000L, 0x0000040810004001L, 0x0000040810004080L, 0x0000040810004081L,
170         0x0000040810200000L, 0x0000040810200001L, 0x0000040810200080L, 0x0000040810200081L,
171         0x0000040810204000L, 0x0000040810204001L, 0x0000040810204080L, 0x0000040810204081L,
172         0x0002000000000000L, 0x0002000000000001L, 0x0002000000000080L, 0x0002000000000081L,
173         0x0002000000004000L, 0x0002000000004001L, 0x0002000000004080L, 0x0002000000004081L,
174         0x0002000000200000L, 0x0002000000200001L, 0x0002000000200080L, 0x0002000000200081L,
175         0x0002000000204000L, 0x0002000000204001L, 0x0002000000204080L, 0x0002000000204081L,
176         0x0002000010000000L, 0x0002000010000001L, 0x0002000010000080L, 0x0002000010000081L,
177         0x0002000010004000L, 0x0002000010004001L, 0x0002000010004080L, 0x0002000010004081L,
178         0x0002000010200000L, 0x0002000010200001L, 0x0002000010200080L, 0x0002000010200081L,
179         0x0002000010204000L, 0x0002000010204001L, 0x0002000010204080L, 0x0002000010204081L,
180         0x0002000800000000L, 0x0002000800000001L, 0x0002000800000080L, 0x0002000800000081L,
181         0x0002000800004000L, 0x0002000800004001L, 0x0002000800004080L, 0x0002000800004081L,
182         0x0002000800200000L, 0x0002000800200001L, 0x0002000800200080L, 0x0002000800200081L,
183         0x0002000800204000L, 0x0002000800204001L, 0x0002000800204080L, 0x0002000800204081L,
184         0x0002000810000000L, 0x0002000810000001L, 0x0002000810000080L, 0x0002000810000081L,
185         0x0002000810004000L, 0x0002000810004001L, 0x0002000810004080L, 0x0002000810004081L,
186         0x0002000810200000L, 0x0002000810200001L, 0x0002000810200080L, 0x0002000810200081L,
187         0x0002000810204000L, 0x0002000810204001L, 0x0002000810204080L, 0x0002000810204081L,
188         0x0002040000000000L, 0x0002040000000001L, 0x0002040000000080L, 0x0002040000000081L,
189         0x0002040000004000L, 0x0002040000004001L, 0x0002040000004080L, 0x0002040000004081L,
190         0x0002040000200000L, 0x0002040000200001L, 0x0002040000200080L, 0x0002040000200081L,
191         0x0002040000204000L, 0x0002040000204001L, 0x0002040000204080L, 0x0002040000204081L,
192         0x0002040010000000L, 0x0002040010000001L, 0x0002040010000080L, 0x0002040010000081L,
193         0x0002040010004000L, 0x0002040010004001L, 0x0002040010004080L, 0x0002040010004081L,
194         0x0002040010200000L, 0x0002040010200001L, 0x0002040010200080L, 0x0002040010200081L,
195         0x0002040010204000L, 0x0002040010204001L, 0x0002040010204080L, 0x0002040010204081L,
196         0x0002040800000000L, 0x0002040800000001L, 0x0002040800000080L, 0x0002040800000081L,
197         0x0002040800004000L, 0x0002040800004001L, 0x0002040800004080L, 0x0002040800004081L,
198         0x0002040800200000L, 0x0002040800200001L, 0x0002040800200080L, 0x0002040800200081L,
199         0x0002040800204000L, 0x0002040800204001L, 0x0002040800204080L, 0x0002040800204081L,
200         0x0002040810000000L, 0x0002040810000001L, 0x0002040810000080L, 0x0002040810000081L,
201         0x0002040810004000L, 0x0002040810004001L, 0x0002040810004080L, 0x0002040810004081L,
202         0x0002040810200000L, 0x0002040810200001L, 0x0002040810200080L, 0x0002040810200081L,
203         0x0002040810204000L, 0x0002040810204001L, 0x0002040810204080L, 0x0002040810204081L,
204         0x0100000000000000L, 0x0100000000000001L, 0x0100000000000080L, 0x0100000000000081L,
205         0x0100000000004000L, 0x0100000000004001L, 0x0100000000004080L, 0x0100000000004081L,
206         0x0100000000200000L, 0x0100000000200001L, 0x0100000000200080L, 0x0100000000200081L,
207         0x0100000000204000L, 0x0100000000204001L, 0x0100000000204080L, 0x0100000000204081L,
208         0x0100000010000000L, 0x0100000010000001L, 0x0100000010000080L, 0x0100000010000081L,
209         0x0100000010004000L, 0x0100000010004001L, 0x0100000010004080L, 0x0100000010004081L,
210         0x0100000010200000L, 0x0100000010200001L, 0x0100000010200080L, 0x0100000010200081L,
211         0x0100000010204000L, 0x0100000010204001L, 0x0100000010204080L, 0x0100000010204081L,
212         0x0100000800000000L, 0x0100000800000001L, 0x0100000800000080L, 0x0100000800000081L,
213         0x0100000800004000L, 0x0100000800004001L, 0x0100000800004080L, 0x0100000800004081L,
214         0x0100000800200000L, 0x0100000800200001L, 0x0100000800200080L, 0x0100000800200081L,
215         0x0100000800204000L, 0x0100000800204001L, 0x0100000800204080L, 0x0100000800204081L,
216         0x0100000810000000L, 0x0100000810000001L, 0x0100000810000080L, 0x0100000810000081L,
217         0x0100000810004000L, 0x0100000810004001L, 0x0100000810004080L, 0x0100000810004081L,
218         0x0100000810200000L, 0x0100000810200001L, 0x0100000810200080L, 0x0100000810200081L,
219         0x0100000810204000L, 0x0100000810204001L, 0x0100000810204080L, 0x0100000810204081L,
220         0x0100040000000000L, 0x0100040000000001L, 0x0100040000000080L, 0x0100040000000081L,
221         0x0100040000004000L, 0x0100040000004001L, 0x0100040000004080L, 0x0100040000004081L,
222         0x0100040000200000L, 0x0100040000200001L, 0x0100040000200080L, 0x0100040000200081L,
223         0x0100040000204000L, 0x0100040000204001L, 0x0100040000204080L, 0x0100040000204081L,
224         0x0100040010000000L, 0x0100040010000001L, 0x0100040010000080L, 0x0100040010000081L,
225         0x0100040010004000L, 0x0100040010004001L, 0x0100040010004080L, 0x0100040010004081L,
226         0x0100040010200000L, 0x0100040010200001L, 0x0100040010200080L, 0x0100040010200081L,
227         0x0100040010204000L, 0x0100040010204001L, 0x0100040010204080L, 0x0100040010204081L,
228         0x0100040800000000L, 0x0100040800000001L, 0x0100040800000080L, 0x0100040800000081L,
229         0x0100040800004000L, 0x0100040800004001L, 0x0100040800004080L, 0x0100040800004081L,
230         0x0100040800200000L, 0x0100040800200001L, 0x0100040800200080L, 0x0100040800200081L,
231         0x0100040800204000L, 0x0100040800204001L, 0x0100040800204080L, 0x0100040800204081L,
232         0x0100040810000000L, 0x0100040810000001L, 0x0100040810000080L, 0x0100040810000081L,
233         0x0100040810004000L, 0x0100040810004001L, 0x0100040810004080L, 0x0100040810004081L,
234         0x0100040810200000L, 0x0100040810200001L, 0x0100040810200080L, 0x0100040810200081L,
235         0x0100040810204000L, 0x0100040810204001L, 0x0100040810204080L, 0x0100040810204081L,
236         0x0102000000000000L, 0x0102000000000001L, 0x0102000000000080L, 0x0102000000000081L,
237         0x0102000000004000L, 0x0102000000004001L, 0x0102000000004080L, 0x0102000000004081L,
238         0x0102000000200000L, 0x0102000000200001L, 0x0102000000200080L, 0x0102000000200081L,
239         0x0102000000204000L, 0x0102000000204001L, 0x0102000000204080L, 0x0102000000204081L,
240         0x0102000010000000L, 0x0102000010000001L, 0x0102000010000080L, 0x0102000010000081L,
241         0x0102000010004000L, 0x0102000010004001L, 0x0102000010004080L, 0x0102000010004081L,
242         0x0102000010200000L, 0x0102000010200001L, 0x0102000010200080L, 0x0102000010200081L,
243         0x0102000010204000L, 0x0102000010204001L, 0x0102000010204080L, 0x0102000010204081L,
244         0x0102000800000000L, 0x0102000800000001L, 0x0102000800000080L, 0x0102000800000081L,
245         0x0102000800004000L, 0x0102000800004001L, 0x0102000800004080L, 0x0102000800004081L,
246         0x0102000800200000L, 0x0102000800200001L, 0x0102000800200080L, 0x0102000800200081L,
247         0x0102000800204000L, 0x0102000800204001L, 0x0102000800204080L, 0x0102000800204081L,
248         0x0102000810000000L, 0x0102000810000001L, 0x0102000810000080L, 0x0102000810000081L,
249         0x0102000810004000L, 0x0102000810004001L, 0x0102000810004080L, 0x0102000810004081L,
250         0x0102000810200000L, 0x0102000810200001L, 0x0102000810200080L, 0x0102000810200081L,
251         0x0102000810204000L, 0x0102000810204001L, 0x0102000810204080L, 0x0102000810204081L,
252         0x0102040000000000L, 0x0102040000000001L, 0x0102040000000080L, 0x0102040000000081L,
253         0x0102040000004000L, 0x0102040000004001L, 0x0102040000004080L, 0x0102040000004081L,
254         0x0102040000200000L, 0x0102040000200001L, 0x0102040000200080L, 0x0102040000200081L,
255         0x0102040000204000L, 0x0102040000204001L, 0x0102040000204080L, 0x0102040000204081L,
256         0x0102040010000000L, 0x0102040010000001L, 0x0102040010000080L, 0x0102040010000081L,
257         0x0102040010004000L, 0x0102040010004001L, 0x0102040010004080L, 0x0102040010004081L,
258         0x0102040010200000L, 0x0102040010200001L, 0x0102040010200080L, 0x0102040010200081L,
259         0x0102040010204000L, 0x0102040010204001L, 0x0102040010204080L, 0x0102040010204081L,
260         0x0102040800000000L, 0x0102040800000001L, 0x0102040800000080L, 0x0102040800000081L,
261         0x0102040800004000L, 0x0102040800004001L, 0x0102040800004080L, 0x0102040800004081L,
262         0x0102040800200000L, 0x0102040800200001L, 0x0102040800200080L, 0x0102040800200081L,
263         0x0102040800204000L, 0x0102040800204001L, 0x0102040800204080L, 0x0102040800204081L,
264         0x0102040810000000L, 0x0102040810000001L, 0x0102040810000080L, 0x0102040810000081L,
265         0x0102040810004000L, 0x0102040810004001L, 0x0102040810004080L, 0x0102040810004081L,
266         0x0102040810200000L, 0x0102040810200001L, 0x0102040810200080L, 0x0102040810200081L,
267         0x0102040810204000L, 0x0102040810204001L, 0x0102040810204080L, 0x0102040810204081L
268     };
269 
270     // For toString(); must have length 64
271     private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000";
272 
273     final static byte[] bitLengths =
274     {
275         0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
276         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
277         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
278         6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
279         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
280         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
281         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
282         7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
283         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
284         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
285         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
286         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
287         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
288         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
289         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
290         8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
291     };
292 
293     // TODO make m fixed for the LongArray, and hence compute T once and for all
294 
295     private long[] m_ints;
296 
LongArray(int intLen)297     public LongArray(int intLen)
298     {
299         m_ints = new long[intLen];
300     }
301 
LongArray(long[] ints)302     public LongArray(long[] ints)
303     {
304         m_ints = ints;
305     }
306 
LongArray(long[] ints, int off, int len)307     public LongArray(long[] ints, int off, int len)
308     {
309         if (off == 0 && len == ints.length)
310         {
311             m_ints = ints;
312         }
313         else
314         {
315             m_ints = new long[len];
316             System.arraycopy(ints, off, m_ints, 0, len);
317         }
318     }
319 
LongArray(BigInteger bigInt)320     public LongArray(BigInteger bigInt)
321     {
322         if (bigInt == null || bigInt.signum() < 0)
323         {
324             throw new IllegalArgumentException("invalid F2m field value");
325         }
326 
327         if (bigInt.signum() == 0)
328         {
329             m_ints = new long[] { 0L };
330             return;
331         }
332 
333         byte[] barr = bigInt.toByteArray();
334         int barrLen = barr.length;
335         int barrStart = 0;
336         if (barr[0] == 0)
337         {
338             // First byte is 0 to enforce highest (=sign) bit is zero.
339             // In this case ignore barr[0].
340             barrLen--;
341             barrStart = 1;
342         }
343         int intLen = (barrLen + 7) / 8;
344         m_ints = new long[intLen];
345 
346         int iarrJ = intLen - 1;
347         int rem = barrLen % 8 + barrStart;
348         long temp = 0;
349         int barrI = barrStart;
350         if (barrStart < rem)
351         {
352             for (; barrI < rem; barrI++)
353             {
354                 temp <<= 8;
355                 int barrBarrI = barr[barrI] & 0xFF;
356                 temp |= barrBarrI;
357             }
358             m_ints[iarrJ--] = temp;
359         }
360 
361         for (; iarrJ >= 0; iarrJ--)
362         {
363             temp = 0;
364             for (int i = 0; i < 8; i++)
365             {
366                 temp <<= 8;
367                 int barrBarrI = barr[barrI++] & 0xFF;
368                 temp |= barrBarrI;
369             }
370             m_ints[iarrJ] = temp;
371         }
372     }
373 
isOne()374     public boolean isOne()
375     {
376         long[] a = m_ints;
377         if (a[0] != 1L)
378         {
379             return false;
380         }
381         for (int i = 1; i < a.length; ++i)
382         {
383             if (a[i] != 0L)
384             {
385                 return false;
386             }
387         }
388         return true;
389     }
390 
isZero()391     public boolean isZero()
392     {
393         long[] a = m_ints;
394         for (int i = 0; i < a.length; ++i)
395         {
396             if (a[i] != 0L)
397             {
398                 return false;
399             }
400         }
401         return true;
402     }
403 
getUsedLength()404     public int getUsedLength()
405     {
406         return getUsedLengthFrom(m_ints.length);
407     }
408 
getUsedLengthFrom(int from)409     public int getUsedLengthFrom(int from)
410     {
411         long[] a = m_ints;
412         from = Math.min(from, a.length);
413 
414         if (from < 1)
415         {
416             return 0;
417         }
418 
419         // Check if first element will act as sentinel
420         if (a[0] != 0)
421         {
422             while (a[--from] == 0)
423             {
424             }
425             return from + 1;
426         }
427 
428         do
429         {
430             if (a[--from] != 0)
431             {
432                 return from + 1;
433             }
434         }
435         while (from > 0);
436 
437         return 0;
438     }
439 
degree()440     public int degree()
441     {
442         int i = m_ints.length;
443         long w;
444         do
445         {
446             if (i == 0)
447             {
448                 return 0;
449             }
450             w = m_ints[--i];
451         }
452         while (w == 0);
453 
454         return (i << 6) + bitLength(w);
455     }
456 
degreeFrom(int limit)457     private int degreeFrom(int limit)
458     {
459         int i = (limit + 62) >>> 6;
460         long w;
461         do
462         {
463             if (i == 0)
464             {
465                 return 0;
466             }
467             w = m_ints[--i];
468         }
469         while (w == 0);
470 
471         return (i << 6) + bitLength(w);
472     }
473 
474 //    private int lowestCoefficient()
475 //    {
476 //        for (int i = 0; i < m_ints.length; ++i)
477 //        {
478 //            long mi = m_ints[i];
479 //            if (mi != 0)
480 //            {
481 //                int j = 0;
482 //                while ((mi & 0xFFL) == 0)
483 //                {
484 //                    j += 8;
485 //                    mi >>>= 8;
486 //                }
487 //                while ((mi & 1L) == 0)
488 //                {
489 //                    ++j;
490 //                    mi >>>= 1;
491 //                }
492 //                return (i << 6) + j;
493 //            }
494 //        }
495 //        return -1;
496 //    }
497 
bitLength(long w)498     private static int bitLength(long w)
499     {
500         int u = (int)(w >>> 32), b;
501         if (u == 0)
502         {
503             u = (int)w;
504             b = 0;
505         }
506         else
507         {
508             b = 32;
509         }
510 
511         int t = u >>> 16, k;
512         if (t == 0)
513         {
514             t = u >>> 8;
515             k = (t == 0) ? bitLengths[u] : 8 + bitLengths[t];
516         }
517         else
518         {
519             int v = t >>> 8;
520             k = (v == 0) ? 16 + bitLengths[t] : 24 + bitLengths[v];
521         }
522 
523         return b + k;
524     }
525 
resizedInts(int newLen)526     private long[] resizedInts(int newLen)
527     {
528         long[] newInts = new long[newLen];
529         System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen));
530         return newInts;
531     }
532 
toBigInteger()533     public BigInteger toBigInteger()
534     {
535         int usedLen = getUsedLength();
536         if (usedLen == 0)
537         {
538             return ECConstants.ZERO;
539         }
540 
541         long highestInt = m_ints[usedLen - 1];
542         byte[] temp = new byte[8];
543         int barrI = 0;
544         boolean trailingZeroBytesDone = false;
545         for (int j = 7; j >= 0; j--)
546         {
547             byte thisByte = (byte)(highestInt >>> (8 * j));
548             if (trailingZeroBytesDone || (thisByte != 0))
549             {
550                 trailingZeroBytesDone = true;
551                 temp[barrI++] = thisByte;
552             }
553         }
554 
555         int barrLen = 8 * (usedLen - 1) + barrI;
556         byte[] barr = new byte[barrLen];
557         for (int j = 0; j < barrI; j++)
558         {
559             barr[j] = temp[j];
560         }
561         // Highest value int is done now
562 
563         for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--)
564         {
565             long mi = m_ints[iarrJ];
566             for (int j = 7; j >= 0; j--)
567             {
568                 barr[barrI++] = (byte)(mi >>> (8 * j));
569             }
570         }
571         return new BigInteger(1, barr);
572     }
573 
574 //    private static long shiftUp(long[] x, int xOff, int count)
575 //    {
576 //        long prev = 0;
577 //        for (int i = 0; i < count; ++i)
578 //        {
579 //            long next = x[xOff + i];
580 //            x[xOff + i] = (next << 1) | prev;
581 //            prev = next >>> 63;
582 //        }
583 //        return prev;
584 //    }
585 
shiftUp(long[] x, int xOff, int count, int shift)586     private static long shiftUp(long[] x, int xOff, int count, int shift)
587     {
588         int shiftInv = 64 - shift;
589         long prev = 0;
590         for (int i = 0; i < count; ++i)
591         {
592             long next = x[xOff + i];
593             x[xOff + i] = (next << shift) | prev;
594             prev = next >>> shiftInv;
595         }
596         return prev;
597     }
598 
shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)599     private static long shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)
600     {
601         int shiftInv = 64 - shift;
602         long prev = 0;
603         for (int i = 0; i < count; ++i)
604         {
605             long next = x[xOff + i];
606             z[zOff + i] = (next << shift) | prev;
607             prev = next >>> shiftInv;
608         }
609         return prev;
610     }
611 
addOne()612     public LongArray addOne()
613     {
614         if (m_ints.length == 0)
615         {
616             return new LongArray(new long[]{ 1L });
617         }
618 
619         int resultLen = Math.max(1, getUsedLength());
620         long[] ints = resizedInts(resultLen);
621         ints[0] ^= 1L;
622         return new LongArray(ints);
623     }
624 
625 //    private void addShiftedByBits(LongArray other, int bits)
626 //    {
627 //        int words = bits >>> 6;
628 //        int shift = bits & 0x3F;
629 //
630 //        if (shift == 0)
631 //        {
632 //            addShiftedByWords(other, words);
633 //            return;
634 //        }
635 //
636 //        int otherUsedLen = other.getUsedLength();
637 //        if (otherUsedLen == 0)
638 //        {
639 //            return;
640 //        }
641 //
642 //        int minLen = otherUsedLen + words + 1;
643 //        if (minLen > m_ints.length)
644 //        {
645 //            m_ints = resizedInts(minLen);
646 //        }
647 //
648 //        long carry = addShiftedByBits(m_ints, words, other.m_ints, 0, otherUsedLen, shift);
649 //        m_ints[otherUsedLen + words] ^= carry;
650 //    }
651 
addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)652     private void addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)
653     {
654         int otherLen = (otherDegree + 63) >>> 6;
655 
656         int words = bits >>> 6;
657         int shift = bits & 0x3F;
658 
659         if (shift == 0)
660         {
661             add(m_ints, words, other.m_ints, 0, otherLen);
662             return;
663         }
664 
665         long carry = addShiftedUp(m_ints, words, other.m_ints, 0, otherLen, shift);
666         if (carry != 0L)
667         {
668             m_ints[otherLen + words] ^= carry;
669         }
670     }
671 
addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)672     private static long addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)
673     {
674         int shiftInv = 64 - shift;
675         long prev = 0;
676         for (int i = 0; i < count; ++i)
677         {
678             long next = y[yOff + i];
679             x[xOff + i] ^= (next << shift) | prev;
680             prev = next >>> shiftInv;
681         }
682         return prev;
683     }
684 
addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)685     private static long addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)
686     {
687         int shiftInv = 64 - shift;
688         long prev = 0;
689         int i = count;
690         while (--i >= 0)
691         {
692             long next = y[yOff + i];
693             x[xOff + i] ^= (next >>> shift) | prev;
694             prev = next << shiftInv;
695         }
696         return prev;
697     }
698 
addShiftedByWords(LongArray other, int words)699     public void addShiftedByWords(LongArray other, int words)
700     {
701         int otherUsedLen = other.getUsedLength();
702         if (otherUsedLen == 0)
703         {
704             return;
705         }
706 
707         int minLen = otherUsedLen + words;
708         if (minLen > m_ints.length)
709         {
710             m_ints = resizedInts(minLen);
711         }
712 
713         add(m_ints, words, other.m_ints, 0, otherUsedLen);
714     }
715 
add(long[] x, int xOff, long[] y, int yOff, int count)716     private static void add(long[] x, int xOff, long[] y, int yOff, int count)
717     {
718         for (int i = 0; i < count; ++i)
719         {
720             x[xOff + i] ^= y[yOff + i];
721         }
722     }
723 
add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)724     private static void add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)
725     {
726         for (int i = 0; i < count; ++i)
727         {
728             z[zOff + i] = x[xOff + i] ^ y[yOff + i];
729         }
730     }
731 
addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)732     private static void addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)
733     {
734         for (int i = 0; i < count; ++i)
735         {
736             x[xOff + i] ^= y1[y1Off + i] ^ y2[y2Off + i];
737         }
738     }
739 
distribute(long[] x, int src, int dst1, int dst2, int count)740     private static void distribute(long[] x, int src, int dst1, int dst2, int count)
741     {
742         for (int i = 0; i < count; ++i)
743         {
744             long v = x[src + i];
745             x[dst1 + i] ^= v;
746             x[dst2 + i] ^= v;
747         }
748     }
749 
getLength()750     public int getLength()
751     {
752         return m_ints.length;
753     }
754 
flipWord(long[] buf, int off, int bit, long word)755     private static void flipWord(long[] buf, int off, int bit, long word)
756     {
757         int n = off + (bit >>> 6);
758         int shift = bit & 0x3F;
759         if (shift == 0)
760         {
761             buf[n] ^= word;
762         }
763         else
764         {
765             buf[n] ^= word << shift;
766             word >>>= (64 - shift);
767             if (word != 0)
768             {
769                 buf[++n] ^= word;
770             }
771         }
772     }
773 
774 //    private static long getWord(long[] buf, int off, int len, int bit)
775 //    {
776 //        int n = off + (bit >>> 6);
777 //        int shift = bit & 0x3F;
778 //        if (shift == 0)
779 //        {
780 //            return buf[n];
781 //        }
782 //        long result = buf[n] >>> shift;
783 //        if (++n < len)
784 //        {
785 //            result |= buf[n] << (64 - shift);
786 //        }
787 //        return result;
788 //    }
789 
testBitZero()790     public boolean testBitZero()
791     {
792         return m_ints.length > 0 && (m_ints[0] & 1L) != 0;
793     }
794 
testBit(long[] buf, int off, int n)795     private static boolean testBit(long[] buf, int off, int n)
796     {
797         // theInt = n / 64
798         int theInt = n >>> 6;
799         // theBit = n % 64
800         int theBit = n & 0x3F;
801         long tester = 1L << theBit;
802         return (buf[off + theInt] & tester) != 0;
803     }
804 
flipBit(long[] buf, int off, int n)805     private static void flipBit(long[] buf, int off, int n)
806     {
807         // theInt = n / 64
808         int theInt = n >>> 6;
809         // theBit = n % 64
810         int theBit = n & 0x3F;
811         long flipper = 1L << theBit;
812         buf[off + theInt] ^= flipper;
813     }
814 
815 //    private static void setBit(long[] buf, int off, int n)
816 //    {
817 //        // theInt = n / 64
818 //        int theInt = n >>> 6;
819 //        // theBit = n % 64
820 //        int theBit = n & 0x3F;
821 //        long setter = 1L << theBit;
822 //        buf[off + theInt] |= setter;
823 //    }
824 //
825 //    private static void clearBit(long[] buf, int off, int n)
826 //    {
827 //        // theInt = n / 64
828 //        int theInt = n >>> 6;
829 //        // theBit = n % 64
830 //        int theBit = n & 0x3F;
831 //        long setter = 1L << theBit;
832 //        buf[off + theInt] &= ~setter;
833 //    }
834 
multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)835     private static void multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)
836     {
837         if ((a & 1L) != 0L)
838         {
839             add(c, cOff, b, 0, bLen);
840         }
841         int k = 1;
842         while ((a >>>= 1) != 0L)
843         {
844             if ((a & 1L) != 0L)
845             {
846                 long carry = addShiftedUp(c, cOff, b, 0, bLen, k);
847                 if (carry != 0L)
848                 {
849                     c[cOff + bLen] ^= carry;
850                 }
851             }
852             ++k;
853         }
854     }
855 
modMultiplyLD(LongArray other, int m, int[] ks)856     public LongArray modMultiplyLD(LongArray other, int m, int[] ks)
857     {
858         /*
859          * Find out the degree of each argument and handle the zero cases
860          */
861         int aDeg = degree();
862         if (aDeg == 0)
863         {
864             return this;
865         }
866         int bDeg = other.degree();
867         if (bDeg == 0)
868         {
869             return other;
870         }
871 
872         /*
873          * Swap if necessary so that A is the smaller argument
874          */
875         LongArray A = this, B = other;
876         if (aDeg > bDeg)
877         {
878             A = other; B = this;
879             int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
880         }
881 
882         /*
883          * Establish the word lengths of the arguments and result
884          */
885         int aLen = (aDeg + 63) >>> 6;
886         int bLen = (bDeg + 63) >>> 6;
887         int cLen = (aDeg + bDeg + 62) >>> 6;
888 
889         if (aLen == 1)
890         {
891             long a0 = A.m_ints[0];
892             if (a0 == 1L)
893             {
894                 return B;
895             }
896 
897             /*
898              * Fast path for small A, with performance dependent only on the number of set bits
899              */
900             long[] c0 = new long[cLen];
901             multiplyWord(a0, B.m_ints, bLen, c0, 0);
902 
903             /*
904              * Reduce the raw answer against the reduction coefficients
905              */
906             return reduceResult(c0, 0, cLen, m, ks);
907         }
908 
909         /*
910          * Determine if B will get bigger during shifting
911          */
912         int bMax = (bDeg + 7 + 63) >>> 6;
913 
914         /*
915          * Lookup table for the offset of each B in the tables
916          */
917         int[] ti = new int[16];
918 
919         /*
920          * Precompute table of all 4-bit products of B
921          */
922         long[] T0 = new long[bMax << 4];
923         int tOff = bMax;
924         ti[1] = tOff;
925         System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
926         for (int i = 2; i < 16; ++i)
927         {
928             ti[i] = (tOff += bMax);
929             if ((i & 1) == 0)
930             {
931                 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
932             }
933             else
934             {
935                 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
936             }
937         }
938 
939         /*
940          * Second table with all 4-bit products of B shifted 4 bits
941          */
942         long[] T1 = new long[T0.length];
943         shiftUp(T0, 0, T1, 0, T0.length, 4);
944 //        shiftUp(T0, bMax, T1, bMax, tOff, 4);
945 
946         long[] a = A.m_ints;
947         long[] c = new long[cLen];
948 
949         int MASK = 0xF;
950 
951         /*
952          * Lopez-Dahab algorithm
953          */
954 
955         for (int k = 56; k >= 0; k -= 8)
956         {
957             for (int j = 1; j < aLen; j += 2)
958             {
959                 int aVal = (int)(a[j] >>> k);
960                 int u = aVal & MASK;
961                 int v = (aVal >>> 4) & MASK;
962                 addBoth(c, j - 1, T0, ti[u], T1, ti[v], bMax);
963             }
964             shiftUp(c, 0, cLen, 8);
965         }
966 
967         for (int k = 56; k >= 0; k -= 8)
968         {
969             for (int j = 0; j < aLen; j += 2)
970             {
971                 int aVal = (int)(a[j] >>> k);
972                 int u = aVal & MASK;
973                 int v = (aVal >>> 4) & MASK;
974                 addBoth(c, j, T0, ti[u], T1, ti[v], bMax);
975             }
976             if (k > 0)
977             {
978                 shiftUp(c, 0, cLen, 8);
979             }
980         }
981 
982         /*
983          * Finally the raw answer is collected, reduce it against the reduction coefficients
984          */
985         return reduceResult(c, 0, cLen, m, ks);
986     }
987 
modMultiply(LongArray other, int m, int[] ks)988     public LongArray modMultiply(LongArray other, int m, int[] ks)
989     {
990         /*
991          * Find out the degree of each argument and handle the zero cases
992          */
993         int aDeg = degree();
994         if (aDeg == 0)
995         {
996             return this;
997         }
998         int bDeg = other.degree();
999         if (bDeg == 0)
1000         {
1001             return other;
1002         }
1003 
1004         /*
1005          * Swap if necessary so that A is the smaller argument
1006          */
1007         LongArray A = this, B = other;
1008         if (aDeg > bDeg)
1009         {
1010             A = other; B = this;
1011             int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1012         }
1013 
1014         /*
1015          * Establish the word lengths of the arguments and result
1016          */
1017         int aLen = (aDeg + 63) >>> 6;
1018         int bLen = (bDeg + 63) >>> 6;
1019         int cLen = (aDeg + bDeg + 62) >>> 6;
1020 
1021         if (aLen == 1)
1022         {
1023             long a0 = A.m_ints[0];
1024             if (a0 == 1L)
1025             {
1026                 return B;
1027             }
1028 
1029             /*
1030              * Fast path for small A, with performance dependent only on the number of set bits
1031              */
1032             long[] c0 = new long[cLen];
1033             multiplyWord(a0, B.m_ints, bLen, c0, 0);
1034 
1035             /*
1036              * Reduce the raw answer against the reduction coefficients
1037              */
1038             return reduceResult(c0, 0, cLen, m, ks);
1039         }
1040 
1041         /*
1042          * Determine if B will get bigger during shifting
1043          */
1044         int bMax = (bDeg + 7 + 63) >>> 6;
1045 
1046         /*
1047          * Lookup table for the offset of each B in the tables
1048          */
1049         int[] ti = new int[16];
1050 
1051         /*
1052          * Precompute table of all 4-bit products of B
1053          */
1054         long[] T0 = new long[bMax << 4];
1055         int tOff = bMax;
1056         ti[1] = tOff;
1057         System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
1058         for (int i = 2; i < 16; ++i)
1059         {
1060             ti[i] = (tOff += bMax);
1061             if ((i & 1) == 0)
1062             {
1063                 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
1064             }
1065             else
1066             {
1067                 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
1068             }
1069         }
1070 
1071         /*
1072          * Second table with all 4-bit products of B shifted 4 bits
1073          */
1074         long[] T1 = new long[T0.length];
1075         shiftUp(T0, 0, T1, 0, T0.length, 4);
1076 //        shiftUp(T0, bMax, T1, bMax, tOff, 4);
1077 
1078         long[] a = A.m_ints;
1079         long[] c = new long[cLen << 3];
1080 
1081         int MASK = 0xF;
1082 
1083         /*
1084          * Lopez-Dahab (Modified) algorithm
1085          */
1086 
1087         for (int aPos = 0; aPos < aLen; ++aPos)
1088         {
1089             long aVal = a[aPos];
1090             int cOff = aPos;
1091             for (;;)
1092             {
1093                 int u = (int)aVal & MASK;
1094                 aVal >>>= 4;
1095                 int v = (int)aVal & MASK;
1096                 addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
1097                 aVal >>>= 4;
1098                 if (aVal == 0L)
1099                 {
1100                     break;
1101                 }
1102                 cOff += cLen;
1103             }
1104         }
1105 
1106         {
1107             int cOff = c.length;
1108             while ((cOff -= cLen) != 0)
1109             {
1110                 addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
1111             }
1112         }
1113 
1114         /*
1115          * Finally the raw answer is collected, reduce it against the reduction coefficients
1116          */
1117         return reduceResult(c, 0, cLen, m, ks);
1118     }
1119 
modMultiplyAlt(LongArray other, int m, int[] ks)1120     public LongArray modMultiplyAlt(LongArray other, int m, int[] ks)
1121     {
1122         /*
1123          * Find out the degree of each argument and handle the zero cases
1124          */
1125         int aDeg = degree();
1126         if (aDeg == 0)
1127         {
1128             return this;
1129         }
1130         int bDeg = other.degree();
1131         if (bDeg == 0)
1132         {
1133             return other;
1134         }
1135 
1136         /*
1137          * Swap if necessary so that A is the smaller argument
1138          */
1139         LongArray A = this, B = other;
1140         if (aDeg > bDeg)
1141         {
1142             A = other; B = this;
1143             int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1144         }
1145 
1146         /*
1147          * Establish the word lengths of the arguments and result
1148          */
1149         int aLen = (aDeg + 63) >>> 6;
1150         int bLen = (bDeg + 63) >>> 6;
1151         int cLen = (aDeg + bDeg + 62) >>> 6;
1152 
1153         if (aLen == 1)
1154         {
1155             long a0 = A.m_ints[0];
1156             if (a0 == 1L)
1157             {
1158                 return B;
1159             }
1160 
1161             /*
1162              * Fast path for small A, with performance dependent only on the number of set bits
1163              */
1164             long[] c0 = new long[cLen];
1165             multiplyWord(a0, B.m_ints, bLen, c0, 0);
1166 
1167             /*
1168              * Reduce the raw answer against the reduction coefficients
1169              */
1170             return reduceResult(c0, 0, cLen, m, ks);
1171         }
1172 
1173         // NOTE: This works, but is slower than width 4 processing
1174 //        if (aLen == 2)
1175 //        {
1176 //            /*
1177 //             * Use common-multiplicand optimization to save ~1/4 of the adds
1178 //             */
1179 //            long a1 = A.m_ints[0], a2 = A.m_ints[1];
1180 //            long aa = a1 & a2; a1 ^= aa; a2 ^= aa;
1181 //
1182 //            long[] b = B.m_ints;
1183 //            long[] c = new long[cLen];
1184 //            multiplyWord(aa, b, bLen, c, 1);
1185 //            add(c, 0, c, 1, cLen - 1);
1186 //            multiplyWord(a1, b, bLen, c, 0);
1187 //            multiplyWord(a2, b, bLen, c, 1);
1188 //
1189 //            /*
1190 //             * Reduce the raw answer against the reduction coefficients
1191 //             */
1192 //            return reduceResult(c, 0, cLen, m, ks);
1193 //        }
1194 
1195         /*
1196          * Determine the parameters of the interleaved window algorithm: the 'width' in bits to
1197          * process together, the number of evaluation 'positions' implied by that width, and the
1198          * 'top' position at which the regular window algorithm stops.
1199          */
1200         int width, positions, top, banks;
1201 
1202         // NOTE: width 4 is the fastest over the entire range of sizes used in current crypto
1203 //        width = 1; positions = 64; top = 64; banks = 4;
1204 //        width = 2; positions = 32; top = 64; banks = 4;
1205 //        width = 3; positions = 21; top = 63; banks = 3;
1206         width = 4; positions = 16; top = 64; banks = 8;
1207 //        width = 5; positions = 13; top = 65; banks = 7;
1208 //        width = 7; positions = 9; top = 63; banks = 9;
1209 //        width = 8; positions = 8; top = 64; banks = 8;
1210 
1211         /*
1212          * Determine if B will get bigger during shifting
1213          */
1214         int shifts = top < 64 ? positions : positions - 1;
1215         int bMax = (bDeg + shifts + 63) >>> 6;
1216 
1217         int bTotal = bMax * banks, stride = width * banks;
1218 
1219         /*
1220          * Create a single temporary buffer, with an offset table to find the positions of things in it
1221          */
1222         int[] ci = new int[1 << width];
1223         int cTotal = aLen;
1224         {
1225             ci[0] = cTotal;
1226             cTotal += bTotal;
1227             ci[1] = cTotal;
1228             for (int i = 2; i < ci.length; ++i)
1229             {
1230                 cTotal += cLen;
1231                 ci[i] = cTotal;
1232             }
1233             cTotal += cLen;
1234         }
1235         // NOTE: Provide a safe dump for "high zeroes" since we are adding 'bMax' and not 'bLen'
1236         ++cTotal;
1237 
1238         long[] c = new long[cTotal];
1239 
1240         // Prepare A in interleaved form, according to the chosen width
1241         interleave(A.m_ints, 0, c, 0, aLen, width);
1242 
1243         // Make a working copy of B, since we will be shifting it
1244         {
1245             int bOff = aLen;
1246             System.arraycopy(B.m_ints, 0, c, bOff, bLen);
1247             for (int bank = 1; bank < banks; ++bank)
1248             {
1249                 shiftUp(c, aLen, c, bOff += bMax, bMax, bank);
1250             }
1251         }
1252 
1253         /*
1254          * The main loop analyzes the interleaved windows in A, and for each non-zero window
1255          * a single word-array XOR is performed to a carefully selected slice of 'c'. The loop is
1256          * breadth-first, checking the lowest window in each word, then looping again for the
1257          * next higher window position.
1258          */
1259         int MASK = (1 << width) - 1;
1260 
1261         int k = 0;
1262         for (;;)
1263         {
1264             int aPos = 0;
1265             do
1266             {
1267                 long aVal = c[aPos] >>> k;
1268                 int bank = 0, bOff = aLen;
1269                 for (;;)
1270                 {
1271                     int index = (int)(aVal) & MASK;
1272                     if (index != 0)
1273                     {
1274                         /*
1275                          * Add to a 'c' buffer based on the bit-pattern of 'index'. Since A is in
1276                          * interleaved form, the bits represent the current B shifted by 0, 'positions',
1277                          * 'positions' * 2, ..., 'positions' * ('width' - 1)
1278                          */
1279                         add(c, aPos + ci[index], c, bOff, bMax);
1280                     }
1281                     if (++bank == banks)
1282                     {
1283                         break;
1284                     }
1285                     bOff += bMax;
1286                     aVal >>>= width;
1287                 }
1288             }
1289             while (++aPos < aLen);
1290 
1291             if ((k += stride) >= top)
1292             {
1293                 if (k >= 64)
1294                 {
1295                     break;
1296                 }
1297 
1298                 /*
1299                  * Adjustment for window setups with top == 63, the final bit (if any) is processed
1300                  * as the top-bit of a window
1301                  */
1302                 k = 64 - width;
1303                 MASK &= MASK << (top - k);
1304             }
1305 
1306             /*
1307              * After each position has been checked for all words of A, B is shifted up 1 place
1308              */
1309             shiftUp(c, aLen, bTotal, banks);
1310         }
1311 
1312         int ciPos = ci.length;
1313         while (--ciPos > 1)
1314         {
1315             if ((ciPos & 1L) == 0L)
1316             {
1317                 /*
1318                  * For even numbers, shift contents and add to the half-position
1319                  */
1320                 addShiftedUp(c, ci[ciPos >>> 1], c, ci[ciPos], cLen, positions);
1321             }
1322             else
1323             {
1324                 /*
1325                  * For odd numbers, 'distribute' contents to the result and the next-lowest position
1326                  */
1327                 distribute(c, ci[ciPos], ci[ciPos - 1], ci[1], cLen);
1328             }
1329         }
1330 
1331         /*
1332          * Finally the raw answer is collected, reduce it against the reduction coefficients
1333          */
1334         return reduceResult(c, ci[1], cLen, m, ks);
1335     }
1336 
modReduce(int m, int[] ks)1337     public LongArray modReduce(int m, int[] ks)
1338     {
1339         long[] buf = Arrays.clone(m_ints);
1340         int rLen = reduceInPlace(buf, 0, buf.length, m, ks);
1341         return new LongArray(buf, 0, rLen);
1342     }
1343 
multiply(LongArray other, int m, int[] ks)1344     public LongArray multiply(LongArray other, int m, int[] ks)
1345     {
1346         /*
1347          * Find out the degree of each argument and handle the zero cases
1348          */
1349         int aDeg = degree();
1350         if (aDeg == 0)
1351         {
1352             return this;
1353         }
1354         int bDeg = other.degree();
1355         if (bDeg == 0)
1356         {
1357             return other;
1358         }
1359 
1360         /*
1361          * Swap if necessary so that A is the smaller argument
1362          */
1363         LongArray A = this, B = other;
1364         if (aDeg > bDeg)
1365         {
1366             A = other; B = this;
1367             int tmp = aDeg; aDeg = bDeg; bDeg = tmp;
1368         }
1369 
1370         /*
1371          * Establish the word lengths of the arguments and result
1372          */
1373         int aLen = (aDeg + 63) >>> 6;
1374         int bLen = (bDeg + 63) >>> 6;
1375         int cLen = (aDeg + bDeg + 62) >>> 6;
1376 
1377         if (aLen == 1)
1378         {
1379             long a0 = A.m_ints[0];
1380             if (a0 == 1L)
1381             {
1382                 return B;
1383             }
1384 
1385             /*
1386              * Fast path for small A, with performance dependent only on the number of set bits
1387              */
1388             long[] c0 = new long[cLen];
1389             multiplyWord(a0, B.m_ints, bLen, c0, 0);
1390 
1391             /*
1392              * Reduce the raw answer against the reduction coefficients
1393              */
1394 //            return reduceResult(c0, 0, cLen, m, ks);
1395             return new LongArray(c0, 0, cLen);
1396         }
1397 
1398         /*
1399          * Determine if B will get bigger during shifting
1400          */
1401         int bMax = (bDeg + 7 + 63) >>> 6;
1402 
1403         /*
1404          * Lookup table for the offset of each B in the tables
1405          */
1406         int[] ti = new int[16];
1407 
1408         /*
1409          * Precompute table of all 4-bit products of B
1410          */
1411         long[] T0 = new long[bMax << 4];
1412         int tOff = bMax;
1413         ti[1] = tOff;
1414         System.arraycopy(B.m_ints, 0, T0, tOff, bLen);
1415         for (int i = 2; i < 16; ++i)
1416         {
1417             ti[i] = (tOff += bMax);
1418             if ((i & 1) == 0)
1419             {
1420                 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1);
1421             }
1422             else
1423             {
1424                 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax);
1425             }
1426         }
1427 
1428         /*
1429          * Second table with all 4-bit products of B shifted 4 bits
1430          */
1431         long[] T1 = new long[T0.length];
1432         shiftUp(T0, 0, T1, 0, T0.length, 4);
1433 //        shiftUp(T0, bMax, T1, bMax, tOff, 4);
1434 
1435         long[] a = A.m_ints;
1436         long[] c = new long[cLen << 3];
1437 
1438         int MASK = 0xF;
1439 
1440         /*
1441          * Lopez-Dahab (Modified) algorithm
1442          */
1443 
1444         for (int aPos = 0; aPos < aLen; ++aPos)
1445         {
1446             long aVal = a[aPos];
1447             int cOff = aPos;
1448             for (;;)
1449             {
1450                 int u = (int)aVal & MASK;
1451                 aVal >>>= 4;
1452                 int v = (int)aVal & MASK;
1453                 addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax);
1454                 aVal >>>= 4;
1455                 if (aVal == 0L)
1456                 {
1457                     break;
1458                 }
1459                 cOff += cLen;
1460             }
1461         }
1462 
1463         {
1464             int cOff = c.length;
1465             while ((cOff -= cLen) != 0)
1466             {
1467                 addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8);
1468             }
1469         }
1470 
1471         /*
1472          * Finally the raw answer is collected, reduce it against the reduction coefficients
1473          */
1474 //        return reduceResult(c, 0, cLen, m, ks);
1475         return new LongArray(c, 0, cLen);
1476     }
1477 
reduce(int m, int[] ks)1478     public void reduce(int m, int[] ks)
1479     {
1480         long[] buf = m_ints;
1481         int rLen = reduceInPlace(buf, 0, buf.length, m, ks);
1482         if (rLen < buf.length)
1483         {
1484             m_ints = new long[rLen];
1485             System.arraycopy(buf, 0, m_ints, 0, rLen);
1486         }
1487     }
1488 
reduceResult(long[] buf, int off, int len, int m, int[] ks)1489     private static LongArray reduceResult(long[] buf, int off, int len, int m, int[] ks)
1490     {
1491         int rLen = reduceInPlace(buf, off, len, m, ks);
1492         return new LongArray(buf, off, rLen);
1493     }
1494 
1495 //    private static void deInterleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
1496 //    {
1497 //        for (int i = 0; i < count; ++i)
1498 //        {
1499 //            z[zOff + i] = deInterleave(x[zOff + i], rounds);
1500 //        }
1501 //    }
1502 //
1503 //    private static long deInterleave(long x, int rounds)
1504 //    {
1505 //        while (--rounds >= 0)
1506 //        {
1507 //            x = deInterleave32(x & DEINTERLEAVE_MASK) | (deInterleave32((x >>> 1) & DEINTERLEAVE_MASK) << 32);
1508 //        }
1509 //        return x;
1510 //    }
1511 //
1512 //    private static long deInterleave32(long x)
1513 //    {
1514 //        x = (x | (x >>> 1)) & 0x3333333333333333L;
1515 //        x = (x | (x >>> 2)) & 0x0F0F0F0F0F0F0F0FL;
1516 //        x = (x | (x >>> 4)) & 0x00FF00FF00FF00FFL;
1517 //        x = (x | (x >>> 8)) & 0x0000FFFF0000FFFFL;
1518 //        x = (x | (x >>> 16)) & 0x00000000FFFFFFFFL;
1519 //        return x;
1520 //    }
1521 
reduceInPlace(long[] buf, int off, int len, int m, int[] ks)1522     private static int reduceInPlace(long[] buf, int off, int len, int m, int[] ks)
1523     {
1524         int mLen = (m + 63) >>> 6;
1525         if (len < mLen)
1526         {
1527             return len;
1528         }
1529 
1530         int numBits = Math.min(len << 6, (m << 1) - 1); // TODO use actual degree?
1531         int excessBits = (len << 6) - numBits;
1532         while (excessBits >= 64)
1533         {
1534             --len;
1535             excessBits -= 64;
1536         }
1537 
1538         int kLen = ks.length, kMax = ks[kLen - 1], kNext = kLen > 1 ? ks[kLen - 2] : 0;
1539         int wordWiseLimit = Math.max(m, kMax + 64);
1540         int vectorableWords = (excessBits + Math.min(numBits - wordWiseLimit, m - kNext)) >> 6;
1541         if (vectorableWords > 1)
1542         {
1543             int vectorWiseWords = len - vectorableWords;
1544             reduceVectorWise(buf, off, len, vectorWiseWords, m, ks);
1545             while (len > vectorWiseWords)
1546             {
1547                 buf[off + --len] = 0L;
1548             }
1549             numBits = vectorWiseWords << 6;
1550         }
1551 
1552         if (numBits > wordWiseLimit)
1553         {
1554             reduceWordWise(buf, off, len, wordWiseLimit, m, ks);
1555             numBits = wordWiseLimit;
1556         }
1557 
1558         if (numBits > m)
1559         {
1560             reduceBitWise(buf, off, numBits, m, ks);
1561         }
1562 
1563         return mLen;
1564     }
1565 
reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)1566     private static void reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)
1567     {
1568         while (--bitlength >= m)
1569         {
1570             if (testBit(buf, off, bitlength))
1571             {
1572                 reduceBit(buf, off, bitlength, m, ks);
1573             }
1574         }
1575     }
1576 
reduceBit(long[] buf, int off, int bit, int m, int[] ks)1577     private static void reduceBit(long[] buf, int off, int bit, int m, int[] ks)
1578     {
1579         flipBit(buf, off, bit);
1580         int n = bit - m;
1581         int j = ks.length;
1582         while (--j >= 0)
1583         {
1584             flipBit(buf, off, ks[j] + n);
1585         }
1586         flipBit(buf, off, n);
1587     }
1588 
reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)1589     private static void reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)
1590     {
1591         int toPos = toBit >>> 6;
1592 
1593         while (--len > toPos)
1594         {
1595             long word = buf[off + len];
1596             if (word != 0)
1597             {
1598                 buf[off + len] = 0;
1599                 reduceWord(buf, off, (len << 6), word, m, ks);
1600             }
1601         }
1602 
1603         {
1604             int partial = toBit & 0x3F;
1605             long word = buf[off + toPos] >>> partial;
1606             if (word != 0)
1607             {
1608                 buf[off + toPos] ^= word << partial;
1609                 reduceWord(buf, off, toBit, word, m, ks);
1610             }
1611         }
1612     }
1613 
reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)1614     private static void reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)
1615     {
1616         int offset = bit - m;
1617         int j = ks.length;
1618         while (--j >= 0)
1619         {
1620             flipWord(buf, off, offset + ks[j], word);
1621         }
1622         flipWord(buf, off, offset, word);
1623     }
1624 
reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)1625     private static void reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)
1626     {
1627         /*
1628          * NOTE: It's important we go from highest coefficient to lowest, because for the highest
1629          * one (only) we allow the ranges to partially overlap, and therefore any changes must take
1630          * effect for the subsequent lower coefficients.
1631          */
1632         int baseBit = (words << 6) - m;
1633         int j = ks.length;
1634         while (--j >= 0)
1635         {
1636             flipVector(buf, off, buf, off + words, len - words, baseBit + ks[j]);
1637         }
1638         flipVector(buf, off, buf, off + words, len - words, baseBit);
1639     }
1640 
flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)1641     private static void flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)
1642     {
1643         xOff += bits >>> 6;
1644         bits &= 0x3F;
1645 
1646         if (bits == 0)
1647         {
1648             add(x, xOff, y, yOff, yLen);
1649         }
1650         else
1651         {
1652             long carry = addShiftedDown(x, xOff + 1, y, yOff, yLen, 64 - bits);
1653             x[xOff] ^= carry;
1654         }
1655     }
1656 
modSquare(int m, int[] ks)1657     public LongArray modSquare(int m, int[] ks)
1658     {
1659         int len = getUsedLength();
1660         if (len == 0)
1661         {
1662             return this;
1663         }
1664 
1665         int _2len = len << 1;
1666         long[] r = new long[_2len];
1667 
1668         int pos = 0;
1669         while (pos < _2len)
1670         {
1671             long mi = m_ints[pos >>> 1];
1672             r[pos++] = interleave2_32to64((int)mi);
1673             r[pos++] = interleave2_32to64((int)(mi >>> 32));
1674         }
1675 
1676         return new LongArray(r, 0, reduceInPlace(r, 0, r.length, m, ks));
1677     }
1678 
modSquareN(int n, int m, int[] ks)1679     public LongArray modSquareN(int n, int m, int[] ks)
1680     {
1681         int len = getUsedLength();
1682         if (len == 0)
1683         {
1684             return this;
1685         }
1686 
1687         int mLen = (m + 63) >>> 6;
1688         long[] r = new long[mLen << 1];
1689         System.arraycopy(m_ints, 0, r, 0, len);
1690 
1691         while (--n >= 0)
1692         {
1693             squareInPlace(r, len, m, ks);
1694             len = reduceInPlace(r, 0, r.length, m, ks);
1695         }
1696 
1697         return new LongArray(r, 0, len);
1698     }
1699 
square(int m, int[] ks)1700     public LongArray square(int m, int[] ks)
1701     {
1702         int len = getUsedLength();
1703         if (len == 0)
1704         {
1705             return this;
1706         }
1707 
1708         int _2len = len << 1;
1709         long[] r = new long[_2len];
1710 
1711         int pos = 0;
1712         while (pos < _2len)
1713         {
1714             long mi = m_ints[pos >>> 1];
1715             r[pos++] = interleave2_32to64((int)mi);
1716             r[pos++] = interleave2_32to64((int)(mi >>> 32));
1717         }
1718 
1719         return new LongArray(r, 0, r.length);
1720     }
1721 
squareInPlace(long[] x, int xLen, int m, int[] ks)1722     private static void squareInPlace(long[] x, int xLen, int m, int[] ks)
1723     {
1724         int pos = xLen << 1;
1725         while (--xLen >= 0)
1726         {
1727             long xVal = x[xLen];
1728             x[--pos] = interleave2_32to64((int)(xVal >>> 32));
1729             x[--pos] = interleave2_32to64((int)xVal);
1730         }
1731     }
1732 
interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)1733     private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)
1734     {
1735         switch (width)
1736         {
1737         case 3:
1738             interleave3(x, xOff, z, zOff, count);
1739             break;
1740         case 5:
1741             interleave5(x, xOff, z, zOff, count);
1742             break;
1743         case 7:
1744             interleave7(x, xOff, z, zOff, count);
1745             break;
1746         default:
1747             interleave2_n(x, xOff, z, zOff, count, bitLengths[width] - 1);
1748             break;
1749         }
1750     }
1751 
interleave3(long[] x, int xOff, long[] z, int zOff, int count)1752     private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count)
1753     {
1754         for (int i = 0; i < count; ++i)
1755         {
1756             z[zOff + i] = interleave3(x[xOff + i]);
1757         }
1758     }
1759 
interleave3(long x)1760     private static long interleave3(long x)
1761     {
1762         long z = x & (1L << 63);
1763         return z
1764             | interleave3_21to63((int)x & 0x1FFFFF)
1765             | interleave3_21to63((int)(x >>> 21) & 0x1FFFFF) << 1
1766             | interleave3_21to63((int)(x >>> 42) & 0x1FFFFF) << 2;
1767 
1768 //        int zPos = 0, wPos = 0, xPos = 0;
1769 //        for (;;)
1770 //        {
1771 //            z |= ((x >>> xPos) & 1L) << zPos;
1772 //            if (++zPos == 63)
1773 //            {
1774 //                String sz2 = Long.toBinaryString(z);
1775 //                return z;
1776 //            }
1777 //            if ((xPos += 21) >= 63)
1778 //            {
1779 //                xPos = ++wPos;
1780 //            }
1781 //        }
1782     }
1783 
interleave3_21to63(int x)1784     private static long interleave3_21to63(int x)
1785     {
1786         int r00 = INTERLEAVE3_TABLE[x & 0x7F];
1787         int r21 = INTERLEAVE3_TABLE[(x >>> 7) & 0x7F];
1788         int r42 = INTERLEAVE3_TABLE[x >>> 14];
1789         return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL);
1790     }
1791 
interleave5(long[] x, int xOff, long[] z, int zOff, int count)1792     private static void interleave5(long[] x, int xOff, long[] z, int zOff, int count)
1793     {
1794         for (int i = 0; i < count; ++i)
1795         {
1796             z[zOff + i] = interleave5(x[xOff + i]);
1797         }
1798     }
1799 
interleave5(long x)1800     private static long interleave5(long x)
1801     {
1802         return interleave3_13to65((int)x & 0x1FFF)
1803             | interleave3_13to65((int)(x >>> 13) & 0x1FFF) << 1
1804             | interleave3_13to65((int)(x >>> 26) & 0x1FFF) << 2
1805             | interleave3_13to65((int)(x >>> 39) & 0x1FFF) << 3
1806             | interleave3_13to65((int)(x >>> 52) & 0x1FFF) << 4;
1807 
1808 //        long z = 0;
1809 //        int zPos = 0, wPos = 0, xPos = 0;
1810 //        for (;;)
1811 //        {
1812 //            z |= ((x >>> xPos) & 1L) << zPos;
1813 //            if (++zPos == 64)
1814 //            {
1815 //                return z;
1816 //            }
1817 //            if ((xPos += 13) >= 64)
1818 //            {
1819 //                xPos = ++wPos;
1820 //            }
1821 //        }
1822     }
1823 
interleave3_13to65(int x)1824     private static long interleave3_13to65(int x)
1825     {
1826         int r00 = INTERLEAVE5_TABLE[x & 0x7F];
1827         int r35 = INTERLEAVE5_TABLE[x >>> 7];
1828         return (r35 & 0xFFFFFFFFL) << 35 | (r00 & 0xFFFFFFFFL);
1829     }
1830 
interleave7(long[] x, int xOff, long[] z, int zOff, int count)1831     private static void interleave7(long[] x, int xOff, long[] z, int zOff, int count)
1832     {
1833         for (int i = 0; i < count; ++i)
1834         {
1835             z[zOff + i] = interleave7(x[xOff + i]);
1836         }
1837     }
1838 
interleave7(long x)1839     private static long interleave7(long x)
1840     {
1841         long z = x & (1L << 63);
1842         return z
1843             | INTERLEAVE7_TABLE[(int)x & 0x1FF]
1844             | INTERLEAVE7_TABLE[(int)(x >>> 9) & 0x1FF] << 1
1845             | INTERLEAVE7_TABLE[(int)(x >>> 18) & 0x1FF] << 2
1846             | INTERLEAVE7_TABLE[(int)(x >>> 27) & 0x1FF] << 3
1847             | INTERLEAVE7_TABLE[(int)(x >>> 36) & 0x1FF] << 4
1848             | INTERLEAVE7_TABLE[(int)(x >>> 45) & 0x1FF] << 5
1849             | INTERLEAVE7_TABLE[(int)(x >>> 54) & 0x1FF] << 6;
1850 
1851 //        int zPos = 0, wPos = 0, xPos = 0;
1852 //        for (;;)
1853 //        {
1854 //            z |= ((x >>> xPos) & 1L) << zPos;
1855 //            if (++zPos == 63)
1856 //            {
1857 //                return z;
1858 //            }
1859 //            if ((xPos += 9) >= 63)
1860 //            {
1861 //                xPos = ++wPos;
1862 //            }
1863 //        }
1864     }
1865 
interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)1866     private static void interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)
1867     {
1868         for (int i = 0; i < count; ++i)
1869         {
1870             z[zOff + i] = interleave2_n(x[xOff + i], rounds);
1871         }
1872     }
1873 
interleave2_n(long x, int rounds)1874     private static long interleave2_n(long x, int rounds)
1875     {
1876         while (rounds > 1)
1877         {
1878             rounds -= 2;
1879             x = interleave4_16to64((int)x & 0xFFFF)
1880                 | interleave4_16to64((int)(x >>> 16) & 0xFFFF) << 1
1881                 | interleave4_16to64((int)(x >>> 32) & 0xFFFF) << 2
1882                 | interleave4_16to64((int)(x >>> 48) & 0xFFFF) << 3;
1883         }
1884         if (rounds > 0)
1885         {
1886             x = interleave2_32to64((int)x) | interleave2_32to64((int)(x >>> 32)) << 1;
1887         }
1888         return x;
1889     }
1890 
interleave4_16to64(int x)1891     private static long interleave4_16to64(int x)
1892     {
1893         int r00 = INTERLEAVE4_TABLE[x & 0xFF];
1894         int r32 = INTERLEAVE4_TABLE[x >>> 8];
1895         return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
1896     }
1897 
interleave2_32to64(int x)1898     private static long interleave2_32to64(int x)
1899     {
1900         int r00 = INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >>> 8) & 0xFF] << 16;
1901         int r32 = INTERLEAVE2_TABLE[(x >>> 16) & 0xFF] | INTERLEAVE2_TABLE[x >>> 24] << 16;
1902         return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL);
1903     }
1904 
1905 //    private static LongArray expItohTsujii2(LongArray B, int n, int m, int[] ks)
1906 //    {
1907 //        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
1908 //        int scale = 1;
1909 //
1910 //        int numTerms = n;
1911 //        while (numTerms > 1)
1912 //        {
1913 //            if ((numTerms & 1) != 0)
1914 //            {
1915 //                t3 = t3.modMultiply(t1, m, ks);
1916 //                t1 = t1.modSquareN(scale, m, ks);
1917 //            }
1918 //
1919 //            LongArray t2 = t1.modSquareN(scale, m, ks);
1920 //            t1 = t1.modMultiply(t2, m, ks);
1921 //            numTerms >>>= 1; scale <<= 1;
1922 //        }
1923 //
1924 //        return t3.modMultiply(t1, m, ks);
1925 //    }
1926 //
1927 //    private static LongArray expItohTsujii23(LongArray B, int n, int m, int[] ks)
1928 //    {
1929 //        LongArray t1 = B, t3 = new LongArray(new long[]{ 1L });
1930 //        int scale = 1;
1931 //
1932 //        int numTerms = n;
1933 //        while (numTerms > 1)
1934 //        {
1935 //            boolean m03 = numTerms % 3 == 0;
1936 //            boolean m14 = !m03 && (numTerms & 1) != 0;
1937 //
1938 //            if (m14)
1939 //            {
1940 //                t3 = t3.modMultiply(t1, m, ks);
1941 //                t1 = t1.modSquareN(scale, m, ks);
1942 //            }
1943 //
1944 //            LongArray t2 = t1.modSquareN(scale, m, ks);
1945 //            t1 = t1.modMultiply(t2, m, ks);
1946 //
1947 //            if (m03)
1948 //            {
1949 //                t2 = t2.modSquareN(scale, m, ks);
1950 //                t1 = t1.modMultiply(t2, m, ks);
1951 //                numTerms /= 3; scale *= 3;
1952 //            }
1953 //            else
1954 //            {
1955 //                numTerms >>>= 1; scale <<= 1;
1956 //            }
1957 //        }
1958 //
1959 //        return t3.modMultiply(t1, m, ks);
1960 //    }
1961 //
1962 //    private static LongArray expItohTsujii235(LongArray B, int n, int m, int[] ks)
1963 //    {
1964 //        LongArray t1 = B, t4 = new LongArray(new long[]{ 1L });
1965 //        int scale = 1;
1966 //
1967 //        int numTerms = n;
1968 //        while (numTerms > 1)
1969 //        {
1970 //            if (numTerms % 5 == 0)
1971 //            {
1972 ////                t1 = expItohTsujii23(t1, 5, m, ks);
1973 //
1974 //                LongArray t3 = t1;
1975 //                t1 = t1.modSquareN(scale, m, ks);
1976 //
1977 //                LongArray t2 = t1.modSquareN(scale, m, ks);
1978 //                t1 = t1.modMultiply(t2, m, ks);
1979 //                t2 = t1.modSquareN(scale << 1, m, ks);
1980 //                t1 = t1.modMultiply(t2, m, ks);
1981 //
1982 //                t1 = t1.modMultiply(t3, m, ks);
1983 //
1984 //                numTerms /= 5; scale *= 5;
1985 //                continue;
1986 //            }
1987 //
1988 //            boolean m03 = numTerms % 3 == 0;
1989 //            boolean m14 = !m03 && (numTerms & 1) != 0;
1990 //
1991 //            if (m14)
1992 //            {
1993 //                t4 = t4.modMultiply(t1, m, ks);
1994 //                t1 = t1.modSquareN(scale, m, ks);
1995 //            }
1996 //
1997 //            LongArray t2 = t1.modSquareN(scale, m, ks);
1998 //            t1 = t1.modMultiply(t2, m, ks);
1999 //
2000 //            if (m03)
2001 //            {
2002 //                t2 = t2.modSquareN(scale, m, ks);
2003 //                t1 = t1.modMultiply(t2, m, ks);
2004 //                numTerms /= 3; scale *= 3;
2005 //            }
2006 //            else
2007 //            {
2008 //                numTerms >>>= 1; scale <<= 1;
2009 //            }
2010 //        }
2011 //
2012 //        return t4.modMultiply(t1, m, ks);
2013 //    }
2014 
modInverse(int m, int[] ks)2015     public LongArray modInverse(int m, int[] ks)
2016     {
2017         /*
2018          * Fermat's Little Theorem
2019          */
2020 //        LongArray A = this;
2021 //        LongArray B = A.modSquare(m, ks);
2022 //        LongArray R0 = B, R1 = B;
2023 //        for (int i = 2; i < m; ++i)
2024 //        {
2025 //            R1 = R1.modSquare(m, ks);
2026 //            R0 = R0.modMultiply(R1, m, ks);
2027 //        }
2028 //
2029 //        return R0;
2030 
2031         /*
2032          * Itoh-Tsujii
2033          */
2034 //        LongArray B = modSquare(m, ks);
2035 //        switch (m)
2036 //        {
2037 //        case 409:
2038 //            return expItohTsujii23(B, m - 1, m, ks);
2039 //        case 571:
2040 //            return expItohTsujii235(B, m - 1, m, ks);
2041 //        case 163:
2042 //        case 233:
2043 //        case 283:
2044 //        default:
2045 //            return expItohTsujii2(B, m - 1, m, ks);
2046 //        }
2047 
2048         /*
2049          * Inversion in F2m using the extended Euclidean algorithm
2050          *
2051          * Input: A nonzero polynomial a(z) of degree at most m-1
2052          * Output: a(z)^(-1) mod f(z)
2053          */
2054         int uzDegree = degree();
2055         if (uzDegree == 0)
2056         {
2057             throw new IllegalStateException();
2058         }
2059         if (uzDegree == 1)
2060         {
2061             return this;
2062         }
2063 
2064         // u(z) := a(z)
2065         LongArray uz = (LongArray)clone();
2066 
2067         int t = (m + 63) >>> 6;
2068 
2069         // v(z) := f(z)
2070         LongArray vz = new LongArray(t);
2071         reduceBit(vz.m_ints, 0, m, m, ks);
2072 
2073         // g1(z) := 1, g2(z) := 0
2074         LongArray g1z = new LongArray(t);
2075         g1z.m_ints[0] = 1L;
2076         LongArray g2z = new LongArray(t);
2077 
2078         int[] uvDeg = new int[]{ uzDegree, m + 1 };
2079         LongArray[] uv = new LongArray[]{ uz, vz };
2080 
2081         int[] ggDeg = new int[]{ 1, 0 };
2082         LongArray[] gg = new LongArray[]{ g1z, g2z };
2083 
2084         int b = 1;
2085         int duv1 = uvDeg[b];
2086         int dgg1 = ggDeg[b];
2087         int j = duv1 - uvDeg[1 - b];
2088 
2089         for (;;)
2090         {
2091             if (j < 0)
2092             {
2093                 j = -j;
2094                 uvDeg[b] = duv1;
2095                 ggDeg[b] = dgg1;
2096                 b = 1 - b;
2097                 duv1 = uvDeg[b];
2098                 dgg1 = ggDeg[b];
2099             }
2100 
2101             uv[b].addShiftedByBitsSafe(uv[1 - b], uvDeg[1 - b], j);
2102 
2103             int duv2 = uv[b].degreeFrom(duv1);
2104             if (duv2 == 0)
2105             {
2106                 return gg[1 - b];
2107             }
2108 
2109             {
2110                 int dgg2 = ggDeg[1 - b];
2111                 gg[b].addShiftedByBitsSafe(gg[1 - b], dgg2, j);
2112                 dgg2 += j;
2113 
2114                 if (dgg2 > dgg1)
2115                 {
2116                     dgg1 = dgg2;
2117                 }
2118                 else if (dgg2 == dgg1)
2119                 {
2120                     dgg1 = gg[b].degreeFrom(dgg1);
2121                 }
2122             }
2123 
2124             j += (duv2 - duv1);
2125             duv1 = duv2;
2126         }
2127     }
2128 
equals(Object o)2129     public boolean equals(Object o)
2130     {
2131         if (!(o instanceof LongArray))
2132         {
2133             return false;
2134         }
2135         LongArray other = (LongArray) o;
2136         int usedLen = getUsedLength();
2137         if (other.getUsedLength() != usedLen)
2138         {
2139             return false;
2140         }
2141         for (int i = 0; i < usedLen; i++)
2142         {
2143             if (m_ints[i] != other.m_ints[i])
2144             {
2145                 return false;
2146             }
2147         }
2148         return true;
2149     }
2150 
hashCode()2151     public int hashCode()
2152     {
2153         int usedLen = getUsedLength();
2154         int hash = 1;
2155         for (int i = 0; i < usedLen; i++)
2156         {
2157             long mi = m_ints[i];
2158             hash *= 31;
2159             hash ^= (int)mi;
2160             hash *= 31;
2161             hash ^= (int)(mi >>> 32);
2162         }
2163         return hash;
2164     }
2165 
clone()2166     public Object clone()
2167     {
2168         return new LongArray(Arrays.clone(m_ints));
2169     }
2170 
toString()2171     public String toString()
2172     {
2173         int i = getUsedLength();
2174         if (i == 0)
2175         {
2176             return "0";
2177         }
2178 
2179         StringBuffer sb = new StringBuffer(Long.toBinaryString(m_ints[--i]));
2180         while (--i >= 0)
2181         {
2182             String s = Long.toBinaryString(m_ints[i]);
2183 
2184             // Add leading zeroes, except for highest significant word
2185             int len = s.length();
2186             if (len < 64)
2187             {
2188                 sb.append(ZEROES.substring(len));
2189             }
2190 
2191             sb.append(s);
2192         }
2193         return sb.toString();
2194     }
2195 }