1 package org.bouncycastle.math.ec; 2 3 import org.bouncycastle.util.Arrays; 4 5 import java.math.BigInteger; 6 7 class LongArray 8 { 9 // private static long DEINTERLEAVE_MASK = 0x5555555555555555L; 10 11 /* 12 * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits. 13 * In a binary field, this operation is the same as squaring an 8 bit number. 14 */ 15 private static final int[] INTERLEAVE2_TABLE = new int[] 16 { 17 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 18 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 19 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 20 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, 21 0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 22 0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 23 0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 24 0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555, 25 0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 26 0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, 27 0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 28 0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155, 29 0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 30 0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 31 0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 32 0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, 33 0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 34 0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 35 0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 36 0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155, 37 0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 38 0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, 39 0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 40 0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555, 41 0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 42 0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 43 0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 44 0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, 45 0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 46 0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 47 0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 48 0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555 49 }; 50 51 /* 52 * This expands 7 bit indices into 21 bit contents (high bit 18), by inserting 0s between bits. 53 */ 54 private static final int[] INTERLEAVE3_TABLE = new int[] 55 { 56 0x00000, 0x00001, 0x00008, 0x00009, 0x00040, 0x00041, 0x00048, 0x00049, 57 0x00200, 0x00201, 0x00208, 0x00209, 0x00240, 0x00241, 0x00248, 0x00249, 58 0x01000, 0x01001, 0x01008, 0x01009, 0x01040, 0x01041, 0x01048, 0x01049, 59 0x01200, 0x01201, 0x01208, 0x01209, 0x01240, 0x01241, 0x01248, 0x01249, 60 0x08000, 0x08001, 0x08008, 0x08009, 0x08040, 0x08041, 0x08048, 0x08049, 61 0x08200, 0x08201, 0x08208, 0x08209, 0x08240, 0x08241, 0x08248, 0x08249, 62 0x09000, 0x09001, 0x09008, 0x09009, 0x09040, 0x09041, 0x09048, 0x09049, 63 0x09200, 0x09201, 0x09208, 0x09209, 0x09240, 0x09241, 0x09248, 0x09249, 64 0x40000, 0x40001, 0x40008, 0x40009, 0x40040, 0x40041, 0x40048, 0x40049, 65 0x40200, 0x40201, 0x40208, 0x40209, 0x40240, 0x40241, 0x40248, 0x40249, 66 0x41000, 0x41001, 0x41008, 0x41009, 0x41040, 0x41041, 0x41048, 0x41049, 67 0x41200, 0x41201, 0x41208, 0x41209, 0x41240, 0x41241, 0x41248, 0x41249, 68 0x48000, 0x48001, 0x48008, 0x48009, 0x48040, 0x48041, 0x48048, 0x48049, 69 0x48200, 0x48201, 0x48208, 0x48209, 0x48240, 0x48241, 0x48248, 0x48249, 70 0x49000, 0x49001, 0x49008, 0x49009, 0x49040, 0x49041, 0x49048, 0x49049, 71 0x49200, 0x49201, 0x49208, 0x49209, 0x49240, 0x49241, 0x49248, 0x49249 72 }; 73 74 /* 75 * This expands 8 bit indices into 32 bit contents (high bit 28), by inserting 0s between bits. 76 */ 77 private static final int[] INTERLEAVE4_TABLE = new int[] 78 { 79 0x00000000, 0x00000001, 0x00000010, 0x00000011, 0x00000100, 0x00000101, 0x00000110, 0x00000111, 80 0x00001000, 0x00001001, 0x00001010, 0x00001011, 0x00001100, 0x00001101, 0x00001110, 0x00001111, 81 0x00010000, 0x00010001, 0x00010010, 0x00010011, 0x00010100, 0x00010101, 0x00010110, 0x00010111, 82 0x00011000, 0x00011001, 0x00011010, 0x00011011, 0x00011100, 0x00011101, 0x00011110, 0x00011111, 83 0x00100000, 0x00100001, 0x00100010, 0x00100011, 0x00100100, 0x00100101, 0x00100110, 0x00100111, 84 0x00101000, 0x00101001, 0x00101010, 0x00101011, 0x00101100, 0x00101101, 0x00101110, 0x00101111, 85 0x00110000, 0x00110001, 0x00110010, 0x00110011, 0x00110100, 0x00110101, 0x00110110, 0x00110111, 86 0x00111000, 0x00111001, 0x00111010, 0x00111011, 0x00111100, 0x00111101, 0x00111110, 0x00111111, 87 0x01000000, 0x01000001, 0x01000010, 0x01000011, 0x01000100, 0x01000101, 0x01000110, 0x01000111, 88 0x01001000, 0x01001001, 0x01001010, 0x01001011, 0x01001100, 0x01001101, 0x01001110, 0x01001111, 89 0x01010000, 0x01010001, 0x01010010, 0x01010011, 0x01010100, 0x01010101, 0x01010110, 0x01010111, 90 0x01011000, 0x01011001, 0x01011010, 0x01011011, 0x01011100, 0x01011101, 0x01011110, 0x01011111, 91 0x01100000, 0x01100001, 0x01100010, 0x01100011, 0x01100100, 0x01100101, 0x01100110, 0x01100111, 92 0x01101000, 0x01101001, 0x01101010, 0x01101011, 0x01101100, 0x01101101, 0x01101110, 0x01101111, 93 0x01110000, 0x01110001, 0x01110010, 0x01110011, 0x01110100, 0x01110101, 0x01110110, 0x01110111, 94 0x01111000, 0x01111001, 0x01111010, 0x01111011, 0x01111100, 0x01111101, 0x01111110, 0x01111111, 95 0x10000000, 0x10000001, 0x10000010, 0x10000011, 0x10000100, 0x10000101, 0x10000110, 0x10000111, 96 0x10001000, 0x10001001, 0x10001010, 0x10001011, 0x10001100, 0x10001101, 0x10001110, 0x10001111, 97 0x10010000, 0x10010001, 0x10010010, 0x10010011, 0x10010100, 0x10010101, 0x10010110, 0x10010111, 98 0x10011000, 0x10011001, 0x10011010, 0x10011011, 0x10011100, 0x10011101, 0x10011110, 0x10011111, 99 0x10100000, 0x10100001, 0x10100010, 0x10100011, 0x10100100, 0x10100101, 0x10100110, 0x10100111, 100 0x10101000, 0x10101001, 0x10101010, 0x10101011, 0x10101100, 0x10101101, 0x10101110, 0x10101111, 101 0x10110000, 0x10110001, 0x10110010, 0x10110011, 0x10110100, 0x10110101, 0x10110110, 0x10110111, 102 0x10111000, 0x10111001, 0x10111010, 0x10111011, 0x10111100, 0x10111101, 0x10111110, 0x10111111, 103 0x11000000, 0x11000001, 0x11000010, 0x11000011, 0x11000100, 0x11000101, 0x11000110, 0x11000111, 104 0x11001000, 0x11001001, 0x11001010, 0x11001011, 0x11001100, 0x11001101, 0x11001110, 0x11001111, 105 0x11010000, 0x11010001, 0x11010010, 0x11010011, 0x11010100, 0x11010101, 0x11010110, 0x11010111, 106 0x11011000, 0x11011001, 0x11011010, 0x11011011, 0x11011100, 0x11011101, 0x11011110, 0x11011111, 107 0x11100000, 0x11100001, 0x11100010, 0x11100011, 0x11100100, 0x11100101, 0x11100110, 0x11100111, 108 0x11101000, 0x11101001, 0x11101010, 0x11101011, 0x11101100, 0x11101101, 0x11101110, 0x11101111, 109 0x11110000, 0x11110001, 0x11110010, 0x11110011, 0x11110100, 0x11110101, 0x11110110, 0x11110111, 110 0x11111000, 0x11111001, 0x11111010, 0x11111011, 0x11111100, 0x11111101, 0x11111110, 0x11111111 111 }; 112 113 /* 114 * This expands 7 bit indices into 35 bit contents (high bit 30), by inserting 0s between bits. 115 */ 116 private static final int[] INTERLEAVE5_TABLE = new int[] { 117 0x00000000, 0x00000001, 0x00000020, 0x00000021, 0x00000400, 0x00000401, 0x00000420, 0x00000421, 118 0x00008000, 0x00008001, 0x00008020, 0x00008021, 0x00008400, 0x00008401, 0x00008420, 0x00008421, 119 0x00100000, 0x00100001, 0x00100020, 0x00100021, 0x00100400, 0x00100401, 0x00100420, 0x00100421, 120 0x00108000, 0x00108001, 0x00108020, 0x00108021, 0x00108400, 0x00108401, 0x00108420, 0x00108421, 121 0x02000000, 0x02000001, 0x02000020, 0x02000021, 0x02000400, 0x02000401, 0x02000420, 0x02000421, 122 0x02008000, 0x02008001, 0x02008020, 0x02008021, 0x02008400, 0x02008401, 0x02008420, 0x02008421, 123 0x02100000, 0x02100001, 0x02100020, 0x02100021, 0x02100400, 0x02100401, 0x02100420, 0x02100421, 124 0x02108000, 0x02108001, 0x02108020, 0x02108021, 0x02108400, 0x02108401, 0x02108420, 0x02108421, 125 0x40000000, 0x40000001, 0x40000020, 0x40000021, 0x40000400, 0x40000401, 0x40000420, 0x40000421, 126 0x40008000, 0x40008001, 0x40008020, 0x40008021, 0x40008400, 0x40008401, 0x40008420, 0x40008421, 127 0x40100000, 0x40100001, 0x40100020, 0x40100021, 0x40100400, 0x40100401, 0x40100420, 0x40100421, 128 0x40108000, 0x40108001, 0x40108020, 0x40108021, 0x40108400, 0x40108401, 0x40108420, 0x40108421, 129 0x42000000, 0x42000001, 0x42000020, 0x42000021, 0x42000400, 0x42000401, 0x42000420, 0x42000421, 130 0x42008000, 0x42008001, 0x42008020, 0x42008021, 0x42008400, 0x42008401, 0x42008420, 0x42008421, 131 0x42100000, 0x42100001, 0x42100020, 0x42100021, 0x42100400, 0x42100401, 0x42100420, 0x42100421, 132 0x42108000, 0x42108001, 0x42108020, 0x42108021, 0x42108400, 0x42108401, 0x42108420, 0x42108421 133 }; 134 135 /* 136 * This expands 9 bit indices into 63 bit (long) contents (high bit 56), by inserting 0s between bits. 137 */ 138 private static final long[] INTERLEAVE7_TABLE = new long[] 139 { 140 0x0000000000000000L, 0x0000000000000001L, 0x0000000000000080L, 0x0000000000000081L, 141 0x0000000000004000L, 0x0000000000004001L, 0x0000000000004080L, 0x0000000000004081L, 142 0x0000000000200000L, 0x0000000000200001L, 0x0000000000200080L, 0x0000000000200081L, 143 0x0000000000204000L, 0x0000000000204001L, 0x0000000000204080L, 0x0000000000204081L, 144 0x0000000010000000L, 0x0000000010000001L, 0x0000000010000080L, 0x0000000010000081L, 145 0x0000000010004000L, 0x0000000010004001L, 0x0000000010004080L, 0x0000000010004081L, 146 0x0000000010200000L, 0x0000000010200001L, 0x0000000010200080L, 0x0000000010200081L, 147 0x0000000010204000L, 0x0000000010204001L, 0x0000000010204080L, 0x0000000010204081L, 148 0x0000000800000000L, 0x0000000800000001L, 0x0000000800000080L, 0x0000000800000081L, 149 0x0000000800004000L, 0x0000000800004001L, 0x0000000800004080L, 0x0000000800004081L, 150 0x0000000800200000L, 0x0000000800200001L, 0x0000000800200080L, 0x0000000800200081L, 151 0x0000000800204000L, 0x0000000800204001L, 0x0000000800204080L, 0x0000000800204081L, 152 0x0000000810000000L, 0x0000000810000001L, 0x0000000810000080L, 0x0000000810000081L, 153 0x0000000810004000L, 0x0000000810004001L, 0x0000000810004080L, 0x0000000810004081L, 154 0x0000000810200000L, 0x0000000810200001L, 0x0000000810200080L, 0x0000000810200081L, 155 0x0000000810204000L, 0x0000000810204001L, 0x0000000810204080L, 0x0000000810204081L, 156 0x0000040000000000L, 0x0000040000000001L, 0x0000040000000080L, 0x0000040000000081L, 157 0x0000040000004000L, 0x0000040000004001L, 0x0000040000004080L, 0x0000040000004081L, 158 0x0000040000200000L, 0x0000040000200001L, 0x0000040000200080L, 0x0000040000200081L, 159 0x0000040000204000L, 0x0000040000204001L, 0x0000040000204080L, 0x0000040000204081L, 160 0x0000040010000000L, 0x0000040010000001L, 0x0000040010000080L, 0x0000040010000081L, 161 0x0000040010004000L, 0x0000040010004001L, 0x0000040010004080L, 0x0000040010004081L, 162 0x0000040010200000L, 0x0000040010200001L, 0x0000040010200080L, 0x0000040010200081L, 163 0x0000040010204000L, 0x0000040010204001L, 0x0000040010204080L, 0x0000040010204081L, 164 0x0000040800000000L, 0x0000040800000001L, 0x0000040800000080L, 0x0000040800000081L, 165 0x0000040800004000L, 0x0000040800004001L, 0x0000040800004080L, 0x0000040800004081L, 166 0x0000040800200000L, 0x0000040800200001L, 0x0000040800200080L, 0x0000040800200081L, 167 0x0000040800204000L, 0x0000040800204001L, 0x0000040800204080L, 0x0000040800204081L, 168 0x0000040810000000L, 0x0000040810000001L, 0x0000040810000080L, 0x0000040810000081L, 169 0x0000040810004000L, 0x0000040810004001L, 0x0000040810004080L, 0x0000040810004081L, 170 0x0000040810200000L, 0x0000040810200001L, 0x0000040810200080L, 0x0000040810200081L, 171 0x0000040810204000L, 0x0000040810204001L, 0x0000040810204080L, 0x0000040810204081L, 172 0x0002000000000000L, 0x0002000000000001L, 0x0002000000000080L, 0x0002000000000081L, 173 0x0002000000004000L, 0x0002000000004001L, 0x0002000000004080L, 0x0002000000004081L, 174 0x0002000000200000L, 0x0002000000200001L, 0x0002000000200080L, 0x0002000000200081L, 175 0x0002000000204000L, 0x0002000000204001L, 0x0002000000204080L, 0x0002000000204081L, 176 0x0002000010000000L, 0x0002000010000001L, 0x0002000010000080L, 0x0002000010000081L, 177 0x0002000010004000L, 0x0002000010004001L, 0x0002000010004080L, 0x0002000010004081L, 178 0x0002000010200000L, 0x0002000010200001L, 0x0002000010200080L, 0x0002000010200081L, 179 0x0002000010204000L, 0x0002000010204001L, 0x0002000010204080L, 0x0002000010204081L, 180 0x0002000800000000L, 0x0002000800000001L, 0x0002000800000080L, 0x0002000800000081L, 181 0x0002000800004000L, 0x0002000800004001L, 0x0002000800004080L, 0x0002000800004081L, 182 0x0002000800200000L, 0x0002000800200001L, 0x0002000800200080L, 0x0002000800200081L, 183 0x0002000800204000L, 0x0002000800204001L, 0x0002000800204080L, 0x0002000800204081L, 184 0x0002000810000000L, 0x0002000810000001L, 0x0002000810000080L, 0x0002000810000081L, 185 0x0002000810004000L, 0x0002000810004001L, 0x0002000810004080L, 0x0002000810004081L, 186 0x0002000810200000L, 0x0002000810200001L, 0x0002000810200080L, 0x0002000810200081L, 187 0x0002000810204000L, 0x0002000810204001L, 0x0002000810204080L, 0x0002000810204081L, 188 0x0002040000000000L, 0x0002040000000001L, 0x0002040000000080L, 0x0002040000000081L, 189 0x0002040000004000L, 0x0002040000004001L, 0x0002040000004080L, 0x0002040000004081L, 190 0x0002040000200000L, 0x0002040000200001L, 0x0002040000200080L, 0x0002040000200081L, 191 0x0002040000204000L, 0x0002040000204001L, 0x0002040000204080L, 0x0002040000204081L, 192 0x0002040010000000L, 0x0002040010000001L, 0x0002040010000080L, 0x0002040010000081L, 193 0x0002040010004000L, 0x0002040010004001L, 0x0002040010004080L, 0x0002040010004081L, 194 0x0002040010200000L, 0x0002040010200001L, 0x0002040010200080L, 0x0002040010200081L, 195 0x0002040010204000L, 0x0002040010204001L, 0x0002040010204080L, 0x0002040010204081L, 196 0x0002040800000000L, 0x0002040800000001L, 0x0002040800000080L, 0x0002040800000081L, 197 0x0002040800004000L, 0x0002040800004001L, 0x0002040800004080L, 0x0002040800004081L, 198 0x0002040800200000L, 0x0002040800200001L, 0x0002040800200080L, 0x0002040800200081L, 199 0x0002040800204000L, 0x0002040800204001L, 0x0002040800204080L, 0x0002040800204081L, 200 0x0002040810000000L, 0x0002040810000001L, 0x0002040810000080L, 0x0002040810000081L, 201 0x0002040810004000L, 0x0002040810004001L, 0x0002040810004080L, 0x0002040810004081L, 202 0x0002040810200000L, 0x0002040810200001L, 0x0002040810200080L, 0x0002040810200081L, 203 0x0002040810204000L, 0x0002040810204001L, 0x0002040810204080L, 0x0002040810204081L, 204 0x0100000000000000L, 0x0100000000000001L, 0x0100000000000080L, 0x0100000000000081L, 205 0x0100000000004000L, 0x0100000000004001L, 0x0100000000004080L, 0x0100000000004081L, 206 0x0100000000200000L, 0x0100000000200001L, 0x0100000000200080L, 0x0100000000200081L, 207 0x0100000000204000L, 0x0100000000204001L, 0x0100000000204080L, 0x0100000000204081L, 208 0x0100000010000000L, 0x0100000010000001L, 0x0100000010000080L, 0x0100000010000081L, 209 0x0100000010004000L, 0x0100000010004001L, 0x0100000010004080L, 0x0100000010004081L, 210 0x0100000010200000L, 0x0100000010200001L, 0x0100000010200080L, 0x0100000010200081L, 211 0x0100000010204000L, 0x0100000010204001L, 0x0100000010204080L, 0x0100000010204081L, 212 0x0100000800000000L, 0x0100000800000001L, 0x0100000800000080L, 0x0100000800000081L, 213 0x0100000800004000L, 0x0100000800004001L, 0x0100000800004080L, 0x0100000800004081L, 214 0x0100000800200000L, 0x0100000800200001L, 0x0100000800200080L, 0x0100000800200081L, 215 0x0100000800204000L, 0x0100000800204001L, 0x0100000800204080L, 0x0100000800204081L, 216 0x0100000810000000L, 0x0100000810000001L, 0x0100000810000080L, 0x0100000810000081L, 217 0x0100000810004000L, 0x0100000810004001L, 0x0100000810004080L, 0x0100000810004081L, 218 0x0100000810200000L, 0x0100000810200001L, 0x0100000810200080L, 0x0100000810200081L, 219 0x0100000810204000L, 0x0100000810204001L, 0x0100000810204080L, 0x0100000810204081L, 220 0x0100040000000000L, 0x0100040000000001L, 0x0100040000000080L, 0x0100040000000081L, 221 0x0100040000004000L, 0x0100040000004001L, 0x0100040000004080L, 0x0100040000004081L, 222 0x0100040000200000L, 0x0100040000200001L, 0x0100040000200080L, 0x0100040000200081L, 223 0x0100040000204000L, 0x0100040000204001L, 0x0100040000204080L, 0x0100040000204081L, 224 0x0100040010000000L, 0x0100040010000001L, 0x0100040010000080L, 0x0100040010000081L, 225 0x0100040010004000L, 0x0100040010004001L, 0x0100040010004080L, 0x0100040010004081L, 226 0x0100040010200000L, 0x0100040010200001L, 0x0100040010200080L, 0x0100040010200081L, 227 0x0100040010204000L, 0x0100040010204001L, 0x0100040010204080L, 0x0100040010204081L, 228 0x0100040800000000L, 0x0100040800000001L, 0x0100040800000080L, 0x0100040800000081L, 229 0x0100040800004000L, 0x0100040800004001L, 0x0100040800004080L, 0x0100040800004081L, 230 0x0100040800200000L, 0x0100040800200001L, 0x0100040800200080L, 0x0100040800200081L, 231 0x0100040800204000L, 0x0100040800204001L, 0x0100040800204080L, 0x0100040800204081L, 232 0x0100040810000000L, 0x0100040810000001L, 0x0100040810000080L, 0x0100040810000081L, 233 0x0100040810004000L, 0x0100040810004001L, 0x0100040810004080L, 0x0100040810004081L, 234 0x0100040810200000L, 0x0100040810200001L, 0x0100040810200080L, 0x0100040810200081L, 235 0x0100040810204000L, 0x0100040810204001L, 0x0100040810204080L, 0x0100040810204081L, 236 0x0102000000000000L, 0x0102000000000001L, 0x0102000000000080L, 0x0102000000000081L, 237 0x0102000000004000L, 0x0102000000004001L, 0x0102000000004080L, 0x0102000000004081L, 238 0x0102000000200000L, 0x0102000000200001L, 0x0102000000200080L, 0x0102000000200081L, 239 0x0102000000204000L, 0x0102000000204001L, 0x0102000000204080L, 0x0102000000204081L, 240 0x0102000010000000L, 0x0102000010000001L, 0x0102000010000080L, 0x0102000010000081L, 241 0x0102000010004000L, 0x0102000010004001L, 0x0102000010004080L, 0x0102000010004081L, 242 0x0102000010200000L, 0x0102000010200001L, 0x0102000010200080L, 0x0102000010200081L, 243 0x0102000010204000L, 0x0102000010204001L, 0x0102000010204080L, 0x0102000010204081L, 244 0x0102000800000000L, 0x0102000800000001L, 0x0102000800000080L, 0x0102000800000081L, 245 0x0102000800004000L, 0x0102000800004001L, 0x0102000800004080L, 0x0102000800004081L, 246 0x0102000800200000L, 0x0102000800200001L, 0x0102000800200080L, 0x0102000800200081L, 247 0x0102000800204000L, 0x0102000800204001L, 0x0102000800204080L, 0x0102000800204081L, 248 0x0102000810000000L, 0x0102000810000001L, 0x0102000810000080L, 0x0102000810000081L, 249 0x0102000810004000L, 0x0102000810004001L, 0x0102000810004080L, 0x0102000810004081L, 250 0x0102000810200000L, 0x0102000810200001L, 0x0102000810200080L, 0x0102000810200081L, 251 0x0102000810204000L, 0x0102000810204001L, 0x0102000810204080L, 0x0102000810204081L, 252 0x0102040000000000L, 0x0102040000000001L, 0x0102040000000080L, 0x0102040000000081L, 253 0x0102040000004000L, 0x0102040000004001L, 0x0102040000004080L, 0x0102040000004081L, 254 0x0102040000200000L, 0x0102040000200001L, 0x0102040000200080L, 0x0102040000200081L, 255 0x0102040000204000L, 0x0102040000204001L, 0x0102040000204080L, 0x0102040000204081L, 256 0x0102040010000000L, 0x0102040010000001L, 0x0102040010000080L, 0x0102040010000081L, 257 0x0102040010004000L, 0x0102040010004001L, 0x0102040010004080L, 0x0102040010004081L, 258 0x0102040010200000L, 0x0102040010200001L, 0x0102040010200080L, 0x0102040010200081L, 259 0x0102040010204000L, 0x0102040010204001L, 0x0102040010204080L, 0x0102040010204081L, 260 0x0102040800000000L, 0x0102040800000001L, 0x0102040800000080L, 0x0102040800000081L, 261 0x0102040800004000L, 0x0102040800004001L, 0x0102040800004080L, 0x0102040800004081L, 262 0x0102040800200000L, 0x0102040800200001L, 0x0102040800200080L, 0x0102040800200081L, 263 0x0102040800204000L, 0x0102040800204001L, 0x0102040800204080L, 0x0102040800204081L, 264 0x0102040810000000L, 0x0102040810000001L, 0x0102040810000080L, 0x0102040810000081L, 265 0x0102040810004000L, 0x0102040810004001L, 0x0102040810004080L, 0x0102040810004081L, 266 0x0102040810200000L, 0x0102040810200001L, 0x0102040810200080L, 0x0102040810200081L, 267 0x0102040810204000L, 0x0102040810204001L, 0x0102040810204080L, 0x0102040810204081L 268 }; 269 270 // For toString(); must have length 64 271 private static final String ZEROES = "0000000000000000000000000000000000000000000000000000000000000000"; 272 273 final static byte[] bitLengths = 274 { 275 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 276 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 277 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 278 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 279 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 280 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 281 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 282 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 283 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 284 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 285 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 286 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 287 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 288 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 289 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 290 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 291 }; 292 293 // TODO make m fixed for the LongArray, and hence compute T once and for all 294 295 private long[] m_ints; 296 LongArray(int intLen)297 public LongArray(int intLen) 298 { 299 m_ints = new long[intLen]; 300 } 301 LongArray(long[] ints)302 public LongArray(long[] ints) 303 { 304 m_ints = ints; 305 } 306 LongArray(long[] ints, int off, int len)307 public LongArray(long[] ints, int off, int len) 308 { 309 if (off == 0 && len == ints.length) 310 { 311 m_ints = ints; 312 } 313 else 314 { 315 m_ints = new long[len]; 316 System.arraycopy(ints, off, m_ints, 0, len); 317 } 318 } 319 LongArray(BigInteger bigInt)320 public LongArray(BigInteger bigInt) 321 { 322 if (bigInt == null || bigInt.signum() < 0) 323 { 324 throw new IllegalArgumentException("invalid F2m field value"); 325 } 326 327 if (bigInt.signum() == 0) 328 { 329 m_ints = new long[] { 0L }; 330 return; 331 } 332 333 byte[] barr = bigInt.toByteArray(); 334 int barrLen = barr.length; 335 int barrStart = 0; 336 if (barr[0] == 0) 337 { 338 // First byte is 0 to enforce highest (=sign) bit is zero. 339 // In this case ignore barr[0]. 340 barrLen--; 341 barrStart = 1; 342 } 343 int intLen = (barrLen + 7) / 8; 344 m_ints = new long[intLen]; 345 346 int iarrJ = intLen - 1; 347 int rem = barrLen % 8 + barrStart; 348 long temp = 0; 349 int barrI = barrStart; 350 if (barrStart < rem) 351 { 352 for (; barrI < rem; barrI++) 353 { 354 temp <<= 8; 355 int barrBarrI = barr[barrI] & 0xFF; 356 temp |= barrBarrI; 357 } 358 m_ints[iarrJ--] = temp; 359 } 360 361 for (; iarrJ >= 0; iarrJ--) 362 { 363 temp = 0; 364 for (int i = 0; i < 8; i++) 365 { 366 temp <<= 8; 367 int barrBarrI = barr[barrI++] & 0xFF; 368 temp |= barrBarrI; 369 } 370 m_ints[iarrJ] = temp; 371 } 372 } 373 isOne()374 public boolean isOne() 375 { 376 long[] a = m_ints; 377 if (a[0] != 1L) 378 { 379 return false; 380 } 381 for (int i = 1; i < a.length; ++i) 382 { 383 if (a[i] != 0L) 384 { 385 return false; 386 } 387 } 388 return true; 389 } 390 isZero()391 public boolean isZero() 392 { 393 long[] a = m_ints; 394 for (int i = 0; i < a.length; ++i) 395 { 396 if (a[i] != 0L) 397 { 398 return false; 399 } 400 } 401 return true; 402 } 403 getUsedLength()404 public int getUsedLength() 405 { 406 return getUsedLengthFrom(m_ints.length); 407 } 408 getUsedLengthFrom(int from)409 public int getUsedLengthFrom(int from) 410 { 411 long[] a = m_ints; 412 from = Math.min(from, a.length); 413 414 if (from < 1) 415 { 416 return 0; 417 } 418 419 // Check if first element will act as sentinel 420 if (a[0] != 0) 421 { 422 while (a[--from] == 0) 423 { 424 } 425 return from + 1; 426 } 427 428 do 429 { 430 if (a[--from] != 0) 431 { 432 return from + 1; 433 } 434 } 435 while (from > 0); 436 437 return 0; 438 } 439 degree()440 public int degree() 441 { 442 int i = m_ints.length; 443 long w; 444 do 445 { 446 if (i == 0) 447 { 448 return 0; 449 } 450 w = m_ints[--i]; 451 } 452 while (w == 0); 453 454 return (i << 6) + bitLength(w); 455 } 456 degreeFrom(int limit)457 private int degreeFrom(int limit) 458 { 459 int i = (limit + 62) >>> 6; 460 long w; 461 do 462 { 463 if (i == 0) 464 { 465 return 0; 466 } 467 w = m_ints[--i]; 468 } 469 while (w == 0); 470 471 return (i << 6) + bitLength(w); 472 } 473 474 // private int lowestCoefficient() 475 // { 476 // for (int i = 0; i < m_ints.length; ++i) 477 // { 478 // long mi = m_ints[i]; 479 // if (mi != 0) 480 // { 481 // int j = 0; 482 // while ((mi & 0xFFL) == 0) 483 // { 484 // j += 8; 485 // mi >>>= 8; 486 // } 487 // while ((mi & 1L) == 0) 488 // { 489 // ++j; 490 // mi >>>= 1; 491 // } 492 // return (i << 6) + j; 493 // } 494 // } 495 // return -1; 496 // } 497 bitLength(long w)498 private static int bitLength(long w) 499 { 500 int u = (int)(w >>> 32), b; 501 if (u == 0) 502 { 503 u = (int)w; 504 b = 0; 505 } 506 else 507 { 508 b = 32; 509 } 510 511 int t = u >>> 16, k; 512 if (t == 0) 513 { 514 t = u >>> 8; 515 k = (t == 0) ? bitLengths[u] : 8 + bitLengths[t]; 516 } 517 else 518 { 519 int v = t >>> 8; 520 k = (v == 0) ? 16 + bitLengths[t] : 24 + bitLengths[v]; 521 } 522 523 return b + k; 524 } 525 resizedInts(int newLen)526 private long[] resizedInts(int newLen) 527 { 528 long[] newInts = new long[newLen]; 529 System.arraycopy(m_ints, 0, newInts, 0, Math.min(m_ints.length, newLen)); 530 return newInts; 531 } 532 toBigInteger()533 public BigInteger toBigInteger() 534 { 535 int usedLen = getUsedLength(); 536 if (usedLen == 0) 537 { 538 return ECConstants.ZERO; 539 } 540 541 long highestInt = m_ints[usedLen - 1]; 542 byte[] temp = new byte[8]; 543 int barrI = 0; 544 boolean trailingZeroBytesDone = false; 545 for (int j = 7; j >= 0; j--) 546 { 547 byte thisByte = (byte)(highestInt >>> (8 * j)); 548 if (trailingZeroBytesDone || (thisByte != 0)) 549 { 550 trailingZeroBytesDone = true; 551 temp[barrI++] = thisByte; 552 } 553 } 554 555 int barrLen = 8 * (usedLen - 1) + barrI; 556 byte[] barr = new byte[barrLen]; 557 for (int j = 0; j < barrI; j++) 558 { 559 barr[j] = temp[j]; 560 } 561 // Highest value int is done now 562 563 for (int iarrJ = usedLen - 2; iarrJ >= 0; iarrJ--) 564 { 565 long mi = m_ints[iarrJ]; 566 for (int j = 7; j >= 0; j--) 567 { 568 barr[barrI++] = (byte)(mi >>> (8 * j)); 569 } 570 } 571 return new BigInteger(1, barr); 572 } 573 574 // private static long shiftUp(long[] x, int xOff, int count) 575 // { 576 // long prev = 0; 577 // for (int i = 0; i < count; ++i) 578 // { 579 // long next = x[xOff + i]; 580 // x[xOff + i] = (next << 1) | prev; 581 // prev = next >>> 63; 582 // } 583 // return prev; 584 // } 585 shiftUp(long[] x, int xOff, int count, int shift)586 private static long shiftUp(long[] x, int xOff, int count, int shift) 587 { 588 int shiftInv = 64 - shift; 589 long prev = 0; 590 for (int i = 0; i < count; ++i) 591 { 592 long next = x[xOff + i]; 593 x[xOff + i] = (next << shift) | prev; 594 prev = next >>> shiftInv; 595 } 596 return prev; 597 } 598 shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift)599 private static long shiftUp(long[] x, int xOff, long[] z, int zOff, int count, int shift) 600 { 601 int shiftInv = 64 - shift; 602 long prev = 0; 603 for (int i = 0; i < count; ++i) 604 { 605 long next = x[xOff + i]; 606 z[zOff + i] = (next << shift) | prev; 607 prev = next >>> shiftInv; 608 } 609 return prev; 610 } 611 addOne()612 public LongArray addOne() 613 { 614 if (m_ints.length == 0) 615 { 616 return new LongArray(new long[]{ 1L }); 617 } 618 619 int resultLen = Math.max(1, getUsedLength()); 620 long[] ints = resizedInts(resultLen); 621 ints[0] ^= 1L; 622 return new LongArray(ints); 623 } 624 625 // private void addShiftedByBits(LongArray other, int bits) 626 // { 627 // int words = bits >>> 6; 628 // int shift = bits & 0x3F; 629 // 630 // if (shift == 0) 631 // { 632 // addShiftedByWords(other, words); 633 // return; 634 // } 635 // 636 // int otherUsedLen = other.getUsedLength(); 637 // if (otherUsedLen == 0) 638 // { 639 // return; 640 // } 641 // 642 // int minLen = otherUsedLen + words + 1; 643 // if (minLen > m_ints.length) 644 // { 645 // m_ints = resizedInts(minLen); 646 // } 647 // 648 // long carry = addShiftedByBits(m_ints, words, other.m_ints, 0, otherUsedLen, shift); 649 // m_ints[otherUsedLen + words] ^= carry; 650 // } 651 addShiftedByBitsSafe(LongArray other, int otherDegree, int bits)652 private void addShiftedByBitsSafe(LongArray other, int otherDegree, int bits) 653 { 654 int otherLen = (otherDegree + 63) >>> 6; 655 656 int words = bits >>> 6; 657 int shift = bits & 0x3F; 658 659 if (shift == 0) 660 { 661 add(m_ints, words, other.m_ints, 0, otherLen); 662 return; 663 } 664 665 long carry = addShiftedUp(m_ints, words, other.m_ints, 0, otherLen, shift); 666 if (carry != 0L) 667 { 668 m_ints[otherLen + words] ^= carry; 669 } 670 } 671 addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift)672 private static long addShiftedUp(long[] x, int xOff, long[] y, int yOff, int count, int shift) 673 { 674 int shiftInv = 64 - shift; 675 long prev = 0; 676 for (int i = 0; i < count; ++i) 677 { 678 long next = y[yOff + i]; 679 x[xOff + i] ^= (next << shift) | prev; 680 prev = next >>> shiftInv; 681 } 682 return prev; 683 } 684 addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift)685 private static long addShiftedDown(long[] x, int xOff, long[] y, int yOff, int count, int shift) 686 { 687 int shiftInv = 64 - shift; 688 long prev = 0; 689 int i = count; 690 while (--i >= 0) 691 { 692 long next = y[yOff + i]; 693 x[xOff + i] ^= (next >>> shift) | prev; 694 prev = next << shiftInv; 695 } 696 return prev; 697 } 698 addShiftedByWords(LongArray other, int words)699 public void addShiftedByWords(LongArray other, int words) 700 { 701 int otherUsedLen = other.getUsedLength(); 702 if (otherUsedLen == 0) 703 { 704 return; 705 } 706 707 int minLen = otherUsedLen + words; 708 if (minLen > m_ints.length) 709 { 710 m_ints = resizedInts(minLen); 711 } 712 713 add(m_ints, words, other.m_ints, 0, otherUsedLen); 714 } 715 add(long[] x, int xOff, long[] y, int yOff, int count)716 private static void add(long[] x, int xOff, long[] y, int yOff, int count) 717 { 718 for (int i = 0; i < count; ++i) 719 { 720 x[xOff + i] ^= y[yOff + i]; 721 } 722 } 723 add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count)724 private static void add(long[] x, int xOff, long[] y, int yOff, long[] z, int zOff, int count) 725 { 726 for (int i = 0; i < count; ++i) 727 { 728 z[zOff + i] = x[xOff + i] ^ y[yOff + i]; 729 } 730 } 731 addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count)732 private static void addBoth(long[] x, int xOff, long[] y1, int y1Off, long[] y2, int y2Off, int count) 733 { 734 for (int i = 0; i < count; ++i) 735 { 736 x[xOff + i] ^= y1[y1Off + i] ^ y2[y2Off + i]; 737 } 738 } 739 distribute(long[] x, int src, int dst1, int dst2, int count)740 private static void distribute(long[] x, int src, int dst1, int dst2, int count) 741 { 742 for (int i = 0; i < count; ++i) 743 { 744 long v = x[src + i]; 745 x[dst1 + i] ^= v; 746 x[dst2 + i] ^= v; 747 } 748 } 749 getLength()750 public int getLength() 751 { 752 return m_ints.length; 753 } 754 flipWord(long[] buf, int off, int bit, long word)755 private static void flipWord(long[] buf, int off, int bit, long word) 756 { 757 int n = off + (bit >>> 6); 758 int shift = bit & 0x3F; 759 if (shift == 0) 760 { 761 buf[n] ^= word; 762 } 763 else 764 { 765 buf[n] ^= word << shift; 766 word >>>= (64 - shift); 767 if (word != 0) 768 { 769 buf[++n] ^= word; 770 } 771 } 772 } 773 774 // private static long getWord(long[] buf, int off, int len, int bit) 775 // { 776 // int n = off + (bit >>> 6); 777 // int shift = bit & 0x3F; 778 // if (shift == 0) 779 // { 780 // return buf[n]; 781 // } 782 // long result = buf[n] >>> shift; 783 // if (++n < len) 784 // { 785 // result |= buf[n] << (64 - shift); 786 // } 787 // return result; 788 // } 789 testBitZero()790 public boolean testBitZero() 791 { 792 return m_ints.length > 0 && (m_ints[0] & 1L) != 0; 793 } 794 testBit(long[] buf, int off, int n)795 private static boolean testBit(long[] buf, int off, int n) 796 { 797 // theInt = n / 64 798 int theInt = n >>> 6; 799 // theBit = n % 64 800 int theBit = n & 0x3F; 801 long tester = 1L << theBit; 802 return (buf[off + theInt] & tester) != 0; 803 } 804 flipBit(long[] buf, int off, int n)805 private static void flipBit(long[] buf, int off, int n) 806 { 807 // theInt = n / 64 808 int theInt = n >>> 6; 809 // theBit = n % 64 810 int theBit = n & 0x3F; 811 long flipper = 1L << theBit; 812 buf[off + theInt] ^= flipper; 813 } 814 815 // private static void setBit(long[] buf, int off, int n) 816 // { 817 // // theInt = n / 64 818 // int theInt = n >>> 6; 819 // // theBit = n % 64 820 // int theBit = n & 0x3F; 821 // long setter = 1L << theBit; 822 // buf[off + theInt] |= setter; 823 // } 824 // 825 // private static void clearBit(long[] buf, int off, int n) 826 // { 827 // // theInt = n / 64 828 // int theInt = n >>> 6; 829 // // theBit = n % 64 830 // int theBit = n & 0x3F; 831 // long setter = 1L << theBit; 832 // buf[off + theInt] &= ~setter; 833 // } 834 multiplyWord(long a, long[] b, int bLen, long[] c, int cOff)835 private static void multiplyWord(long a, long[] b, int bLen, long[] c, int cOff) 836 { 837 if ((a & 1L) != 0L) 838 { 839 add(c, cOff, b, 0, bLen); 840 } 841 int k = 1; 842 while ((a >>>= 1) != 0L) 843 { 844 if ((a & 1L) != 0L) 845 { 846 long carry = addShiftedUp(c, cOff, b, 0, bLen, k); 847 if (carry != 0L) 848 { 849 c[cOff + bLen] ^= carry; 850 } 851 } 852 ++k; 853 } 854 } 855 modMultiplyLD(LongArray other, int m, int[] ks)856 public LongArray modMultiplyLD(LongArray other, int m, int[] ks) 857 { 858 /* 859 * Find out the degree of each argument and handle the zero cases 860 */ 861 int aDeg = degree(); 862 if (aDeg == 0) 863 { 864 return this; 865 } 866 int bDeg = other.degree(); 867 if (bDeg == 0) 868 { 869 return other; 870 } 871 872 /* 873 * Swap if necessary so that A is the smaller argument 874 */ 875 LongArray A = this, B = other; 876 if (aDeg > bDeg) 877 { 878 A = other; B = this; 879 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 880 } 881 882 /* 883 * Establish the word lengths of the arguments and result 884 */ 885 int aLen = (aDeg + 63) >>> 6; 886 int bLen = (bDeg + 63) >>> 6; 887 int cLen = (aDeg + bDeg + 62) >>> 6; 888 889 if (aLen == 1) 890 { 891 long a0 = A.m_ints[0]; 892 if (a0 == 1L) 893 { 894 return B; 895 } 896 897 /* 898 * Fast path for small A, with performance dependent only on the number of set bits 899 */ 900 long[] c0 = new long[cLen]; 901 multiplyWord(a0, B.m_ints, bLen, c0, 0); 902 903 /* 904 * Reduce the raw answer against the reduction coefficients 905 */ 906 return reduceResult(c0, 0, cLen, m, ks); 907 } 908 909 /* 910 * Determine if B will get bigger during shifting 911 */ 912 int bMax = (bDeg + 7 + 63) >>> 6; 913 914 /* 915 * Lookup table for the offset of each B in the tables 916 */ 917 int[] ti = new int[16]; 918 919 /* 920 * Precompute table of all 4-bit products of B 921 */ 922 long[] T0 = new long[bMax << 4]; 923 int tOff = bMax; 924 ti[1] = tOff; 925 System.arraycopy(B.m_ints, 0, T0, tOff, bLen); 926 for (int i = 2; i < 16; ++i) 927 { 928 ti[i] = (tOff += bMax); 929 if ((i & 1) == 0) 930 { 931 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1); 932 } 933 else 934 { 935 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax); 936 } 937 } 938 939 /* 940 * Second table with all 4-bit products of B shifted 4 bits 941 */ 942 long[] T1 = new long[T0.length]; 943 shiftUp(T0, 0, T1, 0, T0.length, 4); 944 // shiftUp(T0, bMax, T1, bMax, tOff, 4); 945 946 long[] a = A.m_ints; 947 long[] c = new long[cLen]; 948 949 int MASK = 0xF; 950 951 /* 952 * Lopez-Dahab algorithm 953 */ 954 955 for (int k = 56; k >= 0; k -= 8) 956 { 957 for (int j = 1; j < aLen; j += 2) 958 { 959 int aVal = (int)(a[j] >>> k); 960 int u = aVal & MASK; 961 int v = (aVal >>> 4) & MASK; 962 addBoth(c, j - 1, T0, ti[u], T1, ti[v], bMax); 963 } 964 shiftUp(c, 0, cLen, 8); 965 } 966 967 for (int k = 56; k >= 0; k -= 8) 968 { 969 for (int j = 0; j < aLen; j += 2) 970 { 971 int aVal = (int)(a[j] >>> k); 972 int u = aVal & MASK; 973 int v = (aVal >>> 4) & MASK; 974 addBoth(c, j, T0, ti[u], T1, ti[v], bMax); 975 } 976 if (k > 0) 977 { 978 shiftUp(c, 0, cLen, 8); 979 } 980 } 981 982 /* 983 * Finally the raw answer is collected, reduce it against the reduction coefficients 984 */ 985 return reduceResult(c, 0, cLen, m, ks); 986 } 987 modMultiply(LongArray other, int m, int[] ks)988 public LongArray modMultiply(LongArray other, int m, int[] ks) 989 { 990 /* 991 * Find out the degree of each argument and handle the zero cases 992 */ 993 int aDeg = degree(); 994 if (aDeg == 0) 995 { 996 return this; 997 } 998 int bDeg = other.degree(); 999 if (bDeg == 0) 1000 { 1001 return other; 1002 } 1003 1004 /* 1005 * Swap if necessary so that A is the smaller argument 1006 */ 1007 LongArray A = this, B = other; 1008 if (aDeg > bDeg) 1009 { 1010 A = other; B = this; 1011 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 1012 } 1013 1014 /* 1015 * Establish the word lengths of the arguments and result 1016 */ 1017 int aLen = (aDeg + 63) >>> 6; 1018 int bLen = (bDeg + 63) >>> 6; 1019 int cLen = (aDeg + bDeg + 62) >>> 6; 1020 1021 if (aLen == 1) 1022 { 1023 long a0 = A.m_ints[0]; 1024 if (a0 == 1L) 1025 { 1026 return B; 1027 } 1028 1029 /* 1030 * Fast path for small A, with performance dependent only on the number of set bits 1031 */ 1032 long[] c0 = new long[cLen]; 1033 multiplyWord(a0, B.m_ints, bLen, c0, 0); 1034 1035 /* 1036 * Reduce the raw answer against the reduction coefficients 1037 */ 1038 return reduceResult(c0, 0, cLen, m, ks); 1039 } 1040 1041 /* 1042 * Determine if B will get bigger during shifting 1043 */ 1044 int bMax = (bDeg + 7 + 63) >>> 6; 1045 1046 /* 1047 * Lookup table for the offset of each B in the tables 1048 */ 1049 int[] ti = new int[16]; 1050 1051 /* 1052 * Precompute table of all 4-bit products of B 1053 */ 1054 long[] T0 = new long[bMax << 4]; 1055 int tOff = bMax; 1056 ti[1] = tOff; 1057 System.arraycopy(B.m_ints, 0, T0, tOff, bLen); 1058 for (int i = 2; i < 16; ++i) 1059 { 1060 ti[i] = (tOff += bMax); 1061 if ((i & 1) == 0) 1062 { 1063 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1); 1064 } 1065 else 1066 { 1067 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax); 1068 } 1069 } 1070 1071 /* 1072 * Second table with all 4-bit products of B shifted 4 bits 1073 */ 1074 long[] T1 = new long[T0.length]; 1075 shiftUp(T0, 0, T1, 0, T0.length, 4); 1076 // shiftUp(T0, bMax, T1, bMax, tOff, 4); 1077 1078 long[] a = A.m_ints; 1079 long[] c = new long[cLen << 3]; 1080 1081 int MASK = 0xF; 1082 1083 /* 1084 * Lopez-Dahab (Modified) algorithm 1085 */ 1086 1087 for (int aPos = 0; aPos < aLen; ++aPos) 1088 { 1089 long aVal = a[aPos]; 1090 int cOff = aPos; 1091 for (;;) 1092 { 1093 int u = (int)aVal & MASK; 1094 aVal >>>= 4; 1095 int v = (int)aVal & MASK; 1096 addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax); 1097 aVal >>>= 4; 1098 if (aVal == 0L) 1099 { 1100 break; 1101 } 1102 cOff += cLen; 1103 } 1104 } 1105 1106 { 1107 int cOff = c.length; 1108 while ((cOff -= cLen) != 0) 1109 { 1110 addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8); 1111 } 1112 } 1113 1114 /* 1115 * Finally the raw answer is collected, reduce it against the reduction coefficients 1116 */ 1117 return reduceResult(c, 0, cLen, m, ks); 1118 } 1119 modMultiplyAlt(LongArray other, int m, int[] ks)1120 public LongArray modMultiplyAlt(LongArray other, int m, int[] ks) 1121 { 1122 /* 1123 * Find out the degree of each argument and handle the zero cases 1124 */ 1125 int aDeg = degree(); 1126 if (aDeg == 0) 1127 { 1128 return this; 1129 } 1130 int bDeg = other.degree(); 1131 if (bDeg == 0) 1132 { 1133 return other; 1134 } 1135 1136 /* 1137 * Swap if necessary so that A is the smaller argument 1138 */ 1139 LongArray A = this, B = other; 1140 if (aDeg > bDeg) 1141 { 1142 A = other; B = this; 1143 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 1144 } 1145 1146 /* 1147 * Establish the word lengths of the arguments and result 1148 */ 1149 int aLen = (aDeg + 63) >>> 6; 1150 int bLen = (bDeg + 63) >>> 6; 1151 int cLen = (aDeg + bDeg + 62) >>> 6; 1152 1153 if (aLen == 1) 1154 { 1155 long a0 = A.m_ints[0]; 1156 if (a0 == 1L) 1157 { 1158 return B; 1159 } 1160 1161 /* 1162 * Fast path for small A, with performance dependent only on the number of set bits 1163 */ 1164 long[] c0 = new long[cLen]; 1165 multiplyWord(a0, B.m_ints, bLen, c0, 0); 1166 1167 /* 1168 * Reduce the raw answer against the reduction coefficients 1169 */ 1170 return reduceResult(c0, 0, cLen, m, ks); 1171 } 1172 1173 // NOTE: This works, but is slower than width 4 processing 1174 // if (aLen == 2) 1175 // { 1176 // /* 1177 // * Use common-multiplicand optimization to save ~1/4 of the adds 1178 // */ 1179 // long a1 = A.m_ints[0], a2 = A.m_ints[1]; 1180 // long aa = a1 & a2; a1 ^= aa; a2 ^= aa; 1181 // 1182 // long[] b = B.m_ints; 1183 // long[] c = new long[cLen]; 1184 // multiplyWord(aa, b, bLen, c, 1); 1185 // add(c, 0, c, 1, cLen - 1); 1186 // multiplyWord(a1, b, bLen, c, 0); 1187 // multiplyWord(a2, b, bLen, c, 1); 1188 // 1189 // /* 1190 // * Reduce the raw answer against the reduction coefficients 1191 // */ 1192 // return reduceResult(c, 0, cLen, m, ks); 1193 // } 1194 1195 /* 1196 * Determine the parameters of the interleaved window algorithm: the 'width' in bits to 1197 * process together, the number of evaluation 'positions' implied by that width, and the 1198 * 'top' position at which the regular window algorithm stops. 1199 */ 1200 int width, positions, top, banks; 1201 1202 // NOTE: width 4 is the fastest over the entire range of sizes used in current crypto 1203 // width = 1; positions = 64; top = 64; banks = 4; 1204 // width = 2; positions = 32; top = 64; banks = 4; 1205 // width = 3; positions = 21; top = 63; banks = 3; 1206 width = 4; positions = 16; top = 64; banks = 8; 1207 // width = 5; positions = 13; top = 65; banks = 7; 1208 // width = 7; positions = 9; top = 63; banks = 9; 1209 // width = 8; positions = 8; top = 64; banks = 8; 1210 1211 /* 1212 * Determine if B will get bigger during shifting 1213 */ 1214 int shifts = top < 64 ? positions : positions - 1; 1215 int bMax = (bDeg + shifts + 63) >>> 6; 1216 1217 int bTotal = bMax * banks, stride = width * banks; 1218 1219 /* 1220 * Create a single temporary buffer, with an offset table to find the positions of things in it 1221 */ 1222 int[] ci = new int[1 << width]; 1223 int cTotal = aLen; 1224 { 1225 ci[0] = cTotal; 1226 cTotal += bTotal; 1227 ci[1] = cTotal; 1228 for (int i = 2; i < ci.length; ++i) 1229 { 1230 cTotal += cLen; 1231 ci[i] = cTotal; 1232 } 1233 cTotal += cLen; 1234 } 1235 // NOTE: Provide a safe dump for "high zeroes" since we are adding 'bMax' and not 'bLen' 1236 ++cTotal; 1237 1238 long[] c = new long[cTotal]; 1239 1240 // Prepare A in interleaved form, according to the chosen width 1241 interleave(A.m_ints, 0, c, 0, aLen, width); 1242 1243 // Make a working copy of B, since we will be shifting it 1244 { 1245 int bOff = aLen; 1246 System.arraycopy(B.m_ints, 0, c, bOff, bLen); 1247 for (int bank = 1; bank < banks; ++bank) 1248 { 1249 shiftUp(c, aLen, c, bOff += bMax, bMax, bank); 1250 } 1251 } 1252 1253 /* 1254 * The main loop analyzes the interleaved windows in A, and for each non-zero window 1255 * a single word-array XOR is performed to a carefully selected slice of 'c'. The loop is 1256 * breadth-first, checking the lowest window in each word, then looping again for the 1257 * next higher window position. 1258 */ 1259 int MASK = (1 << width) - 1; 1260 1261 int k = 0; 1262 for (;;) 1263 { 1264 int aPos = 0; 1265 do 1266 { 1267 long aVal = c[aPos] >>> k; 1268 int bank = 0, bOff = aLen; 1269 for (;;) 1270 { 1271 int index = (int)(aVal) & MASK; 1272 if (index != 0) 1273 { 1274 /* 1275 * Add to a 'c' buffer based on the bit-pattern of 'index'. Since A is in 1276 * interleaved form, the bits represent the current B shifted by 0, 'positions', 1277 * 'positions' * 2, ..., 'positions' * ('width' - 1) 1278 */ 1279 add(c, aPos + ci[index], c, bOff, bMax); 1280 } 1281 if (++bank == banks) 1282 { 1283 break; 1284 } 1285 bOff += bMax; 1286 aVal >>>= width; 1287 } 1288 } 1289 while (++aPos < aLen); 1290 1291 if ((k += stride) >= top) 1292 { 1293 if (k >= 64) 1294 { 1295 break; 1296 } 1297 1298 /* 1299 * Adjustment for window setups with top == 63, the final bit (if any) is processed 1300 * as the top-bit of a window 1301 */ 1302 k = 64 - width; 1303 MASK &= MASK << (top - k); 1304 } 1305 1306 /* 1307 * After each position has been checked for all words of A, B is shifted up 1 place 1308 */ 1309 shiftUp(c, aLen, bTotal, banks); 1310 } 1311 1312 int ciPos = ci.length; 1313 while (--ciPos > 1) 1314 { 1315 if ((ciPos & 1L) == 0L) 1316 { 1317 /* 1318 * For even numbers, shift contents and add to the half-position 1319 */ 1320 addShiftedUp(c, ci[ciPos >>> 1], c, ci[ciPos], cLen, positions); 1321 } 1322 else 1323 { 1324 /* 1325 * For odd numbers, 'distribute' contents to the result and the next-lowest position 1326 */ 1327 distribute(c, ci[ciPos], ci[ciPos - 1], ci[1], cLen); 1328 } 1329 } 1330 1331 /* 1332 * Finally the raw answer is collected, reduce it against the reduction coefficients 1333 */ 1334 return reduceResult(c, ci[1], cLen, m, ks); 1335 } 1336 modReduce(int m, int[] ks)1337 public LongArray modReduce(int m, int[] ks) 1338 { 1339 long[] buf = Arrays.clone(m_ints); 1340 int rLen = reduceInPlace(buf, 0, buf.length, m, ks); 1341 return new LongArray(buf, 0, rLen); 1342 } 1343 multiply(LongArray other, int m, int[] ks)1344 public LongArray multiply(LongArray other, int m, int[] ks) 1345 { 1346 /* 1347 * Find out the degree of each argument and handle the zero cases 1348 */ 1349 int aDeg = degree(); 1350 if (aDeg == 0) 1351 { 1352 return this; 1353 } 1354 int bDeg = other.degree(); 1355 if (bDeg == 0) 1356 { 1357 return other; 1358 } 1359 1360 /* 1361 * Swap if necessary so that A is the smaller argument 1362 */ 1363 LongArray A = this, B = other; 1364 if (aDeg > bDeg) 1365 { 1366 A = other; B = this; 1367 int tmp = aDeg; aDeg = bDeg; bDeg = tmp; 1368 } 1369 1370 /* 1371 * Establish the word lengths of the arguments and result 1372 */ 1373 int aLen = (aDeg + 63) >>> 6; 1374 int bLen = (bDeg + 63) >>> 6; 1375 int cLen = (aDeg + bDeg + 62) >>> 6; 1376 1377 if (aLen == 1) 1378 { 1379 long a0 = A.m_ints[0]; 1380 if (a0 == 1L) 1381 { 1382 return B; 1383 } 1384 1385 /* 1386 * Fast path for small A, with performance dependent only on the number of set bits 1387 */ 1388 long[] c0 = new long[cLen]; 1389 multiplyWord(a0, B.m_ints, bLen, c0, 0); 1390 1391 /* 1392 * Reduce the raw answer against the reduction coefficients 1393 */ 1394 // return reduceResult(c0, 0, cLen, m, ks); 1395 return new LongArray(c0, 0, cLen); 1396 } 1397 1398 /* 1399 * Determine if B will get bigger during shifting 1400 */ 1401 int bMax = (bDeg + 7 + 63) >>> 6; 1402 1403 /* 1404 * Lookup table for the offset of each B in the tables 1405 */ 1406 int[] ti = new int[16]; 1407 1408 /* 1409 * Precompute table of all 4-bit products of B 1410 */ 1411 long[] T0 = new long[bMax << 4]; 1412 int tOff = bMax; 1413 ti[1] = tOff; 1414 System.arraycopy(B.m_ints, 0, T0, tOff, bLen); 1415 for (int i = 2; i < 16; ++i) 1416 { 1417 ti[i] = (tOff += bMax); 1418 if ((i & 1) == 0) 1419 { 1420 shiftUp(T0, tOff >>> 1, T0, tOff, bMax, 1); 1421 } 1422 else 1423 { 1424 add(T0, bMax, T0, tOff - bMax, T0, tOff, bMax); 1425 } 1426 } 1427 1428 /* 1429 * Second table with all 4-bit products of B shifted 4 bits 1430 */ 1431 long[] T1 = new long[T0.length]; 1432 shiftUp(T0, 0, T1, 0, T0.length, 4); 1433 // shiftUp(T0, bMax, T1, bMax, tOff, 4); 1434 1435 long[] a = A.m_ints; 1436 long[] c = new long[cLen << 3]; 1437 1438 int MASK = 0xF; 1439 1440 /* 1441 * Lopez-Dahab (Modified) algorithm 1442 */ 1443 1444 for (int aPos = 0; aPos < aLen; ++aPos) 1445 { 1446 long aVal = a[aPos]; 1447 int cOff = aPos; 1448 for (;;) 1449 { 1450 int u = (int)aVal & MASK; 1451 aVal >>>= 4; 1452 int v = (int)aVal & MASK; 1453 addBoth(c, cOff, T0, ti[u], T1, ti[v], bMax); 1454 aVal >>>= 4; 1455 if (aVal == 0L) 1456 { 1457 break; 1458 } 1459 cOff += cLen; 1460 } 1461 } 1462 1463 { 1464 int cOff = c.length; 1465 while ((cOff -= cLen) != 0) 1466 { 1467 addShiftedUp(c, cOff - cLen, c, cOff, cLen, 8); 1468 } 1469 } 1470 1471 /* 1472 * Finally the raw answer is collected, reduce it against the reduction coefficients 1473 */ 1474 // return reduceResult(c, 0, cLen, m, ks); 1475 return new LongArray(c, 0, cLen); 1476 } 1477 reduce(int m, int[] ks)1478 public void reduce(int m, int[] ks) 1479 { 1480 long[] buf = m_ints; 1481 int rLen = reduceInPlace(buf, 0, buf.length, m, ks); 1482 if (rLen < buf.length) 1483 { 1484 m_ints = new long[rLen]; 1485 System.arraycopy(buf, 0, m_ints, 0, rLen); 1486 } 1487 } 1488 reduceResult(long[] buf, int off, int len, int m, int[] ks)1489 private static LongArray reduceResult(long[] buf, int off, int len, int m, int[] ks) 1490 { 1491 int rLen = reduceInPlace(buf, off, len, m, ks); 1492 return new LongArray(buf, off, rLen); 1493 } 1494 1495 // private static void deInterleave(long[] x, int xOff, long[] z, int zOff, int count, int rounds) 1496 // { 1497 // for (int i = 0; i < count; ++i) 1498 // { 1499 // z[zOff + i] = deInterleave(x[zOff + i], rounds); 1500 // } 1501 // } 1502 // 1503 // private static long deInterleave(long x, int rounds) 1504 // { 1505 // while (--rounds >= 0) 1506 // { 1507 // x = deInterleave32(x & DEINTERLEAVE_MASK) | (deInterleave32((x >>> 1) & DEINTERLEAVE_MASK) << 32); 1508 // } 1509 // return x; 1510 // } 1511 // 1512 // private static long deInterleave32(long x) 1513 // { 1514 // x = (x | (x >>> 1)) & 0x3333333333333333L; 1515 // x = (x | (x >>> 2)) & 0x0F0F0F0F0F0F0F0FL; 1516 // x = (x | (x >>> 4)) & 0x00FF00FF00FF00FFL; 1517 // x = (x | (x >>> 8)) & 0x0000FFFF0000FFFFL; 1518 // x = (x | (x >>> 16)) & 0x00000000FFFFFFFFL; 1519 // return x; 1520 // } 1521 reduceInPlace(long[] buf, int off, int len, int m, int[] ks)1522 private static int reduceInPlace(long[] buf, int off, int len, int m, int[] ks) 1523 { 1524 int mLen = (m + 63) >>> 6; 1525 if (len < mLen) 1526 { 1527 return len; 1528 } 1529 1530 int numBits = Math.min(len << 6, (m << 1) - 1); // TODO use actual degree? 1531 int excessBits = (len << 6) - numBits; 1532 while (excessBits >= 64) 1533 { 1534 --len; 1535 excessBits -= 64; 1536 } 1537 1538 int kLen = ks.length, kMax = ks[kLen - 1], kNext = kLen > 1 ? ks[kLen - 2] : 0; 1539 int wordWiseLimit = Math.max(m, kMax + 64); 1540 int vectorableWords = (excessBits + Math.min(numBits - wordWiseLimit, m - kNext)) >> 6; 1541 if (vectorableWords > 1) 1542 { 1543 int vectorWiseWords = len - vectorableWords; 1544 reduceVectorWise(buf, off, len, vectorWiseWords, m, ks); 1545 while (len > vectorWiseWords) 1546 { 1547 buf[off + --len] = 0L; 1548 } 1549 numBits = vectorWiseWords << 6; 1550 } 1551 1552 if (numBits > wordWiseLimit) 1553 { 1554 reduceWordWise(buf, off, len, wordWiseLimit, m, ks); 1555 numBits = wordWiseLimit; 1556 } 1557 1558 if (numBits > m) 1559 { 1560 reduceBitWise(buf, off, numBits, m, ks); 1561 } 1562 1563 return mLen; 1564 } 1565 reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks)1566 private static void reduceBitWise(long[] buf, int off, int bitlength, int m, int[] ks) 1567 { 1568 while (--bitlength >= m) 1569 { 1570 if (testBit(buf, off, bitlength)) 1571 { 1572 reduceBit(buf, off, bitlength, m, ks); 1573 } 1574 } 1575 } 1576 reduceBit(long[] buf, int off, int bit, int m, int[] ks)1577 private static void reduceBit(long[] buf, int off, int bit, int m, int[] ks) 1578 { 1579 flipBit(buf, off, bit); 1580 int n = bit - m; 1581 int j = ks.length; 1582 while (--j >= 0) 1583 { 1584 flipBit(buf, off, ks[j] + n); 1585 } 1586 flipBit(buf, off, n); 1587 } 1588 reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks)1589 private static void reduceWordWise(long[] buf, int off, int len, int toBit, int m, int[] ks) 1590 { 1591 int toPos = toBit >>> 6; 1592 1593 while (--len > toPos) 1594 { 1595 long word = buf[off + len]; 1596 if (word != 0) 1597 { 1598 buf[off + len] = 0; 1599 reduceWord(buf, off, (len << 6), word, m, ks); 1600 } 1601 } 1602 1603 { 1604 int partial = toBit & 0x3F; 1605 long word = buf[off + toPos] >>> partial; 1606 if (word != 0) 1607 { 1608 buf[off + toPos] ^= word << partial; 1609 reduceWord(buf, off, toBit, word, m, ks); 1610 } 1611 } 1612 } 1613 reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks)1614 private static void reduceWord(long[] buf, int off, int bit, long word, int m, int[] ks) 1615 { 1616 int offset = bit - m; 1617 int j = ks.length; 1618 while (--j >= 0) 1619 { 1620 flipWord(buf, off, offset + ks[j], word); 1621 } 1622 flipWord(buf, off, offset, word); 1623 } 1624 reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks)1625 private static void reduceVectorWise(long[] buf, int off, int len, int words, int m, int[] ks) 1626 { 1627 /* 1628 * NOTE: It's important we go from highest coefficient to lowest, because for the highest 1629 * one (only) we allow the ranges to partially overlap, and therefore any changes must take 1630 * effect for the subsequent lower coefficients. 1631 */ 1632 int baseBit = (words << 6) - m; 1633 int j = ks.length; 1634 while (--j >= 0) 1635 { 1636 flipVector(buf, off, buf, off + words, len - words, baseBit + ks[j]); 1637 } 1638 flipVector(buf, off, buf, off + words, len - words, baseBit); 1639 } 1640 flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits)1641 private static void flipVector(long[] x, int xOff, long[] y, int yOff, int yLen, int bits) 1642 { 1643 xOff += bits >>> 6; 1644 bits &= 0x3F; 1645 1646 if (bits == 0) 1647 { 1648 add(x, xOff, y, yOff, yLen); 1649 } 1650 else 1651 { 1652 long carry = addShiftedDown(x, xOff + 1, y, yOff, yLen, 64 - bits); 1653 x[xOff] ^= carry; 1654 } 1655 } 1656 modSquare(int m, int[] ks)1657 public LongArray modSquare(int m, int[] ks) 1658 { 1659 int len = getUsedLength(); 1660 if (len == 0) 1661 { 1662 return this; 1663 } 1664 1665 int _2len = len << 1; 1666 long[] r = new long[_2len]; 1667 1668 int pos = 0; 1669 while (pos < _2len) 1670 { 1671 long mi = m_ints[pos >>> 1]; 1672 r[pos++] = interleave2_32to64((int)mi); 1673 r[pos++] = interleave2_32to64((int)(mi >>> 32)); 1674 } 1675 1676 return new LongArray(r, 0, reduceInPlace(r, 0, r.length, m, ks)); 1677 } 1678 modSquareN(int n, int m, int[] ks)1679 public LongArray modSquareN(int n, int m, int[] ks) 1680 { 1681 int len = getUsedLength(); 1682 if (len == 0) 1683 { 1684 return this; 1685 } 1686 1687 int mLen = (m + 63) >>> 6; 1688 long[] r = new long[mLen << 1]; 1689 System.arraycopy(m_ints, 0, r, 0, len); 1690 1691 while (--n >= 0) 1692 { 1693 squareInPlace(r, len, m, ks); 1694 len = reduceInPlace(r, 0, r.length, m, ks); 1695 } 1696 1697 return new LongArray(r, 0, len); 1698 } 1699 square(int m, int[] ks)1700 public LongArray square(int m, int[] ks) 1701 { 1702 int len = getUsedLength(); 1703 if (len == 0) 1704 { 1705 return this; 1706 } 1707 1708 int _2len = len << 1; 1709 long[] r = new long[_2len]; 1710 1711 int pos = 0; 1712 while (pos < _2len) 1713 { 1714 long mi = m_ints[pos >>> 1]; 1715 r[pos++] = interleave2_32to64((int)mi); 1716 r[pos++] = interleave2_32to64((int)(mi >>> 32)); 1717 } 1718 1719 return new LongArray(r, 0, r.length); 1720 } 1721 squareInPlace(long[] x, int xLen, int m, int[] ks)1722 private static void squareInPlace(long[] x, int xLen, int m, int[] ks) 1723 { 1724 int pos = xLen << 1; 1725 while (--xLen >= 0) 1726 { 1727 long xVal = x[xLen]; 1728 x[--pos] = interleave2_32to64((int)(xVal >>> 32)); 1729 x[--pos] = interleave2_32to64((int)xVal); 1730 } 1731 } 1732 interleave(long[] x, int xOff, long[] z, int zOff, int count, int width)1733 private static void interleave(long[] x, int xOff, long[] z, int zOff, int count, int width) 1734 { 1735 switch (width) 1736 { 1737 case 3: 1738 interleave3(x, xOff, z, zOff, count); 1739 break; 1740 case 5: 1741 interleave5(x, xOff, z, zOff, count); 1742 break; 1743 case 7: 1744 interleave7(x, xOff, z, zOff, count); 1745 break; 1746 default: 1747 interleave2_n(x, xOff, z, zOff, count, bitLengths[width] - 1); 1748 break; 1749 } 1750 } 1751 interleave3(long[] x, int xOff, long[] z, int zOff, int count)1752 private static void interleave3(long[] x, int xOff, long[] z, int zOff, int count) 1753 { 1754 for (int i = 0; i < count; ++i) 1755 { 1756 z[zOff + i] = interleave3(x[xOff + i]); 1757 } 1758 } 1759 interleave3(long x)1760 private static long interleave3(long x) 1761 { 1762 long z = x & (1L << 63); 1763 return z 1764 | interleave3_21to63((int)x & 0x1FFFFF) 1765 | interleave3_21to63((int)(x >>> 21) & 0x1FFFFF) << 1 1766 | interleave3_21to63((int)(x >>> 42) & 0x1FFFFF) << 2; 1767 1768 // int zPos = 0, wPos = 0, xPos = 0; 1769 // for (;;) 1770 // { 1771 // z |= ((x >>> xPos) & 1L) << zPos; 1772 // if (++zPos == 63) 1773 // { 1774 // String sz2 = Long.toBinaryString(z); 1775 // return z; 1776 // } 1777 // if ((xPos += 21) >= 63) 1778 // { 1779 // xPos = ++wPos; 1780 // } 1781 // } 1782 } 1783 interleave3_21to63(int x)1784 private static long interleave3_21to63(int x) 1785 { 1786 int r00 = INTERLEAVE3_TABLE[x & 0x7F]; 1787 int r21 = INTERLEAVE3_TABLE[(x >>> 7) & 0x7F]; 1788 int r42 = INTERLEAVE3_TABLE[x >>> 14]; 1789 return (r42 & 0xFFFFFFFFL) << 42 | (r21 & 0xFFFFFFFFL) << 21 | (r00 & 0xFFFFFFFFL); 1790 } 1791 interleave5(long[] x, int xOff, long[] z, int zOff, int count)1792 private static void interleave5(long[] x, int xOff, long[] z, int zOff, int count) 1793 { 1794 for (int i = 0; i < count; ++i) 1795 { 1796 z[zOff + i] = interleave5(x[xOff + i]); 1797 } 1798 } 1799 interleave5(long x)1800 private static long interleave5(long x) 1801 { 1802 return interleave3_13to65((int)x & 0x1FFF) 1803 | interleave3_13to65((int)(x >>> 13) & 0x1FFF) << 1 1804 | interleave3_13to65((int)(x >>> 26) & 0x1FFF) << 2 1805 | interleave3_13to65((int)(x >>> 39) & 0x1FFF) << 3 1806 | interleave3_13to65((int)(x >>> 52) & 0x1FFF) << 4; 1807 1808 // long z = 0; 1809 // int zPos = 0, wPos = 0, xPos = 0; 1810 // for (;;) 1811 // { 1812 // z |= ((x >>> xPos) & 1L) << zPos; 1813 // if (++zPos == 64) 1814 // { 1815 // return z; 1816 // } 1817 // if ((xPos += 13) >= 64) 1818 // { 1819 // xPos = ++wPos; 1820 // } 1821 // } 1822 } 1823 interleave3_13to65(int x)1824 private static long interleave3_13to65(int x) 1825 { 1826 int r00 = INTERLEAVE5_TABLE[x & 0x7F]; 1827 int r35 = INTERLEAVE5_TABLE[x >>> 7]; 1828 return (r35 & 0xFFFFFFFFL) << 35 | (r00 & 0xFFFFFFFFL); 1829 } 1830 interleave7(long[] x, int xOff, long[] z, int zOff, int count)1831 private static void interleave7(long[] x, int xOff, long[] z, int zOff, int count) 1832 { 1833 for (int i = 0; i < count; ++i) 1834 { 1835 z[zOff + i] = interleave7(x[xOff + i]); 1836 } 1837 } 1838 interleave7(long x)1839 private static long interleave7(long x) 1840 { 1841 long z = x & (1L << 63); 1842 return z 1843 | INTERLEAVE7_TABLE[(int)x & 0x1FF] 1844 | INTERLEAVE7_TABLE[(int)(x >>> 9) & 0x1FF] << 1 1845 | INTERLEAVE7_TABLE[(int)(x >>> 18) & 0x1FF] << 2 1846 | INTERLEAVE7_TABLE[(int)(x >>> 27) & 0x1FF] << 3 1847 | INTERLEAVE7_TABLE[(int)(x >>> 36) & 0x1FF] << 4 1848 | INTERLEAVE7_TABLE[(int)(x >>> 45) & 0x1FF] << 5 1849 | INTERLEAVE7_TABLE[(int)(x >>> 54) & 0x1FF] << 6; 1850 1851 // int zPos = 0, wPos = 0, xPos = 0; 1852 // for (;;) 1853 // { 1854 // z |= ((x >>> xPos) & 1L) << zPos; 1855 // if (++zPos == 63) 1856 // { 1857 // return z; 1858 // } 1859 // if ((xPos += 9) >= 63) 1860 // { 1861 // xPos = ++wPos; 1862 // } 1863 // } 1864 } 1865 interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds)1866 private static void interleave2_n(long[] x, int xOff, long[] z, int zOff, int count, int rounds) 1867 { 1868 for (int i = 0; i < count; ++i) 1869 { 1870 z[zOff + i] = interleave2_n(x[xOff + i], rounds); 1871 } 1872 } 1873 interleave2_n(long x, int rounds)1874 private static long interleave2_n(long x, int rounds) 1875 { 1876 while (rounds > 1) 1877 { 1878 rounds -= 2; 1879 x = interleave4_16to64((int)x & 0xFFFF) 1880 | interleave4_16to64((int)(x >>> 16) & 0xFFFF) << 1 1881 | interleave4_16to64((int)(x >>> 32) & 0xFFFF) << 2 1882 | interleave4_16to64((int)(x >>> 48) & 0xFFFF) << 3; 1883 } 1884 if (rounds > 0) 1885 { 1886 x = interleave2_32to64((int)x) | interleave2_32to64((int)(x >>> 32)) << 1; 1887 } 1888 return x; 1889 } 1890 interleave4_16to64(int x)1891 private static long interleave4_16to64(int x) 1892 { 1893 int r00 = INTERLEAVE4_TABLE[x & 0xFF]; 1894 int r32 = INTERLEAVE4_TABLE[x >>> 8]; 1895 return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL); 1896 } 1897 interleave2_32to64(int x)1898 private static long interleave2_32to64(int x) 1899 { 1900 int r00 = INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >>> 8) & 0xFF] << 16; 1901 int r32 = INTERLEAVE2_TABLE[(x >>> 16) & 0xFF] | INTERLEAVE2_TABLE[x >>> 24] << 16; 1902 return (r32 & 0xFFFFFFFFL) << 32 | (r00 & 0xFFFFFFFFL); 1903 } 1904 1905 // private static LongArray expItohTsujii2(LongArray B, int n, int m, int[] ks) 1906 // { 1907 // LongArray t1 = B, t3 = new LongArray(new long[]{ 1L }); 1908 // int scale = 1; 1909 // 1910 // int numTerms = n; 1911 // while (numTerms > 1) 1912 // { 1913 // if ((numTerms & 1) != 0) 1914 // { 1915 // t3 = t3.modMultiply(t1, m, ks); 1916 // t1 = t1.modSquareN(scale, m, ks); 1917 // } 1918 // 1919 // LongArray t2 = t1.modSquareN(scale, m, ks); 1920 // t1 = t1.modMultiply(t2, m, ks); 1921 // numTerms >>>= 1; scale <<= 1; 1922 // } 1923 // 1924 // return t3.modMultiply(t1, m, ks); 1925 // } 1926 // 1927 // private static LongArray expItohTsujii23(LongArray B, int n, int m, int[] ks) 1928 // { 1929 // LongArray t1 = B, t3 = new LongArray(new long[]{ 1L }); 1930 // int scale = 1; 1931 // 1932 // int numTerms = n; 1933 // while (numTerms > 1) 1934 // { 1935 // boolean m03 = numTerms % 3 == 0; 1936 // boolean m14 = !m03 && (numTerms & 1) != 0; 1937 // 1938 // if (m14) 1939 // { 1940 // t3 = t3.modMultiply(t1, m, ks); 1941 // t1 = t1.modSquareN(scale, m, ks); 1942 // } 1943 // 1944 // LongArray t2 = t1.modSquareN(scale, m, ks); 1945 // t1 = t1.modMultiply(t2, m, ks); 1946 // 1947 // if (m03) 1948 // { 1949 // t2 = t2.modSquareN(scale, m, ks); 1950 // t1 = t1.modMultiply(t2, m, ks); 1951 // numTerms /= 3; scale *= 3; 1952 // } 1953 // else 1954 // { 1955 // numTerms >>>= 1; scale <<= 1; 1956 // } 1957 // } 1958 // 1959 // return t3.modMultiply(t1, m, ks); 1960 // } 1961 // 1962 // private static LongArray expItohTsujii235(LongArray B, int n, int m, int[] ks) 1963 // { 1964 // LongArray t1 = B, t4 = new LongArray(new long[]{ 1L }); 1965 // int scale = 1; 1966 // 1967 // int numTerms = n; 1968 // while (numTerms > 1) 1969 // { 1970 // if (numTerms % 5 == 0) 1971 // { 1972 //// t1 = expItohTsujii23(t1, 5, m, ks); 1973 // 1974 // LongArray t3 = t1; 1975 // t1 = t1.modSquareN(scale, m, ks); 1976 // 1977 // LongArray t2 = t1.modSquareN(scale, m, ks); 1978 // t1 = t1.modMultiply(t2, m, ks); 1979 // t2 = t1.modSquareN(scale << 1, m, ks); 1980 // t1 = t1.modMultiply(t2, m, ks); 1981 // 1982 // t1 = t1.modMultiply(t3, m, ks); 1983 // 1984 // numTerms /= 5; scale *= 5; 1985 // continue; 1986 // } 1987 // 1988 // boolean m03 = numTerms % 3 == 0; 1989 // boolean m14 = !m03 && (numTerms & 1) != 0; 1990 // 1991 // if (m14) 1992 // { 1993 // t4 = t4.modMultiply(t1, m, ks); 1994 // t1 = t1.modSquareN(scale, m, ks); 1995 // } 1996 // 1997 // LongArray t2 = t1.modSquareN(scale, m, ks); 1998 // t1 = t1.modMultiply(t2, m, ks); 1999 // 2000 // if (m03) 2001 // { 2002 // t2 = t2.modSquareN(scale, m, ks); 2003 // t1 = t1.modMultiply(t2, m, ks); 2004 // numTerms /= 3; scale *= 3; 2005 // } 2006 // else 2007 // { 2008 // numTerms >>>= 1; scale <<= 1; 2009 // } 2010 // } 2011 // 2012 // return t4.modMultiply(t1, m, ks); 2013 // } 2014 modInverse(int m, int[] ks)2015 public LongArray modInverse(int m, int[] ks) 2016 { 2017 /* 2018 * Fermat's Little Theorem 2019 */ 2020 // LongArray A = this; 2021 // LongArray B = A.modSquare(m, ks); 2022 // LongArray R0 = B, R1 = B; 2023 // for (int i = 2; i < m; ++i) 2024 // { 2025 // R1 = R1.modSquare(m, ks); 2026 // R0 = R0.modMultiply(R1, m, ks); 2027 // } 2028 // 2029 // return R0; 2030 2031 /* 2032 * Itoh-Tsujii 2033 */ 2034 // LongArray B = modSquare(m, ks); 2035 // switch (m) 2036 // { 2037 // case 409: 2038 // return expItohTsujii23(B, m - 1, m, ks); 2039 // case 571: 2040 // return expItohTsujii235(B, m - 1, m, ks); 2041 // case 163: 2042 // case 233: 2043 // case 283: 2044 // default: 2045 // return expItohTsujii2(B, m - 1, m, ks); 2046 // } 2047 2048 /* 2049 * Inversion in F2m using the extended Euclidean algorithm 2050 * 2051 * Input: A nonzero polynomial a(z) of degree at most m-1 2052 * Output: a(z)^(-1) mod f(z) 2053 */ 2054 int uzDegree = degree(); 2055 if (uzDegree == 0) 2056 { 2057 throw new IllegalStateException(); 2058 } 2059 if (uzDegree == 1) 2060 { 2061 return this; 2062 } 2063 2064 // u(z) := a(z) 2065 LongArray uz = (LongArray)clone(); 2066 2067 int t = (m + 63) >>> 6; 2068 2069 // v(z) := f(z) 2070 LongArray vz = new LongArray(t); 2071 reduceBit(vz.m_ints, 0, m, m, ks); 2072 2073 // g1(z) := 1, g2(z) := 0 2074 LongArray g1z = new LongArray(t); 2075 g1z.m_ints[0] = 1L; 2076 LongArray g2z = new LongArray(t); 2077 2078 int[] uvDeg = new int[]{ uzDegree, m + 1 }; 2079 LongArray[] uv = new LongArray[]{ uz, vz }; 2080 2081 int[] ggDeg = new int[]{ 1, 0 }; 2082 LongArray[] gg = new LongArray[]{ g1z, g2z }; 2083 2084 int b = 1; 2085 int duv1 = uvDeg[b]; 2086 int dgg1 = ggDeg[b]; 2087 int j = duv1 - uvDeg[1 - b]; 2088 2089 for (;;) 2090 { 2091 if (j < 0) 2092 { 2093 j = -j; 2094 uvDeg[b] = duv1; 2095 ggDeg[b] = dgg1; 2096 b = 1 - b; 2097 duv1 = uvDeg[b]; 2098 dgg1 = ggDeg[b]; 2099 } 2100 2101 uv[b].addShiftedByBitsSafe(uv[1 - b], uvDeg[1 - b], j); 2102 2103 int duv2 = uv[b].degreeFrom(duv1); 2104 if (duv2 == 0) 2105 { 2106 return gg[1 - b]; 2107 } 2108 2109 { 2110 int dgg2 = ggDeg[1 - b]; 2111 gg[b].addShiftedByBitsSafe(gg[1 - b], dgg2, j); 2112 dgg2 += j; 2113 2114 if (dgg2 > dgg1) 2115 { 2116 dgg1 = dgg2; 2117 } 2118 else if (dgg2 == dgg1) 2119 { 2120 dgg1 = gg[b].degreeFrom(dgg1); 2121 } 2122 } 2123 2124 j += (duv2 - duv1); 2125 duv1 = duv2; 2126 } 2127 } 2128 equals(Object o)2129 public boolean equals(Object o) 2130 { 2131 if (!(o instanceof LongArray)) 2132 { 2133 return false; 2134 } 2135 LongArray other = (LongArray) o; 2136 int usedLen = getUsedLength(); 2137 if (other.getUsedLength() != usedLen) 2138 { 2139 return false; 2140 } 2141 for (int i = 0; i < usedLen; i++) 2142 { 2143 if (m_ints[i] != other.m_ints[i]) 2144 { 2145 return false; 2146 } 2147 } 2148 return true; 2149 } 2150 hashCode()2151 public int hashCode() 2152 { 2153 int usedLen = getUsedLength(); 2154 int hash = 1; 2155 for (int i = 0; i < usedLen; i++) 2156 { 2157 long mi = m_ints[i]; 2158 hash *= 31; 2159 hash ^= (int)mi; 2160 hash *= 31; 2161 hash ^= (int)(mi >>> 32); 2162 } 2163 return hash; 2164 } 2165 clone()2166 public Object clone() 2167 { 2168 return new LongArray(Arrays.clone(m_ints)); 2169 } 2170 toString()2171 public String toString() 2172 { 2173 int i = getUsedLength(); 2174 if (i == 0) 2175 { 2176 return "0"; 2177 } 2178 2179 StringBuffer sb = new StringBuffer(Long.toBinaryString(m_ints[--i])); 2180 while (--i >= 0) 2181 { 2182 String s = Long.toBinaryString(m_ints[i]); 2183 2184 // Add leading zeroes, except for highest significant word 2185 int len = s.length(); 2186 if (len < 64) 2187 { 2188 sb.append(ZEROES.substring(len)); 2189 } 2190 2191 sb.append(s); 2192 } 2193 return sb.toString(); 2194 } 2195 }