Lines Matching refs:b
27 movi v19.16b, #0xe1
29 ext v3.16b, v17.16b, v17.16b, #8
32 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
35 and v18.16b, v18.16b, v16.16b
37 ext v18.16b, v18.16b, v18.16b, #8
38 and v16.16b, v16.16b, v17.16b
39 orr v3.16b, v3.16b, v18.16b // H<<<=1
40 eor v5.16b, v3.16b, v16.16b // twisted H
51 ld1 {v3.16b}, [x0] // load Xi
57 rev64 v3.16b, v3.16b // byteswap Xi
58 ext v3.16b, v3.16b, v3.16b, #8
59 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
62 b Lgmult_neon
71 ld1 {v0.16b}, [x0] // load Xi
77 rev64 v0.16b, v0.16b // byteswap Xi
78 ext v0.16b, v0.16b, v0.16b, #8
79 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
82 ld1 {v3.16b}, [x2], #16 // load inp
83 rev64 v3.16b, v3.16b // byteswap inp
84 ext v3.16b, v3.16b, v3.16b, #8
85 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
91 ext v16.8b, v5.8b, v5.8b, #1 // A1
92 pmull v16.8h, v16.8b, v3.8b // F = A1*B
93 ext v0.8b, v3.8b, v3.8b, #1 // B1
94 pmull v0.8h, v5.8b, v0.8b // E = A*B1
95 ext v17.8b, v5.8b, v5.8b, #2 // A2
96 pmull v17.8h, v17.8b, v3.8b // H = A2*B
97 ext v19.8b, v3.8b, v3.8b, #2 // B2
98 pmull v19.8h, v5.8b, v19.8b // G = A*B2
99 ext v18.8b, v5.8b, v5.8b, #3 // A3
100 eor v16.16b, v16.16b, v0.16b // L = E + F
101 pmull v18.8h, v18.8b, v3.8b // J = A3*B
102 ext v0.8b, v3.8b, v3.8b, #3 // B3
103 eor v17.16b, v17.16b, v19.16b // M = G + H
104 pmull v0.8h, v5.8b, v0.8b // I = A*B3
129 ext v19.8b, v3.8b, v3.8b, #4 // B4
130 eor v18.16b, v18.16b, v0.16b // N = I + J
131 pmull v19.8h, v5.8b, v19.8b // K = A*B4
139 eor v20.16b, v20.16b, v21.16b
140 eor v22.16b, v22.16b, v23.16b
141 and v21.16b, v21.16b, v24.16b
142 and v23.16b, v23.16b, v25.16b
143 eor v20.16b, v20.16b, v21.16b
144 eor v22.16b, v22.16b, v23.16b
150 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
151 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
152 pmull v0.8h, v5.8b, v3.8b // D = A*B
153 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
154 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
155 eor v16.16b, v16.16b, v17.16b
156 eor v18.16b, v18.16b, v19.16b
157 eor v0.16b, v0.16b, v16.16b
158 eor v0.16b, v0.16b, v18.16b
159 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
160 ext v16.8b, v7.8b, v7.8b, #1 // A1
161 pmull v16.8h, v16.8b, v3.8b // F = A1*B
162 ext v1.8b, v3.8b, v3.8b, #1 // B1
163 pmull v1.8h, v7.8b, v1.8b // E = A*B1
164 ext v17.8b, v7.8b, v7.8b, #2 // A2
165 pmull v17.8h, v17.8b, v3.8b // H = A2*B
166 ext v19.8b, v3.8b, v3.8b, #2 // B2
167 pmull v19.8h, v7.8b, v19.8b // G = A*B2
168 ext v18.8b, v7.8b, v7.8b, #3 // A3
169 eor v16.16b, v16.16b, v1.16b // L = E + F
170 pmull v18.8h, v18.8b, v3.8b // J = A3*B
171 ext v1.8b, v3.8b, v3.8b, #3 // B3
172 eor v17.16b, v17.16b, v19.16b // M = G + H
173 pmull v1.8h, v7.8b, v1.8b // I = A*B3
198 ext v19.8b, v3.8b, v3.8b, #4 // B4
199 eor v18.16b, v18.16b, v1.16b // N = I + J
200 pmull v19.8h, v7.8b, v19.8b // K = A*B4
208 eor v20.16b, v20.16b, v21.16b
209 eor v22.16b, v22.16b, v23.16b
210 and v21.16b, v21.16b, v24.16b
211 and v23.16b, v23.16b, v25.16b
212 eor v20.16b, v20.16b, v21.16b
213 eor v22.16b, v22.16b, v23.16b
219 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
220 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
221 pmull v1.8h, v7.8b, v3.8b // D = A*B
222 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
223 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
224 eor v16.16b, v16.16b, v17.16b
225 eor v18.16b, v18.16b, v19.16b
226 eor v1.16b, v1.16b, v16.16b
227 eor v1.16b, v1.16b, v18.16b
228 ext v16.8b, v6.8b, v6.8b, #1 // A1
229 pmull v16.8h, v16.8b, v4.8b // F = A1*B
230 ext v2.8b, v4.8b, v4.8b, #1 // B1
231 pmull v2.8h, v6.8b, v2.8b // E = A*B1
232 ext v17.8b, v6.8b, v6.8b, #2 // A2
233 pmull v17.8h, v17.8b, v4.8b // H = A2*B
234 ext v19.8b, v4.8b, v4.8b, #2 // B2
235 pmull v19.8h, v6.8b, v19.8b // G = A*B2
236 ext v18.8b, v6.8b, v6.8b, #3 // A3
237 eor v16.16b, v16.16b, v2.16b // L = E + F
238 pmull v18.8h, v18.8b, v4.8b // J = A3*B
239 ext v2.8b, v4.8b, v4.8b, #3 // B3
240 eor v17.16b, v17.16b, v19.16b // M = G + H
241 pmull v2.8h, v6.8b, v2.8b // I = A*B3
266 ext v19.8b, v4.8b, v4.8b, #4 // B4
267 eor v18.16b, v18.16b, v2.16b // N = I + J
268 pmull v19.8h, v6.8b, v19.8b // K = A*B4
276 eor v20.16b, v20.16b, v21.16b
277 eor v22.16b, v22.16b, v23.16b
278 and v21.16b, v21.16b, v24.16b
279 and v23.16b, v23.16b, v25.16b
280 eor v20.16b, v20.16b, v21.16b
281 eor v22.16b, v22.16b, v23.16b
287 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
288 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
289 pmull v2.8h, v6.8b, v4.8b // D = A*B
290 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
291 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
292 eor v16.16b, v16.16b, v17.16b
293 eor v18.16b, v18.16b, v19.16b
294 eor v2.16b, v2.16b, v16.16b
295 eor v2.16b, v2.16b, v18.16b
296 ext v16.16b, v0.16b, v2.16b, #8
297 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
298 eor v1.16b, v1.16b, v2.16b
299 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
307 eor v18.16b, v18.16b, v17.16b //
309 eor v18.16b, v18.16b, v17.16b //
311 eor v18.16b, v18.16b, v1.16b
316 eor v2.16b, v2.16b,v0.16b
317 eor v0.16b, v0.16b,v18.16b //
320 eor v0.16b, v0.16b, v2.16b //
321 eor v0.16b, v0.16b, v18.16b //
326 rev64 v0.16b, v0.16b // byteswap Xi and write
327 ext v0.16b, v0.16b, v0.16b, #8
328 st1 {v0.16b}, [x0]