Lines Matching refs:b
28 movi v19.16b, #0xe1
30 ext v3.16b, v17.16b, v17.16b, #8
33 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
36 and v18.16b, v18.16b, v16.16b
38 ext v18.16b, v18.16b, v18.16b, #8
39 and v16.16b, v16.16b, v17.16b
40 orr v3.16b, v3.16b, v18.16b // H<<<=1
41 eor v5.16b, v3.16b, v16.16b // twisted H
52 ld1 {v3.16b}, [x0] // load Xi
58 rev64 v3.16b, v3.16b // byteswap Xi
59 ext v3.16b, v3.16b, v3.16b, #8
60 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
63 b .Lgmult_neon
72 ld1 {v0.16b}, [x0] // load Xi
78 rev64 v0.16b, v0.16b // byteswap Xi
79 ext v0.16b, v0.16b, v0.16b, #8
80 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
83 ld1 {v3.16b}, [x2], #16 // load inp
84 rev64 v3.16b, v3.16b // byteswap inp
85 ext v3.16b, v3.16b, v3.16b, #8
86 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
92 ext v16.8b, v5.8b, v5.8b, #1 // A1
93 pmull v16.8h, v16.8b, v3.8b // F = A1*B
94 ext v0.8b, v3.8b, v3.8b, #1 // B1
95 pmull v0.8h, v5.8b, v0.8b // E = A*B1
96 ext v17.8b, v5.8b, v5.8b, #2 // A2
97 pmull v17.8h, v17.8b, v3.8b // H = A2*B
98 ext v19.8b, v3.8b, v3.8b, #2 // B2
99 pmull v19.8h, v5.8b, v19.8b // G = A*B2
100 ext v18.8b, v5.8b, v5.8b, #3 // A3
101 eor v16.16b, v16.16b, v0.16b // L = E + F
102 pmull v18.8h, v18.8b, v3.8b // J = A3*B
103 ext v0.8b, v3.8b, v3.8b, #3 // B3
104 eor v17.16b, v17.16b, v19.16b // M = G + H
105 pmull v0.8h, v5.8b, v0.8b // I = A*B3
130 ext v19.8b, v3.8b, v3.8b, #4 // B4
131 eor v18.16b, v18.16b, v0.16b // N = I + J
132 pmull v19.8h, v5.8b, v19.8b // K = A*B4
140 eor v20.16b, v20.16b, v21.16b
141 eor v22.16b, v22.16b, v23.16b
142 and v21.16b, v21.16b, v24.16b
143 and v23.16b, v23.16b, v25.16b
144 eor v20.16b, v20.16b, v21.16b
145 eor v22.16b, v22.16b, v23.16b
151 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
152 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
153 pmull v0.8h, v5.8b, v3.8b // D = A*B
154 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
155 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
156 eor v16.16b, v16.16b, v17.16b
157 eor v18.16b, v18.16b, v19.16b
158 eor v0.16b, v0.16b, v16.16b
159 eor v0.16b, v0.16b, v18.16b
160 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
161 ext v16.8b, v7.8b, v7.8b, #1 // A1
162 pmull v16.8h, v16.8b, v3.8b // F = A1*B
163 ext v1.8b, v3.8b, v3.8b, #1 // B1
164 pmull v1.8h, v7.8b, v1.8b // E = A*B1
165 ext v17.8b, v7.8b, v7.8b, #2 // A2
166 pmull v17.8h, v17.8b, v3.8b // H = A2*B
167 ext v19.8b, v3.8b, v3.8b, #2 // B2
168 pmull v19.8h, v7.8b, v19.8b // G = A*B2
169 ext v18.8b, v7.8b, v7.8b, #3 // A3
170 eor v16.16b, v16.16b, v1.16b // L = E + F
171 pmull v18.8h, v18.8b, v3.8b // J = A3*B
172 ext v1.8b, v3.8b, v3.8b, #3 // B3
173 eor v17.16b, v17.16b, v19.16b // M = G + H
174 pmull v1.8h, v7.8b, v1.8b // I = A*B3
199 ext v19.8b, v3.8b, v3.8b, #4 // B4
200 eor v18.16b, v18.16b, v1.16b // N = I + J
201 pmull v19.8h, v7.8b, v19.8b // K = A*B4
209 eor v20.16b, v20.16b, v21.16b
210 eor v22.16b, v22.16b, v23.16b
211 and v21.16b, v21.16b, v24.16b
212 and v23.16b, v23.16b, v25.16b
213 eor v20.16b, v20.16b, v21.16b
214 eor v22.16b, v22.16b, v23.16b
220 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
221 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
222 pmull v1.8h, v7.8b, v3.8b // D = A*B
223 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
224 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
225 eor v16.16b, v16.16b, v17.16b
226 eor v18.16b, v18.16b, v19.16b
227 eor v1.16b, v1.16b, v16.16b
228 eor v1.16b, v1.16b, v18.16b
229 ext v16.8b, v6.8b, v6.8b, #1 // A1
230 pmull v16.8h, v16.8b, v4.8b // F = A1*B
231 ext v2.8b, v4.8b, v4.8b, #1 // B1
232 pmull v2.8h, v6.8b, v2.8b // E = A*B1
233 ext v17.8b, v6.8b, v6.8b, #2 // A2
234 pmull v17.8h, v17.8b, v4.8b // H = A2*B
235 ext v19.8b, v4.8b, v4.8b, #2 // B2
236 pmull v19.8h, v6.8b, v19.8b // G = A*B2
237 ext v18.8b, v6.8b, v6.8b, #3 // A3
238 eor v16.16b, v16.16b, v2.16b // L = E + F
239 pmull v18.8h, v18.8b, v4.8b // J = A3*B
240 ext v2.8b, v4.8b, v4.8b, #3 // B3
241 eor v17.16b, v17.16b, v19.16b // M = G + H
242 pmull v2.8h, v6.8b, v2.8b // I = A*B3
267 ext v19.8b, v4.8b, v4.8b, #4 // B4
268 eor v18.16b, v18.16b, v2.16b // N = I + J
269 pmull v19.8h, v6.8b, v19.8b // K = A*B4
277 eor v20.16b, v20.16b, v21.16b
278 eor v22.16b, v22.16b, v23.16b
279 and v21.16b, v21.16b, v24.16b
280 and v23.16b, v23.16b, v25.16b
281 eor v20.16b, v20.16b, v21.16b
282 eor v22.16b, v22.16b, v23.16b
288 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
289 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
290 pmull v2.8h, v6.8b, v4.8b // D = A*B
291 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
292 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
293 eor v16.16b, v16.16b, v17.16b
294 eor v18.16b, v18.16b, v19.16b
295 eor v2.16b, v2.16b, v16.16b
296 eor v2.16b, v2.16b, v18.16b
297 ext v16.16b, v0.16b, v2.16b, #8
298 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
299 eor v1.16b, v1.16b, v2.16b
300 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
308 eor v18.16b, v18.16b, v17.16b //
310 eor v18.16b, v18.16b, v17.16b //
312 eor v18.16b, v18.16b, v1.16b
317 eor v2.16b, v2.16b,v0.16b
318 eor v0.16b, v0.16b,v18.16b //
321 eor v0.16b, v0.16b, v2.16b //
322 eor v0.16b, v0.16b, v18.16b //
327 rev64 v0.16b, v0.16b // byteswap Xi and write
328 ext v0.16b, v0.16b, v0.16b, #8
329 st1 {v0.16b}, [x0]