Lines Matching refs:b
25 movi v19.16b, #0xe1
27 ext v3.16b, v17.16b, v17.16b, #8
30 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
33 and v18.16b, v18.16b, v16.16b
35 ext v18.16b, v18.16b, v18.16b, #8
36 and v16.16b, v16.16b, v17.16b
37 orr v3.16b, v3.16b, v18.16b // H<<<=1
38 eor v5.16b, v3.16b, v16.16b // twisted H
49 ld1 {v3.16b}, [x0] // load Xi
55 rev64 v3.16b, v3.16b // byteswap Xi
56 ext v3.16b, v3.16b, v3.16b, #8
57 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
60 b .Lgmult_neon
69 ld1 {v0.16b}, [x0] // load Xi
75 rev64 v0.16b, v0.16b // byteswap Xi
76 ext v0.16b, v0.16b, v0.16b, #8
77 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
80 ld1 {v3.16b}, [x2], #16 // load inp
81 rev64 v3.16b, v3.16b // byteswap inp
82 ext v3.16b, v3.16b, v3.16b, #8
83 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
89 ext v16.8b, v5.8b, v5.8b, #1 // A1
90 pmull v16.8h, v16.8b, v3.8b // F = A1*B
91 ext v0.8b, v3.8b, v3.8b, #1 // B1
92 pmull v0.8h, v5.8b, v0.8b // E = A*B1
93 ext v17.8b, v5.8b, v5.8b, #2 // A2
94 pmull v17.8h, v17.8b, v3.8b // H = A2*B
95 ext v19.8b, v3.8b, v3.8b, #2 // B2
96 pmull v19.8h, v5.8b, v19.8b // G = A*B2
97 ext v18.8b, v5.8b, v5.8b, #3 // A3
98 eor v16.16b, v16.16b, v0.16b // L = E + F
99 pmull v18.8h, v18.8b, v3.8b // J = A3*B
100 ext v0.8b, v3.8b, v3.8b, #3 // B3
101 eor v17.16b, v17.16b, v19.16b // M = G + H
102 pmull v0.8h, v5.8b, v0.8b // I = A*B3
127 ext v19.8b, v3.8b, v3.8b, #4 // B4
128 eor v18.16b, v18.16b, v0.16b // N = I + J
129 pmull v19.8h, v5.8b, v19.8b // K = A*B4
137 eor v20.16b, v20.16b, v21.16b
138 eor v22.16b, v22.16b, v23.16b
139 and v21.16b, v21.16b, v24.16b
140 and v23.16b, v23.16b, v25.16b
141 eor v20.16b, v20.16b, v21.16b
142 eor v22.16b, v22.16b, v23.16b
148 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
149 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
150 pmull v0.8h, v5.8b, v3.8b // D = A*B
151 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
152 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
153 eor v16.16b, v16.16b, v17.16b
154 eor v18.16b, v18.16b, v19.16b
155 eor v0.16b, v0.16b, v16.16b
156 eor v0.16b, v0.16b, v18.16b
157 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
158 ext v16.8b, v7.8b, v7.8b, #1 // A1
159 pmull v16.8h, v16.8b, v3.8b // F = A1*B
160 ext v1.8b, v3.8b, v3.8b, #1 // B1
161 pmull v1.8h, v7.8b, v1.8b // E = A*B1
162 ext v17.8b, v7.8b, v7.8b, #2 // A2
163 pmull v17.8h, v17.8b, v3.8b // H = A2*B
164 ext v19.8b, v3.8b, v3.8b, #2 // B2
165 pmull v19.8h, v7.8b, v19.8b // G = A*B2
166 ext v18.8b, v7.8b, v7.8b, #3 // A3
167 eor v16.16b, v16.16b, v1.16b // L = E + F
168 pmull v18.8h, v18.8b, v3.8b // J = A3*B
169 ext v1.8b, v3.8b, v3.8b, #3 // B3
170 eor v17.16b, v17.16b, v19.16b // M = G + H
171 pmull v1.8h, v7.8b, v1.8b // I = A*B3
196 ext v19.8b, v3.8b, v3.8b, #4 // B4
197 eor v18.16b, v18.16b, v1.16b // N = I + J
198 pmull v19.8h, v7.8b, v19.8b // K = A*B4
206 eor v20.16b, v20.16b, v21.16b
207 eor v22.16b, v22.16b, v23.16b
208 and v21.16b, v21.16b, v24.16b
209 and v23.16b, v23.16b, v25.16b
210 eor v20.16b, v20.16b, v21.16b
211 eor v22.16b, v22.16b, v23.16b
217 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
218 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
219 pmull v1.8h, v7.8b, v3.8b // D = A*B
220 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
221 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
222 eor v16.16b, v16.16b, v17.16b
223 eor v18.16b, v18.16b, v19.16b
224 eor v1.16b, v1.16b, v16.16b
225 eor v1.16b, v1.16b, v18.16b
226 ext v16.8b, v6.8b, v6.8b, #1 // A1
227 pmull v16.8h, v16.8b, v4.8b // F = A1*B
228 ext v2.8b, v4.8b, v4.8b, #1 // B1
229 pmull v2.8h, v6.8b, v2.8b // E = A*B1
230 ext v17.8b, v6.8b, v6.8b, #2 // A2
231 pmull v17.8h, v17.8b, v4.8b // H = A2*B
232 ext v19.8b, v4.8b, v4.8b, #2 // B2
233 pmull v19.8h, v6.8b, v19.8b // G = A*B2
234 ext v18.8b, v6.8b, v6.8b, #3 // A3
235 eor v16.16b, v16.16b, v2.16b // L = E + F
236 pmull v18.8h, v18.8b, v4.8b // J = A3*B
237 ext v2.8b, v4.8b, v4.8b, #3 // B3
238 eor v17.16b, v17.16b, v19.16b // M = G + H
239 pmull v2.8h, v6.8b, v2.8b // I = A*B3
264 ext v19.8b, v4.8b, v4.8b, #4 // B4
265 eor v18.16b, v18.16b, v2.16b // N = I + J
266 pmull v19.8h, v6.8b, v19.8b // K = A*B4
274 eor v20.16b, v20.16b, v21.16b
275 eor v22.16b, v22.16b, v23.16b
276 and v21.16b, v21.16b, v24.16b
277 and v23.16b, v23.16b, v25.16b
278 eor v20.16b, v20.16b, v21.16b
279 eor v22.16b, v22.16b, v23.16b
285 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
286 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
287 pmull v2.8h, v6.8b, v4.8b // D = A*B
288 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
289 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
290 eor v16.16b, v16.16b, v17.16b
291 eor v18.16b, v18.16b, v19.16b
292 eor v2.16b, v2.16b, v16.16b
293 eor v2.16b, v2.16b, v18.16b
294 ext v16.16b, v0.16b, v2.16b, #8
295 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
296 eor v1.16b, v1.16b, v2.16b
297 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
305 eor v18.16b, v18.16b, v17.16b //
307 eor v18.16b, v18.16b, v17.16b //
309 eor v18.16b, v18.16b, v1.16b
314 eor v2.16b, v2.16b,v0.16b
315 eor v0.16b, v0.16b,v18.16b //
318 eor v0.16b, v0.16b, v2.16b //
319 eor v0.16b, v0.16b, v18.16b //
324 rev64 v0.16b, v0.16b // byteswap Xi and write
325 ext v0.16b, v0.16b, v0.16b, #8
326 st1 {v0.16b}, [x0]