Lines Matching refs:b
// Derive the "twisted H" value in v5 from the value held in v17.
// NOTE(review): this listing omits source lines 31, 33-34, 36-37 and 39, so the
// load of v17 and any shifts applied to v3/v17/v18/v19 are not visible here.
30 movi v19.16b, #0xe1 // every byte of v19 = 0xe1
32 ext v3.16b, v17.16b, v17.16b, #8 // v3 = v17 with its two 64-bit halves swapped
35 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
38 and v18.16b, v18.16b, v16.16b // v18 &= t0
40 ext v18.16b, v18.16b, v18.16b, #8 // swap the 64-bit halves of v18
41 and v16.16b, v16.16b, v17.16b // t0 &= v17 (presumably a carry mask by now; set in lines not shown)
42 orr v3.16b, v3.16b, v18.16b // H<<<=1
43 eor v5.16b, v3.16b, v16.16b // twisted H
// Single-block entry: load the 16-byte Xi from [x0] into v3, reverse its byte
// order, form the Karatsuba operand v7, then jump to the shared multiply code.
// NOTE(review): v5/v6 are loaded by lines not included in this listing.
56 ld1 {v3.16b}, [x0] // load Xi
62 rev64 v3.16b, v3.16b // byteswap Xi
63 ext v3.16b, v3.16b, v3.16b, #8 // swap 64-bit halves; with rev64 this reverses all 16 bytes
64 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
67 b Lgmult_neon // continue at the multiply body (label not shown in this listing)
// Multi-block entry: load Xi from [x0] into v0 and byte-reverse it, form the
// Karatsuba operand v7, then fetch one 16-byte input block from [x2] and fold
// the current Xi into it.
// NOTE(review): v5/v6 are loaded by lines not included in this listing.
78 ld1 {v0.16b}, [x0] // load Xi
84 rev64 v0.16b, v0.16b // byteswap Xi
85 ext v0.16b, v0.16b, v0.16b, #8 // swap 64-bit halves; with rev64 this reverses all 16 bytes
86 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
// Post-indexed load: x2 advances by 16 bytes after the read.
89 ld1 {v3.16b}, [x2], #16 // load inp
90 rev64 v3.16b, v3.16b // byteswap inp
91 ext v3.16b, v3.16b, v3.16b, #8 // complete the 16-byte reversal of inp
92 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
// First carry-less multiply: A = low 64 bits of v5, B = low 64 bits of v3,
// result accumulated in v0. Each pmull below multiplies eight 8-bit polynomial
// lanes into eight 16-bit products; byte-rotated copies of A and B (A1..A3,
// B1..B4) supply the cross terms.
// NOTE(review): source lines 112-135, 139-145 and 152-156 are missing from this
// listing, so the instructions that fill v20-v23 and copy results back into
// v16-v19 are not visible.
98 ext v16.8b, v5.8b, v5.8b, #1 // A1
99 pmull v16.8h, v16.8b, v3.8b // F = A1*B
100 ext v0.8b, v3.8b, v3.8b, #1 // B1
101 pmull v0.8h, v5.8b, v0.8b // E = A*B1
102 ext v17.8b, v5.8b, v5.8b, #2 // A2
103 pmull v17.8h, v17.8b, v3.8b // H = A2*B
104 ext v19.8b, v3.8b, v3.8b, #2 // B2
105 pmull v19.8h, v5.8b, v19.8b // G = A*B2
106 ext v18.8b, v5.8b, v5.8b, #3 // A3
107 eor v16.16b, v16.16b, v0.16b // L = E + F
108 pmull v18.8h, v18.8b, v3.8b // J = A3*B
109 ext v0.8b, v3.8b, v3.8b, #3 // B3
110 eor v17.16b, v17.16b, v19.16b // M = G + H
111 pmull v0.8h, v5.8b, v0.8b // I = A*B3
136 ext v19.8b, v3.8b, v3.8b, #4 // B4
137 eor v18.16b, v18.16b, v0.16b // N = I + J
138 pmull v19.8h, v5.8b, v19.8b // K = A*B4
// Net effect of the next six lines: v20 = v20 ^ v21 ^ (v21 & v24), and
// v22 = v22 ^ v23 ^ (v23 & v25); v21/v23 are left masked.
// v24/v25 are set outside the lines shown; presumably constant masks - confirm.
146 eor v20.16b, v20.16b, v21.16b
147 eor v22.16b, v22.16b, v23.16b
148 and v21.16b, v21.16b, v24.16b
149 and v23.16b, v23.16b, v25.16b
150 eor v20.16b, v20.16b, v21.16b
151 eor v22.16b, v22.16b, v23.16b
// ext with the same register as both sources is a byte rotate, so the "<<"
// comments only hold if the wrapped-around top bytes are zero (presumably
// ensured by the masking above - confirm against the omitted lines).
157 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
158 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
159 pmull v0.8h, v5.8b, v3.8b // D = A*B
160 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
161 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
// Fold the four shifted partial terms into D.
162 eor v16.16b, v16.16b, v17.16b
163 eor v18.16b, v18.16b, v19.16b
164 eor v0.16b, v0.16b, v16.16b
165 eor v0.16b, v0.16b, v18.16b
// Middle Karatsuba multiply: B = (low v3) ^ (low v4), A = low 64 bits of v7,
// result accumulated in v1. Same instruction pattern as the multiply that
// produces v0, with v7 in place of v5 and v1 in place of v0.
// NOTE(review): v4 is written by a line not included in this listing; source
// lines 181-204, 208-214 and 221-225 are also missing.
166 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
167 ext v16.8b, v7.8b, v7.8b, #1 // A1
168 pmull v16.8h, v16.8b, v3.8b // F = A1*B
169 ext v1.8b, v3.8b, v3.8b, #1 // B1
170 pmull v1.8h, v7.8b, v1.8b // E = A*B1
171 ext v17.8b, v7.8b, v7.8b, #2 // A2
172 pmull v17.8h, v17.8b, v3.8b // H = A2*B
173 ext v19.8b, v3.8b, v3.8b, #2 // B2
174 pmull v19.8h, v7.8b, v19.8b // G = A*B2
175 ext v18.8b, v7.8b, v7.8b, #3 // A3
176 eor v16.16b, v16.16b, v1.16b // L = E + F
177 pmull v18.8h, v18.8b, v3.8b // J = A3*B
178 ext v1.8b, v3.8b, v3.8b, #3 // B3
179 eor v17.16b, v17.16b, v19.16b // M = G + H
180 pmull v1.8h, v7.8b, v1.8b // I = A*B3
205 ext v19.8b, v3.8b, v3.8b, #4 // B4
206 eor v18.16b, v18.16b, v1.16b // N = I + J
207 pmull v19.8h, v7.8b, v19.8b // K = A*B4
// Net effect of the next six lines: v20 = v20 ^ v21 ^ (v21 & v24), and
// v22 = v22 ^ v23 ^ (v23 & v25). v20-v25 are populated by lines not shown.
215 eor v20.16b, v20.16b, v21.16b
216 eor v22.16b, v22.16b, v23.16b
217 and v21.16b, v21.16b, v24.16b
218 and v23.16b, v23.16b, v25.16b
219 eor v20.16b, v20.16b, v21.16b
220 eor v22.16b, v22.16b, v23.16b
// Same-register ext is a byte rotate; the "<<" comments assume the bytes
// that wrap around are zero (TODO confirm against the omitted lines).
226 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
227 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
228 pmull v1.8h, v7.8b, v3.8b // D = A*B
229 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
230 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
// Fold the four shifted partial terms into D.
231 eor v16.16b, v16.16b, v17.16b
232 eor v18.16b, v18.16b, v19.16b
233 eor v1.16b, v1.16b, v16.16b
234 eor v1.16b, v1.16b, v18.16b
// Third carry-less multiply: A = low 64 bits of v6, B = low 64 bits of v4,
// result accumulated in v2. Same instruction pattern as the multiplies that
// produce v0 and v1.
// NOTE(review): source lines 249-272, 276-282 and 289-293 are missing from this
// listing; v4 and v20-v25 are populated by lines not shown.
235 ext v16.8b, v6.8b, v6.8b, #1 // A1
236 pmull v16.8h, v16.8b, v4.8b // F = A1*B
237 ext v2.8b, v4.8b, v4.8b, #1 // B1
238 pmull v2.8h, v6.8b, v2.8b // E = A*B1
239 ext v17.8b, v6.8b, v6.8b, #2 // A2
240 pmull v17.8h, v17.8b, v4.8b // H = A2*B
241 ext v19.8b, v4.8b, v4.8b, #2 // B2
242 pmull v19.8h, v6.8b, v19.8b // G = A*B2
243 ext v18.8b, v6.8b, v6.8b, #3 // A3
244 eor v16.16b, v16.16b, v2.16b // L = E + F
245 pmull v18.8h, v18.8b, v4.8b // J = A3*B
246 ext v2.8b, v4.8b, v4.8b, #3 // B3
247 eor v17.16b, v17.16b, v19.16b // M = G + H
248 pmull v2.8h, v6.8b, v2.8b // I = A*B3
273 ext v19.8b, v4.8b, v4.8b, #4 // B4
274 eor v18.16b, v18.16b, v2.16b // N = I + J
275 pmull v19.8h, v6.8b, v19.8b // K = A*B4
// Net effect of the next six lines: v20 = v20 ^ v21 ^ (v21 & v24), and
// v22 = v22 ^ v23 ^ (v23 & v25).
283 eor v20.16b, v20.16b, v21.16b
284 eor v22.16b, v22.16b, v23.16b
285 and v21.16b, v21.16b, v24.16b
286 and v23.16b, v23.16b, v25.16b
287 eor v20.16b, v20.16b, v21.16b
288 eor v22.16b, v22.16b, v23.16b
// Same-register ext is a byte rotate; the "<<" comments assume the bytes
// that wrap around are zero (TODO confirm against the omitted lines).
294 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
295 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
296 pmull v2.8h, v6.8b, v4.8b // D = A*B
297 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
298 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
// Fold the four shifted partial terms into D.
299 eor v16.16b, v16.16b, v17.16b
300 eor v18.16b, v18.16b, v19.16b
301 eor v2.16b, v2.16b, v16.16b
302 eor v2.16b, v2.16b, v18.16b
// Combine the three partial products (v0, v1, v2) and apply the XOR steps of
// the reduction.
// NOTE(review): source lines 307-313, 315, 317, 319-322 and 325-326 are missing
// from this listing. v17, v18 and v0 are presumably rewritten by those lines
// between the XORs below; read literally as shown, the two XORs with v17
// (314/316) and the two with v18 (324/328) would simply cancel.
303 ext v16.16b, v0.16b, v2.16b, #8 // v16 = { high half of v0, low half of v2 }
304 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
305 eor v1.16b, v1.16b, v2.16b
306 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
314 eor v18.16b, v18.16b, v17.16b //
316 eor v18.16b, v18.16b, v17.16b //
318 eor v18.16b, v18.16b, v1.16b // fold the middle term into v18
323 eor v2.16b, v2.16b,v0.16b
324 eor v0.16b, v0.16b,v18.16b //
327 eor v0.16b, v0.16b, v2.16b //
328 eor v0.16b, v0.16b, v18.16b //
// Reverse the 16 bytes of v0 (rev64 reverses bytes inside each 64-bit half,
// ext #8 then swaps the halves) and store the result to [x0], the same address
// Xi was loaded from.
333 rev64 v0.16b, v0.16b // byteswap Xi and write
334 ext v0.16b, v0.16b, v0.16b, #8
335 st1 {v0.16b}, [x0]