Lines Matching +full:0 +full:x14
5 #define __has_feature(x) 0
29 add x29,sp,#0
34 ldr x9,[x2],#8 // bp[0]
36 ldp x7,x8,[x1],#16 // ap[0..1]
40 ldp x13,x14,[x3],#16 // np[0..1]
42 mul x6,x7,x9 // ap[0]*bp[0]
45 mul x10,x8,x9 // ap[1]*bp[0]
48 mul x15,x6,x4 // "tp[0]"*n0
51 // (*) mul x12,x13,x15 // np[0]*m1
53 mul x16,x14,x15 // np[1]*m1
65 umulh x17,x14,x15
75 ldr x14,[x3],#8
77 mul x10,x8,x9 // ap[j]*bp[0]
82 mul x16,x14,x15 // np[j]*m1
84 umulh x17,x14,x15
107 ldr x23,[sp] // tp[0]
110 mul x6,x7,x9 // ap[0]*bp[i]
113 ldp x13,x14,[x3],#16
122 // (*) mul x12,x13,x15 // np[0]*m1
124 mul x16,x14,x15 // np[1]*m1
127 umulh x17,x14,x15
139 ldr x14,[x3],#8
147 mul x16,x14,x15 // np[j]*m1
149 umulh x17,x14,x15
179 ldr x23,[sp] // tp[0]
181 ldr x14,[x3],#8 // np[0]
185 sbcs x8,x23,x14 // tp[j]-np[j]
188 ldr x14,[x3],#8
192 sbcs x8,x23,x14
196 ldr x23,[sp] // tp[0]
198 ldr x8,[x0],#8 // rp[0]
203 csel x14,x23,x8,lo // did it borrow?
207 str x14,[x0,#-16]
210 csel x14,x23,x8,lo
212 str x14,[x0,#-8]
232 add x29,sp,#0
240 ldp x6,x7,[x1,#8*0]
254 stp xzr,xzr,[x2,#8*0]
282 // a[1]a[0] (i)
283 // a[2]a[0]
284 // a[3]a[0]
285 // a[4]a[0]
286 // a[5]a[0]
287 // a[6]a[0]
288 // a[7]a[0]
311 mul x14,x7,x6 // lo(a[1..7]*a[0]) (i)
315 adds x20,x20,x14 // t[1]+lo(a[1]*a[0])
316 mul x14,x11,x6
322 umulh x17,x7,x6 // hi(a[1..7]*a[0])
323 adcs x24,x24,x14
324 umulh x14,x8,x6
329 stp x19,x20,[x2],#8*2 // t[0..1]
331 adds x21,x21,x17 // t[2]+hi(a[1]*a[0])
333 adcs x22,x22,x14
334 umulh x14,x12,x6
341 adcs x26,x26,x14
342 mul x14,x10,x7
350 adcs x24,x24,x14
351 umulh x14,x8,x7 // hi(a[2..7]*a[1])
360 adds x23,x23,x14
361 umulh x14,x12,x7
368 adcs x19,x19,x14
369 mul x14,x11,x8
377 adcs x26,x26,x14
378 umulh x14,x10,x8
387 adcs x26,x26,x14
388 mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv)
396 adds x26,x26,x14
397 umulh x14,x10,x9 // hi(a[4..7]*a[3])
406 adds x19,x19,x14
407 mul x14,x11,x10 // lo(a[5..7]*a[4]) (v)
415 adds x20,x20,x14
416 umulh x14,x12,x10
424 adcs x22,x22,x14
425 umulh x14,x12,x11 // hi(a[6..7]*a[5])
434 adds x23,x23,x14
439 sub x14,x3,x5 // rewound ap
446 ldp x6,x7,[x2,#8*0]
452 ldp x6,x7,[x1,#8*0]
467 // a[8]a[0]
468 // a[9]a[0]
469 // a[a]a[0]
470 // a[b]a[0]
471 // a[c]a[0]
472 // a[d]a[0]
473 // a[e]a[0]
474 // a[f]a[0]
490 mul x14,x6,x4
496 adds x19,x19,x14
497 mul x14,x10,x4
504 adcs x23,x23,x14
505 umulh x14,x6,x4
514 adds x19,x20,x14
515 umulh x14,x10,x4
523 adcs x23,x24,x14
534 ldp x6,x7,[x2,#8*0]
541 ldp x6,x7,[x1,#8*0]
558 ldp x6,x7,[x0,#8*0]
561 sub x14,x3,x1 // is it last iteration?
563 sub x15,x2,x14
565 cbz x14,.Lsqr8x_outer_loop
567 stp x19,x20,[x2,#8*0]
568 ldp x19,x20,[x15,#8*0]
580 // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
581 ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0]
583 ldp x11,x13,[x14,#8*2]
584 add x1,x14,#8*4
585 ldp x17,x14,[sp,#8*3]
587 stp x19,x20,[x2,#8*0]
611 extr x17,x14,x17,#63
612 stp x19,x20,[x2,#8*0]
614 extr x14,x15,x14,#63
616 adcs x24,x11,x14
617 ldp x17,x14,[x2,#8*7]
629 extr x17,x14,x17,#63
633 extr x14,x15,x14,#63
634 adcs x20,x7,x14
635 ldp x17,x14,[x2,#8*3]
646 stp x19,x20,[x2,#8*0]
650 extr x17,x14,x17,#63
652 extr x14,x15,x14,#63
653 ldp x19,x20,[sp,#8*0]
654 adcs x24,x11,x14
656 ldp x6,x7,[x1,#8*0]
664 mul x28,x4,x19 // t[0]*n0
678 // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0)
682 str x28,[x2],#8 // put aside t[0]*n0 for tail processing
684 // (*) adds xzr,x19,x14
686 mul x14,x10,x28
693 adcs x22,x23,x14
694 umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0)
702 adds x19,x19,x14
703 umulh x14,x10,x28
710 mul x28,x4,x19 // next t[0]*n0
711 adcs x23,x23,x14
717 ldp x14,x15,[x2,#8*0]
721 adds x19,x19,x14
723 ldp x14,x15,[x2,#8*4]
727 adcs x23,x23,x14
735 ldp x6,x7,[x1,#8*0]
743 mul x14,x6,x4
749 adds x19,x19,x14
750 mul x14,x10,x4
757 adcs x23,x23,x14
758 umulh x14,x6,x4
767 adds x19,x20,x14
768 umulh x14,x10,x4
776 adcs x23,x24,x14
784 ldp x6,x7,[x2,#8*0]
795 ldp x6,x7,[x1,#8*0]
816 adcs x14,x19,x6
818 ldp x19,x20,[x0,#8*0]
820 ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0]
832 stp x14,x15,[x2,#8*0]
850 subs x14,x19,x6
857 ldp x6,x7,[x1,#8*0]
859 stp x14,x15,[x0,#8*0]
860 sbcs x14,x23,x10
869 ldp x19,x20,[x2,#8*0]
875 stp x14,x15,[x0,#8*4]
876 sbcs x14,x19,x6
885 ldp x6,x7,[x3,#8*0]
887 stp x14,x15,[x0,#8*0]
888 sbcs x14,x23,x10
893 ldp x19,x20,[x1,#8*0]
898 stp x14,x15,[x0,#8*4]
904 csel x14,x19,x6,lo
905 stp xzr,xzr,[x2,#8*0]
916 stp x14,x15,[x3,#8*0]
919 stp xzr,xzr,[x1,#8*0]
923 csel x14,x19,x6,lo
924 stp xzr,xzr,[x2,#8*0]
929 stp x14,x15,[x3,#8*0]
942 stp xzr,xzr,[sp,#8*0]
963 stp x6,x7,[x1,#8*0]
992 add x29,sp,#0
1008 ldr x24,[x2,#8*0] // b[0]
1009 ldp x6,x7,[x1,#8*0] // a[0..3]
1016 ldp x14,x15,[x3,#8*0] // n[0..3]
1020 mov x28,#0
1024 mul x10,x6,x24 // lo(a[0..3]*b[0])
1032 umulh x10,x6,x24 // hi(a[0..3]*b[0])
1034 mul x25,x19,x4 // t[0]*n0
1041 ldr x24,[x2,x28] // next b[i] (or b[0])
1043 // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0)
1044 str x25,[x26],#8 // put aside t[0]*n0 for tail processing
1053 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0)
1072 ldp x6,x7,[x1,#8*0] // a[4..7]
1075 ldr x25,[sp] // t[0]*n0
1076 ldp x14,x15,[x3,#8*0] // n[4..7]
1097 ldr x24,[x2,x28] // next b[i] (or b[0])
1099 mul x10,x14,x25 // lo(n[4..7]*t[0]*n0)
1107 umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0)
1116 ldr x25,[sp,x28] // next t[0]*n0
1129 ldp x6,x7,[x1,#8*0]
1132 ldp x14,x15,[x3,#8*0]
1141 ldp x6,x7,[x11,#8*0] // a[0..3]
1146 stp x19,x20,[x26,#8*0] // result!!!
1147 ldp x19,x20,[sp,#8*4] // t[0..3]
1151 ldp x14,x15,[x3,#8*0] // n[0..3]
1159 mul x10,x6,x24 // lo(a[0..3]*b[4])
1167 umulh x10,x6,x24 // hi(a[0..3]*b[4])
1169 mul x25,x19,x4 // t[0]*n0
1178 // (*) mul x10,x14,x25
1179 str x25,[x26],#8 // put aside t[0]*n0 for tail processing
1181 mul x11,x15,x25 // lo(n[0..3]*t[0]*n0)
1188 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0)
1207 ldp x6,x7,[x1,#8*0] // a[4..7]
1216 ldr x25,[sp] // t[0]*n0
1217 ldp x14,x15,[x3,#8*0] // n[4..7]
1241 mul x10,x14,x25 // lo(n[4..7]*t[0]*n0)
1249 umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0)
1257 ldr x25,[sp,x28] // next t[0]*n0
1274 ldp x6,x7,[x1,#8*0]
1282 ldp x14,x15,[x3,#8*0]
1295 stp x19,x20,[x26,#8*0] // result!!!
1297 ldp x19,x20,[sp,#8*4] // t[0..3]
1302 ldp x14,x15,[x11,#8*0] // n[0..3]
1308 ldp x6,x7,[x1,#8*0] // a[0..3]
1323 subs x10,x19,x14
1330 ldp x14,x15,[x3,#8*0]
1332 ldp x19,x20,[x26,#8*0]
1338 stp x10,x11,[x0,#8*0]
1339 sbcs x10,x19,x14
1348 ldp x6,x7,[x27,#8*0]
1350 stp x10,x11,[x0,#8*0]
1353 ldp x19,x20,[x1,#8*0]
1362 stp xzr,xzr,[x26,#8*0]
1373 stp x10,x11,[x27,#8*0]
1379 stp xzr,xzr,[x26,#8*0]
1386 stp x10,x11,[x27,#8*0]
1395 // x19-x22,x0 hold result, x14-x17 hold modulus
1396 subs x6,x19,x14
1399 stp xzr,xzr,[sp,#8*0]
1412 stp x6,x7,[x1,#8*0]
1428 …9,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0