Lines Matching +full:0 +full:x13
5 #define __has_feature(x) 0
28 add x29,sp,#0
33 ldr x9,[x2],#8 // bp[0]
35 ldp x7,x8,[x1],#16 // ap[0..1]
39 ldp x13,x14,[x3],#16 // np[0..1]
41 mul x6,x7,x9 // ap[0]*bp[0]
44 mul x10,x8,x9 // ap[1]*bp[0]
47 mul x15,x6,x4 // "tp[0]"*n0
50 // (*) mul x12,x13,x15 // np[0]*m1
51 umulh x13,x13,x15
65 adc x13,x13,xzr
75 adds x12,x16,x13
76 mul x10,x8,x9 // ap[j]*bp[0]
77 adc x13,x17,xzr
82 adc x13,x13,xzr
92 adds x12,x16,x13
94 adc x13,x17,xzr
98 adcs x13,x13,x7
101 stp x12,x13,[x22]
106 ldr x23,[sp] // tp[0]
109 mul x6,x7,x9 // ap[0]*bp[i]
112 ldp x13,x14,[x3],#16
121 // (*) mul x12,x13,x15 // np[0]*m1
122 umulh x13,x13,x15
131 adc x13,x13,xzr
137 adds x12,x16,x13
139 adc x13,x17,xzr
154 adc x13,x13,xzr
159 adds x12,x16,x13
161 adcs x13,x17,x19
168 adcs x13,x13,x7
170 stp x12,x13,[x22,#-16]
178 ldr x23,[sp] // tp[0]
180 ldr x14,[x3],#8 // np[0]
195 ldr x23,[sp] // tp[0]
197 ldr x8,[x0],#8 // rp[0]
231 add x29,sp,#0
239 ldp x6,x7,[x1,#8*0]
242 ldp x12,x13,[x1,#8*6]
253 stp xzr,xzr,[x2,#8*0]
281 // a[1]a[0] (i)
282 // a[2]a[0]
283 // a[3]a[0]
284 // a[4]a[0]
285 // a[5]a[0]
286 // a[6]a[0]
287 // a[7]a[0]
310 mul x14,x7,x6 // lo(a[1..7]*a[0]) (i)
314 adds x20,x20,x14 // t[1]+lo(a[1]*a[0])
319 mul x16,x13,x6
321 umulh x17,x7,x6 // hi(a[1..7]*a[0])
328 stp x19,x20,[x2],#8*2 // t[0..1]
330 adds x21,x21,x17 // t[2]+hi(a[1]*a[0])
335 umulh x15,x13,x6
348 mul x17,x13,x7
362 umulh x15,x13,x7
373 mul x16,x13,x8
385 umulh x17,x13,x8
394 mul x17,x13,x9
402 umulh x17,x13,x9
410 mul x16,x13,x10
417 umulh x15,x13,x10
422 mul x17,x13,x11
427 umulh x15,x13,x11
429 mul x16,x13,x12 // lo(a[7]*a[6]) (vii)
431 umulh x17,x13,x12 // hi(a[7]*a[6])
445 ldp x6,x7,[x2,#8*0]
448 ldp x12,x13,[x2,#8*6]
451 ldp x6,x7,[x1,#8*0]
460 adcs x26,xzr,x13
461 ldp x12,x13,[x1,#8*6]
466 // a[8]a[0]
467 // a[9]a[0]
468 // a[a]a[0]
469 // a[b]a[0]
470 // a[c]a[0]
471 // a[d]a[0]
472 // a[e]a[0]
473 // a[f]a[0]
502 mul x17,x13,x4
520 umulh x17,x13,x4
533 ldp x6,x7,[x2,#8*0]
536 ldp x12,x13,[x2,#8*6]
540 ldp x6,x7,[x1,#8*0]
549 adcs x26,x26,x13
550 ldp x12,x13,[x1,#8*6]
557 ldp x6,x7,[x0,#8*0]
563 ldp x12,x13,[x0,#8*6]
566 stp x19,x20,[x2,#8*0]
567 ldp x19,x20,[x15,#8*0]
579 // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
580 ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0]
582 ldp x11,x13,[x14,#8*2]
586 stp x19,x20,[x2,#8*0]
608 mul x12,x13,x13
609 umulh x13,x13,x13
611 stp x19,x20,[x2,#8*0]
620 adcs x26,x13,x16
623 ldp x11,x13,[x1],#8*2
645 stp x19,x20,[x2,#8*0]
646 mul x12,x13,x13
647 umulh x13,x13,x13
652 ldp x19,x20,[sp,#8*0]
655 ldp x6,x7,[x1,#8*0]
659 adc x26,x13,x16
663 mul x28,x4,x19 // t[0]*n0
664 ldp x12,x13,[x1,#8*6]
677 // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0)
681 str x28,[x2],#8 // put aside t[0]*n0 for tail processing
691 mul x17,x13,x28
693 umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0)
708 umulh x17,x13,x28
709 mul x28,x4,x19 // next t[0]*n0
716 ldp x14,x15,[x2,#8*0]
734 ldp x6,x7,[x1,#8*0]
738 ldp x12,x13,[x1,#8*6]
755 mul x17,x13,x4
773 umulh x17,x13,x4
783 ldp x6,x7,[x2,#8*0]
788 ldp x12,x13,[x2,#8*6]
794 ldp x6,x7,[x1,#8*0]
803 adcs x26,x26,x13
804 ldp x12,x13,[x1,#8*6]
817 ldp x19,x20,[x0,#8*0]
819 ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0]
826 adcs x26,x26,x13
827 ldp x12,x13,[x16,#8*6]
831 stp x14,x15,[x2,#8*0]
856 ldp x6,x7,[x1,#8*0]
858 stp x14,x15,[x0,#8*0]
865 sbcs x17,x26,x13
866 ldp x12,x13,[x1,#8*6]
868 ldp x19,x20,[x2,#8*0]
884 ldp x6,x7,[x3,#8*0]
886 stp x14,x15,[x0,#8*0]
892 ldp x19,x20,[x1,#8*0]
893 sbcs x17,x26,x13
904 stp xzr,xzr,[x2,#8*0]
915 stp x14,x15,[x3,#8*0]
918 stp xzr,xzr,[x1,#8*0]
923 stp xzr,xzr,[x2,#8*0]
928 stp x14,x15,[x3,#8*0]
941 stp xzr,xzr,[sp,#8*0]
952 sbcs x13,x26,x13
962 stp x6,x7,[x1,#8*0]
967 csel x13,x26,x13,lo
969 stp x12,x13,[x1,#8*6]
991 add x29,sp,#0
1007 ldr x24,[x2,#8*0] // b[0]
1008 ldp x6,x7,[x1,#8*0] // a[0..3]
1015 ldp x14,x15,[x3,#8*0] // n[0..3]
1019 mov x28,#0
1023 mul x10,x6,x24 // lo(a[0..3]*b[0])
1029 mul x13,x9,x24
1031 umulh x10,x6,x24 // hi(a[0..3]*b[0])
1033 mul x25,x19,x4 // t[0]*n0
1036 adcs x22,x22,x13
1039 umulh x13,x9,x24
1040 ldr x24,[x2,x28] // next b[i] (or b[0])
1042 // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0)
1043 str x25,[x26],#8 // put aside t[0]*n0 for tail processing
1048 adc x23,x23,x13 // can't overflow
1049 mul x13,x17,x25
1052 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0)
1057 adcs x21,x22,x13
1058 umulh x13,x17,x25
1065 adcs x22,x22,x13
1071 ldp x6,x7,[x1,#8*0] // a[4..7]
1074 ldr x25,[sp] // a[0]*n0
1075 ldp x14,x15,[x3,#8*0] // n[4..7]
1086 mul x13,x9,x24
1093 adcs x22,x22,x13
1094 umulh x13,x9,x24
1096 ldr x24,[x2,x28] // next b[i] (or b[0])
1098 mul x10,x14,x25 // lo(n[4..7]*a[0]*n0)
1103 adc x23,x23,x13 // can't overflow
1104 mul x13,x17,x25
1106 umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0)
1111 adcs x22,x22,x13
1113 umulh x13,x17,x25
1115 ldr x25,[sp,x28] // next t[0]*n0
1121 adcs x22,x23,x13
1128 ldp x6,x7,[x1,#8*0]
1131 ldp x14,x15,[x3,#8*0]
1140 ldp x6,x7,[x11,#8*0] // a[0..3]
1145 stp x19,x20,[x26,#8*0] // result!!!
1146 ldp x19,x20,[sp,#8*4] // t[0..3]
1150 ldp x14,x15,[x3,#8*0] // n[0..3]
1158 mul x10,x6,x24 // lo(a[0..3]*b[4])
1164 mul x13,x9,x24
1166 umulh x10,x6,x24 // hi(a[0..3]*b[4])
1168 mul x25,x19,x4 // t[0]*n0
1171 adcs x22,x22,x13
1174 umulh x13,x9,x24
1178 str x25,[x26],#8 // put aside t[0]*n0 for tail processing
1180 mul x11,x15,x25 // lo(n[0..3]*t[0]*n0)
1183 adc x23,x23,x13 // can't overflow
1184 mul x13,x17,x25
1187 umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0)
1192 adcs x21,x22,x13
1193 umulh x13,x17,x25
1199 adcs x22,x22,x13
1205 ldp x12,x13,[x26,#8*6]
1206 ldp x6,x7,[x1,#8*0] // a[4..7]
1212 adcs x22,x22,x13
1215 ldr x25,[sp] // t[0]*n0
1216 ldp x14,x15,[x3,#8*0] // n[4..7]
1228 mul x13,x9,x24
1235 adcs x22,x22,x13
1236 umulh x13,x9,x24
1240 mul x10,x14,x25 // lo(n[4..7]*t[0]*n0)
1245 adc x23,x23,x13 // can't overflow
1246 mul x13,x17,x25
1248 umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0)
1253 adcs x22,x22,x13
1254 umulh x13,x17,x25
1256 ldr x25,[sp,x28] // next a[0]*n0
1263 adcs x22,x23,x13
1272 ldp x12,x13,[x26,#8*6]
1273 ldp x6,x7,[x1,#8*0]
1279 adcs x22,x22,x13
1281 ldp x14,x15,[x3,#8*0]
1288 ldp x12,x13,[x29,#96] // pull rp and &b[num]
1294 stp x19,x20,[x26,#8*0] // result!!!
1296 ldp x19,x20,[sp,#8*4] // t[0..3]
1299 cmp x2,x13 // done yet?
1301 ldp x14,x15,[x11,#8*0] // n[0..3]
1307 ldp x6,x7,[x1,#8*0] // a[0..3]
1329 ldp x14,x15,[x3,#8*0]
1331 ldp x19,x20,[x26,#8*0]
1332 sbcs x13,x22,x17
1337 stp x10,x11,[x0,#8*0]
1339 stp x12,x13,[x0,#8*2]
1347 ldp x6,x7,[x27,#8*0]
1348 sbcs x13,x22,x17
1349 stp x10,x11,[x0,#8*0]
1351 stp x12,x13,[x0,#8*2]
1352 ldp x19,x20,[x1,#8*0]
1361 stp xzr,xzr,[x26,#8*0]
1368 csel x13,x22,x9,lo
1372 stp x10,x11,[x27,#8*0]
1373 stp x12,x13,[x27,#8*2]
1378 stp xzr,xzr,[x26,#8*0]
1383 csel x13,x22,x9,lo
1385 stp x10,x11,[x27,#8*0]
1386 stp x12,x13,[x27,#8*2]
1398 stp xzr,xzr,[sp,#8*0]
1411 stp x6,x7,[x1,#8*0]
1427 …9,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0