// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#include <GFp/arm_arch.h>

.text
.align 5
.Lpoly:
.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
.Lone_mont:
.quad 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
.Lone:
.quad 1,0,0,0
.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
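// The constants above, each four little-endian 64-bit limbs:
//   .Lpoly     - the NIST P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
//                Only limbs 1 and 3 are ever kept in registers (x12, x13);
//                limb 0 is all-ones and limb 2 is zero, so the code uses
//                an immediate and xzr for those instead.
//   .Lone_mont - 1 in the Montgomery domain, i.e. 2^256 mod p.
//   .Lone      - the plain integer 1.
// The .byte string is the ASCII banner
// "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro@openssl.org>".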

// void GFp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4],
//                                           const BN_ULONG x2[4]);
.globl GFp_nistz256_mul_mont
.hidden GFp_nistz256_mul_mont
.type GFp_nistz256_mul_mont,%function
.align 4
GFp_nistz256_mul_mont:
    stp x29,x30,[sp,#-32]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]

    ldr x3,[x2] // bp[0]
    ldp x4,x5,[x1]
    ldp x6,x7,[x1,#16]
    ldr x12,.Lpoly+8
    ldr x13,.Lpoly+24

    bl __ecp_nistz256_mul_mont

    ldp x19,x20,[sp,#16]
    ldp x29,x30,[sp],#32
    ret
.size GFp_nistz256_mul_mont,.-GFp_nistz256_mul_mont

// void GFp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl GFp_nistz256_sqr_mont
.hidden GFp_nistz256_sqr_mont
.type GFp_nistz256_sqr_mont,%function
.align 4
GFp_nistz256_sqr_mont:
    stp x29,x30,[sp,#-32]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]

    ldp x4,x5,[x1]
    ldp x6,x7,[x1,#16]
    ldr x12,.Lpoly+8
    ldr x13,.Lpoly+24

    bl __ecp_nistz256_sqr_mont

    ldp x19,x20,[sp,#16]
    ldp x29,x30,[sp],#32
    ret
.size GFp_nistz256_sqr_mont,.-GFp_nistz256_sqr_mont

// void GFp_nistz256_add(BN_ULONG x0[4],const BN_ULONG x1[4],
//                                      const BN_ULONG x2[4]);
.globl GFp_nistz256_add
.hidden GFp_nistz256_add
.type GFp_nistz256_add,%function
.align 4
GFp_nistz256_add:
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0

    ldp x14,x15,[x1]
    ldp x8,x9,[x2]
    ldp x16,x17,[x1,#16]
    ldp x10,x11,[x2,#16]
    ldr x12,.Lpoly+8
    ldr x13,.Lpoly+24

    bl __ecp_nistz256_add

    ldp x29,x30,[sp],#16
    ret
.size GFp_nistz256_add,.-GFp_nistz256_add

// void GFp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl GFp_nistz256_neg
.hidden GFp_nistz256_neg
.type GFp_nistz256_neg,%function
.align 4
GFp_nistz256_neg:
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0

    mov x2,x1
    mov x14,xzr // a = 0
    mov x15,xzr
    mov x16,xzr
    mov x17,xzr
    ldr x12,.Lpoly+8
    ldr x13,.Lpoly+24

    bl __ecp_nistz256_sub_from

    ldp x29,x30,[sp],#16
    ret
.size GFp_nistz256_neg,.-GFp_nistz256_neg

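// The helper below computes a Montgomery product, acc = a*b*2^-256 mod p,
// processing one 64-bit word of b per pass.  Because p == -1 (mod 2^64),
// the per-word Montgomery multiplier is simply acc[0] itself, and adding
// acc[0]*p collapses to shifts and adds: add acc[0]<<96, add
// acc[0]*0xffffffff00000001 at the 2^192 position, and drop the (now zero)
// low word.  That is what the lsl/lsr/subs/sbc sequences marked
// "*0xffff0001" implement, with no extra multiplications.
//
// A rough sketch of one reduction pass in C-like pseudocode (illustration
// only; "acc" stands for the whole 5-limb accumulator, not an actual type
// used by this code):
//
//     m = acc[0];                               // since -p^-1 mod 2^64 == 1
//     acc += m << 96;                           // "+=acc[0]<<96"
//     acc += (m * 0xffffffff00000001) << 192;   // "+=acc[0]*0xffff0001"
//     acc >>= 64;                               // "omit acc[0]"; low limb is now 0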
// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded
// to x4-x7 and b[0] - to x3
.type __ecp_nistz256_mul_mont,%function
.align 4
__ecp_nistz256_mul_mont:
    mul x14,x4,x3 // a[0]*b[0]
    umulh x8,x4,x3

    mul x15,x5,x3 // a[1]*b[0]
    umulh x9,x5,x3

    mul x16,x6,x3 // a[2]*b[0]
    umulh x10,x6,x3

    mul x17,x7,x3 // a[3]*b[0]
    umulh x11,x7,x3
    ldr x3,[x2,#8] // b[1]

    adds x15,x15,x8 // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs x16,x16,x9
    lsr x9,x14,#32
    adcs x17,x17,x10
    adc x19,xzr,x11
    mov x20,xzr
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    mul x8,x4,x3 // lo(a[0]*b[i])
    adcs x15,x16,x9
    mul x9,x5,x3 // lo(a[1]*b[i])
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    mul x10,x6,x3 // lo(a[2]*b[i])
    adcs x17,x19,x11
    mul x11,x7,x3 // lo(a[3]*b[i])
    adc x19,x20,xzr

    adds x14,x14,x8 // accumulate low parts of multiplication
    umulh x8,x4,x3 // hi(a[0]*b[i])
    adcs x15,x15,x9
    umulh x9,x5,x3 // hi(a[1]*b[i])
    adcs x16,x16,x10
    umulh x10,x6,x3 // hi(a[2]*b[i])
    adcs x17,x17,x11
    umulh x11,x7,x3 // hi(a[3]*b[i])
    adc x19,x19,xzr
    ldr x3,[x2,#8*(1+1)] // b[1+1]
    adds x15,x15,x8 // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs x16,x16,x9
    lsr x9,x14,#32
    adcs x17,x17,x10
    adcs x19,x19,x11
    adc x20,xzr,xzr
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    mul x8,x4,x3 // lo(a[0]*b[i])
    adcs x15,x16,x9
    mul x9,x5,x3 // lo(a[1]*b[i])
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    mul x10,x6,x3 // lo(a[2]*b[i])
    adcs x17,x19,x11
    mul x11,x7,x3 // lo(a[3]*b[i])
    adc x19,x20,xzr

    adds x14,x14,x8 // accumulate low parts of multiplication
    umulh x8,x4,x3 // hi(a[0]*b[i])
    adcs x15,x15,x9
    umulh x9,x5,x3 // hi(a[1]*b[i])
    adcs x16,x16,x10
    umulh x10,x6,x3 // hi(a[2]*b[i])
    adcs x17,x17,x11
    umulh x11,x7,x3 // hi(a[3]*b[i])
    adc x19,x19,xzr
    ldr x3,[x2,#8*(2+1)] // b[2+1]
    adds x15,x15,x8 // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs x16,x16,x9
    lsr x9,x14,#32
    adcs x17,x17,x10
    adcs x19,x19,x11
    adc x20,xzr,xzr
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    mul x8,x4,x3 // lo(a[0]*b[i])
    adcs x15,x16,x9
    mul x9,x5,x3 // lo(a[1]*b[i])
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    mul x10,x6,x3 // lo(a[2]*b[i])
    adcs x17,x19,x11
    mul x11,x7,x3 // lo(a[3]*b[i])
    adc x19,x20,xzr

    adds x14,x14,x8 // accumulate low parts of multiplication
    umulh x8,x4,x3 // hi(a[0]*b[i])
    adcs x15,x15,x9
    umulh x9,x5,x3 // hi(a[1]*b[i])
    adcs x16,x16,x10
    umulh x10,x6,x3 // hi(a[2]*b[i])
    adcs x17,x17,x11
    umulh x11,x7,x3 // hi(a[3]*b[i])
    adc x19,x19,xzr
    adds x15,x15,x8 // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs x16,x16,x9
    lsr x9,x14,#32
    adcs x17,x17,x10
    adcs x19,x19,x11
    adc x20,xzr,xzr
    // last reduction
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    adcs x15,x16,x9
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    adcs x17,x19,x11
    adc x19,x20,xzr

    adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus
    sbcs x9,x15,x12
    sbcs x10,x16,xzr
    sbcs x11,x17,x13
    sbcs xzr,x19,xzr // did it borrow?

    csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus
    csel x15,x15,x9,lo
    csel x16,x16,x10,lo
    stp x14,x15,[x0]
    csel x17,x17,x11,lo
    stp x16,x17,[x0,#16]

    ret
.size __ecp_nistz256_mul_mont,.-__ecp_nistz256_mul_mont

// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded
// to x4-x7
.type __ecp_nistz256_sqr_mont,%function
.align 4
__ecp_nistz256_sqr_mont:
    // |  |  |  |  |  |a1*a0|  |
    // |  |  |  |  |a2*a0|  |  |
    // |  |a3*a2|a3*a0|  |  |  |
    // |  |  |  |a2*a1|  |  |  |
    // |  |  |a3*a1|  |  |  |  |
    // *|  |  |  |  |  |  |  | 2|
    // +|a3*a3|a2*a2|a1*a1|a0*a0|
    // |--+--+--+--+--+--+--+--|
    // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow
    //
    // "can't overflow" below mark carrying into high part of
    // multiplication result, which can't overflow, because it
    // can never be all ones.

    mul x15,x5,x4 // a[1]*a[0]
    umulh x9,x5,x4
    mul x16,x6,x4 // a[2]*a[0]
    umulh x10,x6,x4
    mul x17,x7,x4 // a[3]*a[0]
    umulh x19,x7,x4

    adds x16,x16,x9 // accumulate high parts of multiplication
    mul x8,x6,x5 // a[2]*a[1]
    umulh x9,x6,x5
    adcs x17,x17,x10
    mul x10,x7,x5 // a[3]*a[1]
    umulh x11,x7,x5
    adc x19,x19,xzr // can't overflow

    mul x20,x7,x6 // a[3]*a[2]
    umulh x1,x7,x6

    adds x9,x9,x10 // accumulate high parts of multiplication
    mul x14,x4,x4 // a[0]*a[0]
    adc x10,x11,xzr // can't overflow

    adds x17,x17,x8 // accumulate low parts of multiplication
    umulh x4,x4,x4
    adcs x19,x19,x9
    mul x9,x5,x5 // a[1]*a[1]
    adcs x20,x20,x10
    umulh x5,x5,x5
    adc x1,x1,xzr // can't overflow

    adds x15,x15,x15 // acc[1-6]*=2
    mul x10,x6,x6 // a[2]*a[2]
    adcs x16,x16,x16
    umulh x6,x6,x6
    adcs x17,x17,x17
    mul x11,x7,x7 // a[3]*a[3]
    adcs x19,x19,x19
    umulh x7,x7,x7
    adcs x20,x20,x20
    adcs x1,x1,x1
    adc x2,xzr,xzr

    adds x15,x15,x4 // +a[i]*a[i]
    adcs x16,x16,x9
    adcs x17,x17,x5
    adcs x19,x19,x10
    adcs x20,x20,x6
    lsl x8,x14,#32
    adcs x1,x1,x11
    lsr x9,x14,#32
    adc x2,x2,x7
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    adcs x15,x16,x9
    lsl x8,x14,#32
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    lsr x9,x14,#32
    adc x17,x11,xzr // can't overflow
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    adcs x15,x16,x9
    lsl x8,x14,#32
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    lsr x9,x14,#32
    adc x17,x11,xzr // can't overflow
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    adcs x15,x16,x9
    lsl x8,x14,#32
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    lsr x9,x14,#32
    adc x17,x11,xzr // can't overflow
    subs x10,x14,x8 // "*0xffff0001"
    sbc x11,x14,x9
    adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0]
    adcs x15,x16,x9
    adcs x16,x17,x10 // +=acc[0]*0xffff0001
    adc x17,x11,xzr // can't overflow

    adds x14,x14,x19 // accumulate upper half
    adcs x15,x15,x20
    adcs x16,x16,x1
    adcs x17,x17,x2
    adc x19,xzr,xzr

    adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus
    sbcs x9,x15,x12
    sbcs x10,x16,xzr
    sbcs x11,x17,x13
    sbcs xzr,x19,xzr // did it borrow?

    csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus
    csel x15,x15,x9,lo
    csel x16,x16,x10,lo
    stp x14,x15,[x0]
    csel x17,x17,x11,lo
    stp x16,x17,[x0,#16]

    ret
.size __ecp_nistz256_sqr_mont,.-__ecp_nistz256_sqr_mont

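// The helpers below (__ecp_nistz256_add, _sub_from, _sub_morf and
// _div_by_2) all keep their results fully reduced in [0, p): additions
// conditionally subtract p again, subtractions conditionally add p back
// when they borrow ("morf" is "from" reversed - it computes b-a instead
// of a-b), and the halving routine first adds p when the value is odd
// (p is odd, so the sum becomes even) before shifting right by one.
// Each correction is selected with csel rather than a data-dependent
// branch.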
// Note that __ecp_nistz256_add expects both input vectors pre-loaded to
// x4-x7 and x8-x11. This is done because it's used in multiple
// contexts, e.g. in multiplication by 2 and 3...
.type __ecp_nistz256_add,%function
.align 4
__ecp_nistz256_add:
    adds x14,x14,x8 // ret = a+b
    adcs x15,x15,x9
    adcs x16,x16,x10
    adcs x17,x17,x11
    adc x1,xzr,xzr // zap x1

    adds x8,x14,#1 // subs x8,x4,#-1 // tmp = ret-modulus
    sbcs x9,x15,x12
    sbcs x10,x16,xzr
    sbcs x11,x17,x13
    sbcs xzr,x1,xzr // did subtraction borrow?

    csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus
    csel x15,x15,x9,lo
    csel x16,x16,x10,lo
    stp x14,x15,[x0]
    csel x17,x17,x11,lo
    stp x16,x17,[x0,#16]

    ret
.size __ecp_nistz256_add,.-__ecp_nistz256_add

.type __ecp_nistz256_sub_from,%function
.align 4
__ecp_nistz256_sub_from:
    ldp x8,x9,[x2]
    ldp x10,x11,[x2,#16]
    subs x14,x14,x8 // ret = a-b
    sbcs x15,x15,x9
    sbcs x16,x16,x10
    sbcs x17,x17,x11
    sbc x1,xzr,xzr // zap x1

    subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus
    adcs x9,x15,x12
    adcs x10,x16,xzr
    adc x11,x17,x13
    cmp x1,xzr // did subtraction borrow?

    csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret
    csel x15,x15,x9,eq
    csel x16,x16,x10,eq
    stp x14,x15,[x0]
    csel x17,x17,x11,eq
    stp x16,x17,[x0,#16]

    ret
.size __ecp_nistz256_sub_from,.-__ecp_nistz256_sub_from

.type __ecp_nistz256_sub_morf,%function
.align 4
__ecp_nistz256_sub_morf:
    ldp x8,x9,[x2]
    ldp x10,x11,[x2,#16]
    subs x14,x8,x14 // ret = b-a
    sbcs x15,x9,x15
    sbcs x16,x10,x16
    sbcs x17,x11,x17
    sbc x1,xzr,xzr // zap x1

    subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus
    adcs x9,x15,x12
    adcs x10,x16,xzr
    adc x11,x17,x13
    cmp x1,xzr // did subtraction borrow?

    csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret
    csel x15,x15,x9,eq
    csel x16,x16,x10,eq
    stp x14,x15,[x0]
    csel x17,x17,x11,eq
    stp x16,x17,[x0,#16]

    ret
.size __ecp_nistz256_sub_morf,.-__ecp_nistz256_sub_morf

.type __ecp_nistz256_div_by_2,%function
.align 4
__ecp_nistz256_div_by_2:
    subs x8,x14,#1 // adds x8,x4,#-1 // tmp = a+modulus
    adcs x9,x15,x12
    adcs x10,x16,xzr
    adcs x11,x17,x13
    adc x1,xzr,xzr // zap x1
    tst x14,#1 // is a even?

    csel x14,x14,x8,eq // ret = even ? a : a+modulus
    csel x15,x15,x9,eq
    csel x16,x16,x10,eq
    csel x17,x17,x11,eq
    csel x1,xzr,x1,eq

    lsr x14,x14,#1 // ret >>= 1
    orr x14,x14,x15,lsl#63
    lsr x15,x15,#1
    orr x15,x15,x16,lsl#63
    lsr x16,x16,#1
    orr x16,x16,x17,lsl#63
    lsr x17,x17,#1
    stp x14,x15,[x0]
    orr x17,x17,x1,lsl#63
    stp x16,x17,[x0,#16]

    ret
.size __ecp_nistz256_div_by_2,.-__ecp_nistz256_div_by_2
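// GFp_nistz256_point_double(res, in): x0 receives the doubled point and
// x1 points to the input point, both as Jacobian (X, Y, Z) triples of
// field elements in the Montgomery domain (96 bytes each).  Four 32-byte
// temporaries are kept on the stack:
//   sp+0 = S,  sp+32 = M,  sp+64 = Zsqr,  sp+96 = tmp0
// and, reading off the call comments below, the sequence implements the
// usual Jacobian doubling formulas:
//   S     = 4*X*Y^2
//   M     = 3*(X + Z^2)*(X - Z^2)
//   res_x = M^2 - 2*S
//   res_y = M*(S - res_x) - 8*Y^4
//   res_z = 2*Y*Z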
.globl GFp_nistz256_point_double
.hidden GFp_nistz256_point_double
.type GFp_nistz256_point_double,%function
.align 5
GFp_nistz256_point_double:
    stp x29,x30,[sp,#-80]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]
    stp x21,x22,[sp,#32]
    sub sp,sp,#32*4

.Ldouble_shortcut:
    ldp x14,x15,[x1,#32]
    mov x21,x0
    ldp x16,x17,[x1,#48]
    mov x22,x1
    ldr x12,.Lpoly+8
    mov x8,x14
    ldr x13,.Lpoly+24
    mov x9,x15
    ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont
    mov x10,x16
    mov x11,x17
    ldp x6,x7,[x22,#64+16]
    add x0,sp,#0
    bl __ecp_nistz256_add // p256_mul_by_2(S, in_y);

    add x0,sp,#64
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z);

    ldp x8,x9,[x22]
    ldp x10,x11,[x22,#16]
    mov x4,x14 // put Zsqr aside for p256_sub
    mov x5,x15
    mov x6,x16
    mov x7,x17
    add x0,sp,#32
    bl __ecp_nistz256_add // p256_add(M, Zsqr, in_x);

    add x2,x22,#0
    mov x14,x4 // restore Zsqr
    mov x15,x5
    ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont
    mov x16,x6
    mov x17,x7
    ldp x6,x7,[sp,#0+16]
    add x0,sp,#64
    bl __ecp_nistz256_sub_morf // p256_sub(Zsqr, in_x, Zsqr);

    add x0,sp,#0
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(S, S);

    ldr x3,[x22,#32]
    ldp x4,x5,[x22,#64]
    ldp x6,x7,[x22,#64+16]
    add x2,x22,#32
    add x0,sp,#96
    bl __ecp_nistz256_mul_mont // p256_mul_mont(tmp0, in_z, in_y);

    mov x8,x14
    mov x9,x15
    ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont
    mov x10,x16
    mov x11,x17
    ldp x6,x7,[sp,#0+16]
    add x0,x21,#64
    bl __ecp_nistz256_add // p256_mul_by_2(res_z, tmp0);

    add x0,sp,#96
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S);

    ldr x3,[sp,#64] // forward load for p256_mul_mont
    ldp x4,x5,[sp,#32]
    ldp x6,x7,[sp,#32+16]
    add x0,x21,#32
    bl __ecp_nistz256_div_by_2 // p256_div_by_2(res_y, tmp0);

    add x2,sp,#64
    add x0,sp,#32
    bl __ecp_nistz256_mul_mont // p256_mul_mont(M, M, Zsqr);

    mov x8,x14 // duplicate M
    mov x9,x15
    mov x10,x16
    mov x11,x17
    mov x4,x14 // put M aside
    mov x5,x15
    mov x6,x16
    mov x7,x17
    add x0,sp,#32
    bl __ecp_nistz256_add
    mov x8,x4 // restore M
    mov x9,x5
    ldr x3,[x22] // forward load for p256_mul_mont
    mov x10,x6
    ldp x4,x5,[sp,#0]
    mov x11,x7
    ldp x6,x7,[sp,#0+16]
    bl __ecp_nistz256_add // p256_mul_by_3(M, M);

    add x2,x22,#0
    add x0,sp,#0
    bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, in_x);

    mov x8,x14
    mov x9,x15
    ldp x4,x5,[sp,#32] // forward load for p256_sqr_mont
    mov x10,x16
    mov x11,x17
    ldp x6,x7,[sp,#32+16]
    add x0,sp,#96
    bl __ecp_nistz256_add // p256_mul_by_2(tmp0, S);

    add x0,x21,#0
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M);

    add x2,sp,#96
    bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, tmp0);

    add x2,sp,#0
    add x0,sp,#0
    bl __ecp_nistz256_sub_morf // p256_sub(S, S, res_x);

    ldr x3,[sp,#32]
    mov x4,x14 // copy S
    mov x5,x15
    mov x6,x16
    mov x7,x17
    add x2,sp,#32
    bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, M);

    add x2,x21,#32
    add x0,x21,#32
    bl __ecp_nistz256_sub_from // p256_sub(res_y, S, res_y);

    add sp,x29,#0 // destroy frame
    ldp x19,x20,[x29,#16]
    ldp x21,x22,[x29,#32]
    ldp x29,x30,[sp],#80
    ret
.size GFp_nistz256_point_double,.-GFp_nistz256_point_double
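// GFp_nistz256_point_add_affine(res, in1, in2): x0 receives the Jacobian
// result (X, Y, Z: 96 bytes), x1 points to a Jacobian input point and x2
// to an affine input point (X, Y: 64 bytes), all coordinates in the
// Montgomery domain.  Ten 32-byte temporaries live on the stack:
//   sp+0   = res_x   sp+32  = res_y   sp+64  = res_z
//   sp+96  = U2      sp+128 = Z1sqr/S2
//   sp+160 = H       sp+192 = R       sp+224 = Hsqr
//   sp+256 = Hcub    sp+288 = Rsqr
// The call comments below trace the usual mixed Jacobian/affine addition:
//   H = X2*Z1^2 - X1,  R = Y2*Z1^3 - Y1,
//   res_x = R^2 - H^3 - 2*X1*H^2,
//   res_y = R*(X1*H^2 - res_x) - Y1*H^3,
//   res_z = H*Z1.
// The x24/x25 masks computed at the top record whether either input is
// the point at infinity; the copy-out at the end uses them to substitute
// the other input (with Z = 1 taken from .Lone_mont) when needed.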
.globl GFp_nistz256_point_add_affine
.hidden GFp_nistz256_point_add_affine
.type GFp_nistz256_point_add_affine,%function
.align 5
GFp_nistz256_point_add_affine:
    stp x29,x30,[sp,#-80]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]
    stp x21,x22,[sp,#32]
    stp x23,x24,[sp,#48]
    stp x25,x26,[sp,#64]
    sub sp,sp,#32*10

    mov x21,x0
    mov x22,x1
    mov x23,x2
    ldr x12,.Lpoly+8
    ldr x13,.Lpoly+24

    ldp x4,x5,[x1,#64] // in1_z
    ldp x6,x7,[x1,#64+16]
    orr x8,x4,x5
    orr x10,x6,x7
    orr x24,x8,x10
    cmp x24,#0
    csetm x24,ne // !in1infty

    ldp x14,x15,[x2] // in2_x
    ldp x16,x17,[x2,#16]
    ldp x8,x9,[x2,#32] // in2_y
    ldp x10,x11,[x2,#48]
    orr x14,x14,x15
    orr x16,x16,x17
    orr x8,x8,x9
    orr x10,x10,x11
    orr x14,x14,x16
    orr x8,x8,x10
    orr x25,x14,x8
    cmp x25,#0
    csetm x25,ne // !in2infty

    add x0,sp,#128
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);

    mov x4,x14
    mov x5,x15
    mov x6,x16
    mov x7,x17
    ldr x3,[x23]
    add x2,x23,#0
    add x0,sp,#96
    bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, Z1sqr, in2_x);

    add x2,x22,#0
    ldr x3,[x22,#64] // forward load for p256_mul_mont
    ldp x4,x5,[sp,#128]
    ldp x6,x7,[sp,#128+16]
    add x0,sp,#160
    bl __ecp_nistz256_sub_from // p256_sub(H, U2, in1_x);

    add x2,x22,#64
    add x0,sp,#128
    bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z);

    ldr x3,[x22,#64]
    ldp x4,x5,[sp,#160]
    ldp x6,x7,[sp,#160+16]
    add x2,x22,#64
    add x0,sp,#64
    bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z);

    ldr x3,[x23,#32]
    ldp x4,x5,[sp,#128]
    ldp x6,x7,[sp,#128+16]
    add x2,x23,#32
    add x0,sp,#128
    bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y);

    add x2,x22,#32
    ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont
    ldp x6,x7,[sp,#160+16]
    add x0,sp,#192
    bl __ecp_nistz256_sub_from // p256_sub(R, S2, in1_y);

    add x0,sp,#224
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H);

    ldp x4,x5,[sp,#192]
    ldp x6,x7,[sp,#192+16]
    add x0,sp,#288
    bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R);

    ldr x3,[sp,#160]
    ldp x4,x5,[sp,#224]
    ldp x6,x7,[sp,#224+16]
    add x2,sp,#160
    add x0,sp,#256
    bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H);

    ldr x3,[x22]
    ldp x4,x5,[sp,#224]
    ldp x6,x7,[sp,#224+16]
    add x2,x22,#0
    add x0,sp,#96
    bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in1_x, Hsqr);

    mov x8,x14
    mov x9,x15
    mov x10,x16
    mov x11,x17
    add x0,sp,#224
    bl __ecp_nistz256_add // p256_mul_by_2(Hsqr, U2);

    add x2,sp,#288
    add x0,sp,#0
    bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr);

    add x2,sp,#256
    bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub);

    add x2,sp,#96
    ldr x3,[x22,#32] // forward load for p256_mul_mont
    ldp x4,x5,[sp,#256]
    ldp x6,x7,[sp,#256+16]
    add x0,sp,#32
    bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x);

    add x2,x22,#32
    add x0,sp,#128
    bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, in1_y, Hcub);

    ldr x3,[sp,#192]
    ldp x4,x5,[sp,#32]
    ldp x6,x7,[sp,#32+16]
    add x2,sp,#192
    add x0,sp,#32
    bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R);

    add x2,sp,#128
    bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2);

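// Copy-out with infinity handling.  For each of the three result
// coordinates the code picks, without branching:
//   in2 (promoted to Jacobian with Z taken from .Lone_mont) if in1 was
//   the point at infinity (x24 == 0),
//   in1 if in2 was the point at infinity (x25 == 0),
//   and the freshly computed res_x/res_y/res_z otherwise.
// The "adr x23,.Lone_mont-64" below repoints x23 so that the #32+32 and
// #32+48 loads, which would otherwise fetch a Z coordinate the affine
// input does not have, read 1 in Montgomery form instead.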
    ldp x4,x5,[sp,#0] // res
    ldp x6,x7,[sp,#0+16]
    ldp x8,x9,[x23] // in2
    ldp x10,x11,[x23,#16]
    ldp x14,x15,[x22,#0] // in1
    cmp x24,#0 // !, remember?
    ldp x16,x17,[x22,#0+16]
    csel x8,x4,x8,ne
    csel x9,x5,x9,ne
    ldp x4,x5,[sp,#0+0+32] // res
    csel x10,x6,x10,ne
    csel x11,x7,x11,ne
    cmp x25,#0 // !, remember?
    ldp x6,x7,[sp,#0+0+48]
    csel x14,x8,x14,ne
    csel x15,x9,x15,ne
    ldp x8,x9,[x23,#0+32] // in2
    csel x16,x10,x16,ne
    csel x17,x11,x17,ne
    ldp x10,x11,[x23,#0+48]
    stp x14,x15,[x21,#0]
    stp x16,x17,[x21,#0+16]
    adr x23,.Lone_mont-64
    ldp x14,x15,[x22,#32] // in1
    cmp x24,#0 // !, remember?
    ldp x16,x17,[x22,#32+16]
    csel x8,x4,x8,ne
    csel x9,x5,x9,ne
    ldp x4,x5,[sp,#0+32+32] // res
    csel x10,x6,x10,ne
    csel x11,x7,x11,ne
    cmp x25,#0 // !, remember?
    ldp x6,x7,[sp,#0+32+48]
    csel x14,x8,x14,ne
    csel x15,x9,x15,ne
    ldp x8,x9,[x23,#32+32] // in2
    csel x16,x10,x16,ne
    csel x17,x11,x17,ne
    ldp x10,x11,[x23,#32+48]
    stp x14,x15,[x21,#32]
    stp x16,x17,[x21,#32+16]
    ldp x14,x15,[x22,#64] // in1
    cmp x24,#0 // !, remember?
    ldp x16,x17,[x22,#64+16]
    csel x8,x4,x8,ne
    csel x9,x5,x9,ne
    csel x10,x6,x10,ne
    csel x11,x7,x11,ne
    cmp x25,#0 // !, remember?
    csel x14,x8,x14,ne
    csel x15,x9,x15,ne
    csel x16,x10,x16,ne
    csel x17,x11,x17,ne
    stp x14,x15,[x21,#64]
    stp x16,x17,[x21,#64+16]

    add sp,x29,#0 // destroy frame
    ldp x19,x20,[x29,#16]
    ldp x21,x22,[x29,#32]
    ldp x23,x24,[x29,#48]
    ldp x25,x26,[x29,#64]
    ldp x29,x30,[sp],#80
    ret
.size GFp_nistz256_point_add_affine,.-GFp_nistz256_point_add_affine
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits