; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Syntax: NASM/Yasm (Intel operand order), 32-bit x86.
;
; The four routines below are fully unrolled column-wise ("comba") products
; on arrays of 32-bit words:
;   _bn_mul_comba8 / _bn_mul_comba4 : r = a * b   (8x8 -> 16, 4x4 -> 8 words)
;   _bn_sqr_comba8 / _bn_sqr_comba4 : r = a * a   (8 -> 16, 4 -> 8 words)
;
; Shared conventions visible in the code:
;   * Arguments are read from the stack and each routine ends in a plain
;     `ret` (the caller removes the arguments).
;   * esi, edi, ebp and ebx are pushed on entry and popped on exit;
;     eax, ecx, edx and the flags are overwritten.
;   * ebx, ecx and ebp form a three-word column accumulator (c0,c1,c2).
;     After each result word is stored the roles rotate: the register just
;     stored is zeroed and becomes the new top word.
;   * Loads for the *next* product are interleaved between the add/adc
;     instructions of the current one. `mov` does not touch the flags, so
;     the carry chain is unaffected, but the instruction order must not be
;     changed casually.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif

;-----------------------------------------------------------------------
; _bn_mul_comba8
; Purpose: r[0..15] = a[0..7] * b[0..7], all words 32 bits.
; In (stack, at entry): [esp+4] = r, [esp+8] = a, [esp+12] = b
; Out:     sixteen words stored through r
; Clobb:   eax, ecx, edx, flags (esi, edi, ebp, ebx saved and restored)
; Regs:    esi = a, edi = b, (ebx,ecx,ebp) = rotating accumulator;
;          r is re-read from [20+esp] before every store because no
;          register is left to hold it.
; NOTE(review): presumably the C prototype is
;   void bn_mul_comba8(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b)
; -- confirm against the C header.
;-----------------------------------------------------------------------
global	_bn_mul_comba8
align	16
_bn_mul_comba8:
L$_bn_mul_comba8_begin:
	push	esi
	; one push so far: second argument (a) is at [12+esp]
	mov	esi,DWORD [12+esp]
	push	edi
	; two pushes so far: third argument (b) is at [20+esp]
	mov	edi,DWORD [20+esp]
	push	ebp
	push	ebx
	; four pushes: from here on [20+esp] is the first argument (r)
	xor	ebx,ebx
	mov	eax,DWORD [esi]
	xor	ecx,ecx
	mov	edx,DWORD [edi]
	; ################## Calculate word 0
	xor	ebp,ebp
	; mul a[0]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [eax],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ################## Calculate word 1
	xor	ebx,ebx
	; mul a[1]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[0]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [edi]
	adc	ebx,0
	mov	DWORD [4+eax],ecx
	mov	eax,DWORD [8+esi]
	; saved r[1]
	; ################## Calculate word 2
	xor	ecx,ecx
	; mul a[2]*b[0]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [4+edi]
	adc	ecx,0
	; mul a[1]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[0]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [edi]
	adc	ecx,0
	mov	DWORD [8+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ################## Calculate word 3
	xor	ebp,ebp
	; mul a[3]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	; mul a[2]*b[1]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [4+esi]
	adc	ecx,edx
	mov	edx,DWORD [8+edi]
	adc	ebp,0
	; mul a[1]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[0]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [12+eax],ebx
	mov	eax,DWORD [16+esi]
	; saved r[3]
	; ################## Calculate word 4
	xor	ebx,ebx
	; mul a[4]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [12+esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[3]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [8+esi]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	; mul a[2]*b[2]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [4+esi]
	adc	ebp,edx
	mov	edx,DWORD [12+edi]
	adc	ebx,0
	; mul a[1]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	; mul a[0]*b[4]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [edi]
	adc	ebx,0
	mov	DWORD [16+eax],ecx
	mov	eax,DWORD [20+esi]
	; saved r[4]
	; ################## Calculate word 5
	xor	ecx,ecx
	; mul a[5]*b[0]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [16+esi]
	adc	ebx,edx
	mov	edx,DWORD [4+edi]
	adc	ecx,0
	; mul a[4]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [12+esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[3]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [8+esi]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	; mul a[2]*b[3]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [16+edi]
	adc	ecx,0
	; mul a[1]*b[4]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [esi]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	; mul a[0]*b[5]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [edi]
	adc	ecx,0
	mov	DWORD [20+eax],ebp
	mov	eax,DWORD [24+esi]
	; saved r[5]
	; ################## Calculate word 6
	xor	ebp,ebp
	; mul a[6]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esi]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	; mul a[5]*b[1]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [16+esi]
	adc	ecx,edx
	mov	edx,DWORD [8+edi]
	adc	ebp,0
	; mul a[4]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [12+esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[3]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [16+edi]
	adc	ebp,0
	; mul a[2]*b[4]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [4+esi]
	adc	ecx,edx
	mov	edx,DWORD [20+edi]
	adc	ebp,0
	; mul a[1]*b[5]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [esi]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	; mul a[0]*b[6]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [24+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[6]
	; ################## Calculate word 7
	xor	ebx,ebx
	; mul a[7]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [24+esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[6]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esi]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	; mul a[5]*b[2]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [16+esi]
	adc	ebp,edx
	mov	edx,DWORD [12+edi]
	adc	ebx,0
	; mul a[4]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [12+esi]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	; mul a[3]*b[4]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [8+esi]
	adc	ebp,edx
	mov	edx,DWORD [20+edi]
	adc	ebx,0
	; mul a[2]*b[5]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [4+esi]
	adc	ebp,edx
	mov	edx,DWORD [24+edi]
	adc	ebx,0
	; mul a[1]*b[6]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	; mul a[0]*b[7]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	mov	DWORD [28+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[7]
	; ################## Calculate word 8
	xor	ecx,ecx
	; mul a[7]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [24+esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[6]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esi]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	; mul a[5]*b[3]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [16+esi]
	adc	ebx,edx
	mov	edx,DWORD [16+edi]
	adc	ecx,0
	; mul a[4]*b[4]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [12+esi]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	; mul a[3]*b[5]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [8+esi]
	adc	ebx,edx
	mov	edx,DWORD [24+edi]
	adc	ecx,0
	; mul a[2]*b[6]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [28+edi]
	adc	ecx,0
	; mul a[1]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	mov	DWORD [32+eax],ebp
	mov	eax,DWORD [28+esi]
	; saved r[8]
	; ################## Calculate word 9
	xor	ebp,ebp
	; mul a[7]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [24+esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[6]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esi]
	adc	ecx,edx
	mov	edx,DWORD [16+edi]
	adc	ebp,0
	; mul a[5]*b[4]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [16+esi]
	adc	ecx,edx
	mov	edx,DWORD [20+edi]
	adc	ebp,0
	; mul a[4]*b[5]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [12+esi]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	; mul a[3]*b[6]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [28+edi]
	adc	ebp,0
	; mul a[2]*b[7]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	mov	DWORD [36+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[9]
	; ################## Calculate word 10
	xor	ebx,ebx
	; mul a[7]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [24+esi]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	; mul a[6]*b[4]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esi]
	adc	ebp,edx
	mov	edx,DWORD [20+edi]
	adc	ebx,0
	; mul a[5]*b[5]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [16+esi]
	adc	ebp,edx
	mov	edx,DWORD [24+edi]
	adc	ebx,0
	; mul a[4]*b[6]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [12+esi]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	; mul a[3]*b[7]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [16+edi]
	adc	ebx,0
	mov	DWORD [40+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[10]
	; ################## Calculate word 11
	xor	ecx,ecx
	; mul a[7]*b[4]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [24+esi]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	; mul a[6]*b[5]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esi]
	adc	ebx,edx
	mov	edx,DWORD [24+edi]
	adc	ecx,0
	; mul a[5]*b[6]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [16+esi]
	adc	ebx,edx
	mov	edx,DWORD [28+edi]
	adc	ecx,0
	; mul a[4]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [20+edi]
	adc	ecx,0
	mov	DWORD [44+eax],ebp
	mov	eax,DWORD [28+esi]
	; saved r[11]
	; ################## Calculate word 12
	xor	ebp,ebp
	; mul a[7]*b[5]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [24+esi]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	; mul a[6]*b[6]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esi]
	adc	ecx,edx
	mov	edx,DWORD [28+edi]
	adc	ebp,0
	; mul a[5]*b[7]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [24+edi]
	adc	ebp,0
	mov	DWORD [48+eax],ebx
	mov	eax,DWORD [28+esi]
	; saved r[12]
	; ################## Calculate word 13
	xor	ebx,ebx
	; mul a[7]*b[6]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [24+esi]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	; mul a[6]*b[7]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [28+edi]
	adc	ebx,0
	mov	DWORD [52+eax],ecx
	mov	eax,DWORD [28+esi]
	; saved r[13]
	; ################## Calculate word 14
	xor	ecx,ecx
	; mul a[7]*b[7]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	adc	ecx,0
	mov	DWORD [56+eax],ebp
	; saved r[14]
	; save r[15]
	; (the middle accumulator word, ebx, is the top word of the product)
	mov	DWORD [60+eax],ebx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret

;-----------------------------------------------------------------------
; _bn_mul_comba4
; Purpose: r[0..7] = a[0..3] * b[0..3], all words 32 bits.
; In (stack, at entry): [esp+4] = r, [esp+8] = a, [esp+12] = b
; Out:     eight words stored through r
; Clobb:   eax, ecx, edx, flags (esi, edi, ebp, ebx saved and restored)
; Regs:    esi = a, edi = b, (ebx,ecx,ebp) = rotating accumulator;
;          r is re-read from [20+esp] before every store.
;-----------------------------------------------------------------------
global	_bn_mul_comba4
align	16
_bn_mul_comba4:
L$_bn_mul_comba4_begin:
	push	esi
	; one push so far: second argument (a) is at [12+esp]
	mov	esi,DWORD [12+esp]
	push	edi
	; two pushes so far: third argument (b) is at [20+esp]
	mov	edi,DWORD [20+esp]
	push	ebp
	push	ebx
	; four pushes: from here on [20+esp] is the first argument (r)
	xor	ebx,ebx
	mov	eax,DWORD [esi]
	xor	ecx,ecx
	mov	edx,DWORD [edi]
	; ################## Calculate word 0
	xor	ebp,ebp
	; mul a[0]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [edi]
	adc	ebp,0
	mov	DWORD [eax],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ################## Calculate word 1
	xor	ebx,ebx
	; mul a[1]*b[0]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [esi]
	adc	ebp,edx
	mov	edx,DWORD [4+edi]
	adc	ebx,0
	; mul a[0]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [edi]
	adc	ebx,0
	mov	DWORD [4+eax],ecx
	mov	eax,DWORD [8+esi]
	; saved r[1]
	; ################## Calculate word 2
	xor	ecx,ecx
	; mul a[2]*b[0]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [4+esi]
	adc	ebx,edx
	mov	edx,DWORD [4+edi]
	adc	ecx,0
	; mul a[1]*b[1]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [esi]
	adc	ebx,edx
	mov	edx,DWORD [8+edi]
	adc	ecx,0
	; mul a[0]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [edi]
	adc	ecx,0
	mov	DWORD [8+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ################## Calculate word 3
	xor	ebp,ebp
	; mul a[3]*b[0]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [8+esi]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	; mul a[2]*b[1]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [4+esi]
	adc	ecx,edx
	mov	edx,DWORD [8+edi]
	adc	ebp,0
	; mul a[1]*b[2]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [esi]
	adc	ecx,edx
	mov	edx,DWORD [12+edi]
	adc	ebp,0
	; mul a[0]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	mov	edx,DWORD [4+edi]
	adc	ebp,0
	mov	DWORD [12+eax],ebx
	mov	eax,DWORD [12+esi]
	; saved r[3]
	; ################## Calculate word 4
	xor	ebx,ebx
	; mul a[3]*b[1]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [8+esi]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	; mul a[2]*b[2]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [4+esi]
	adc	ebp,edx
	mov	edx,DWORD [12+edi]
	adc	ebx,0
	; mul a[1]*b[3]
	mul	edx
	add	ecx,eax
	mov	eax,DWORD [20+esp]
	adc	ebp,edx
	mov	edx,DWORD [8+edi]
	adc	ebx,0
	mov	DWORD [16+eax],ecx
	mov	eax,DWORD [12+esi]
	; saved r[4]
	; ################## Calculate word 5
	xor	ecx,ecx
	; mul a[3]*b[2]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [8+esi]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	; mul a[2]*b[3]
	mul	edx
	add	ebp,eax
	mov	eax,DWORD [20+esp]
	adc	ebx,edx
	mov	edx,DWORD [12+edi]
	adc	ecx,0
	mov	DWORD [20+eax],ebp
	mov	eax,DWORD [12+esi]
	; saved r[5]
	; ################## Calculate word 6
	xor	ebp,ebp
	; mul a[3]*b[3]
	mul	edx
	add	ebx,eax
	mov	eax,DWORD [20+esp]
	adc	ecx,edx
	adc	ebp,0
	mov	DWORD [24+eax],ebx
	; saved r[6]
	; save r[7]
	; (the middle accumulator word, ecx, is the top word of the product)
	mov	DWORD [28+eax],ecx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret

;-----------------------------------------------------------------------
; _bn_sqr_comba8
; Purpose: r[0..15] = a[0..7] squared, all words 32 bits.
; In (stack, at entry): [esp+4] = r, [esp+8] = a
; Out:     sixteen words stored through r
; Clobb:   eax, ecx, edx, flags (esi, edi, ebp, ebx saved and restored)
; Regs:    edi = r, esi = a, (ebx,ecx,ebp) = rotating accumulator.
; Method:  each cross product a[i]*a[j] (i != j) is computed once and
;          doubled in edx:eax (add eax,eax / adc edx,edx); the bit shifted
;          out is added to the top accumulator word. Diagonal terms
;          a[i]*a[i] use `mul eax` and are added once.
;-----------------------------------------------------------------------
global	_bn_sqr_comba8
align	16
_bn_sqr_comba8:
L$_bn_sqr_comba8_begin:
	push	esi
	push	edi
	push	ebp
	push	ebx
	; four pushes: first argument (r) at [20+esp], second (a) at [24+esp]
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	xor	ebx,ebx
	xor	ecx,ecx
	mov	eax,DWORD [esi]
	; ############### Calculate word 0
	xor	ebp,ebp
	; sqr a[0]*a[0]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [esi]
	adc	ebp,0
	mov	DWORD [edi],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ############### Calculate word 1
	xor	ebx,ebx
	; sqr a[1]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	mov	DWORD [4+edi],ecx
	mov	edx,DWORD [esi]
	; saved r[1]
	; ############### Calculate word 2
	xor	ecx,ecx
	; sqr a[2]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [4+esi]
	adc	ecx,0
	; sqr a[1]*a[1]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	mov	edx,DWORD [esi]
	adc	ecx,0
	mov	DWORD [8+edi],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ############### Calculate word 3
	xor	ebp,ebp
	; sqr a[3]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [8+esi]
	adc	ebp,0
	mov	edx,DWORD [4+esi]
	; sqr a[2]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [16+esi]
	adc	ebp,0
	mov	DWORD [12+edi],ebx
	mov	edx,DWORD [esi]
	; saved r[3]
	; ############### Calculate word 4
	xor	ebx,ebx
	; sqr a[4]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [12+esi]
	adc	ebx,0
	mov	edx,DWORD [4+esi]
	; sqr a[3]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	; sqr a[2]*a[2]
	mul	eax
	add	ecx,eax
	adc	ebp,edx
	mov	edx,DWORD [esi]
	adc	ebx,0
	mov	DWORD [16+edi],ecx
	mov	eax,DWORD [20+esi]
	; saved r[4]
	; ############### Calculate word 5
	xor	ecx,ecx
	; sqr a[5]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [16+esi]
	adc	ecx,0
	mov	edx,DWORD [4+esi]
	; sqr a[4]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [12+esi]
	adc	ecx,0
	mov	edx,DWORD [8+esi]
	; sqr a[3]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [24+esi]
	adc	ecx,0
	mov	DWORD [20+edi],ebp
	mov	edx,DWORD [esi]
	; saved r[5]
	; ############### Calculate word 6
	xor	ebp,ebp
	; sqr a[6]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [20+esi]
	adc	ebp,0
	mov	edx,DWORD [4+esi]
	; sqr a[5]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [16+esi]
	adc	ebp,0
	mov	edx,DWORD [8+esi]
	; sqr a[4]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [12+esi]
	adc	ebp,0
	; sqr a[3]*a[3]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [esi]
	adc	ebp,0
	mov	DWORD [24+edi],ebx
	mov	eax,DWORD [28+esi]
	; saved r[6]
	; ############### Calculate word 7
	xor	ebx,ebx
	; sqr a[7]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [24+esi]
	adc	ebx,0
	mov	edx,DWORD [4+esi]
	; sqr a[6]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [20+esi]
	adc	ebx,0
	mov	edx,DWORD [8+esi]
	; sqr a[5]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [16+esi]
	adc	ebx,0
	mov	edx,DWORD [12+esi]
	; sqr a[4]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [28+esi]
	adc	ebx,0
	mov	DWORD [28+edi],ecx
	mov	edx,DWORD [4+esi]
	; saved r[7]
	; ############### Calculate word 8
	xor	ecx,ecx
	; sqr a[7]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [24+esi]
	adc	ecx,0
	mov	edx,DWORD [8+esi]
	; sqr a[6]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [20+esi]
	adc	ecx,0
	mov	edx,DWORD [12+esi]
	; sqr a[5]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [16+esi]
	adc	ecx,0
	; sqr a[4]*a[4]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	mov	edx,DWORD [8+esi]
	adc	ecx,0
	mov	DWORD [32+edi],ebp
	mov	eax,DWORD [28+esi]
	; saved r[8]
	; ############### Calculate word 9
	xor	ebp,ebp
	; sqr a[7]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [24+esi]
	adc	ebp,0
	mov	edx,DWORD [12+esi]
	; sqr a[6]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [20+esi]
	adc	ebp,0
	mov	edx,DWORD [16+esi]
	; sqr a[5]*a[4]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [28+esi]
	adc	ebp,0
	mov	DWORD [36+edi],ebx
	mov	edx,DWORD [12+esi]
	; saved r[9]
	; ############### Calculate word 10
	xor	ebx,ebx
	; sqr a[7]*a[3]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [24+esi]
	adc	ebx,0
	mov	edx,DWORD [16+esi]
	; sqr a[6]*a[4]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [20+esi]
	adc	ebx,0
	; sqr a[5]*a[5]
	mul	eax
	add	ecx,eax
	adc	ebp,edx
	mov	edx,DWORD [16+esi]
	adc	ebx,0
	mov	DWORD [40+edi],ecx
	mov	eax,DWORD [28+esi]
	; saved r[10]
	; ############### Calculate word 11
	xor	ecx,ecx
	; sqr a[7]*a[4]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [24+esi]
	adc	ecx,0
	mov	edx,DWORD [20+esi]
	; sqr a[6]*a[5]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [28+esi]
	adc	ecx,0
	mov	DWORD [44+edi],ebp
	mov	edx,DWORD [20+esi]
	; saved r[11]
	; ############### Calculate word 12
	xor	ebp,ebp
	; sqr a[7]*a[5]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [24+esi]
	adc	ebp,0
	; sqr a[6]*a[6]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [24+esi]
	adc	ebp,0
	mov	DWORD [48+edi],ebx
	mov	eax,DWORD [28+esi]
	; saved r[12]
	; ############### Calculate word 13
	xor	ebx,ebx
	; sqr a[7]*a[6]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [28+esi]
	adc	ebx,0
	mov	DWORD [52+edi],ecx
	; saved r[13]
	; ############### Calculate word 14
	xor	ecx,ecx
	; sqr a[7]*a[7]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	adc	ecx,0
	mov	DWORD [56+edi],ebp
	; saved r[14]
	; r[15] is the middle accumulator word (ebx)
	mov	DWORD [60+edi],ebx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret

;-----------------------------------------------------------------------
; _bn_sqr_comba4
; Purpose: r[0..7] = a[0..3] squared, all words 32 bits.
; In (stack, at entry): [esp+4] = r, [esp+8] = a
; Out:     eight words stored through r
; Clobb:   eax, ecx, edx, flags (esi, edi, ebp, ebx saved and restored)
; Regs:    edi = r, esi = a, (ebx,ecx,ebp) = rotating accumulator.
; Method:  cross products are computed once and doubled in edx:eax;
;          diagonal terms use `mul eax` and are added once.
;-----------------------------------------------------------------------
global	_bn_sqr_comba4
align	16
_bn_sqr_comba4:
L$_bn_sqr_comba4_begin:
	push	esi
	push	edi
	push	ebp
	push	ebx
	; four pushes: first argument (r) at [20+esp], second (a) at [24+esp]
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	xor	ebx,ebx
	xor	ecx,ecx
	mov	eax,DWORD [esi]
	; ############### Calculate word 0
	xor	ebp,ebp
	; sqr a[0]*a[0]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	mov	edx,DWORD [esi]
	adc	ebp,0
	mov	DWORD [edi],ebx
	mov	eax,DWORD [4+esi]
	; saved r[0]
	; ############### Calculate word 1
	xor	ebx,ebx
	; sqr a[1]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	mov	DWORD [4+edi],ecx
	mov	edx,DWORD [esi]
	; saved r[1]
	; ############### Calculate word 2
	xor	ecx,ecx
	; sqr a[2]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [4+esi]
	adc	ecx,0
	; sqr a[1]*a[1]
	mul	eax
	add	ebp,eax
	adc	ebx,edx
	mov	edx,DWORD [esi]
	adc	ecx,0
	mov	DWORD [8+edi],ebp
	mov	eax,DWORD [12+esi]
	; saved r[2]
	; ############### Calculate word 3
	xor	ebp,ebp
	; sqr a[3]*a[0]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [8+esi]
	adc	ebp,0
	mov	edx,DWORD [4+esi]
	; sqr a[2]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebp,0
	add	ebx,eax
	adc	ecx,edx
	mov	eax,DWORD [12+esi]
	adc	ebp,0
	mov	DWORD [12+edi],ebx
	mov	edx,DWORD [4+esi]
	; saved r[3]
	; ############### Calculate word 4
	xor	ebx,ebx
	; sqr a[3]*a[1]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ebx,0
	add	ecx,eax
	adc	ebp,edx
	mov	eax,DWORD [8+esi]
	adc	ebx,0
	; sqr a[2]*a[2]
	mul	eax
	add	ecx,eax
	adc	ebp,edx
	mov	edx,DWORD [8+esi]
	adc	ebx,0
	mov	DWORD [16+edi],ecx
	mov	eax,DWORD [12+esi]
	; saved r[4]
	; ############### Calculate word 5
	xor	ecx,ecx
	; sqr a[3]*a[2]
	mul	edx
	add	eax,eax
	adc	edx,edx
	adc	ecx,0
	add	ebp,eax
	adc	ebx,edx
	mov	eax,DWORD [12+esi]
	adc	ecx,0
	mov	DWORD [20+edi],ebp
	; saved r[5]
	; ############### Calculate word 6
	xor	ebp,ebp
	; sqr a[3]*a[3]
	mul	eax
	add	ebx,eax
	adc	ecx,edx
	adc	ebp,0
	mov	DWORD [24+edi],ebx
	; saved r[6]
	; r[7] is the middle accumulator word (ecx)
	mov	DWORD [28+edi],ecx
	pop	ebx
	pop	ebp
	pop	edi
	pop	esi
	ret