1// Copyright 2012 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package main 6 7// Based on original, public domain implementation from NaCl by D. J. 8// Bernstein. 9 10import ( 11 "crypto/subtle" 12 "math" 13) 14 15const ( 16 alpham80 = 0.00000000558793544769287109375 17 alpham48 = 24.0 18 alpham16 = 103079215104.0 19 alpha0 = 6755399441055744.0 20 alpha18 = 1770887431076116955136.0 21 alpha32 = 29014219670751100192948224.0 22 alpha50 = 7605903601369376408980219232256.0 23 alpha64 = 124615124604835863084731911901282304.0 24 alpha82 = 32667107224410092492483962313449748299776.0 25 alpha96 = 535217884764734955396857238543560676143529984.0 26 alpha112 = 35076039295941670036888435985190792471742381031424.0 27 alpha130 = 9194973245195333150150082162901855101712434733101613056.0 28 scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 29 offset0 = 6755408030990331.0 30 offset1 = 29014256564239239022116864.0 31 offset2 = 124615283061160854719918951570079744.0 32 offset3 = 535219245894202480694386063513315216128475136.0 33) 34 35// poly1305Verify returns true if mac is a valid authenticator for m with the 36// given key. 37func poly1305Verify(mac *[16]byte, m []byte, key *[32]byte) bool { 38 var tmp [16]byte 39 poly1305Sum(&tmp, m, key) 40 return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1 41} 42 43// poly1305Sum generates an authenticator for m using a one-time key and puts 44// the 16-byte result into out. Authenticating two different messages with the 45// same key allows an attacker to forge messages at will. 46func poly1305Sum(out *[16]byte, m []byte, key *[32]byte) { 47 r := key 48 s := key[16:] 49 var ( 50 y7 float64 51 y6 float64 52 y1 float64 53 y0 float64 54 y5 float64 55 y4 float64 56 x7 float64 57 x6 float64 58 x1 float64 59 x0 float64 60 y3 float64 61 y2 float64 62 x5 float64 63 r3lowx0 float64 64 x4 float64 65 r0lowx6 float64 66 x3 float64 67 r3highx0 float64 68 x2 float64 69 r0highx6 float64 70 r0lowx0 float64 71 sr1lowx6 float64 72 r0highx0 float64 73 sr1highx6 float64 74 sr3low float64 75 r1lowx0 float64 76 sr2lowx6 float64 77 r1highx0 float64 78 sr2highx6 float64 79 r2lowx0 float64 80 sr3lowx6 float64 81 r2highx0 float64 82 sr3highx6 float64 83 r1highx4 float64 84 r1lowx4 float64 85 r0highx4 float64 86 r0lowx4 float64 87 sr3highx4 float64 88 sr3lowx4 float64 89 sr2highx4 float64 90 sr2lowx4 float64 91 r0lowx2 float64 92 r0highx2 float64 93 r1lowx2 float64 94 r1highx2 float64 95 r2lowx2 float64 96 r2highx2 float64 97 sr3lowx2 float64 98 sr3highx2 float64 99 z0 float64 100 z1 float64 101 z2 float64 102 z3 float64 103 m0 int64 104 m1 int64 105 m2 int64 106 m3 int64 107 m00 uint32 108 m01 uint32 109 m02 uint32 110 m03 uint32 111 m10 uint32 112 m11 uint32 113 m12 uint32 114 m13 uint32 115 m20 uint32 116 m21 uint32 117 m22 uint32 118 m23 uint32 119 m30 uint32 120 m31 uint32 121 m32 uint32 122 m33 uint64 123 lbelow2 int32 124 lbelow3 int32 125 lbelow4 int32 126 lbelow5 int32 127 lbelow6 int32 128 lbelow7 int32 129 lbelow8 int32 130 lbelow9 int32 131 lbelow10 int32 132 lbelow11 int32 133 lbelow12 int32 134 lbelow13 int32 135 lbelow14 int32 136 lbelow15 int32 137 s00 uint32 138 s01 uint32 139 s02 uint32 140 s03 uint32 141 s10 uint32 142 s11 uint32 143 s12 uint32 144 s13 uint32 145 s20 uint32 146 s21 uint32 147 s22 uint32 148 s23 uint32 149 s30 uint32 150 s31 uint32 151 s32 uint32 152 s33 uint32 153 bits32 uint64 154 f uint64 155 f0 uint64 156 f1 uint64 157 f2 uint64 158 f3 uint64 159 f4 uint64 160 g uint64 161 g0 uint64 162 g1 uint64 163 g2 uint64 164 g3 uint64 165 g4 uint64 166 ) 167 168 var p int32 169 170 l := int32(len(m)) 171 172 r00 := uint32(r[0]) 173 174 r01 := uint32(r[1]) 175 176 r02 := uint32(r[2]) 177 r0 := int64(2151) 178 179 r03 := uint32(r[3]) 180 r03 &= 15 181 r0 <<= 51 182 183 r10 := uint32(r[4]) 184 r10 &= 252 185 r01 <<= 8 186 r0 += int64(r00) 187 188 r11 := uint32(r[5]) 189 r02 <<= 16 190 r0 += int64(r01) 191 192 r12 := uint32(r[6]) 193 r03 <<= 24 194 r0 += int64(r02) 195 196 r13 := uint32(r[7]) 197 r13 &= 15 198 r1 := int64(2215) 199 r0 += int64(r03) 200 201 d0 := r0 202 r1 <<= 51 203 r2 := int64(2279) 204 205 r20 := uint32(r[8]) 206 r20 &= 252 207 r11 <<= 8 208 r1 += int64(r10) 209 210 r21 := uint32(r[9]) 211 r12 <<= 16 212 r1 += int64(r11) 213 214 r22 := uint32(r[10]) 215 r13 <<= 24 216 r1 += int64(r12) 217 218 r23 := uint32(r[11]) 219 r23 &= 15 220 r2 <<= 51 221 r1 += int64(r13) 222 223 d1 := r1 224 r21 <<= 8 225 r2 += int64(r20) 226 227 r30 := uint32(r[12]) 228 r30 &= 252 229 r22 <<= 16 230 r2 += int64(r21) 231 232 r31 := uint32(r[13]) 233 r23 <<= 24 234 r2 += int64(r22) 235 236 r32 := uint32(r[14]) 237 r2 += int64(r23) 238 r3 := int64(2343) 239 240 d2 := r2 241 r3 <<= 51 242 243 r33 := uint32(r[15]) 244 r33 &= 15 245 r31 <<= 8 246 r3 += int64(r30) 247 248 r32 <<= 16 249 r3 += int64(r31) 250 251 r33 <<= 24 252 r3 += int64(r32) 253 254 r3 += int64(r33) 255 h0 := alpha32 - alpha32 256 257 d3 := r3 258 h1 := alpha32 - alpha32 259 260 h2 := alpha32 - alpha32 261 262 h3 := alpha32 - alpha32 263 264 h4 := alpha32 - alpha32 265 266 r0low := math.Float64frombits(uint64(d0)) 267 h5 := alpha32 - alpha32 268 269 r1low := math.Float64frombits(uint64(d1)) 270 h6 := alpha32 - alpha32 271 272 r2low := math.Float64frombits(uint64(d2)) 273 h7 := alpha32 - alpha32 274 275 r0low -= alpha0 276 277 r1low -= alpha32 278 279 r2low -= alpha64 280 281 r0high := r0low + alpha18 282 283 r3low := math.Float64frombits(uint64(d3)) 284 285 r1high := r1low + alpha50 286 sr1low := scale * r1low 287 288 r2high := r2low + alpha82 289 sr2low := scale * r2low 290 291 r0high -= alpha18 292 r0high_stack := r0high 293 294 r3low -= alpha96 295 296 r1high -= alpha50 297 r1high_stack := r1high 298 299 sr1high := sr1low + alpham80 300 301 r0low -= r0high 302 303 r2high -= alpha82 304 sr3low = scale * r3low 305 306 sr2high := sr2low + alpham48 307 308 r1low -= r1high 309 r1low_stack := r1low 310 311 sr1high -= alpham80 312 sr1high_stack := sr1high 313 314 r2low -= r2high 315 r2low_stack := r2low 316 317 sr2high -= alpham48 318 sr2high_stack := sr2high 319 320 r3high := r3low + alpha112 321 r0low_stack := r0low 322 323 sr1low -= sr1high 324 sr1low_stack := sr1low 325 326 sr3high := sr3low + alpham16 327 r2high_stack := r2high 328 329 sr2low -= sr2high 330 sr2low_stack := sr2low 331 332 r3high -= alpha112 333 r3high_stack := r3high 334 335 sr3high -= alpham16 336 sr3high_stack := sr3high 337 338 r3low -= r3high 339 r3low_stack := r3low 340 341 sr3low -= sr3high 342 sr3low_stack := sr3low 343 344 if l < 16 { 345 goto addatmost15bytes 346 } 347 348 m00 = uint32(m[p+0]) 349 m0 = 2151 350 351 m0 <<= 51 352 m1 = 2215 353 m01 = uint32(m[p+1]) 354 355 m1 <<= 51 356 m2 = 2279 357 m02 = uint32(m[p+2]) 358 359 m2 <<= 51 360 m3 = 2343 361 m03 = uint32(m[p+3]) 362 363 m10 = uint32(m[p+4]) 364 m01 <<= 8 365 m0 += int64(m00) 366 367 m11 = uint32(m[p+5]) 368 m02 <<= 16 369 m0 += int64(m01) 370 371 m12 = uint32(m[p+6]) 372 m03 <<= 24 373 m0 += int64(m02) 374 375 m13 = uint32(m[p+7]) 376 m3 <<= 51 377 m0 += int64(m03) 378 379 m20 = uint32(m[p+8]) 380 m11 <<= 8 381 m1 += int64(m10) 382 383 m21 = uint32(m[p+9]) 384 m12 <<= 16 385 m1 += int64(m11) 386 387 m22 = uint32(m[p+10]) 388 m13 <<= 24 389 m1 += int64(m12) 390 391 m23 = uint32(m[p+11]) 392 m1 += int64(m13) 393 394 m30 = uint32(m[p+12]) 395 m21 <<= 8 396 m2 += int64(m20) 397 398 m31 = uint32(m[p+13]) 399 m22 <<= 16 400 m2 += int64(m21) 401 402 m32 = uint32(m[p+14]) 403 m23 <<= 24 404 m2 += int64(m22) 405 406 m33 = uint64(m[p+15]) 407 m2 += int64(m23) 408 409 d0 = m0 410 m31 <<= 8 411 m3 += int64(m30) 412 413 d1 = m1 414 m32 <<= 16 415 m3 += int64(m31) 416 417 d2 = m2 418 m33 += 256 419 420 m33 <<= 24 421 m3 += int64(m32) 422 423 m3 += int64(m33) 424 d3 = m3 425 426 p += 16 427 l -= 16 428 429 z0 = math.Float64frombits(uint64(d0)) 430 431 z1 = math.Float64frombits(uint64(d1)) 432 433 z2 = math.Float64frombits(uint64(d2)) 434 435 z3 = math.Float64frombits(uint64(d3)) 436 437 z0 -= alpha0 438 439 z1 -= alpha32 440 441 z2 -= alpha64 442 443 z3 -= alpha96 444 445 h0 += z0 446 447 h1 += z1 448 449 h3 += z2 450 451 h5 += z3 452 453 if l < 16 { 454 goto multiplyaddatmost15bytes 455 } 456 457multiplyaddatleast16bytes: 458 459 m2 = 2279 460 m20 = uint32(m[p+8]) 461 y7 = h7 + alpha130 462 463 m2 <<= 51 464 m3 = 2343 465 m21 = uint32(m[p+9]) 466 y6 = h6 + alpha130 467 468 m3 <<= 51 469 m0 = 2151 470 m22 = uint32(m[p+10]) 471 y1 = h1 + alpha32 472 473 m0 <<= 51 474 m1 = 2215 475 m23 = uint32(m[p+11]) 476 y0 = h0 + alpha32 477 478 m1 <<= 51 479 m30 = uint32(m[p+12]) 480 y7 -= alpha130 481 482 m21 <<= 8 483 m2 += int64(m20) 484 m31 = uint32(m[p+13]) 485 y6 -= alpha130 486 487 m22 <<= 16 488 m2 += int64(m21) 489 m32 = uint32(m[p+14]) 490 y1 -= alpha32 491 492 m23 <<= 24 493 m2 += int64(m22) 494 m33 = uint64(m[p+15]) 495 y0 -= alpha32 496 497 m2 += int64(m23) 498 m00 = uint32(m[p+0]) 499 y5 = h5 + alpha96 500 501 m31 <<= 8 502 m3 += int64(m30) 503 m01 = uint32(m[p+1]) 504 y4 = h4 + alpha96 505 506 m32 <<= 16 507 m02 = uint32(m[p+2]) 508 x7 = h7 - y7 509 y7 *= scale 510 511 m33 += 256 512 m03 = uint32(m[p+3]) 513 x6 = h6 - y6 514 y6 *= scale 515 516 m33 <<= 24 517 m3 += int64(m31) 518 m10 = uint32(m[p+4]) 519 x1 = h1 - y1 520 521 m01 <<= 8 522 m3 += int64(m32) 523 m11 = uint32(m[p+5]) 524 x0 = h0 - y0 525 526 m3 += int64(m33) 527 m0 += int64(m00) 528 m12 = uint32(m[p+6]) 529 y5 -= alpha96 530 531 m02 <<= 16 532 m0 += int64(m01) 533 m13 = uint32(m[p+7]) 534 y4 -= alpha96 535 536 m03 <<= 24 537 m0 += int64(m02) 538 d2 = m2 539 x1 += y7 540 541 m0 += int64(m03) 542 d3 = m3 543 x0 += y6 544 545 m11 <<= 8 546 m1 += int64(m10) 547 d0 = m0 548 x7 += y5 549 550 m12 <<= 16 551 m1 += int64(m11) 552 x6 += y4 553 554 m13 <<= 24 555 m1 += int64(m12) 556 y3 = h3 + alpha64 557 558 m1 += int64(m13) 559 d1 = m1 560 y2 = h2 + alpha64 561 562 x0 += x1 563 564 x6 += x7 565 566 y3 -= alpha64 567 r3low = r3low_stack 568 569 y2 -= alpha64 570 r0low = r0low_stack 571 572 x5 = h5 - y5 573 r3lowx0 = r3low * x0 574 r3high = r3high_stack 575 576 x4 = h4 - y4 577 r0lowx6 = r0low * x6 578 r0high = r0high_stack 579 580 x3 = h3 - y3 581 r3highx0 = r3high * x0 582 sr1low = sr1low_stack 583 584 x2 = h2 - y2 585 r0highx6 = r0high * x6 586 sr1high = sr1high_stack 587 588 x5 += y3 589 r0lowx0 = r0low * x0 590 r1low = r1low_stack 591 592 h6 = r3lowx0 + r0lowx6 593 sr1lowx6 = sr1low * x6 594 r1high = r1high_stack 595 596 x4 += y2 597 r0highx0 = r0high * x0 598 sr2low = sr2low_stack 599 600 h7 = r3highx0 + r0highx6 601 sr1highx6 = sr1high * x6 602 sr2high = sr2high_stack 603 604 x3 += y1 605 r1lowx0 = r1low * x0 606 r2low = r2low_stack 607 608 h0 = r0lowx0 + sr1lowx6 609 sr2lowx6 = sr2low * x6 610 r2high = r2high_stack 611 612 x2 += y0 613 r1highx0 = r1high * x0 614 sr3low = sr3low_stack 615 616 h1 = r0highx0 + sr1highx6 617 sr2highx6 = sr2high * x6 618 sr3high = sr3high_stack 619 620 x4 += x5 621 r2lowx0 = r2low * x0 622 z2 = math.Float64frombits(uint64(d2)) 623 624 h2 = r1lowx0 + sr2lowx6 625 sr3lowx6 = sr3low * x6 626 627 x2 += x3 628 r2highx0 = r2high * x0 629 z3 = math.Float64frombits(uint64(d3)) 630 631 h3 = r1highx0 + sr2highx6 632 sr3highx6 = sr3high * x6 633 634 r1highx4 = r1high * x4 635 z2 -= alpha64 636 637 h4 = r2lowx0 + sr3lowx6 638 r1lowx4 = r1low * x4 639 640 r0highx4 = r0high * x4 641 z3 -= alpha96 642 643 h5 = r2highx0 + sr3highx6 644 r0lowx4 = r0low * x4 645 646 h7 += r1highx4 647 sr3highx4 = sr3high * x4 648 649 h6 += r1lowx4 650 sr3lowx4 = sr3low * x4 651 652 h5 += r0highx4 653 sr2highx4 = sr2high * x4 654 655 h4 += r0lowx4 656 sr2lowx4 = sr2low * x4 657 658 h3 += sr3highx4 659 r0lowx2 = r0low * x2 660 661 h2 += sr3lowx4 662 r0highx2 = r0high * x2 663 664 h1 += sr2highx4 665 r1lowx2 = r1low * x2 666 667 h0 += sr2lowx4 668 r1highx2 = r1high * x2 669 670 h2 += r0lowx2 671 r2lowx2 = r2low * x2 672 673 h3 += r0highx2 674 r2highx2 = r2high * x2 675 676 h4 += r1lowx2 677 sr3lowx2 = sr3low * x2 678 679 h5 += r1highx2 680 sr3highx2 = sr3high * x2 681 682 p += 16 683 l -= 16 684 h6 += r2lowx2 685 686 h7 += r2highx2 687 688 z1 = math.Float64frombits(uint64(d1)) 689 h0 += sr3lowx2 690 691 z0 = math.Float64frombits(uint64(d0)) 692 h1 += sr3highx2 693 694 z1 -= alpha32 695 696 z0 -= alpha0 697 698 h5 += z3 699 700 h3 += z2 701 702 h1 += z1 703 704 h0 += z0 705 706 if l >= 16 { 707 goto multiplyaddatleast16bytes 708 } 709 710multiplyaddatmost15bytes: 711 712 y7 = h7 + alpha130 713 714 y6 = h6 + alpha130 715 716 y1 = h1 + alpha32 717 718 y0 = h0 + alpha32 719 720 y7 -= alpha130 721 722 y6 -= alpha130 723 724 y1 -= alpha32 725 726 y0 -= alpha32 727 728 y5 = h5 + alpha96 729 730 y4 = h4 + alpha96 731 732 x7 = h7 - y7 733 y7 *= scale 734 735 x6 = h6 - y6 736 y6 *= scale 737 738 x1 = h1 - y1 739 740 x0 = h0 - y0 741 742 y5 -= alpha96 743 744 y4 -= alpha96 745 746 x1 += y7 747 748 x0 += y6 749 750 x7 += y5 751 752 x6 += y4 753 754 y3 = h3 + alpha64 755 756 y2 = h2 + alpha64 757 758 x0 += x1 759 760 x6 += x7 761 762 y3 -= alpha64 763 r3low = r3low_stack 764 765 y2 -= alpha64 766 r0low = r0low_stack 767 768 x5 = h5 - y5 769 r3lowx0 = r3low * x0 770 r3high = r3high_stack 771 772 x4 = h4 - y4 773 r0lowx6 = r0low * x6 774 r0high = r0high_stack 775 776 x3 = h3 - y3 777 r3highx0 = r3high * x0 778 sr1low = sr1low_stack 779 780 x2 = h2 - y2 781 r0highx6 = r0high * x6 782 sr1high = sr1high_stack 783 784 x5 += y3 785 r0lowx0 = r0low * x0 786 r1low = r1low_stack 787 788 h6 = r3lowx0 + r0lowx6 789 sr1lowx6 = sr1low * x6 790 r1high = r1high_stack 791 792 x4 += y2 793 r0highx0 = r0high * x0 794 sr2low = sr2low_stack 795 796 h7 = r3highx0 + r0highx6 797 sr1highx6 = sr1high * x6 798 sr2high = sr2high_stack 799 800 x3 += y1 801 r1lowx0 = r1low * x0 802 r2low = r2low_stack 803 804 h0 = r0lowx0 + sr1lowx6 805 sr2lowx6 = sr2low * x6 806 r2high = r2high_stack 807 808 x2 += y0 809 r1highx0 = r1high * x0 810 sr3low = sr3low_stack 811 812 h1 = r0highx0 + sr1highx6 813 sr2highx6 = sr2high * x6 814 sr3high = sr3high_stack 815 816 x4 += x5 817 r2lowx0 = r2low * x0 818 819 h2 = r1lowx0 + sr2lowx6 820 sr3lowx6 = sr3low * x6 821 822 x2 += x3 823 r2highx0 = r2high * x0 824 825 h3 = r1highx0 + sr2highx6 826 sr3highx6 = sr3high * x6 827 828 r1highx4 = r1high * x4 829 830 h4 = r2lowx0 + sr3lowx6 831 r1lowx4 = r1low * x4 832 833 r0highx4 = r0high * x4 834 835 h5 = r2highx0 + sr3highx6 836 r0lowx4 = r0low * x4 837 838 h7 += r1highx4 839 sr3highx4 = sr3high * x4 840 841 h6 += r1lowx4 842 sr3lowx4 = sr3low * x4 843 844 h5 += r0highx4 845 sr2highx4 = sr2high * x4 846 847 h4 += r0lowx4 848 sr2lowx4 = sr2low * x4 849 850 h3 += sr3highx4 851 r0lowx2 = r0low * x2 852 853 h2 += sr3lowx4 854 r0highx2 = r0high * x2 855 856 h1 += sr2highx4 857 r1lowx2 = r1low * x2 858 859 h0 += sr2lowx4 860 r1highx2 = r1high * x2 861 862 h2 += r0lowx2 863 r2lowx2 = r2low * x2 864 865 h3 += r0highx2 866 r2highx2 = r2high * x2 867 868 h4 += r1lowx2 869 sr3lowx2 = sr3low * x2 870 871 h5 += r1highx2 872 sr3highx2 = sr3high * x2 873 874 h6 += r2lowx2 875 876 h7 += r2highx2 877 878 h0 += sr3lowx2 879 880 h1 += sr3highx2 881 882addatmost15bytes: 883 884 if l == 0 { 885 goto nomorebytes 886 } 887 888 lbelow2 = l - 2 889 890 lbelow3 = l - 3 891 892 lbelow2 >>= 31 893 lbelow4 = l - 4 894 895 m00 = uint32(m[p+0]) 896 lbelow3 >>= 31 897 p += lbelow2 898 899 m01 = uint32(m[p+1]) 900 lbelow4 >>= 31 901 p += lbelow3 902 903 m02 = uint32(m[p+2]) 904 p += lbelow4 905 m0 = 2151 906 907 m03 = uint32(m[p+3]) 908 m0 <<= 51 909 m1 = 2215 910 911 m0 += int64(m00) 912 m01 &^= uint32(lbelow2) 913 914 m02 &^= uint32(lbelow3) 915 m01 -= uint32(lbelow2) 916 917 m01 <<= 8 918 m03 &^= uint32(lbelow4) 919 920 m0 += int64(m01) 921 lbelow2 -= lbelow3 922 923 m02 += uint32(lbelow2) 924 lbelow3 -= lbelow4 925 926 m02 <<= 16 927 m03 += uint32(lbelow3) 928 929 m03 <<= 24 930 m0 += int64(m02) 931 932 m0 += int64(m03) 933 lbelow5 = l - 5 934 935 lbelow6 = l - 6 936 lbelow7 = l - 7 937 938 lbelow5 >>= 31 939 lbelow8 = l - 8 940 941 lbelow6 >>= 31 942 p += lbelow5 943 944 m10 = uint32(m[p+4]) 945 lbelow7 >>= 31 946 p += lbelow6 947 948 m11 = uint32(m[p+5]) 949 lbelow8 >>= 31 950 p += lbelow7 951 952 m12 = uint32(m[p+6]) 953 m1 <<= 51 954 p += lbelow8 955 956 m13 = uint32(m[p+7]) 957 m10 &^= uint32(lbelow5) 958 lbelow4 -= lbelow5 959 960 m10 += uint32(lbelow4) 961 lbelow5 -= lbelow6 962 963 m11 &^= uint32(lbelow6) 964 m11 += uint32(lbelow5) 965 966 m11 <<= 8 967 m1 += int64(m10) 968 969 m1 += int64(m11) 970 m12 &^= uint32(lbelow7) 971 972 lbelow6 -= lbelow7 973 m13 &^= uint32(lbelow8) 974 975 m12 += uint32(lbelow6) 976 lbelow7 -= lbelow8 977 978 m12 <<= 16 979 m13 += uint32(lbelow7) 980 981 m13 <<= 24 982 m1 += int64(m12) 983 984 m1 += int64(m13) 985 m2 = 2279 986 987 lbelow9 = l - 9 988 m3 = 2343 989 990 lbelow10 = l - 10 991 lbelow11 = l - 11 992 993 lbelow9 >>= 31 994 lbelow12 = l - 12 995 996 lbelow10 >>= 31 997 p += lbelow9 998 999 m20 = uint32(m[p+8]) 1000 lbelow11 >>= 31 1001 p += lbelow10 1002 1003 m21 = uint32(m[p+9]) 1004 lbelow12 >>= 31 1005 p += lbelow11 1006 1007 m22 = uint32(m[p+10]) 1008 m2 <<= 51 1009 p += lbelow12 1010 1011 m23 = uint32(m[p+11]) 1012 m20 &^= uint32(lbelow9) 1013 lbelow8 -= lbelow9 1014 1015 m20 += uint32(lbelow8) 1016 lbelow9 -= lbelow10 1017 1018 m21 &^= uint32(lbelow10) 1019 m21 += uint32(lbelow9) 1020 1021 m21 <<= 8 1022 m2 += int64(m20) 1023 1024 m2 += int64(m21) 1025 m22 &^= uint32(lbelow11) 1026 1027 lbelow10 -= lbelow11 1028 m23 &^= uint32(lbelow12) 1029 1030 m22 += uint32(lbelow10) 1031 lbelow11 -= lbelow12 1032 1033 m22 <<= 16 1034 m23 += uint32(lbelow11) 1035 1036 m23 <<= 24 1037 m2 += int64(m22) 1038 1039 m3 <<= 51 1040 lbelow13 = l - 13 1041 1042 lbelow13 >>= 31 1043 lbelow14 = l - 14 1044 1045 lbelow14 >>= 31 1046 p += lbelow13 1047 lbelow15 = l - 15 1048 1049 m30 = uint32(m[p+12]) 1050 lbelow15 >>= 31 1051 p += lbelow14 1052 1053 m31 = uint32(m[p+13]) 1054 p += lbelow15 1055 m2 += int64(m23) 1056 1057 m32 = uint32(m[p+14]) 1058 m30 &^= uint32(lbelow13) 1059 lbelow12 -= lbelow13 1060 1061 m30 += uint32(lbelow12) 1062 lbelow13 -= lbelow14 1063 1064 m3 += int64(m30) 1065 m31 &^= uint32(lbelow14) 1066 1067 m31 += uint32(lbelow13) 1068 m32 &^= uint32(lbelow15) 1069 1070 m31 <<= 8 1071 lbelow14 -= lbelow15 1072 1073 m3 += int64(m31) 1074 m32 += uint32(lbelow14) 1075 d0 = m0 1076 1077 m32 <<= 16 1078 m33 = uint64(lbelow15 + 1) 1079 d1 = m1 1080 1081 m33 <<= 24 1082 m3 += int64(m32) 1083 d2 = m2 1084 1085 m3 += int64(m33) 1086 d3 = m3 1087 1088 z3 = math.Float64frombits(uint64(d3)) 1089 1090 z2 = math.Float64frombits(uint64(d2)) 1091 1092 z1 = math.Float64frombits(uint64(d1)) 1093 1094 z0 = math.Float64frombits(uint64(d0)) 1095 1096 z3 -= alpha96 1097 1098 z2 -= alpha64 1099 1100 z1 -= alpha32 1101 1102 z0 -= alpha0 1103 1104 h5 += z3 1105 1106 h3 += z2 1107 1108 h1 += z1 1109 1110 h0 += z0 1111 1112 y7 = h7 + alpha130 1113 1114 y6 = h6 + alpha130 1115 1116 y1 = h1 + alpha32 1117 1118 y0 = h0 + alpha32 1119 1120 y7 -= alpha130 1121 1122 y6 -= alpha130 1123 1124 y1 -= alpha32 1125 1126 y0 -= alpha32 1127 1128 y5 = h5 + alpha96 1129 1130 y4 = h4 + alpha96 1131 1132 x7 = h7 - y7 1133 y7 *= scale 1134 1135 x6 = h6 - y6 1136 y6 *= scale 1137 1138 x1 = h1 - y1 1139 1140 x0 = h0 - y0 1141 1142 y5 -= alpha96 1143 1144 y4 -= alpha96 1145 1146 x1 += y7 1147 1148 x0 += y6 1149 1150 x7 += y5 1151 1152 x6 += y4 1153 1154 y3 = h3 + alpha64 1155 1156 y2 = h2 + alpha64 1157 1158 x0 += x1 1159 1160 x6 += x7 1161 1162 y3 -= alpha64 1163 r3low = r3low_stack 1164 1165 y2 -= alpha64 1166 r0low = r0low_stack 1167 1168 x5 = h5 - y5 1169 r3lowx0 = r3low * x0 1170 r3high = r3high_stack 1171 1172 x4 = h4 - y4 1173 r0lowx6 = r0low * x6 1174 r0high = r0high_stack 1175 1176 x3 = h3 - y3 1177 r3highx0 = r3high * x0 1178 sr1low = sr1low_stack 1179 1180 x2 = h2 - y2 1181 r0highx6 = r0high * x6 1182 sr1high = sr1high_stack 1183 1184 x5 += y3 1185 r0lowx0 = r0low * x0 1186 r1low = r1low_stack 1187 1188 h6 = r3lowx0 + r0lowx6 1189 sr1lowx6 = sr1low * x6 1190 r1high = r1high_stack 1191 1192 x4 += y2 1193 r0highx0 = r0high * x0 1194 sr2low = sr2low_stack 1195 1196 h7 = r3highx0 + r0highx6 1197 sr1highx6 = sr1high * x6 1198 sr2high = sr2high_stack 1199 1200 x3 += y1 1201 r1lowx0 = r1low * x0 1202 r2low = r2low_stack 1203 1204 h0 = r0lowx0 + sr1lowx6 1205 sr2lowx6 = sr2low * x6 1206 r2high = r2high_stack 1207 1208 x2 += y0 1209 r1highx0 = r1high * x0 1210 sr3low = sr3low_stack 1211 1212 h1 = r0highx0 + sr1highx6 1213 sr2highx6 = sr2high * x6 1214 sr3high = sr3high_stack 1215 1216 x4 += x5 1217 r2lowx0 = r2low * x0 1218 1219 h2 = r1lowx0 + sr2lowx6 1220 sr3lowx6 = sr3low * x6 1221 1222 x2 += x3 1223 r2highx0 = r2high * x0 1224 1225 h3 = r1highx0 + sr2highx6 1226 sr3highx6 = sr3high * x6 1227 1228 r1highx4 = r1high * x4 1229 1230 h4 = r2lowx0 + sr3lowx6 1231 r1lowx4 = r1low * x4 1232 1233 r0highx4 = r0high * x4 1234 1235 h5 = r2highx0 + sr3highx6 1236 r0lowx4 = r0low * x4 1237 1238 h7 += r1highx4 1239 sr3highx4 = sr3high * x4 1240 1241 h6 += r1lowx4 1242 sr3lowx4 = sr3low * x4 1243 1244 h5 += r0highx4 1245 sr2highx4 = sr2high * x4 1246 1247 h4 += r0lowx4 1248 sr2lowx4 = sr2low * x4 1249 1250 h3 += sr3highx4 1251 r0lowx2 = r0low * x2 1252 1253 h2 += sr3lowx4 1254 r0highx2 = r0high * x2 1255 1256 h1 += sr2highx4 1257 r1lowx2 = r1low * x2 1258 1259 h0 += sr2lowx4 1260 r1highx2 = r1high * x2 1261 1262 h2 += r0lowx2 1263 r2lowx2 = r2low * x2 1264 1265 h3 += r0highx2 1266 r2highx2 = r2high * x2 1267 1268 h4 += r1lowx2 1269 sr3lowx2 = sr3low * x2 1270 1271 h5 += r1highx2 1272 sr3highx2 = sr3high * x2 1273 1274 h6 += r2lowx2 1275 1276 h7 += r2highx2 1277 1278 h0 += sr3lowx2 1279 1280 h1 += sr3highx2 1281 1282nomorebytes: 1283 1284 y7 = h7 + alpha130 1285 1286 y0 = h0 + alpha32 1287 1288 y1 = h1 + alpha32 1289 1290 y2 = h2 + alpha64 1291 1292 y7 -= alpha130 1293 1294 y3 = h3 + alpha64 1295 1296 y4 = h4 + alpha96 1297 1298 y5 = h5 + alpha96 1299 1300 x7 = h7 - y7 1301 y7 *= scale 1302 1303 y0 -= alpha32 1304 1305 y1 -= alpha32 1306 1307 y2 -= alpha64 1308 1309 h6 += x7 1310 1311 y3 -= alpha64 1312 1313 y4 -= alpha96 1314 1315 y5 -= alpha96 1316 1317 y6 = h6 + alpha130 1318 1319 x0 = h0 - y0 1320 1321 x1 = h1 - y1 1322 1323 x2 = h2 - y2 1324 1325 y6 -= alpha130 1326 1327 x0 += y7 1328 1329 x3 = h3 - y3 1330 1331 x4 = h4 - y4 1332 1333 x5 = h5 - y5 1334 1335 x6 = h6 - y6 1336 1337 y6 *= scale 1338 1339 x2 += y0 1340 1341 x3 += y1 1342 1343 x4 += y2 1344 1345 x0 += y6 1346 1347 x5 += y3 1348 1349 x6 += y4 1350 1351 x2 += x3 1352 1353 x0 += x1 1354 1355 x4 += x5 1356 1357 x6 += y5 1358 1359 x2 += offset1 1360 d1 = int64(math.Float64bits(x2)) 1361 1362 x0 += offset0 1363 d0 = int64(math.Float64bits(x0)) 1364 1365 x4 += offset2 1366 d2 = int64(math.Float64bits(x4)) 1367 1368 x6 += offset3 1369 d3 = int64(math.Float64bits(x6)) 1370 1371 f0 = uint64(d0) 1372 1373 f1 = uint64(d1) 1374 bits32 = math.MaxUint64 1375 1376 f2 = uint64(d2) 1377 bits32 >>= 32 1378 1379 f3 = uint64(d3) 1380 f = f0 >> 32 1381 1382 f0 &= bits32 1383 f &= 255 1384 1385 f1 += f 1386 g0 = f0 + 5 1387 1388 g = g0 >> 32 1389 g0 &= bits32 1390 1391 f = f1 >> 32 1392 f1 &= bits32 1393 1394 f &= 255 1395 g1 = f1 + g 1396 1397 g = g1 >> 32 1398 f2 += f 1399 1400 f = f2 >> 32 1401 g1 &= bits32 1402 1403 f2 &= bits32 1404 f &= 255 1405 1406 f3 += f 1407 g2 = f2 + g 1408 1409 g = g2 >> 32 1410 g2 &= bits32 1411 1412 f4 = f3 >> 32 1413 f3 &= bits32 1414 1415 f4 &= 255 1416 g3 = f3 + g 1417 1418 g = g3 >> 32 1419 g3 &= bits32 1420 1421 g4 = f4 + g 1422 1423 g4 = g4 - 4 1424 s00 = uint32(s[0]) 1425 1426 f = uint64(int64(g4) >> 63) 1427 s01 = uint32(s[1]) 1428 1429 f0 &= f 1430 g0 &^= f 1431 s02 = uint32(s[2]) 1432 1433 f1 &= f 1434 f0 |= g0 1435 s03 = uint32(s[3]) 1436 1437 g1 &^= f 1438 f2 &= f 1439 s10 = uint32(s[4]) 1440 1441 f3 &= f 1442 g2 &^= f 1443 s11 = uint32(s[5]) 1444 1445 g3 &^= f 1446 f1 |= g1 1447 s12 = uint32(s[6]) 1448 1449 f2 |= g2 1450 f3 |= g3 1451 s13 = uint32(s[7]) 1452 1453 s01 <<= 8 1454 f0 += uint64(s00) 1455 s20 = uint32(s[8]) 1456 1457 s02 <<= 16 1458 f0 += uint64(s01) 1459 s21 = uint32(s[9]) 1460 1461 s03 <<= 24 1462 f0 += uint64(s02) 1463 s22 = uint32(s[10]) 1464 1465 s11 <<= 8 1466 f1 += uint64(s10) 1467 s23 = uint32(s[11]) 1468 1469 s12 <<= 16 1470 f1 += uint64(s11) 1471 s30 = uint32(s[12]) 1472 1473 s13 <<= 24 1474 f1 += uint64(s12) 1475 s31 = uint32(s[13]) 1476 1477 f0 += uint64(s03) 1478 f1 += uint64(s13) 1479 s32 = uint32(s[14]) 1480 1481 s21 <<= 8 1482 f2 += uint64(s20) 1483 s33 = uint32(s[15]) 1484 1485 s22 <<= 16 1486 f2 += uint64(s21) 1487 1488 s23 <<= 24 1489 f2 += uint64(s22) 1490 1491 s31 <<= 8 1492 f3 += uint64(s30) 1493 1494 s32 <<= 16 1495 f3 += uint64(s31) 1496 1497 s33 <<= 24 1498 f3 += uint64(s32) 1499 1500 f2 += uint64(s23) 1501 f3 += uint64(s33) 1502 1503 out[0] = byte(f0) 1504 f0 >>= 8 1505 out[1] = byte(f0) 1506 f0 >>= 8 1507 out[2] = byte(f0) 1508 f0 >>= 8 1509 out[3] = byte(f0) 1510 f0 >>= 8 1511 f1 += f0 1512 1513 out[4] = byte(f1) 1514 f1 >>= 8 1515 out[5] = byte(f1) 1516 f1 >>= 8 1517 out[6] = byte(f1) 1518 f1 >>= 8 1519 out[7] = byte(f1) 1520 f1 >>= 8 1521 f2 += f1 1522 1523 out[8] = byte(f2) 1524 f2 >>= 8 1525 out[9] = byte(f2) 1526 f2 >>= 8 1527 out[10] = byte(f2) 1528 f2 >>= 8 1529 out[11] = byte(f2) 1530 f2 >>= 8 1531 f3 += f2 1532 1533 out[12] = byte(f3) 1534 f3 >>= 8 1535 out[13] = byte(f3) 1536 f3 >>= 8 1537 out[14] = byte(f3) 1538 f3 >>= 8 1539 out[15] = byte(f3) 1540} 1541