#! /usr/bin/env perl
# Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# perlasm generator for the 32-bit x86 bignum word primitives:
#
#	bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_div_words,
#	bn_add_words, bn_sub_words, bn_sub_part_words
#
# Command line, as consumed below:
#	$ARGV[0]	assembler flavour, handed to &asm_init()
#	...		optional flags; any argument matching
#			-DOPENSSL_IA32_SSE2 enables the MMX/SSE2 code paths
#	last argument	output file name (removed from @ARGV with pop)
#
# All instruction helpers (&mov, &mul, &DWP, &label, ...) come from
# x86asm.pl.  They are called with a leading '&' on purpose: several of
# them (&and, &xor, &sub, &push, &pop) share a name with a Perl builtin.

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../../perlasm");
require "x86asm.pl";

# The output file is always the last argument.  Fail loudly if it is missing
# or cannot be created: previously an unchecked open left STDOUT closed and
# the script exited successfully without producing any assembly.
$output = pop;
defined($output)
	or die "$0: missing output file argument\n";
open(STDOUT,'>',$output)
	or die "$0: cannot open $output for writing: $!\n";

&asm_init($ARGV[0]);

$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

&external_label("OPENSSL_ia32cap_P") if ($sse2);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# Buffered write errors (disk full, I/O error) only surface at close time,
# so check it; otherwise a truncated assembly file would be accepted.
close(STDOUT)
	or die "$0: error closing $output: $!\n";

# bn_mul_add_words($name)
#
# Emits the routine $name taking (r, a, num, w) as wparam(0..3).
# For each of the num words it computes r[i] += a[i]*w plus the running
# carry and returns the final carry word in eax.
#
# When $sse2 is set, the routine first tests bit 26 of the first dword of
# OPENSSL_ia32cap_P and, if set, uses an MMX-register path (pmuludq/paddq)
# with an 8-way unrolled loop followed by a one-word-at-a-time loop.
# Otherwise it falls through to the integer mul/adc path.
sub bn_mul_add_words
	{
	local($name)=@_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("maw_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry_in
		&jmp(&label("maw_sse2_entry"));

	# Eight words per iteration; mm1 carries the 64-bit running sum,
	# its low half is stored and its high half becomes the next carry.
	&set_label("maw_sse2_unrolled",16);
		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]
		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
		&movd("mm2",&DWP(0,$a,"",0));	# mm2 = a[0]
		&pmuludq("mm2","mm0");		# mm2 = w*a[0]
		&movd("mm4",&DWP(4,$a,"",0));	# mm4 = a[1]
		&pmuludq("mm4","mm0");		# mm4 = w*a[1]
		&movd("mm6",&DWP(8,$a,"",0));	# mm6 = a[2]
		&pmuludq("mm6","mm0");		# mm6 = w*a[2]
		&movd("mm7",&DWP(12,$a,"",0));	# mm7 = a[3]
		&pmuludq("mm7","mm0");		# mm7 = w*a[3]
		&paddq("mm1","mm2");		# mm1 = carry_in + r[0] + w*a[0]
		&movd("mm3",&DWP(4,$r,"",0));	# mm3 = r[1]
		&paddq("mm3","mm4");		# mm3 = r[1] + w*a[1]
		&movd("mm5",&DWP(8,$r,"",0));	# mm5 = r[2]
		&paddq("mm5","mm6");		# mm5 = r[2] + w*a[2]
		&movd("mm4",&DWP(12,$r,"",0));	# mm4 = r[3]
		&paddq("mm7","mm4");		# mm7 = r[3] + w*a[3]
		&movd(&DWP(0,$r,"",0),"mm1");
		&movd("mm2",&DWP(16,$a,"",0));	# mm2 = a[4]
		&pmuludq("mm2","mm0");		# mm2 = w*a[4]
		&psrlq("mm1",32);		# mm1 = carry0
		&movd("mm4",&DWP(20,$a,"",0));	# mm4 = a[5]
		&pmuludq("mm4","mm0");		# mm4 = w*a[5]
		&paddq("mm1","mm3");		# mm1 = carry0 + r[1] + w*a[1]
		&movd("mm6",&DWP(24,$a,"",0));	# mm6 = a[6]
		&pmuludq("mm6","mm0");		# mm6 = w*a[6]
		&movd(&DWP(4,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry1
		&movd("mm3",&DWP(28,$a,"",0));	# mm3 = a[7]
		&add($a,32);
		&pmuludq("mm3","mm0");		# mm3 = w*a[7]
		&paddq("mm1","mm5");		# mm1 = carry1 + r[2] + w*a[2]
		&movd("mm5",&DWP(16,$r,"",0));	# mm5 = r[4]
		&paddq("mm2","mm5");		# mm2 = r[4] + w*a[4]
		&movd(&DWP(8,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry2
		&paddq("mm1","mm7");		# mm1 = carry2 + r[3] + w*a[3]
		&movd("mm5",&DWP(20,$r,"",0));	# mm5 = r[5]
		&paddq("mm4","mm5");		# mm4 = r[5] + w*a[5]
		&movd(&DWP(12,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry3
		&paddq("mm1","mm2");		# mm1 = carry3 + r[4] + w*a[4]
		&movd("mm5",&DWP(24,$r,"",0));	# mm5 = r[6]
		&paddq("mm6","mm5");		# mm6 = r[6] + w*a[6]
		&movd(&DWP(16,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry4
		&paddq("mm1","mm4");		# mm1 = carry4 + r[5] + w*a[5]
		&movd("mm5",&DWP(28,$r,"",0));	# mm5 = r[7]
		&paddq("mm3","mm5");		# mm3 = r[7] + w*a[7]
		&movd(&DWP(20,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry5
		&paddq("mm1","mm6");		# mm1 = carry5 + r[6] + w*a[6]
		&movd(&DWP(24,$r,"",0),"mm1");
		&psrlq("mm1",32);		# mm1 = carry6
		&paddq("mm1","mm3");		# mm1 = carry6 + r[7] + w*a[7]
		&movd(&DWP(28,$r,"",0),"mm1");
		&lea($r,&DWP(32,$r));
		&psrlq("mm1",32);		# mm1 = carry_out

		&sub($c,8);
		&jz(&label("maw_sse2_exit"));
	&set_label("maw_sse2_entry");
		&test($c,0xfffffff8);		# at least 8 words left?
		&jnz(&label("maw_sse2_unrolled"));

	# Remaining (fewer than 8) words, one per iteration.
	# NOTE(review): this loop is entered without testing for a zero
	# count, so it appears to rely on callers never passing num == 0
	# - confirm against the C callers.
	&set_label("maw_sse2_loop",4);
		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
		&movd("mm3",&DWP(0,$r));	# mm3 = r[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($a,&DWP(4,$a));
		&paddq("mm1","mm3");		# carry += r[i]
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
		&sub($c,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($r,&DWP(4,$r));
		&jnz(&label("maw_sse2_loop"));
	&set_label("maw_sse2_exit");
		&movd("eax","mm1");		# c = carry_out
		&emms();
		&ret();

	&set_label("maw_non_sse2",16);
	}

	# function_begin prologue (done by hand because function_begin_B
	# was used above so the SSE2 path can return without it)
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$Low="eax";
	$High="edx";
	$a="ebx";
	$w="ebp";
	$r="edi";
	$c="esi";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		#

	&mov("ecx",&wparam(2));		#
	&mov($a,&wparam(1));		#

	&and("ecx",0xfffffff8);		# num / 8
	&mov($w,&wparam(3));		#

	&push("ecx");			# Up the stack for a tmp variable

	&jz(&label("maw_finish"));	# flags still from the 'and' above

	&set_label("maw_loop",16);

	# $i is a byte offset here: 8 words of 4 bytes per loop iteration
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+= c
		&adc("edx",0);			# H(t)+=carry
		&add("eax",&DWP($i,$r));	# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		}

	&comment("");
	&sub("ecx",8);
	&lea($a,&DWP(32,$a));		# lea leaves the flags from 'sub' intact
	&lea($r,&DWP(32,$r));
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# get num
	&and("ecx",7);
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	# up to 7 leftover words; $i is a word index here
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&add("eax",&DWP($i*4,$r));	# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&dec("ecx") if ($i != 7-1);
		&mov(&DWP($i*4,$r),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		&jz(&label("maw_end")) if ($i != 7-1);	# flags from 'dec'
		}
	&set_label("maw_end",0);
	&mov("eax",$c);			# return the carry

	&pop("ecx");			# drop the tmp variable pushed above

	&function_end($name);
	}

# bn_mul_words($name)
#
# Emits the routine $name taking (r, a, num, w) as wparam(0..3).
# For each of the num words it stores the low word of a[i]*w + carry into
# r[i], keeps the high word as the next carry, and returns the final carry
# in eax.  With $sse2 set, an MMX-register path is selected at run time by
# the same OPENSSL_ia32cap_P bit-26 test used in bn_mul_add_words.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("mw_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));
		&movd("mm0",&wparam(3));	# mm0 = w
		&pxor("mm1","mm1");		# mm1 = carry = 0

	# NOTE(review): no zero-count test before the first iteration;
	# appears to rely on num != 0 - confirm against callers.
	&set_label("mw_sse2_loop",16);
		&movd("mm2",&DWP(0,$a));	# mm2 = a[i]
		&pmuludq("mm2","mm0");		# a[i] *= w
		&lea($a,&DWP(4,$a));
		&paddq("mm1","mm2");		# carry += a[i]*w
		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low
		&sub($c,1);
		&psrlq("mm1",32);		# carry = carry_high
		&lea($r,&DWP(4,$r));
		&jnz(&label("mw_sse2_loop"));

		&movd("eax","mm1");		# return carry
		&emms();
		&ret();
	&set_label("mw_non_sse2",16);
	}

	# function_begin prologue
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$Low="eax";
	$High="edx";
	$a="ebx";
	$w="ecx";
	$r="edi";
	$c="esi";
	$num="ebp";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		#
	&mov($a,&wparam(1));		#
	&mov($num,&wparam(2));		#
	&mov($w,&wparam(3));		#

	&and($num,0xfffffff8);		# num / 8
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	# $i is a byte offset: 8 words per loop iteration
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		# XXX

		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);

		&mov($c,"edx");			# c=  H(t);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	# up to 7 leftover words; $i is a word index here
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		# XXX
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		&dec($num) if ($i != 7-1);
		&jz(&label("mw_end")) if ($i != 7-1);
		}
	&set_label("mw_end",0);
	&mov("eax",$c);			# return the carry

	&function_end($name);
	}

# bn_sqr_words($name)
#
# Emits the routine $name taking (r, a, num) as wparam(0..2).
# For each of the num input words it stores the 64-bit square of a[i]
# into the two output words r[2*i] (low) and r[2*i+1] (high).  With $sse2
# set, an MMX-register path is selected at run time by the
# OPENSSL_ia32cap_P bit-26 test.
sub bn_sqr_words
	{
	local($name)=@_;

	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

	$r="eax";
	$a="edx";
	$c="ecx";

	if ($sse2) {
		&picmeup("eax","OPENSSL_ia32cap_P");
		&bt(&DWP(0,"eax"),26);
		&jnc(&label("sqr_non_sse2"));

		&mov($r,&wparam(0));
		&mov($a,&wparam(1));
		&mov($c,&wparam(2));

	# NOTE(review): no zero-count test before the first iteration;
	# appears to rely on num != 0 - confirm against callers.
	&set_label("sqr_sse2_loop",16);
		&movd("mm0",&DWP(0,$a));	# mm0 = a[i]
		&pmuludq("mm0","mm0");		# a[i] *= a[i]
		&lea($a,&DWP(4,$a));		# a++
		&movq(&QWP(0,$r),"mm0");	# r[i] = a[i]*a[i]
		&sub($c,1);
		&lea($r,&DWP(8,$r));		# r += 2
		&jnz(&label("sqr_sse2_loop"));

		&emms();
		&ret();
	&set_label("sqr_non_sse2",16);
	}

	# function_begin prologue
	&push("ebp");
	&push("ebx");
	&push("esi");
	&push("edi");

	&comment("");
	$r="esi";
	$a="edi";
	$num="ebx";

	&mov($r,&wparam(0));		#
	&mov($a,&wparam(1));		#
	&mov($num,&wparam(2));		#

	&and($num,0xfffffff8);		# num / 8
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	# $i is the byte offset into a; the output offset is twice that
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# *a
		# XXX
		&mul("eax");				# *a * *a
		&mov(&DWP($i*2,$r,"",0),"eax");		#
		&mov(&DWP($i*2+4,$r,"",0),"edx");	#
		}

	&comment("");
	&add($a,32);
	&add($r,64);
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);
	&jz(&label("sw_end"));

	# up to 7 leftover words; $i is a word index here
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# *a
		# XXX
		&mul("eax");				# *a * *a
		&mov(&DWP($i*8,$r,"",0),"eax");		#
		&dec($num) if ($i != 7-1);
		&mov(&DWP($i*8+4,$r,"",0),"edx");
		&jz(&label("sw_end")) if ($i != 7-1);	# flags from 'dec'
		}
	&set_label("sw_end",0);

	&function_end($name);
	}

# bn_div_words($name)
#
# Emits the routine $name: loads wparam(0) into edx, wparam(1) into eax
# and wparam(2) into ecx, then executes "div ecx", i.e. divides the 64-bit
# value edx:eax by ecx.  The x86 div instruction leaves the quotient in
# eax, which is what the routine returns.
sub bn_div_words
	{
	local($name)=@_;

	&function_begin_B($name,"");
	&mov("edx",&wparam(0));		#
	&mov("eax",&wparam(1));		#
	&mov("ecx",&wparam(2));		#
	&div("ecx");
	&ret();
	&function_end_B($name);
	}

# bn_add_words($name)
#
# Emits the routine $name taking (r, a, b, num) as wparam(0..3).
# Computes r[i] = a[i] + b[i] + carry for num words and returns the final
# carry (0 or 1) in eax.  The carry is kept in a register ($c) rather than
# in the CPU flag, so each word needs two add/adc pairs.
sub bn_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num / 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);			# add incoming carry
		&mov($c,0);
		&adc($c,$c);			# c = carry out of that add
		&add($tmp1,$tmp2);
		&adc($c,0);			# c += carry out of a+b
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);
	&jz(&label("aw_end"));

	# up to 7 leftover words
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&add($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("aw_end")) if ($i != 6);	# flags from 'dec'
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}

# bn_sub_words($name)
#
# Emits the routine $name taking (r, a, b, num) as wparam(0..3).
# Computes r[i] = a[i] - b[i] - borrow for num words and returns the final
# borrow (0 or 1) in eax.
#
# The "aw_*" label names are the same ones bn_add_words uses.
# NOTE(review): this presumably relies on &label() in x86asm.pl scoping
# names per generated function - confirm before renaming.
sub bn_sub_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num / 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# subtract incoming borrow
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of that sub
		&sub($tmp1,$tmp2);
		&adc($c,0);			# c += borrow out of a-b
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);
	&jz(&label("aw_end"));

	# up to 7 leftover words
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("aw_end")) if ($i != 6);	# flags from 'dec'
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}

# bn_sub_part_words($name)
#
# Emits the routine $name taking (r, a, b, num, dl) as wparam(0..4).
#
# Phase 1: r[i] = a[i] - b[i] - borrow for the first num words, exactly as
#          in bn_sub_words, except that the tail rounds advance the a/b/r
#          pointers word by word so they end up just past the last word
#          processed.
# Phase 2: depends on the sign of dl:
#   dl == 0  nothing more to do.
#   dl <  0  for -dl further words: r[i] = 0 - b[i] - borrow.
#   dl >  0  for dl further words: r[i] = a[i] - borrow.  As soon as a
#            subtraction produces no borrow, control jumps into the
#            "pw_nc*" code, which only copies the remaining words of a
#            into r and finishes with the borrow cleared.
#
# The final borrow is returned in eax ($c).
sub bn_sub_part_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num / 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);
	&jz(&label("aw_end"));

	# Tail rounds use offset 0 and bump the pointers each round, so that
	# a/b/r are positioned correctly for the dl phase below.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP(0,$a,"",0));	# *a
		&mov($tmp2,&DWP(0,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
		&add($a, 4);
		&add($b, 4);
		&add($r, 4);
		&dec($num) if ($i != 6);
		&jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

	&cmp(&wparam(4),0);
	&je(&label("pw_end"));

	&mov($num,&wparam(4));		# get dl
	&cmp($num,0);
	&je(&label("pw_end"));
	&jge(&label("pw_pos"));

	&comment("pw_neg");
	&mov($tmp2,0);
	&sub($tmp2,$num);
	&mov($num,$tmp2);		# num = -dl
	&and($num,0xfffffff8);		# num / 8
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("dl<0 Round $i");

		&mov($tmp1,0);
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($tmp2,&wparam(4));		# get dl
	&mov($num,0);
	&sub($num,$tmp2);		# num = -dl
	&and($num,7);
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("dl<0 Tail Round $i");
		&mov($tmp1,0);
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("pw_end")) if ($i != 6);
		}

	&jmp(&label("pw_end"));

	&set_label("pw_pos",0);

	&and($num,0xfffffff8);		# num / 8
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);

	for ($i=0; $i<8; $i++)
		{
		&comment("dl>0 Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_nc".$i));	# no borrow: switch to plain copy
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($num,&wparam(4));		# get dl
	&and($num,7);
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("dl>0 Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_tail_nc".$i));	# no borrow: switch to plain copy
		&dec($num) if ($i != 6);
		&jz(&label("pw_end")) if ($i != 6);
		}
	&mov($c,1);
	&jmp(&label("pw_end"));

	# Copy-only code for dl > 0 once the borrow has been absorbed.  The
	# "pw_nc$i" labels are placed after word $i's copy, so the jumps
	# above land just past the word they have already stored.
	&set_label("pw_nc_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_nc".$i,0);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_nc_loop"));

	&mov($num,&wparam(4));		# get dl
	&and($num,7);
	&jz(&label("pw_nc_end"));

	for ($i=0; $i<7; $i++)
		{
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_tail_nc".$i,0);
		&dec($num) if ($i != 6);
		&jz(&label("pw_nc_end")) if ($i != 6);
		}

	&set_label("pw_nc_end",0);
	&mov($c,0);			# borrow was absorbed: return 0

	&set_label("pw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}