Lines Matching refs:np

97 my $np="%rdx";	# const BN_ULONG *np,
139 $np="%r13"; # reassigned argument
173 mov %rdx, $np # reassigned argument
175 mov $np, $tmp
178 sub \$-128, $np
180 and \$4095, $tmp # see if $np crosses page
187 # cause >2x performance degradation here, so if $np does
191 vmovdqu 32*0-128($np), $ACC0
193 vmovdqu 32*1-128($np), $ACC1
194 vmovdqu 32*2-128($np), $ACC2
195 vmovdqu 32*3-128($np), $ACC3
196 vmovdqu 32*4-128($np), $ACC4
197 vmovdqu 32*5-128($np), $ACC5
198 vmovdqu 32*6-128($np), $ACC6
199 vmovdqu 32*7-128($np), $ACC7
200 vmovdqu 32*8-128($np), $ACC8
201 lea $FrameSize+128(%rsp),$np
202 vmovdqu $ACC0, 32*0-128($np)
203 vmovdqu $ACC1, 32*1-128($np)
204 vmovdqu $ACC2, 32*2-128($np)
205 vmovdqu $ACC3, 32*3-128($np)
206 vmovdqu $ACC4, 32*4-128($np)
207 vmovdqu $ACC5, 32*5-128($np)
208 vmovdqu $ACC6, 32*6-128($np)
209 vmovdqu $ACC7, 32*7-128($np)
210 vmovdqu $ACC8, 32*8-128($np)
211 vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero
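
The block at source lines 173-211 above is the squaring routine's guard against the modulus straddling a memory page: $np is biased by 128 (sub \$-128, a shorter encoding than add \$128) so that most of the later 32*k-128($np) displacements fit in a single signed byte, the low 12 address bits are tested (and \$4095), and when the run of nine 32-byte loads would cross a 4 KiB boundary the modulus is copied into the stack frame, with a trailing zero vector supplied by $ACC9, and $np is repointed at the copy. A minimal C sketch of that decision follows; the helper name and the caller-supplied scratch buffer are hypothetical, not part of the OpenSSL code, and the exact threshold used by the assembly may differ slightly.

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical helper mirroring the "$np crosses page" guard above.
     * If the window of nine 32-byte loads of the modulus would straddle a
     * 4 KiB page, copy it into scratch space and append a zero vector
     * (as the $ACC9 store does) so every vmovdqu stays within one page. */
    static const void *pin_np(const void *np, unsigned char scratch[10 * 32])
    {
        uintptr_t in_page = (uintptr_t)np & 4095;    /* and \$4095, $tmp     */

        if (in_page + 9 * 32 > 4096) {               /* window crosses page  */
            memcpy(scratch, np, 9 * 32);             /* the vmovdqu copies   */
            memset(scratch + 9 * 32, 0, 32);         /* $ACC9 is zero        */
            return scratch;                          /* lea ...(%rsp), $np   */
        }
        return np;                                   /* keep caller's np     */
    }
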
472 imulq -128($np), %rax
476 imulq 8-128($np), %rax
480 imulq 16-128($np), %rax
483 imulq 24-128($np), %rdx
498 vpmuludq 32*1-128($np), $Y1, $TEMP0
500 imulq -128($np), %rax
503 vpmuludq 32*2-128($np), $Y1, $TEMP1
505 imulq 8-128($np), %rax
507 vpmuludq 32*3-128($np), $Y1, $TEMP2
512 imulq 16-128($np), %rax
515 vpmuludq 32*4-128($np), $Y1, $TEMP0
519 vpmuludq 32*5-128($np), $Y1, $TEMP1
523 vpmuludq 32*6-128($np), $Y1, $TEMP2
526 vpmuludq 32*7-128($np), $Y1, $TEMP0
528 vpmuludq 32*8-128($np), $Y1, $TEMP1
530 #vmovdqu 32*1-8-128($np), $TEMP2 # moved below
532 #vmovdqu 32*2-8-128($np), $TEMP0 # moved below
535 vpmuludq 32*1-8-128($np), $Y2, $TEMP2 # see above
536 vmovdqu 32*3-8-128($np), $TEMP1
538 imulq -128($np), %rax
540 vpmuludq 32*2-8-128($np), $Y2, $TEMP0 # see above
541 vmovdqu 32*4-8-128($np), $TEMP2
544 imulq 8-128($np), %rax
549 vmovdqu 32*5-8-128($np), $TEMP0
553 vmovdqu 32*6-8-128($np), $TEMP1
559 .byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 # vmovdqu 32*7-8-128($np), $TEMP2
563 vmovdqu 32*8-8-128($np), $TEMP0
566 vmovdqu 32*9-8-128($np), $ACC9
568 imulq -128($np), %rax
571 vmovdqu 32*1-16-128($np), $TEMP1
575 vmovdqu 32*2-16-128($np), $TEMP2
581 vmovdqu 32*1-24-128($np), $ACC0
583 vmovdqu 32*3-16-128($np), $TEMP0
587 .byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff # vmovdqu 32*4-16-128($np), $TEMP1
591 vmovdqu 32*5-16-128($np), $TEMP2
597 vmovdqu 32*6-16-128($np), $TEMP0
600 vmovdqu 32*7-16-128($np), $TEMP1
603 vmovdqu 32*8-16-128($np), $TEMP2
607 vmovdqu 32*9-16-128($np), $TEMP0
611 #vmovdqu 32*2-24-128($np), $TEMP1 # moved below
618 vmovdqu 32*3-24-128($np), $TEMP2
623 vpmuludq 32*2-24-128($np), $Y2, $TEMP1 # see above
624 vmovdqu 32*4-24-128($np), $TEMP0
626 imulq -128($np), %rax
630 vmovdqu 32*5-24-128($np), $TEMP1
633 imulq 8-128($np), %rax
639 vmovdqu 32*6-24-128($np), $TEMP2
642 imulq 16-128($np), %rax
645 vmovdqu 32*7-24-128($np), $TEMP0
646 imulq 24-128($np), %rdx # future $r3
651 vmovdqu 32*8-24-128($np), $TEMP1
656 vmovdqu 32*9-24-128($np), $TEMP2
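
In the reduction body above, the scalar imulq ...($np) and vector vpmuludq ...($np) lines all serve one purpose: multiply the modulus limbs by a per-iteration factor (carried in $Y1/$Y2, and in %rax/%rdx for the scalar low words) and fold the products into the accumulator so its low limbs become divisible by the radix and can be shifted out. The file does this on packed redundant limbs spread across YMM registers; purely for orientation, here is a scalar 64-bit textbook Montgomery step, not the code's vectorized schedule (uses the GCC/Clang unsigned __int128 extension).

    #include <stddef.h>
    #include <stdint.h>

    /* One word-level Montgomery reduction step (textbook CIOS form), shown
     * only for orientation.  t[] is the running accumulator, n[] the modulus,
     * n0 = -n[0]^{-1} mod 2^64.  The imulq/vpmuludq lines against $np above
     * perform the analogous multiply-accumulate on packed limbs. */
    static uint64_t mont_step(uint64_t *t, const uint64_t *n, size_t len, uint64_t n0)
    {
        uint64_t m = t[0] * n0;              /* reduction factor for this step */
        unsigned __int128 carry = 0;

        for (size_t i = 0; i < len; i++) {
            unsigned __int128 acc = (unsigned __int128)m * n[i] + t[i] + carry;
            t[i]  = (uint64_t)acc;           /* t[0] becomes 0 by choice of m  */
            carry = acc >> 64;
        }
        /* caller folds the carry into the top word and shifts t down a word */
        return (uint64_t)carry;
    }
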
836 my $np="%rcx"; # const BN_ULONG *np,
923 mov $np, $tmp
925 sub \$-128,$np
928 and \$4095, $tmp # see if $np crosses page
935 # cause severe performance degradation here, so if $np does
939 vmovdqu 32*0-128($np), $ACC0
941 vmovdqu 32*1-128($np), $ACC1
942 vmovdqu 32*2-128($np), $ACC2
943 vmovdqu 32*3-128($np), $ACC3
944 vmovdqu 32*4-128($np), $ACC4
945 vmovdqu 32*5-128($np), $ACC5
946 vmovdqu 32*6-128($np), $ACC6
947 vmovdqu 32*7-128($np), $ACC7
948 vmovdqu 32*8-128($np), $ACC8
949 lea 64+128(%rsp),$np
950 vmovdqu $ACC0, 32*0-128($np)
952 vmovdqu $ACC1, 32*1-128($np)
954 vmovdqu $ACC2, 32*2-128($np)
956 vmovdqu $ACC3, 32*3-128($np)
958 vmovdqu $ACC4, 32*4-128($np)
960 vmovdqu $ACC5, 32*5-128($np)
962 vmovdqu $ACC6, 32*6-128($np)
964 vmovdqu $ACC7, 32*7-128($np)
966 vmovdqu $ACC8, 32*8-128($np)
968 vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero after vzeroall
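
Source lines 923-968 repeat the same page-crossing guard for the multiplication routine; only the scratch location differs (64+128(%rsp) instead of $FrameSize+128(%rsp)), and the trailing zero vector again comes from $ACC9, still zero after vzeroall. The C sketch following the line-211 block above applies here unchanged apart from where the copy lands.
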
1030 imulq -128($np),%rax
1033 imulq 8-128($np),%rax
1036 imulq 16-128($np),%rax
1039 imulq 24-128($np),%rdx
1043 vpmuludq 32*1-128($np),$Yi,$TEMP2
1046 vpmuludq 32*2-128($np),$Yi,$TEMP0
1048 vpmuludq 32*3-128($np),$Yi,$TEMP1
1050 vpmuludq 32*4-128($np),$Yi,$TEMP2
1052 vpmuludq 32*5-128($np),$Yi,$TEMP0
1054 vpmuludq 32*6-128($np),$Yi,$TEMP1
1056 vpmuludq 32*7-128($np),$Yi,$TEMP2
1059 vpmuludq 32*8-128($np),$Yi,$TEMP0
1107 imulq -128($np),%rax
1109 vmovdqu -8+32*1-128($np),$TEMP0
1111 imulq 8-128($np),%rax
1113 vmovdqu -8+32*2-128($np),$TEMP1
1115 imulq 16-128($np),%rdx
1121 vmovdqu -8+32*3-128($np),$TEMP2
1124 vmovdqu -8+32*4-128($np),$TEMP0
1127 vmovdqu -8+32*5-128($np),$TEMP1
1130 vmovdqu -8+32*6-128($np),$TEMP2
1133 vmovdqu -8+32*7-128($np),$TEMP0
1136 vmovdqu -8+32*8-128($np),$TEMP1
1139 vmovdqu -8+32*9-128($np),$TEMP2
1187 vmovdqu -16+32*1-128($np),$TEMP0
1189 imulq -128($np),%rax
1191 vmovdqu -16+32*2-128($np),$TEMP1
1192 imulq 8-128($np),%rdx
1198 vmovdqu -16+32*3-128($np),$TEMP2
1201 vmovdqu -16+32*4-128($np),$TEMP0
1204 vmovdqu -16+32*5-128($np),$TEMP1
1207 vmovdqu -16+32*6-128($np),$TEMP2
1210 vmovdqu -16+32*7-128($np),$TEMP0
1213 vmovdqu -16+32*8-128($np),$TEMP1
1216 vmovdqu -16+32*9-128($np),$TEMP2
1263 vmovdqu -24+32*1-128($np),$TEMP0
1264 imulq -128($np),%rax
1268 vmovdqu -24+32*2-128($np),$TEMP1
1271 vmovdqu -24+32*3-128($np),$TEMP2
1276 vmovdqu -24+32*4-128($np),$TEMP0
1278 vmovdqu -24+32*5-128($np),$TEMP1
1281 vmovdqu -24+32*6-128($np),$TEMP2
1284 vmovdqu -24+32*7-128($np),$TEMP0
1287 vmovdqu -24+32*8-128($np),$TEMP1
1290 vmovdqu -24+32*9-128($np),$TEMP2