1#! /usr/bin/env perl 2# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the OpenSSL license (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9# 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16# 17# This module implements support for AES instructions as per PowerISA 18# specification version 2.07, first implemented by POWER8 processor. 19# The module is endian-agnostic in sense that it supports both big- 20# and little-endian cases. Data alignment in parallelizable modes is 21# handled with VSX loads and stores, which implies MSR.VSX flag being 22# set. It should also be noted that ISA specification doesn't prohibit 23# alignment exceptions for these instructions on page boundaries. 24# Initially alignment was handled in pure AltiVec/VMX way [when data 25# is aligned programmatically, which in turn guarantees exception- 26# free execution], but it turned to hamper performance when vcipher 27# instructions are interleaved. It's reckoned that eventual 28# misalignment penalties at page boundaries are in average lower 29# than additional overhead in pure AltiVec approach. 30# 31# May 2016 32# 33# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 34# systems were measured. 35# 36###################################################################### 37# Current large-block performance in cycles per byte processed with 38# 128-bit key (less is better). 39# 40# CBC en-/decrypt CTR XTS 41# POWER8[le] 3.96/0.72 0.74 1.1 42# POWER8[be] 3.75/0.65 0.66 1.0 43 44$flavour = shift; 45 46if ($flavour =~ /64/) { 47 $SIZE_T =8; 48 $LRSAVE =2*$SIZE_T; 49 $STU ="stdu"; 50 $POP ="ld"; 51 $PUSH ="std"; 52 $UCMP ="cmpld"; 53 $SHL ="sldi"; 54} elsif ($flavour =~ /32/) { 55 $SIZE_T =4; 56 $LRSAVE =$SIZE_T; 57 $STU ="stwu"; 58 $POP ="lwz"; 59 $PUSH ="stw"; 60 $UCMP ="cmplw"; 61 $SHL ="slwi"; 62} else { die "nonsense $flavour"; } 63 64$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 65 66$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 67( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 68( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 69die "can't locate ppc-xlate.pl"; 70 71open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 72 73$FRAME=8*$SIZE_T; 74$prefix="aes_hw"; 75 76$sp="r1"; 77$vrsave="r12"; 78 79######################################################################### 80{{{ # Key setup procedures # 81my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 82my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 83my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 84 85$code.=<<___; 86.machine "any" 87 88.text 89 90.align 7 91rcon: 92.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 93.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 94.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 95.long 0,0,0,0 ?asis 96Lconsts: 97 mflr r0 98 bcl 20,31,\$+4 99 mflr $ptr #vvvvv "distance between . and rcon 100 addi $ptr,$ptr,-0x48 101 mtlr r0 102 blr 103 .long 0 104 .byte 0,12,0x14,0,0,0,0,0 105.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 106 107.globl .${prefix}_set_encrypt_key 108.align 5 109.${prefix}_set_encrypt_key: 110Lset_encrypt_key: 111 mflr r11 112 $PUSH r11,$LRSAVE($sp) 113 114 li $ptr,-1 115 ${UCMP}i $inp,0 116 beq- Lenc_key_abort # if ($inp==0) return -1; 117 ${UCMP}i $out,0 118 beq- Lenc_key_abort # if ($out==0) return -1; 119 li $ptr,-2 120 cmpwi $bits,128 121 blt- Lenc_key_abort 122 cmpwi $bits,256 123 bgt- Lenc_key_abort 124 andi. r0,$bits,0x3f 125 bne- Lenc_key_abort 126 127 lis r0,0xfff0 128 mfspr $vrsave,256 129 mtspr 256,r0 130 131 bl Lconsts 132 mtlr r11 133 134 neg r9,$inp 135 lvx $in0,0,$inp 136 addi $inp,$inp,15 # 15 is not typo 137 lvsr $key,0,r9 # borrow $key 138 li r8,0x20 139 cmpwi $bits,192 140 lvx $in1,0,$inp 141 le?vspltisb $mask,0x0f # borrow $mask 142 lvx $rcon,0,$ptr 143 le?vxor $key,$key,$mask # adjust for byte swap 144 lvx $mask,r8,$ptr 145 addi $ptr,$ptr,0x10 146 vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 147 li $cnt,8 148 vxor $zero,$zero,$zero 149 mtctr $cnt 150 151 ?lvsr $outperm,0,$out 152 vspltisb $outmask,-1 153 lvx $outhead,0,$out 154 ?vperm $outmask,$zero,$outmask,$outperm 155 156 blt Loop128 157 addi $inp,$inp,8 158 beq L192 159 addi $inp,$inp,8 160 b L256 161 162.align 4 163Loop128: 164 vperm $key,$in0,$in0,$mask # rotate-n-splat 165 vsldoi $tmp,$zero,$in0,12 # >>32 166 vperm $outtail,$in0,$in0,$outperm # rotate 167 vsel $stage,$outhead,$outtail,$outmask 168 vmr $outhead,$outtail 169 vcipherlast $key,$key,$rcon 170 stvx $stage,0,$out 171 addi $out,$out,16 172 173 vxor $in0,$in0,$tmp 174 vsldoi $tmp,$zero,$tmp,12 # >>32 175 vxor $in0,$in0,$tmp 176 vsldoi $tmp,$zero,$tmp,12 # >>32 177 vxor $in0,$in0,$tmp 178 vadduwm $rcon,$rcon,$rcon 179 vxor $in0,$in0,$key 180 bdnz Loop128 181 182 lvx $rcon,0,$ptr # last two round keys 183 184 vperm $key,$in0,$in0,$mask # rotate-n-splat 185 vsldoi $tmp,$zero,$in0,12 # >>32 186 vperm $outtail,$in0,$in0,$outperm # rotate 187 vsel $stage,$outhead,$outtail,$outmask 188 vmr $outhead,$outtail 189 vcipherlast $key,$key,$rcon 190 stvx $stage,0,$out 191 addi $out,$out,16 192 193 vxor $in0,$in0,$tmp 194 vsldoi $tmp,$zero,$tmp,12 # >>32 195 vxor $in0,$in0,$tmp 196 vsldoi $tmp,$zero,$tmp,12 # >>32 197 vxor $in0,$in0,$tmp 198 vadduwm $rcon,$rcon,$rcon 199 vxor $in0,$in0,$key 200 201 vperm $key,$in0,$in0,$mask # rotate-n-splat 202 vsldoi $tmp,$zero,$in0,12 # >>32 203 vperm $outtail,$in0,$in0,$outperm # rotate 204 vsel $stage,$outhead,$outtail,$outmask 205 vmr $outhead,$outtail 206 vcipherlast $key,$key,$rcon 207 stvx $stage,0,$out 208 addi $out,$out,16 209 210 vxor $in0,$in0,$tmp 211 vsldoi $tmp,$zero,$tmp,12 # >>32 212 vxor $in0,$in0,$tmp 213 vsldoi $tmp,$zero,$tmp,12 # >>32 214 vxor $in0,$in0,$tmp 215 vxor $in0,$in0,$key 216 vperm $outtail,$in0,$in0,$outperm # rotate 217 vsel $stage,$outhead,$outtail,$outmask 218 vmr $outhead,$outtail 219 stvx $stage,0,$out 220 221 addi $inp,$out,15 # 15 is not typo 222 addi $out,$out,0x50 223 224 li $rounds,10 225 b Ldone 226 227.align 4 228L192: 229 lvx $tmp,0,$inp 230 li $cnt,4 231 vperm $outtail,$in0,$in0,$outperm # rotate 232 vsel $stage,$outhead,$outtail,$outmask 233 vmr $outhead,$outtail 234 stvx $stage,0,$out 235 addi $out,$out,16 236 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 237 vspltisb $key,8 # borrow $key 238 mtctr $cnt 239 vsububm $mask,$mask,$key # adjust the mask 240 241Loop192: 242 vperm $key,$in1,$in1,$mask # roate-n-splat 243 vsldoi $tmp,$zero,$in0,12 # >>32 244 vcipherlast $key,$key,$rcon 245 246 vxor $in0,$in0,$tmp 247 vsldoi $tmp,$zero,$tmp,12 # >>32 248 vxor $in0,$in0,$tmp 249 vsldoi $tmp,$zero,$tmp,12 # >>32 250 vxor $in0,$in0,$tmp 251 252 vsldoi $stage,$zero,$in1,8 253 vspltw $tmp,$in0,3 254 vxor $tmp,$tmp,$in1 255 vsldoi $in1,$zero,$in1,12 # >>32 256 vadduwm $rcon,$rcon,$rcon 257 vxor $in1,$in1,$tmp 258 vxor $in0,$in0,$key 259 vxor $in1,$in1,$key 260 vsldoi $stage,$stage,$in0,8 261 262 vperm $key,$in1,$in1,$mask # rotate-n-splat 263 vsldoi $tmp,$zero,$in0,12 # >>32 264 vperm $outtail,$stage,$stage,$outperm # rotate 265 vsel $stage,$outhead,$outtail,$outmask 266 vmr $outhead,$outtail 267 vcipherlast $key,$key,$rcon 268 stvx $stage,0,$out 269 addi $out,$out,16 270 271 vsldoi $stage,$in0,$in1,8 272 vxor $in0,$in0,$tmp 273 vsldoi $tmp,$zero,$tmp,12 # >>32 274 vperm $outtail,$stage,$stage,$outperm # rotate 275 vsel $stage,$outhead,$outtail,$outmask 276 vmr $outhead,$outtail 277 vxor $in0,$in0,$tmp 278 vsldoi $tmp,$zero,$tmp,12 # >>32 279 vxor $in0,$in0,$tmp 280 stvx $stage,0,$out 281 addi $out,$out,16 282 283 vspltw $tmp,$in0,3 284 vxor $tmp,$tmp,$in1 285 vsldoi $in1,$zero,$in1,12 # >>32 286 vadduwm $rcon,$rcon,$rcon 287 vxor $in1,$in1,$tmp 288 vxor $in0,$in0,$key 289 vxor $in1,$in1,$key 290 vperm $outtail,$in0,$in0,$outperm # rotate 291 vsel $stage,$outhead,$outtail,$outmask 292 vmr $outhead,$outtail 293 stvx $stage,0,$out 294 addi $inp,$out,15 # 15 is not typo 295 addi $out,$out,16 296 bdnz Loop192 297 298 li $rounds,12 299 addi $out,$out,0x20 300 b Ldone 301 302.align 4 303L256: 304 lvx $tmp,0,$inp 305 li $cnt,7 306 li $rounds,14 307 vperm $outtail,$in0,$in0,$outperm # rotate 308 vsel $stage,$outhead,$outtail,$outmask 309 vmr $outhead,$outtail 310 stvx $stage,0,$out 311 addi $out,$out,16 312 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 313 mtctr $cnt 314 315Loop256: 316 vperm $key,$in1,$in1,$mask # rotate-n-splat 317 vsldoi $tmp,$zero,$in0,12 # >>32 318 vperm $outtail,$in1,$in1,$outperm # rotate 319 vsel $stage,$outhead,$outtail,$outmask 320 vmr $outhead,$outtail 321 vcipherlast $key,$key,$rcon 322 stvx $stage,0,$out 323 addi $out,$out,16 324 325 vxor $in0,$in0,$tmp 326 vsldoi $tmp,$zero,$tmp,12 # >>32 327 vxor $in0,$in0,$tmp 328 vsldoi $tmp,$zero,$tmp,12 # >>32 329 vxor $in0,$in0,$tmp 330 vadduwm $rcon,$rcon,$rcon 331 vxor $in0,$in0,$key 332 vperm $outtail,$in0,$in0,$outperm # rotate 333 vsel $stage,$outhead,$outtail,$outmask 334 vmr $outhead,$outtail 335 stvx $stage,0,$out 336 addi $inp,$out,15 # 15 is not typo 337 addi $out,$out,16 338 bdz Ldone 339 340 vspltw $key,$in0,3 # just splat 341 vsldoi $tmp,$zero,$in1,12 # >>32 342 vsbox $key,$key 343 344 vxor $in1,$in1,$tmp 345 vsldoi $tmp,$zero,$tmp,12 # >>32 346 vxor $in1,$in1,$tmp 347 vsldoi $tmp,$zero,$tmp,12 # >>32 348 vxor $in1,$in1,$tmp 349 350 vxor $in1,$in1,$key 351 b Loop256 352 353.align 4 354Ldone: 355 lvx $in1,0,$inp # redundant in aligned case 356 vsel $in1,$outhead,$in1,$outmask 357 stvx $in1,0,$inp 358 li $ptr,0 359 mtspr 256,$vrsave 360 stw $rounds,0($out) 361 362Lenc_key_abort: 363 mr r3,$ptr 364 blr 365 .long 0 366 .byte 0,12,0x14,1,0,0,3,0 367 .long 0 368.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 369 370.globl .${prefix}_set_decrypt_key 371.align 5 372.${prefix}_set_decrypt_key: 373 $STU $sp,-$FRAME($sp) 374 mflr r10 375 $PUSH r10,$FRAME+$LRSAVE($sp) 376 bl Lset_encrypt_key 377 mtlr r10 378 379 cmpwi r3,0 380 bne- Ldec_key_abort 381 382 slwi $cnt,$rounds,4 383 subi $inp,$out,240 # first round key 384 srwi $rounds,$rounds,1 385 add $out,$inp,$cnt # last round key 386 mtctr $rounds 387 388Ldeckey: 389 lwz r0, 0($inp) 390 lwz r6, 4($inp) 391 lwz r7, 8($inp) 392 lwz r8, 12($inp) 393 addi $inp,$inp,16 394 lwz r9, 0($out) 395 lwz r10,4($out) 396 lwz r11,8($out) 397 lwz r12,12($out) 398 stw r0, 0($out) 399 stw r6, 4($out) 400 stw r7, 8($out) 401 stw r8, 12($out) 402 subi $out,$out,16 403 stw r9, -16($inp) 404 stw r10,-12($inp) 405 stw r11,-8($inp) 406 stw r12,-4($inp) 407 bdnz Ldeckey 408 409 xor r3,r3,r3 # return value 410Ldec_key_abort: 411 addi $sp,$sp,$FRAME 412 blr 413 .long 0 414 .byte 0,12,4,1,0x80,0,3,0 415 .long 0 416.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 417___ 418}}} 419######################################################################### 420{{{ # Single block en- and decrypt procedures # 421sub gen_block () { 422my $dir = shift; 423my $n = $dir eq "de" ? "n" : ""; 424my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 425 426$code.=<<___; 427.globl .${prefix}_${dir}crypt 428.align 5 429.${prefix}_${dir}crypt: 430 lwz $rounds,240($key) 431 lis r0,0xfc00 432 mfspr $vrsave,256 433 li $idx,15 # 15 is not typo 434 mtspr 256,r0 435 436 lvx v0,0,$inp 437 neg r11,$out 438 lvx v1,$idx,$inp 439 lvsl v2,0,$inp # inpperm 440 le?vspltisb v4,0x0f 441 ?lvsl v3,0,r11 # outperm 442 le?vxor v2,v2,v4 443 li $idx,16 444 vperm v0,v0,v1,v2 # align [and byte swap in LE] 445 lvx v1,0,$key 446 ?lvsl v5,0,$key # keyperm 447 srwi $rounds,$rounds,1 448 lvx v2,$idx,$key 449 addi $idx,$idx,16 450 subi $rounds,$rounds,1 451 ?vperm v1,v1,v2,v5 # align round key 452 453 vxor v0,v0,v1 454 lvx v1,$idx,$key 455 addi $idx,$idx,16 456 mtctr $rounds 457 458Loop_${dir}c: 459 ?vperm v2,v2,v1,v5 460 v${n}cipher v0,v0,v2 461 lvx v2,$idx,$key 462 addi $idx,$idx,16 463 ?vperm v1,v1,v2,v5 464 v${n}cipher v0,v0,v1 465 lvx v1,$idx,$key 466 addi $idx,$idx,16 467 bdnz Loop_${dir}c 468 469 ?vperm v2,v2,v1,v5 470 v${n}cipher v0,v0,v2 471 lvx v2,$idx,$key 472 ?vperm v1,v1,v2,v5 473 v${n}cipherlast v0,v0,v1 474 475 vspltisb v2,-1 476 vxor v1,v1,v1 477 li $idx,15 # 15 is not typo 478 ?vperm v2,v1,v2,v3 # outmask 479 le?vxor v3,v3,v4 480 lvx v1,0,$out # outhead 481 vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 482 vsel v1,v1,v0,v2 483 lvx v4,$idx,$out 484 stvx v1,0,$out 485 vsel v0,v0,v4,v2 486 stvx v0,$idx,$out 487 488 mtspr 256,$vrsave 489 blr 490 .long 0 491 .byte 0,12,0x14,0,0,0,3,0 492 .long 0 493.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 494___ 495} 496&gen_block("en"); 497&gen_block("de"); 498}}} 499######################################################################### 500{{{ # CBC en- and decrypt procedures # 501my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 502my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 503my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 504 map("v$_",(4..10)); 505$code.=<<___; 506.globl .${prefix}_cbc_encrypt 507.align 5 508.${prefix}_cbc_encrypt: 509 ${UCMP}i $len,16 510 bltlr- 511 512 cmpwi $enc,0 # test direction 513 lis r0,0xffe0 514 mfspr $vrsave,256 515 mtspr 256,r0 516 517 li $idx,15 518 vxor $rndkey0,$rndkey0,$rndkey0 519 le?vspltisb $tmp,0x0f 520 521 lvx $ivec,0,$ivp # load [unaligned] iv 522 lvsl $inpperm,0,$ivp 523 lvx $inptail,$idx,$ivp 524 le?vxor $inpperm,$inpperm,$tmp 525 vperm $ivec,$ivec,$inptail,$inpperm 526 527 neg r11,$inp 528 ?lvsl $keyperm,0,$key # prepare for unaligned key 529 lwz $rounds,240($key) 530 531 lvsr $inpperm,0,r11 # prepare for unaligned load 532 lvx $inptail,0,$inp 533 addi $inp,$inp,15 # 15 is not typo 534 le?vxor $inpperm,$inpperm,$tmp 535 536 ?lvsr $outperm,0,$out # prepare for unaligned store 537 vspltisb $outmask,-1 538 lvx $outhead,0,$out 539 ?vperm $outmask,$rndkey0,$outmask,$outperm 540 le?vxor $outperm,$outperm,$tmp 541 542 srwi $rounds,$rounds,1 543 li $idx,16 544 subi $rounds,$rounds,1 545 beq Lcbc_dec 546 547Lcbc_enc: 548 vmr $inout,$inptail 549 lvx $inptail,0,$inp 550 addi $inp,$inp,16 551 mtctr $rounds 552 subi $len,$len,16 # len-=16 553 554 lvx $rndkey0,0,$key 555 vperm $inout,$inout,$inptail,$inpperm 556 lvx $rndkey1,$idx,$key 557 addi $idx,$idx,16 558 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 559 vxor $inout,$inout,$rndkey0 560 lvx $rndkey0,$idx,$key 561 addi $idx,$idx,16 562 vxor $inout,$inout,$ivec 563 564Loop_cbc_enc: 565 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 566 vcipher $inout,$inout,$rndkey1 567 lvx $rndkey1,$idx,$key 568 addi $idx,$idx,16 569 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 570 vcipher $inout,$inout,$rndkey0 571 lvx $rndkey0,$idx,$key 572 addi $idx,$idx,16 573 bdnz Loop_cbc_enc 574 575 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 576 vcipher $inout,$inout,$rndkey1 577 lvx $rndkey1,$idx,$key 578 li $idx,16 579 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 580 vcipherlast $ivec,$inout,$rndkey0 581 ${UCMP}i $len,16 582 583 vperm $tmp,$ivec,$ivec,$outperm 584 vsel $inout,$outhead,$tmp,$outmask 585 vmr $outhead,$tmp 586 stvx $inout,0,$out 587 addi $out,$out,16 588 bge Lcbc_enc 589 590 b Lcbc_done 591 592.align 4 593Lcbc_dec: 594 ${UCMP}i $len,128 595 bge _aesp8_cbc_decrypt8x 596 vmr $tmp,$inptail 597 lvx $inptail,0,$inp 598 addi $inp,$inp,16 599 mtctr $rounds 600 subi $len,$len,16 # len-=16 601 602 lvx $rndkey0,0,$key 603 vperm $tmp,$tmp,$inptail,$inpperm 604 lvx $rndkey1,$idx,$key 605 addi $idx,$idx,16 606 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 607 vxor $inout,$tmp,$rndkey0 608 lvx $rndkey0,$idx,$key 609 addi $idx,$idx,16 610 611Loop_cbc_dec: 612 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 613 vncipher $inout,$inout,$rndkey1 614 lvx $rndkey1,$idx,$key 615 addi $idx,$idx,16 616 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 617 vncipher $inout,$inout,$rndkey0 618 lvx $rndkey0,$idx,$key 619 addi $idx,$idx,16 620 bdnz Loop_cbc_dec 621 622 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 623 vncipher $inout,$inout,$rndkey1 624 lvx $rndkey1,$idx,$key 625 li $idx,16 626 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 627 vncipherlast $inout,$inout,$rndkey0 628 ${UCMP}i $len,16 629 630 vxor $inout,$inout,$ivec 631 vmr $ivec,$tmp 632 vperm $tmp,$inout,$inout,$outperm 633 vsel $inout,$outhead,$tmp,$outmask 634 vmr $outhead,$tmp 635 stvx $inout,0,$out 636 addi $out,$out,16 637 bge Lcbc_dec 638 639Lcbc_done: 640 addi $out,$out,-1 641 lvx $inout,0,$out # redundant in aligned case 642 vsel $inout,$outhead,$inout,$outmask 643 stvx $inout,0,$out 644 645 neg $enc,$ivp # write [unaligned] iv 646 li $idx,15 # 15 is not typo 647 vxor $rndkey0,$rndkey0,$rndkey0 648 vspltisb $outmask,-1 649 le?vspltisb $tmp,0x0f 650 ?lvsl $outperm,0,$enc 651 ?vperm $outmask,$rndkey0,$outmask,$outperm 652 le?vxor $outperm,$outperm,$tmp 653 lvx $outhead,0,$ivp 654 vperm $ivec,$ivec,$ivec,$outperm 655 vsel $inout,$outhead,$ivec,$outmask 656 lvx $inptail,$idx,$ivp 657 stvx $inout,0,$ivp 658 vsel $inout,$ivec,$inptail,$outmask 659 stvx $inout,$idx,$ivp 660 661 mtspr 256,$vrsave 662 blr 663 .long 0 664 .byte 0,12,0x14,0,0,0,6,0 665 .long 0 666___ 667######################################################################### 668{{ # Optimized CBC decrypt procedure # 669my $key_="r11"; 670my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 671 $x00=0 if ($flavour =~ /osx/); 672my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); 673my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); 674my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 675 # v26-v31 last 6 round keys 676my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 677 678$code.=<<___; 679.align 5 680_aesp8_cbc_decrypt8x: 681 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 682 li r10,`$FRAME+8*16+15` 683 li r11,`$FRAME+8*16+31` 684 stvx v20,r10,$sp # ABI says so 685 addi r10,r10,32 686 stvx v21,r11,$sp 687 addi r11,r11,32 688 stvx v22,r10,$sp 689 addi r10,r10,32 690 stvx v23,r11,$sp 691 addi r11,r11,32 692 stvx v24,r10,$sp 693 addi r10,r10,32 694 stvx v25,r11,$sp 695 addi r11,r11,32 696 stvx v26,r10,$sp 697 addi r10,r10,32 698 stvx v27,r11,$sp 699 addi r11,r11,32 700 stvx v28,r10,$sp 701 addi r10,r10,32 702 stvx v29,r11,$sp 703 addi r11,r11,32 704 stvx v30,r10,$sp 705 stvx v31,r11,$sp 706 li r0,-1 707 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 708 li $x10,0x10 709 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 710 li $x20,0x20 711 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 712 li $x30,0x30 713 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 714 li $x40,0x40 715 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 716 li $x50,0x50 717 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 718 li $x60,0x60 719 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 720 li $x70,0x70 721 mtspr 256,r0 722 723 subi $rounds,$rounds,3 # -4 in total 724 subi $len,$len,128 # bias 725 726 lvx $rndkey0,$x00,$key # load key schedule 727 lvx v30,$x10,$key 728 addi $key,$key,0x20 729 lvx v31,$x00,$key 730 ?vperm $rndkey0,$rndkey0,v30,$keyperm 731 addi $key_,$sp,$FRAME+15 732 mtctr $rounds 733 734Load_cbc_dec_key: 735 ?vperm v24,v30,v31,$keyperm 736 lvx v30,$x10,$key 737 addi $key,$key,0x20 738 stvx v24,$x00,$key_ # off-load round[1] 739 ?vperm v25,v31,v30,$keyperm 740 lvx v31,$x00,$key 741 stvx v25,$x10,$key_ # off-load round[2] 742 addi $key_,$key_,0x20 743 bdnz Load_cbc_dec_key 744 745 lvx v26,$x10,$key 746 ?vperm v24,v30,v31,$keyperm 747 lvx v27,$x20,$key 748 stvx v24,$x00,$key_ # off-load round[3] 749 ?vperm v25,v31,v26,$keyperm 750 lvx v28,$x30,$key 751 stvx v25,$x10,$key_ # off-load round[4] 752 addi $key_,$sp,$FRAME+15 # rewind $key_ 753 ?vperm v26,v26,v27,$keyperm 754 lvx v29,$x40,$key 755 ?vperm v27,v27,v28,$keyperm 756 lvx v30,$x50,$key 757 ?vperm v28,v28,v29,$keyperm 758 lvx v31,$x60,$key 759 ?vperm v29,v29,v30,$keyperm 760 lvx $out0,$x70,$key # borrow $out0 761 ?vperm v30,v30,v31,$keyperm 762 lvx v24,$x00,$key_ # pre-load round[1] 763 ?vperm v31,v31,$out0,$keyperm 764 lvx v25,$x10,$key_ # pre-load round[2] 765 766 #lvx $inptail,0,$inp # "caller" already did this 767 #addi $inp,$inp,15 # 15 is not typo 768 subi $inp,$inp,15 # undo "caller" 769 770 le?li $idx,8 771 lvx_u $in0,$x00,$inp # load first 8 "words" 772 le?lvsl $inpperm,0,$idx 773 le?vspltisb $tmp,0x0f 774 lvx_u $in1,$x10,$inp 775 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 776 lvx_u $in2,$x20,$inp 777 le?vperm $in0,$in0,$in0,$inpperm 778 lvx_u $in3,$x30,$inp 779 le?vperm $in1,$in1,$in1,$inpperm 780 lvx_u $in4,$x40,$inp 781 le?vperm $in2,$in2,$in2,$inpperm 782 vxor $out0,$in0,$rndkey0 783 lvx_u $in5,$x50,$inp 784 le?vperm $in3,$in3,$in3,$inpperm 785 vxor $out1,$in1,$rndkey0 786 lvx_u $in6,$x60,$inp 787 le?vperm $in4,$in4,$in4,$inpperm 788 vxor $out2,$in2,$rndkey0 789 lvx_u $in7,$x70,$inp 790 addi $inp,$inp,0x80 791 le?vperm $in5,$in5,$in5,$inpperm 792 vxor $out3,$in3,$rndkey0 793 le?vperm $in6,$in6,$in6,$inpperm 794 vxor $out4,$in4,$rndkey0 795 le?vperm $in7,$in7,$in7,$inpperm 796 vxor $out5,$in5,$rndkey0 797 vxor $out6,$in6,$rndkey0 798 vxor $out7,$in7,$rndkey0 799 800 mtctr $rounds 801 b Loop_cbc_dec8x 802.align 5 803Loop_cbc_dec8x: 804 vncipher $out0,$out0,v24 805 vncipher $out1,$out1,v24 806 vncipher $out2,$out2,v24 807 vncipher $out3,$out3,v24 808 vncipher $out4,$out4,v24 809 vncipher $out5,$out5,v24 810 vncipher $out6,$out6,v24 811 vncipher $out7,$out7,v24 812 lvx v24,$x20,$key_ # round[3] 813 addi $key_,$key_,0x20 814 815 vncipher $out0,$out0,v25 816 vncipher $out1,$out1,v25 817 vncipher $out2,$out2,v25 818 vncipher $out3,$out3,v25 819 vncipher $out4,$out4,v25 820 vncipher $out5,$out5,v25 821 vncipher $out6,$out6,v25 822 vncipher $out7,$out7,v25 823 lvx v25,$x10,$key_ # round[4] 824 bdnz Loop_cbc_dec8x 825 826 subic $len,$len,128 # $len-=128 827 vncipher $out0,$out0,v24 828 vncipher $out1,$out1,v24 829 vncipher $out2,$out2,v24 830 vncipher $out3,$out3,v24 831 vncipher $out4,$out4,v24 832 vncipher $out5,$out5,v24 833 vncipher $out6,$out6,v24 834 vncipher $out7,$out7,v24 835 836 subfe. r0,r0,r0 # borrow?-1:0 837 vncipher $out0,$out0,v25 838 vncipher $out1,$out1,v25 839 vncipher $out2,$out2,v25 840 vncipher $out3,$out3,v25 841 vncipher $out4,$out4,v25 842 vncipher $out5,$out5,v25 843 vncipher $out6,$out6,v25 844 vncipher $out7,$out7,v25 845 846 and r0,r0,$len 847 vncipher $out0,$out0,v26 848 vncipher $out1,$out1,v26 849 vncipher $out2,$out2,v26 850 vncipher $out3,$out3,v26 851 vncipher $out4,$out4,v26 852 vncipher $out5,$out5,v26 853 vncipher $out6,$out6,v26 854 vncipher $out7,$out7,v26 855 856 add $inp,$inp,r0 # $inp is adjusted in such 857 # way that at exit from the 858 # loop inX-in7 are loaded 859 # with last "words" 860 vncipher $out0,$out0,v27 861 vncipher $out1,$out1,v27 862 vncipher $out2,$out2,v27 863 vncipher $out3,$out3,v27 864 vncipher $out4,$out4,v27 865 vncipher $out5,$out5,v27 866 vncipher $out6,$out6,v27 867 vncipher $out7,$out7,v27 868 869 addi $key_,$sp,$FRAME+15 # rewind $key_ 870 vncipher $out0,$out0,v28 871 vncipher $out1,$out1,v28 872 vncipher $out2,$out2,v28 873 vncipher $out3,$out3,v28 874 vncipher $out4,$out4,v28 875 vncipher $out5,$out5,v28 876 vncipher $out6,$out6,v28 877 vncipher $out7,$out7,v28 878 lvx v24,$x00,$key_ # re-pre-load round[1] 879 880 vncipher $out0,$out0,v29 881 vncipher $out1,$out1,v29 882 vncipher $out2,$out2,v29 883 vncipher $out3,$out3,v29 884 vncipher $out4,$out4,v29 885 vncipher $out5,$out5,v29 886 vncipher $out6,$out6,v29 887 vncipher $out7,$out7,v29 888 lvx v25,$x10,$key_ # re-pre-load round[2] 889 890 vncipher $out0,$out0,v30 891 vxor $ivec,$ivec,v31 # xor with last round key 892 vncipher $out1,$out1,v30 893 vxor $in0,$in0,v31 894 vncipher $out2,$out2,v30 895 vxor $in1,$in1,v31 896 vncipher $out3,$out3,v30 897 vxor $in2,$in2,v31 898 vncipher $out4,$out4,v30 899 vxor $in3,$in3,v31 900 vncipher $out5,$out5,v30 901 vxor $in4,$in4,v31 902 vncipher $out6,$out6,v30 903 vxor $in5,$in5,v31 904 vncipher $out7,$out7,v30 905 vxor $in6,$in6,v31 906 907 vncipherlast $out0,$out0,$ivec 908 vncipherlast $out1,$out1,$in0 909 lvx_u $in0,$x00,$inp # load next input block 910 vncipherlast $out2,$out2,$in1 911 lvx_u $in1,$x10,$inp 912 vncipherlast $out3,$out3,$in2 913 le?vperm $in0,$in0,$in0,$inpperm 914 lvx_u $in2,$x20,$inp 915 vncipherlast $out4,$out4,$in3 916 le?vperm $in1,$in1,$in1,$inpperm 917 lvx_u $in3,$x30,$inp 918 vncipherlast $out5,$out5,$in4 919 le?vperm $in2,$in2,$in2,$inpperm 920 lvx_u $in4,$x40,$inp 921 vncipherlast $out6,$out6,$in5 922 le?vperm $in3,$in3,$in3,$inpperm 923 lvx_u $in5,$x50,$inp 924 vncipherlast $out7,$out7,$in6 925 le?vperm $in4,$in4,$in4,$inpperm 926 lvx_u $in6,$x60,$inp 927 vmr $ivec,$in7 928 le?vperm $in5,$in5,$in5,$inpperm 929 lvx_u $in7,$x70,$inp 930 addi $inp,$inp,0x80 931 932 le?vperm $out0,$out0,$out0,$inpperm 933 le?vperm $out1,$out1,$out1,$inpperm 934 stvx_u $out0,$x00,$out 935 le?vperm $in6,$in6,$in6,$inpperm 936 vxor $out0,$in0,$rndkey0 937 le?vperm $out2,$out2,$out2,$inpperm 938 stvx_u $out1,$x10,$out 939 le?vperm $in7,$in7,$in7,$inpperm 940 vxor $out1,$in1,$rndkey0 941 le?vperm $out3,$out3,$out3,$inpperm 942 stvx_u $out2,$x20,$out 943 vxor $out2,$in2,$rndkey0 944 le?vperm $out4,$out4,$out4,$inpperm 945 stvx_u $out3,$x30,$out 946 vxor $out3,$in3,$rndkey0 947 le?vperm $out5,$out5,$out5,$inpperm 948 stvx_u $out4,$x40,$out 949 vxor $out4,$in4,$rndkey0 950 le?vperm $out6,$out6,$out6,$inpperm 951 stvx_u $out5,$x50,$out 952 vxor $out5,$in5,$rndkey0 953 le?vperm $out7,$out7,$out7,$inpperm 954 stvx_u $out6,$x60,$out 955 vxor $out6,$in6,$rndkey0 956 stvx_u $out7,$x70,$out 957 addi $out,$out,0x80 958 vxor $out7,$in7,$rndkey0 959 960 mtctr $rounds 961 beq Loop_cbc_dec8x # did $len-=128 borrow? 962 963 addic. $len,$len,128 964 beq Lcbc_dec8x_done 965 nop 966 nop 967 968Loop_cbc_dec8x_tail: # up to 7 "words" tail... 969 vncipher $out1,$out1,v24 970 vncipher $out2,$out2,v24 971 vncipher $out3,$out3,v24 972 vncipher $out4,$out4,v24 973 vncipher $out5,$out5,v24 974 vncipher $out6,$out6,v24 975 vncipher $out7,$out7,v24 976 lvx v24,$x20,$key_ # round[3] 977 addi $key_,$key_,0x20 978 979 vncipher $out1,$out1,v25 980 vncipher $out2,$out2,v25 981 vncipher $out3,$out3,v25 982 vncipher $out4,$out4,v25 983 vncipher $out5,$out5,v25 984 vncipher $out6,$out6,v25 985 vncipher $out7,$out7,v25 986 lvx v25,$x10,$key_ # round[4] 987 bdnz Loop_cbc_dec8x_tail 988 989 vncipher $out1,$out1,v24 990 vncipher $out2,$out2,v24 991 vncipher $out3,$out3,v24 992 vncipher $out4,$out4,v24 993 vncipher $out5,$out5,v24 994 vncipher $out6,$out6,v24 995 vncipher $out7,$out7,v24 996 997 vncipher $out1,$out1,v25 998 vncipher $out2,$out2,v25 999 vncipher $out3,$out3,v25 1000 vncipher $out4,$out4,v25 1001 vncipher $out5,$out5,v25 1002 vncipher $out6,$out6,v25 1003 vncipher $out7,$out7,v25 1004 1005 vncipher $out1,$out1,v26 1006 vncipher $out2,$out2,v26 1007 vncipher $out3,$out3,v26 1008 vncipher $out4,$out4,v26 1009 vncipher $out5,$out5,v26 1010 vncipher $out6,$out6,v26 1011 vncipher $out7,$out7,v26 1012 1013 vncipher $out1,$out1,v27 1014 vncipher $out2,$out2,v27 1015 vncipher $out3,$out3,v27 1016 vncipher $out4,$out4,v27 1017 vncipher $out5,$out5,v27 1018 vncipher $out6,$out6,v27 1019 vncipher $out7,$out7,v27 1020 1021 vncipher $out1,$out1,v28 1022 vncipher $out2,$out2,v28 1023 vncipher $out3,$out3,v28 1024 vncipher $out4,$out4,v28 1025 vncipher $out5,$out5,v28 1026 vncipher $out6,$out6,v28 1027 vncipher $out7,$out7,v28 1028 1029 vncipher $out1,$out1,v29 1030 vncipher $out2,$out2,v29 1031 vncipher $out3,$out3,v29 1032 vncipher $out4,$out4,v29 1033 vncipher $out5,$out5,v29 1034 vncipher $out6,$out6,v29 1035 vncipher $out7,$out7,v29 1036 1037 vncipher $out1,$out1,v30 1038 vxor $ivec,$ivec,v31 # last round key 1039 vncipher $out2,$out2,v30 1040 vxor $in1,$in1,v31 1041 vncipher $out3,$out3,v30 1042 vxor $in2,$in2,v31 1043 vncipher $out4,$out4,v30 1044 vxor $in3,$in3,v31 1045 vncipher $out5,$out5,v30 1046 vxor $in4,$in4,v31 1047 vncipher $out6,$out6,v30 1048 vxor $in5,$in5,v31 1049 vncipher $out7,$out7,v30 1050 vxor $in6,$in6,v31 1051 1052 cmplwi $len,32 # switch($len) 1053 blt Lcbc_dec8x_one 1054 nop 1055 beq Lcbc_dec8x_two 1056 cmplwi $len,64 1057 blt Lcbc_dec8x_three 1058 nop 1059 beq Lcbc_dec8x_four 1060 cmplwi $len,96 1061 blt Lcbc_dec8x_five 1062 nop 1063 beq Lcbc_dec8x_six 1064 1065Lcbc_dec8x_seven: 1066 vncipherlast $out1,$out1,$ivec 1067 vncipherlast $out2,$out2,$in1 1068 vncipherlast $out3,$out3,$in2 1069 vncipherlast $out4,$out4,$in3 1070 vncipherlast $out5,$out5,$in4 1071 vncipherlast $out6,$out6,$in5 1072 vncipherlast $out7,$out7,$in6 1073 vmr $ivec,$in7 1074 1075 le?vperm $out1,$out1,$out1,$inpperm 1076 le?vperm $out2,$out2,$out2,$inpperm 1077 stvx_u $out1,$x00,$out 1078 le?vperm $out3,$out3,$out3,$inpperm 1079 stvx_u $out2,$x10,$out 1080 le?vperm $out4,$out4,$out4,$inpperm 1081 stvx_u $out3,$x20,$out 1082 le?vperm $out5,$out5,$out5,$inpperm 1083 stvx_u $out4,$x30,$out 1084 le?vperm $out6,$out6,$out6,$inpperm 1085 stvx_u $out5,$x40,$out 1086 le?vperm $out7,$out7,$out7,$inpperm 1087 stvx_u $out6,$x50,$out 1088 stvx_u $out7,$x60,$out 1089 addi $out,$out,0x70 1090 b Lcbc_dec8x_done 1091 1092.align 5 1093Lcbc_dec8x_six: 1094 vncipherlast $out2,$out2,$ivec 1095 vncipherlast $out3,$out3,$in2 1096 vncipherlast $out4,$out4,$in3 1097 vncipherlast $out5,$out5,$in4 1098 vncipherlast $out6,$out6,$in5 1099 vncipherlast $out7,$out7,$in6 1100 vmr $ivec,$in7 1101 1102 le?vperm $out2,$out2,$out2,$inpperm 1103 le?vperm $out3,$out3,$out3,$inpperm 1104 stvx_u $out2,$x00,$out 1105 le?vperm $out4,$out4,$out4,$inpperm 1106 stvx_u $out3,$x10,$out 1107 le?vperm $out5,$out5,$out5,$inpperm 1108 stvx_u $out4,$x20,$out 1109 le?vperm $out6,$out6,$out6,$inpperm 1110 stvx_u $out5,$x30,$out 1111 le?vperm $out7,$out7,$out7,$inpperm 1112 stvx_u $out6,$x40,$out 1113 stvx_u $out7,$x50,$out 1114 addi $out,$out,0x60 1115 b Lcbc_dec8x_done 1116 1117.align 5 1118Lcbc_dec8x_five: 1119 vncipherlast $out3,$out3,$ivec 1120 vncipherlast $out4,$out4,$in3 1121 vncipherlast $out5,$out5,$in4 1122 vncipherlast $out6,$out6,$in5 1123 vncipherlast $out7,$out7,$in6 1124 vmr $ivec,$in7 1125 1126 le?vperm $out3,$out3,$out3,$inpperm 1127 le?vperm $out4,$out4,$out4,$inpperm 1128 stvx_u $out3,$x00,$out 1129 le?vperm $out5,$out5,$out5,$inpperm 1130 stvx_u $out4,$x10,$out 1131 le?vperm $out6,$out6,$out6,$inpperm 1132 stvx_u $out5,$x20,$out 1133 le?vperm $out7,$out7,$out7,$inpperm 1134 stvx_u $out6,$x30,$out 1135 stvx_u $out7,$x40,$out 1136 addi $out,$out,0x50 1137 b Lcbc_dec8x_done 1138 1139.align 5 1140Lcbc_dec8x_four: 1141 vncipherlast $out4,$out4,$ivec 1142 vncipherlast $out5,$out5,$in4 1143 vncipherlast $out6,$out6,$in5 1144 vncipherlast $out7,$out7,$in6 1145 vmr $ivec,$in7 1146 1147 le?vperm $out4,$out4,$out4,$inpperm 1148 le?vperm $out5,$out5,$out5,$inpperm 1149 stvx_u $out4,$x00,$out 1150 le?vperm $out6,$out6,$out6,$inpperm 1151 stvx_u $out5,$x10,$out 1152 le?vperm $out7,$out7,$out7,$inpperm 1153 stvx_u $out6,$x20,$out 1154 stvx_u $out7,$x30,$out 1155 addi $out,$out,0x40 1156 b Lcbc_dec8x_done 1157 1158.align 5 1159Lcbc_dec8x_three: 1160 vncipherlast $out5,$out5,$ivec 1161 vncipherlast $out6,$out6,$in5 1162 vncipherlast $out7,$out7,$in6 1163 vmr $ivec,$in7 1164 1165 le?vperm $out5,$out5,$out5,$inpperm 1166 le?vperm $out6,$out6,$out6,$inpperm 1167 stvx_u $out5,$x00,$out 1168 le?vperm $out7,$out7,$out7,$inpperm 1169 stvx_u $out6,$x10,$out 1170 stvx_u $out7,$x20,$out 1171 addi $out,$out,0x30 1172 b Lcbc_dec8x_done 1173 1174.align 5 1175Lcbc_dec8x_two: 1176 vncipherlast $out6,$out6,$ivec 1177 vncipherlast $out7,$out7,$in6 1178 vmr $ivec,$in7 1179 1180 le?vperm $out6,$out6,$out6,$inpperm 1181 le?vperm $out7,$out7,$out7,$inpperm 1182 stvx_u $out6,$x00,$out 1183 stvx_u $out7,$x10,$out 1184 addi $out,$out,0x20 1185 b Lcbc_dec8x_done 1186 1187.align 5 1188Lcbc_dec8x_one: 1189 vncipherlast $out7,$out7,$ivec 1190 vmr $ivec,$in7 1191 1192 le?vperm $out7,$out7,$out7,$inpperm 1193 stvx_u $out7,0,$out 1194 addi $out,$out,0x10 1195 1196Lcbc_dec8x_done: 1197 le?vperm $ivec,$ivec,$ivec,$inpperm 1198 stvx_u $ivec,0,$ivp # write [unaligned] iv 1199 1200 li r10,`$FRAME+15` 1201 li r11,`$FRAME+31` 1202 stvx $inpperm,r10,$sp # wipe copies of round keys 1203 addi r10,r10,32 1204 stvx $inpperm,r11,$sp 1205 addi r11,r11,32 1206 stvx $inpperm,r10,$sp 1207 addi r10,r10,32 1208 stvx $inpperm,r11,$sp 1209 addi r11,r11,32 1210 stvx $inpperm,r10,$sp 1211 addi r10,r10,32 1212 stvx $inpperm,r11,$sp 1213 addi r11,r11,32 1214 stvx $inpperm,r10,$sp 1215 addi r10,r10,32 1216 stvx $inpperm,r11,$sp 1217 addi r11,r11,32 1218 1219 mtspr 256,$vrsave 1220 lvx v20,r10,$sp # ABI says so 1221 addi r10,r10,32 1222 lvx v21,r11,$sp 1223 addi r11,r11,32 1224 lvx v22,r10,$sp 1225 addi r10,r10,32 1226 lvx v23,r11,$sp 1227 addi r11,r11,32 1228 lvx v24,r10,$sp 1229 addi r10,r10,32 1230 lvx v25,r11,$sp 1231 addi r11,r11,32 1232 lvx v26,r10,$sp 1233 addi r10,r10,32 1234 lvx v27,r11,$sp 1235 addi r11,r11,32 1236 lvx v28,r10,$sp 1237 addi r10,r10,32 1238 lvx v29,r11,$sp 1239 addi r11,r11,32 1240 lvx v30,r10,$sp 1241 lvx v31,r11,$sp 1242 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1243 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1244 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1245 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1246 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1247 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1248 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1249 blr 1250 .long 0 1251 .byte 0,12,0x04,0,0x80,6,6,0 1252 .long 0 1253.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt 1254___ 1255}} }}} 1256 1257######################################################################### 1258{{{ # CTR procedure[s] # 1259my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1260my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1261my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1262 map("v$_",(4..11)); 1263my $dat=$tmp; 1264 1265$code.=<<___; 1266.globl .${prefix}_ctr32_encrypt_blocks 1267.align 5 1268.${prefix}_ctr32_encrypt_blocks: 1269 ${UCMP}i $len,1 1270 bltlr- 1271 1272 lis r0,0xfff0 1273 mfspr $vrsave,256 1274 mtspr 256,r0 1275 1276 li $idx,15 1277 vxor $rndkey0,$rndkey0,$rndkey0 1278 le?vspltisb $tmp,0x0f 1279 1280 lvx $ivec,0,$ivp # load [unaligned] iv 1281 lvsl $inpperm,0,$ivp 1282 lvx $inptail,$idx,$ivp 1283 vspltisb $one,1 1284 le?vxor $inpperm,$inpperm,$tmp 1285 vperm $ivec,$ivec,$inptail,$inpperm 1286 vsldoi $one,$rndkey0,$one,1 1287 1288 neg r11,$inp 1289 ?lvsl $keyperm,0,$key # prepare for unaligned key 1290 lwz $rounds,240($key) 1291 1292 lvsr $inpperm,0,r11 # prepare for unaligned load 1293 lvx $inptail,0,$inp 1294 addi $inp,$inp,15 # 15 is not typo 1295 le?vxor $inpperm,$inpperm,$tmp 1296 1297 srwi $rounds,$rounds,1 1298 li $idx,16 1299 subi $rounds,$rounds,1 1300 1301 ${UCMP}i $len,8 1302 bge _aesp8_ctr32_encrypt8x 1303 1304 ?lvsr $outperm,0,$out # prepare for unaligned store 1305 vspltisb $outmask,-1 1306 lvx $outhead,0,$out 1307 ?vperm $outmask,$rndkey0,$outmask,$outperm 1308 le?vxor $outperm,$outperm,$tmp 1309 1310 lvx $rndkey0,0,$key 1311 mtctr $rounds 1312 lvx $rndkey1,$idx,$key 1313 addi $idx,$idx,16 1314 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1315 vxor $inout,$ivec,$rndkey0 1316 lvx $rndkey0,$idx,$key 1317 addi $idx,$idx,16 1318 b Loop_ctr32_enc 1319 1320.align 5 1321Loop_ctr32_enc: 1322 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1323 vcipher $inout,$inout,$rndkey1 1324 lvx $rndkey1,$idx,$key 1325 addi $idx,$idx,16 1326 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1327 vcipher $inout,$inout,$rndkey0 1328 lvx $rndkey0,$idx,$key 1329 addi $idx,$idx,16 1330 bdnz Loop_ctr32_enc 1331 1332 vadduwm $ivec,$ivec,$one 1333 vmr $dat,$inptail 1334 lvx $inptail,0,$inp 1335 addi $inp,$inp,16 1336 subic. $len,$len,1 # blocks-- 1337 1338 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1339 vcipher $inout,$inout,$rndkey1 1340 lvx $rndkey1,$idx,$key 1341 vperm $dat,$dat,$inptail,$inpperm 1342 li $idx,16 1343 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1344 lvx $rndkey0,0,$key 1345 vxor $dat,$dat,$rndkey1 # last round key 1346 vcipherlast $inout,$inout,$dat 1347 1348 lvx $rndkey1,$idx,$key 1349 addi $idx,$idx,16 1350 vperm $inout,$inout,$inout,$outperm 1351 vsel $dat,$outhead,$inout,$outmask 1352 mtctr $rounds 1353 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1354 vmr $outhead,$inout 1355 vxor $inout,$ivec,$rndkey0 1356 lvx $rndkey0,$idx,$key 1357 addi $idx,$idx,16 1358 stvx $dat,0,$out 1359 addi $out,$out,16 1360 bne Loop_ctr32_enc 1361 1362 addi $out,$out,-1 1363 lvx $inout,0,$out # redundant in aligned case 1364 vsel $inout,$outhead,$inout,$outmask 1365 stvx $inout,0,$out 1366 1367 mtspr 256,$vrsave 1368 blr 1369 .long 0 1370 .byte 0,12,0x14,0,0,0,6,0 1371 .long 0 1372___ 1373######################################################################### 1374{{ # Optimized CTR procedure # 1375my $key_="r11"; 1376my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1377 $x00=0 if ($flavour =~ /osx/); 1378my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1379my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1380my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1381 # v26-v31 last 6 round keys 1382my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1383my ($two,$three,$four)=($outhead,$outperm,$outmask); 1384 1385$code.=<<___; 1386.align 5 1387_aesp8_ctr32_encrypt8x: 1388 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1389 li r10,`$FRAME+8*16+15` 1390 li r11,`$FRAME+8*16+31` 1391 stvx v20,r10,$sp # ABI says so 1392 addi r10,r10,32 1393 stvx v21,r11,$sp 1394 addi r11,r11,32 1395 stvx v22,r10,$sp 1396 addi r10,r10,32 1397 stvx v23,r11,$sp 1398 addi r11,r11,32 1399 stvx v24,r10,$sp 1400 addi r10,r10,32 1401 stvx v25,r11,$sp 1402 addi r11,r11,32 1403 stvx v26,r10,$sp 1404 addi r10,r10,32 1405 stvx v27,r11,$sp 1406 addi r11,r11,32 1407 stvx v28,r10,$sp 1408 addi r10,r10,32 1409 stvx v29,r11,$sp 1410 addi r11,r11,32 1411 stvx v30,r10,$sp 1412 stvx v31,r11,$sp 1413 li r0,-1 1414 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1415 li $x10,0x10 1416 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1417 li $x20,0x20 1418 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1419 li $x30,0x30 1420 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1421 li $x40,0x40 1422 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1423 li $x50,0x50 1424 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1425 li $x60,0x60 1426 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1427 li $x70,0x70 1428 mtspr 256,r0 1429 1430 subi $rounds,$rounds,3 # -4 in total 1431 1432 lvx $rndkey0,$x00,$key # load key schedule 1433 lvx v30,$x10,$key 1434 addi $key,$key,0x20 1435 lvx v31,$x00,$key 1436 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1437 addi $key_,$sp,$FRAME+15 1438 mtctr $rounds 1439 1440Load_ctr32_enc_key: 1441 ?vperm v24,v30,v31,$keyperm 1442 lvx v30,$x10,$key 1443 addi $key,$key,0x20 1444 stvx v24,$x00,$key_ # off-load round[1] 1445 ?vperm v25,v31,v30,$keyperm 1446 lvx v31,$x00,$key 1447 stvx v25,$x10,$key_ # off-load round[2] 1448 addi $key_,$key_,0x20 1449 bdnz Load_ctr32_enc_key 1450 1451 lvx v26,$x10,$key 1452 ?vperm v24,v30,v31,$keyperm 1453 lvx v27,$x20,$key 1454 stvx v24,$x00,$key_ # off-load round[3] 1455 ?vperm v25,v31,v26,$keyperm 1456 lvx v28,$x30,$key 1457 stvx v25,$x10,$key_ # off-load round[4] 1458 addi $key_,$sp,$FRAME+15 # rewind $key_ 1459 ?vperm v26,v26,v27,$keyperm 1460 lvx v29,$x40,$key 1461 ?vperm v27,v27,v28,$keyperm 1462 lvx v30,$x50,$key 1463 ?vperm v28,v28,v29,$keyperm 1464 lvx v31,$x60,$key 1465 ?vperm v29,v29,v30,$keyperm 1466 lvx $out0,$x70,$key # borrow $out0 1467 ?vperm v30,v30,v31,$keyperm 1468 lvx v24,$x00,$key_ # pre-load round[1] 1469 ?vperm v31,v31,$out0,$keyperm 1470 lvx v25,$x10,$key_ # pre-load round[2] 1471 1472 vadduwm $two,$one,$one 1473 subi $inp,$inp,15 # undo "caller" 1474 $SHL $len,$len,4 1475 1476 vadduwm $out1,$ivec,$one # counter values ... 1477 vadduwm $out2,$ivec,$two 1478 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1479 le?li $idx,8 1480 vadduwm $out3,$out1,$two 1481 vxor $out1,$out1,$rndkey0 1482 le?lvsl $inpperm,0,$idx 1483 vadduwm $out4,$out2,$two 1484 vxor $out2,$out2,$rndkey0 1485 le?vspltisb $tmp,0x0f 1486 vadduwm $out5,$out3,$two 1487 vxor $out3,$out3,$rndkey0 1488 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1489 vadduwm $out6,$out4,$two 1490 vxor $out4,$out4,$rndkey0 1491 vadduwm $out7,$out5,$two 1492 vxor $out5,$out5,$rndkey0 1493 vadduwm $ivec,$out6,$two # next counter value 1494 vxor $out6,$out6,$rndkey0 1495 vxor $out7,$out7,$rndkey0 1496 1497 mtctr $rounds 1498 b Loop_ctr32_enc8x 1499.align 5 1500Loop_ctr32_enc8x: 1501 vcipher $out0,$out0,v24 1502 vcipher $out1,$out1,v24 1503 vcipher $out2,$out2,v24 1504 vcipher $out3,$out3,v24 1505 vcipher $out4,$out4,v24 1506 vcipher $out5,$out5,v24 1507 vcipher $out6,$out6,v24 1508 vcipher $out7,$out7,v24 1509Loop_ctr32_enc8x_middle: 1510 lvx v24,$x20,$key_ # round[3] 1511 addi $key_,$key_,0x20 1512 1513 vcipher $out0,$out0,v25 1514 vcipher $out1,$out1,v25 1515 vcipher $out2,$out2,v25 1516 vcipher $out3,$out3,v25 1517 vcipher $out4,$out4,v25 1518 vcipher $out5,$out5,v25 1519 vcipher $out6,$out6,v25 1520 vcipher $out7,$out7,v25 1521 lvx v25,$x10,$key_ # round[4] 1522 bdnz Loop_ctr32_enc8x 1523 1524 subic r11,$len,256 # $len-256, borrow $key_ 1525 vcipher $out0,$out0,v24 1526 vcipher $out1,$out1,v24 1527 vcipher $out2,$out2,v24 1528 vcipher $out3,$out3,v24 1529 vcipher $out4,$out4,v24 1530 vcipher $out5,$out5,v24 1531 vcipher $out6,$out6,v24 1532 vcipher $out7,$out7,v24 1533 1534 subfe r0,r0,r0 # borrow?-1:0 1535 vcipher $out0,$out0,v25 1536 vcipher $out1,$out1,v25 1537 vcipher $out2,$out2,v25 1538 vcipher $out3,$out3,v25 1539 vcipher $out4,$out4,v25 1540 vcipher $out5,$out5,v25 1541 vcipher $out6,$out6,v25 1542 vcipher $out7,$out7,v25 1543 1544 and r0,r0,r11 1545 addi $key_,$sp,$FRAME+15 # rewind $key_ 1546 vcipher $out0,$out0,v26 1547 vcipher $out1,$out1,v26 1548 vcipher $out2,$out2,v26 1549 vcipher $out3,$out3,v26 1550 vcipher $out4,$out4,v26 1551 vcipher $out5,$out5,v26 1552 vcipher $out6,$out6,v26 1553 vcipher $out7,$out7,v26 1554 lvx v24,$x00,$key_ # re-pre-load round[1] 1555 1556 subic $len,$len,129 # $len-=129 1557 vcipher $out0,$out0,v27 1558 addi $len,$len,1 # $len-=128 really 1559 vcipher $out1,$out1,v27 1560 vcipher $out2,$out2,v27 1561 vcipher $out3,$out3,v27 1562 vcipher $out4,$out4,v27 1563 vcipher $out5,$out5,v27 1564 vcipher $out6,$out6,v27 1565 vcipher $out7,$out7,v27 1566 lvx v25,$x10,$key_ # re-pre-load round[2] 1567 1568 vcipher $out0,$out0,v28 1569 lvx_u $in0,$x00,$inp # load input 1570 vcipher $out1,$out1,v28 1571 lvx_u $in1,$x10,$inp 1572 vcipher $out2,$out2,v28 1573 lvx_u $in2,$x20,$inp 1574 vcipher $out3,$out3,v28 1575 lvx_u $in3,$x30,$inp 1576 vcipher $out4,$out4,v28 1577 lvx_u $in4,$x40,$inp 1578 vcipher $out5,$out5,v28 1579 lvx_u $in5,$x50,$inp 1580 vcipher $out6,$out6,v28 1581 lvx_u $in6,$x60,$inp 1582 vcipher $out7,$out7,v28 1583 lvx_u $in7,$x70,$inp 1584 addi $inp,$inp,0x80 1585 1586 vcipher $out0,$out0,v29 1587 le?vperm $in0,$in0,$in0,$inpperm 1588 vcipher $out1,$out1,v29 1589 le?vperm $in1,$in1,$in1,$inpperm 1590 vcipher $out2,$out2,v29 1591 le?vperm $in2,$in2,$in2,$inpperm 1592 vcipher $out3,$out3,v29 1593 le?vperm $in3,$in3,$in3,$inpperm 1594 vcipher $out4,$out4,v29 1595 le?vperm $in4,$in4,$in4,$inpperm 1596 vcipher $out5,$out5,v29 1597 le?vperm $in5,$in5,$in5,$inpperm 1598 vcipher $out6,$out6,v29 1599 le?vperm $in6,$in6,$in6,$inpperm 1600 vcipher $out7,$out7,v29 1601 le?vperm $in7,$in7,$in7,$inpperm 1602 1603 add $inp,$inp,r0 # $inp is adjusted in such 1604 # way that at exit from the 1605 # loop inX-in7 are loaded 1606 # with last "words" 1607 subfe. r0,r0,r0 # borrow?-1:0 1608 vcipher $out0,$out0,v30 1609 vxor $in0,$in0,v31 # xor with last round key 1610 vcipher $out1,$out1,v30 1611 vxor $in1,$in1,v31 1612 vcipher $out2,$out2,v30 1613 vxor $in2,$in2,v31 1614 vcipher $out3,$out3,v30 1615 vxor $in3,$in3,v31 1616 vcipher $out4,$out4,v30 1617 vxor $in4,$in4,v31 1618 vcipher $out5,$out5,v30 1619 vxor $in5,$in5,v31 1620 vcipher $out6,$out6,v30 1621 vxor $in6,$in6,v31 1622 vcipher $out7,$out7,v30 1623 vxor $in7,$in7,v31 1624 1625 bne Lctr32_enc8x_break # did $len-129 borrow? 1626 1627 vcipherlast $in0,$out0,$in0 1628 vcipherlast $in1,$out1,$in1 1629 vadduwm $out1,$ivec,$one # counter values ... 1630 vcipherlast $in2,$out2,$in2 1631 vadduwm $out2,$ivec,$two 1632 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1633 vcipherlast $in3,$out3,$in3 1634 vadduwm $out3,$out1,$two 1635 vxor $out1,$out1,$rndkey0 1636 vcipherlast $in4,$out4,$in4 1637 vadduwm $out4,$out2,$two 1638 vxor $out2,$out2,$rndkey0 1639 vcipherlast $in5,$out5,$in5 1640 vadduwm $out5,$out3,$two 1641 vxor $out3,$out3,$rndkey0 1642 vcipherlast $in6,$out6,$in6 1643 vadduwm $out6,$out4,$two 1644 vxor $out4,$out4,$rndkey0 1645 vcipherlast $in7,$out7,$in7 1646 vadduwm $out7,$out5,$two 1647 vxor $out5,$out5,$rndkey0 1648 le?vperm $in0,$in0,$in0,$inpperm 1649 vadduwm $ivec,$out6,$two # next counter value 1650 vxor $out6,$out6,$rndkey0 1651 le?vperm $in1,$in1,$in1,$inpperm 1652 vxor $out7,$out7,$rndkey0 1653 mtctr $rounds 1654 1655 vcipher $out0,$out0,v24 1656 stvx_u $in0,$x00,$out 1657 le?vperm $in2,$in2,$in2,$inpperm 1658 vcipher $out1,$out1,v24 1659 stvx_u $in1,$x10,$out 1660 le?vperm $in3,$in3,$in3,$inpperm 1661 vcipher $out2,$out2,v24 1662 stvx_u $in2,$x20,$out 1663 le?vperm $in4,$in4,$in4,$inpperm 1664 vcipher $out3,$out3,v24 1665 stvx_u $in3,$x30,$out 1666 le?vperm $in5,$in5,$in5,$inpperm 1667 vcipher $out4,$out4,v24 1668 stvx_u $in4,$x40,$out 1669 le?vperm $in6,$in6,$in6,$inpperm 1670 vcipher $out5,$out5,v24 1671 stvx_u $in5,$x50,$out 1672 le?vperm $in7,$in7,$in7,$inpperm 1673 vcipher $out6,$out6,v24 1674 stvx_u $in6,$x60,$out 1675 vcipher $out7,$out7,v24 1676 stvx_u $in7,$x70,$out 1677 addi $out,$out,0x80 1678 1679 b Loop_ctr32_enc8x_middle 1680 1681.align 5 1682Lctr32_enc8x_break: 1683 cmpwi $len,-0x60 1684 blt Lctr32_enc8x_one 1685 nop 1686 beq Lctr32_enc8x_two 1687 cmpwi $len,-0x40 1688 blt Lctr32_enc8x_three 1689 nop 1690 beq Lctr32_enc8x_four 1691 cmpwi $len,-0x20 1692 blt Lctr32_enc8x_five 1693 nop 1694 beq Lctr32_enc8x_six 1695 cmpwi $len,0x00 1696 blt Lctr32_enc8x_seven 1697 1698Lctr32_enc8x_eight: 1699 vcipherlast $out0,$out0,$in0 1700 vcipherlast $out1,$out1,$in1 1701 vcipherlast $out2,$out2,$in2 1702 vcipherlast $out3,$out3,$in3 1703 vcipherlast $out4,$out4,$in4 1704 vcipherlast $out5,$out5,$in5 1705 vcipherlast $out6,$out6,$in6 1706 vcipherlast $out7,$out7,$in7 1707 1708 le?vperm $out0,$out0,$out0,$inpperm 1709 le?vperm $out1,$out1,$out1,$inpperm 1710 stvx_u $out0,$x00,$out 1711 le?vperm $out2,$out2,$out2,$inpperm 1712 stvx_u $out1,$x10,$out 1713 le?vperm $out3,$out3,$out3,$inpperm 1714 stvx_u $out2,$x20,$out 1715 le?vperm $out4,$out4,$out4,$inpperm 1716 stvx_u $out3,$x30,$out 1717 le?vperm $out5,$out5,$out5,$inpperm 1718 stvx_u $out4,$x40,$out 1719 le?vperm $out6,$out6,$out6,$inpperm 1720 stvx_u $out5,$x50,$out 1721 le?vperm $out7,$out7,$out7,$inpperm 1722 stvx_u $out6,$x60,$out 1723 stvx_u $out7,$x70,$out 1724 addi $out,$out,0x80 1725 b Lctr32_enc8x_done 1726 1727.align 5 1728Lctr32_enc8x_seven: 1729 vcipherlast $out0,$out0,$in1 1730 vcipherlast $out1,$out1,$in2 1731 vcipherlast $out2,$out2,$in3 1732 vcipherlast $out3,$out3,$in4 1733 vcipherlast $out4,$out4,$in5 1734 vcipherlast $out5,$out5,$in6 1735 vcipherlast $out6,$out6,$in7 1736 1737 le?vperm $out0,$out0,$out0,$inpperm 1738 le?vperm $out1,$out1,$out1,$inpperm 1739 stvx_u $out0,$x00,$out 1740 le?vperm $out2,$out2,$out2,$inpperm 1741 stvx_u $out1,$x10,$out 1742 le?vperm $out3,$out3,$out3,$inpperm 1743 stvx_u $out2,$x20,$out 1744 le?vperm $out4,$out4,$out4,$inpperm 1745 stvx_u $out3,$x30,$out 1746 le?vperm $out5,$out5,$out5,$inpperm 1747 stvx_u $out4,$x40,$out 1748 le?vperm $out6,$out6,$out6,$inpperm 1749 stvx_u $out5,$x50,$out 1750 stvx_u $out6,$x60,$out 1751 addi $out,$out,0x70 1752 b Lctr32_enc8x_done 1753 1754.align 5 1755Lctr32_enc8x_six: 1756 vcipherlast $out0,$out0,$in2 1757 vcipherlast $out1,$out1,$in3 1758 vcipherlast $out2,$out2,$in4 1759 vcipherlast $out3,$out3,$in5 1760 vcipherlast $out4,$out4,$in6 1761 vcipherlast $out5,$out5,$in7 1762 1763 le?vperm $out0,$out0,$out0,$inpperm 1764 le?vperm $out1,$out1,$out1,$inpperm 1765 stvx_u $out0,$x00,$out 1766 le?vperm $out2,$out2,$out2,$inpperm 1767 stvx_u $out1,$x10,$out 1768 le?vperm $out3,$out3,$out3,$inpperm 1769 stvx_u $out2,$x20,$out 1770 le?vperm $out4,$out4,$out4,$inpperm 1771 stvx_u $out3,$x30,$out 1772 le?vperm $out5,$out5,$out5,$inpperm 1773 stvx_u $out4,$x40,$out 1774 stvx_u $out5,$x50,$out 1775 addi $out,$out,0x60 1776 b Lctr32_enc8x_done 1777 1778.align 5 1779Lctr32_enc8x_five: 1780 vcipherlast $out0,$out0,$in3 1781 vcipherlast $out1,$out1,$in4 1782 vcipherlast $out2,$out2,$in5 1783 vcipherlast $out3,$out3,$in6 1784 vcipherlast $out4,$out4,$in7 1785 1786 le?vperm $out0,$out0,$out0,$inpperm 1787 le?vperm $out1,$out1,$out1,$inpperm 1788 stvx_u $out0,$x00,$out 1789 le?vperm $out2,$out2,$out2,$inpperm 1790 stvx_u $out1,$x10,$out 1791 le?vperm $out3,$out3,$out3,$inpperm 1792 stvx_u $out2,$x20,$out 1793 le?vperm $out4,$out4,$out4,$inpperm 1794 stvx_u $out3,$x30,$out 1795 stvx_u $out4,$x40,$out 1796 addi $out,$out,0x50 1797 b Lctr32_enc8x_done 1798 1799.align 5 1800Lctr32_enc8x_four: 1801 vcipherlast $out0,$out0,$in4 1802 vcipherlast $out1,$out1,$in5 1803 vcipherlast $out2,$out2,$in6 1804 vcipherlast $out3,$out3,$in7 1805 1806 le?vperm $out0,$out0,$out0,$inpperm 1807 le?vperm $out1,$out1,$out1,$inpperm 1808 stvx_u $out0,$x00,$out 1809 le?vperm $out2,$out2,$out2,$inpperm 1810 stvx_u $out1,$x10,$out 1811 le?vperm $out3,$out3,$out3,$inpperm 1812 stvx_u $out2,$x20,$out 1813 stvx_u $out3,$x30,$out 1814 addi $out,$out,0x40 1815 b Lctr32_enc8x_done 1816 1817.align 5 1818Lctr32_enc8x_three: 1819 vcipherlast $out0,$out0,$in5 1820 vcipherlast $out1,$out1,$in6 1821 vcipherlast $out2,$out2,$in7 1822 1823 le?vperm $out0,$out0,$out0,$inpperm 1824 le?vperm $out1,$out1,$out1,$inpperm 1825 stvx_u $out0,$x00,$out 1826 le?vperm $out2,$out2,$out2,$inpperm 1827 stvx_u $out1,$x10,$out 1828 stvx_u $out2,$x20,$out 1829 addi $out,$out,0x30 1830 b Lcbc_dec8x_done 1831 1832.align 5 1833Lctr32_enc8x_two: 1834 vcipherlast $out0,$out0,$in6 1835 vcipherlast $out1,$out1,$in7 1836 1837 le?vperm $out0,$out0,$out0,$inpperm 1838 le?vperm $out1,$out1,$out1,$inpperm 1839 stvx_u $out0,$x00,$out 1840 stvx_u $out1,$x10,$out 1841 addi $out,$out,0x20 1842 b Lcbc_dec8x_done 1843 1844.align 5 1845Lctr32_enc8x_one: 1846 vcipherlast $out0,$out0,$in7 1847 1848 le?vperm $out0,$out0,$out0,$inpperm 1849 stvx_u $out0,0,$out 1850 addi $out,$out,0x10 1851 1852Lctr32_enc8x_done: 1853 li r10,`$FRAME+15` 1854 li r11,`$FRAME+31` 1855 stvx $inpperm,r10,$sp # wipe copies of round keys 1856 addi r10,r10,32 1857 stvx $inpperm,r11,$sp 1858 addi r11,r11,32 1859 stvx $inpperm,r10,$sp 1860 addi r10,r10,32 1861 stvx $inpperm,r11,$sp 1862 addi r11,r11,32 1863 stvx $inpperm,r10,$sp 1864 addi r10,r10,32 1865 stvx $inpperm,r11,$sp 1866 addi r11,r11,32 1867 stvx $inpperm,r10,$sp 1868 addi r10,r10,32 1869 stvx $inpperm,r11,$sp 1870 addi r11,r11,32 1871 1872 mtspr 256,$vrsave 1873 lvx v20,r10,$sp # ABI says so 1874 addi r10,r10,32 1875 lvx v21,r11,$sp 1876 addi r11,r11,32 1877 lvx v22,r10,$sp 1878 addi r10,r10,32 1879 lvx v23,r11,$sp 1880 addi r11,r11,32 1881 lvx v24,r10,$sp 1882 addi r10,r10,32 1883 lvx v25,r11,$sp 1884 addi r11,r11,32 1885 lvx v26,r10,$sp 1886 addi r10,r10,32 1887 lvx v27,r11,$sp 1888 addi r11,r11,32 1889 lvx v28,r10,$sp 1890 addi r10,r10,32 1891 lvx v29,r11,$sp 1892 addi r11,r11,32 1893 lvx v30,r10,$sp 1894 lvx v31,r11,$sp 1895 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1896 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1897 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1898 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1899 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1900 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1901 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1902 blr 1903 .long 0 1904 .byte 0,12,0x04,0,0x80,6,6,0 1905 .long 0 1906.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1907___ 1908}} }}} 1909 1910######################################################################### 1911{{{ # XTS procedures # 1912# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1913# const AES_KEY *key1, const AES_KEY *key2, # 1914# [const] unsigned char iv[16]); # 1915# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1916# input tweak value is assumed to be encrypted already, and last tweak # 1917# value, one suitable for consecutive call on same chunk of data, is # 1918# written back to original buffer. In addition, in "tweak chaining" # 1919# mode only complete input blocks are processed. # 1920 1921my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1922my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1923my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1924my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1925my $taillen = $key2; 1926 1927 ($inp,$idx) = ($idx,$inp); # reassign 1928 1929$code.=<<___; 1930.globl .${prefix}_xts_encrypt 1931.align 5 1932.${prefix}_xts_encrypt: 1933 mr $inp,r3 # reassign 1934 li r3,-1 1935 ${UCMP}i $len,16 1936 bltlr- 1937 1938 lis r0,0xfff0 1939 mfspr r12,256 # save vrsave 1940 li r11,0 1941 mtspr 256,r0 1942 1943 vspltisb $seven,0x07 # 0x070707..07 1944 le?lvsl $leperm,r11,r11 1945 le?vspltisb $tmp,0x0f 1946 le?vxor $leperm,$leperm,$seven 1947 1948 li $idx,15 1949 lvx $tweak,0,$ivp # load [unaligned] iv 1950 lvsl $inpperm,0,$ivp 1951 lvx $inptail,$idx,$ivp 1952 le?vxor $inpperm,$inpperm,$tmp 1953 vperm $tweak,$tweak,$inptail,$inpperm 1954 1955 neg r11,$inp 1956 lvsr $inpperm,0,r11 # prepare for unaligned load 1957 lvx $inout,0,$inp 1958 addi $inp,$inp,15 # 15 is not typo 1959 le?vxor $inpperm,$inpperm,$tmp 1960 1961 ${UCMP}i $key2,0 # key2==NULL? 1962 beq Lxts_enc_no_key2 1963 1964 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 1965 lwz $rounds,240($key2) 1966 srwi $rounds,$rounds,1 1967 subi $rounds,$rounds,1 1968 li $idx,16 1969 1970 lvx $rndkey0,0,$key2 1971 lvx $rndkey1,$idx,$key2 1972 addi $idx,$idx,16 1973 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1974 vxor $tweak,$tweak,$rndkey0 1975 lvx $rndkey0,$idx,$key2 1976 addi $idx,$idx,16 1977 mtctr $rounds 1978 1979Ltweak_xts_enc: 1980 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1981 vcipher $tweak,$tweak,$rndkey1 1982 lvx $rndkey1,$idx,$key2 1983 addi $idx,$idx,16 1984 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1985 vcipher $tweak,$tweak,$rndkey0 1986 lvx $rndkey0,$idx,$key2 1987 addi $idx,$idx,16 1988 bdnz Ltweak_xts_enc 1989 1990 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1991 vcipher $tweak,$tweak,$rndkey1 1992 lvx $rndkey1,$idx,$key2 1993 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1994 vcipherlast $tweak,$tweak,$rndkey0 1995 1996 li $ivp,0 # don't chain the tweak 1997 b Lxts_enc 1998 1999Lxts_enc_no_key2: 2000 li $idx,-16 2001 and $len,$len,$idx # in "tweak chaining" 2002 # mode only complete 2003 # blocks are processed 2004Lxts_enc: 2005 lvx $inptail,0,$inp 2006 addi $inp,$inp,16 2007 2008 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2009 lwz $rounds,240($key1) 2010 srwi $rounds,$rounds,1 2011 subi $rounds,$rounds,1 2012 li $idx,16 2013 2014 vslb $eighty7,$seven,$seven # 0x808080..80 2015 vor $eighty7,$eighty7,$seven # 0x878787..87 2016 vspltisb $tmp,1 # 0x010101..01 2017 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2018 2019 ${UCMP}i $len,96 2020 bge _aesp8_xts_encrypt6x 2021 2022 andi. $taillen,$len,15 2023 subic r0,$len,32 2024 subi $taillen,$taillen,16 2025 subfe r0,r0,r0 2026 and r0,r0,$taillen 2027 add $inp,$inp,r0 2028 2029 lvx $rndkey0,0,$key1 2030 lvx $rndkey1,$idx,$key1 2031 addi $idx,$idx,16 2032 vperm $inout,$inout,$inptail,$inpperm 2033 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2034 vxor $inout,$inout,$tweak 2035 vxor $inout,$inout,$rndkey0 2036 lvx $rndkey0,$idx,$key1 2037 addi $idx,$idx,16 2038 mtctr $rounds 2039 b Loop_xts_enc 2040 2041.align 5 2042Loop_xts_enc: 2043 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2044 vcipher $inout,$inout,$rndkey1 2045 lvx $rndkey1,$idx,$key1 2046 addi $idx,$idx,16 2047 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2048 vcipher $inout,$inout,$rndkey0 2049 lvx $rndkey0,$idx,$key1 2050 addi $idx,$idx,16 2051 bdnz Loop_xts_enc 2052 2053 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2054 vcipher $inout,$inout,$rndkey1 2055 lvx $rndkey1,$idx,$key1 2056 li $idx,16 2057 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2058 vxor $rndkey0,$rndkey0,$tweak 2059 vcipherlast $output,$inout,$rndkey0 2060 2061 le?vperm $tmp,$output,$output,$leperm 2062 be?nop 2063 le?stvx_u $tmp,0,$out 2064 be?stvx_u $output,0,$out 2065 addi $out,$out,16 2066 2067 subic. $len,$len,16 2068 beq Lxts_enc_done 2069 2070 vmr $inout,$inptail 2071 lvx $inptail,0,$inp 2072 addi $inp,$inp,16 2073 lvx $rndkey0,0,$key1 2074 lvx $rndkey1,$idx,$key1 2075 addi $idx,$idx,16 2076 2077 subic r0,$len,32 2078 subfe r0,r0,r0 2079 and r0,r0,$taillen 2080 add $inp,$inp,r0 2081 2082 vsrab $tmp,$tweak,$seven # next tweak value 2083 vaddubm $tweak,$tweak,$tweak 2084 vsldoi $tmp,$tmp,$tmp,15 2085 vand $tmp,$tmp,$eighty7 2086 vxor $tweak,$tweak,$tmp 2087 2088 vperm $inout,$inout,$inptail,$inpperm 2089 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2090 vxor $inout,$inout,$tweak 2091 vxor $output,$output,$rndkey0 # just in case $len<16 2092 vxor $inout,$inout,$rndkey0 2093 lvx $rndkey0,$idx,$key1 2094 addi $idx,$idx,16 2095 2096 mtctr $rounds 2097 ${UCMP}i $len,16 2098 bge Loop_xts_enc 2099 2100 vxor $output,$output,$tweak 2101 lvsr $inpperm,0,$len # $inpperm is no longer needed 2102 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2103 vspltisb $tmp,-1 2104 vperm $inptail,$inptail,$tmp,$inpperm 2105 vsel $inout,$inout,$output,$inptail 2106 2107 subi r11,$out,17 2108 subi $out,$out,16 2109 mtctr $len 2110 li $len,16 2111Loop_xts_enc_steal: 2112 lbzu r0,1(r11) 2113 stb r0,16(r11) 2114 bdnz Loop_xts_enc_steal 2115 2116 mtctr $rounds 2117 b Loop_xts_enc # one more time... 2118 2119Lxts_enc_done: 2120 ${UCMP}i $ivp,0 2121 beq Lxts_enc_ret 2122 2123 vsrab $tmp,$tweak,$seven # next tweak value 2124 vaddubm $tweak,$tweak,$tweak 2125 vsldoi $tmp,$tmp,$tmp,15 2126 vand $tmp,$tmp,$eighty7 2127 vxor $tweak,$tweak,$tmp 2128 2129 le?vperm $tweak,$tweak,$tweak,$leperm 2130 stvx_u $tweak,0,$ivp 2131 2132Lxts_enc_ret: 2133 mtspr 256,r12 # restore vrsave 2134 li r3,0 2135 blr 2136 .long 0 2137 .byte 0,12,0x04,0,0x80,6,6,0 2138 .long 0 2139.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2140 2141.globl .${prefix}_xts_decrypt 2142.align 5 2143.${prefix}_xts_decrypt: 2144 mr $inp,r3 # reassign 2145 li r3,-1 2146 ${UCMP}i $len,16 2147 bltlr- 2148 2149 lis r0,0xfff8 2150 mfspr r12,256 # save vrsave 2151 li r11,0 2152 mtspr 256,r0 2153 2154 andi. r0,$len,15 2155 neg r0,r0 2156 andi. r0,r0,16 2157 sub $len,$len,r0 2158 2159 vspltisb $seven,0x07 # 0x070707..07 2160 le?lvsl $leperm,r11,r11 2161 le?vspltisb $tmp,0x0f 2162 le?vxor $leperm,$leperm,$seven 2163 2164 li $idx,15 2165 lvx $tweak,0,$ivp # load [unaligned] iv 2166 lvsl $inpperm,0,$ivp 2167 lvx $inptail,$idx,$ivp 2168 le?vxor $inpperm,$inpperm,$tmp 2169 vperm $tweak,$tweak,$inptail,$inpperm 2170 2171 neg r11,$inp 2172 lvsr $inpperm,0,r11 # prepare for unaligned load 2173 lvx $inout,0,$inp 2174 addi $inp,$inp,15 # 15 is not typo 2175 le?vxor $inpperm,$inpperm,$tmp 2176 2177 ${UCMP}i $key2,0 # key2==NULL? 2178 beq Lxts_dec_no_key2 2179 2180 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2181 lwz $rounds,240($key2) 2182 srwi $rounds,$rounds,1 2183 subi $rounds,$rounds,1 2184 li $idx,16 2185 2186 lvx $rndkey0,0,$key2 2187 lvx $rndkey1,$idx,$key2 2188 addi $idx,$idx,16 2189 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2190 vxor $tweak,$tweak,$rndkey0 2191 lvx $rndkey0,$idx,$key2 2192 addi $idx,$idx,16 2193 mtctr $rounds 2194 2195Ltweak_xts_dec: 2196 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2197 vcipher $tweak,$tweak,$rndkey1 2198 lvx $rndkey1,$idx,$key2 2199 addi $idx,$idx,16 2200 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2201 vcipher $tweak,$tweak,$rndkey0 2202 lvx $rndkey0,$idx,$key2 2203 addi $idx,$idx,16 2204 bdnz Ltweak_xts_dec 2205 2206 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2207 vcipher $tweak,$tweak,$rndkey1 2208 lvx $rndkey1,$idx,$key2 2209 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2210 vcipherlast $tweak,$tweak,$rndkey0 2211 2212 li $ivp,0 # don't chain the tweak 2213 b Lxts_dec 2214 2215Lxts_dec_no_key2: 2216 neg $idx,$len 2217 andi. $idx,$idx,15 2218 add $len,$len,$idx # in "tweak chaining" 2219 # mode only complete 2220 # blocks are processed 2221Lxts_dec: 2222 lvx $inptail,0,$inp 2223 addi $inp,$inp,16 2224 2225 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2226 lwz $rounds,240($key1) 2227 srwi $rounds,$rounds,1 2228 subi $rounds,$rounds,1 2229 li $idx,16 2230 2231 vslb $eighty7,$seven,$seven # 0x808080..80 2232 vor $eighty7,$eighty7,$seven # 0x878787..87 2233 vspltisb $tmp,1 # 0x010101..01 2234 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2235 2236 ${UCMP}i $len,96 2237 bge _aesp8_xts_decrypt6x 2238 2239 lvx $rndkey0,0,$key1 2240 lvx $rndkey1,$idx,$key1 2241 addi $idx,$idx,16 2242 vperm $inout,$inout,$inptail,$inpperm 2243 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2244 vxor $inout,$inout,$tweak 2245 vxor $inout,$inout,$rndkey0 2246 lvx $rndkey0,$idx,$key1 2247 addi $idx,$idx,16 2248 mtctr $rounds 2249 2250 ${UCMP}i $len,16 2251 blt Ltail_xts_dec 2252 be?b Loop_xts_dec 2253 2254.align 5 2255Loop_xts_dec: 2256 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2257 vncipher $inout,$inout,$rndkey1 2258 lvx $rndkey1,$idx,$key1 2259 addi $idx,$idx,16 2260 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2261 vncipher $inout,$inout,$rndkey0 2262 lvx $rndkey0,$idx,$key1 2263 addi $idx,$idx,16 2264 bdnz Loop_xts_dec 2265 2266 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2267 vncipher $inout,$inout,$rndkey1 2268 lvx $rndkey1,$idx,$key1 2269 li $idx,16 2270 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2271 vxor $rndkey0,$rndkey0,$tweak 2272 vncipherlast $output,$inout,$rndkey0 2273 2274 le?vperm $tmp,$output,$output,$leperm 2275 be?nop 2276 le?stvx_u $tmp,0,$out 2277 be?stvx_u $output,0,$out 2278 addi $out,$out,16 2279 2280 subic. $len,$len,16 2281 beq Lxts_dec_done 2282 2283 vmr $inout,$inptail 2284 lvx $inptail,0,$inp 2285 addi $inp,$inp,16 2286 lvx $rndkey0,0,$key1 2287 lvx $rndkey1,$idx,$key1 2288 addi $idx,$idx,16 2289 2290 vsrab $tmp,$tweak,$seven # next tweak value 2291 vaddubm $tweak,$tweak,$tweak 2292 vsldoi $tmp,$tmp,$tmp,15 2293 vand $tmp,$tmp,$eighty7 2294 vxor $tweak,$tweak,$tmp 2295 2296 vperm $inout,$inout,$inptail,$inpperm 2297 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2298 vxor $inout,$inout,$tweak 2299 vxor $inout,$inout,$rndkey0 2300 lvx $rndkey0,$idx,$key1 2301 addi $idx,$idx,16 2302 2303 mtctr $rounds 2304 ${UCMP}i $len,16 2305 bge Loop_xts_dec 2306 2307Ltail_xts_dec: 2308 vsrab $tmp,$tweak,$seven # next tweak value 2309 vaddubm $tweak1,$tweak,$tweak 2310 vsldoi $tmp,$tmp,$tmp,15 2311 vand $tmp,$tmp,$eighty7 2312 vxor $tweak1,$tweak1,$tmp 2313 2314 subi $inp,$inp,16 2315 add $inp,$inp,$len 2316 2317 vxor $inout,$inout,$tweak # :-( 2318 vxor $inout,$inout,$tweak1 # :-) 2319 2320Loop_xts_dec_short: 2321 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2322 vncipher $inout,$inout,$rndkey1 2323 lvx $rndkey1,$idx,$key1 2324 addi $idx,$idx,16 2325 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2326 vncipher $inout,$inout,$rndkey0 2327 lvx $rndkey0,$idx,$key1 2328 addi $idx,$idx,16 2329 bdnz Loop_xts_dec_short 2330 2331 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2332 vncipher $inout,$inout,$rndkey1 2333 lvx $rndkey1,$idx,$key1 2334 li $idx,16 2335 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2336 vxor $rndkey0,$rndkey0,$tweak1 2337 vncipherlast $output,$inout,$rndkey0 2338 2339 le?vperm $tmp,$output,$output,$leperm 2340 be?nop 2341 le?stvx_u $tmp,0,$out 2342 be?stvx_u $output,0,$out 2343 2344 vmr $inout,$inptail 2345 lvx $inptail,0,$inp 2346 #addi $inp,$inp,16 2347 lvx $rndkey0,0,$key1 2348 lvx $rndkey1,$idx,$key1 2349 addi $idx,$idx,16 2350 vperm $inout,$inout,$inptail,$inpperm 2351 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2352 2353 lvsr $inpperm,0,$len # $inpperm is no longer needed 2354 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2355 vspltisb $tmp,-1 2356 vperm $inptail,$inptail,$tmp,$inpperm 2357 vsel $inout,$inout,$output,$inptail 2358 2359 vxor $rndkey0,$rndkey0,$tweak 2360 vxor $inout,$inout,$rndkey0 2361 lvx $rndkey0,$idx,$key1 2362 addi $idx,$idx,16 2363 2364 subi r11,$out,1 2365 mtctr $len 2366 li $len,16 2367Loop_xts_dec_steal: 2368 lbzu r0,1(r11) 2369 stb r0,16(r11) 2370 bdnz Loop_xts_dec_steal 2371 2372 mtctr $rounds 2373 b Loop_xts_dec # one more time... 2374 2375Lxts_dec_done: 2376 ${UCMP}i $ivp,0 2377 beq Lxts_dec_ret 2378 2379 vsrab $tmp,$tweak,$seven # next tweak value 2380 vaddubm $tweak,$tweak,$tweak 2381 vsldoi $tmp,$tmp,$tmp,15 2382 vand $tmp,$tmp,$eighty7 2383 vxor $tweak,$tweak,$tmp 2384 2385 le?vperm $tweak,$tweak,$tweak,$leperm 2386 stvx_u $tweak,0,$ivp 2387 2388Lxts_dec_ret: 2389 mtspr 256,r12 # restore vrsave 2390 li r3,0 2391 blr 2392 .long 0 2393 .byte 0,12,0x04,0,0x80,6,6,0 2394 .long 0 2395.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2396___ 2397######################################################################### 2398{{ # Optimized XTS procedures # 2399my $key_=$key2; 2400my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2401 $x00=0 if ($flavour =~ /osx/); 2402my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); 2403my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2404my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2405my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2406 # v26-v31 last 6 round keys 2407my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2408my $taillen=$x70; 2409 2410$code.=<<___; 2411.align 5 2412_aesp8_xts_encrypt6x: 2413 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2414 mflr r11 2415 li r7,`$FRAME+8*16+15` 2416 li r3,`$FRAME+8*16+31` 2417 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2418 stvx v20,r7,$sp # ABI says so 2419 addi r7,r7,32 2420 stvx v21,r3,$sp 2421 addi r3,r3,32 2422 stvx v22,r7,$sp 2423 addi r7,r7,32 2424 stvx v23,r3,$sp 2425 addi r3,r3,32 2426 stvx v24,r7,$sp 2427 addi r7,r7,32 2428 stvx v25,r3,$sp 2429 addi r3,r3,32 2430 stvx v26,r7,$sp 2431 addi r7,r7,32 2432 stvx v27,r3,$sp 2433 addi r3,r3,32 2434 stvx v28,r7,$sp 2435 addi r7,r7,32 2436 stvx v29,r3,$sp 2437 addi r3,r3,32 2438 stvx v30,r7,$sp 2439 stvx v31,r3,$sp 2440 li r0,-1 2441 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2442 li $x10,0x10 2443 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2444 li $x20,0x20 2445 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2446 li $x30,0x30 2447 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2448 li $x40,0x40 2449 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2450 li $x50,0x50 2451 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2452 li $x60,0x60 2453 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2454 li $x70,0x70 2455 mtspr 256,r0 2456 2457 subi $rounds,$rounds,3 # -4 in total 2458 2459 lvx $rndkey0,$x00,$key1 # load key schedule 2460 lvx v30,$x10,$key1 2461 addi $key1,$key1,0x20 2462 lvx v31,$x00,$key1 2463 ?vperm $rndkey0,$rndkey0,v30,$keyperm 2464 addi $key_,$sp,$FRAME+15 2465 mtctr $rounds 2466 2467Load_xts_enc_key: 2468 ?vperm v24,v30,v31,$keyperm 2469 lvx v30,$x10,$key1 2470 addi $key1,$key1,0x20 2471 stvx v24,$x00,$key_ # off-load round[1] 2472 ?vperm v25,v31,v30,$keyperm 2473 lvx v31,$x00,$key1 2474 stvx v25,$x10,$key_ # off-load round[2] 2475 addi $key_,$key_,0x20 2476 bdnz Load_xts_enc_key 2477 2478 lvx v26,$x10,$key1 2479 ?vperm v24,v30,v31,$keyperm 2480 lvx v27,$x20,$key1 2481 stvx v24,$x00,$key_ # off-load round[3] 2482 ?vperm v25,v31,v26,$keyperm 2483 lvx v28,$x30,$key1 2484 stvx v25,$x10,$key_ # off-load round[4] 2485 addi $key_,$sp,$FRAME+15 # rewind $key_ 2486 ?vperm v26,v26,v27,$keyperm 2487 lvx v29,$x40,$key1 2488 ?vperm v27,v27,v28,$keyperm 2489 lvx v30,$x50,$key1 2490 ?vperm v28,v28,v29,$keyperm 2491 lvx v31,$x60,$key1 2492 ?vperm v29,v29,v30,$keyperm 2493 lvx $twk5,$x70,$key1 # borrow $twk5 2494 ?vperm v30,v30,v31,$keyperm 2495 lvx v24,$x00,$key_ # pre-load round[1] 2496 ?vperm v31,v31,$twk5,$keyperm 2497 lvx v25,$x10,$key_ # pre-load round[2] 2498 2499 vperm $in0,$inout,$inptail,$inpperm 2500 subi $inp,$inp,31 # undo "caller" 2501 vxor $twk0,$tweak,$rndkey0 2502 vsrab $tmp,$tweak,$seven # next tweak value 2503 vaddubm $tweak,$tweak,$tweak 2504 vsldoi $tmp,$tmp,$tmp,15 2505 vand $tmp,$tmp,$eighty7 2506 vxor $out0,$in0,$twk0 2507 vxor $tweak,$tweak,$tmp 2508 2509 lvx_u $in1,$x10,$inp 2510 vxor $twk1,$tweak,$rndkey0 2511 vsrab $tmp,$tweak,$seven # next tweak value 2512 vaddubm $tweak,$tweak,$tweak 2513 vsldoi $tmp,$tmp,$tmp,15 2514 le?vperm $in1,$in1,$in1,$leperm 2515 vand $tmp,$tmp,$eighty7 2516 vxor $out1,$in1,$twk1 2517 vxor $tweak,$tweak,$tmp 2518 2519 lvx_u $in2,$x20,$inp 2520 andi. $taillen,$len,15 2521 vxor $twk2,$tweak,$rndkey0 2522 vsrab $tmp,$tweak,$seven # next tweak value 2523 vaddubm $tweak,$tweak,$tweak 2524 vsldoi $tmp,$tmp,$tmp,15 2525 le?vperm $in2,$in2,$in2,$leperm 2526 vand $tmp,$tmp,$eighty7 2527 vxor $out2,$in2,$twk2 2528 vxor $tweak,$tweak,$tmp 2529 2530 lvx_u $in3,$x30,$inp 2531 sub $len,$len,$taillen 2532 vxor $twk3,$tweak,$rndkey0 2533 vsrab $tmp,$tweak,$seven # next tweak value 2534 vaddubm $tweak,$tweak,$tweak 2535 vsldoi $tmp,$tmp,$tmp,15 2536 le?vperm $in3,$in3,$in3,$leperm 2537 vand $tmp,$tmp,$eighty7 2538 vxor $out3,$in3,$twk3 2539 vxor $tweak,$tweak,$tmp 2540 2541 lvx_u $in4,$x40,$inp 2542 subi $len,$len,0x60 2543 vxor $twk4,$tweak,$rndkey0 2544 vsrab $tmp,$tweak,$seven # next tweak value 2545 vaddubm $tweak,$tweak,$tweak 2546 vsldoi $tmp,$tmp,$tmp,15 2547 le?vperm $in4,$in4,$in4,$leperm 2548 vand $tmp,$tmp,$eighty7 2549 vxor $out4,$in4,$twk4 2550 vxor $tweak,$tweak,$tmp 2551 2552 lvx_u $in5,$x50,$inp 2553 addi $inp,$inp,0x60 2554 vxor $twk5,$tweak,$rndkey0 2555 vsrab $tmp,$tweak,$seven # next tweak value 2556 vaddubm $tweak,$tweak,$tweak 2557 vsldoi $tmp,$tmp,$tmp,15 2558 le?vperm $in5,$in5,$in5,$leperm 2559 vand $tmp,$tmp,$eighty7 2560 vxor $out5,$in5,$twk5 2561 vxor $tweak,$tweak,$tmp 2562 2563 vxor v31,v31,$rndkey0 2564 mtctr $rounds 2565 b Loop_xts_enc6x 2566 2567.align 5 2568Loop_xts_enc6x: 2569 vcipher $out0,$out0,v24 2570 vcipher $out1,$out1,v24 2571 vcipher $out2,$out2,v24 2572 vcipher $out3,$out3,v24 2573 vcipher $out4,$out4,v24 2574 vcipher $out5,$out5,v24 2575 lvx v24,$x20,$key_ # round[3] 2576 addi $key_,$key_,0x20 2577 2578 vcipher $out0,$out0,v25 2579 vcipher $out1,$out1,v25 2580 vcipher $out2,$out2,v25 2581 vcipher $out3,$out3,v25 2582 vcipher $out4,$out4,v25 2583 vcipher $out5,$out5,v25 2584 lvx v25,$x10,$key_ # round[4] 2585 bdnz Loop_xts_enc6x 2586 2587 subic $len,$len,96 # $len-=96 2588 vxor $in0,$twk0,v31 # xor with last round key 2589 vcipher $out0,$out0,v24 2590 vcipher $out1,$out1,v24 2591 vsrab $tmp,$tweak,$seven # next tweak value 2592 vxor $twk0,$tweak,$rndkey0 2593 vaddubm $tweak,$tweak,$tweak 2594 vcipher $out2,$out2,v24 2595 vcipher $out3,$out3,v24 2596 vsldoi $tmp,$tmp,$tmp,15 2597 vcipher $out4,$out4,v24 2598 vcipher $out5,$out5,v24 2599 2600 subfe. r0,r0,r0 # borrow?-1:0 2601 vand $tmp,$tmp,$eighty7 2602 vcipher $out0,$out0,v25 2603 vcipher $out1,$out1,v25 2604 vxor $tweak,$tweak,$tmp 2605 vcipher $out2,$out2,v25 2606 vcipher $out3,$out3,v25 2607 vxor $in1,$twk1,v31 2608 vsrab $tmp,$tweak,$seven # next tweak value 2609 vxor $twk1,$tweak,$rndkey0 2610 vcipher $out4,$out4,v25 2611 vcipher $out5,$out5,v25 2612 2613 and r0,r0,$len 2614 vaddubm $tweak,$tweak,$tweak 2615 vsldoi $tmp,$tmp,$tmp,15 2616 vcipher $out0,$out0,v26 2617 vcipher $out1,$out1,v26 2618 vand $tmp,$tmp,$eighty7 2619 vcipher $out2,$out2,v26 2620 vcipher $out3,$out3,v26 2621 vxor $tweak,$tweak,$tmp 2622 vcipher $out4,$out4,v26 2623 vcipher $out5,$out5,v26 2624 2625 add $inp,$inp,r0 # $inp is adjusted in such 2626 # way that at exit from the 2627 # loop inX-in5 are loaded 2628 # with last "words" 2629 vxor $in2,$twk2,v31 2630 vsrab $tmp,$tweak,$seven # next tweak value 2631 vxor $twk2,$tweak,$rndkey0 2632 vaddubm $tweak,$tweak,$tweak 2633 vcipher $out0,$out0,v27 2634 vcipher $out1,$out1,v27 2635 vsldoi $tmp,$tmp,$tmp,15 2636 vcipher $out2,$out2,v27 2637 vcipher $out3,$out3,v27 2638 vand $tmp,$tmp,$eighty7 2639 vcipher $out4,$out4,v27 2640 vcipher $out5,$out5,v27 2641 2642 addi $key_,$sp,$FRAME+15 # rewind $key_ 2643 vxor $tweak,$tweak,$tmp 2644 vcipher $out0,$out0,v28 2645 vcipher $out1,$out1,v28 2646 vxor $in3,$twk3,v31 2647 vsrab $tmp,$tweak,$seven # next tweak value 2648 vxor $twk3,$tweak,$rndkey0 2649 vcipher $out2,$out2,v28 2650 vcipher $out3,$out3,v28 2651 vaddubm $tweak,$tweak,$tweak 2652 vsldoi $tmp,$tmp,$tmp,15 2653 vcipher $out4,$out4,v28 2654 vcipher $out5,$out5,v28 2655 lvx v24,$x00,$key_ # re-pre-load round[1] 2656 vand $tmp,$tmp,$eighty7 2657 2658 vcipher $out0,$out0,v29 2659 vcipher $out1,$out1,v29 2660 vxor $tweak,$tweak,$tmp 2661 vcipher $out2,$out2,v29 2662 vcipher $out3,$out3,v29 2663 vxor $in4,$twk4,v31 2664 vsrab $tmp,$tweak,$seven # next tweak value 2665 vxor $twk4,$tweak,$rndkey0 2666 vcipher $out4,$out4,v29 2667 vcipher $out5,$out5,v29 2668 lvx v25,$x10,$key_ # re-pre-load round[2] 2669 vaddubm $tweak,$tweak,$tweak 2670 vsldoi $tmp,$tmp,$tmp,15 2671 2672 vcipher $out0,$out0,v30 2673 vcipher $out1,$out1,v30 2674 vand $tmp,$tmp,$eighty7 2675 vcipher $out2,$out2,v30 2676 vcipher $out3,$out3,v30 2677 vxor $tweak,$tweak,$tmp 2678 vcipher $out4,$out4,v30 2679 vcipher $out5,$out5,v30 2680 vxor $in5,$twk5,v31 2681 vsrab $tmp,$tweak,$seven # next tweak value 2682 vxor $twk5,$tweak,$rndkey0 2683 2684 vcipherlast $out0,$out0,$in0 2685 lvx_u $in0,$x00,$inp # load next input block 2686 vaddubm $tweak,$tweak,$tweak 2687 vsldoi $tmp,$tmp,$tmp,15 2688 vcipherlast $out1,$out1,$in1 2689 lvx_u $in1,$x10,$inp 2690 vcipherlast $out2,$out2,$in2 2691 le?vperm $in0,$in0,$in0,$leperm 2692 lvx_u $in2,$x20,$inp 2693 vand $tmp,$tmp,$eighty7 2694 vcipherlast $out3,$out3,$in3 2695 le?vperm $in1,$in1,$in1,$leperm 2696 lvx_u $in3,$x30,$inp 2697 vcipherlast $out4,$out4,$in4 2698 le?vperm $in2,$in2,$in2,$leperm 2699 lvx_u $in4,$x40,$inp 2700 vxor $tweak,$tweak,$tmp 2701 vcipherlast $tmp,$out5,$in5 # last block might be needed 2702 # in stealing mode 2703 le?vperm $in3,$in3,$in3,$leperm 2704 lvx_u $in5,$x50,$inp 2705 addi $inp,$inp,0x60 2706 le?vperm $in4,$in4,$in4,$leperm 2707 le?vperm $in5,$in5,$in5,$leperm 2708 2709 le?vperm $out0,$out0,$out0,$leperm 2710 le?vperm $out1,$out1,$out1,$leperm 2711 stvx_u $out0,$x00,$out # store output 2712 vxor $out0,$in0,$twk0 2713 le?vperm $out2,$out2,$out2,$leperm 2714 stvx_u $out1,$x10,$out 2715 vxor $out1,$in1,$twk1 2716 le?vperm $out3,$out3,$out3,$leperm 2717 stvx_u $out2,$x20,$out 2718 vxor $out2,$in2,$twk2 2719 le?vperm $out4,$out4,$out4,$leperm 2720 stvx_u $out3,$x30,$out 2721 vxor $out3,$in3,$twk3 2722 le?vperm $out5,$tmp,$tmp,$leperm 2723 stvx_u $out4,$x40,$out 2724 vxor $out4,$in4,$twk4 2725 le?stvx_u $out5,$x50,$out 2726 be?stvx_u $tmp, $x50,$out 2727 vxor $out5,$in5,$twk5 2728 addi $out,$out,0x60 2729 2730 mtctr $rounds 2731 beq Loop_xts_enc6x # did $len-=96 borrow? 2732 2733 addic. $len,$len,0x60 2734 beq Lxts_enc6x_zero 2735 cmpwi $len,0x20 2736 blt Lxts_enc6x_one 2737 nop 2738 beq Lxts_enc6x_two 2739 cmpwi $len,0x40 2740 blt Lxts_enc6x_three 2741 nop 2742 beq Lxts_enc6x_four 2743 2744Lxts_enc6x_five: 2745 vxor $out0,$in1,$twk0 2746 vxor $out1,$in2,$twk1 2747 vxor $out2,$in3,$twk2 2748 vxor $out3,$in4,$twk3 2749 vxor $out4,$in5,$twk4 2750 2751 bl _aesp8_xts_enc5x 2752 2753 le?vperm $out0,$out0,$out0,$leperm 2754 vmr $twk0,$twk5 # unused tweak 2755 le?vperm $out1,$out1,$out1,$leperm 2756 stvx_u $out0,$x00,$out # store output 2757 le?vperm $out2,$out2,$out2,$leperm 2758 stvx_u $out1,$x10,$out 2759 le?vperm $out3,$out3,$out3,$leperm 2760 stvx_u $out2,$x20,$out 2761 vxor $tmp,$out4,$twk5 # last block prep for stealing 2762 le?vperm $out4,$out4,$out4,$leperm 2763 stvx_u $out3,$x30,$out 2764 stvx_u $out4,$x40,$out 2765 addi $out,$out,0x50 2766 bne Lxts_enc6x_steal 2767 b Lxts_enc6x_done 2768 2769.align 4 2770Lxts_enc6x_four: 2771 vxor $out0,$in2,$twk0 2772 vxor $out1,$in3,$twk1 2773 vxor $out2,$in4,$twk2 2774 vxor $out3,$in5,$twk3 2775 vxor $out4,$out4,$out4 2776 2777 bl _aesp8_xts_enc5x 2778 2779 le?vperm $out0,$out0,$out0,$leperm 2780 vmr $twk0,$twk4 # unused tweak 2781 le?vperm $out1,$out1,$out1,$leperm 2782 stvx_u $out0,$x00,$out # store output 2783 le?vperm $out2,$out2,$out2,$leperm 2784 stvx_u $out1,$x10,$out 2785 vxor $tmp,$out3,$twk4 # last block prep for stealing 2786 le?vperm $out3,$out3,$out3,$leperm 2787 stvx_u $out2,$x20,$out 2788 stvx_u $out3,$x30,$out 2789 addi $out,$out,0x40 2790 bne Lxts_enc6x_steal 2791 b Lxts_enc6x_done 2792 2793.align 4 2794Lxts_enc6x_three: 2795 vxor $out0,$in3,$twk0 2796 vxor $out1,$in4,$twk1 2797 vxor $out2,$in5,$twk2 2798 vxor $out3,$out3,$out3 2799 vxor $out4,$out4,$out4 2800 2801 bl _aesp8_xts_enc5x 2802 2803 le?vperm $out0,$out0,$out0,$leperm 2804 vmr $twk0,$twk3 # unused tweak 2805 le?vperm $out1,$out1,$out1,$leperm 2806 stvx_u $out0,$x00,$out # store output 2807 vxor $tmp,$out2,$twk3 # last block prep for stealing 2808 le?vperm $out2,$out2,$out2,$leperm 2809 stvx_u $out1,$x10,$out 2810 stvx_u $out2,$x20,$out 2811 addi $out,$out,0x30 2812 bne Lxts_enc6x_steal 2813 b Lxts_enc6x_done 2814 2815.align 4 2816Lxts_enc6x_two: 2817 vxor $out0,$in4,$twk0 2818 vxor $out1,$in5,$twk1 2819 vxor $out2,$out2,$out2 2820 vxor $out3,$out3,$out3 2821 vxor $out4,$out4,$out4 2822 2823 bl _aesp8_xts_enc5x 2824 2825 le?vperm $out0,$out0,$out0,$leperm 2826 vmr $twk0,$twk2 # unused tweak 2827 vxor $tmp,$out1,$twk2 # last block prep for stealing 2828 le?vperm $out1,$out1,$out1,$leperm 2829 stvx_u $out0,$x00,$out # store output 2830 stvx_u $out1,$x10,$out 2831 addi $out,$out,0x20 2832 bne Lxts_enc6x_steal 2833 b Lxts_enc6x_done 2834 2835.align 4 2836Lxts_enc6x_one: 2837 vxor $out0,$in5,$twk0 2838 nop 2839Loop_xts_enc1x: 2840 vcipher $out0,$out0,v24 2841 lvx v24,$x20,$key_ # round[3] 2842 addi $key_,$key_,0x20 2843 2844 vcipher $out0,$out0,v25 2845 lvx v25,$x10,$key_ # round[4] 2846 bdnz Loop_xts_enc1x 2847 2848 add $inp,$inp,$taillen 2849 cmpwi $taillen,0 2850 vcipher $out0,$out0,v24 2851 2852 subi $inp,$inp,16 2853 vcipher $out0,$out0,v25 2854 2855 lvsr $inpperm,0,$taillen 2856 vcipher $out0,$out0,v26 2857 2858 lvx_u $in0,0,$inp 2859 vcipher $out0,$out0,v27 2860 2861 addi $key_,$sp,$FRAME+15 # rewind $key_ 2862 vcipher $out0,$out0,v28 2863 lvx v24,$x00,$key_ # re-pre-load round[1] 2864 2865 vcipher $out0,$out0,v29 2866 lvx v25,$x10,$key_ # re-pre-load round[2] 2867 vxor $twk0,$twk0,v31 2868 2869 le?vperm $in0,$in0,$in0,$leperm 2870 vcipher $out0,$out0,v30 2871 2872 vperm $in0,$in0,$in0,$inpperm 2873 vcipherlast $out0,$out0,$twk0 2874 2875 vmr $twk0,$twk1 # unused tweak 2876 vxor $tmp,$out0,$twk1 # last block prep for stealing 2877 le?vperm $out0,$out0,$out0,$leperm 2878 stvx_u $out0,$x00,$out # store output 2879 addi $out,$out,0x10 2880 bne Lxts_enc6x_steal 2881 b Lxts_enc6x_done 2882 2883.align 4 2884Lxts_enc6x_zero: 2885 cmpwi $taillen,0 2886 beq Lxts_enc6x_done 2887 2888 add $inp,$inp,$taillen 2889 subi $inp,$inp,16 2890 lvx_u $in0,0,$inp 2891 lvsr $inpperm,0,$taillen # $in5 is no more 2892 le?vperm $in0,$in0,$in0,$leperm 2893 vperm $in0,$in0,$in0,$inpperm 2894 vxor $tmp,$tmp,$twk0 2895Lxts_enc6x_steal: 2896 vxor $in0,$in0,$twk0 2897 vxor $out0,$out0,$out0 2898 vspltisb $out1,-1 2899 vperm $out0,$out0,$out1,$inpperm 2900 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2901 2902 subi r30,$out,17 2903 subi $out,$out,16 2904 mtctr $taillen 2905Loop_xts_enc6x_steal: 2906 lbzu r0,1(r30) 2907 stb r0,16(r30) 2908 bdnz Loop_xts_enc6x_steal 2909 2910 li $taillen,0 2911 mtctr $rounds 2912 b Loop_xts_enc1x # one more time... 2913 2914.align 4 2915Lxts_enc6x_done: 2916 ${UCMP}i $ivp,0 2917 beq Lxts_enc6x_ret 2918 2919 vxor $tweak,$twk0,$rndkey0 2920 le?vperm $tweak,$tweak,$tweak,$leperm 2921 stvx_u $tweak,0,$ivp 2922 2923Lxts_enc6x_ret: 2924 mtlr r11 2925 li r10,`$FRAME+15` 2926 li r11,`$FRAME+31` 2927 stvx $seven,r10,$sp # wipe copies of round keys 2928 addi r10,r10,32 2929 stvx $seven,r11,$sp 2930 addi r11,r11,32 2931 stvx $seven,r10,$sp 2932 addi r10,r10,32 2933 stvx $seven,r11,$sp 2934 addi r11,r11,32 2935 stvx $seven,r10,$sp 2936 addi r10,r10,32 2937 stvx $seven,r11,$sp 2938 addi r11,r11,32 2939 stvx $seven,r10,$sp 2940 addi r10,r10,32 2941 stvx $seven,r11,$sp 2942 addi r11,r11,32 2943 2944 mtspr 256,$vrsave 2945 lvx v20,r10,$sp # ABI says so 2946 addi r10,r10,32 2947 lvx v21,r11,$sp 2948 addi r11,r11,32 2949 lvx v22,r10,$sp 2950 addi r10,r10,32 2951 lvx v23,r11,$sp 2952 addi r11,r11,32 2953 lvx v24,r10,$sp 2954 addi r10,r10,32 2955 lvx v25,r11,$sp 2956 addi r11,r11,32 2957 lvx v26,r10,$sp 2958 addi r10,r10,32 2959 lvx v27,r11,$sp 2960 addi r11,r11,32 2961 lvx v28,r10,$sp 2962 addi r10,r10,32 2963 lvx v29,r11,$sp 2964 addi r11,r11,32 2965 lvx v30,r10,$sp 2966 lvx v31,r11,$sp 2967 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2968 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2969 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2970 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2971 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2972 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2973 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 2974 blr 2975 .long 0 2976 .byte 0,12,0x04,1,0x80,6,6,0 2977 .long 0 2978 2979.align 5 2980_aesp8_xts_enc5x: 2981 vcipher $out0,$out0,v24 2982 vcipher $out1,$out1,v24 2983 vcipher $out2,$out2,v24 2984 vcipher $out3,$out3,v24 2985 vcipher $out4,$out4,v24 2986 lvx v24,$x20,$key_ # round[3] 2987 addi $key_,$key_,0x20 2988 2989 vcipher $out0,$out0,v25 2990 vcipher $out1,$out1,v25 2991 vcipher $out2,$out2,v25 2992 vcipher $out3,$out3,v25 2993 vcipher $out4,$out4,v25 2994 lvx v25,$x10,$key_ # round[4] 2995 bdnz _aesp8_xts_enc5x 2996 2997 add $inp,$inp,$taillen 2998 cmpwi $taillen,0 2999 vcipher $out0,$out0,v24 3000 vcipher $out1,$out1,v24 3001 vcipher $out2,$out2,v24 3002 vcipher $out3,$out3,v24 3003 vcipher $out4,$out4,v24 3004 3005 subi $inp,$inp,16 3006 vcipher $out0,$out0,v25 3007 vcipher $out1,$out1,v25 3008 vcipher $out2,$out2,v25 3009 vcipher $out3,$out3,v25 3010 vcipher $out4,$out4,v25 3011 vxor $twk0,$twk0,v31 3012 3013 vcipher $out0,$out0,v26 3014 lvsr $inpperm,0,$taillen # $in5 is no more 3015 vcipher $out1,$out1,v26 3016 vcipher $out2,$out2,v26 3017 vcipher $out3,$out3,v26 3018 vcipher $out4,$out4,v26 3019 vxor $in1,$twk1,v31 3020 3021 vcipher $out0,$out0,v27 3022 lvx_u $in0,0,$inp 3023 vcipher $out1,$out1,v27 3024 vcipher $out2,$out2,v27 3025 vcipher $out3,$out3,v27 3026 vcipher $out4,$out4,v27 3027 vxor $in2,$twk2,v31 3028 3029 addi $key_,$sp,$FRAME+15 # rewind $key_ 3030 vcipher $out0,$out0,v28 3031 vcipher $out1,$out1,v28 3032 vcipher $out2,$out2,v28 3033 vcipher $out3,$out3,v28 3034 vcipher $out4,$out4,v28 3035 lvx v24,$x00,$key_ # re-pre-load round[1] 3036 vxor $in3,$twk3,v31 3037 3038 vcipher $out0,$out0,v29 3039 le?vperm $in0,$in0,$in0,$leperm 3040 vcipher $out1,$out1,v29 3041 vcipher $out2,$out2,v29 3042 vcipher $out3,$out3,v29 3043 vcipher $out4,$out4,v29 3044 lvx v25,$x10,$key_ # re-pre-load round[2] 3045 vxor $in4,$twk4,v31 3046 3047 vcipher $out0,$out0,v30 3048 vperm $in0,$in0,$in0,$inpperm 3049 vcipher $out1,$out1,v30 3050 vcipher $out2,$out2,v30 3051 vcipher $out3,$out3,v30 3052 vcipher $out4,$out4,v30 3053 3054 vcipherlast $out0,$out0,$twk0 3055 vcipherlast $out1,$out1,$in1 3056 vcipherlast $out2,$out2,$in2 3057 vcipherlast $out3,$out3,$in3 3058 vcipherlast $out4,$out4,$in4 3059 blr 3060 .long 0 3061 .byte 0,12,0x14,0,0,0,0,0 3062 3063.align 5 3064_aesp8_xts_decrypt6x: 3065 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3066 mflr r11 3067 li r7,`$FRAME+8*16+15` 3068 li r3,`$FRAME+8*16+31` 3069 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3070 stvx v20,r7,$sp # ABI says so 3071 addi r7,r7,32 3072 stvx v21,r3,$sp 3073 addi r3,r3,32 3074 stvx v22,r7,$sp 3075 addi r7,r7,32 3076 stvx v23,r3,$sp 3077 addi r3,r3,32 3078 stvx v24,r7,$sp 3079 addi r7,r7,32 3080 stvx v25,r3,$sp 3081 addi r3,r3,32 3082 stvx v26,r7,$sp 3083 addi r7,r7,32 3084 stvx v27,r3,$sp 3085 addi r3,r3,32 3086 stvx v28,r7,$sp 3087 addi r7,r7,32 3088 stvx v29,r3,$sp 3089 addi r3,r3,32 3090 stvx v30,r7,$sp 3091 stvx v31,r3,$sp 3092 li r0,-1 3093 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3094 li $x10,0x10 3095 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3096 li $x20,0x20 3097 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3098 li $x30,0x30 3099 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3100 li $x40,0x40 3101 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3102 li $x50,0x50 3103 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3104 li $x60,0x60 3105 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3106 li $x70,0x70 3107 mtspr 256,r0 3108 3109 subi $rounds,$rounds,3 # -4 in total 3110 3111 lvx $rndkey0,$x00,$key1 # load key schedule 3112 lvx v30,$x10,$key1 3113 addi $key1,$key1,0x20 3114 lvx v31,$x00,$key1 3115 ?vperm $rndkey0,$rndkey0,v30,$keyperm 3116 addi $key_,$sp,$FRAME+15 3117 mtctr $rounds 3118 3119Load_xts_dec_key: 3120 ?vperm v24,v30,v31,$keyperm 3121 lvx v30,$x10,$key1 3122 addi $key1,$key1,0x20 3123 stvx v24,$x00,$key_ # off-load round[1] 3124 ?vperm v25,v31,v30,$keyperm 3125 lvx v31,$x00,$key1 3126 stvx v25,$x10,$key_ # off-load round[2] 3127 addi $key_,$key_,0x20 3128 bdnz Load_xts_dec_key 3129 3130 lvx v26,$x10,$key1 3131 ?vperm v24,v30,v31,$keyperm 3132 lvx v27,$x20,$key1 3133 stvx v24,$x00,$key_ # off-load round[3] 3134 ?vperm v25,v31,v26,$keyperm 3135 lvx v28,$x30,$key1 3136 stvx v25,$x10,$key_ # off-load round[4] 3137 addi $key_,$sp,$FRAME+15 # rewind $key_ 3138 ?vperm v26,v26,v27,$keyperm 3139 lvx v29,$x40,$key1 3140 ?vperm v27,v27,v28,$keyperm 3141 lvx v30,$x50,$key1 3142 ?vperm v28,v28,v29,$keyperm 3143 lvx v31,$x60,$key1 3144 ?vperm v29,v29,v30,$keyperm 3145 lvx $twk5,$x70,$key1 # borrow $twk5 3146 ?vperm v30,v30,v31,$keyperm 3147 lvx v24,$x00,$key_ # pre-load round[1] 3148 ?vperm v31,v31,$twk5,$keyperm 3149 lvx v25,$x10,$key_ # pre-load round[2] 3150 3151 vperm $in0,$inout,$inptail,$inpperm 3152 subi $inp,$inp,31 # undo "caller" 3153 vxor $twk0,$tweak,$rndkey0 3154 vsrab $tmp,$tweak,$seven # next tweak value 3155 vaddubm $tweak,$tweak,$tweak 3156 vsldoi $tmp,$tmp,$tmp,15 3157 vand $tmp,$tmp,$eighty7 3158 vxor $out0,$in0,$twk0 3159 vxor $tweak,$tweak,$tmp 3160 3161 lvx_u $in1,$x10,$inp 3162 vxor $twk1,$tweak,$rndkey0 3163 vsrab $tmp,$tweak,$seven # next tweak value 3164 vaddubm $tweak,$tweak,$tweak 3165 vsldoi $tmp,$tmp,$tmp,15 3166 le?vperm $in1,$in1,$in1,$leperm 3167 vand $tmp,$tmp,$eighty7 3168 vxor $out1,$in1,$twk1 3169 vxor $tweak,$tweak,$tmp 3170 3171 lvx_u $in2,$x20,$inp 3172 andi. $taillen,$len,15 3173 vxor $twk2,$tweak,$rndkey0 3174 vsrab $tmp,$tweak,$seven # next tweak value 3175 vaddubm $tweak,$tweak,$tweak 3176 vsldoi $tmp,$tmp,$tmp,15 3177 le?vperm $in2,$in2,$in2,$leperm 3178 vand $tmp,$tmp,$eighty7 3179 vxor $out2,$in2,$twk2 3180 vxor $tweak,$tweak,$tmp 3181 3182 lvx_u $in3,$x30,$inp 3183 sub $len,$len,$taillen 3184 vxor $twk3,$tweak,$rndkey0 3185 vsrab $tmp,$tweak,$seven # next tweak value 3186 vaddubm $tweak,$tweak,$tweak 3187 vsldoi $tmp,$tmp,$tmp,15 3188 le?vperm $in3,$in3,$in3,$leperm 3189 vand $tmp,$tmp,$eighty7 3190 vxor $out3,$in3,$twk3 3191 vxor $tweak,$tweak,$tmp 3192 3193 lvx_u $in4,$x40,$inp 3194 subi $len,$len,0x60 3195 vxor $twk4,$tweak,$rndkey0 3196 vsrab $tmp,$tweak,$seven # next tweak value 3197 vaddubm $tweak,$tweak,$tweak 3198 vsldoi $tmp,$tmp,$tmp,15 3199 le?vperm $in4,$in4,$in4,$leperm 3200 vand $tmp,$tmp,$eighty7 3201 vxor $out4,$in4,$twk4 3202 vxor $tweak,$tweak,$tmp 3203 3204 lvx_u $in5,$x50,$inp 3205 addi $inp,$inp,0x60 3206 vxor $twk5,$tweak,$rndkey0 3207 vsrab $tmp,$tweak,$seven # next tweak value 3208 vaddubm $tweak,$tweak,$tweak 3209 vsldoi $tmp,$tmp,$tmp,15 3210 le?vperm $in5,$in5,$in5,$leperm 3211 vand $tmp,$tmp,$eighty7 3212 vxor $out5,$in5,$twk5 3213 vxor $tweak,$tweak,$tmp 3214 3215 vxor v31,v31,$rndkey0 3216 mtctr $rounds 3217 b Loop_xts_dec6x 3218 3219.align 5 3220Loop_xts_dec6x: 3221 vncipher $out0,$out0,v24 3222 vncipher $out1,$out1,v24 3223 vncipher $out2,$out2,v24 3224 vncipher $out3,$out3,v24 3225 vncipher $out4,$out4,v24 3226 vncipher $out5,$out5,v24 3227 lvx v24,$x20,$key_ # round[3] 3228 addi $key_,$key_,0x20 3229 3230 vncipher $out0,$out0,v25 3231 vncipher $out1,$out1,v25 3232 vncipher $out2,$out2,v25 3233 vncipher $out3,$out3,v25 3234 vncipher $out4,$out4,v25 3235 vncipher $out5,$out5,v25 3236 lvx v25,$x10,$key_ # round[4] 3237 bdnz Loop_xts_dec6x 3238 3239 subic $len,$len,96 # $len-=96 3240 vxor $in0,$twk0,v31 # xor with last round key 3241 vncipher $out0,$out0,v24 3242 vncipher $out1,$out1,v24 3243 vsrab $tmp,$tweak,$seven # next tweak value 3244 vxor $twk0,$tweak,$rndkey0 3245 vaddubm $tweak,$tweak,$tweak 3246 vncipher $out2,$out2,v24 3247 vncipher $out3,$out3,v24 3248 vsldoi $tmp,$tmp,$tmp,15 3249 vncipher $out4,$out4,v24 3250 vncipher $out5,$out5,v24 3251 3252 subfe. r0,r0,r0 # borrow?-1:0 3253 vand $tmp,$tmp,$eighty7 3254 vncipher $out0,$out0,v25 3255 vncipher $out1,$out1,v25 3256 vxor $tweak,$tweak,$tmp 3257 vncipher $out2,$out2,v25 3258 vncipher $out3,$out3,v25 3259 vxor $in1,$twk1,v31 3260 vsrab $tmp,$tweak,$seven # next tweak value 3261 vxor $twk1,$tweak,$rndkey0 3262 vncipher $out4,$out4,v25 3263 vncipher $out5,$out5,v25 3264 3265 and r0,r0,$len 3266 vaddubm $tweak,$tweak,$tweak 3267 vsldoi $tmp,$tmp,$tmp,15 3268 vncipher $out0,$out0,v26 3269 vncipher $out1,$out1,v26 3270 vand $tmp,$tmp,$eighty7 3271 vncipher $out2,$out2,v26 3272 vncipher $out3,$out3,v26 3273 vxor $tweak,$tweak,$tmp 3274 vncipher $out4,$out4,v26 3275 vncipher $out5,$out5,v26 3276 3277 add $inp,$inp,r0 # $inp is adjusted in such 3278 # way that at exit from the 3279 # loop inX-in5 are loaded 3280 # with last "words" 3281 vxor $in2,$twk2,v31 3282 vsrab $tmp,$tweak,$seven # next tweak value 3283 vxor $twk2,$tweak,$rndkey0 3284 vaddubm $tweak,$tweak,$tweak 3285 vncipher $out0,$out0,v27 3286 vncipher $out1,$out1,v27 3287 vsldoi $tmp,$tmp,$tmp,15 3288 vncipher $out2,$out2,v27 3289 vncipher $out3,$out3,v27 3290 vand $tmp,$tmp,$eighty7 3291 vncipher $out4,$out4,v27 3292 vncipher $out5,$out5,v27 3293 3294 addi $key_,$sp,$FRAME+15 # rewind $key_ 3295 vxor $tweak,$tweak,$tmp 3296 vncipher $out0,$out0,v28 3297 vncipher $out1,$out1,v28 3298 vxor $in3,$twk3,v31 3299 vsrab $tmp,$tweak,$seven # next tweak value 3300 vxor $twk3,$tweak,$rndkey0 3301 vncipher $out2,$out2,v28 3302 vncipher $out3,$out3,v28 3303 vaddubm $tweak,$tweak,$tweak 3304 vsldoi $tmp,$tmp,$tmp,15 3305 vncipher $out4,$out4,v28 3306 vncipher $out5,$out5,v28 3307 lvx v24,$x00,$key_ # re-pre-load round[1] 3308 vand $tmp,$tmp,$eighty7 3309 3310 vncipher $out0,$out0,v29 3311 vncipher $out1,$out1,v29 3312 vxor $tweak,$tweak,$tmp 3313 vncipher $out2,$out2,v29 3314 vncipher $out3,$out3,v29 3315 vxor $in4,$twk4,v31 3316 vsrab $tmp,$tweak,$seven # next tweak value 3317 vxor $twk4,$tweak,$rndkey0 3318 vncipher $out4,$out4,v29 3319 vncipher $out5,$out5,v29 3320 lvx v25,$x10,$key_ # re-pre-load round[2] 3321 vaddubm $tweak,$tweak,$tweak 3322 vsldoi $tmp,$tmp,$tmp,15 3323 3324 vncipher $out0,$out0,v30 3325 vncipher $out1,$out1,v30 3326 vand $tmp,$tmp,$eighty7 3327 vncipher $out2,$out2,v30 3328 vncipher $out3,$out3,v30 3329 vxor $tweak,$tweak,$tmp 3330 vncipher $out4,$out4,v30 3331 vncipher $out5,$out5,v30 3332 vxor $in5,$twk5,v31 3333 vsrab $tmp,$tweak,$seven # next tweak value 3334 vxor $twk5,$tweak,$rndkey0 3335 3336 vncipherlast $out0,$out0,$in0 3337 lvx_u $in0,$x00,$inp # load next input block 3338 vaddubm $tweak,$tweak,$tweak 3339 vsldoi $tmp,$tmp,$tmp,15 3340 vncipherlast $out1,$out1,$in1 3341 lvx_u $in1,$x10,$inp 3342 vncipherlast $out2,$out2,$in2 3343 le?vperm $in0,$in0,$in0,$leperm 3344 lvx_u $in2,$x20,$inp 3345 vand $tmp,$tmp,$eighty7 3346 vncipherlast $out3,$out3,$in3 3347 le?vperm $in1,$in1,$in1,$leperm 3348 lvx_u $in3,$x30,$inp 3349 vncipherlast $out4,$out4,$in4 3350 le?vperm $in2,$in2,$in2,$leperm 3351 lvx_u $in4,$x40,$inp 3352 vxor $tweak,$tweak,$tmp 3353 vncipherlast $out5,$out5,$in5 3354 le?vperm $in3,$in3,$in3,$leperm 3355 lvx_u $in5,$x50,$inp 3356 addi $inp,$inp,0x60 3357 le?vperm $in4,$in4,$in4,$leperm 3358 le?vperm $in5,$in5,$in5,$leperm 3359 3360 le?vperm $out0,$out0,$out0,$leperm 3361 le?vperm $out1,$out1,$out1,$leperm 3362 stvx_u $out0,$x00,$out # store output 3363 vxor $out0,$in0,$twk0 3364 le?vperm $out2,$out2,$out2,$leperm 3365 stvx_u $out1,$x10,$out 3366 vxor $out1,$in1,$twk1 3367 le?vperm $out3,$out3,$out3,$leperm 3368 stvx_u $out2,$x20,$out 3369 vxor $out2,$in2,$twk2 3370 le?vperm $out4,$out4,$out4,$leperm 3371 stvx_u $out3,$x30,$out 3372 vxor $out3,$in3,$twk3 3373 le?vperm $out5,$out5,$out5,$leperm 3374 stvx_u $out4,$x40,$out 3375 vxor $out4,$in4,$twk4 3376 stvx_u $out5,$x50,$out 3377 vxor $out5,$in5,$twk5 3378 addi $out,$out,0x60 3379 3380 mtctr $rounds 3381 beq Loop_xts_dec6x # did $len-=96 borrow? 3382 3383 addic. $len,$len,0x60 3384 beq Lxts_dec6x_zero 3385 cmpwi $len,0x20 3386 blt Lxts_dec6x_one 3387 nop 3388 beq Lxts_dec6x_two 3389 cmpwi $len,0x40 3390 blt Lxts_dec6x_three 3391 nop 3392 beq Lxts_dec6x_four 3393 3394Lxts_dec6x_five: 3395 vxor $out0,$in1,$twk0 3396 vxor $out1,$in2,$twk1 3397 vxor $out2,$in3,$twk2 3398 vxor $out3,$in4,$twk3 3399 vxor $out4,$in5,$twk4 3400 3401 bl _aesp8_xts_dec5x 3402 3403 le?vperm $out0,$out0,$out0,$leperm 3404 vmr $twk0,$twk5 # unused tweak 3405 vxor $twk1,$tweak,$rndkey0 3406 le?vperm $out1,$out1,$out1,$leperm 3407 stvx_u $out0,$x00,$out # store output 3408 vxor $out0,$in0,$twk1 3409 le?vperm $out2,$out2,$out2,$leperm 3410 stvx_u $out1,$x10,$out 3411 le?vperm $out3,$out3,$out3,$leperm 3412 stvx_u $out2,$x20,$out 3413 le?vperm $out4,$out4,$out4,$leperm 3414 stvx_u $out3,$x30,$out 3415 stvx_u $out4,$x40,$out 3416 addi $out,$out,0x50 3417 bne Lxts_dec6x_steal 3418 b Lxts_dec6x_done 3419 3420.align 4 3421Lxts_dec6x_four: 3422 vxor $out0,$in2,$twk0 3423 vxor $out1,$in3,$twk1 3424 vxor $out2,$in4,$twk2 3425 vxor $out3,$in5,$twk3 3426 vxor $out4,$out4,$out4 3427 3428 bl _aesp8_xts_dec5x 3429 3430 le?vperm $out0,$out0,$out0,$leperm 3431 vmr $twk0,$twk4 # unused tweak 3432 vmr $twk1,$twk5 3433 le?vperm $out1,$out1,$out1,$leperm 3434 stvx_u $out0,$x00,$out # store output 3435 vxor $out0,$in0,$twk5 3436 le?vperm $out2,$out2,$out2,$leperm 3437 stvx_u $out1,$x10,$out 3438 le?vperm $out3,$out3,$out3,$leperm 3439 stvx_u $out2,$x20,$out 3440 stvx_u $out3,$x30,$out 3441 addi $out,$out,0x40 3442 bne Lxts_dec6x_steal 3443 b Lxts_dec6x_done 3444 3445.align 4 3446Lxts_dec6x_three: 3447 vxor $out0,$in3,$twk0 3448 vxor $out1,$in4,$twk1 3449 vxor $out2,$in5,$twk2 3450 vxor $out3,$out3,$out3 3451 vxor $out4,$out4,$out4 3452 3453 bl _aesp8_xts_dec5x 3454 3455 le?vperm $out0,$out0,$out0,$leperm 3456 vmr $twk0,$twk3 # unused tweak 3457 vmr $twk1,$twk4 3458 le?vperm $out1,$out1,$out1,$leperm 3459 stvx_u $out0,$x00,$out # store output 3460 vxor $out0,$in0,$twk4 3461 le?vperm $out2,$out2,$out2,$leperm 3462 stvx_u $out1,$x10,$out 3463 stvx_u $out2,$x20,$out 3464 addi $out,$out,0x30 3465 bne Lxts_dec6x_steal 3466 b Lxts_dec6x_done 3467 3468.align 4 3469Lxts_dec6x_two: 3470 vxor $out0,$in4,$twk0 3471 vxor $out1,$in5,$twk1 3472 vxor $out2,$out2,$out2 3473 vxor $out3,$out3,$out3 3474 vxor $out4,$out4,$out4 3475 3476 bl _aesp8_xts_dec5x 3477 3478 le?vperm $out0,$out0,$out0,$leperm 3479 vmr $twk0,$twk2 # unused tweak 3480 vmr $twk1,$twk3 3481 le?vperm $out1,$out1,$out1,$leperm 3482 stvx_u $out0,$x00,$out # store output 3483 vxor $out0,$in0,$twk3 3484 stvx_u $out1,$x10,$out 3485 addi $out,$out,0x20 3486 bne Lxts_dec6x_steal 3487 b Lxts_dec6x_done 3488 3489.align 4 3490Lxts_dec6x_one: 3491 vxor $out0,$in5,$twk0 3492 nop 3493Loop_xts_dec1x: 3494 vncipher $out0,$out0,v24 3495 lvx v24,$x20,$key_ # round[3] 3496 addi $key_,$key_,0x20 3497 3498 vncipher $out0,$out0,v25 3499 lvx v25,$x10,$key_ # round[4] 3500 bdnz Loop_xts_dec1x 3501 3502 subi r0,$taillen,1 3503 vncipher $out0,$out0,v24 3504 3505 andi. r0,r0,16 3506 cmpwi $taillen,0 3507 vncipher $out0,$out0,v25 3508 3509 sub $inp,$inp,r0 3510 vncipher $out0,$out0,v26 3511 3512 lvx_u $in0,0,$inp 3513 vncipher $out0,$out0,v27 3514 3515 addi $key_,$sp,$FRAME+15 # rewind $key_ 3516 vncipher $out0,$out0,v28 3517 lvx v24,$x00,$key_ # re-pre-load round[1] 3518 3519 vncipher $out0,$out0,v29 3520 lvx v25,$x10,$key_ # re-pre-load round[2] 3521 vxor $twk0,$twk0,v31 3522 3523 le?vperm $in0,$in0,$in0,$leperm 3524 vncipher $out0,$out0,v30 3525 3526 mtctr $rounds 3527 vncipherlast $out0,$out0,$twk0 3528 3529 vmr $twk0,$twk1 # unused tweak 3530 vmr $twk1,$twk2 3531 le?vperm $out0,$out0,$out0,$leperm 3532 stvx_u $out0,$x00,$out # store output 3533 addi $out,$out,0x10 3534 vxor $out0,$in0,$twk2 3535 bne Lxts_dec6x_steal 3536 b Lxts_dec6x_done 3537 3538.align 4 3539Lxts_dec6x_zero: 3540 cmpwi $taillen,0 3541 beq Lxts_dec6x_done 3542 3543 lvx_u $in0,0,$inp 3544 le?vperm $in0,$in0,$in0,$leperm 3545 vxor $out0,$in0,$twk1 3546Lxts_dec6x_steal: 3547 vncipher $out0,$out0,v24 3548 lvx v24,$x20,$key_ # round[3] 3549 addi $key_,$key_,0x20 3550 3551 vncipher $out0,$out0,v25 3552 lvx v25,$x10,$key_ # round[4] 3553 bdnz Lxts_dec6x_steal 3554 3555 add $inp,$inp,$taillen 3556 vncipher $out0,$out0,v24 3557 3558 cmpwi $taillen,0 3559 vncipher $out0,$out0,v25 3560 3561 lvx_u $in0,0,$inp 3562 vncipher $out0,$out0,v26 3563 3564 lvsr $inpperm,0,$taillen # $in5 is no more 3565 vncipher $out0,$out0,v27 3566 3567 addi $key_,$sp,$FRAME+15 # rewind $key_ 3568 vncipher $out0,$out0,v28 3569 lvx v24,$x00,$key_ # re-pre-load round[1] 3570 3571 vncipher $out0,$out0,v29 3572 lvx v25,$x10,$key_ # re-pre-load round[2] 3573 vxor $twk1,$twk1,v31 3574 3575 le?vperm $in0,$in0,$in0,$leperm 3576 vncipher $out0,$out0,v30 3577 3578 vperm $in0,$in0,$in0,$inpperm 3579 vncipherlast $tmp,$out0,$twk1 3580 3581 le?vperm $out0,$tmp,$tmp,$leperm 3582 le?stvx_u $out0,0,$out 3583 be?stvx_u $tmp,0,$out 3584 3585 vxor $out0,$out0,$out0 3586 vspltisb $out1,-1 3587 vperm $out0,$out0,$out1,$inpperm 3588 vsel $out0,$in0,$tmp,$out0 3589 vxor $out0,$out0,$twk0 3590 3591 subi r30,$out,1 3592 mtctr $taillen 3593Loop_xts_dec6x_steal: 3594 lbzu r0,1(r30) 3595 stb r0,16(r30) 3596 bdnz Loop_xts_dec6x_steal 3597 3598 li $taillen,0 3599 mtctr $rounds 3600 b Loop_xts_dec1x # one more time... 3601 3602.align 4 3603Lxts_dec6x_done: 3604 ${UCMP}i $ivp,0 3605 beq Lxts_dec6x_ret 3606 3607 vxor $tweak,$twk0,$rndkey0 3608 le?vperm $tweak,$tweak,$tweak,$leperm 3609 stvx_u $tweak,0,$ivp 3610 3611Lxts_dec6x_ret: 3612 mtlr r11 3613 li r10,`$FRAME+15` 3614 li r11,`$FRAME+31` 3615 stvx $seven,r10,$sp # wipe copies of round keys 3616 addi r10,r10,32 3617 stvx $seven,r11,$sp 3618 addi r11,r11,32 3619 stvx $seven,r10,$sp 3620 addi r10,r10,32 3621 stvx $seven,r11,$sp 3622 addi r11,r11,32 3623 stvx $seven,r10,$sp 3624 addi r10,r10,32 3625 stvx $seven,r11,$sp 3626 addi r11,r11,32 3627 stvx $seven,r10,$sp 3628 addi r10,r10,32 3629 stvx $seven,r11,$sp 3630 addi r11,r11,32 3631 3632 mtspr 256,$vrsave 3633 lvx v20,r10,$sp # ABI says so 3634 addi r10,r10,32 3635 lvx v21,r11,$sp 3636 addi r11,r11,32 3637 lvx v22,r10,$sp 3638 addi r10,r10,32 3639 lvx v23,r11,$sp 3640 addi r11,r11,32 3641 lvx v24,r10,$sp 3642 addi r10,r10,32 3643 lvx v25,r11,$sp 3644 addi r11,r11,32 3645 lvx v26,r10,$sp 3646 addi r10,r10,32 3647 lvx v27,r11,$sp 3648 addi r11,r11,32 3649 lvx v28,r10,$sp 3650 addi r10,r10,32 3651 lvx v29,r11,$sp 3652 addi r11,r11,32 3653 lvx v30,r10,$sp 3654 lvx v31,r11,$sp 3655 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3656 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3657 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3658 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3659 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3660 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3661 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3662 blr 3663 .long 0 3664 .byte 0,12,0x04,1,0x80,6,6,0 3665 .long 0 3666 3667.align 5 3668_aesp8_xts_dec5x: 3669 vncipher $out0,$out0,v24 3670 vncipher $out1,$out1,v24 3671 vncipher $out2,$out2,v24 3672 vncipher $out3,$out3,v24 3673 vncipher $out4,$out4,v24 3674 lvx v24,$x20,$key_ # round[3] 3675 addi $key_,$key_,0x20 3676 3677 vncipher $out0,$out0,v25 3678 vncipher $out1,$out1,v25 3679 vncipher $out2,$out2,v25 3680 vncipher $out3,$out3,v25 3681 vncipher $out4,$out4,v25 3682 lvx v25,$x10,$key_ # round[4] 3683 bdnz _aesp8_xts_dec5x 3684 3685 subi r0,$taillen,1 3686 vncipher $out0,$out0,v24 3687 vncipher $out1,$out1,v24 3688 vncipher $out2,$out2,v24 3689 vncipher $out3,$out3,v24 3690 vncipher $out4,$out4,v24 3691 3692 andi. r0,r0,16 3693 cmpwi $taillen,0 3694 vncipher $out0,$out0,v25 3695 vncipher $out1,$out1,v25 3696 vncipher $out2,$out2,v25 3697 vncipher $out3,$out3,v25 3698 vncipher $out4,$out4,v25 3699 vxor $twk0,$twk0,v31 3700 3701 sub $inp,$inp,r0 3702 vncipher $out0,$out0,v26 3703 vncipher $out1,$out1,v26 3704 vncipher $out2,$out2,v26 3705 vncipher $out3,$out3,v26 3706 vncipher $out4,$out4,v26 3707 vxor $in1,$twk1,v31 3708 3709 vncipher $out0,$out0,v27 3710 lvx_u $in0,0,$inp 3711 vncipher $out1,$out1,v27 3712 vncipher $out2,$out2,v27 3713 vncipher $out3,$out3,v27 3714 vncipher $out4,$out4,v27 3715 vxor $in2,$twk2,v31 3716 3717 addi $key_,$sp,$FRAME+15 # rewind $key_ 3718 vncipher $out0,$out0,v28 3719 vncipher $out1,$out1,v28 3720 vncipher $out2,$out2,v28 3721 vncipher $out3,$out3,v28 3722 vncipher $out4,$out4,v28 3723 lvx v24,$x00,$key_ # re-pre-load round[1] 3724 vxor $in3,$twk3,v31 3725 3726 vncipher $out0,$out0,v29 3727 le?vperm $in0,$in0,$in0,$leperm 3728 vncipher $out1,$out1,v29 3729 vncipher $out2,$out2,v29 3730 vncipher $out3,$out3,v29 3731 vncipher $out4,$out4,v29 3732 lvx v25,$x10,$key_ # re-pre-load round[2] 3733 vxor $in4,$twk4,v31 3734 3735 vncipher $out0,$out0,v30 3736 vncipher $out1,$out1,v30 3737 vncipher $out2,$out2,v30 3738 vncipher $out3,$out3,v30 3739 vncipher $out4,$out4,v30 3740 3741 vncipherlast $out0,$out0,$twk0 3742 vncipherlast $out1,$out1,$in1 3743 vncipherlast $out2,$out2,$in2 3744 vncipherlast $out3,$out3,$in3 3745 vncipherlast $out4,$out4,$in4 3746 mtctr $rounds 3747 blr 3748 .long 0 3749 .byte 0,12,0x14,0,0,0,0,0 3750___ 3751}} }}} 3752 3753my $consts=1; 3754foreach(split("\n",$code)) { 3755 s/\`([^\`]*)\`/eval($1)/geo; 3756 3757 # constants table endian-specific conversion 3758 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 3759 my $conv=$3; 3760 my @bytes=(); 3761 3762 # convert to endian-agnostic format 3763 if ($1 eq "long") { 3764 foreach (split(/,\s*/,$2)) { 3765 my $l = /^0/?oct:int; 3766 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 3767 } 3768 } else { 3769 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 3770 } 3771 3772 # little-endian conversion 3773 if ($flavour =~ /le$/o) { 3774 SWITCH: for($conv) { 3775 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3776 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3777 } 3778 } 3779 3780 #emit 3781 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 3782 next; 3783 } 3784 $consts=0 if (m/Lconsts:/o); # end of table 3785 3786 # instructions prefixed with '?' are endian-specific and need 3787 # to be adjusted accordingly... 3788 if ($flavour =~ /le$/o) { # little-endian 3789 s/le\?//o or 3790 s/be\?/#be#/o or 3791 s/\?lvsr/lvsl/o or 3792 s/\?lvsl/lvsr/o or 3793 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 3794 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 3795 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 3796 } else { # big-endian 3797 s/le\?/#le#/o or 3798 s/be\?//o or 3799 s/\?([a-z]+)/$1/o; 3800 } 3801 3802 print $_,"\n"; 3803} 3804 3805close STDOUT; 3806