#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for ARMv8 AES instructions. The
# module is endian-agnostic in the sense that it supports both big-
# and little-endian cases, and it supports both 32- and 64-bit modes
# of operation. The latter is achieved by limiting the number of
# utilized registers to 16, which implies additional NEON load and
# integer instructions. This has no effect on the mighty Apple A7,
# where results are literally equal to the theoretical estimates based
# on AES instruction latencies and issue rates. On Cortex-A53, an
# in-order execution core, this costs up to 10-15%, which is partially
# compensated by implementing a dedicated code path for the 128-bit
# CBC encrypt case. On Cortex-A57, parallelizable-mode performance
# seems to be limited by the sheer amount of NEON instructions...
#
# Performance in cycles per byte processed with 128-bit key:
#
#		CBC enc		CBC dec		CTR
# Apple A7	2.39		1.20		1.20
# Cortex-A53	1.32		1.29		1.46
# Cortex-A57(*)	1.95		0.85		0.93
# Denver	1.96		0.86		0.80
# Mongoose	1.33		1.20		1.20
#
# (*)	original 3.64/1.34/1.32 results were for r0p0 revision
#	and are still the same even for the updated module;

$flavour = shift;
$output  = shift;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;

$prefix="aes_hw";

$code=<<___;
#include <GFp/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
___
$code.=".arch	armv8-a+crypto\n"			if ($flavour =~ /64/);
$code.=<<___						if ($flavour !~ /64/);
.arch	armv7-a	// don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
.code	32
#undef	__thumb2__
___

# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax:
# NEON is mostly 32-bit mnemonics, integer mostly 64-bit. The goal is
# to maintain both 32- and 64-bit code within a single module and
# transliterate common code to either flavour with regex voodoo.
#
{{{
my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));


# On AArch64, put the data in .rodata and use adrp + add for compatibility
# with execute-only memory. On AArch32, put it in .text and use adr.
$code.= ".section .rodata\n" if ($flavour =~ /64/);
$code.=<<___;
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

.globl	GFp_${prefix}_set_encrypt_key
.type	GFp_${prefix}_set_encrypt_key,%function
.align	5
GFp_${prefix}_set_encrypt_key:
.Lenc_key:
___
$code.=<<___	if ($flavour =~ /64/);
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___;
	mov	$ptr,#-1
	cmp	$inp,#0
	b.eq	.Lenc_key_abort
	cmp	$out,#0
	b.eq	.Lenc_key_abort
	mov	$ptr,#-2
	cmp	$bits,#128
	b.lt	.Lenc_key_abort
	cmp	$bits,#256
	b.gt	.Lenc_key_abort
	tst	$bits,#0x3f
	b.ne	.Lenc_key_abort

___
$code.=<<___	if ($flavour =~ /64/);
	adrp	$ptr,:pg_hi21:.Lrcon
	add	$ptr,$ptr,:lo12:.Lrcon
___
$code.=<<___	if ($flavour !~ /64/);
	adr	$ptr,.Lrcon
___
$code.=<<___;
	cmp	$bits,#192

	veor	$zero,$zero,$zero
	vld1.8	{$in0},[$inp],#16
	mov	$bits,#8		// reuse $bits
	vld1.32	{$rcon,$mask},[$ptr],#32

	b.lt	.Loop128
	// 192-bit key support was removed.
	b	.L256

.align	4
.Loop128:
	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	b.ne	.Loop128

	vld1.32	{$rcon},[$ptr]

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key

	vtbl.8	$key,{$in0},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in0},[$out],#16
	aese	$key,$zero

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out]
	add	$out,$out,#0x50

	mov	$rounds,#10
	b	.Ldone

// 192-bit key support was removed.
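
// The .L256 path below implements the AES-256 key schedule. Each pass
// through .Loop256 derives two round keys: the first uses the
// rotate-and-splat table lookup plus AESE with an all-zero round key
// (i.e. RotWord+SubWord) and the Rcon constant; the second uses a lane
// splat plus AESE (SubWord only), as the AES-256 expansion requires for
// its intermediate step. Fifteen 128-bit round keys are stored in total.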

.align	4
.L256:
	vld1.8	{$in1},[$inp]
	mov	$bits,#7
	mov	$rounds,#14
	vst1.32	{$in0},[$out],#16

.Loop256:
	vtbl.8	$key,{$in1},$mask
	vext.8	$tmp,$zero,$in0,#12
	vst1.32	{$in1},[$out],#16
	aese	$key,$zero
	subs	$bits,$bits,#1

	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in0,$in0,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$key,$key,$rcon
	veor	$in0,$in0,$tmp
	vshl.u8	$rcon,$rcon,#1
	veor	$in0,$in0,$key
	vst1.32	{$in0},[$out],#16
	b.eq	.Ldone

	vdup.32	$key,${in0}[3]		// just splat
	vext.8	$tmp,$zero,$in1,#12
	aese	$key,$zero

	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp
	vext.8	$tmp,$zero,$tmp,#12
	veor	$in1,$in1,$tmp

	veor	$in1,$in1,$key
	b	.Loop256

.Ldone:
	str	$rounds,[$out]
	mov	$ptr,#0

.Lenc_key_abort:
	mov	x0,$ptr			// return value
	`"ldr	x29,[sp],#16"		if ($flavour =~ /64/)`
	ret
.size	GFp_${prefix}_set_encrypt_key,.-GFp_${prefix}_set_encrypt_key
___
}}}
{{{
sub gen_block () {
my $dir = shift;
my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
my ($inp,$out,$key)=map("x$_",(0..2));
my $rounds="w3";
my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));

$code.=<<___;
.globl	GFp_${prefix}_${dir}crypt
.type	GFp_${prefix}_${dir}crypt,%function
.align	5
GFp_${prefix}_${dir}crypt:
	AARCH64_VALID_CALL_TARGET
	ldr	$rounds,[$key,#240]
	vld1.32	{$rndkey0},[$key],#16
	vld1.8	{$inout},[$inp]
	sub	$rounds,$rounds,#2
	vld1.32	{$rndkey1},[$key],#16

.Loop_${dir}c:
	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key],#16
	subs	$rounds,$rounds,#2
	aes$e	$inout,$rndkey1
	aes$mc	$inout,$inout
	vld1.32	{$rndkey1},[$key],#16
	b.gt	.Loop_${dir}c

	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key]
	aes$e	$inout,$rndkey1
	veor	$inout,$inout,$rndkey0

	vst1.8	{$inout},[$out]
	ret
.size	GFp_${prefix}_${dir}crypt,.-GFp_${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
{{{
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
my ($rounds,$cnt,$key_)=("w5","w6","x7");
my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
my $step="x12";		# aliases with $tctr2

my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));

my ($dat,$tmp)=($dat0,$tmp0);

### q8-q15	preloaded key schedule

$code.=<<___;
.globl	GFp_${prefix}_ctr32_encrypt_blocks
.type	GFp_${prefix}_ctr32_encrypt_blocks,%function
.align	5
GFp_${prefix}_ctr32_encrypt_blocks:
___
$code.=<<___	if ($flavour =~ /64/);
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
___
$code.=<<___	if ($flavour !~ /64/);
	mov	ip,sp
	stmdb	sp!,{r4-r10,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
___
$code.=<<___;
	ldr	$rounds,[$key,#240]

	ldr	$ctr, [$ivp, #12]
	vld1.32	{$dat0},[$ivp]

	vld1.32	{q8-q9},[$key]		// load key schedule...
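	// Schedule pointer setup: q8-q9 already hold the first two round
	// keys; $key_ is pointed at the last five, which stay resident in
	// q12-q15 and $rndlast, and is then rewound to $key+32 so the main
	// loop can stream the middle round keys two at a time.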
	sub	$rounds,$rounds,#4
	mov	$step,#16
	cmp	$len,#2
	add	$key_,$key,x5,lsl#4	// pointer to last 5 round keys
	sub	$rounds,$rounds,#2
	vld1.32	{q12-q13},[$key_],#32
	vld1.32	{q14-q15},[$key_],#32
	vld1.32	{$rndlast},[$key_]
	add	$key_,$key,#32
	mov	$cnt,$rounds
	cclr	$step,lo

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov.32 lines
	// could write to $dat1 and $dat2 directly, but that trips these
	// errata. We write to $ivec and copy to the final register as a
	// workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	$ctr, $ctr
#endif
	add	$tctr1, $ctr, #1
	vorr	$ivec,$dat0,$dat0
	rev	$tctr1, $tctr1
	vmov.32	${ivec}[3],$tctr1
	add	$ctr, $ctr, #2
	vorr	$dat1,$ivec,$ivec
	b.ls	.Lctr32_tail
	rev	$tctr2, $ctr
	vmov.32	${ivec}[3],$tctr2
	sub	$len,$len,#3		// bias
	vorr	$dat2,$ivec,$ivec
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	$dat0,q8
	aesmc	$dat0,$dat0
	aese	$dat1,q8
	aesmc	$dat1,$dat1
	aese	$dat2,q8
	aesmc	$dat2,$dat2
	vld1.32	{q8},[$key_],#16
	subs	$cnt,$cnt,#2
	aese	$dat0,q9
	aesmc	$dat0,$dat0
	aese	$dat1,q9
	aesmc	$dat1,$dat1
	aese	$dat2,q9
	aesmc	$dat2,$dat2
	vld1.32	{q9},[$key_],#16
	b.gt	.Loop3x_ctr32

	aese	$dat0,q8
	aesmc	$tmp0,$dat0
	aese	$dat1,q8
	aesmc	$tmp1,$dat1
	vld1.8	{$in0},[$inp],#16
	add	$tctr0,$ctr,#1
	aese	$dat2,q8
	aesmc	$dat2,$dat2
	vld1.8	{$in1},[$inp],#16
	rev	$tctr0,$tctr0
	aese	$tmp0,q9
	aesmc	$tmp0,$tmp0
	aese	$tmp1,q9
	aesmc	$tmp1,$tmp1
	vld1.8	{$in2},[$inp],#16
	mov	$key_,$key
	aese	$dat2,q9
	aesmc	$tmp2,$dat2
	aese	$tmp0,q12
	aesmc	$tmp0,$tmp0
	aese	$tmp1,q12
	aesmc	$tmp1,$tmp1
	veor	$in0,$in0,$rndlast
	add	$tctr1,$ctr,#2
	aese	$tmp2,q12
	aesmc	$tmp2,$tmp2
	veor	$in1,$in1,$rndlast
	add	$ctr,$ctr,#3
	aese	$tmp0,q13
	aesmc	$tmp0,$tmp0
	aese	$tmp1,q13
	aesmc	$tmp1,$tmp1
	// Note the logic to update $dat0, $dat1, and $dat2 is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	veor	$in2,$in2,$rndlast
	vmov.32	${ivec}[3], $tctr0
	aese	$tmp2,q13
	aesmc	$tmp2,$tmp2
	vorr	$dat0,$ivec,$ivec
	rev	$tctr1,$tctr1
	aese	$tmp0,q14
	aesmc	$tmp0,$tmp0
	vmov.32	${ivec}[3], $tctr1
	rev	$tctr2,$ctr
	aese	$tmp1,q14
	aesmc	$tmp1,$tmp1
	vorr	$dat1,$ivec,$ivec
	vmov.32	${ivec}[3], $tctr2
	aese	$tmp2,q14
	aesmc	$tmp2,$tmp2
	vorr	$dat2,$ivec,$ivec
	subs	$len,$len,#3
	aese	$tmp0,q15
	aese	$tmp1,q15
	aese	$tmp2,q15

	veor	$in0,$in0,$tmp0
	vld1.32	{q8},[$key_],#16	// re-pre-load rndkey[0]
	vst1.8	{$in0},[$out],#16
	veor	$in1,$in1,$tmp1
	mov	$cnt,$rounds
	vst1.8	{$in1},[$out],#16
	veor	$in2,$in2,$tmp2
	vld1.32	{q9},[$key_],#16	// re-pre-load rndkey[1]
	vst1.8	{$in2},[$out],#16
	b.hs	.Loop3x_ctr32

	adds	$len,$len,#3
	b.eq	.Lctr32_done
	cmp	$len,#1
	mov	$step,#16
	cclr	$step,eq

.Lctr32_tail:
	aese	$dat0,q8
	aesmc	$dat0,$dat0
	aese	$dat1,q8
	aesmc	$dat1,$dat1
	vld1.32	{q8},[$key_],#16
	subs	$cnt,$cnt,#2
	aese	$dat0,q9
	aesmc	$dat0,$dat0
	aese	$dat1,q9
	aesmc	$dat1,$dat1
	vld1.32	{q9},[$key_],#16
	b.gt	.Lctr32_tail

	aese	$dat0,q8
	aesmc	$dat0,$dat0
	aese	$dat1,q8
	aesmc	$dat1,$dat1
	aese	$dat0,q9
	aesmc	$dat0,$dat0
	aese	$dat1,q9
	aesmc	$dat1,$dat1
	vld1.8	{$in0},[$inp],$step
	aese	$dat0,q12
	aesmc	$dat0,$dat0
	aese	$dat1,q12
	aesmc	$dat1,$dat1
	vld1.8	{$in1},[$inp]
	aese	$dat0,q13
	aesmc	$dat0,$dat0
	aese	$dat1,q13
	aesmc	$dat1,$dat1
	veor	$in0,$in0,$rndlast
	aese	$dat0,q14
	aesmc	$dat0,$dat0
	aese	$dat1,q14
	aesmc	$dat1,$dat1
	veor	$in1,$in1,$rndlast
	aese	$dat0,q15
	aese	$dat1,q15

	cmp	$len,#1
	veor	$in0,$in0,$dat0
	veor	$in1,$in1,$dat1
	vst1.8	{$in0},[$out],#16
	b.eq	.Lctr32_done
	vst1.8	{$in1},[$out]

.Lctr32_done:
___
$code.=<<___	if ($flavour !~ /64/);
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r10,pc}
___
$code.=<<___	if ($flavour =~ /64/);
	ldr	x29,[sp],#16
	ret
___
$code.=<<___;
.size	GFp_${prefix}_ctr32_encrypt_blocks,.-GFp_${prefix}_ctr32_encrypt_blocks
___
}}}
$code.=<<___;
#endif
___
########################################
if ($flavour =~ /64/) {			######## 64-bit code
    my %opcode = (
	"aesd"	=>	0x4e285800,	"aese"	=>	0x4e284800,
	"aesimc"=>	0x4e287800,	"aesmc"	=>	0x4e286800	);

    local *unaes = sub {
	my ($mnemonic,$arg)=@_;

	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o	&&
	sprintf ".inst\t0x%08x\t//%s %s",
			$opcode{$mnemonic}|$1|($2<<5),
			$mnemonic,$arg;
    };

    foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
	s/@\s/\/\//o;			# old->new style commentary

	#s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel	$1$2,$1zr,$1$2,$3/o	or
	s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel	$2,$3,$2,$1/o	or
	s/vmov\.i8/movi/o	or	# fix up legacy mnemonics
	s/vext\.8/ext/o		or
	s/vrev32\.8/rev32/o	or
	s/vtst\.8/cmtst/o	or
	s/vshr/ushr/o		or
	s/^(\s+)v/$1/o		or	# strip off v prefix
	s/\bbx\s+lr\b/ret/o;

	# fix up remaining legacy suffixes
	s/\.[ui]?8//o;
	m/\],#8/o and s/\.16b/\.8b/go;
	s/\.[ui]?32//o and s/\.16b/\.4s/go;
	s/\.[ui]?64//o and s/\.16b/\.2d/go;
	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;

	print $_,"\n";
    }
} else {				######## 32-bit code
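    # The 32-bit path cannot assume the assembler knows the crypto
    # extension mnemonics, so unaes() below emits aese/aesd/aesmc/aesimc
    # as raw little-endian .byte sequences, and unvtbl/unvdup32/unvmov32
    # rewrite q-register operands into the d-register forms that AArch32
    # NEON expects for these instructions.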
    my %opcode = (
	"aesd"	=>	0xf3b00340,	"aese"	=>	0xf3b00300,
	"aesimc"=>	0xf3b003c0,	"aesmc"	=>	0xf3b00380	);

    local *unaes = sub {
	my ($mnemonic,$arg)=@_;

	if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
					 |(($2&7)<<1) |(($2&8)<<2);
	    # ARMv7 instructions are always encoded little-endian, hence the
	    # explicit byte order. The correct solution is to use the .inst
	    # directive, but older assemblers don't implement it:-(
	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}
    };

    sub unvtbl {
	my $arg=shift;

	$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
	sprintf	"vtbl.8	d%d,{q%d},d%d\n\t".
		"vtbl.8	d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
    }

    sub unvdup32 {
	my $arg=shift;

	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
    }

    sub unvmov32 {
	my $arg=shift;

	$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
	sprintf	"vmov.32	d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
    }

    foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
	s/\/\/\s?/@ /o;				# new->old style commentary

	# fix up remaining new-style suffixes
	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
	s/\],#[0-9]+/]!/o;

	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o	or
	s/vtbl\.8\s+(.*)/unvtbl($1)/geo		or
	s/vdup\.32\s+(.*)/unvdup32($1)/geo	or
	s/vmov\.32\s+(.*)/unvmov32($1)/geo	or
	s/^(\s+)b\./$1b/o			or
	s/^(\s+)mov\./$1mov/o			or
	s/^(\s+)ret/$1bx\tlr/o;

	print $_,"\n";
    }
}

close STDOUT or die "error closing STDOUT";