// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// Round-constant table for key expansion:
//   word 0..3: initial rcon (0x01) splatted across lanes
//   word 4..7: byte-permutation mask for tbl — rotate-n-splat of the last word
//   word 8..11: rcon 0x1b, used for the final 128-bit rounds after shl overflows
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b

.text

.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

// aes_hw_set_encrypt_key:
//   In:  x0 = user key bytes, w1 = key length in bits, x2 = output key schedule
//   Out: x0 = 0 on success, -1 if x0 or x2 is null, -2 if w1 is not a
//        multiple of 64 in [128, 256]
//   Writes the expanded round keys to [x2] and the round count (10/12/14)
//   after them; decrypt-key setup below relies on that word sitting at
//   offset 240 from the schedule start.
//   Register roles in the expansion loops: v3 (and v4 for 192/256) carry the
//   current key words; v1 = rcon; v2 = rotate-n-splat tbl mask; v6 with
//   aese against an all-zero v0 performs SubBytes on the rotated word.
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// default return: -1 (null pointer)
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// default return: -2 (bad key length)
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	Lenc_key_abort

	adrp	x3,Lrcon@PAGE
	add	x3,x3,Lrcon@PAGEOFF
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, the "null key" for aese-as-SubBytes
	ld1	{v3.16b},[x0],#16	// first 128 bits of user key
	mov	w1,#8			// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate mask

	b.lt	Loop128
	b.eq	L192
	b	L256

.align 4
Loop128:
	// One 128-bit expansion round per iteration (8 iterations), then two
	// more unrolled rounds below using rcon 0x1b from the table.
	tbl	v6.16b,{v3.16b},v2.16b	// RotWord of the last key word, splatted
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16	// emit current round key
	aese	v6.16b,v0.16b		// SubBytes (zero round key)
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b	// cascade previous words into the new ones
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// xor in rcon
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next rcon = rcon * 2 in GF(2^8) (no overflow here)
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// rcon 0x1b for the last two rounds

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// 0x1b -> 0x36
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// 11th (final) round key
	add	x2,x2,#0x50		// advance to the rounds-count slot

	mov	w12,#10			// AES-128: 10 rounds
	b	Ldone

.align 4
L192:
	ld1	{v4.8b},[x0],#8		// remaining 64 bits of the 192-bit key
	movi	v6.16b,#8		// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	Loop192

	mov	w12,#12			// AES-192: 12 rounds
	add	x2,x2,#0x20
	b	Ldone

.align 4
L256:
	ld1	{v4.16b},[x0]		// second 128 bits of the 256-bit key
	mov	w1,#7
	mov	w12,#14			// AES-256: 14 rounds
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b		// SubWord without RotWord for the odd rounds

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// store round count after the schedule
	mov	x3,#0			// success

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret


.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key

// aes_hw_set_decrypt_key:
//   Same arguments/returns as aes_hw_set_encrypt_key. Builds the encrypt
//   schedule via Lenc_key, then reverses the round-key order in place and
//   applies aesimc (InvMixColumns) to every key except the two endpoints,
//   producing the equivalent-inverse-cipher schedule used by aesd/aesimc.
//   Uses bl, so LR is live across the call — hence the full PAuth
//   sign/validate pair, unlike the leaf-style functions in this file.
.align 5
_aes_hw_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	Lenc_key

	cmp	x0,#0			// propagate encrypt-key-setup failure
	b.ne	Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// backward stride for the high pointer
	add	x0,x2,x12,lsl#4		// end of key schedule

	// Swap first/last round keys (no aesimc on the endpoints).
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

Loop_imc:
	// Walk the two pointers toward each other, swapping and applying
	// InvMixColumns to each interior round key.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	Loop_imc

	ld1	{v0.4s},[x2]		// middle key (odd count): transform in place
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret

.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt

// aes_hw_encrypt: single-block AES encryption.
//   In:  x0 = 16-byte input block, x1 = 16-byte output block,
//        x2 = key schedule (round count at [x2,#240])
//   Loop_enc processes two rounds per iteration; the final pair is peeled
//   off so the last aese is followed by the last-round-key xor instead
//   of aesmc.
.align 5
_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16	// rndkey[0]
	ld1	{v2.16b},[x0]		// input block
	sub	w3,w3,#2		// bias: last two rounds handled after the loop
	ld1	{v1.4s},[x2],#16	// rndkey[1]

Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// final round key
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b	// final AddRoundKey

	st1	{v2.16b},[x1]
	ret

.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt

// aes_hw_decrypt: single-block AES decryption.
//   Mirror image of aes_hw_encrypt using aesd/aesimc with the inverted
//   schedule produced by aes_hw_set_decrypt_key.
//   In:  x0 = input block, x1 = output block, x2 = key schedule.
.align 5
_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt

// aes_hw_cbc_encrypt: AES-CBC over a whole buffer.
//   In:  x0 = input, x1 = output, x2 = length in bytes (truncated to a
//        multiple of 16; < 16 aborts), x3 = key schedule, x4 = IV buffer
//        (read at entry, updated with the final IV at exit), w5 = nonzero
//        to encrypt, zero to decrypt.
//   Encryption is inherently serial (each block chains into the next), so
//   it runs one block at a time with the round keys pre-staged in
//   v2/v3/v16..v23. Decryption is parallelizable and runs 3 blocks per
//   iteration (Loop3x_cbc_dec) with a 1-2 block tail.
//   v6 = running IV/chaining value, v7 = final round key.
.align 5
_aes_hw_cbc_encrypt:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16			// input stride; zeroed when on the last block
	b.lo	Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]		// load IV
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4		// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// final round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	Lcbc_dec

	cmp	w5,#2			// AES-128 gets its own fully-unrolled path
	eor	v0.16b,v0.16b,v6.16b	// first block xor IV
	eor	v5.16b,v16.16b,v7.16b	// fold rndkey[0] into the last-round xor
	b.eq	Lcbc_enc128

	// 192/256-bit encrypt: stage pointers to the middle round keys so
	// they can be reloaded each iteration without re-walking the schedule.
	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	Lenter_cbc_enc

.align 4
Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store previous ciphertext block
Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	Lcbc_enc192

	// 256-bit only: two extra rounds
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// don't over-read past the last block
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// chain: xor (rndkey[0] ^ last rndkey)
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// ciphertext = state ^ final round key
	b.hs	Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	Lcbc_done

.align 5
Lcbc_enc128:
	// Fully-unrolled AES-128 CBC encryption round function.
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	Lcbc_done
.align 5
Lcbc_dec:
	// Decrypt path: process 3 blocks at a time in v0/v1/v18 while
	// v2/v3/v19 hold the corresponding ciphertexts for the chaining xor.
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// (IV or prev ct) ^ final round key
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo		// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// carry last ciphertext as next IV
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b	// plaintext = state ^ (prev ct ^ last key)
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	Loop3x_cbc_dec

	cmn	x2,#0x30
	b.eq	Lcbc_done		// length was an exact multiple of 3 blocks
	nop

Lcbc_dec_tail:
	// Tail: 1 or 2 remaining blocks in v1 (maybe) and v18.
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// one block left, or two?
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b	// final IV = last ciphertext
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	Lcbc_done

Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

Lcbc_done:
	st1	{v6.16b},[x4]		// write back the final IV
Lcbc_abort:
	ldr	x29,[sp],#16
	ret

.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

// aes_hw_ctr32_encrypt_blocks: AES-CTR with a 32-bit big-endian counter
// in the last word of the IV.
//   In:  x0 = input, x1 = output, x2 = number of 16-byte blocks,
//        x3 = key schedule, x4 = 16-byte IV/counter block.
//   Keystream blocks are generated 3 at a time (Loop3x_ctr32) with a
//   1-2 block tail. w8 holds the counter in host order (rev'd on
//   little-endian); v7 = final round key.
.align 5
_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]		// big-endian counter word
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16			// input stride for the 2-block tail
	cmp	x2,#2
	add	x7,x3,x5,lsl#4		// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: don't advance past input

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips this bug.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	w8, w8
#endif
	add	w10, w8, #1		// counter for the second block
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	Lctr32_tail		// <= 2 blocks total
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	Loop3x_ctr32

.align 4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Final rounds: move the states into v4/v5/v17 so v0/v1/v18 can be
	// refilled with the next three counter blocks while the AES
	// pipeline drains.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// pre-xor input with final round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b	// ciphertext = input ^ keystream
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo bias; how many blocks remain?
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq

Lctr32_tail:
	// 1 or 2 remaining blocks in v0 (and v1).
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// only one block remained
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret

#endif
#endif  // !OPENSSL_NO_ASM