// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
// MemorySanitizer cannot instrument assembly, so disable the asm paths
// entirely when building under MSan.
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto

// AES key-schedule constants:
//   rows 0 and 2: round-constant words (0x01..., then 0x1b... for the tail)
//   row 1: TBL byte-permutation mask used to rotate-and-splat the last word
.section	.rodata
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

// int aes_hw_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *out)
//   x0 = user key bytes, w1 = key size in bits, x2 = round-key output area
// Returns (via x0): 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is not a
// multiple of 64 in [128, 256]. The round count (10/12/14) is stored just
// past the last round key written.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// pending return value: -1 (NULL argument)
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// pending return value: -2 (bad key length)
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64 bits
	b.ne	.Lenc_key_abort

	adrp	x3,.Lrcon
	add	x3,x3,:lo12:.Lrcon
	cmp	w1,#192			// condition flags steer to 128/192/256 path below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0: aese with a zero key gives SubBytes+ShiftRows only
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8			// reuse w1 as the schedule-loop counter
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constants, v2 = rotate-n-splat mask

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
.Loop128:
	// Expand one 128-bit round key per iteration:
	// v6 = SubBytes(rotated last word), v3 = previous key XOR-folded forward.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant = rcon << 1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// switch to the 0x1b constants for the last rounds

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]
	add	x2,x2,#0x50		// advance past the remaining key slots to the rounds field

	mov	w12,#10			// AES-128: 10 rounds
	b	.Ldone

.align	4
.L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes (192-bit key = 24 bytes)
	movi	v6.16b,#8		// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12			// AES-192: 12 rounds
	add	x2,x2,#0x20
	b	.Ldone

.align	4
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes (256-bit key = 32 bytes)
	mov	w1,#7
	mov	w12,#14			// AES-256: 14 rounds
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// store the round count after the round keys
	mov	x3,#0			// success

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

// int aes_hw_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *out)
// Builds the encryption schedule via .Lenc_key, then converts it in place for
// the equivalent inverse cipher: the round keys are reversed end-for-end and
// every inner key is run through AESIMC (InvMixColumns). The first/last keys
// are only swapped. Returns 0 on success, or .Lenc_key's error code.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// backwards stride for the end pointer
	add	x0,x2,x12,lsl#4		// end of key schedule

	// Swap first and last round keys (no InvMixColumns on the outer keys).
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	// Walk the two pointers towards each other, swapping and applying
	// InvMixColumns to each inner round key.
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	ld1	{v0.4s},[x2]		// middle key (pointers met)
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

// void aes_hw_encrypt(const uint8_t in[16], uint8_t out[16], const AES_KEY *key)
//   x0 = in, x1 = out, x2 = key schedule; round count is read from [x2,#240].
// Encrypts a single block. Rounds are processed two at a time (w3 counts
// down by 2), with the final aese left without aesmc per the AES structure.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// plaintext block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16	// round key 1

.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b	// final AddRoundKey

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt

// void aes_hw_decrypt(const uint8_t in[16], uint8_t out[16], const AES_KEY *key)
//   x0 = in, x1 = out, x2 = decryption key schedule (from aes_hw_set_decrypt_key);
//   round count read from [x2,#240]. Mirror image of aes_hw_encrypt using
//   aesd/aesimc.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16	// round key 0
	ld1	{v2.16b},[x0]		// ciphertext block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16	// round key 1

.Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b	// final AddRoundKey

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_decrypt,.-aes_hw_decrypt

// void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
//                         const AES_KEY *key, uint8_t ivec[16], int enc)
//   x0 = in, x1 = out, x2 = len (bytes), x3 = key, x4 = ivec, w5 = enc flag
//   (0 = decrypt, nonzero = encrypt). len is rounded down to a multiple of
//   16; returns immediately if len < 16. The final chaining value is written
//   back to ivec. Decryption is interleaved three blocks at a time.
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16			// input stride; zeroed on the last block
	b.lo	.Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]		// w5 = round count from the key schedule
	and	x2,x2,#-16		// round length down to whole blocks
	ld1	{v6.16b},[x4]		// v6 = IV / chaining value
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4		// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec		// w5 was 0 at the cmp above => decrypt

	// ---- CBC encryption (inherently serial; one block at a time) ----
	cmp	w5,#2
	eor	v0.16b,v0.16b,v6.16b	// XOR IV into the first plaintext block
	eor	v5.16b,v16.16b,v7.16b	// pre-fold rndkey[0] into the last key
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// write out the previous ciphertext block
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// don't advance past the last input block
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next plaintext block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// pre-XOR with (rndkey[0] ^ last key)
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// ciphertext = state ^ last key; also next IV
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	// AES-128 specialization of the encryption loop.
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	// ---- CBC decryption, 3 blocks interleaved per iteration ----
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	.Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// (chaining value ^ last key) for block 0
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b	// previous ciphertexts serve as chaining values
	csel	x6,x2,x6,lo		// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// save last ciphertext as next chaining value
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b	// plaintext = state ^ (chain ^ last key)
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	.Loop3x_cbc_dec

	cmn	x2,#0x30		// fully drained?
	b.eq	.Lcbc_done
	nop

.Lcbc_dec_tail:
	// Handle the final one or two blocks (v1 and/or v18 hold them).
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// one block left, or two?
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the final chaining value back to ivec
.Lcbc_abort:
	ldr	x29,[sp],#16
	ret
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

// void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
//                                  size_t blocks, const AES_KEY *key,
//                                  const uint8_t ivec[16])
//   x0 = in, x1 = out, x2 = number of 16-byte blocks, x3 = key, x4 = ivec.
// The 32-bit counter is the last word of ivec, big-endian (byte-reversed on
// little-endian hosts). Keystream blocks are computed three at a time.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]		// round count

	ldr	w8, [x4, #12]		// counter word (last word of the IV)
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4		// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// last round key
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: don't advance input in the tail

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The vmov lines
	// could write to v1.16b and v18.16b directly, but that trips this bug.
	// We write to v6.16b and copy to the final register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	w8, w8			// counter is big-endian in memory
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10		// counter block for the 2nd keystream block
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	.Lctr32_tail		// <= 2 blocks: skip the 3x loop
	rev	w12, w8
	mov	v6.s[3],w12		// counter block for the 3rd keystream block
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Final rounds: move the states into v4/v5/v17 so that v0/v1/v18 can be
	// refilled with the next three counter blocks while these finish.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16	// next three input blocks
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// pre-XOR input with the last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
	// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode. See the comment above.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b	// output = input ^ keystream
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// single block: don't advance input pointer

.Lctr32_tail:
	// One or two remaining blocks; their counter states are already in v0/v1.
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// only one block remained
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section	.note.GNU-stack,"",%progbits