#if defined(__aarch64__)
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch	armv8-a+crypto
#endif

// AES routines using the ARMv8 AES instructions (aese/aesmc/aesd/aesimc).
// Syntax: GNU as, AArch64. Arguments arrive in x0-x5 as used below.
//
// Key schedule layout shared by every routine in this file:
//   offset 0   : round keys, 16 bytes each
//   offset 240 : 32-bit round count (10, 12 or 14)
//
// Only x0-x14 (excluding x29/x30 handling noted per function), v0-v7 and
// v16-v23 are used as scratch; v8-v15 are never touched.

// Key-expansion constants, addressed through x3 in aes_hw_set_encrypt_key:
//   row 0: round constant 0x01 in every 32-bit lane (shifted left by one
//          after each use)
//   row 1: tbl index mask, the same word in every lane
//   row 2: round constant 0x1b in every lane, loaded after row 0 has been
//          used eight times
.align	5
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

// aes_hw_set_encrypt_key: expand a raw key into an encryption key schedule.
// In:   x0 = pointer to the raw key bytes
//       w1 = key length in bits (128, 192 or 256)
//       x2 = pointer to the key schedule to fill in
// Out:  x0 = 0 on success
//            -1 if x0 or x2 is null
//            -2 if w1 is below 128, above 256 or not a multiple of 64
// Uses: x3, w12, v0-v6, flags. x0 and x2 are advanced while working.
// Note: on success x2 points at offset 240 of the schedule and w12 holds
//       the round count. aes_hw_set_decrypt_key enters at .Lenc_key and
//       depends on both.
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// error code for a null pointer
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// error code for a bad key length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags select the path taken below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as a zero round key
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon row 0, v2 = tbl mask

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

// 128-bit keys. Each pass stores the current round key from v3 and derives
// the next one: tbl picks and rotates the last word into every lane, aese
// with the zero key substitutes its bytes, and the ext/eor chain
// accumulates the shifted copies of the previous round key.
.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// switch to round constant 0x1b

	// Two more expansion steps, unrolled, using 0x1b and then 0x36.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// eleventh round key, at offset 160
	add	x2,x2,#0x50		// 160 + 0x50 = 240, the round-count slot

	mov	w12,#10
	b	.Ldone

// 192-bit keys. v3 holds the first 16 key bytes and the low half of v4 the
// remaining 8. Each of the 8 passes stores 24 bytes of schedule.
.align	4
.L192:
	ld1	{v4.8b},[x0],#8
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 16 + 8*24 = 208, plus 0x20 = 240
	b	.Ldone

// 256-bit keys. v3 and v4 hold the two 16-byte halves of the key. The loop
// alternates between a step that uses the round constant (updating v3) and
// a step that only substitutes bytes (updating v4). It leaves through the
// b.eq in the middle after the seventh pass, with x2 at offset 240.
.align	4
.L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at offset 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

// aes_hw_set_decrypt_key: build a decryption key schedule.
// In:   x0, w1, x2 exactly as for aes_hw_set_encrypt_key
// Out:  x0 = 0 on success, otherwise the error code from the encryption
//            key expansion
// Uses: everything aes_hw_set_encrypt_key uses, plus x4 and x12.
// The encryption schedule is produced first, then reversed in place:
// the first and last round keys are swapped unchanged, and every round key
// in between is passed through aesimc while being swapped with its mirror.
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// stride for walking x0 backwards
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc		// until the two pointers meet

	ld1	{v0.4s},[x2]		// middle round key, transformed in place
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	ret
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

// aes_hw_encrypt: encrypt one 16-byte block.
// In:   x0 = input block, x1 = output block, x2 = key schedule
// Uses: w3, v0-v2, flags. x2 is advanced through the schedule.
// Leaf routine, no stack use. Two rounds are issued per loop pass, with
// the next two round keys loaded alongside.
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt

// aes_hw_decrypt: decrypt one 16-byte block.
// In:   x0 = input block, x1 = output block, x2 = key schedule
// Uses: w3, v0-v2, flags. x2 is advanced through the schedule.
// Same structure as aes_hw_encrypt with aesd/aesimc in place of
// aese/aesmc.
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	w3,[x2,#240]		// round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b		// final round has no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_decrypt,.-aes_hw_decrypt

// aes_hw_cbc_encrypt: CBC-mode encryption or decryption.
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = length in bytes. Below 16 the routine returns at once without
//            touching the output or the IV. The count of blocks processed
//            is the length divided by 16, rounded down.
//       x3 = key schedule (round count read from offset 240)
//       x4 = 16-byte IV, read on entry and overwritten from v6 on exit
//       w5 = direction: non-zero encrypts, zero decrypts
// Uses: x6-x8, x12, x14, v0-v7, v16-v23, flags. x0-x3 and w5 are modified.
// Register roles after the prologue:
//       w5      = round count minus 8 (2, 4 or 6)
//       v16,v17 = round keys 0 and 1
//       v18-v23 = the six round keys before the last (encryption only;
//                 decryption reuses v18/v19 for data)
//       v7      = last round key
//       x8      = post-increment for input loads, forced to 0 once the
//                 final block has been reached
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec		// flags still from the cmp w5,#0 above

	cmp	w5,#2			// w5 == 2 means ten rounds
	eor	v0.16b,v0.16b,v6.16b	// first block xor IV
	eor	v5.16b,v16.16b,v7.16b	// round key 0 xor last round key
	b.eq	.Lcbc_enc128

	// 12 or 14 rounds: v2/v3 hold round keys 2 and 3, and x6, x12, x14
	// and x3 point at round keys 4, 5, 6 and 7, which are reloaded into
	// v16/v17 on every pass. x7 points at round key 1.
	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	v0.16b,v16.16b		// v16 = next input xor v5, see below
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// previous output block
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4			// w5 == 4 means twelve rounds
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq		// stop advancing x0 at the last block
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next input block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// input xor key 0 xor last key; the
					// aese at the loop top xors this into
					// v0, which still lacks the last key
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b	// finished output block
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

// Ten-round encryption: every round key stays resident in registers
// (v16, v17, v2, v3, v18-v23, v7), so no reloads are needed in the loop.
.align	5
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

// Decryption. Blocks are independent, so three are processed per pass in
// v0, v1 and v18. v2, v3 and v19 keep copies of the input blocks, which
// are needed for the xor with the following block. A one- or two-block
// remainder is handled at .Lcbc_dec_tail.
.align	5
.Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2		// two extra loop rounds: v18/v19 hold data
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	.Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b	// chaining block xor last round key
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b	// chaining block for the next pass
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	.Loop3x_cbc_dec		// flags from the subs x2,x2,#0x30 above

	cmn	x2,#0x30		// x2 == -0x30 means nothing is left
	b.eq	.Lcbc_done
	nop

// One or two blocks remain, held in v1 and v18. When only one remains it
// is in v18 and the v1 lane carries a duplicate whose result is dropped.
.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// x2 == -0x20 means exactly one block
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the chaining block back as the IV
.Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

// aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter.
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks; zero returns at once
//       x3 = key schedule (round count read from offset 240)
//       x4 = 16-byte counter block. The 32-bit word at offset 12 is the
//            counter; it is byte-swapped with rev unless __ARMEB__ is
//            defined, and incremented by one per block in a w register.
//            The block in memory is only read, never written back.
// Uses: x5-x10, x12, v0-v7, v16-v23, flags. x0-x2 are modified.
// Register roles after the prologue:
//       w5      = round count minus 6
//       v16,v17 = round keys 0 and 1
//       v20-v23 = the four round keys before the last
//       v7      = last round key
//       v6      = counter block template; lane 3 is replaced per block
//       w8      = running counter value
// Three blocks are processed per pass in the main loop. A remainder of one
// or two blocks is handled at .Lctr32_tail.
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	cbz	x2,.Lctr32_done		// zero blocks: without this the tail
					// path below would still store output
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// single block: the second tail load
					// must not advance past the input
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	.Lctr32_tail		// one or two blocks in total
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	// Last six rounds. The keystream moves to v4, v5 and v17 so that
	// v0, v1 and v18 can be refilled with the next three counter blocks
	// while the rounds finish.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// input xor last round key
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32		// flags from the subs x2,x2,#3 above

	adds	x2,x2,#3		// undo the bias: x2 = blocks left (0-2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// one block left: do not advance x0

// One or two blocks remain. Two keystream blocks are always computed in
// v0 and v1; the second output block is stored only when x2 is not 1.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified, only x29 is reloaded
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif