#if defined(__aarch64__)
#include <openssl/arm_arch.h>

// AES primitives built on the ARMv8 Crypto Extension instructions
// (aese/aesd/aesmc/aesimc).  GNU as syntax, AArch64.
//
// Key schedule layout as used by every routine below: round keys are stored
// back to back as 16-byte blocks starting at offset 0, and a 32-bit round
// count (10, 12 or 14) lives at byte offset 240.

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto
.align	5
// Key expansion constants, read through x3 by aes_hw_set_encrypt_key:
//   +0   initial round constant, doubled with shl after every use
//   +16  tbl byte-permutation mask
//   +32  round constant 0x1b, loaded for the last two AES-128 steps
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//-----------------------------------------------------------------------
// aes_hw_set_encrypt_key
// In:    x0 = pointer to the raw key bytes
//        w1 = key length in bits (128, 192 or 256)
//        x2 = pointer to the key schedule to fill in
// Out:   x0 =  0 on success
//             -1 if x0 or x2 is NULL
//             -2 if w1 is below 128, above 256, or not a multiple of 64
//        On success x2 is left pointing at schedule+240 and w12 holds the
//        round count; aes_hw_set_decrypt_key relies on both after calling
//        the .Lenc_key entry point.
// Clobb: x1, x3, x12, v0-v6, flags
// NOTE(review): presumably int f(const uint8_t *key, int bits, AES_KEY *out);
//               confirm against the C header.
//-----------------------------------------------------------------------
.globl	aes_hw_set_encrypt_key
.hidden	aes_hw_set_encrypt_key
.type	aes_hw_set_encrypt_key,%function
.align	5
aes_hw_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// error code for a NULL argument
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2			// error code for a bad bit length
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f
	b.ne	.Lenc_key_abort

	adr	x3,.Lrcon
	cmp	w1,#192			// flags consumed by the three branches below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as zero round key and shift-in
	ld1	{v3.16b},[x0],#16
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = tbl mask

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
// 128-bit key: eight looped steps, then two unrolled steps using rcon 0x1b.
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// switch to the 0x1b round constant

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at schedule+160
	add	x2,x2,#0x50		// 160 + 0x50 = 240, the round-count slot

	mov	w12,#10
	b	.Ldone

.align	4
// 192-bit key: each of the eight iterations stores 8 + 16 bytes.
.L192:
	ld1	{v4.8b},[x0],#8
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 208 + 0x20 = 240, the round-count slot
	b	.Ldone

.align	4
// 256-bit key: seven iterations; the last one leaves through the b.eq in
// the middle of the loop body, with x2 already at schedule+240.
.L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// flags still come from the subs above

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at schedule+240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16		// x30 was never modified, so only x29 is reloaded
	ret
.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key

//-----------------------------------------------------------------------
// aes_hw_set_decrypt_key
// In:    x0, w1, x2 as for aes_hw_set_encrypt_key
// Out:   x0 = 0 on success, otherwise the error from the encrypt key setup
// Builds the encryption schedule through .Lenc_key, then reverses the order
// of the round keys in place.  Every round key except the first and the
// last is also passed through aesimc.
// Clobb: x1, x3, x4, x12, v0-v6, flags
//-----------------------------------------------------------------------
.globl	aes_hw_set_decrypt_key
.hidden	aes_hw_set_decrypt_key
.type	aes_hw_set_decrypt_key,%function
.align	5
aes_hw_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]		// swap first and last round keys unchanged
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	ld1	{v0.4s},[x2]		// middle round key, transformed in place
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	ret
.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key

//-----------------------------------------------------------------------
// aes_hw_encrypt
// In:    x0 = pointer to one 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule (round count read at offset 240)
// Leaf routine, no stack use.
// Clobb: x2, w3, v0-v2, flags
//-----------------------------------------------------------------------
.globl	aes_hw_encrypt
.hidden	aes_hw_encrypt
.type	aes_hw_encrypt,%function
.align	5
aes_hw_encrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b	// xor with the last round key

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_encrypt,.-aes_hw_encrypt

//-----------------------------------------------------------------------
// aes_hw_decrypt
// In:    x0 = pointer to one 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule (round count read at offset 240)
// Same structure as aes_hw_encrypt with aesd/aesimc.
// Clobb: x2, w3, v0-v2, flags
//-----------------------------------------------------------------------
.globl	aes_hw_decrypt
.hidden	aes_hw_decrypt
.type	aes_hw_decrypt,%function
.align	5
aes_hw_decrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aesd	v2.16b,v1.16b		// final round has no aesimc
	eor	v2.16b,v2.16b,v0.16b	// xor with the last round key

	st1	{v2.16b},[x1]
	ret
.size	aes_hw_decrypt,.-aes_hw_decrypt

//-----------------------------------------------------------------------
// aes_hw_cbc_encrypt
// In:    x0 = input pointer
//        x1 = output pointer
//        x2 = length in bytes; rounded down to a multiple of 16, and a
//             length below 16 returns at once without touching out or ivec
//        x3 = pointer to the key schedule (round count read at offset 240)
//        x4 = pointer to the 16-byte chaining value, read on entry and
//             written back at .Lcbc_done
//        w5 = direction: zero selects the decrypt path, non-zero encrypt
// Clobb: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, flags
// Register roles: v16/v17 = rolling round keys, v18-v23 and v7 = the last
// seven round keys, v6 = current chaining value.
//-----------------------------------------------------------------------
.globl	aes_hw_cbc_encrypt
.hidden	aes_hw_cbc_encrypt
.type	aes_hw_cbc_encrypt,%function
.align	5
aes_hw_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort
	csel	x8,xzr,x8,eq		// input step is 0 when only one block remains

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec		// flags still come from cmp w5,#0 above

	cmp	w5,#2
	eor	v0.16b,v0.16b,v6.16b
	eor	v5.16b,v16.16b,v7.16b
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done
.align	5
// Decrypt path: three blocks per iteration, then a one or two block tail.
.Lcbc_dec:
	ld1	{v18.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v19.16b,v18.16b,v18.16b
	b.lo	.Lcbc_dec_tail

	orr	v1.16b,v18.16b,v18.16b
	ld1	{v18.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v19.16b,v18.16b,v18.16b

.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	eor	v4.16b,v6.16b,v7.16b
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v18.16b
					// are loaded with last "words"
	orr	v6.16b,v19.16b,v19.16b
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	ld1	{v19.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v18.16b,v18.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v18.16b},[x1],#16
	orr	v18.16b,v19.16b,v19.16b
	b.hs	.Loop3x_cbc_dec

	cmn	x2,#0x30
	b.eq	.Lcbc_done
	nop

.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v16.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v17.16b
	aesimc	v18.16b,v18.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v20.16b
	aesimc	v18.16b,v18.16b
	cmn	x2,#0x20		// flags consumed by b.eq .Lcbc_dec_one below
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v21.16b
	aesimc	v18.16b,v18.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v18.16b,v22.16b
	aesimc	v18.16b,v18.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v18.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v18.16b
	orr	v6.16b,v19.16b,v19.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the chaining value back
.Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was never modified, so only x29 is reloaded
	ret
.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt

//-----------------------------------------------------------------------
// aes_hw_ctr32_encrypt_blocks
// In:    x0 = input pointer
//        x1 = output pointer
//        x2 = number of 16-byte blocks; zero returns at once without
//             reading the key, the counter block, or in, and without
//             writing out
//        x3 = pointer to the key schedule (round count read at offset 240)
//        x4 = pointer to the 16-byte counter block.  The 32-bit word at
//             offset 12 is the counter; it is byte-reversed before and
//             after incrementing unless __ARMEB__ is defined.  The block
//             is only read, never written back.
// Clobb: x0-x2, x5-x10, x12, v0-v7, v16-v23, flags
// Register roles: v16/v17 = rolling round keys, v20-v23 and v7 = the last
// five round keys, v6 = counter block template, w8 = running counter.
//-----------------------------------------------------------------------
.globl	aes_hw_ctr32_encrypt_blocks
.hidden	aes_hw_ctr32_encrypt_blocks
.type	aes_hw_ctr32_encrypt_blocks,%function
.align	5
aes_hw_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Without this guard a zero block count falls into .Lctr32_tail,
	// which reads one block from in and stores two blocks to out.
	cbz	x2,.Lctr32_done
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// input step is 0 when only one block remains
#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w10, w8, #1
	orr	v18.16b,v0.16b,v0.16b
	add	w8, w8, #2
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v1.s[3],w10
	b.ls	.Lctr32_tail
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v18.s[3],w12
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	orr	v0.16b,v6.16b,v6.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	orr	v18.16b,v6.16b,v6.16b
	add	w9,w8,#1
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	eor	v19.16b,v19.16b,v7.16b
	rev	w9,w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	mov	v0.s[3], w9
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	mov	v1.s[3], w10
	rev	w12,w8
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	mov	v18.s[3], w12
	subs	x2,x2,#3
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias; x2 = blocks left (0, 1 or 2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq

// Tail: one or two remaining blocks.  Two keystream blocks are always
// computed; the second store is skipped when only one block is left.
.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified, so only x29 is reloaded
	ret
.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif