// AES primitives for AArch64 built on the ARMv8 Crypto Extension
// instructions (aese/aesmc/aesd/aesimc).  GNU as syntax; the file uses
// cpp directives, so it has to be run through the C preprocessor first.
//
// Key schedule layout shared by every routine below:
//   byte offset 0..   consecutive 16-byte round keys
//   byte offset 240   32-bit round count (10, 12 or 14)
//
// Only v0-v7 and v16-v23 are used as vector registers, and only
// x0-x12 as general registers.

#include "arm_arch.h"

#if __ARM_ARCH__>=7
.text
.arch	armv8-a+crypto
.align	5
// Constants for the key expansion: initial round constant (one copy per
// 32-bit lane), the tbl byte-selection mask, and the round constant that
// is reloaded for the last two AES-128 expansion steps.
rcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

//----------------------------------------------------------------------
// aes_v8_set_encrypt_key
// In:    x0 = user key bytes (16, 24 or 32 bytes are read)
//        w1 = key length in bits
//        x2 = key schedule to fill
// Out:   x0 = 0
//        x2 = key schedule + 240 (address of the stored round count)
//        w12 = round count (relied upon by aes_v8_set_decrypt_key)
// Clobb: x0-x3, x12, v0-v6, flags
// Note:  w1 is only compared against 192: anything lower takes the
//        128-bit path and anything higher takes the 256-bit path.
//        No argument validation is performed.
//----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	adr	x3,rcon
	cmp	w1,#192			// flags consumed by the b.lt/b.eq below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as all-zero round key
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8			// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constant, v2 = tbl mask

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
// 128-bit key: 8 looped steps, then 2 unrolled steps with the reloaded
// round constant.  11 round keys (176 bytes) are stored in total.
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b	// select bytes of the last word
	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 moved up one word, zero filled
	st1	{v3.4s},[x2],#16	// store current round key
	aese	v6.16b,v0.16b		// AES round step with a zero key
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// next round constant
	eor	v3.16b,v3.16b,v6.16b	// v3 = next round key
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// third rcon row (0x1b)

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at offset 160
	add	x2,x2,#0x50		// 160 + 0x50 = 240

	mov	w12,#10
	b	.Ldone

.align	4
// 192-bit key: 16 bytes up front, then 8 steps of 24 bytes each
// (208 bytes = 13 round keys).
.L192:
	ld1	{v4.8b},[x0],#8		// remaining 8 key bytes
	movi	v6.16b,#8		// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.8b},[x2],#8
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20		// 208 + 0x20 = 240
	b	.Ldone

.align	4
// 256-bit key: 16 bytes up front, then 7 steps of 32 bytes each
// (240 bytes = 15 round keys); the loop exits after the 7th pair.
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// flags still from the subs above

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at schedule offset 240

	eor	x0,x0,x0		// return value
	ldr	x29,[sp],#16		// x30 was never modified, no reload needed
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//----------------------------------------------------------------------
// aes_v8_set_decrypt_key
// In:    x0 = user key bytes, w1 = key length in bits,
//        x2 = key schedule to fill
// Out:   x0 = 0
// Clobb: x0-x4, x12, v0-v6, flags
// Builds the encryption schedule via .Lenc_key, then reverses the order
// of the round keys in place, applying aesimc to every key except the
// first and the last.
//----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16			// backward stride for the upper pointer
	add	x0,x2,x12,lsl#4		// end of key schedule

	ld1	{v0.4s},[x2]		// swap first and last key untouched
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc		// until the two pointers meet

	ld1	{v0.4s},[x2]		// middle key: transform in place
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
	ldp	x29,x30,[sp],#16	// x30 was overwritten by the bl above
	ret
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

//----------------------------------------------------------------------
// aes_v8_encrypt
// In:    x0 = 16-byte input block, x1 = 16-byte output block,
//        x2 = key schedule (round count read from offset 240)
// Clobb: x2, w3, v0-v2, flags
//----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	ld1	{v0.4s},[x2],#16
	aesmc	v2.16b,v2.16b
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	ld1	{v1.4s},[x2],#16
	aesmc	v2.16b,v2.16b
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	ld1	{v0.4s},[x2]		// last round key
	aesmc	v2.16b,v2.16b
	aese	v2.16b,v1.16b		// final round has no aesmc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_encrypt,.-aes_v8_encrypt

//----------------------------------------------------------------------
// aes_v8_decrypt
// In:    x0 = 16-byte input block, x1 = 16-byte output block,
//        x2 = key schedule (round count read from offset 240)
// Clobb: x2, w3, v0-v2, flags
//----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	ld1	{v0.4s},[x2],#16
	aesimc	v2.16b,v2.16b
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	ld1	{v1.4s},[x2],#16
	aesimc	v2.16b,v2.16b
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	ld1	{v0.4s},[x2]		// last round key
	aesimc	v2.16b,v2.16b
	aesd	v2.16b,v1.16b		// final round has no aesimc
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_decrypt,.-aes_v8_decrypt

//----------------------------------------------------------------------
// aes_v8_cbc_encrypt
// In:    x0 = input, x1 = output, x2 = length in bytes,
//        x3 = key schedule (round count read from offset 240),
//        x4 = 16-byte chaining value (read on entry, rewritten on exit),
//        w5 = direction: zero selects decryption, non-zero encryption
// Clobb: x0-x2, x5-x8, x12, v0-v7, v16-v23, flags
// Note:  a length below 16 returns at once without touching the
//        chaining value; otherwise the length is rounded down to a
//        multiple of 16.  x8 (and x12) hold the input stride, which is
//        forced to 0 near the end so look-ahead loads stay in bounds.
//----------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort
	csel	x8,xzr,x8,eq		// single block: do not advance input

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]		// v6 = chaining value
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s-v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4		// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s-v19.4s},[x7],#32
	ld1	{v20.4s-v21.4s},[x7],#32
	ld1	{v22.4s-v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec		// flags still from cmp w5,#0 above

	cmp	w5,#2			// w5 == 2 means a 10-round schedule
	eor	v0.16b,v0.16b,v6.16b
	eor	v5.16b,v16.16b,v7.16b	// v5 = rndkey[0] ^ rndkey[last]
	b.eq	.Lcbc_enc128

.Loop_cbc_enc:
	aese	v0.16b,v16.16b
	ld1	{v16.4s},[x7],#16
	aesmc	v0.16b,v0.16b
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	ld1	{v17.4s},[x7],#16
	aesmc	v0.16b,v0.16b
	b.gt	.Loop_cbc_enc

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	add	x7,x3,#16
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8	// next input block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b	// pre-mix it, used as first "key"
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b

	mov	w6,w5
	eor	v6.16b,v0.16b,v7.16b	// v6 = output block
	st1	{v6.16b},[x1],#16
	b.hs	.Loop_cbc_enc		// flags from subs x2 above

	b	.Lcbc_done

.align	5
.Lcbc_enc128:				// 10 rounds, all keys kept in registers
	ld1	{v2.4s-v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16	// store the previous output block
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

.align	5
.Lcbc_dec128:				// 10 rounds, two blocks per iteration
	ld1	{v4.4s-v5.4s},[x7]
	eor	v6.16b,v6.16b,v7.16b	// fold last round key into the xor values
	eor	v2.16b,v0.16b,v7.16b
	mov	x12,x8

.Loop2x_cbc_dec128:
	aesd	v0.16b,v16.16b
	aesd	v1.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	subs	x2,x2,#32
	aesd	v0.16b,v17.16b
	aesd	v1.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	csel	x8,xzr,x8,lo
	aesd	v0.16b,v4.16b
	aesd	v1.16b,v4.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	csel	x12,xzr,x12,ls
	aesd	v0.16b,v5.16b
	aesd	v1.16b,v5.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	aesd	v0.16b,v18.16b
	aesd	v1.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	aesd	v0.16b,v19.16b
	aesd	v1.16b,v19.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	aesd	v0.16b,v20.16b
	aesd	v1.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	aesd	v0.16b,v21.16b
	aesd	v1.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	aesd	v0.16b,v22.16b
	aesd	v1.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b

	eor	v6.16b,v6.16b,v0.16b
	ld1	{v0.16b},[x0],x8
	eor	v2.16b,v2.16b,v1.16b
	ld1	{v1.16b},[x0],x12
	st1	{v6.16b},[x1],#16
	eor	v6.16b,v3.16b,v7.16b
	st1	{v2.16b},[x1],#16
	eor	v2.16b,v0.16b,v7.16b
	orr	v3.16b,v1.16b,v1.16b
	b.hs	.Loop2x_cbc_dec128	// flags from subs x2 above

	adds	x2,x2,#32
	eor	v6.16b,v6.16b,v7.16b	// undo the key fold before leaving
	b.eq	.Lcbc_done
	eor	v2.16b,v2.16b,v7.16b
	b	.Lcbc_dec_tail

.align	5
.Lcbc_dec:
	subs	x2,x2,#16
	orr	v2.16b,v0.16b,v0.16b	// keep input block: it is the next chaining value
	b.lo	.Lcbc_dec_tail		// only one block in total

	csel	x8,xzr,x8,eq
	cmp	w5,#2
	ld1	{v1.16b},[x0],x8
	orr	v3.16b,v1.16b,v1.16b
	b.eq	.Lcbc_dec128

.Loop2x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesd	v1.16b,v16.16b
	ld1	{v16.4s},[x7],#16
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesd	v1.16b,v17.16b
	ld1	{v17.4s},[x7],#16
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	b.gt	.Loop2x_cbc_dec

	aesd	v0.16b,v16.16b
	aesd	v1.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	eor	v4.16b,v6.16b,v7.16b
	eor	v5.16b,v2.16b,v7.16b
	aesd	v0.16b,v17.16b
	aesd	v1.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	orr	v6.16b,v3.16b,v3.16b
	subs	x2,x2,#32
	aesd	v0.16b,v18.16b
	aesd	v1.16b,v18.16b
	aesimc	v0.16b,v0.16b
	csel	x8,xzr,x8,lo
	aesimc	v1.16b,v1.16b
	mov	x7,x3
	aesd	v0.16b,v19.16b
	aesd	v1.16b,v19.16b
	aesimc	v0.16b,v0.16b
	ld1	{v2.16b},[x0],x8
	aesimc	v1.16b,v1.16b
	csel	x8,xzr,x8,ls
	aesd	v0.16b,v20.16b
	aesd	v1.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	ld1	{v3.16b},[x0],x8
	aesd	v0.16b,v21.16b
	aesd	v1.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	aesd	v0.16b,v22.16b
	aesd	v1.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b

	mov	w6,w5
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	orr	v0.16b,v2.16b,v2.16b
	st1	{v4.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v5.16b},[x1],#16
	b.hs	.Loop2x_cbc_dec		// flags from subs x2 above

	adds	x2,x2,#32
	b.eq	.Lcbc_done

.Lcbc_dec_tail:				// one remaining block
	aesd	v0.16b,v16.16b
	ld1	{v16.4s},[x7],#16
	aesimc	v0.16b,v0.16b
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	ld1	{v17.4s},[x7],#16
	aesimc	v0.16b,v0.16b
	b.gt	.Lcbc_dec_tail

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	eor	v4.16b,v6.16b,v7.16b
	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	orr	v6.16b,v2.16b,v2.16b	// new chaining value
	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v23.16b

	eor	v4.16b,v4.16b,v0.16b
	st1	{v4.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]		// write the chaining value back
.Lcbc_abort:
	ldr	x29,[sp],#16		// x30 was never modified, no reload needed
	ret
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

//----------------------------------------------------------------------
// aes_v8_ctr32_encrypt_blocks
// In:    x0 = input, x1 = output, x2 = number of 16-byte blocks,
//        x3 = key schedule (round count read from offset 240),
//        x4 = 16-byte counter block; the 32-bit word at offset 12 is
//             the counter that gets incremented
// Clobb: x0-x2, x5-x10, v0-v7, v16-v23, flags
// Note:  the counter block at x4 is only read, never written back.
//        A block count of 0 is not special-cased: it takes the same
//        path as a count of 1.
//----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]		// counter word
	ld1	{v0.4s},[x4]		// v0 = first counter block

	ld1	{v16.4s-v17.4s},[x3]	// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4		// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s-v19.4s},[x7],#32
	ld1	{v20.4s-v21.4s},[x7],#32
	ld1	{v22.4s-v23.4s},[x7],#32
	ld1	{v7.4s},[x7]		// v7 = last round key

	add	x7,x3,#32
	mov	w6,w5

	subs	x2,x2,#2
	b.lo	.Lctr32_tail		// fewer than two blocks

#ifndef __ARMEB__
	rev	w8, w8
#endif
	orr	v1.16b,v0.16b,v0.16b
	add	w8, w8, #1
	orr	v6.16b,v0.16b,v0.16b	// v6 = template for later counter blocks
	rev	w10, w8
	cmp	w5,#2			// w5 == 2 means a 10-round schedule
	mov	v1.s[3],w10		// v1 = second counter block
	b.eq	.Lctr32_128

.Loop2x_ctr32:
	aese	v0.16b,v16.16b
	aese	v1.16b,v16.16b
	ld1	{v16.4s},[x7],#16
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aese	v1.16b,v17.16b
	ld1	{v17.4s},[x7],#16
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	b.gt	.Loop2x_ctr32

	aese	v0.16b,v16.16b
	aese	v1.16b,v16.16b
	aesmc	v4.16b,v0.16b		// continue in v4/v5 so v0/v1 can be
	orr	v0.16b,v6.16b,v6.16b	// rebuilt for the next pair
	aesmc	v5.16b,v1.16b
	orr	v1.16b,v6.16b,v6.16b
	aese	v4.16b,v17.16b
	aese	v5.16b,v17.16b
	ld1	{v2.16b},[x0],#16
	aesmc	v4.16b,v4.16b
	ld1	{v3.16b},[x0],#16
	aesmc	v5.16b,v5.16b
	add	w8,w8,#1
	aese	v4.16b,v18.16b
	aese	v5.16b,v18.16b
	rev	w9,w8
	aesmc	v4.16b,v4.16b
	aesmc	v5.16b,v5.16b
	add	w8,w8,#1
	aese	v4.16b,v19.16b
	aese	v5.16b,v19.16b
	eor	v2.16b,v2.16b,v7.16b	// fold last round key into the input
	rev	w10,w8
	aesmc	v4.16b,v4.16b
	aesmc	v5.16b,v5.16b
	eor	v3.16b,v3.16b,v7.16b
	mov	x7,x3
	aese	v4.16b,v20.16b
	aese	v5.16b,v20.16b
	subs	x2,x2,#2
	aesmc	v4.16b,v4.16b
	aesmc	v5.16b,v5.16b
	ld1	{v16.4s-v17.4s},[x7],#32	// re-pre-load rndkey[0-1]
	aese	v4.16b,v21.16b
	aese	v5.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aesmc	v5.16b,v5.16b
	aese	v4.16b,v22.16b
	aese	v5.16b,v22.16b
	mov	v0.s[3], w9
	aesmc	v4.16b,v4.16b
	mov	v1.s[3], w10
	aesmc	v5.16b,v5.16b
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b

	mov	w6,w5
	eor	v2.16b,v2.16b,v4.16b
	eor	v3.16b,v3.16b,v5.16b
	st1	{v2.16b},[x1],#16
	st1	{v3.16b},[x1],#16
	b.hs	.Loop2x_ctr32		// flags from subs x2 above

	adds	x2,x2,#2
	b.eq	.Lctr32_done
	b	.Lctr32_tail

.Lctr32_128:				// 10 rounds, all keys kept in registers
	ld1	{v4.4s-v5.4s},[x7]

.Loop2x_ctr32_128:
	aese	v0.16b,v16.16b
	aese	v1.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v2.16b},[x0],#16
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0],#16
	aese	v0.16b,v17.16b
	aese	v1.16b,v17.16b
	add	w8,w8,#1
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	rev	w9,w8
	aese	v0.16b,v4.16b
	aese	v1.16b,v4.16b
	add	w8,w8,#1
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	rev	w10,w8
	aese	v0.16b,v5.16b
	aese	v1.16b,v5.16b
	subs	x2,x2,#2
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v18.16b
	aese	v1.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v19.16b
	aese	v1.16b,v19.16b
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v20.16b
	aese	v1.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v21.16b
	aese	v1.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v22.16b
	aese	v1.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v23.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v1.16b,v23.16b

	eor	v2.16b,v2.16b,v0.16b
	orr	v0.16b,v6.16b,v6.16b
	eor	v3.16b,v3.16b,v1.16b
	orr	v1.16b,v6.16b,v6.16b
	st1	{v2.16b},[x1],#16
	mov	v0.s[3], w9
	st1	{v3.16b},[x1],#16
	mov	v1.s[3], w10
	b.hs	.Loop2x_ctr32_128	// flags from subs x2 above

	adds	x2,x2,#2
	b.eq	.Lctr32_done

.Lctr32_tail:				// one remaining block, counter in v0
	aese	v0.16b,v16.16b
	ld1	{v16.4s},[x7],#16
	aesmc	v0.16b,v0.16b
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	ld1	{v17.4s},[x7],#16
	aesmc	v0.16b,v0.16b
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v2.16b},[x0]
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v23.16b

	eor	v2.16b,v2.16b,v0.16b
	st1	{v2.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16		// x30 was never modified, no reload needed
	ret
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif