// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// NOTE(review): two defensive checks were added directly to this generated
// output (rejecting 192-bit keys in GFp_aes_hw_set_encrypt_key and returning
// early for a zero block count in GFp_aes_hw_ctr32_encrypt_blocks). Mirror
// them in the generating script so they survive regeneration.
//
// Syntax: GNU as, AArch64, run through the C preprocessor.
// Comment character is "//".

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#include <GFp/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv8-a+crypto
.section	.rodata
.align	5
// Constants for the key schedule. Rows one and two are loaded together into
// v1/v2 by GFp_aes_hw_set_encrypt_key; row three is reloaded into v1 after
// the eight-iteration 128-bit loop.
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

//-----------------------------------------------------------------------
// GFp_aes_hw_set_encrypt_key
// In:    x0 = pointer to the user key bytes
//        w1 = key length in bits (128 or 256)
//        x2 = pointer to the output key schedule
// Out:   x0 =  0 on success
//              -1 if x0 or x2 is NULL
//              -2 if the bit length is not 128 or 256
// Writes the round keys starting at [x2] and the round count (10 or 14)
// as a 32-bit word at byte offset 240 of the schedule.
// Uses:  x3, w12, v0-v6, flags.
//-----------------------------------------------------------------------
.globl	GFp_aes_hw_set_encrypt_key
.hidden	GFp_aes_hw_set_encrypt_key
.type	GFp_aes_hw_set_encrypt_key,%function
.align	5
GFp_aes_hw_set_encrypt_key:
.Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// x3 = pending return value
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f
	b.ne	.Lenc_key_abort
	// 192 passes the checks above, but 192-bit key support was removed.
	// Without this check it would fall into .L256, which reads 32 bytes
	// from the user key. Reject it while x3 still holds -2.
	cmp	w1,#192
	b.eq	.Lenc_key_abort

	adrp	x3,.Lrcon
	add	x3,x3,:lo12:.Lrcon
	cmp	w1,#192			// flags consumed by b.lt below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0
	ld1	{v3.16b},[x0],#16	// first 16 key bytes
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = rcon, v2 = rotate-n-splat mask

	b.lt	.Loop128
	// 192-bit key support was removed.
	b	.L256

.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags consumed by b.ne below

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]		// third .Lrcon row (0x1b)

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]		// last round key, at schedule offset 160
	add	x2,x2,#0x50		// x2 = schedule + 240

	mov	w12,#10			// round count for 128-bit keys
	b	.Ldone

// 192-bit key support was removed.

.align	4
.L256:
	ld1	{v4.16b},[x0]		// second 16 key bytes
	mov	w1,#7
	mov	w12,#14			// round count for 256-bit keys
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1		// flags consumed by b.eq below

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone			// x2 = schedule + 240 when taken

	dup	v6.4s,v3.s[3]	// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count at schedule offset 240
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
.size	GFp_aes_hw_set_encrypt_key,.-GFp_aes_hw_set_encrypt_key

//-----------------------------------------------------------------------
// GFp_aes_hw_encrypt
// In:    x0 = pointer to one 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule (round count read at offset 240)
// Uses:  w3, x2 (advanced through the schedule), v0-v2, flags.
//-----------------------------------------------------------------------
.globl	GFp_aes_hw_encrypt
.hidden	GFp_aes_hw_encrypt
.type	GFp_aes_hw_encrypt,%function
.align	5
GFp_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// v2 = input block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:				// two rounds per iteration
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	GFp_aes_hw_encrypt,.-GFp_aes_hw_encrypt

//-----------------------------------------------------------------------
// GFp_aes_hw_decrypt
// In:    x0 = pointer to one 16-byte input block
//        x1 = pointer to the 16-byte output block
//        x2 = pointer to the key schedule (round count read at offset 240)
// Same structure as GFp_aes_hw_encrypt, using aesd/aesimc.
// Uses:  w3, x2 (advanced through the schedule), v0-v2, flags.
//-----------------------------------------------------------------------
.globl	GFp_aes_hw_decrypt
.hidden	GFp_aes_hw_decrypt
.type	GFp_aes_hw_decrypt,%function
.align	5
GFp_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// v2 = input block
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:				// two rounds per iteration
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]		// last round key
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	GFp_aes_hw_decrypt,.-GFp_aes_hw_decrypt

//-----------------------------------------------------------------------
// GFp_aes_hw_ctr32_encrypt_blocks
// In:    x0 = pointer to the input
//        x1 = pointer to the output
//        x2 = number of 16-byte blocks
//        x3 = pointer to the key schedule (round count read at offset 240)
//        x4 = pointer to the 16-byte counter block; the 32-bit word at
//             byte offset 12 is the counter (byte-reversed before being
//             incremented unless __ARMEB__ is defined)
// Processes three blocks per iteration in .Loop3x_ctr32 and the final one
// or two blocks in .Lctr32_tail. The counter block at [x4] is only read.
// Uses:  w5, w6, x7, w8, w9, w10, x12, v0-v7, v16-v23, flags.
//-----------------------------------------------------------------------
.globl	GFp_aes_hw_ctr32_encrypt_blocks
.hidden	GFp_aes_hw_ctr32_encrypt_blocks
.type	GFp_aes_hw_ctr32_encrypt_blocks,%function
.align	5
GFp_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// Nothing to do for zero blocks. Without this check the tail path
	// below would still load from [x0] and store two blocks to [x1].
	cbz	x2,.Lctr32_done
	ldr	w5,[x3,#240]		// w5 = round count

	ldr	w8, [x4, #12]		// w8 = counter word
	ld1	{v0.4s},[x4]		// v0 = counter block

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4		// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo		// x12 = 0 when fewer than two blocks

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The lane-insert
	// mov instructions could write to v1.16b and v18.16b directly, but
	// that trips these bugs. We write to v6.16b and copy to the final
	// register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	w8, w8
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	.Lctr32_tail		// one or two blocks: tail only
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ctr32

	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to
	// work around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode: each counter is inserted into v6.16b and then copied
	// as a whole register, never written lane-wise to the final register.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3		// flags consumed by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	.Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias; x2 = blocks left (0..2)
	b.eq	.Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// x12 = 0 when exactly one block is left

.Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12	// x12 = 0 keeps the next load on the same block
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	.Lctr32_done		// only one block was left
	st1	{v3.16b},[x1]

.Lctr32_done:
	ldr	x29,[sp],#16
	ret
.size	GFp_aes_hw_ctr32_encrypt_blocks,.-GFp_aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif  // !OPENSSL_NO_ASM
.section	.note.GNU-stack,"",%progbits