// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
//
// NOTE(review): the two behavioural guards marked "REVIEW FIX" below were added
// while reviewing this generated output; the same change needs to be made in
// the generating Perl script or it will be lost on regeneration.
//
// Syntax: GNU/Clang assembler, AArch64, Mach-O (leading-underscore globals,
// "L" local labels, @PAGE/@PAGEOFF relocations), run through the C
// preprocessor.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#include <GFp/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text

// Constants for the key schedule, three 16-byte vectors:
//   +0   0x01 in every 32-bit lane  - initial round constant (loaded into v1)
//   +16  byte-permutation mask for tbl (loaded into v2)
//   +32  0x1b in every 32-bit lane  - round constant reloaded into v1 once the
//                                     first eight 128-bit iterations are done
.section	__TEXT,__const
.align	5
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

.text

//----------------------------------------------------------------------
// _GFp_aes_hw_set_encrypt_key
//
// In:   x0 = pointer to the raw key bytes (16 bytes read for 128-bit keys,
//            32 bytes for 256-bit keys)
//       w1 = key size in bits; only 128 and 256 are accepted
//       x2 = pointer to the output key schedule
// Out:  x0 =  0  on success
//            -1  if x0 or x2 is NULL
//            -2  if w1 is not 128 or 256
//       On success the round keys are stored consecutively from [x2] and the
//       round count (10 or 14) is stored as a 32-bit word at offset 240.
// Uses: x3, w12, v0-v6, flags.
//----------------------------------------------------------------------
.globl	_GFp_aes_hw_set_encrypt_key
.private_extern	_GFp_aes_hw_set_encrypt_key

.align	5
_GFp_aes_hw_set_encrypt_key:
Lenc_key:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1			// error code for NULL arguments
	cmp	x0,#0
	b.eq	Lenc_key_abort
	cmp	x2,#0
	b.eq	Lenc_key_abort
	mov	x3,#-2			// error code for a bad key size
	cmp	w1,#128
	b.lt	Lenc_key_abort
	cmp	w1,#256
	b.gt	Lenc_key_abort
	tst	w1,#0x3f		// must be a multiple of 64
	b.ne	Lenc_key_abort
	// REVIEW FIX: 192 passes every check above. 192-bit key support was
	// removed, so without this guard it would fall through to L256, which
	// reads 32 key bytes and reports success. Reject it while x3 still
	// holds -2.
	cmp	w1,#192
	b.eq	Lenc_key_abort

	adrp	x3,Lrcon@PAGE
	add	x3,x3,Lrcon@PAGEOFF
	cmp	w1,#192			// flags are consumed by b.lt below

	eor	v0.16b,v0.16b,v0.16b	// v0 = 0, used as the aese "key" and ext filler
	ld1	{v3.16b},[x0],#16	// v3 = first 16 key bytes
	mov	w1,#8			// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constant, v2 = tbl mask

	b.lt	Loop128
	// 192-bit key support was removed.
	b	L256

.align	4
// 128-bit schedule: eight iterations, each storing the current round key in
// v3 and deriving the next one. w1 counts the remaining iterations.
Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// advance the round constant
	eor	v3.16b,v3.16b,v6.16b
	b.ne	Loop128

	ld1	{v1.4s},[x3]		// switch to the 0x1b round constant

	// Ninth derivation step, unrolled.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	// Tenth derivation step, unrolled; the final round key is stored
	// without post-increment.
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]
	add	x2,x2,#0x50		// x2 = schedule base + 240 (10*16 + 0x50)

	mov	w12,#10			// round count for 128-bit keys
	b	Ldone

// 192-bit key support was removed.

.align	4
// 256-bit schedule: v3/v4 hold the two most recent round keys. w1 counts the
// seven iterations; the loop exits from its middle once the 15th round key
// has been stored, leaving x2 = schedule base + 240.
L256:
	ld1	{v4.16b},[x0]		// v4 = second 16 key bytes
	mov	w1,#7
	mov	w12,#14			// round count for 256-bit keys
	st1	{v3.4s},[x2],#16

Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1	// advance the round constant
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	Ldone			// flags still come from the subs above

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	Loop256

Ldone:
	str	w12,[x2]		// round count at schedule offset 240
	mov	x3,#0			// success

Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret

//----------------------------------------------------------------------
// _GFp_aes_hw_encrypt
//
// Encrypts a single 16-byte block.
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to the key schedule (round count read from offset 240)
// Uses: w3, x2 (advanced through the schedule), v0-v2, flags.
//----------------------------------------------------------------------
.globl	_GFp_aes_hw_encrypt
.private_extern	_GFp_aes_hw_encrypt

.align	5
_GFp_aes_hw_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// v2 = block being processed
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

	// Two rounds per iteration; v0/v1 alternate as the next round keys.
Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_enc

	// Last two rounds: the final one has no aesmc and ends with an xor of
	// the last round key.
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//----------------------------------------------------------------------
// _GFp_aes_hw_decrypt
//
// Same structure as _GFp_aes_hw_encrypt, using aesd/aesimc.
// In:   x0 = pointer to the 16-byte input block
//       x1 = pointer to the 16-byte output block
//       x2 = pointer to the key schedule (round count read from offset 240)
//            NOTE(review): presumably this must be a decryption schedule
//            prepared elsewhere; no routine in this file produces one.
// Uses: w3, x2 (advanced through the schedule), v0-v2, flags.
//----------------------------------------------------------------------
.globl	_GFp_aes_hw_decrypt
.private_extern	_GFp_aes_hw_decrypt

.align	5
_GFp_aes_hw_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]		// w3 = round count
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]		// v2 = block being processed
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret

//----------------------------------------------------------------------
// _GFp_aes_hw_ctr32_encrypt_blocks
//
// CTR-mode processing with a 32-bit counter, three blocks at a time with a
// one- or two-block tail.
// In:   x0 = input pointer
//       x1 = output pointer
//       x2 = number of 16-byte blocks
//       x3 = pointer to the key schedule (round count read from offset 240)
//       x4 = pointer to the 16-byte counter block; the 32-bit word at offset
//            12 is the counter (byte-reversed on non-__ARMEB__ builds). The
//            counter block is only read, never written back.
// Register roles:
//       v0, v1, v18  counter blocks being encrypted
//       v6           staging copy of the counter block (errata workaround)
//       v16, v17     rolling pair of round keys
//       v20-v23, v7  last five round keys (v7 is the final one)
//       w8           counter value in host byte order
//       w5 / w6      reload value / live count for the inner round loops
//       x7           cursor into the key schedule
//       x12          post-increment for the first tail load (0 when a single
//                    block remains, so the second load re-reads it rather
//                    than running past the input)
// Uses: x0-x2 (advanced), w5-w10, x12, v0-v7, v16-v23, flags.
//----------------------------------------------------------------------
.globl	_GFp_aes_hw_ctr32_encrypt_blocks
.private_extern	_GFp_aes_hw_ctr32_encrypt_blocks

.align	5
_GFp_aes_hw_ctr32_encrypt_blocks:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	AARCH64_VALID_CALL_TARGET
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	// REVIEW FIX: with a zero block count the tail path below would still
	// load one block and store two. Return without touching memory.
	cbz	x2,Lctr32_done
	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]
	ld1	{v0.4s},[x4]

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	mov	x12,#16
	cmp	x2,#2			// flags consumed by csel and b.ls below
	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
	sub	w5,w5,#2
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
	csel	x12,xzr,x12,lo

	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
	// affected by silicon errata #1742098 [0] and #1655431 [1],
	// respectively, where the second instruction of an aese/aesmc
	// instruction pair may execute twice if an interrupt is taken right
	// after the first instruction consumes an input register of which a
	// single 32-bit lane has been updated the last time it was modified.
	//
	// This function uses a counter in one 32-bit lane. The lane-insert
	// mov instructions could write to v1.16b and v18.16b directly, but
	// that trips these bugs. We write to v6.16b and copy to the final
	// register as a workaround.
	//
	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
#ifndef __ARMEB__
	rev	w8, w8
#endif
	add	w10, w8, #1
	orr	v6.16b,v0.16b,v0.16b
	rev	w10, w10
	mov	v6.s[3],w10
	add	w8, w8, #2
	orr	v1.16b,v6.16b,v6.16b
	b.ls	Lctr32_tail		// two blocks or fewer: tail only
	rev	w12, w8
	mov	v6.s[3],w12
	sub	x2,x2,#3		// bias
	orr	v18.16b,v6.16b,v6.16b
	b	Loop3x_ctr32

.align	4
Loop3x_ctr32:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v18.16b,v17.16b
	aesmc	v18.16b,v18.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Loop3x_ctr32

	// Remaining rounds for the three blocks, interleaved with loading the
	// input and preparing the next three counter blocks. The state moves
	// to v4/v5/v17 so that v0/v1/v18 can be refilled.
	aese	v0.16b,v16.16b
	aesmc	v4.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v5.16b,v1.16b
	ld1	{v2.16b},[x0],#16
	add	w9,w8,#1
	aese	v18.16b,v16.16b
	aesmc	v18.16b,v18.16b
	ld1	{v3.16b},[x0],#16
	rev	w9,w9
	aese	v4.16b,v17.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v17.16b
	aesmc	v5.16b,v5.16b
	ld1	{v19.16b},[x0],#16
	mov	x7,x3
	aese	v18.16b,v17.16b
	aesmc	v17.16b,v18.16b
	aese	v4.16b,v20.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v20.16b
	aesmc	v5.16b,v5.16b
	eor	v2.16b,v2.16b,v7.16b	// fold the last round key into the input
	add	w10,w8,#2
	aese	v17.16b,v20.16b
	aesmc	v17.16b,v17.16b
	eor	v3.16b,v3.16b,v7.16b
	add	w8,w8,#3
	aese	v4.16b,v21.16b
	aesmc	v4.16b,v4.16b
	aese	v5.16b,v21.16b
	aesmc	v5.16b,v5.16b
	// Note the logic to update v0.16b, v1.16b, and v18.16b is written to
	// work around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
	// 32-bit mode: each counter is inserted into v6.16b and the whole
	// register is then copied to its destination.
	eor	v19.16b,v19.16b,v7.16b
	mov	v6.s[3], w9
	aese	v17.16b,v21.16b
	aesmc	v17.16b,v17.16b
	orr	v0.16b,v6.16b,v6.16b
	rev	w10,w10
	aese	v4.16b,v22.16b
	aesmc	v4.16b,v4.16b
	mov	v6.s[3], w10
	rev	w12,w8
	aese	v5.16b,v22.16b
	aesmc	v5.16b,v5.16b
	orr	v1.16b,v6.16b,v6.16b
	mov	v6.s[3], w12
	aese	v17.16b,v22.16b
	aesmc	v17.16b,v17.16b
	orr	v18.16b,v6.16b,v6.16b
	subs	x2,x2,#3		// flags consumed by b.hs below
	aese	v4.16b,v23.16b
	aese	v5.16b,v23.16b
	aese	v17.16b,v23.16b

	eor	v2.16b,v2.16b,v4.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	st1	{v2.16b},[x1],#16
	eor	v3.16b,v3.16b,v5.16b
	mov	w6,w5
	st1	{v3.16b},[x1],#16
	eor	v19.16b,v19.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v19.16b},[x1],#16
	b.hs	Loop3x_ctr32

	adds	x2,x2,#3		// undo the bias: x2 = blocks remaining (0..2)
	b.eq	Lctr32_done
	cmp	x2,#1
	mov	x12,#16
	csel	x12,xzr,x12,eq		// single block left: do not advance x0

	// Tail: always encrypts two counter blocks (v0, v1); the second result
	// is stored only when two blocks remain.
Lctr32_tail:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v17.4s},[x7],#16
	b.gt	Lctr32_tail

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	ld1	{v2.16b},[x0],x12
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	ld1	{v3.16b},[x0]
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	eor	v2.16b,v2.16b,v7.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	eor	v3.16b,v3.16b,v7.16b
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b

	cmp	x2,#1
	eor	v2.16b,v2.16b,v0.16b
	eor	v3.16b,v3.16b,v1.16b
	st1	{v2.16b},[x1],#16
	b.eq	Lctr32_done		// only one block was requested
	st1	{v3.16b},[x1]

Lctr32_done:
	ldr	x29,[sp],#16
	ret

#endif
#endif  // !OPENSSL_NO_ASM