@ AES built on the ARMv8 Crypto Extensions, AArch32 ARM-mode code.
@
@ Syntax:  GNU as, ARM unified operand order (op dst, src), passed through
@          the C preprocessor.  The comment character is the at-sign.
@ Symbols: exported names carry a leading underscore and are marked
@          .private_extern; labels starting with L are file-local.
@ The AES instructions (aese, aesd, aesmc, aesimc) are emitted as raw
@ .byte sequences; the mnemonic each sequence stands for is given in the
@ trailing comment on the same line.
@
@ Key schedule layout, as written and read by the code in this file:
@   offset 0 ...   round keys, 16 bytes each
@   offset 240     32-bit round count (10, 12 or 14)
@
@ NOTE(review): the C prototypes of the exported routines are not visible
@ in this file; the register contracts below are derived from the code.
#include <openssl/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text


.code	32
#undef	__thumb2__
.align	5
@ Key expansion constants, three rows of 16 bytes:
@   row 0  round constant 0x01 splatted      (loaded into q1)
@   row 1  byte-shuffle mask for vtbl        (loaded into q2)
@   row 2  round constant 0x1b splatted      (reloaded into q1, 128-bit path)
Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ _aes_hw_set_encrypt_key
@ In:    r0 = pointer to the user key bytes
@        r1 = key length in bits (128, 192 or 256)
@        r2 = pointer to the key schedule to fill in
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 is zero
@             -2 if r1 is below 128, above 256 or not a multiple of 64
@ Clobb: r1, r3, r12, q0-q3, q8-q10, flags
@ On success r12 holds the round count and r2 points at the round-count
@ word (schedule base + 240); _aes_hw_set_decrypt_key enters through
@ Lenc_key and relies on both.
@-----------------------------------------------------------------------
.globl	_aes_hw_set_encrypt_key
.private_extern	_aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func	_aes_hw_set_encrypt_key
#endif
.align	5
_aes_hw_set_encrypt_key:
Lenc_key:
	mov	r3,#-1			@ r3 = pending return value
	cmp	r0,#0
	beq	Lenc_key_abort
	cmp	r2,#0
	beq	Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	Lenc_key_abort
	cmp	r1,#256
	bgt	Lenc_key_abort
	tst	r1,#0x3f
	bne	Lenc_key_abort

	adr	r3,Lrcon
	cmp	r1,#192			@ flags select the path taken below

	veor	q0,q0,q0		@ q0 = 0, stays zero for the whole routine
	vld1.8	{q3},[r0]!
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!

	blt	Loop128
	beq	L192
	b	L256

@ 128-bit key: 8 loop iterations plus two unrolled steps; 11 round keys.
.align	4
Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant
	veor	q3,q3,q10
	bne	Loop128

	vld1.32	{q1},[r3]		@ third Lrcon row (0x1b)

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ r2 = schedule base + 240

	mov	r12,#10
	b	Ldone

@ 192-bit key: 8 loop iterations, each storing 8 + 16 bytes.
.align	4
L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule base + 240
	b	Ldone

@ 256-bit key: 7 iterations; the loop is left from its middle (beq Ldone)
@ with r2 already at schedule base + 240.
.align	4
L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	Ldone

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	Loop256

Ldone:
	str	r12,[r2]		@ round count at schedule offset 240
	mov	r3,#0

Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr


@-----------------------------------------------------------------------
@ _aes_hw_set_decrypt_key
@ In:    r0, r1, r2 as for _aes_hw_set_encrypt_key
@ Out:   r0 = 0 on success, otherwise the error code from Lenc_key
@ Clobb: r1, r2, r3, r12, q0-q3, q8-q10, flags (r4 and lr are saved)
@ Builds the encryption schedule, then reverses the order of the round
@ keys in place and applies aesimc to every key except the first and
@ the last.
@-----------------------------------------------------------------------
.globl	_aes_hw_set_decrypt_key
.private_extern	_aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func	_aes_hw_set_decrypt_key
#endif
.align	5
_aes_hw_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	Lenc_key

	cmp	r0,#0
	bne	Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ r4 = stride for the descending pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ Swap the outermost pair of round keys unchanged.
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ Swap the inner pairs, transforming both; r2 ascends, r0 descends.
Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	Loop_imc

	@ Middle round key: transformed in place.
	vld1.32	{q0},[r2]
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
Ldec_key_abort:
	ldmia	sp!,{r4,pc}

@-----------------------------------------------------------------------
@ _aes_hw_encrypt
@ In:    r0 = pointer to the 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (round count at offset 240)
@ Out:   block written to [r1]; r0 is not set
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	_aes_hw_encrypt
.private_extern	_aes_hw_encrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_encrypt
#endif
.align	5
_aes_hw_encrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per iteration, round keys alternating between q0 and q1.
Loop_enc:
.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_enc

.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr

@-----------------------------------------------------------------------
@ _aes_hw_decrypt
@ In:    r0 = pointer to the 16-byte input block
@        r1 = pointer to the 16-byte output block
@        r2 = pointer to the key schedule (round count at offset 240)
@ Out:   block written to [r1]; r0 is not set
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	_aes_hw_decrypt
.private_extern	_aes_hw_decrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_decrypt
#endif
.align	5
_aes_hw_decrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ Two rounds per iteration, round keys alternating between q0 and q1.
Loop_dec:
.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	Loop_dec

.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ last round key
.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr

@-----------------------------------------------------------------------
@ _aes_hw_cbc_encrypt
@ In:    r0      = input pointer
@        r1      = output pointer
@        r2      = length in bytes; rounded down to a multiple of 16,
@                  and nothing is done when it is below 16
@        r3      = pointer to the key schedule
@        [sp]    = pointer to the 16-byte IV (read, then updated on exit)
@        [sp,4]  = direction flag: zero decrypts, non-zero encrypts
@ Out:   no value is returned in r0
@ Saved: r4-r8, lr, d8-d15
@ Register roles:
@        r4 = IV pointer, r5 = round count (biased), r8 = input stride
@        (16, or 0 once the final block has been fetched),
@        q6 = chaining value, q7 = last round key,
@        q10-q15 = the six round keys preceding the last one
@-----------------------------------------------------------------------
.globl	_aes_hw_cbc_encrypt
.private_extern	_aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func	_aes_hw_cbc_encrypt
#endif
.align	5
_aes_hw_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldmia	ip,{r4,r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	Lcbc_abort
	moveq	r8,#0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	Lcbc_dec		@ flags still from cmp r5 above

	cmp	r5,#2			@ equal for a 10-round schedule
	veor	q0,q0,q6
	veor	q5,q8,q7		@ q5 = first round key xor last round key
	beq	Lcbc_enc128

	vld1.32	{q2,q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	Lenter_cbc_enc

.align	4
Loop_cbc_enc:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
Lenter_cbc_enc:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4			@ equal for a 12-round schedule
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	Lcbc_enc192

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

Lcbc_enc192:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	Loop_cbc_enc		@ flags from subs r2 above

	vst1.8	{q6},[r1]!
	b	Lcbc_done

.align	5
Lcbc_enc128:
	vld1.32	{q2,q3},[r7]
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	Lenter_cbc_enc128
Loop_cbc_enc128:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
Lenter_cbc_enc128:
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	Loop_cbc_enc128		@ flags from subs r2 above

	vst1.8	{q6},[r1]!
	b	Lcbc_done
.align	5
@ Decryption handles three blocks per pass in q0, q1 and q10, then one
@ or two leftover blocks in Lcbc_dec_tail.
Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

Loop3x_cbc_dec:
.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_cbc_dec

.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such a way that
					@ at exit from the loop q1-q10
					@ are loaded with the last "words"
	vorr	q6,q11,q11
	mov	r7,r3
.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	Loop3x_cbc_dec		@ flags from subs r2 above

	cmn	r2,#0x30
	beq	Lcbc_done
	nop

Lcbc_dec_tail:
.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Lcbc_dec_tail

.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ equal when exactly one block is left
.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	Lcbc_done

Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
Lcbc_abort:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}

@-----------------------------------------------------------------------
@ _aes_hw_ctr32_encrypt_blocks
@ In:    r0   = input pointer
@        r1   = output pointer
@        r2   = number of 16-byte blocks
@        r3   = pointer to the key schedule
@        [sp] = pointer to the 16-byte counter block; its last 32-bit
@               word is the counter, byte-reversed with rev before and
@               after each increment (the first rev is skipped when
@               __ARMEB__ is defined)
@ Out:   no value is returned in r0; the counter block in memory is
@        not written back
@ Saved: r4-r10, lr, d8-d15
@ Register roles:
@        r8 = running counter value, q6 = copy of the counter block,
@        q7 = last round key, q12-q15 = the four round keys before it,
@        r12 = stride of the first tail load (0 when one block is left)
@ NOTE(review): no check for r2 == 0 is visible here; presumably callers
@ never pass zero blocks - confirm.
@-----------------------------------------------------------------------
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern	_aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func	_aes_hw_ctr32_encrypt_blocks
#endif
.align	5
_aes_hw_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8,q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10
	bls	Lctr32_tail		@ flags still from cmp r2 above
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12
	b	Loop3x_ctr32

@ Three counter blocks per pass in q0, q1 and q10.
.align	4
Loop3x_ctr32:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	Loop3x_ctr32

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3
.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	Loop3x_ctr32		@ flags from subs r2 above

	adds	r2,r2,#3		@ undo the bias: r2 = blocks left (0..2)
	beq	Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0

@ One or two remaining blocks in q0 and q1.
Lctr32_tail:
.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	Lctr32_tail

.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	Lctr32_done		@ single block: skip the second store
	vst1.8	{q3},[r1]

Lctr32_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}

#endif