; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=X64

define void @f(<8 x float> %A, i8* %B, <4 x double> %C, <4 x i64> %E, <8 x i32> %F, <16 x i16> %G, <32 x i8> %H) nounwind {
; X32-LABEL: f:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $32, %esp
; X32-NEXT:    vmovdqa 104(%ebp), %ymm3
; X32-NEXT:    vmovdqa 72(%ebp), %ymm4
; X32-NEXT:    vmovdqa 40(%ebp), %ymm5
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vaddps .LCPI0_0, %ymm0, %ymm0
; X32-NEXT:    vmovntps %ymm0, (%eax)
; X32-NEXT:    vpaddq .LCPI0_1, %ymm2, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vaddpd .LCPI0_2, %ymm1, %ymm0
; X32-NEXT:    vmovntpd %ymm0, (%eax)
; X32-NEXT:    vpaddd .LCPI0_3, %ymm5, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vpaddw .LCPI0_4, %ymm4, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vpaddb .LCPI0_5, %ymm3, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: f:
; X64:       # BB#0:
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovntps %ymm0, (%rdi)
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm2, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    vmovntpd %ymm0, (%rdi)
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm3, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm4, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm5, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %cast = bitcast i8* %B to <8 x float>*
  %A2 = fadd <8 x float> %A, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
  store <8 x float> %A2, <8 x float>* %cast, align 32, !nontemporal !0
  %cast1 = bitcast i8* %B to <4 x i64>*
  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
  store <4 x i64> %E2, <4 x i64>* %cast1, align 32, !nontemporal !0
  %cast2 = bitcast i8* %B to <4 x double>*
  %C2 = fadd <4 x double> %C, <double 1.0, double 2.0, double 3.0, double 4.0>
  store <4 x double> %C2, <4 x double>* %cast2, align 32, !nontemporal !0
  %cast3 = bitcast i8* %B to <8 x i32>*
  %F2 = add <8 x i32> %F, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  store <8 x i32> %F2, <8 x i32>* %cast3, align 32, !nontemporal !0
  %cast4 = bitcast i8* %B to <16 x i16>*
  %G2 = add <16 x i16> %G, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  store <16 x i16> %G2, <16 x i16>* %cast4, align 32, !nontemporal !0
  %cast5 = bitcast i8* %B to <32 x i8>*
  %H2 = add <32 x i8> %H, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
  store <32 x i8> %H2, <32 x i8>* %cast5, align 32, !nontemporal !0
  ret void
}

!0 = !{i32 1}