; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41

; Each test loads a vector narrower than an XMM register, adds to it, and
; stores the result back through the same pointer. The assertions pin the
; width of the memory accesses that llc selects for each vector type.

; <2 x i8> is 16 bits wide: one 16-bit load and one 16-bit store.
; With SSE2 the result goes through %eax (movd + movw); with SSE4.1 it is
; stored directly from the vector register with pextrw.
define void @load_2_i8(<2 x i8>* %A) {
; SSE2-LABEL: load_2_i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzwl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    movw %ax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_2_i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzwl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE41-NEXT:    pextrw $0, %xmm0, (%rdi)
; SSE41-NEXT:    retq
  %vec = load <2 x i8>, <2 x i8>* %A
  %sum = add <2 x i8> %vec, <i8 9, i8 7>
  store <2 x i8> %sum, <2 x i8>* %A
  ret void
}

; <2 x i16> is 32 bits wide: a 32-bit movd load and a 32-bit movd store.
define void @load_2_i16(<2 x i16>* %A) {
; CHECK-LABEL: load_2_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    paddw {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movd %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %vec = load <2 x i16>, <2 x i16>* %A
  %sum = add <2 x i16> %vec, <i16 9, i16 7>
  store <2 x i16> %sum, <2 x i16>* %A
  ret void
}

; <2 x i32> is 64 bits wide: a 64-bit movq load and a 64-bit movq store.
define void @load_2_i32(<2 x i32>* %A) {
; CHECK-LABEL: load_2_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    paddd {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movq %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, <2 x i32>* %A
  %sum = add <2 x i32> %vec, <i32 9, i32 7>
  store <2 x i32> %sum, <2 x i32>* %A
  ret void
}

; <4 x i8> is 32 bits wide: a 32-bit movd load and a 32-bit movd store.
define void @load_4_i8(<4 x i8>* %A) {
; CHECK-LABEL: load_4_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    paddb {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movd %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %vec = load <4 x i8>, <4 x i8>* %A
  %sum = add <4 x i8> %vec, <i8 1, i8 4, i8 9, i8 7>
  store <4 x i8> %sum, <4 x i8>* %A
  ret void
}

; <4 x i16> is 64 bits wide: a 64-bit movq load and a 64-bit movq store.
define void @load_4_i16(<4 x i16>* %A) {
; CHECK-LABEL: load_4_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    paddw {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movq %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %vec = load <4 x i16>, <4 x i16>* %A
  %sum = add <4 x i16> %vec, <i16 1, i16 4, i16 9, i16 7>
  store <4 x i16> %sum, <4 x i16>* %A
  ret void
}

; <8 x i8> is 64 bits wide: a 64-bit movq load and a 64-bit movq store.
; The vector is added to itself, so no constant-pool operand is needed.
define void @load_8_i8(<8 x i8>* %A) {
; CHECK-LABEL: load_8_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    paddb %xmm0, %xmm0
; CHECK-NEXT:    movq %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %vec = load <8 x i8>, <8 x i8>* %A
  %sum = add <8 x i8> %vec, %vec
  store <8 x i8> %sum, <8 x i8>* %A
  ret void
}