; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=ALL

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; ALL-LABEL: test_v4f32:
; SSE: movaps (%rdi), %xmm0
; AVX: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; ALL-LABEL: test_v2f64:
; SSE: movapd (%rdi), %xmm0
; AVX: vmovapd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_unaligned:
; SSE: movups (%rdi), %xmm0
; AVX: vmovups (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_unaligned:
; SSE: movupd (%rdi), %xmm0
; AVX: vmovupd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_abi_alignment:
; SSE: movaps (%rdi), %xmm0
; AVX: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_abi_alignment:
; SSE: movapd (%rdi), %xmm0
; AVX: vmovapd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}