; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s ; GCN-LABEL: {{^}}test_concat_v1i32: ; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF ; instructions that access scratch memory. Bit 23, which is the add_tid_enable ; bit, is only set for scratch access, so we can check for the absence of this ; value if we want to ensure scratch memory is not being used. ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { %concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8 ret void } ; GCN-LABEL: {{^}}test_concat_v2i32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { %concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16 ret void } ; GCN-LABEL: {{^}}test_concat_v4i32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind { %concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32 ret void } ; GCN-LABEL: {{^}}test_concat_v8i32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind { %concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64 ret void } ; GCN-LABEL: {{^}}test_concat_v16i32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind { %concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128 ret void } ; GCN-LABEL: {{^}}test_concat_v1f32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind { %concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8 ret void } ; GCN-LABEL: {{^}}test_concat_v2f32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind { %concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16 ret void } ; GCN-LABEL: {{^}}test_concat_v4f32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind { %concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32 ret void } ; GCN-LABEL: {{^}}test_concat_v8f32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind { %concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64 ret void } ; GCN-LABEL: {{^}}test_concat_v16f32: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind { %concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128 ret void } ; GCN-LABEL: {{^}}test_concat_v1i64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind { %concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16 ret void } ; GCN-LABEL: {{^}}test_concat_v2i64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind { %concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32 ret void } ; GCN-LABEL: {{^}}test_concat_v4i64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind { %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64 ret void } ; GCN-LABEL: {{^}}test_concat_v8i64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind { %concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128 ret void } ; GCN-LABEL: {{^}}test_concat_v16i64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind { %concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256 ret void } ; GCN-LABEL: {{^}}test_concat_v1f64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind { %concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16 ret void } ; GCN-LABEL: {{^}}test_concat_v2f64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind { %concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32 ret void } ; GCN-LABEL: {{^}}test_concat_v4f64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind { %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64 ret void } ; GCN-LABEL: {{^}}test_concat_v8f64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind { %concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128 ret void } ; GCN-LABEL: {{^}}test_concat_v16f64: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind { %concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256 ret void } ; GCN-LABEL: {{^}}test_concat_v1i1: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind { %concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> store <2 x i1> %concat, <2 x i1> addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}test_concat_v2i1: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind { %concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> store <4 x i1> %concat, <4 x i1> addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}test_concat_v4i1: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind { %concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> store <8 x i1> %concat, <8 x i1> addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}test_concat_v8i1: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind { %concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> store <16 x i1> %concat, <16 x i1> addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}test_concat_v16i1: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind { %concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> store <32 x i1> %concat, <32 x i1> addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}test_concat_v32i1: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind { %concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> store <64 x i1> %concat, <64 x i1> addrspace(1)* %out ret void } ; GCN-LABEL: {{^}}test_concat_v1i16: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind { %concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}test_concat_v2i16: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind { %concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8 ret void } ; GCN-LABEL: {{^}}test_concat_v4i16: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind { %concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16 ret void } ; GCN-LABEL: {{^}}test_concat_v8i16: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind { %concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32 ret void } ; GCN-LABEL: {{^}}test_concat_v16i16: ; GCN-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000 ; GCN-NOT: movrel define amdgpu_kernel void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind { %concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64 ret void } ; GCN-LABEL: {{^}}concat_vector_crash: ; GCN: s_endpgm define amdgpu_kernel void @concat_vector_crash(<8 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { bb: %tmp = load <2 x float>, <2 x float> addrspace(1)* %in, align 4 %tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> %tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> store <8 x float> %tmp2, <8 x float> addrspace(1)* %out, align 32 ret void } ; GCN-LABEL: {{^}}concat_vector_crash2: ; GCN: s_endpgm define amdgpu_kernel void @concat_vector_crash2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) { %tmp = load i32, i32 addrspace(1)* %in, align 1 %tmp1 = trunc i32 %tmp to i24 %tmp2 = bitcast i24 %tmp1 to <3 x i8> %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> undef, <8 x i32> %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> , <8 x i32> store <8 x i8> %tmp4, <8 x i8> addrspace(1)* %out, align 8 ret void } ; GCN-LABEL: {{^}}build_vector_splat_concat_v8i16: ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: ds_write_b128 ; VI: ds_write_b128 define amdgpu_kernel void @build_vector_splat_concat_v8i16() { entry: store <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* undef, align 16 store <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* null, align 16 ret void }