; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOSI %s

; Checks which DS instruction llc selects for a pair of adjacent i32 accesses
; in addrspace(3). The tahiti run is matched with the GCN and SI check
; prefixes; the hawaii run is matched with the GCN and NOSI check prefixes.

; 512 x i32 array in addrspace(3), 16-byte aligned, used by the *_global_*
; functions in this file.
@compute_lds = external addrspace(3) global [512 x i32], align 16

; Two adjacent i32 stores; the first one is marked align 8, so the 8-byte pair
; is 8-byte aligned. Both runs expect a single 64-bit DS write.
; GCN-LABEL: {{^}}store_aligned:
; GCN: ds_write_b64
define amdgpu_cs void @store_aligned(i32 addrspace(3)* %ptr) #0 {
entry:
  %ptr.gep.1 = getelementptr i32, i32 addrspace(3)* %ptr, i32 1

  store i32 42, i32 addrspace(3)* %ptr, align 8
  store i32 43, i32 addrspace(3)* %ptr.gep.1
  ret void
}


; Load counterpart of store_aligned: two adjacent i32 loads, the first marked
; align 8. Both runs expect a single 64-bit DS read. The two values are packed
; into a <2 x float> return value so that neither load is dead.
; GCN-LABEL: {{^}}load_aligned:
; GCN: ds_read_b64
define amdgpu_cs <2 x float> @load_aligned(i32 addrspace(3)* %ptr) #0 {
entry:
  %ptr.gep.1 = getelementptr i32, i32 addrspace(3)* %ptr, i32 1

  %v.0 = load i32, i32 addrspace(3)* %ptr, align 8
  %v.1 = load i32, i32 addrspace(3)* %ptr.gep.1

  %r.0 = insertelement <2 x i32> undef, i32 %v.0, i32 0
  %r.1 = insertelement <2 x i32> %r.0, i32 %v.1, i32 1
  %bc = bitcast <2 x i32> %r.1 to <2 x float>
  ret <2 x float> %bc
}


; Stores to constant elements 3 and 4 of @compute_lds, i.e. byte offsets 12
; and 16 from a 16-byte aligned base. The pair starts at a 4-byte aligned
; address only, and both runs expect the two-dword form ds_write2_b32.
; GCN-LABEL: {{^}}store_global_const_idx:
; GCN: ds_write2_b32
define amdgpu_cs void @store_global_const_idx() #0 {
entry:
  %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 3
  %ptr.b = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 4

  store i32 42, i32 addrspace(3)* %ptr.a
  store i32 43, i32 addrspace(3)* %ptr.b
  ret void
}


; Load counterpart of store_global_const_idx: loads from constant elements 3
; and 4 of @compute_lds. Both runs expect ds_read2_b32.
; GCN-LABEL: {{^}}load_global_const_idx:
; GCN: ds_read2_b32
define amdgpu_cs <2 x float> @load_global_const_idx() #0 {
entry:
  %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 3
  %ptr.b = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 4

  %v.0 = load i32, i32 addrspace(3)* %ptr.a
  %v.1 = load i32, i32 addrspace(3)* %ptr.b

  %r.0 = insertelement <2 x i32> undef, i32 %v.0, i32 0
  %r.1 = insertelement <2 x i32> %r.0, i32 %v.1, i32 1
  %bc = bitcast <2 x i32> %r.1 to <2 x float>
  ret <2 x float> %bc
}


; Stores to elements %idx and %idx + 1 of @compute_lds, where %idx is an
; unconstrained function argument. The tahiti run expects two separate
; ds_write_b32 instructions; the hawaii run expects one ds_write2_b32.
; NOTE(review): presumably tahiti may only fold the +4 byte offset when the
; base address is known to be non-negative, which cannot be shown for an
; arbitrary %idx - confirm against the backend's DS offset legality checks.
; GCN-LABEL: {{^}}store_global_var_idx_case1:
; SI: ds_write_b32
; SI: ds_write_b32
; NOSI: ds_write2_b32
define amdgpu_cs void @store_global_var_idx_case1(i32 %idx) #0 {
entry:
  %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 %idx
  %ptr.b = getelementptr i32, i32 addrspace(3)* %ptr.a, i32 1

  store i32 42, i32 addrspace(3)* %ptr.a
  store i32 43, i32 addrspace(3)* %ptr.b
  ret void
}


; Load counterpart of store_global_var_idx_case1: unconstrained %idx. The
; tahiti run expects two separate ds_read_b32 instructions; the hawaii run
; expects one ds_read2_b32.
; GCN-LABEL: {{^}}load_global_var_idx_case1:
; SI: ds_read_b32
; SI: ds_read_b32
; NOSI: ds_read2_b32
define amdgpu_cs <2 x float> @load_global_var_idx_case1(i32 %idx) #0 {
entry:
  %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 %idx
  %ptr.b = getelementptr i32, i32 addrspace(3)* %ptr.a, i32 1

  %v.0 = load i32, i32 addrspace(3)* %ptr.a
  %v.1 = load i32, i32 addrspace(3)* %ptr.b

  %r.0 = insertelement <2 x i32> undef, i32 %v.0, i32 0
  %r.1 = insertelement <2 x i32> %r.0, i32 %v.1, i32 1
  %bc = bitcast <2 x i32> %r.1 to <2 x float>
  ret <2 x float> %bc
}


; Same access pattern as store_global_var_idx_case1, but %idx is first masked
; with 255, so the index is in the range 0..255. With the index bounded this
; way, both runs expect the merged ds_write2_b32.
; GCN-LABEL: {{^}}store_global_var_idx_case2:
; GCN: ds_write2_b32
define amdgpu_cs void @store_global_var_idx_case2(i32 %idx) #0 {
entry:
  %idx.and = and i32 %idx, 255
  %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 %idx.and
  %ptr.b = getelementptr i32, i32 addrspace(3)* %ptr.a, i32 1

  store i32 42, i32 addrspace(3)* %ptr.a
  store i32 43, i32 addrspace(3)* %ptr.b
  ret void
}


; Load counterpart of store_global_var_idx_case2: %idx is masked with 255
; before indexing. Both runs expect the merged ds_read2_b32.
; GCN-LABEL: {{^}}load_global_var_idx_case2:
; GCN: ds_read2_b32
define amdgpu_cs <2 x float> @load_global_var_idx_case2(i32 %idx) #0 {
entry:
  %idx.and = and i32 %idx, 255
  %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 %idx.and
  %ptr.b = getelementptr i32, i32 addrspace(3)* %ptr.a, i32 1

  %v.0 = load i32, i32 addrspace(3)* %ptr.a
  %v.1 = load i32, i32 addrspace(3)* %ptr.b

  %r.0 = insertelement <2 x i32> undef, i32 %v.0, i32 0
  %r.1 = insertelement <2 x i32> %r.0, i32 %v.1, i32 1
  %bc = bitcast <2 x i32> %r.1 to <2 x float>
  ret <2 x float> %bc
}

attributes #0 = { nounwind }