1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; FIXME: Manually added checks for metadata nodes at bottom 3; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=HSA %s 4; RUN: opt -mtriple=amdgcn-- -S -o - -amdgpu-lower-kernel-arguments %s | FileCheck -check-prefix=MESA %s 5 6define amdgpu_kernel void @kern_noargs() { 7; HSA-LABEL: @kern_noargs( 8; HSA-NEXT: ret void 9; 10; MESA-LABEL: @kern_noargs( 11; MESA-NEXT: ret void 12; 13 ret void 14} 15 16define amdgpu_kernel void @kern_i8(i8 %arg) #0 { 17; HSA-LABEL: @kern_i8( 18; HSA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 19; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 0 20; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 21; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 22; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 23; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 24; HSA-NEXT: ret void 25; 26; MESA-LABEL: @kern_i8( 27; MESA-NEXT: [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 28; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_KERNARG_SEGMENT]], i64 36 29; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 30; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 31; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 32; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 33; MESA-NEXT: ret void 34; 35 store i8 %arg, i8 addrspace(1)* undef, align 1 36 ret void 37} 38 39define amdgpu_kernel void @kern_i16(i16 %arg) #0 { 40; HSA-LABEL: @kern_i16( 41; HSA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 42; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 0 43; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 44; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 45; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 46; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 47; HSA-NEXT: ret void 48; 49; MESA-LABEL: @kern_i16( 50; MESA-NEXT: [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 51; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I16_KERNARG_SEGMENT]], i64 36 52; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 53; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 54; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 55; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 56; MESA-NEXT: ret void 57; 58 store i16 %arg, i16 addrspace(1)* undef, align 1 59 ret void 60} 61 62define amdgpu_kernel void @kern_f16(half %arg) #0 { 63; HSA-LABEL: @kern_f16( 64; HSA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 65; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 0 66; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 67; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 68; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 69; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 70; HSA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1 71; HSA-NEXT: ret void 72; 73; MESA-LABEL: @kern_f16( 74; MESA-NEXT: [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 75; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F16_KERNARG_SEGMENT]], i64 36 76; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 77; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 78; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 79; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 80; MESA-NEXT: store half [[ARG_LOAD]], half addrspace(1)* undef, align 1 81; MESA-NEXT: ret void 82; 83 store half %arg, half addrspace(1)* undef, align 1 84 ret void 85} 86 87define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 { 88; HSA-LABEL: @kern_zeroext_i8( 89; HSA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 90; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0 91; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 92; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 93; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 94; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 95; HSA-NEXT: ret void 96; 97; MESA-LABEL: @kern_zeroext_i8( 98; MESA-NEXT: [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 99; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36 100; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 101; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 102; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 103; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 104; MESA-NEXT: ret void 105; 106 store i8 %arg, i8 addrspace(1)* undef, align 1 107 ret void 108} 109 110define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 { 111; HSA-LABEL: @kern_zeroext_i16( 112; HSA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 113; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0 114; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 115; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 116; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 117; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 118; HSA-NEXT: ret void 119; 120; MESA-LABEL: @kern_zeroext_i16( 121; MESA-NEXT: [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 122; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36 123; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 124; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 125; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 126; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 127; MESA-NEXT: ret void 128; 129 store i16 %arg, i16 addrspace(1)* undef, align 1 130 ret void 131} 132 133define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 { 134; HSA-LABEL: @kern_signext_i8( 135; HSA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 136; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0 137; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 138; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 139; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 140; HSA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 141; HSA-NEXT: ret void 142; 143; MESA-LABEL: @kern_signext_i8( 144; MESA-NEXT: [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 145; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36 146; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 147; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 148; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 149; MESA-NEXT: store i8 [[TMP2]], i8 addrspace(1)* undef, align 1 150; MESA-NEXT: ret void 151; 152 store i8 %arg, i8 addrspace(1)* undef, align 1 153 ret void 154} 155 156define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 { 157; HSA-LABEL: @kern_signext_i16( 158; HSA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 159; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0 160; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 161; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 162; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 163; HSA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 164; HSA-NEXT: ret void 165; 166; MESA-LABEL: @kern_signext_i16( 167; MESA-NEXT: [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 168; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36 169; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 170; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 171; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 172; MESA-NEXT: store i16 [[TMP2]], i16 addrspace(1)* undef, align 1 173; MESA-NEXT: ret void 174; 175 store i16 %arg, i16 addrspace(1)* undef, align 1 176 ret void 177} 178 179define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) { 180; HSA-LABEL: @kern_i8_i8( 181; HSA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 182; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 183; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 184; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 185; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 186; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0 187; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 188; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 189; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 190; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 191; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1 192; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1 193; HSA-NEXT: ret void 194; 195; MESA-LABEL: @kern_i8_i8( 196; MESA-NEXT: [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 197; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 198; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 199; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 200; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 201; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36 202; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 203; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 204; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 205; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 206; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1 207; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1 208; MESA-NEXT: ret void 209; 210 store volatile i8 %arg0, i8 addrspace(1)* undef, align 1 211 store volatile i8 %arg1, i8 addrspace(1)* undef, align 1 212 ret void 213} 214 215define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) { 216; HSA-LABEL: @kern_v3i8( 217; HSA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 218; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 0 219; HSA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 220; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 221; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 222; HSA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> 223; HSA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4 224; HSA-NEXT: ret void 225; 226; MESA-LABEL: @kern_v3i8( 227; MESA-NEXT: [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 228; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I8_KERNARG_SEGMENT]], i64 36 229; MESA-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 230; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 231; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 232; MESA-NEXT: [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8> 233; MESA-NEXT: store <3 x i8> [[ARG_LOAD]], <3 x i8> addrspace(1)* undef, align 4 234; MESA-NEXT: ret void 235; 236 store <3 x i8> %arg, <3 x i8> addrspace(1)* undef, align 4 237 ret void 238} 239 240define amdgpu_kernel void @kern_i24(i24 %arg0) { 241; HSA-LABEL: @kern_i24( 242; HSA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 243; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 0 244; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 245; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 246; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 247; HSA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef 248; HSA-NEXT: ret void 249; 250; MESA-LABEL: @kern_i24( 251; MESA-NEXT: [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 252; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I24_KERNARG_SEGMENT]], i64 36 253; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 254; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 255; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24 256; MESA-NEXT: store i24 [[TMP2]], i24 addrspace(1)* undef 257; MESA-NEXT: ret void 258; 259 store i24 %arg0, i24 addrspace(1)* undef 260 ret void 261} 262 263define amdgpu_kernel void @kern_i32(i32 %arg0) { 264; HSA-LABEL: @kern_i32( 265; HSA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 266; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0 267; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 268; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 269; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 270; HSA-NEXT: ret void 271; 272; MESA-LABEL: @kern_i32( 273; MESA-NEXT: [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 274; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36 275; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 276; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 277; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 278; MESA-NEXT: ret void 279; 280 store i32 %arg0, i32 addrspace(1)* undef 281 ret void 282} 283 284define amdgpu_kernel void @kern_f32(float %arg0) { 285; HSA-LABEL: @kern_f32( 286; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 287; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0 288; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)* 289; HSA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 290; HSA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef 291; HSA-NEXT: ret void 292; 293; MESA-LABEL: @kern_f32( 294; MESA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 295; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36 296; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)* 297; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 298; MESA-NEXT: store float [[ARG0_LOAD]], float addrspace(1)* undef 299; MESA-NEXT: ret void 300; 301 store float %arg0, float addrspace(1)* undef 302 ret void 303} 304 305define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) { 306; HSA-LABEL: @kern_v3i32( 307; HSA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 308; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 0 309; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 310; HSA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 311; HSA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0 312; HSA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 313; HSA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4 314; HSA-NEXT: ret void 315; 316; MESA-LABEL: @kern_v3i32( 317; MESA-NEXT: [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 318; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V3I32_KERNARG_SEGMENT]], i64 36 319; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 320; MESA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 321; MESA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0 322; MESA-NEXT: [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 323; MESA-NEXT: store <3 x i32> [[ARG0_LOAD]], <3 x i32> addrspace(1)* undef, align 4 324; MESA-NEXT: ret void 325; 326 store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef, align 4 327 ret void 328} 329 330define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 { 331; HSA-LABEL: @kern_v8i32( 332; HSA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 333; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0 334; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)* 335; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 336; HSA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef 337; HSA-NEXT: ret void 338; 339; MESA-LABEL: @kern_v8i32( 340; MESA-NEXT: [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 341; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36 342; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)* 343; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 344; MESA-NEXT: store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef 345; MESA-NEXT: ret void 346; 347 store <8 x i32> %arg, <8 x i32> addrspace(1)* undef 348 ret void 349} 350 351define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 { 352; HSA-LABEL: @kern_v8i64( 353; HSA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 354; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0 355; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)* 356; HSA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 357; HSA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef 358; HSA-NEXT: ret void 359; 360; MESA-LABEL: @kern_v8i64( 361; MESA-NEXT: [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(100) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 362; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36 363; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)* 364; MESA-NEXT: [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 365; MESA-NEXT: store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef 366; MESA-NEXT: ret void 367; 368 store <8 x i64> %arg, <8 x i64> addrspace(1)* undef 369 ret void 370} 371 372define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 { 373; HSA-LABEL: @kern_v16i64( 374; HSA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(128) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 375; HSA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0 376; HSA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)* 377; HSA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 378; HSA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef 379; HSA-NEXT: ret void 380; 381; MESA-LABEL: @kern_v16i64( 382; MESA-NEXT: [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(164) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 383; MESA-NEXT: [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36 384; MESA-NEXT: [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)* 385; MESA-NEXT: [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 386; MESA-NEXT: store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef 387; MESA-NEXT: ret void 388; 389 store <16 x i64> %arg, <16 x i64> addrspace(1)* undef 390 ret void 391} 392 393define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) { 394; HSA-LABEL: @kern_i32_v3i32( 395; HSA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 396; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0 397; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 398; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 399; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16 400; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 401; HSA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 402; HSA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 16, !invariant.load !0 403; HSA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 404; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 405; HSA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4 406; HSA-NEXT: ret void 407; 408; MESA-LABEL: @kern_i32_v3i32( 409; MESA-NEXT: [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 410; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36 411; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 412; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 413; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52 414; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <3 x i32> addrspace(4)* 415; MESA-NEXT: [[TMP1:%.*]] = bitcast <3 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]] to <4 x i32> addrspace(4)* 416; MESA-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[TMP1]], align 4, !invariant.load !0 417; MESA-NEXT: [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 418; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 419; MESA-NEXT: store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4 420; MESA-NEXT: ret void 421; 422 store i32 %arg0, i32 addrspace(1)* undef 423 store <3 x i32> %arg1, <3 x i32> addrspace(1)* undef, align 4 424 ret void 425} 426 427%struct.a = type { i32, i8, [4 x i8] } 428%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> } 429 430define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) { 431; HSA-LABEL: @kern_struct_a( 432; HSA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(12) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 433; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0 434; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)* 435; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 436; HSA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef 437; HSA-NEXT: ret void 438; 439; MESA-LABEL: @kern_struct_a( 440; MESA-NEXT: [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 441; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36 442; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)* 443; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 444; MESA-NEXT: store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef 445; MESA-NEXT: ret void 446; 447 store %struct.a %arg0, %struct.a addrspace(1)* undef 448 ret void 449} 450 451define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 { 452; HSA-LABEL: @kern_struct_b_packed( 453; HSA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 454; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0 455; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)* 456; HSA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 457; HSA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef 458; HSA-NEXT: ret void 459; 460; MESA-LABEL: @kern_struct_b_packed( 461; MESA-NEXT: [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(68) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 462; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36 463; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)* 464; MESA-NEXT: [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 465; MESA-NEXT: store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef 466; MESA-NEXT: ret void 467; 468 store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef 469 ret void 470} 471 472define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 { 473; HSA-LABEL: @kern_implicit_arg_num_bytes( 474; HSA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 475; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0 476; HSA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 477; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 478; HSA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 479; HSA-NEXT: ret void 480; 481; MESA-LABEL: @kern_implicit_arg_num_bytes( 482; MESA-NEXT: [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 483; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36 484; MESA-NEXT: [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)* 485; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 486; MESA-NEXT: store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef 487; MESA-NEXT: ret void 488; 489 store i32 %arg0, i32 addrspace(1)* undef 490 ret void 491} 492 493define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 { 494; HSA-LABEL: @kernel_implicitarg_no_struct_align( 495; HSA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 496; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64 497; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 498; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 499; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 500; HSA-NEXT: ret void 501; 502; MESA-LABEL: @kernel_implicitarg_no_struct_align( 503; MESA-NEXT: [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 504; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100 505; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 506; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 507; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 508; MESA-NEXT: ret void 509; 510 store i32 %arg1, i32 addrspace(1)* undef 511 ret void 512} 513 514define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 { 515; HSA-LABEL: @kern_lds_ptr( 516; HSA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 517; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0 518; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* 519; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 520; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 521; HSA-NEXT: ret void 522; 523; MESA-LABEL: @kern_lds_ptr( 524; MESA-NEXT: [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 525; MESA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36 526; MESA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* 527; MESA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 528; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 529; MESA-NEXT: ret void 530; 531 store i32 0, i32 addrspace(3)* %lds, align 4 532 ret void 533} 534 535define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { 536; HSA-LABEL: @kern_lds_ptr_si( 537; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 538; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 539; HSA-NEXT: ret void 540; 541; MESA-LABEL: @kern_lds_ptr_si( 542; MESA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 543; MESA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 544; MESA-NEXT: ret void 545; 546 store i32 0, i32 addrspace(3)* %lds, align 4 547 ret void 548} 549 550define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 { 551; HSA-LABEL: @kern_realign_i8_i8( 552; HSA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 553; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 554; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 555; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 556; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 557; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0 558; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 559; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 560; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 561; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 562; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 563; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 564; HSA-NEXT: ret void 565; 566; MESA-LABEL: @kern_realign_i8_i8( 567; MESA-NEXT: [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 568; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 569; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 570; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 571; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 572; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36 573; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 574; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 575; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 576; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 577; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 578; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 579; MESA-NEXT: ret void 580; 581 store volatile i8 %arg0, i8 addrspace(1)* undef 582 store volatile i8 %arg1, i8 addrspace(1)* undef 583 ret void 584} 585 586define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 { 587; HSA-LABEL: @kern_realign_i8_i8_i8( 588; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 589; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 590; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 591; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 592; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 593; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 594; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 595; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 596; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 597; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 598; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0 599; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 600; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 601; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 602; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 603; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 604; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 605; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 606; HSA-NEXT: ret void 607; 608; MESA-LABEL: @kern_realign_i8_i8_i8( 609; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 610; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 611; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 612; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 613; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 614; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 615; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 616; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 617; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 618; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 619; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 620; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 621; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 622; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 623; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 624; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 625; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 626; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 627; MESA-NEXT: ret void 628; 629 store volatile i8 %arg0, i8 addrspace(1)* undef 630 store volatile i8 %arg1, i8 addrspace(1)* undef 631 store volatile i8 %arg2, i8 addrspace(1)* undef 632 ret void 633} 634 635define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 { 636; HSA-LABEL: @kern_realign_i8_i8_i8_i8( 637; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 638; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 639; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 640; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 641; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 642; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 643; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 644; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 645; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 646; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 647; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 648; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 649; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 650; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 651; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 652; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 653; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 654; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 655; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 656; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 657; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 658; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 659; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 660; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 661; HSA-NEXT: ret void 662; 663; MESA-LABEL: @kern_realign_i8_i8_i8_i8( 664; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 665; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 666; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 667; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 668; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 669; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 670; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 671; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 672; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 673; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 674; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 675; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 676; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 677; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 678; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 679; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 680; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 681; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 682; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 683; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 684; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 685; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 686; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 687; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 688; MESA-NEXT: ret void 689; 690 store volatile i8 %arg0, i8 addrspace(1)* undef 691 store volatile i8 %arg1, i8 addrspace(1)* undef 692 store volatile i8 %arg2, i8 addrspace(1)* undef 693 store volatile i8 %arg3, i8 addrspace(1)* undef 694 ret void 695} 696 697define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { 698; HSA-LABEL: @kern_realign_i8_v3i8( 699; HSA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 700; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0 701; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 702; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 703; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 704; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4 705; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 706; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 707; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 708; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> 709; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 710; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef 711; HSA-NEXT: ret void 712; 713; MESA-LABEL: @kern_realign_i8_v3i8( 714; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 715; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36 716; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 717; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 718; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 719; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40 720; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 721; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 722; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 723; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> 724; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 725; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef 726; MESA-NEXT: ret void 727; 728 store volatile i8 %arg0, i8 addrspace(1)* undef 729 store volatile <3 x i8> %arg1, <3 x i8> addrspace(1)* undef 730 ret void 731} 732 733define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { 734; HSA-LABEL: @kern_realign_i8_i16( 735; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 736; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 737; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 738; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 739; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 740; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 741; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 742; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 743; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 744; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 745; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 746; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 747; HSA-NEXT: ret void 748; 749; MESA-LABEL: @kern_realign_i8_i16( 750; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 751; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 752; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 753; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 754; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 755; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 756; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 757; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 758; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 759; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 760; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 761; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 762; MESA-NEXT: ret void 763; 764 store volatile i8 %arg0, i8 addrspace(1)* undef 765 store volatile i16 %arg1, i16 addrspace(1)* undef 766 ret void 767} 768 769define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { 770; HSA-LABEL: @kern_realign_i1_i1( 771; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 772; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 773; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 774; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 775; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 776; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 777; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 778; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 779; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 780; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 781; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 782; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 783; HSA-NEXT: ret void 784; 785; MESA-LABEL: @kern_realign_i1_i1( 786; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 787; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 788; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 789; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 790; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 791; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 792; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 793; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 794; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 795; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 796; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 797; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 798; MESA-NEXT: ret void 799; 800 store volatile i1 %arg0, i1 addrspace(1)* undef 801 store volatile i1 %arg1, i1 addrspace(1)* undef 802 ret void 803} 804 805define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 { 806; HSA-LABEL: @kern_realign_i1_i1_i1( 807; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 808; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 809; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 810; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 811; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 812; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 813; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 814; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 815; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 816; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 817; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 818; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 819; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 820; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 821; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 822; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 823; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 824; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 825; HSA-NEXT: ret void 826; 827; MESA-LABEL: @kern_realign_i1_i1_i1( 828; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 829; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 830; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 831; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 832; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 833; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 834; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 835; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 836; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 837; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 838; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 839; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 840; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 841; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 842; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 843; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 844; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 845; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 846; MESA-NEXT: ret void 847; 848 store volatile i1 %arg0, i1 addrspace(1)* undef 849 store volatile i1 %arg1, i1 addrspace(1)* undef 850 store volatile i1 %arg2, i1 addrspace(1)* undef 851 ret void 852} 853 854define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 { 855; HSA-LABEL: @kern_realign_i1_i1_i1_i1( 856; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 857; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 858; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 859; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 860; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 861; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 862; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 863; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 864; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 865; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 866; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 867; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 868; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 869; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 870; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 871; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 872; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 873; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 874; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 875; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 876; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 877; HSA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 878; HSA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 879; HSA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef 880; HSA-NEXT: ret void 881; 882; MESA-LABEL: @kern_realign_i1_i1_i1_i1( 883; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 884; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 885; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 886; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 887; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 888; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 889; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 890; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 891; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 892; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 893; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 894; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 895; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 896; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 897; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 898; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 899; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 900; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 901; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 902; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 903; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 904; MESA-NEXT: store volatile i1 [[TMP5]], i1 addrspace(1)* undef 905; MESA-NEXT: store volatile i1 [[TMP8]], i1 addrspace(1)* undef 906; MESA-NEXT: store volatile i1 [[TMP11]], i1 addrspace(1)* undef 907; MESA-NEXT: ret void 908; 909 store volatile i1 %arg0, i1 addrspace(1)* undef 910 store volatile i1 %arg1, i1 addrspace(1)* undef 911 store volatile i1 %arg2, i1 addrspace(1)* undef 912 store volatile i1 %arg3, i1 addrspace(1)* undef 913 ret void 914} 915 916define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { 917; HSA-LABEL: @kern_realign_i1_v3i1( 918; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 919; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 920; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 921; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 922; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 923; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 4 924; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 925; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 926; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3 927; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1> 928; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 929; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef 930; HSA-NEXT: ret void 931; 932; MESA-LABEL: @kern_realign_i1_v3i1( 933; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 934; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 935; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 936; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 937; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 938; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 40 939; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 940; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 941; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3 942; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1> 943; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 944; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef 945; MESA-NEXT: ret void 946; 947 store volatile i1 %arg0, i1 addrspace(1)* undef 948 store volatile <3 x i1> %arg1, <3 x i1> addrspace(1)* undef 949 ret void 950} 951 952define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { 953; HSA-LABEL: @kern_realign_i1_i16( 954; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 955; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 956; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 957; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 958; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 959; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 960; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 961; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 962; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 963; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 964; HSA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 965; HSA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 966; HSA-NEXT: ret void 967; 968; MESA-LABEL: @kern_realign_i1_i16( 969; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 970; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 971; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 972; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 973; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 974; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 975; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 976; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 977; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 978; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 979; MESA-NEXT: store volatile i1 [[TMP2]], i1 addrspace(1)* undef 980; MESA-NEXT: store volatile i16 [[TMP5]], i16 addrspace(1)* undef 981; MESA-NEXT: ret void 982; 983 store volatile i1 %arg0, i1 addrspace(1)* undef 984 store volatile i16 %arg1, i16 addrspace(1)* undef 985 ret void 986} 987 988define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 { 989; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( 990; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 991; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 992; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 993; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 994; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 995; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 996; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 997; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 998; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 999; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 1000; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 1001; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1002; HSA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1003; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 1004; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 1005; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 1006; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1007; HSA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1008; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 1009; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 1010; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 1011; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1012; HSA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1013; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8 1014; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 1015; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 1016; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1017; HSA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1018; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16 1019; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 1020; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4 1021; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1022; HSA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1023; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24 1024; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 1025; HSA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 1026; HSA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 1027; HSA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 1028; HSA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 1029; HSA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef 1030; HSA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef 1031; HSA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef 1032; HSA-NEXT: ret void 1033; 1034; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( 1035; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1036; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1037; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1038; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1039; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 1040; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1041; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1042; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1043; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 1044; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 1045; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1046; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1047; MESA-NEXT: [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1048; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 1049; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 1050; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 1051; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1052; MESA-NEXT: [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1053; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 1054; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 1055; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 1056; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1057; MESA-NEXT: [[TMP12:%.*]] = load i32, i32 addrspace(4)* [[ARG5_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 1058; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8 1059; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8 1060; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 1061; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1062; MESA-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(4)* [[ARG6_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 1063; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16 1064; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 1065; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40 1066; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1067; MESA-NEXT: [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0 1068; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24 1069; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 1070; MESA-NEXT: store volatile i8 [[TMP2]], i8 addrspace(1)* undef 1071; MESA-NEXT: store volatile i8 [[TMP5]], i8 addrspace(1)* undef 1072; MESA-NEXT: store volatile i8 [[TMP8]], i8 addrspace(1)* undef 1073; MESA-NEXT: store volatile i8 [[TMP11]], i8 addrspace(1)* undef 1074; MESA-NEXT: store volatile i8 [[TMP14]], i8 addrspace(1)* undef 1075; MESA-NEXT: store volatile i8 [[TMP17]], i8 addrspace(1)* undef 1076; MESA-NEXT: store volatile i8 [[TMP20]], i8 addrspace(1)* undef 1077; MESA-NEXT: ret void 1078; 1079 store volatile i8 %arg0, i8 addrspace(1)* undef 1080 store volatile i8 %arg1, i8 addrspace(1)* undef 1081 store volatile i8 %arg2, i8 addrspace(1)* undef 1082 store volatile i8 %arg3, i8 addrspace(1)* undef 1083 store volatile i8 %arg5, i8 addrspace(1)* undef 1084 store volatile i8 %arg6, i8 addrspace(1)* undef 1085 store volatile i8 %arg7, i8 addrspace(1)* undef 1086 ret void 1087} 1088 1089define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 { 1090; HSA-LABEL: @kern_realign_f16_f16( 1091; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1092; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 1093; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1094; HSA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1095; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 1096; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 1097; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0 1098; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1099; HSA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0 1100; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 1101; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 1102; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half 1103; HSA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef 1104; HSA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef 1105; HSA-NEXT: ret void 1106; 1107; MESA-LABEL: @kern_realign_f16_f16( 1108; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1109; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 1110; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1111; MESA-NEXT: [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1112; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 1113; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half 1114; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36 1115; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)* 1116; MESA-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0 1117; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 1118; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 1119; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half 1120; MESA-NEXT: store volatile half [[ARG0_LOAD]], half addrspace(1)* undef 1121; MESA-NEXT: store volatile half [[ARG1_LOAD]], half addrspace(1)* undef 1122; MESA-NEXT: ret void 1123; 1124 store volatile half %arg0, half addrspace(1)* undef 1125 store volatile half %arg1, half addrspace(1)* undef 1126 ret void 1127} 1128 1129define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 { 1130; HSA-LABEL: @kern_global_ptr( 1131; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1132; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 1133; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1134; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1135; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1136; HSA-NEXT: ret void 1137; 1138; MESA-LABEL: @kern_global_ptr( 1139; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1140; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 1141; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1142; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1143; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1144; MESA-NEXT: ret void 1145; 1146 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1147 ret void 1148} 1149 1150define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* dereferenceable(42) %ptr) #0 { 1151; HSA-LABEL: @kern_global_ptr_dereferencable( 1152; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1153; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0 1154; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1155; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1 1156; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1157; HSA-NEXT: ret void 1158; 1159; MESA-LABEL: @kern_global_ptr_dereferencable( 1160; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1161; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36 1162; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1163; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1 1164; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1165; MESA-NEXT: ret void 1166; 1167 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1168 ret void 1169} 1170 1171define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1)* dereferenceable_or_null(128) %ptr) #0 { 1172; HSA-LABEL: @kern_global_ptr_dereferencable_or_null( 1173; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1174; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0 1175; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1176; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2 1177; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1178; HSA-NEXT: ret void 1179; 1180; MESA-LABEL: @kern_global_ptr_dereferencable_or_null( 1181; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1182; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36 1183; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1184; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2 1185; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1186; MESA-NEXT: ret void 1187; 1188 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1189 ret void 1190} 1191 1192define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr) #0 { 1193; HSA-LABEL: @kern_nonnull_global_ptr( 1194; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1195; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 1196; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1197; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0 1198; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1199; HSA-NEXT: ret void 1200; 1201; MESA-LABEL: @kern_nonnull_global_ptr( 1202; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1203; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 1204; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1205; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0 1206; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1207; MESA-NEXT: ret void 1208; 1209 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1210 ret void 1211} 1212 1213define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %ptr) #0 { 1214; HSA-LABEL: @kern_align32_global_ptr( 1215; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1216; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0 1217; HSA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1218; HSA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3 1219; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1220; HSA-NEXT: ret void 1221; 1222; MESA-LABEL: @kern_align32_global_ptr( 1223; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1224; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36 1225; MESA-NEXT: [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)* 1226; MESA-NEXT: [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3 1227; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef 1228; MESA-NEXT: ret void 1229; 1230 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1231 ret void 1232} 1233 1234define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 { 1235; HSA-LABEL: @kern_noalias_global_ptr( 1236; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1237; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef 1238; HSA-NEXT: ret void 1239; 1240; MESA-LABEL: @kern_noalias_global_ptr( 1241; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1242; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef 1243; MESA-NEXT: ret void 1244; 1245 store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef 1246 ret void 1247} 1248 1249define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 { 1250; HSA-LABEL: @kern_noalias_global_ptr_x2( 1251; HSA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1252; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef 1253; HSA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef 1254; HSA-NEXT: ret void 1255; 1256; MESA-LABEL: @kern_noalias_global_ptr_x2( 1257; MESA-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1258; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef 1259; MESA-NEXT: store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef 1260; MESA-NEXT: ret void 1261; 1262 store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef 1263 store volatile i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* undef 1264 ret void 1265} 1266 1267define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 { 1268; HSA-LABEL: @struct_i8_i8_arg( 1269; HSA-NEXT: entry: 1270; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1271; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0 1272; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)* 1273; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1274; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0 1275; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1 1276; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1277; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1278; HSA-NEXT: ret void 1279; 1280; MESA-LABEL: @struct_i8_i8_arg( 1281; MESA-NEXT: entry: 1282; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1283; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36 1284; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i8 } addrspace(4)* 1285; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, { i8, i8 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1286; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0 1287; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1 1288; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1289; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1290; MESA-NEXT: ret void 1291; 1292entry: 1293 %elt0 = extractvalue {i8, i8} %in, 0 1294 %elt1 = extractvalue {i8, i8} %in, 1 1295 store volatile i8 %elt0, i8 addrspace(1)* null, align 4 1296 store volatile i8 %elt1, i8 addrspace(1)* null, align 4 1297 ret void 1298} 1299 1300define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 { 1301; HSA-LABEL: @struct_i8_i16_arg( 1302; HSA-NEXT: entry: 1303; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1304; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0 1305; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)* 1306; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1307; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0 1308; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1 1309; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1310; HSA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4 1311; HSA-NEXT: ret void 1312; 1313; MESA-LABEL: @struct_i8_i16_arg( 1314; MESA-NEXT: entry: 1315; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1316; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36 1317; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to { i8, i16 } addrspace(4)* 1318; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, { i8, i16 } addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1319; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0 1320; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1 1321; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1322; MESA-NEXT: store volatile i16 [[ELT1]], i16 addrspace(1)* null, align 4 1323; MESA-NEXT: ret void 1324; 1325entry: 1326 %elt0 = extractvalue {i8, i16} %in, 0 1327 %elt1 = extractvalue {i8, i16} %in, 1 1328 store volatile i8 %elt0, i8 addrspace(1)* null, align 4 1329 store volatile i16 %elt1, i16 addrspace(1)* null, align 4 1330 ret void 1331} 1332 1333define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 { 1334; HSA-LABEL: @array_2xi8_arg( 1335; HSA-NEXT: entry: 1336; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1337; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0 1338; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)* 1339; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1340; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0 1341; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1 1342; HSA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1343; HSA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1344; HSA-NEXT: ret void 1345; 1346; MESA-LABEL: @array_2xi8_arg( 1347; MESA-NEXT: entry: 1348; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1349; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36 1350; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i8] addrspace(4)* 1351; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], [2 x i8] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1352; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0 1353; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1 1354; MESA-NEXT: store volatile i8 [[ELT0]], i8 addrspace(1)* null, align 4 1355; MESA-NEXT: store volatile i8 [[ELT1]], i8 addrspace(1)* null, align 4 1356; MESA-NEXT: ret void 1357; 1358entry: 1359 %elt0 = extractvalue [2 x i8] %in, 0 1360 %elt1 = extractvalue [2 x i8] %in, 1 1361 store volatile i8 %elt0, i8 addrspace(1)* null, align 4 1362 store volatile i8 %elt1, i8 addrspace(1)* null, align 4 1363 ret void 1364} 1365 1366define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 { 1367; HSA-LABEL: @array_2xi1_arg( 1368; HSA-NEXT: entry: 1369; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1370; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0 1371; HSA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)* 1372; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1373; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0 1374; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1 1375; HSA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4 1376; HSA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4 1377; HSA-NEXT: ret void 1378; 1379; MESA-LABEL: @array_2xi1_arg( 1380; MESA-NEXT: entry: 1381; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1382; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36 1383; MESA-NEXT: [[IN_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[IN_KERNARG_OFFSET]] to [2 x i1] addrspace(4)* 1384; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], [2 x i1] addrspace(4)* [[IN_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1385; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0 1386; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1 1387; MESA-NEXT: store volatile i1 [[ELT0]], i1 addrspace(1)* null, align 4 1388; MESA-NEXT: store volatile i1 [[ELT1]], i1 addrspace(1)* null, align 4 1389; MESA-NEXT: ret void 1390; 1391entry: 1392 %elt0 = extractvalue [2 x i1] %in, 0 1393 %elt1 = extractvalue [2 x i1] %in, 1 1394 store volatile i1 %elt0, i1 addrspace(1)* null, align 4 1395 store volatile i1 %elt1, i1 addrspace(1)* null, align 4 1396 ret void 1397} 1398 1399define amdgpu_kernel void @only_empty_struct({} %empty) #0 { 1400; HSA-LABEL: @only_empty_struct( 1401; HSA-NEXT: ret void 1402; 1403; MESA-LABEL: @only_empty_struct( 1404; MESA-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(36) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1405; MESA-NEXT: ret void 1406; 1407 ret void 1408} 1409 1410define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 { 1411; HSA-LABEL: @empty_struct_with_other( 1412; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1413; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0 1414; HSA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 1415; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 1416; HSA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 1417; HSA-NEXT: ret void 1418; 1419; MESA-LABEL: @empty_struct_with_other( 1420; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 1421; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36 1422; MESA-NEXT: [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)* 1423; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0 1424; MESA-NEXT: store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef 1425; MESA-NEXT: ret void 1426; 1427 store i32 %arg1, i32 addrspace(1)* undef 1428 ret void 1429} 1430 1431attributes #0 = { nounwind "target-cpu"="kaveri" } 1432attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" } 1433attributes #2 = { nounwind "target-cpu"="tahiti" } 1434 1435; GCN: 0 = !{} 1436; GCN: !1 = !{i64 42} 1437; GCN: !2 = !{i64 128} 1438; GCN: !3 = !{i64 1024} 1439