; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s

; FIXME: Need constant bus fixup pre-gfx10 for movrel
; ERR: Bad machine code: VOP* instruction violates constant bus restriction

; <8 x i32> insert where the vector, the inserted value and the dynamic index
; are all inreg. The gfx900 and gfx1010 expectations are the same chain of
; s_cmp_eq_u32 / s_cselect_b32 pairs, one pair per element.
define amdgpu_ps <8 x i32> @dyn_insertelement_v8i32_s_s_s(<8 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8i32_s_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8i32_s_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 0
; MOVREL-NEXT:    s_cselect_b32 s0, s10, s2
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 1
; MOVREL-NEXT:    s_cselect_b32 s1, s10, s3
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 2
; MOVREL-NEXT:    s_cselect_b32 s2, s10, s4
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 3
; MOVREL-NEXT:    s_cselect_b32 s3, s10, s5
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 4
; MOVREL-NEXT:    s_cselect_b32 s4, s10, s6
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 5
; MOVREL-NEXT:    s_cselect_b32 s5, s10, s7
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 6
; MOVREL-NEXT:    s_cselect_b32 s6, s10, s8
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 7
; MOVREL-NEXT:    s_cselect_b32 s7, s10, s9
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x i32> %vec, i32 %val, i32 %idx
  ret <8 x i32> %insert
}

; All-inreg insert with <8 x i8 addrspace(3)*> elements. The expected code is
; the same select chain as in the <8 x i32> case.
define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 0
; GPRIDX-NEXT:    s_cselect_b32 s0, s10, s2
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 1
; GPRIDX-NEXT:    s_cselect_b32 s1, s10, s3
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 2
; GPRIDX-NEXT:    s_cselect_b32 s2, s10, s4
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 3
; GPRIDX-NEXT:    s_cselect_b32 s3, s10, s5
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 4
; GPRIDX-NEXT:    s_cselect_b32 s4, s10, s6
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 5
; GPRIDX-NEXT:    s_cselect_b32 s5, s10, s7
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 6
; GPRIDX-NEXT:    s_cselect_b32 s6, s10, s8
; GPRIDX-NEXT:    s_cmp_eq_u32 s11, 7
; GPRIDX-NEXT:    s_cselect_b32 s7, s10, s9
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p3i8_s_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 0
; MOVREL-NEXT:    s_cselect_b32 s0, s10, s2
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 1
; MOVREL-NEXT:    s_cselect_b32 s1, s10, s3
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 2
; MOVREL-NEXT:    s_cselect_b32 s2, s10, s4
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 3
; MOVREL-NEXT:    s_cselect_b32 s3, s10, s5
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 4
; MOVREL-NEXT:    s_cselect_b32 s4, s10, s6
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 5
; MOVREL-NEXT:    s_cselect_b32 s5, s10, s7
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 6
; MOVREL-NEXT:    s_cselect_b32 s6, s10, s8
; MOVREL-NEXT:    s_cmp_eq_u32 s11, 7
; MOVREL-NEXT:    s_cselect_b32 s7, s10, s9
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
  ret <8 x i8 addrspace(3)*> %insert
}

; Default calling convention; the vector operand is the IR constant
; <1.0, 2.0, ..., 8.0> and both the value and the index are non-inreg
; arguments. The expected code materializes the constant lanes with s_mov_b32,
; copies them to VGPRs, and selects per lane with v_cmp_eq_u32 / v_cndmask_b32.
define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_const_s_v_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT:    s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT:    s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT:    s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT:    s_mov_b32 s7, 4.0
; GPRIDX-NEXT:    s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT:    s_mov_b32 s5, 2.0
; GPRIDX-NEXT:    s_mov_b32 s4, 1.0
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s11
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s4
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s5
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s6
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s7
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s8
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s9
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s10
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_insertelement_v8f32_const_s_v_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
; MOVREL-NEXT:    s_mov_b32 s4, 1.0
; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT:    s_mov_b32 s7, 4.0
; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
; MOVREL-NEXT:    s_mov_b32 s5, 2.0
; MOVREL-NEXT:    v_mov_b32_e32 v15, s11
; MOVREL-NEXT:    v_mov_b32_e32 v8, s4
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; MOVREL-NEXT:    v_mov_b32_e32 v9, s5
; MOVREL-NEXT:    v_mov_b32_e32 v10, s6
; MOVREL-NEXT:    v_mov_b32_e32 v11, s7
; MOVREL-NEXT:    v_mov_b32_e32 v12, s8
; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
; MOVREL-NEXT:    v_mov_b32_e32 v13, s9
; MOVREL-NEXT:    v_mov_b32_e32 v14, s10
; MOVREL-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
; MOVREL-NEXT:    v_mov_b32_e32 v1, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc_lo
; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
; MOVREL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %insert = insertelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, float %val, i32 %idx
  ret <8 x float> %insert
}
; <8 x float> insert with inreg vector and inreg value, non-inreg index.
; Expected code compares the index VGPR (v0) against each lane number and
; selects with v_cndmask_b32.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_v(<8 x float> inreg %vec, float inreg %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
; GPRIDX-NEXT:    v_mov_b32_e32 v7, s10
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v9, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v7, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v7, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    v_mov_b32_e32 v15, s7
; MOVREL-NEXT:    v_mov_b32_e32 v7, s10
; MOVREL-NEXT:    v_mov_b32_e32 v8, s0
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; MOVREL-NEXT:    v_mov_b32_e32 v9, s1
; MOVREL-NEXT:    v_mov_b32_e32 v10, s2
; MOVREL-NEXT:    v_mov_b32_e32 v11, s3
; MOVREL-NEXT:    v_mov_b32_e32 v12, s4
; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; MOVREL-NEXT:    v_mov_b32_e32 v13, s5
; MOVREL-NEXT:    v_mov_b32_e32 v14, s6
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v9, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v0
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v7, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v0
; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v7, vcc_lo
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x float> insert with inreg vector and inreg index, non-inreg value.
; Expected code uses v_cmp_eq_u32_e64 with the index taken from s10.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_s(<8 x float> inreg %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 2
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 3
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 4
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 6
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s10, 7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    v_mov_b32_e32 v15, s7
; MOVREL-NEXT:    v_mov_b32_e32 v8, s0
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 0
; MOVREL-NEXT:    v_mov_b32_e32 v9, s1
; MOVREL-NEXT:    v_mov_b32_e32 v10, s2
; MOVREL-NEXT:    v_mov_b32_e32 v11, s3
; MOVREL-NEXT:    v_mov_b32_e32 v12, s4
; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 1
; MOVREL-NEXT:    v_mov_b32_e32 v13, s5
; MOVREL-NEXT:    v_mov_b32_e32 v14, s6
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v9, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 2
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 3
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 4
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 5
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 6
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s10, 7
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc_lo
; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x float> insert with non-inreg vector, inreg value and inreg index.
; Expected code copies the value from s2 to v8 once, then selects it into each
; lane in place.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_s(<8 x float> %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 2
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 3
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 4
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 6
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_mov_b32_e32 v8, s2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 0
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 1
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 2
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 3
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 4
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 5
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 6
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s3, 7
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x float> insert with inreg vector, non-inreg value and non-inreg index.
; Expected code copies the vector lanes to VGPRs and selects per lane against
; the index in v1.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_v_v(<8 x float> inreg %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_v_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s7
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s0
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s1
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s2
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s3
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s4
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s5
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s6
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc
; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
; GPRIDX-NEXT:    v_mov_b32_e32 v1, v9
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_v_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    v_mov_b32_e32 v15, s7
; MOVREL-NEXT:    v_mov_b32_e32 v8, s0
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; MOVREL-NEXT:    v_mov_b32_e32 v9, s1
; MOVREL-NEXT:    v_mov_b32_e32 v10, s2
; MOVREL-NEXT:    v_mov_b32_e32 v11, s3
; MOVREL-NEXT:    v_mov_b32_e32 v12, s4
; MOVREL-NEXT:    v_cndmask_b32_e32 v8, v8, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
; MOVREL-NEXT:    v_mov_b32_e32 v13, s5
; MOVREL-NEXT:    v_mov_b32_e32 v14, s6
; MOVREL-NEXT:    v_cndmask_b32_e32 v9, v9, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v10, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v11, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v12, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v13, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v1
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v14, v0, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v1
; MOVREL-NEXT:    v_mov_b32_e32 v1, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v15, v0, vcc_lo
; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x float> insert with non-inreg vector, inreg value and non-inreg index.
; Expected code copies the value from s2 to v9 and compares against the index
; in v8.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_s_v(<8 x float> %vec, float inreg %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_s_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s2
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v8
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_s_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_mov_b32_e32 v9, s2
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v9, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v8
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v9, vcc_lo
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x float> insert with non-inreg vector and value, inreg index.
; Expected code compares the index in s2 against each lane number and selects
; the value (v8) into that lane.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_s(<8 x float> %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 7
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x i8 addrspace(3)*> insert with non-inreg vector and value, inreg index.
; The result is converted with ptrtoint + bitcast so it is returned as
; <8 x float>; the expected code is the same per-lane compare/select sequence
; as for dyn_insertelement_v8f32_v_v_s.
define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p3i8_v_v_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 0
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 3
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 4
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 5
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 6
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 vcc, s2, 7
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p3i8_v_v_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 0
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 1
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 2
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 3
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 4
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 5
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 6
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e64 vcc_lo, s2, 7
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx
  %cast.0 = ptrtoint <8 x i8 addrspace(3)*> %insert to <8 x i32>
  %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float>
  ret <8 x float> %cast.1
}

; <8 x float> insert where vector, value and index are all non-inreg.
; Expected code compares the index in v9 against each lane number and selects
; the value (v8) into that lane.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 3, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 4, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 5, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 6, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 7, v9
; GPRIDX-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 2, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 3, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 4, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 5, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 6, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc_lo
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 7, v9
; MOVREL-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc_lo
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx
  ret <8 x float> %insert
}

; <8 x i64> insert where vector, value and index are all inreg. Expected code
; writes the index to m0 and performs the insert with one s_movreld_b64; the
; gfx900 expectation has an s_nop 0 between the m0 write and the s_movreld_b64.
define amdgpu_ps <8 x i64> @dyn_insertelement_v8i64_s_s_s(<8 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8i64_s_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s8, s10
; GPRIDX-NEXT:    s_mov_b32 s9, s11
; GPRIDX-NEXT:    s_mov_b32 s10, s12
; GPRIDX-NEXT:    s_mov_b32 s11, s13
; GPRIDX-NEXT:    s_mov_b32 s12, s14
; GPRIDX-NEXT:    s_mov_b32 s13, s15
; GPRIDX-NEXT:    s_mov_b32 s14, s16
; GPRIDX-NEXT:    s_mov_b32 s15, s17
; GPRIDX-NEXT:    s_mov_b32 m0, s20
; GPRIDX-NEXT:    s_nop 0
; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[18:19]
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8i64_s_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 m0, s20
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s8, s10
; MOVREL-NEXT:    s_mov_b32 s9, s11
; MOVREL-NEXT:    s_mov_b32 s10, s12
; MOVREL-NEXT:    s_mov_b32 s11, s13
; MOVREL-NEXT:    s_mov_b32 s12, s14
; MOVREL-NEXT:    s_mov_b32 s13, s15
; MOVREL-NEXT:    s_mov_b32 s14, s16
; MOVREL-NEXT:    s_mov_b32 s15, s17
; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[18:19]
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x i64> %vec, i64 %val, i32 %idx
  ret <8 x i64> %insert
}

; All-inreg insert with <8 x i8 addrspace(1)*> elements. The expected code is
; the same m0 + s_movreld_b64 sequence as for dyn_insertelement_v8i64_s_s_s.
define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
; GPRIDX-NEXT:    s_mov_b32 s5, s7
; GPRIDX-NEXT:    s_mov_b32 s6, s8
; GPRIDX-NEXT:    s_mov_b32 s7, s9
; GPRIDX-NEXT:    s_mov_b32 s8, s10
; GPRIDX-NEXT:    s_mov_b32 s9, s11
; GPRIDX-NEXT:    s_mov_b32 s10, s12
; GPRIDX-NEXT:    s_mov_b32 s11, s13
; GPRIDX-NEXT:    s_mov_b32 s12, s14
; GPRIDX-NEXT:    s_mov_b32 s13, s15
; GPRIDX-NEXT:    s_mov_b32 s14, s16
; GPRIDX-NEXT:    s_mov_b32 s15, s17
; GPRIDX-NEXT:    s_mov_b32 m0, s20
; GPRIDX-NEXT:    s_nop 0
; GPRIDX-NEXT:    s_movreld_b64 s[0:1], s[18:19]
; GPRIDX-NEXT:    ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8p1i8_s_s_s:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_mov_b32 m0, s20
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
; MOVREL-NEXT:    s_mov_b32 s5, s7
; MOVREL-NEXT:    s_mov_b32 s6, s8
; MOVREL-NEXT:    s_mov_b32 s7, s9
; MOVREL-NEXT:    s_mov_b32 s8, s10
; MOVREL-NEXT:    s_mov_b32 s9, s11
; MOVREL-NEXT:    s_mov_b32 s10, s12
; MOVREL-NEXT:    s_mov_b32 s11, s13
; MOVREL-NEXT:    s_mov_b32 s12, s14
; MOVREL-NEXT:    s_mov_b32 s13, s15
; MOVREL-NEXT:    s_mov_b32 s14, s16
; MOVREL-NEXT:    s_mov_b32 s15, s17
; MOVREL-NEXT:    s_movreld_b64 s[0:1], s[18:19]
; MOVREL-NEXT:    ; return to shader part epilog
entry:
  %insert = insertelement <8 x i8 addrspace(1)*> %vec, i8 addrspace(1)* %val, i32 %idx
  ret <8 x i8 addrspace(1)*> %insert
}

define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT:    s_mov_b32 s18, 0
; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1.0
; GPRIDX-NEXT:    s_mov_b32 s19, 0x40200000
; GPRIDX-NEXT:    s_mov_b32 s17, 0x401c0000
; GPRIDX-NEXT:    s_mov_b32 s16, s18
; GPRIDX-NEXT:    s_mov_b32 s15, 0x40180000
; GPRIDX-NEXT:    s_mov_b32 s14, s18
; GPRIDX-NEXT:    s_mov_b32 s13, 0x40140000
; GPRIDX-NEXT:    s_mov_b32 s12, s18
; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4.0
; GPRIDX-NEXT:    s_mov_b32 s9, 0x40080000
; GPRIDX-NEXT:    s_mov_b32 s8, s18
; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2.0
; GPRIDX-NEXT:    v_mov_b32_e32 v3, s4
; GPRIDX-NEXT:    v_mov_b32_e32 v4, s5
; GPRIDX-NEXT:    v_mov_b32_e32 v5, s6
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
; GPRIDX-NEXT:    v_mov_b32_e32 v6, s7
; GPRIDX-NEXT:    v_mov_b32_e32 v7, s8
; GPRIDX-NEXT:    v_mov_b32_e32 v8, s9
; GPRIDX-NEXT:    v_mov_b32_e32 v9, s10
; GPRIDX-NEXT:    v_mov_b32_e32 v10, s11
; GPRIDX-NEXT:    v_mov_b32_e32 v11, s12
; GPRIDX-NEXT:    v_mov_b32_e32 v12, s13
; GPRIDX-NEXT:    v_mov_b32_e32 v13, s14
; GPRIDX-NEXT:    v_mov_b32_e32 v14, s15
; GPRIDX-NEXT:    v_mov_b32_e32 v15, s16
; GPRIDX-NEXT:    v_mov_b32_e32 v16, s17
; GPRIDX-NEXT:    v_mov_b32_e32 v17, s18
; GPRIDX-NEXT:    v_mov_b32_e32 v18, s19
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[16:17], 0, v2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[4:5], 2, v2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[6:7], 3, v2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[8:9], 4, v2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[10:11], 5, v2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[12:13], 6, v2
; GPRIDX-NEXT:    v_cmp_eq_u32_e64 s[14:15], 7, v2
; GPRIDX-NEXT:    v_cndmask_b32_e64 v3, v3, v0, s[16:17]
; GPRIDX-NEXT:    v_cndmask_b32_e32 v5, v5, v0, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e64 v4, v4, v1, s[16:17]
; GPRIDX-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc
; GPRIDX-NEXT:    v_cndmask_b32_e64 v7, v7, v0, s[4:5]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v9, v9, v0, s[6:7]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v11, v11, v0, s[8:9]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v13, v13, v0, s[10:11]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v15, v15, v0, s[12:13]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v17, v17, v0, s[14:15]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v8, v8, v1, s[4:5]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v10, v10, v1, s[6:7]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v12, v12, v1, s[8:9]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v14, v14, v1, s[10:11]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v16, v16, v1, s[12:13]
; GPRIDX-NEXT:    v_cndmask_b32_e64 v18, v18, v1, s[14:15]
; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[3:6], off
; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[7:10], off
; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[11:14], off
; GPRIDX-NEXT:    global_store_dwordx4 v[0:1], v[15:18], off
; GPRIDX-NEXT:    s_waitcnt vmcnt(0)
; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
;
; MOVREL-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT:    s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT:    s_mov_b32 s18, 0
; MOVREL-NEXT:    s_mov_b64 s[4:5], 1.0
; MOVREL-NEXT:    s_mov_b32 s19, 0x40200000
; MOVREL-NEXT:    s_mov_b32 s17, 0x401c0000
; MOVREL-NEXT:    s_mov_b32 s16, s18
; MOVREL-NEXT:    s_mov_b32 s15, 0x40180000
; MOVREL-NEXT:    s_mov_b32 s14, s18
; MOVREL-NEXT:    s_mov_b32 s13, 0x40140000
; MOVREL-NEXT:    s_mov_b32 s12, s18
; MOVREL-NEXT:    s_mov_b64 s[10:11], 4.0
; MOVREL-NEXT:    s_mov_b32 s9, 0x40080000
; MOVREL-NEXT:    s_mov_b32 s8, s18
; MOVREL-NEXT:    s_mov_b64 s[6:7], 2.0
; MOVREL-NEXT:    v_mov_b32_e32 v3, s4
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
; MOVREL-NEXT:    v_mov_b32_e32 v4, s5
; MOVREL-NEXT:    v_mov_b32_e32 v5, s6
; MOVREL-NEXT:    v_mov_b32_e32 v6, s7
; MOVREL-NEXT:    v_mov_b32_e32 v7, s8
; MOVREL-NEXT:    v_mov_b32_e32 v8, s9
; MOVREL-NEXT:    v_mov_b32_e32 v9, s10
; MOVREL-NEXT:    v_mov_b32_e32 v10, s11
; MOVREL-NEXT:    v_mov_b32_e32 v11, s12
; MOVREL-NEXT:    v_mov_b32_e32 v12, s13
; MOVREL-NEXT:    v_mov_b32_e32 v13, s14
; MOVREL-NEXT:    v_mov_b32_e32 v14, s15
; MOVREL-NEXT:    v_mov_b32_e32 v15, s16
; MOVREL-NEXT:    v_mov_b32_e32 v16, s17
; MOVREL-NEXT:    v_mov_b32_e32 v17, s18
; MOVREL-NEXT:    v_mov_b32_e32 v18, s19
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s4, 1, v2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s5, 3, v2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s10, 2, v2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s6, 4, v2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s7, 5, v2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s8, 6, v2
; MOVREL-NEXT:    v_cmp_eq_u32_e64 s9, 7, v2
; MOVREL-NEXT:    v_cndmask_b32_e32 v3, v3, v0, vcc_lo
; MOVREL-NEXT:    v_cndmask_b32_e64 v5, v5, v0, s4
; MOVREL-NEXT:    v_cndmask_b32_e32 v4, v4, v1, vcc_lo
; MOVREL-NEXT:    v_cndmask_b32_e64 v6, v6, v1, s4
;
MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, v0, s10 865; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v0, s5 866; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v1, s10 867; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v1, s5 868; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v0, s6 869; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, v0, s7 870; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, v1, s6 871; MOVREL-NEXT: v_cndmask_b32_e64 v14, v14, v1, s7 872; MOVREL-NEXT: v_cndmask_b32_e64 v15, v15, v0, s8 873; MOVREL-NEXT: v_cndmask_b32_e64 v17, v17, v0, s9 874; MOVREL-NEXT: v_cndmask_b32_e64 v16, v16, v1, s8 875; MOVREL-NEXT: v_cndmask_b32_e64 v18, v18, v1, s9 876; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 877; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 878; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 879; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 880; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0 881; MOVREL-NEXT: s_setpc_b64 s[30:31] 882entry: 883 %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx 884 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 885 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 886 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 887 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 888 store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef 889 store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef 890 store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef 891 store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef 892 ret void 893} 894 895define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, double inreg %val, i32 %idx) { 896; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_v: 897; GPRIDX: ; %bb.0: ; 
%entry 898; GPRIDX-NEXT: s_mov_b32 s1, s3 899; GPRIDX-NEXT: s_mov_b32 s3, s5 900; GPRIDX-NEXT: s_mov_b32 s5, s7 901; GPRIDX-NEXT: s_mov_b32 s7, s9 902; GPRIDX-NEXT: s_mov_b32 s9, s11 903; GPRIDX-NEXT: s_mov_b32 s11, s13 904; GPRIDX-NEXT: s_mov_b32 s13, s15 905; GPRIDX-NEXT: s_mov_b32 s15, s17 906; GPRIDX-NEXT: s_mov_b32 s0, s2 907; GPRIDX-NEXT: s_mov_b32 s2, s4 908; GPRIDX-NEXT: s_mov_b32 s4, s6 909; GPRIDX-NEXT: s_mov_b32 s6, s8 910; GPRIDX-NEXT: s_mov_b32 s8, s10 911; GPRIDX-NEXT: s_mov_b32 s10, s12 912; GPRIDX-NEXT: s_mov_b32 s12, s14 913; GPRIDX-NEXT: s_mov_b32 s14, s16 914; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 915; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 916; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 917; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 918; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 919; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 920; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 921; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 922; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 923; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 924; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 925; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 926; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 927; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 928; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 929; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 930; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 931; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 932; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 933; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 934; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v0 935; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0 936; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 937; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 938; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 939; GPRIDX-NEXT: v_mov_b32_e32 v0, s19 940; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[12:13] 941; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc 942; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[12:13] 943; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc 944; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[0:1] 945; GPRIDX-NEXT: 
v_cndmask_b32_e64 v7, v7, v17, s[2:3] 946; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[4:5] 947; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[6:7] 948; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[8:9] 949; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11] 950; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[0:1] 951; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3] 952; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5] 953; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[6:7] 954; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v0, s[8:9] 955; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v0, s[10:11] 956; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off 957; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off 958; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off 959; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off 960; GPRIDX-NEXT: s_endpgm 961; 962; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_v: 963; MOVREL: ; %bb.0: ; %entry 964; MOVREL-NEXT: s_mov_b32 s1, s3 965; MOVREL-NEXT: s_mov_b32 s3, s5 966; MOVREL-NEXT: s_mov_b32 s5, s7 967; MOVREL-NEXT: s_mov_b32 s7, s9 968; MOVREL-NEXT: s_mov_b32 s9, s11 969; MOVREL-NEXT: s_mov_b32 s11, s13 970; MOVREL-NEXT: s_mov_b32 s13, s15 971; MOVREL-NEXT: s_mov_b32 s15, s17 972; MOVREL-NEXT: s_mov_b32 s0, s2 973; MOVREL-NEXT: s_mov_b32 s2, s4 974; MOVREL-NEXT: s_mov_b32 s4, s6 975; MOVREL-NEXT: s_mov_b32 s6, s8 976; MOVREL-NEXT: s_mov_b32 s8, s10 977; MOVREL-NEXT: s_mov_b32 s10, s12 978; MOVREL-NEXT: s_mov_b32 s12, s14 979; MOVREL-NEXT: s_mov_b32 s14, s16 980; MOVREL-NEXT: v_mov_b32_e32 v16, s15 981; MOVREL-NEXT: v_mov_b32_e32 v2, s1 982; MOVREL-NEXT: v_mov_b32_e32 v1, s0 983; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 984; MOVREL-NEXT: v_mov_b32_e32 v15, s14 985; MOVREL-NEXT: v_mov_b32_e32 v14, s13 986; MOVREL-NEXT: v_mov_b32_e32 v13, s12 987; MOVREL-NEXT: v_mov_b32_e32 v12, s11 988; MOVREL-NEXT: v_mov_b32_e32 v11, s10 989; MOVREL-NEXT: v_mov_b32_e32 v10, s9 990; MOVREL-NEXT: v_mov_b32_e32 v9, s8 991; 
MOVREL-NEXT: v_mov_b32_e32 v8, s7 992; MOVREL-NEXT: v_mov_b32_e32 v7, s6 993; MOVREL-NEXT: v_mov_b32_e32 v6, s5 994; MOVREL-NEXT: v_mov_b32_e32 v5, s4 995; MOVREL-NEXT: v_mov_b32_e32 v4, s3 996; MOVREL-NEXT: v_mov_b32_e32 v3, s2 997; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 998; MOVREL-NEXT: s_mov_b32 s30, s18 999; MOVREL-NEXT: s_mov_b32 s31, s19 1000; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 2, v0 1001; MOVREL-NEXT: v_cndmask_b32_e64 v1, v1, s30, vcc_lo 1002; MOVREL-NEXT: v_cndmask_b32_e64 v2, v2, s31, vcc_lo 1003; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 1004; MOVREL-NEXT: v_cmp_eq_u32_e64 s2, 5, v0 1005; MOVREL-NEXT: v_cndmask_b32_e64 v3, v3, s30, s0 1006; MOVREL-NEXT: v_cndmask_b32_e64 v4, v4, s31, s0 1007; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 4, v0 1008; MOVREL-NEXT: v_cmp_eq_u32_e64 s3, 6, v0 1009; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 7, v0 1010; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, s30, s1 1011; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, s31, s1 1012; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, s30, vcc_lo 1013; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, s31, vcc_lo 1014; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, s30, s0 1015; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, s31, s0 1016; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, s30, s2 1017; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, s31, s2 1018; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, s30, s3 1019; MOVREL-NEXT: v_cndmask_b32_e64 v14, v14, s31, s3 1020; MOVREL-NEXT: v_cndmask_b32_e64 v15, v15, s30, s4 1021; MOVREL-NEXT: v_cndmask_b32_e64 v16, v16, s31, s4 1022; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[1:4], off 1023; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[5:8], off 1024; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[9:12], off 1025; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[13:16], off 1026; MOVREL-NEXT: s_endpgm 1027entry: 1028 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1029 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1030 %vec.1 = shufflevector <8 x double> 
%insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1031 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1032 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1033 store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef 1034 store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef 1035 store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef 1036 store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef 1037 ret void 1038} 1039 1040define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, double %val, i32 inreg %idx) { 1041; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_s: 1042; GPRIDX: ; %bb.0: ; %entry 1043; GPRIDX-NEXT: s_mov_b32 s1, s3 1044; GPRIDX-NEXT: s_mov_b32 s3, s5 1045; GPRIDX-NEXT: s_mov_b32 s5, s7 1046; GPRIDX-NEXT: s_mov_b32 s7, s9 1047; GPRIDX-NEXT: s_mov_b32 s9, s11 1048; GPRIDX-NEXT: s_mov_b32 s11, s13 1049; GPRIDX-NEXT: s_mov_b32 s13, s15 1050; GPRIDX-NEXT: s_mov_b32 s15, s17 1051; GPRIDX-NEXT: s_mov_b32 s0, s2 1052; GPRIDX-NEXT: s_mov_b32 s2, s4 1053; GPRIDX-NEXT: s_mov_b32 s4, s6 1054; GPRIDX-NEXT: s_mov_b32 s6, s8 1055; GPRIDX-NEXT: s_mov_b32 s8, s10 1056; GPRIDX-NEXT: s_mov_b32 s10, s12 1057; GPRIDX-NEXT: s_mov_b32 s12, s14 1058; GPRIDX-NEXT: s_mov_b32 s14, s16 1059; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 1060; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 1061; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 1062; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 1063; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 1064; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 1065; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 1066; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 1067; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 1068; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 1069; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 1070; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 1071; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 1072; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 1073; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 1074; GPRIDX-NEXT: v_mov_b32_e32 
v2, s0 1075; GPRIDX-NEXT: s_lshl_b32 s0, s18, 1 1076; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 1077; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 1078; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 1079; GPRIDX-NEXT: s_set_gpr_idx_off 1080; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1081; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1082; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1083; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1084; GPRIDX-NEXT: s_endpgm 1085; 1086; MOVREL-LABEL: dyn_insertelement_v8f64_s_v_s: 1087; MOVREL: ; %bb.0: ; %entry 1088; MOVREL-NEXT: s_mov_b32 s1, s3 1089; MOVREL-NEXT: s_mov_b32 s3, s5 1090; MOVREL-NEXT: s_mov_b32 s5, s7 1091; MOVREL-NEXT: s_mov_b32 s7, s9 1092; MOVREL-NEXT: s_mov_b32 s9, s11 1093; MOVREL-NEXT: s_mov_b32 s11, s13 1094; MOVREL-NEXT: s_mov_b32 s13, s15 1095; MOVREL-NEXT: s_mov_b32 s15, s17 1096; MOVREL-NEXT: s_mov_b32 s0, s2 1097; MOVREL-NEXT: s_mov_b32 s2, s4 1098; MOVREL-NEXT: s_mov_b32 s4, s6 1099; MOVREL-NEXT: s_mov_b32 s6, s8 1100; MOVREL-NEXT: s_mov_b32 s8, s10 1101; MOVREL-NEXT: s_mov_b32 s10, s12 1102; MOVREL-NEXT: s_mov_b32 s12, s14 1103; MOVREL-NEXT: s_mov_b32 s14, s16 1104; MOVREL-NEXT: v_mov_b32_e32 v17, s15 1105; MOVREL-NEXT: v_mov_b32_e32 v2, s0 1106; MOVREL-NEXT: s_lshl_b32 m0, s18, 1 1107; MOVREL-NEXT: v_mov_b32_e32 v16, s14 1108; MOVREL-NEXT: v_mov_b32_e32 v15, s13 1109; MOVREL-NEXT: v_mov_b32_e32 v14, s12 1110; MOVREL-NEXT: v_mov_b32_e32 v13, s11 1111; MOVREL-NEXT: v_mov_b32_e32 v12, s10 1112; MOVREL-NEXT: v_mov_b32_e32 v11, s9 1113; MOVREL-NEXT: v_mov_b32_e32 v10, s8 1114; MOVREL-NEXT: v_mov_b32_e32 v9, s7 1115; MOVREL-NEXT: v_mov_b32_e32 v8, s6 1116; MOVREL-NEXT: v_mov_b32_e32 v7, s5 1117; MOVREL-NEXT: v_mov_b32_e32 v6, s4 1118; MOVREL-NEXT: v_mov_b32_e32 v5, s3 1119; MOVREL-NEXT: v_mov_b32_e32 v4, s2 1120; MOVREL-NEXT: v_mov_b32_e32 v3, s1 1121; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 1122; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 1123; MOVREL-NEXT: global_store_dwordx4 v[0:1], 
v[2:5], off 1124; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1125; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1126; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1127; MOVREL-NEXT: s_endpgm 1128entry: 1129 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1130 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1131 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1132 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1133 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1134 store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef 1135 store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef 1136 store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef 1137 store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef 1138 ret void 1139} 1140 1141define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double inreg %val, i32 inreg %idx) { 1142; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_s: 1143; GPRIDX: ; %bb.0: ; %entry 1144; GPRIDX-NEXT: s_lshl_b32 s0, s4, 1 1145; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 1146; GPRIDX-NEXT: v_mov_b32_e32 v0, s2 1147; GPRIDX-NEXT: v_mov_b32_e32 v1, s3 1148; GPRIDX-NEXT: s_set_gpr_idx_off 1149; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1150; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1151; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1152; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1153; GPRIDX-NEXT: s_endpgm 1154; 1155; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_s: 1156; MOVREL: ; %bb.0: ; %entry 1157; MOVREL-NEXT: s_lshl_b32 m0, s4, 1 1158; MOVREL-NEXT: v_movreld_b32_e32 v0, s2 1159; MOVREL-NEXT: v_movreld_b32_e32 v1, s3 1160; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1161; MOVREL-NEXT: global_store_dwordx4 
v[0:1], v[4:7], off 1162; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1163; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1164; MOVREL-NEXT: s_endpgm 1165entry: 1166 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1167 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1168 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1169 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1170 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1171 store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef 1172 store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef 1173 store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef 1174 store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef 1175 ret void 1176} 1177 1178define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, double %val, i32 %idx) { 1179; GPRIDX-LABEL: dyn_insertelement_v8f64_s_v_v: 1180; GPRIDX: ; %bb.0: ; %entry 1181; GPRIDX-NEXT: s_mov_b32 s1, s3 1182; GPRIDX-NEXT: s_mov_b32 s3, s5 1183; GPRIDX-NEXT: s_mov_b32 s5, s7 1184; GPRIDX-NEXT: s_mov_b32 s7, s9 1185; GPRIDX-NEXT: s_mov_b32 s9, s11 1186; GPRIDX-NEXT: s_mov_b32 s11, s13 1187; GPRIDX-NEXT: s_mov_b32 s13, s15 1188; GPRIDX-NEXT: s_mov_b32 s15, s17 1189; GPRIDX-NEXT: s_mov_b32 s0, s2 1190; GPRIDX-NEXT: s_mov_b32 s2, s4 1191; GPRIDX-NEXT: s_mov_b32 s4, s6 1192; GPRIDX-NEXT: s_mov_b32 s6, s8 1193; GPRIDX-NEXT: s_mov_b32 s8, s10 1194; GPRIDX-NEXT: s_mov_b32 s10, s12 1195; GPRIDX-NEXT: s_mov_b32 s12, s14 1196; GPRIDX-NEXT: s_mov_b32 s14, s16 1197; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 1198; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 1199; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 1200; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 1201; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 1202; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 1203; 
GPRIDX-NEXT: v_mov_b32_e32 v12, s9 1204; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 1205; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 1206; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 1207; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 1208; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 1209; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 1210; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 1211; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 1212; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 1213; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 1214; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v2 1215; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 1216; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2 1217; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2 1218; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2 1219; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2 1220; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v2 1221; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[12:13] 1222; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc 1223; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[12:13] 1224; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc 1225; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[0:1] 1226; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[2:3] 1227; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[4:5] 1228; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[6:7] 1229; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[8:9] 1230; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[10:11] 1231; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] 1232; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3] 1233; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5] 1234; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[6:7] 1235; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[8:9] 1236; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[10:11] 1237; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1238; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 1239; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 1240; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 1241; GPRIDX-NEXT: s_endpgm 1242; 1243; 
MOVREL-LABEL: dyn_insertelement_v8f64_s_v_v: 1244; MOVREL: ; %bb.0: ; %entry 1245; MOVREL-NEXT: s_mov_b32 s1, s3 1246; MOVREL-NEXT: s_mov_b32 s3, s5 1247; MOVREL-NEXT: s_mov_b32 s5, s7 1248; MOVREL-NEXT: s_mov_b32 s7, s9 1249; MOVREL-NEXT: s_mov_b32 s9, s11 1250; MOVREL-NEXT: s_mov_b32 s11, s13 1251; MOVREL-NEXT: s_mov_b32 s13, s15 1252; MOVREL-NEXT: s_mov_b32 s15, s17 1253; MOVREL-NEXT: s_mov_b32 s0, s2 1254; MOVREL-NEXT: s_mov_b32 s2, s4 1255; MOVREL-NEXT: s_mov_b32 s4, s6 1256; MOVREL-NEXT: s_mov_b32 s6, s8 1257; MOVREL-NEXT: s_mov_b32 s8, s10 1258; MOVREL-NEXT: s_mov_b32 s10, s12 1259; MOVREL-NEXT: s_mov_b32 s12, s14 1260; MOVREL-NEXT: s_mov_b32 s14, s16 1261; MOVREL-NEXT: v_mov_b32_e32 v18, s15 1262; MOVREL-NEXT: v_mov_b32_e32 v17, s14 1263; MOVREL-NEXT: v_mov_b32_e32 v16, s13 1264; MOVREL-NEXT: v_mov_b32_e32 v15, s12 1265; MOVREL-NEXT: v_mov_b32_e32 v14, s11 1266; MOVREL-NEXT: v_mov_b32_e32 v13, s10 1267; MOVREL-NEXT: v_mov_b32_e32 v12, s9 1268; MOVREL-NEXT: v_mov_b32_e32 v11, s8 1269; MOVREL-NEXT: v_mov_b32_e32 v10, s7 1270; MOVREL-NEXT: v_mov_b32_e32 v9, s6 1271; MOVREL-NEXT: v_mov_b32_e32 v8, s5 1272; MOVREL-NEXT: v_mov_b32_e32 v7, s4 1273; MOVREL-NEXT: v_mov_b32_e32 v6, s3 1274; MOVREL-NEXT: v_mov_b32_e32 v5, s2 1275; MOVREL-NEXT: v_mov_b32_e32 v4, s1 1276; MOVREL-NEXT: v_mov_b32_e32 v3, s0 1277; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 1278; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 1279; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 3, v2 1280; MOVREL-NEXT: v_cmp_eq_u32_e64 s6, 2, v2 1281; MOVREL-NEXT: v_cmp_eq_u32_e64 s2, 4, v2 1282; MOVREL-NEXT: v_cmp_eq_u32_e64 s3, 5, v2 1283; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 6, v2 1284; MOVREL-NEXT: v_cmp_eq_u32_e64 s5, 7, v2 1285; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 1286; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 1287; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 1288; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 1289; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, v0, s6 1290; MOVREL-NEXT: v_cndmask_b32_e64 
v9, v9, v0, s1 1291; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v1, s6 1292; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v1, s1 1293; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v0, s2 1294; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, v0, s3 1295; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, v1, s2 1296; MOVREL-NEXT: v_cndmask_b32_e64 v14, v14, v1, s3 1297; MOVREL-NEXT: v_cndmask_b32_e64 v15, v15, v0, s4 1298; MOVREL-NEXT: v_cndmask_b32_e64 v17, v17, v0, s5 1299; MOVREL-NEXT: v_cndmask_b32_e64 v16, v16, v1, s4 1300; MOVREL-NEXT: v_cndmask_b32_e64 v18, v18, v1, s5 1301; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1302; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off 1303; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[11:14], off 1304; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[15:18], off 1305; MOVREL-NEXT: s_endpgm 1306entry: 1307 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1308 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1309 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1310 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1311 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1312 store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef 1313 store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef 1314 store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef 1315 store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef 1316 ret void 1317} 1318 1319define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double inreg %val, i32 %idx) { 1320; GPRIDX-LABEL: dyn_insertelement_v8f64_v_s_v: 1321; GPRIDX: ; %bb.0: ; %entry 1322; GPRIDX-NEXT: v_mov_b32_e32 v17, s2 1323; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 1324; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v16 1325; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v16 1326; 
GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 3, v16 1327; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 4, v16 1328; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 5, v16 1329; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 7, v16 1330; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[14:15], 6, v16 1331; GPRIDX-NEXT: v_mov_b32_e32 v16, s3 1332; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v17, vcc 1333; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v17, s[0:1] 1334; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v16, vcc 1335; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v16, s[0:1] 1336; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v17, s[4:5] 1337; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v17, s[6:7] 1338; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v17, s[8:9] 1339; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v17, s[10:11] 1340; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v17, s[14:15] 1341; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v17, s[12:13] 1342; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[4:5] 1343; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v16, s[6:7] 1344; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[8:9] 1345; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v16, s[10:11] 1346; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v16, s[14:15] 1347; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v16, s[12:13] 1348; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1349; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1350; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1351; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1352; GPRIDX-NEXT: s_endpgm 1353; 1354; MOVREL-LABEL: dyn_insertelement_v8f64_v_s_v: 1355; MOVREL: ; %bb.0: ; %entry 1356; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 1357; MOVREL-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo 1358; MOVREL-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc_lo 1359; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 1360; MOVREL-NEXT: v_cndmask_b32_e64 v2, v2, s2, vcc_lo 1361; MOVREL-NEXT: v_cndmask_b32_e64 v3, v3, s3, vcc_lo 1362; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v16 1363; MOVREL-NEXT: v_cndmask_b32_e64 
v4, v4, s2, vcc_lo 1364; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, s3, vcc_lo 1365; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v16 1366; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, s2, vcc_lo 1367; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, s3, vcc_lo 1368; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v16 1369; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, s2, vcc_lo 1370; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, s3, vcc_lo 1371; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v16 1372; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, s2, vcc_lo 1373; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, s3, vcc_lo 1374; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v16 1375; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, s2, vcc_lo 1376; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, s3, vcc_lo 1377; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v16 1378; MOVREL-NEXT: v_cndmask_b32_e64 v14, v14, s2, vcc_lo 1379; MOVREL-NEXT: v_cndmask_b32_e64 v15, v15, s3, vcc_lo 1380; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1381; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1382; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1383; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off 1384; MOVREL-NEXT: s_endpgm 1385entry: 1386 %insert = insertelement <8 x double> %vec, double %val, i32 %idx 1387 %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1> 1388 %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3> 1389 %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5> 1390 %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7> 1391 store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef 1392 store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef 1393 store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef 1394 store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef 1395 ret void 1396} 1397 1398define amdgpu_ps void 
@dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1
; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v16
; GPRIDX-NEXT: v_mov_b32_e32 v1, v17
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_lshl_b32 m0, s2, 1
; MOVREL-NEXT: v_movreld_b32_e32 v0, v16
; MOVREL-NEXT: v_movreld_b32_e32 v1, v17
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; MOVREL-NEXT: s_endpgm
; <8 x double> insert with plain vector/value arguments and an inreg index.
; Both check sets expect the index to be shifted left by one and the two
; 32-bit halves of the value to be written with indexed moves. The result is
; split into four <2 x double> pieces that are written with volatile stores.
entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}

; <8 x double> insert where vector, value and index are all plain (non-inreg)
; arguments. The expected code compares the index against each of 0-7 and
; selects between the old and new 32-bit halves with v_cndmask_b32.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 2, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 3, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 4, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 5, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 6, v18
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v16, s[0:1]
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[0:1]
; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v16, s[2:3]
; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v16, s[4:5]
; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v16, s[6:7]
; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v16, s[8:9]
; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v16, s[12:13]
; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v16, s[10:11]
; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[2:3]
; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[4:5]
; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[6:7]
; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[8:9]
; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[12:13]
; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11]
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v18
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18
; MOVREL-NEXT: v_mov_b32_e32 v23, v1
; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 2, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s2, 3, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s3, 4, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 5, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s5, 7, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s6, 6, v18
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v19, v16, vcc_lo
; MOVREL-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v23, v17, vcc_lo
; MOVREL-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0
; MOVREL-NEXT: v_cndmask_b32_e64 v4, v4, v16, s1
; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v16, s2
; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, v17, s1
; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, v17, s2
; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v16, s3
; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4
; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v17, s3
; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v17, s4
; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, v16, s6
; MOVREL-NEXT: v_cndmask_b32_e64 v14, v14, v16, s5
; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, v17, s6
; MOVREL-NEXT: v_cndmask_b32_e64 v15, v15, v17, s5
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; MOVREL-NEXT: s_endpgm
entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}

; <3 x i32> insert with every argument inreg. Expected as three
; s_cmp_eq_u32 / s_cselect_b32 pairs, one per element.
define amdgpu_ps <3 x i32> @dyn_insertelement_v3i32_s_s_s(<3 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v3i32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_cmp_eq_u32 s6, 0
; GPRIDX-NEXT: s_cselect_b32 s0, s5, s2
; GPRIDX-NEXT: s_cmp_eq_u32 s6, 1
; GPRIDX-NEXT: s_cselect_b32 s1, s5, s3
; GPRIDX-NEXT: s_cmp_eq_u32 s6, 2
; GPRIDX-NEXT: s_cselect_b32 s2, s5, s4
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v3i32_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_cmp_eq_u32 s6, 0
; MOVREL-NEXT: s_cselect_b32 s0, s5, s2
; MOVREL-NEXT: s_cmp_eq_u32 s6, 1
; MOVREL-NEXT: s_cselect_b32 s1, s5, s3
; MOVREL-NEXT: s_cmp_eq_u32 s6, 2
; MOVREL-NEXT: s_cselect_b32 s2, s5, s4
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <3 x i32> %vec, i32 %val, i32 %idx
  ret <3 x i32> %insert
}

; Despite the "v3i32" in the name this variant operates on <3 x float>.
; Vector and value are plain arguments, the index is inreg; the index is
; compared against 0-2 and each compare feeds one v_cndmask_b32.
define amdgpu_ps <3 x float> @dyn_insertelement_v3i32_v_v_s(<3 x float> %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v3i32_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v3i32_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <3 x float> %vec, float %val, i32 %idx
  ret <3 x float> %insert
}

; <5 x i32> insert with every argument inreg. Expected as five
; s_cmp_eq_u32 / s_cselect_b32 pairs, one per element.
define amdgpu_ps <5 x i32> @dyn_insertelement_v5i32_s_s_s(<5 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v5i32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_cmp_eq_u32 s8, 0
; GPRIDX-NEXT: s_cselect_b32 s0, s7, s2
; GPRIDX-NEXT: s_cmp_eq_u32 s8, 1
; GPRIDX-NEXT: s_cselect_b32 s1, s7, s3
; GPRIDX-NEXT: s_cmp_eq_u32 s8, 2
; GPRIDX-NEXT: s_cselect_b32 s2, s7, s4
; GPRIDX-NEXT: s_cmp_eq_u32 s8, 3
; GPRIDX-NEXT: s_cselect_b32 s3, s7, s5
; GPRIDX-NEXT: s_cmp_eq_u32 s8, 4
; GPRIDX-NEXT: s_cselect_b32 s4, s7, s6
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v5i32_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_cmp_eq_u32 s8, 0
; MOVREL-NEXT: s_cselect_b32 s0, s7, s2
; MOVREL-NEXT: s_cmp_eq_u32 s8, 1
; MOVREL-NEXT: s_cselect_b32 s1, s7, s3
; MOVREL-NEXT: s_cmp_eq_u32 s8, 2
; MOVREL-NEXT: s_cselect_b32 s2, s7, s4
; MOVREL-NEXT: s_cmp_eq_u32 s8, 3
; MOVREL-NEXT: s_cselect_b32 s3, s7, s5
; MOVREL-NEXT: s_cmp_eq_u32 s8, 4
; MOVREL-NEXT: s_cselect_b32 s4, s7, s6
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <5 x i32> %vec, i32 %val, i32 %idx
  ret <5 x i32> %insert
}

; Despite the "v5i32" in the name this variant operates on <5 x float>.
; Vector and value are plain arguments, the index is inreg; the index is
; compared against 0-4 and each compare feeds one v_cndmask_b32.
define amdgpu_ps <5 x float> @dyn_insertelement_v5i32_v_v_s(<5 x float> %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v5i32_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3
; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4
; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v5i32_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3
; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4
; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <5 x float> %vec, float %val, i32 %idx
  ret <5 x float> %insert
}

; <32 x i32> insert with every argument inreg. The expected code copies the
; incoming vector from s2.. down to s0.., loads the index into m0 and then
; performs the insert with a single s_movreld_b32.
define amdgpu_ps <32 x i32> @dyn_insertelement_v32i32_s_s_s(<32 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v32i32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 s16, s18
; GPRIDX-NEXT: s_mov_b32 s17, s19
; GPRIDX-NEXT: s_mov_b32 s18, s20
; GPRIDX-NEXT: s_mov_b32 s19, s21
; GPRIDX-NEXT: s_mov_b32 s20, s22
; GPRIDX-NEXT: s_mov_b32 s21, s23
; GPRIDX-NEXT: s_mov_b32 s22, s24
; GPRIDX-NEXT: s_mov_b32 s23, s25
; GPRIDX-NEXT: s_mov_b32 s24, s26
; GPRIDX-NEXT: s_mov_b32 s25, s27
; GPRIDX-NEXT: s_mov_b32 s26, s28
; GPRIDX-NEXT: s_mov_b32 s27, s29
; GPRIDX-NEXT: s_mov_b32 s28, s30
; GPRIDX-NEXT: s_mov_b32 s29, s31
; GPRIDX-NEXT: s_mov_b32 s31, s33
; GPRIDX-NEXT: s_mov_b32 s30, s32
; GPRIDX-NEXT: s_mov_b32 m0, s35
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s0, s34
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v32i32_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s35
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_mov_b32 s16, s18
; MOVREL-NEXT: s_mov_b32 s17, s19
; MOVREL-NEXT: s_mov_b32 s18, s20
; MOVREL-NEXT: s_mov_b32 s19, s21
; MOVREL-NEXT: s_mov_b32 s20, s22
; MOVREL-NEXT: s_mov_b32 s21, s23
; MOVREL-NEXT: s_mov_b32 s22, s24
; MOVREL-NEXT: s_mov_b32 s23, s25
; MOVREL-NEXT: s_mov_b32 s24, s26
; MOVREL-NEXT: s_mov_b32 s25, s27
; MOVREL-NEXT: s_mov_b32 s26, s28
; MOVREL-NEXT: s_mov_b32 s27, s29
; MOVREL-NEXT: s_mov_b32 s28, s30
; MOVREL-NEXT: s_mov_b32 s29, s31
; MOVREL-NEXT: s_mov_b32 s31, s33
; MOVREL-NEXT: s_mov_b32 s30, s32
; MOVREL-NEXT: s_movreld_b32 s0, s34
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <32 x i32> %vec, i32 %val, i32 %idx
  ret <32 x i32> %insert
}

; Despite the "v32i32" in the name this variant operates on <32 x float>.
; With an inreg index the insert is expected as one indexed move: either
; s_set_gpr_idx_on/v_mov_b32/s_set_gpr_idx_off, or m0 setup + v_movreld_b32.
define amdgpu_ps <32 x float> @dyn_insertelement_v32i32_v_v_s(<32 x float> %vec, float %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v32i32_v_v_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST)
; GPRIDX-NEXT: v_mov_b32_e32 v0, v32
; GPRIDX-NEXT: s_set_gpr_idx_off
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v32i32_v_v_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: v_movreld_b32_e32 v0, v32
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <32 x float> %vec, float %val, i32 %idx
  ret <32 x float> %insert
}

; <8 x float> insert at index %idx + 1 with every argument inreg. The add is
; expected as an explicit s_add_i32 ahead of the compare/select chain, and
; the selected scalars are then copied into v0-v7.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_1(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_add_i32 s11, s11, 1
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0
; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1
; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2
; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3
; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4
; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5
; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6
; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7
; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_add_i32 s11, s11, 1
; MOVREL-NEXT: s_cmp_eq_u32 s11, 0
; MOVREL-NEXT: s_cselect_b32 s0, s10, s2
; MOVREL-NEXT: s_cmp_eq_u32 s11, 1
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: s_cselect_b32 s1, s10, s3
; MOVREL-NEXT: s_cmp_eq_u32 s11, 2
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: s_cselect_b32 s2, s10, s4
; MOVREL-NEXT: s_cmp_eq_u32 s11, 3
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: s_cselect_b32 s3, s10, s5
; MOVREL-NEXT: s_cmp_eq_u32 s11, 4
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: s_cselect_b32 s4, s10, s6
; MOVREL-NEXT: s_cmp_eq_u32 s11, 5
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: s_cselect_b32 s5, s10, s7
; MOVREL-NEXT: s_cmp_eq_u32 s11, 6
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: s_cselect_b32 s6, s10, s8
; MOVREL-NEXT: s_cmp_eq_u32 s11, 7
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: s_cselect_b32 s7, s10, s9
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}

; Same as the add_1 variant but with index %idx + 7: an explicit s_add_i32
; is expected ahead of the eight compare/select pairs.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_s_s_s_add_7(<8 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_add_i32 s11, s11, 7
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0
; GPRIDX-NEXT: s_cselect_b32 s0, s10, s2
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 1
; GPRIDX-NEXT: s_cselect_b32 s1, s10, s3
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 2
; GPRIDX-NEXT: s_cselect_b32 s2, s10, s4
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 3
; GPRIDX-NEXT: s_cselect_b32 s3, s10, s5
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 4
; GPRIDX-NEXT: s_cselect_b32 s4, s10, s6
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 5
; GPRIDX-NEXT: s_cselect_b32 s5, s10, s7
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 6
; GPRIDX-NEXT: s_cselect_b32 s6, s10, s8
; GPRIDX-NEXT: s_cmp_eq_u32 s11, 7
; GPRIDX-NEXT: s_cselect_b32 s7, s10, s9
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_s_s_s_add_7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_add_i32 s11, s11, 7
; MOVREL-NEXT: s_cmp_eq_u32 s11, 0
; MOVREL-NEXT: s_cselect_b32 s0, s10, s2
; MOVREL-NEXT: s_cmp_eq_u32 s11, 1
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: s_cselect_b32 s1, s10, s3
; MOVREL-NEXT: s_cmp_eq_u32 s11, 2
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: s_cselect_b32 s2, s10, s4
; MOVREL-NEXT: s_cmp_eq_u32 s11, 3
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: s_cselect_b32 s3, s10, s5
; MOVREL-NEXT: s_cmp_eq_u32 s11, 4
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: s_cselect_b32 s4, s10, s6
; MOVREL-NEXT: s_cmp_eq_u32 s11, 5
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: s_cselect_b32 s5, s10, s7
; MOVREL-NEXT: s_cmp_eq_u32 s11, 6
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: s_cselect_b32 s6, s10, s8
; MOVREL-NEXT: s_cmp_eq_u32 s11, 7
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: s_cselect_b32 s7, s10, s9
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 7
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}

; <8 x float> insert at index %idx + 1 where vector, value and index are all
; plain (non-inreg) arguments. An explicit vector add of the index is
; expected first (v_add_u32 or v_add_nc_u32), followed by eight
; v_cmp_eq_u32 / v_cndmask_b32 pairs, one per element.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_1(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_add_u32_e32 v9, 1, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: v_add_nc_u32_e32 v9, 1, v9
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}

; <8 x float> insert at index %idx + 7 where vector, value and index are all
; plain (non-inreg) arguments. An explicit vector add of the index is
; expected first, followed by eight v_cmp_eq_u32 / v_cndmask_b32 pairs.
define amdgpu_ps <8 x float> @dyn_insertelement_v8f32_v_v_v_add_7(<8 x float> %vec, float %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_add_u32_e32 v9, 7, v9
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 7, v9
; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v8f32_v_v_v_add_7:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: v_add_nc_u32_e32 v9, 7, v9
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc_lo
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v9
; MOVREL-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc_lo
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %idx.add = add i32 %idx, 7
  %insert = insertelement <8 x float> %vec, float %val, i32 %idx.add
  ret <8 x float> %insert
}

; <8 x double> insert at index %idx + 1 with every argument inreg. The
; expected code contains no add instruction; instead the s_movreld_b64
; destination is s[2:3] rather than s[0:1].
; TODO confirm: presumably the constant +1 is folded into the base register.
; The result is split into four <2 x double> pieces that are written with
; volatile stores.
define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %vec, double inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s20
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b64 s[2:3], s[18:19]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s4
; GPRIDX-NEXT: v_mov_b32_e32 v1, s5
; GPRIDX-NEXT: v_mov_b32_e32 v2, s6
; GPRIDX-NEXT: v_mov_b32_e32 v3, s7
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s8
; GPRIDX-NEXT: v_mov_b32_e32 v1, s9
; GPRIDX-NEXT: v_mov_b32_e32 v2, s10
; GPRIDX-NEXT: v_mov_b32_e32 v3, s11
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: v_mov_b32_e32 v0, s12
; GPRIDX-NEXT: v_mov_b32_e32 v1, s13
; GPRIDX-NEXT: v_mov_b32_e32 v2, s14
; GPRIDX-NEXT: v_mov_b32_e32 v3, s15
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_s_s_s_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 m0, s20
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b64 s[2:3], s[18:19]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: v_mov_b32_e32 v8, s8
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: v_mov_b32_e32 v12, s12
; MOVREL-NEXT: v_mov_b32_e32 v9, s9
; MOVREL-NEXT: v_mov_b32_e32 v10, s10
; MOVREL-NEXT: v_mov_b32_e32 v11, s11
; MOVREL-NEXT: v_mov_b32_e32 v13, s13
; MOVREL-NEXT: v_mov_b32_e32 v14, s14
; MOVREL-NEXT: v_mov_b32_e32 v15, s15
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; MOVREL-NEXT: s_endpgm
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}

; <8 x double> insert at index %idx + 1 where vector, value and index are all
; plain (non-inreg) arguments. An explicit vector add of the index is
; expected, followed by compares against 0-7 whose results select between the
; old and new 32-bit halves with v_cndmask_b32.
define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: v_add_u32_e32 v18, 1, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 2, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 3, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 4, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 5, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v18
; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 6, v18
; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v16, s[0:1]
; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[0:1]
; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v16, s[2:3]
; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v16, s[4:5]
; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v16, s[6:7]
; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v16, s[8:9]
; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v16, s[12:13]
; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v16, s[10:11]
; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[2:3]
; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[4:5]
; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[6:7]
; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[8:9]
; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[12:13]
; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11]
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; GPRIDX-NEXT: s_endpgm
;
; MOVREL-LABEL: dyn_insertelement_v8f64_v_v_v_add_1:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: v_add_nc_u32_e32 v18, 1, v18
; MOVREL-NEXT: v_mov_b32_e32 v19, v0
; MOVREL-NEXT: v_mov_b32_e32 v23, v1
; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 2, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s2, 3, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s3, 4, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 5, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s5, 7, v18
; MOVREL-NEXT: v_cmp_eq_u32_e64 s6, 6, v18
; MOVREL-NEXT: v_cndmask_b32_e32 v0, v19, v16, vcc_lo
; MOVREL-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0
; MOVREL-NEXT: v_cndmask_b32_e32 v1, v23, v17, vcc_lo
; MOVREL-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0
; MOVREL-NEXT: v_cndmask_b32_e64 v4, v4, v16, s1
; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v16, s2
; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, v17, s1
; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, v17, s2
; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v16, s3
; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4
; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v17, s3
; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v17, s4
; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, v16, s6
; MOVREL-NEXT: v_cndmask_b32_e64 v14, v14, v16, s5
; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, v17, s6
; MOVREL-NEXT: v_cndmask_b32_e64 v15, v15, v17, s5
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[4:7], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[8:11], off
; MOVREL-NEXT: global_store_dwordx4 v[0:1], v[12:15], off
; MOVREL-NEXT: s_endpgm
entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef
  store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef
  ret void
}

; <16 x i32> insert with every argument inreg. The expected code copies the
; incoming vector from s2.. down to s0.., loads the index into m0 and then
; performs the insert with a single s_movreld_b32.
define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s19
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s0, s18
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v16i32_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s19
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b32 s0, s18
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx
  ret <16 x i32> %insert
}

; <16 x float> insert with every argument inreg. The expected code matches
; the <16 x i32> case (copy down, m0 setup, one s_movreld_b32) and then
; copies s0-s15 into v0-v15.
define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_s_s(<16 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v16f32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 m0, s19
; GPRIDX-NEXT: s_nop 0
; GPRIDX-NEXT: s_movreld_b32 s0, s18
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
; GPRIDX-NEXT: v_mov_b32_e32 v3, s3
; GPRIDX-NEXT: v_mov_b32_e32 v4, s4
; GPRIDX-NEXT: v_mov_b32_e32 v5, s5
; GPRIDX-NEXT: v_mov_b32_e32 v6, s6
; GPRIDX-NEXT: v_mov_b32_e32 v7, s7
; GPRIDX-NEXT: v_mov_b32_e32 v8, s8
; GPRIDX-NEXT: v_mov_b32_e32 v9, s9
; GPRIDX-NEXT: v_mov_b32_e32 v10, s10
; GPRIDX-NEXT: v_mov_b32_e32 v11, s11
; GPRIDX-NEXT: v_mov_b32_e32 v12, s12
; GPRIDX-NEXT: v_mov_b32_e32 v13, s13
; GPRIDX-NEXT: v_mov_b32_e32 v14, s14
; GPRIDX-NEXT: v_mov_b32_e32 v15, s15
; GPRIDX-NEXT: ; return to shader part epilog
;
; MOVREL-LABEL: dyn_insertelement_v16f32_s_s_s:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s0, s2
; MOVREL-NEXT: s_mov_b32 m0, s19
; MOVREL-NEXT: s_mov_b32 s1, s3
; MOVREL-NEXT: s_mov_b32 s2, s4
; MOVREL-NEXT: s_mov_b32 s3, s5
; MOVREL-NEXT: s_mov_b32 s4, s6
; MOVREL-NEXT: s_mov_b32 s5, s7
; MOVREL-NEXT: s_mov_b32 s6, s8
; MOVREL-NEXT: s_mov_b32 s7, s9
; MOVREL-NEXT: s_mov_b32 s8, s10
; MOVREL-NEXT: s_mov_b32 s9, s11
; MOVREL-NEXT: s_mov_b32 s10, s12
; MOVREL-NEXT: s_mov_b32 s11, s13
; MOVREL-NEXT: s_mov_b32 s12, s14
; MOVREL-NEXT: s_mov_b32 s13, s15
; MOVREL-NEXT: s_mov_b32 s14, s16
; MOVREL-NEXT: s_mov_b32 s15, s17
; MOVREL-NEXT: s_movreld_b32 s0, s18
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1
; MOVREL-NEXT: v_mov_b32_e32 v2, s2
; MOVREL-NEXT: v_mov_b32_e32 v3, s3
; MOVREL-NEXT: v_mov_b32_e32 v4, s4
; MOVREL-NEXT: v_mov_b32_e32 v5, s5
; MOVREL-NEXT: v_mov_b32_e32 v6, s6
; MOVREL-NEXT: v_mov_b32_e32 v7, s7
; MOVREL-NEXT: v_mov_b32_e32 v8, s8
; MOVREL-NEXT: v_mov_b32_e32 v9, s9
; MOVREL-NEXT: v_mov_b32_e32 v10, s10
; MOVREL-NEXT: v_mov_b32_e32 v11, s11
; MOVREL-NEXT: v_mov_b32_e32 v12, s12
; MOVREL-NEXT: v_mov_b32_e32 v13, s13
; MOVREL-NEXT: v_mov_b32_e32 v14, s14
; MOVREL-NEXT: v_mov_b32_e32 v15, s15
; MOVREL-NEXT: ; return to shader part epilog
entry:
  %insert = insertelement <16 x float> %vec, float %val, i32 %idx
  ret <16 x float> %insert
}

define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_s_s(<32 x float> inreg %vec, float inreg %val, i32 inreg %idx) {
; GPRIDX-LABEL: dyn_insertelement_v32f32_s_s_s:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s0, s2
; GPRIDX-NEXT: s_mov_b32 s1, s3
; GPRIDX-NEXT: s_mov_b32 s2, s4
; GPRIDX-NEXT: s_mov_b32 s3, s5
; GPRIDX-NEXT: s_mov_b32 s4, s6
; GPRIDX-NEXT: s_mov_b32 s5, s7
; GPRIDX-NEXT: s_mov_b32 s6, s8
; GPRIDX-NEXT: s_mov_b32 s7, s9
; GPRIDX-NEXT: s_mov_b32 s8, s10
; GPRIDX-NEXT: s_mov_b32 s9, s11
; GPRIDX-NEXT: s_mov_b32 s10, s12
; GPRIDX-NEXT: s_mov_b32 s11, s13
; GPRIDX-NEXT: s_mov_b32 s12, s14
; GPRIDX-NEXT: s_mov_b32 s13, s15
; GPRIDX-NEXT: s_mov_b32 s14, s16
; GPRIDX-NEXT: s_mov_b32 s15, s17
; GPRIDX-NEXT: s_mov_b32 s16, s18
; GPRIDX-NEXT: s_mov_b32 s17, s19
; GPRIDX-NEXT: s_mov_b32 s18, s20
; GPRIDX-NEXT: s_mov_b32 s19, s21
; GPRIDX-NEXT: s_mov_b32 s20, s22
; GPRIDX-NEXT: s_mov_b32 s21, s23
; GPRIDX-NEXT: s_mov_b32 s22, s24
; GPRIDX-NEXT: s_mov_b32 s23, s25
; GPRIDX-NEXT: s_mov_b32 s24, s26
; GPRIDX-NEXT: s_mov_b32 s25, s27
2299; GPRIDX-NEXT: s_mov_b32 s26, s28 2300; GPRIDX-NEXT: s_mov_b32 s27, s29 2301; GPRIDX-NEXT: s_mov_b32 s28, s30 2302; GPRIDX-NEXT: s_mov_b32 s29, s31 2303; GPRIDX-NEXT: s_mov_b32 s31, s33 2304; GPRIDX-NEXT: s_mov_b32 s30, s32 2305; GPRIDX-NEXT: s_mov_b32 m0, s35 2306; GPRIDX-NEXT: s_nop 0 2307; GPRIDX-NEXT: s_movreld_b32 s0, s34 2308; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2309; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2310; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2311; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2312; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2313; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2314; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2315; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2316; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2317; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 2318; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 2319; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 2320; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 2321; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 2322; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 2323; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 2324; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 2325; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 2326; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 2327; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 2328; GPRIDX-NEXT: v_mov_b32_e32 v20, s20 2329; GPRIDX-NEXT: v_mov_b32_e32 v21, s21 2330; GPRIDX-NEXT: v_mov_b32_e32 v22, s22 2331; GPRIDX-NEXT: v_mov_b32_e32 v23, s23 2332; GPRIDX-NEXT: v_mov_b32_e32 v24, s24 2333; GPRIDX-NEXT: v_mov_b32_e32 v25, s25 2334; GPRIDX-NEXT: v_mov_b32_e32 v26, s26 2335; GPRIDX-NEXT: v_mov_b32_e32 v27, s27 2336; GPRIDX-NEXT: v_mov_b32_e32 v28, s28 2337; GPRIDX-NEXT: v_mov_b32_e32 v29, s29 2338; GPRIDX-NEXT: v_mov_b32_e32 v30, s30 2339; GPRIDX-NEXT: v_mov_b32_e32 v31, s31 2340; GPRIDX-NEXT: ; return to shader part epilog 2341; 2342; MOVREL-LABEL: dyn_insertelement_v32f32_s_s_s: 2343; MOVREL: ; %bb.0: ; %entry 2344; MOVREL-NEXT: s_mov_b32 s0, s2 2345; MOVREL-NEXT: s_mov_b32 m0, s35 2346; MOVREL-NEXT: s_mov_b32 s1, s3 2347; MOVREL-NEXT: s_mov_b32 s2, s4 2348; MOVREL-NEXT: s_mov_b32 s3, s5 2349; MOVREL-NEXT: 
s_mov_b32 s4, s6 2350; MOVREL-NEXT: s_mov_b32 s5, s7 2351; MOVREL-NEXT: s_mov_b32 s6, s8 2352; MOVREL-NEXT: s_mov_b32 s7, s9 2353; MOVREL-NEXT: s_mov_b32 s8, s10 2354; MOVREL-NEXT: s_mov_b32 s9, s11 2355; MOVREL-NEXT: s_mov_b32 s10, s12 2356; MOVREL-NEXT: s_mov_b32 s11, s13 2357; MOVREL-NEXT: s_mov_b32 s12, s14 2358; MOVREL-NEXT: s_mov_b32 s13, s15 2359; MOVREL-NEXT: s_mov_b32 s14, s16 2360; MOVREL-NEXT: s_mov_b32 s15, s17 2361; MOVREL-NEXT: s_mov_b32 s16, s18 2362; MOVREL-NEXT: s_mov_b32 s17, s19 2363; MOVREL-NEXT: s_mov_b32 s18, s20 2364; MOVREL-NEXT: s_mov_b32 s19, s21 2365; MOVREL-NEXT: s_mov_b32 s20, s22 2366; MOVREL-NEXT: s_mov_b32 s21, s23 2367; MOVREL-NEXT: s_mov_b32 s22, s24 2368; MOVREL-NEXT: s_mov_b32 s23, s25 2369; MOVREL-NEXT: s_mov_b32 s24, s26 2370; MOVREL-NEXT: s_mov_b32 s25, s27 2371; MOVREL-NEXT: s_mov_b32 s26, s28 2372; MOVREL-NEXT: s_mov_b32 s27, s29 2373; MOVREL-NEXT: s_mov_b32 s28, s30 2374; MOVREL-NEXT: s_mov_b32 s29, s31 2375; MOVREL-NEXT: s_mov_b32 s31, s33 2376; MOVREL-NEXT: s_mov_b32 s30, s32 2377; MOVREL-NEXT: s_movreld_b32 s0, s34 2378; MOVREL-NEXT: v_mov_b32_e32 v0, s0 2379; MOVREL-NEXT: v_mov_b32_e32 v1, s1 2380; MOVREL-NEXT: v_mov_b32_e32 v2, s2 2381; MOVREL-NEXT: v_mov_b32_e32 v3, s3 2382; MOVREL-NEXT: v_mov_b32_e32 v4, s4 2383; MOVREL-NEXT: v_mov_b32_e32 v5, s5 2384; MOVREL-NEXT: v_mov_b32_e32 v6, s6 2385; MOVREL-NEXT: v_mov_b32_e32 v7, s7 2386; MOVREL-NEXT: v_mov_b32_e32 v8, s8 2387; MOVREL-NEXT: v_mov_b32_e32 v9, s9 2388; MOVREL-NEXT: v_mov_b32_e32 v10, s10 2389; MOVREL-NEXT: v_mov_b32_e32 v11, s11 2390; MOVREL-NEXT: v_mov_b32_e32 v12, s12 2391; MOVREL-NEXT: v_mov_b32_e32 v13, s13 2392; MOVREL-NEXT: v_mov_b32_e32 v14, s14 2393; MOVREL-NEXT: v_mov_b32_e32 v15, s15 2394; MOVREL-NEXT: v_mov_b32_e32 v16, s16 2395; MOVREL-NEXT: v_mov_b32_e32 v17, s17 2396; MOVREL-NEXT: v_mov_b32_e32 v18, s18 2397; MOVREL-NEXT: v_mov_b32_e32 v19, s19 2398; MOVREL-NEXT: v_mov_b32_e32 v20, s20 2399; MOVREL-NEXT: v_mov_b32_e32 v21, s21 2400; MOVREL-NEXT: 
v_mov_b32_e32 v22, s22 2401; MOVREL-NEXT: v_mov_b32_e32 v23, s23 2402; MOVREL-NEXT: v_mov_b32_e32 v24, s24 2403; MOVREL-NEXT: v_mov_b32_e32 v25, s25 2404; MOVREL-NEXT: v_mov_b32_e32 v26, s26 2405; MOVREL-NEXT: v_mov_b32_e32 v27, s27 2406; MOVREL-NEXT: v_mov_b32_e32 v28, s28 2407; MOVREL-NEXT: v_mov_b32_e32 v29, s29 2408; MOVREL-NEXT: v_mov_b32_e32 v30, s30 2409; MOVREL-NEXT: v_mov_b32_e32 v31, s31 2410; MOVREL-NEXT: ; return to shader part epilog 2411entry: 2412 %insert = insertelement <32 x float> %vec, float %val, i32 %idx 2413 ret <32 x float> %insert 2414} 2415 2416define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_s_s(<16 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) { 2417; GPRIDX-LABEL: dyn_insertelement_v16i64_s_s_s: 2418; GPRIDX: ; %bb.0: ; %entry 2419; GPRIDX-NEXT: s_mov_b32 s0, s2 2420; GPRIDX-NEXT: s_mov_b32 s1, s3 2421; GPRIDX-NEXT: s_mov_b32 s2, s4 2422; GPRIDX-NEXT: s_mov_b32 s3, s5 2423; GPRIDX-NEXT: s_mov_b32 s4, s6 2424; GPRIDX-NEXT: s_mov_b32 s5, s7 2425; GPRIDX-NEXT: s_mov_b32 s6, s8 2426; GPRIDX-NEXT: s_mov_b32 s7, s9 2427; GPRIDX-NEXT: s_mov_b32 s8, s10 2428; GPRIDX-NEXT: s_mov_b32 s9, s11 2429; GPRIDX-NEXT: s_mov_b32 s10, s12 2430; GPRIDX-NEXT: s_mov_b32 s11, s13 2431; GPRIDX-NEXT: s_mov_b32 s12, s14 2432; GPRIDX-NEXT: s_mov_b32 s13, s15 2433; GPRIDX-NEXT: s_mov_b32 s14, s16 2434; GPRIDX-NEXT: s_mov_b32 s15, s17 2435; GPRIDX-NEXT: s_mov_b32 s16, s18 2436; GPRIDX-NEXT: s_mov_b32 s17, s19 2437; GPRIDX-NEXT: s_mov_b32 s18, s20 2438; GPRIDX-NEXT: s_mov_b32 s19, s21 2439; GPRIDX-NEXT: s_mov_b32 s20, s22 2440; GPRIDX-NEXT: s_mov_b32 s21, s23 2441; GPRIDX-NEXT: s_mov_b32 s22, s24 2442; GPRIDX-NEXT: s_mov_b32 s23, s25 2443; GPRIDX-NEXT: s_mov_b32 s24, s26 2444; GPRIDX-NEXT: s_mov_b32 s25, s27 2445; GPRIDX-NEXT: s_mov_b32 s26, s28 2446; GPRIDX-NEXT: s_mov_b32 s27, s29 2447; GPRIDX-NEXT: s_mov_b32 s28, s30 2448; GPRIDX-NEXT: s_mov_b32 s29, s31 2449; GPRIDX-NEXT: s_mov_b32 s31, s33 2450; GPRIDX-NEXT: s_mov_b32 s30, s32 2451; GPRIDX-NEXT: 
s_mov_b32 m0, s36 2452; GPRIDX-NEXT: s_nop 0 2453; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35] 2454; GPRIDX-NEXT: ; return to shader part epilog 2455; 2456; MOVREL-LABEL: dyn_insertelement_v16i64_s_s_s: 2457; MOVREL: ; %bb.0: ; %entry 2458; MOVREL-NEXT: s_mov_b32 s0, s2 2459; MOVREL-NEXT: s_mov_b32 s1, s3 2460; MOVREL-NEXT: s_mov_b32 m0, s36 2461; MOVREL-NEXT: s_mov_b32 s2, s4 2462; MOVREL-NEXT: s_mov_b32 s3, s5 2463; MOVREL-NEXT: s_mov_b32 s4, s6 2464; MOVREL-NEXT: s_mov_b32 s5, s7 2465; MOVREL-NEXT: s_mov_b32 s6, s8 2466; MOVREL-NEXT: s_mov_b32 s7, s9 2467; MOVREL-NEXT: s_mov_b32 s8, s10 2468; MOVREL-NEXT: s_mov_b32 s9, s11 2469; MOVREL-NEXT: s_mov_b32 s10, s12 2470; MOVREL-NEXT: s_mov_b32 s11, s13 2471; MOVREL-NEXT: s_mov_b32 s12, s14 2472; MOVREL-NEXT: s_mov_b32 s13, s15 2473; MOVREL-NEXT: s_mov_b32 s14, s16 2474; MOVREL-NEXT: s_mov_b32 s15, s17 2475; MOVREL-NEXT: s_mov_b32 s16, s18 2476; MOVREL-NEXT: s_mov_b32 s17, s19 2477; MOVREL-NEXT: s_mov_b32 s18, s20 2478; MOVREL-NEXT: s_mov_b32 s19, s21 2479; MOVREL-NEXT: s_mov_b32 s20, s22 2480; MOVREL-NEXT: s_mov_b32 s21, s23 2481; MOVREL-NEXT: s_mov_b32 s22, s24 2482; MOVREL-NEXT: s_mov_b32 s23, s25 2483; MOVREL-NEXT: s_mov_b32 s24, s26 2484; MOVREL-NEXT: s_mov_b32 s25, s27 2485; MOVREL-NEXT: s_mov_b32 s26, s28 2486; MOVREL-NEXT: s_mov_b32 s27, s29 2487; MOVREL-NEXT: s_mov_b32 s28, s30 2488; MOVREL-NEXT: s_mov_b32 s29, s31 2489; MOVREL-NEXT: s_mov_b32 s31, s33 2490; MOVREL-NEXT: s_mov_b32 s30, s32 2491; MOVREL-NEXT: s_movreld_b64 s[0:1], s[34:35] 2492; MOVREL-NEXT: ; return to shader part epilog 2493entry: 2494 %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx 2495 ret <16 x i64> %insert 2496} 2497 2498define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_s_s(<16 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 2499; GPRIDX-LABEL: dyn_insertelement_v16f64_s_s_s: 2500; GPRIDX: ; %bb.0: ; %entry 2501; GPRIDX-NEXT: s_mov_b32 s0, s2 2502; GPRIDX-NEXT: s_mov_b32 s1, s3 2503; GPRIDX-NEXT: s_mov_b32 
s2, s4 2504; GPRIDX-NEXT: s_mov_b32 s3, s5 2505; GPRIDX-NEXT: s_mov_b32 s4, s6 2506; GPRIDX-NEXT: s_mov_b32 s5, s7 2507; GPRIDX-NEXT: s_mov_b32 s6, s8 2508; GPRIDX-NEXT: s_mov_b32 s7, s9 2509; GPRIDX-NEXT: s_mov_b32 s8, s10 2510; GPRIDX-NEXT: s_mov_b32 s9, s11 2511; GPRIDX-NEXT: s_mov_b32 s10, s12 2512; GPRIDX-NEXT: s_mov_b32 s11, s13 2513; GPRIDX-NEXT: s_mov_b32 s12, s14 2514; GPRIDX-NEXT: s_mov_b32 s13, s15 2515; GPRIDX-NEXT: s_mov_b32 s14, s16 2516; GPRIDX-NEXT: s_mov_b32 s15, s17 2517; GPRIDX-NEXT: s_mov_b32 s16, s18 2518; GPRIDX-NEXT: s_mov_b32 s17, s19 2519; GPRIDX-NEXT: s_mov_b32 s18, s20 2520; GPRIDX-NEXT: s_mov_b32 s19, s21 2521; GPRIDX-NEXT: s_mov_b32 s20, s22 2522; GPRIDX-NEXT: s_mov_b32 s21, s23 2523; GPRIDX-NEXT: s_mov_b32 s22, s24 2524; GPRIDX-NEXT: s_mov_b32 s23, s25 2525; GPRIDX-NEXT: s_mov_b32 s24, s26 2526; GPRIDX-NEXT: s_mov_b32 s25, s27 2527; GPRIDX-NEXT: s_mov_b32 s26, s28 2528; GPRIDX-NEXT: s_mov_b32 s27, s29 2529; GPRIDX-NEXT: s_mov_b32 s28, s30 2530; GPRIDX-NEXT: s_mov_b32 s29, s31 2531; GPRIDX-NEXT: s_mov_b32 s31, s33 2532; GPRIDX-NEXT: s_mov_b32 s30, s32 2533; GPRIDX-NEXT: s_mov_b32 m0, s36 2534; GPRIDX-NEXT: s_nop 0 2535; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35] 2536; GPRIDX-NEXT: ; return to shader part epilog 2537; 2538; MOVREL-LABEL: dyn_insertelement_v16f64_s_s_s: 2539; MOVREL: ; %bb.0: ; %entry 2540; MOVREL-NEXT: s_mov_b32 s0, s2 2541; MOVREL-NEXT: s_mov_b32 s1, s3 2542; MOVREL-NEXT: s_mov_b32 m0, s36 2543; MOVREL-NEXT: s_mov_b32 s2, s4 2544; MOVREL-NEXT: s_mov_b32 s3, s5 2545; MOVREL-NEXT: s_mov_b32 s4, s6 2546; MOVREL-NEXT: s_mov_b32 s5, s7 2547; MOVREL-NEXT: s_mov_b32 s6, s8 2548; MOVREL-NEXT: s_mov_b32 s7, s9 2549; MOVREL-NEXT: s_mov_b32 s8, s10 2550; MOVREL-NEXT: s_mov_b32 s9, s11 2551; MOVREL-NEXT: s_mov_b32 s10, s12 2552; MOVREL-NEXT: s_mov_b32 s11, s13 2553; MOVREL-NEXT: s_mov_b32 s12, s14 2554; MOVREL-NEXT: s_mov_b32 s13, s15 2555; MOVREL-NEXT: s_mov_b32 s14, s16 2556; MOVREL-NEXT: s_mov_b32 s15, s17 2557; MOVREL-NEXT: 
s_mov_b32 s16, s18 2558; MOVREL-NEXT: s_mov_b32 s17, s19 2559; MOVREL-NEXT: s_mov_b32 s18, s20 2560; MOVREL-NEXT: s_mov_b32 s19, s21 2561; MOVREL-NEXT: s_mov_b32 s20, s22 2562; MOVREL-NEXT: s_mov_b32 s21, s23 2563; MOVREL-NEXT: s_mov_b32 s22, s24 2564; MOVREL-NEXT: s_mov_b32 s23, s25 2565; MOVREL-NEXT: s_mov_b32 s24, s26 2566; MOVREL-NEXT: s_mov_b32 s25, s27 2567; MOVREL-NEXT: s_mov_b32 s26, s28 2568; MOVREL-NEXT: s_mov_b32 s27, s29 2569; MOVREL-NEXT: s_mov_b32 s28, s30 2570; MOVREL-NEXT: s_mov_b32 s29, s31 2571; MOVREL-NEXT: s_mov_b32 s31, s33 2572; MOVREL-NEXT: s_mov_b32 s30, s32 2573; MOVREL-NEXT: s_movreld_b64 s[0:1], s[34:35] 2574; MOVREL-NEXT: ; return to shader part epilog 2575entry: 2576 %insert = insertelement <16 x double> %vec, double %val, i32 %idx 2577 ret <16 x double> %insert 2578} 2579 2580define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_v_s(<16 x i32> inreg %vec, i32 %val, i32 inreg %idx) { 2581; GPRIDX-LABEL: dyn_insertelement_v16i32_s_v_s: 2582; GPRIDX: ; %bb.0: ; %entry 2583; GPRIDX-NEXT: s_mov_b32 s1, s3 2584; GPRIDX-NEXT: s_mov_b32 s3, s5 2585; GPRIDX-NEXT: s_mov_b32 s5, s7 2586; GPRIDX-NEXT: s_mov_b32 s7, s9 2587; GPRIDX-NEXT: s_mov_b32 s9, s11 2588; GPRIDX-NEXT: s_mov_b32 s11, s13 2589; GPRIDX-NEXT: s_mov_b32 s13, s15 2590; GPRIDX-NEXT: s_mov_b32 s15, s17 2591; GPRIDX-NEXT: s_mov_b32 s0, s2 2592; GPRIDX-NEXT: s_mov_b32 s2, s4 2593; GPRIDX-NEXT: s_mov_b32 s4, s6 2594; GPRIDX-NEXT: s_mov_b32 s6, s8 2595; GPRIDX-NEXT: s_mov_b32 s8, s10 2596; GPRIDX-NEXT: s_mov_b32 s10, s12 2597; GPRIDX-NEXT: s_mov_b32 s12, s14 2598; GPRIDX-NEXT: s_mov_b32 s14, s16 2599; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 2600; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 2601; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 2602; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 2603; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 2604; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 2605; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 2606; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 2607; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 2608; GPRIDX-NEXT: 
v_mov_b32_e32 v7, s6 2609; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 2610; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 2611; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 2612; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 2613; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 2614; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 2615; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST) 2616; GPRIDX-NEXT: v_mov_b32_e32 v1, v0 2617; GPRIDX-NEXT: s_set_gpr_idx_off 2618; GPRIDX-NEXT: v_readfirstlane_b32 s0, v1 2619; GPRIDX-NEXT: v_readfirstlane_b32 s1, v2 2620; GPRIDX-NEXT: v_readfirstlane_b32 s2, v3 2621; GPRIDX-NEXT: v_readfirstlane_b32 s3, v4 2622; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5 2623; GPRIDX-NEXT: v_readfirstlane_b32 s5, v6 2624; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 2625; GPRIDX-NEXT: v_readfirstlane_b32 s7, v8 2626; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9 2627; GPRIDX-NEXT: v_readfirstlane_b32 s9, v10 2628; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11 2629; GPRIDX-NEXT: v_readfirstlane_b32 s11, v12 2630; GPRIDX-NEXT: v_readfirstlane_b32 s12, v13 2631; GPRIDX-NEXT: v_readfirstlane_b32 s13, v14 2632; GPRIDX-NEXT: v_readfirstlane_b32 s14, v15 2633; GPRIDX-NEXT: v_readfirstlane_b32 s15, v16 2634; GPRIDX-NEXT: ; return to shader part epilog 2635; 2636; MOVREL-LABEL: dyn_insertelement_v16i32_s_v_s: 2637; MOVREL: ; %bb.0: ; %entry 2638; MOVREL-NEXT: s_mov_b32 s1, s3 2639; MOVREL-NEXT: s_mov_b32 s3, s5 2640; MOVREL-NEXT: s_mov_b32 s5, s7 2641; MOVREL-NEXT: s_mov_b32 s7, s9 2642; MOVREL-NEXT: s_mov_b32 s9, s11 2643; MOVREL-NEXT: s_mov_b32 s11, s13 2644; MOVREL-NEXT: s_mov_b32 s13, s15 2645; MOVREL-NEXT: s_mov_b32 s15, s17 2646; MOVREL-NEXT: s_mov_b32 s0, s2 2647; MOVREL-NEXT: s_mov_b32 s2, s4 2648; MOVREL-NEXT: s_mov_b32 s4, s6 2649; MOVREL-NEXT: s_mov_b32 s6, s8 2650; MOVREL-NEXT: s_mov_b32 s8, s10 2651; MOVREL-NEXT: s_mov_b32 s10, s12 2652; MOVREL-NEXT: s_mov_b32 s12, s14 2653; MOVREL-NEXT: s_mov_b32 s14, s16 2654; MOVREL-NEXT: v_mov_b32_e32 v16, s15 2655; MOVREL-NEXT: v_mov_b32_e32 v1, s0 2656; MOVREL-NEXT: s_mov_b32 m0, s18 2657; 
MOVREL-NEXT: v_mov_b32_e32 v15, s14 2658; MOVREL-NEXT: v_mov_b32_e32 v14, s13 2659; MOVREL-NEXT: v_mov_b32_e32 v13, s12 2660; MOVREL-NEXT: v_mov_b32_e32 v12, s11 2661; MOVREL-NEXT: v_mov_b32_e32 v11, s10 2662; MOVREL-NEXT: v_mov_b32_e32 v10, s9 2663; MOVREL-NEXT: v_mov_b32_e32 v9, s8 2664; MOVREL-NEXT: v_mov_b32_e32 v8, s7 2665; MOVREL-NEXT: v_mov_b32_e32 v7, s6 2666; MOVREL-NEXT: v_mov_b32_e32 v6, s5 2667; MOVREL-NEXT: v_mov_b32_e32 v5, s4 2668; MOVREL-NEXT: v_mov_b32_e32 v4, s3 2669; MOVREL-NEXT: v_mov_b32_e32 v3, s2 2670; MOVREL-NEXT: v_mov_b32_e32 v2, s1 2671; MOVREL-NEXT: v_movreld_b32_e32 v1, v0 2672; MOVREL-NEXT: v_readfirstlane_b32 s0, v1 2673; MOVREL-NEXT: v_readfirstlane_b32 s1, v2 2674; MOVREL-NEXT: v_readfirstlane_b32 s2, v3 2675; MOVREL-NEXT: v_readfirstlane_b32 s3, v4 2676; MOVREL-NEXT: v_readfirstlane_b32 s4, v5 2677; MOVREL-NEXT: v_readfirstlane_b32 s5, v6 2678; MOVREL-NEXT: v_readfirstlane_b32 s6, v7 2679; MOVREL-NEXT: v_readfirstlane_b32 s7, v8 2680; MOVREL-NEXT: v_readfirstlane_b32 s8, v9 2681; MOVREL-NEXT: v_readfirstlane_b32 s9, v10 2682; MOVREL-NEXT: v_readfirstlane_b32 s10, v11 2683; MOVREL-NEXT: v_readfirstlane_b32 s11, v12 2684; MOVREL-NEXT: v_readfirstlane_b32 s12, v13 2685; MOVREL-NEXT: v_readfirstlane_b32 s13, v14 2686; MOVREL-NEXT: v_readfirstlane_b32 s14, v15 2687; MOVREL-NEXT: v_readfirstlane_b32 s15, v16 2688; MOVREL-NEXT: ; return to shader part epilog 2689entry: 2690 %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx 2691 ret <16 x i32> %insert 2692} 2693 2694define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_v_s(<16 x float> inreg %vec, float %val, i32 inreg %idx) { 2695; GPRIDX-LABEL: dyn_insertelement_v16f32_s_v_s: 2696; GPRIDX: ; %bb.0: ; %entry 2697; GPRIDX-NEXT: s_mov_b32 s0, s2 2698; GPRIDX-NEXT: s_mov_b32 s1, s3 2699; GPRIDX-NEXT: s_mov_b32 s2, s4 2700; GPRIDX-NEXT: s_mov_b32 s3, s5 2701; GPRIDX-NEXT: s_mov_b32 s4, s6 2702; GPRIDX-NEXT: s_mov_b32 s5, s7 2703; GPRIDX-NEXT: s_mov_b32 s6, s8 2704; GPRIDX-NEXT: 
s_mov_b32 s7, s9 2705; GPRIDX-NEXT: s_mov_b32 s8, s10 2706; GPRIDX-NEXT: s_mov_b32 s9, s11 2707; GPRIDX-NEXT: s_mov_b32 s10, s12 2708; GPRIDX-NEXT: s_mov_b32 s11, s13 2709; GPRIDX-NEXT: s_mov_b32 s12, s14 2710; GPRIDX-NEXT: s_mov_b32 s13, s15 2711; GPRIDX-NEXT: s_mov_b32 s14, s16 2712; GPRIDX-NEXT: s_mov_b32 s15, s17 2713; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 2714; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2715; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2716; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2717; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2718; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2719; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2720; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2721; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2722; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2723; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 2724; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 2725; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 2726; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 2727; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 2728; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 2729; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 2730; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST) 2731; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 2732; GPRIDX-NEXT: s_set_gpr_idx_off 2733; GPRIDX-NEXT: ; return to shader part epilog 2734; 2735; MOVREL-LABEL: dyn_insertelement_v16f32_s_v_s: 2736; MOVREL: ; %bb.0: ; %entry 2737; MOVREL-NEXT: s_mov_b32 s0, s2 2738; MOVREL-NEXT: s_mov_b32 s1, s3 2739; MOVREL-NEXT: s_mov_b32 s2, s4 2740; MOVREL-NEXT: s_mov_b32 s3, s5 2741; MOVREL-NEXT: s_mov_b32 s4, s6 2742; MOVREL-NEXT: s_mov_b32 s5, s7 2743; MOVREL-NEXT: s_mov_b32 s6, s8 2744; MOVREL-NEXT: s_mov_b32 s7, s9 2745; MOVREL-NEXT: s_mov_b32 s8, s10 2746; MOVREL-NEXT: s_mov_b32 s9, s11 2747; MOVREL-NEXT: s_mov_b32 s10, s12 2748; MOVREL-NEXT: s_mov_b32 s11, s13 2749; MOVREL-NEXT: s_mov_b32 s12, s14 2750; MOVREL-NEXT: s_mov_b32 s13, s15 2751; MOVREL-NEXT: s_mov_b32 s14, s16 2752; MOVREL-NEXT: s_mov_b32 s15, s17 2753; MOVREL-NEXT: v_mov_b32_e32 v16, v0 2754; MOVREL-NEXT: v_mov_b32_e32 v0, s0 2755; MOVREL-NEXT: s_mov_b32 m0, s18 2756; 
MOVREL-NEXT: v_mov_b32_e32 v1, s1 2757; MOVREL-NEXT: v_mov_b32_e32 v2, s2 2758; MOVREL-NEXT: v_mov_b32_e32 v3, s3 2759; MOVREL-NEXT: v_mov_b32_e32 v4, s4 2760; MOVREL-NEXT: v_mov_b32_e32 v5, s5 2761; MOVREL-NEXT: v_mov_b32_e32 v6, s6 2762; MOVREL-NEXT: v_mov_b32_e32 v7, s7 2763; MOVREL-NEXT: v_mov_b32_e32 v8, s8 2764; MOVREL-NEXT: v_mov_b32_e32 v9, s9 2765; MOVREL-NEXT: v_mov_b32_e32 v10, s10 2766; MOVREL-NEXT: v_mov_b32_e32 v11, s11 2767; MOVREL-NEXT: v_mov_b32_e32 v12, s12 2768; MOVREL-NEXT: v_mov_b32_e32 v13, s13 2769; MOVREL-NEXT: v_mov_b32_e32 v14, s14 2770; MOVREL-NEXT: v_mov_b32_e32 v15, s15 2771; MOVREL-NEXT: v_movreld_b32_e32 v0, v16 2772; MOVREL-NEXT: ; return to shader part epilog 2773entry: 2774 %insert = insertelement <16 x float> %vec, float %val, i32 %idx 2775 ret <16 x float> %insert 2776} 2777 2778define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_v_s(<32 x float> inreg %vec, float %val, i32 inreg %idx) { 2779; GPRIDX-LABEL: dyn_insertelement_v32f32_s_v_s: 2780; GPRIDX: ; %bb.0: ; %entry 2781; GPRIDX-NEXT: s_mov_b32 s0, s2 2782; GPRIDX-NEXT: s_mov_b32 s1, s3 2783; GPRIDX-NEXT: s_mov_b32 s2, s4 2784; GPRIDX-NEXT: s_mov_b32 s3, s5 2785; GPRIDX-NEXT: s_mov_b32 s4, s6 2786; GPRIDX-NEXT: s_mov_b32 s5, s7 2787; GPRIDX-NEXT: s_mov_b32 s6, s8 2788; GPRIDX-NEXT: s_mov_b32 s7, s9 2789; GPRIDX-NEXT: s_mov_b32 s8, s10 2790; GPRIDX-NEXT: s_mov_b32 s9, s11 2791; GPRIDX-NEXT: s_mov_b32 s10, s12 2792; GPRIDX-NEXT: s_mov_b32 s11, s13 2793; GPRIDX-NEXT: s_mov_b32 s12, s14 2794; GPRIDX-NEXT: s_mov_b32 s13, s15 2795; GPRIDX-NEXT: s_mov_b32 s14, s16 2796; GPRIDX-NEXT: s_mov_b32 s15, s17 2797; GPRIDX-NEXT: s_mov_b32 s16, s18 2798; GPRIDX-NEXT: s_mov_b32 s17, s19 2799; GPRIDX-NEXT: s_mov_b32 s18, s20 2800; GPRIDX-NEXT: s_mov_b32 s19, s21 2801; GPRIDX-NEXT: s_mov_b32 s20, s22 2802; GPRIDX-NEXT: s_mov_b32 s21, s23 2803; GPRIDX-NEXT: s_mov_b32 s22, s24 2804; GPRIDX-NEXT: s_mov_b32 s23, s25 2805; GPRIDX-NEXT: s_mov_b32 s24, s26 2806; GPRIDX-NEXT: s_mov_b32 s25, s27 
2807; GPRIDX-NEXT: s_mov_b32 s26, s28 2808; GPRIDX-NEXT: s_mov_b32 s27, s29 2809; GPRIDX-NEXT: s_mov_b32 s28, s30 2810; GPRIDX-NEXT: s_mov_b32 s29, s31 2811; GPRIDX-NEXT: s_mov_b32 s31, s33 2812; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 2813; GPRIDX-NEXT: s_mov_b32 s30, s32 2814; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 2815; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 2816; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 2817; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 2818; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 2819; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 2820; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 2821; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 2822; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 2823; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 2824; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 2825; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 2826; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 2827; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 2828; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 2829; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 2830; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 2831; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 2832; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 2833; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 2834; GPRIDX-NEXT: v_mov_b32_e32 v20, s20 2835; GPRIDX-NEXT: v_mov_b32_e32 v21, s21 2836; GPRIDX-NEXT: v_mov_b32_e32 v22, s22 2837; GPRIDX-NEXT: v_mov_b32_e32 v23, s23 2838; GPRIDX-NEXT: v_mov_b32_e32 v24, s24 2839; GPRIDX-NEXT: v_mov_b32_e32 v25, s25 2840; GPRIDX-NEXT: v_mov_b32_e32 v26, s26 2841; GPRIDX-NEXT: v_mov_b32_e32 v27, s27 2842; GPRIDX-NEXT: v_mov_b32_e32 v28, s28 2843; GPRIDX-NEXT: v_mov_b32_e32 v29, s29 2844; GPRIDX-NEXT: v_mov_b32_e32 v30, s30 2845; GPRIDX-NEXT: v_mov_b32_e32 v31, s31 2846; GPRIDX-NEXT: s_set_gpr_idx_on s34, gpr_idx(DST) 2847; GPRIDX-NEXT: v_mov_b32_e32 v0, v32 2848; GPRIDX-NEXT: s_set_gpr_idx_off 2849; GPRIDX-NEXT: ; return to shader part epilog 2850; 2851; MOVREL-LABEL: dyn_insertelement_v32f32_s_v_s: 2852; MOVREL: ; %bb.0: ; %entry 2853; MOVREL-NEXT: s_mov_b32 s0, s2 2854; MOVREL-NEXT: s_mov_b32 s1, s3 2855; MOVREL-NEXT: s_mov_b32 s2, s4 2856; MOVREL-NEXT: 
s_mov_b32 s3, s5 2857; MOVREL-NEXT: s_mov_b32 s4, s6 2858; MOVREL-NEXT: s_mov_b32 s5, s7 2859; MOVREL-NEXT: s_mov_b32 s6, s8 2860; MOVREL-NEXT: s_mov_b32 s7, s9 2861; MOVREL-NEXT: s_mov_b32 s8, s10 2862; MOVREL-NEXT: s_mov_b32 s9, s11 2863; MOVREL-NEXT: s_mov_b32 s10, s12 2864; MOVREL-NEXT: s_mov_b32 s11, s13 2865; MOVREL-NEXT: s_mov_b32 s12, s14 2866; MOVREL-NEXT: s_mov_b32 s13, s15 2867; MOVREL-NEXT: s_mov_b32 s14, s16 2868; MOVREL-NEXT: s_mov_b32 s15, s17 2869; MOVREL-NEXT: s_mov_b32 s16, s18 2870; MOVREL-NEXT: s_mov_b32 s17, s19 2871; MOVREL-NEXT: s_mov_b32 s18, s20 2872; MOVREL-NEXT: s_mov_b32 s19, s21 2873; MOVREL-NEXT: s_mov_b32 s20, s22 2874; MOVREL-NEXT: s_mov_b32 s21, s23 2875; MOVREL-NEXT: s_mov_b32 s22, s24 2876; MOVREL-NEXT: s_mov_b32 s23, s25 2877; MOVREL-NEXT: s_mov_b32 s24, s26 2878; MOVREL-NEXT: s_mov_b32 s25, s27 2879; MOVREL-NEXT: s_mov_b32 s26, s28 2880; MOVREL-NEXT: s_mov_b32 s27, s29 2881; MOVREL-NEXT: s_mov_b32 s28, s30 2882; MOVREL-NEXT: s_mov_b32 s29, s31 2883; MOVREL-NEXT: s_mov_b32 s31, s33 2884; MOVREL-NEXT: v_mov_b32_e32 v32, v0 2885; MOVREL-NEXT: s_mov_b32 s30, s32 2886; MOVREL-NEXT: v_mov_b32_e32 v0, s0 2887; MOVREL-NEXT: s_mov_b32 m0, s34 2888; MOVREL-NEXT: v_mov_b32_e32 v1, s1 2889; MOVREL-NEXT: v_mov_b32_e32 v2, s2 2890; MOVREL-NEXT: v_mov_b32_e32 v3, s3 2891; MOVREL-NEXT: v_mov_b32_e32 v4, s4 2892; MOVREL-NEXT: v_mov_b32_e32 v5, s5 2893; MOVREL-NEXT: v_mov_b32_e32 v6, s6 2894; MOVREL-NEXT: v_mov_b32_e32 v7, s7 2895; MOVREL-NEXT: v_mov_b32_e32 v8, s8 2896; MOVREL-NEXT: v_mov_b32_e32 v9, s9 2897; MOVREL-NEXT: v_mov_b32_e32 v10, s10 2898; MOVREL-NEXT: v_mov_b32_e32 v11, s11 2899; MOVREL-NEXT: v_mov_b32_e32 v12, s12 2900; MOVREL-NEXT: v_mov_b32_e32 v13, s13 2901; MOVREL-NEXT: v_mov_b32_e32 v14, s14 2902; MOVREL-NEXT: v_mov_b32_e32 v15, s15 2903; MOVREL-NEXT: v_mov_b32_e32 v16, s16 2904; MOVREL-NEXT: v_mov_b32_e32 v17, s17 2905; MOVREL-NEXT: v_mov_b32_e32 v18, s18 2906; MOVREL-NEXT: v_mov_b32_e32 v19, s19 2907; MOVREL-NEXT: 
v_mov_b32_e32 v20, s20 2908; MOVREL-NEXT: v_mov_b32_e32 v21, s21 2909; MOVREL-NEXT: v_mov_b32_e32 v22, s22 2910; MOVREL-NEXT: v_mov_b32_e32 v23, s23 2911; MOVREL-NEXT: v_mov_b32_e32 v24, s24 2912; MOVREL-NEXT: v_mov_b32_e32 v25, s25 2913; MOVREL-NEXT: v_mov_b32_e32 v26, s26 2914; MOVREL-NEXT: v_mov_b32_e32 v27, s27 2915; MOVREL-NEXT: v_mov_b32_e32 v28, s28 2916; MOVREL-NEXT: v_mov_b32_e32 v29, s29 2917; MOVREL-NEXT: v_mov_b32_e32 v30, s30 2918; MOVREL-NEXT: v_mov_b32_e32 v31, s31 2919; MOVREL-NEXT: v_movreld_b32_e32 v0, v32 2920; MOVREL-NEXT: ; return to shader part epilog 2921entry: 2922 %insert = insertelement <32 x float> %vec, float %val, i32 %idx 2923 ret <32 x float> %insert 2924} 2925 2926define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %vec, i64 %val, i32 inreg %idx) { 2927; GPRIDX-LABEL: dyn_insertelement_v16i64_s_v_s: 2928; GPRIDX: ; %bb.0: ; %entry 2929; GPRIDX-NEXT: s_mov_b32 s1, s3 2930; GPRIDX-NEXT: s_mov_b32 s3, s5 2931; GPRIDX-NEXT: s_mov_b32 s5, s7 2932; GPRIDX-NEXT: s_mov_b32 s7, s9 2933; GPRIDX-NEXT: s_mov_b32 s9, s11 2934; GPRIDX-NEXT: s_mov_b32 s11, s13 2935; GPRIDX-NEXT: s_mov_b32 s13, s15 2936; GPRIDX-NEXT: s_mov_b32 s15, s17 2937; GPRIDX-NEXT: s_mov_b32 s17, s19 2938; GPRIDX-NEXT: s_mov_b32 s19, s21 2939; GPRIDX-NEXT: s_mov_b32 s21, s23 2940; GPRIDX-NEXT: s_mov_b32 s23, s25 2941; GPRIDX-NEXT: s_mov_b32 s25, s27 2942; GPRIDX-NEXT: s_mov_b32 s27, s29 2943; GPRIDX-NEXT: s_mov_b32 s29, s31 2944; GPRIDX-NEXT: s_mov_b32 s31, s33 2945; GPRIDX-NEXT: s_mov_b32 s0, s2 2946; GPRIDX-NEXT: s_mov_b32 s2, s4 2947; GPRIDX-NEXT: s_mov_b32 s4, s6 2948; GPRIDX-NEXT: s_mov_b32 s6, s8 2949; GPRIDX-NEXT: s_mov_b32 s8, s10 2950; GPRIDX-NEXT: s_mov_b32 s10, s12 2951; GPRIDX-NEXT: s_mov_b32 s12, s14 2952; GPRIDX-NEXT: s_mov_b32 s14, s16 2953; GPRIDX-NEXT: s_mov_b32 s16, s18 2954; GPRIDX-NEXT: s_mov_b32 s18, s20 2955; GPRIDX-NEXT: s_mov_b32 s20, s22 2956; GPRIDX-NEXT: s_mov_b32 s22, s24 2957; GPRIDX-NEXT: s_mov_b32 s24, s26 2958; 
GPRIDX-NEXT: s_mov_b32 s26, s28 2959; GPRIDX-NEXT: s_mov_b32 s28, s30 2960; GPRIDX-NEXT: s_mov_b32 s30, s32 2961; GPRIDX-NEXT: v_mov_b32_e32 v33, s31 2962; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1 2963; GPRIDX-NEXT: v_mov_b32_e32 v32, s30 2964; GPRIDX-NEXT: v_mov_b32_e32 v31, s29 2965; GPRIDX-NEXT: v_mov_b32_e32 v30, s28 2966; GPRIDX-NEXT: v_mov_b32_e32 v29, s27 2967; GPRIDX-NEXT: v_mov_b32_e32 v28, s26 2968; GPRIDX-NEXT: v_mov_b32_e32 v27, s25 2969; GPRIDX-NEXT: v_mov_b32_e32 v26, s24 2970; GPRIDX-NEXT: v_mov_b32_e32 v25, s23 2971; GPRIDX-NEXT: v_mov_b32_e32 v24, s22 2972; GPRIDX-NEXT: v_mov_b32_e32 v23, s21 2973; GPRIDX-NEXT: v_mov_b32_e32 v22, s20 2974; GPRIDX-NEXT: v_mov_b32_e32 v21, s19 2975; GPRIDX-NEXT: v_mov_b32_e32 v20, s18 2976; GPRIDX-NEXT: v_mov_b32_e32 v19, s17 2977; GPRIDX-NEXT: v_mov_b32_e32 v18, s16 2978; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 2979; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 2980; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 2981; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 2982; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 2983; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 2984; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 2985; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 2986; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 2987; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 2988; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 2989; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 2990; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 2991; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 2992; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 2993; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 2994; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) 2995; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 2996; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 2997; GPRIDX-NEXT: s_set_gpr_idx_off 2998; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 2999; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 3000; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 3001; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 3002; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 3003; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 3004; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 3005; GPRIDX-NEXT: 
v_readfirstlane_b32 s7, v9 3006; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 3007; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 3008; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 3009; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 3010; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 3011; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 3012; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16 3013; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17 3014; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18 3015; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19 3016; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20 3017; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21 3018; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22 3019; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23 3020; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24 3021; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25 3022; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26 3023; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27 3024; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28 3025; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29 3026; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30 3027; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31 3028; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32 3029; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33 3030; GPRIDX-NEXT: ; return to shader part epilog 3031; 3032; MOVREL-LABEL: dyn_insertelement_v16i64_s_v_s: 3033; MOVREL: ; %bb.0: ; %entry 3034; MOVREL-NEXT: s_mov_b32 s1, s3 3035; MOVREL-NEXT: s_mov_b32 s3, s5 3036; MOVREL-NEXT: s_mov_b32 s5, s7 3037; MOVREL-NEXT: s_mov_b32 s7, s9 3038; MOVREL-NEXT: s_mov_b32 s9, s11 3039; MOVREL-NEXT: s_mov_b32 s11, s13 3040; MOVREL-NEXT: s_mov_b32 s13, s15 3041; MOVREL-NEXT: s_mov_b32 s15, s17 3042; MOVREL-NEXT: s_mov_b32 s17, s19 3043; MOVREL-NEXT: s_mov_b32 s19, s21 3044; MOVREL-NEXT: s_mov_b32 s21, s23 3045; MOVREL-NEXT: s_mov_b32 s23, s25 3046; MOVREL-NEXT: s_mov_b32 s25, s27 3047; MOVREL-NEXT: s_mov_b32 s27, s29 3048; MOVREL-NEXT: s_mov_b32 s29, s31 3049; MOVREL-NEXT: s_mov_b32 s31, s33 3050; MOVREL-NEXT: s_mov_b32 s0, s2 3051; MOVREL-NEXT: s_mov_b32 s2, s4 3052; 
MOVREL-NEXT: s_mov_b32 s4, s6 3053; MOVREL-NEXT: s_mov_b32 s6, s8 3054; MOVREL-NEXT: s_mov_b32 s8, s10 3055; MOVREL-NEXT: s_mov_b32 s10, s12 3056; MOVREL-NEXT: s_mov_b32 s12, s14 3057; MOVREL-NEXT: s_mov_b32 s14, s16 3058; MOVREL-NEXT: s_mov_b32 s16, s18 3059; MOVREL-NEXT: s_mov_b32 s18, s20 3060; MOVREL-NEXT: s_mov_b32 s20, s22 3061; MOVREL-NEXT: s_mov_b32 s22, s24 3062; MOVREL-NEXT: s_mov_b32 s24, s26 3063; MOVREL-NEXT: s_mov_b32 s26, s28 3064; MOVREL-NEXT: s_mov_b32 s28, s30 3065; MOVREL-NEXT: s_mov_b32 s30, s32 3066; MOVREL-NEXT: v_mov_b32_e32 v33, s31 3067; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3068; MOVREL-NEXT: s_lshl_b32 m0, s34, 1 3069; MOVREL-NEXT: v_mov_b32_e32 v32, s30 3070; MOVREL-NEXT: v_mov_b32_e32 v31, s29 3071; MOVREL-NEXT: v_mov_b32_e32 v30, s28 3072; MOVREL-NEXT: v_mov_b32_e32 v29, s27 3073; MOVREL-NEXT: v_mov_b32_e32 v28, s26 3074; MOVREL-NEXT: v_mov_b32_e32 v27, s25 3075; MOVREL-NEXT: v_mov_b32_e32 v26, s24 3076; MOVREL-NEXT: v_mov_b32_e32 v25, s23 3077; MOVREL-NEXT: v_mov_b32_e32 v24, s22 3078; MOVREL-NEXT: v_mov_b32_e32 v23, s21 3079; MOVREL-NEXT: v_mov_b32_e32 v22, s20 3080; MOVREL-NEXT: v_mov_b32_e32 v21, s19 3081; MOVREL-NEXT: v_mov_b32_e32 v20, s18 3082; MOVREL-NEXT: v_mov_b32_e32 v19, s17 3083; MOVREL-NEXT: v_mov_b32_e32 v18, s16 3084; MOVREL-NEXT: v_mov_b32_e32 v17, s15 3085; MOVREL-NEXT: v_mov_b32_e32 v16, s14 3086; MOVREL-NEXT: v_mov_b32_e32 v15, s13 3087; MOVREL-NEXT: v_mov_b32_e32 v14, s12 3088; MOVREL-NEXT: v_mov_b32_e32 v13, s11 3089; MOVREL-NEXT: v_mov_b32_e32 v12, s10 3090; MOVREL-NEXT: v_mov_b32_e32 v11, s9 3091; MOVREL-NEXT: v_mov_b32_e32 v10, s8 3092; MOVREL-NEXT: v_mov_b32_e32 v9, s7 3093; MOVREL-NEXT: v_mov_b32_e32 v8, s6 3094; MOVREL-NEXT: v_mov_b32_e32 v7, s5 3095; MOVREL-NEXT: v_mov_b32_e32 v6, s4 3096; MOVREL-NEXT: v_mov_b32_e32 v5, s3 3097; MOVREL-NEXT: v_mov_b32_e32 v4, s2 3098; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3099; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 3100; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 3101; MOVREL-NEXT: 
v_readfirstlane_b32 s0, v2 3102; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 3103; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 3104; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 3105; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 3106; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 3107; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 3108; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 3109; MOVREL-NEXT: v_readfirstlane_b32 s8, v10 3110; MOVREL-NEXT: v_readfirstlane_b32 s9, v11 3111; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 3112; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 3113; MOVREL-NEXT: v_readfirstlane_b32 s12, v14 3114; MOVREL-NEXT: v_readfirstlane_b32 s13, v15 3115; MOVREL-NEXT: v_readfirstlane_b32 s14, v16 3116; MOVREL-NEXT: v_readfirstlane_b32 s15, v17 3117; MOVREL-NEXT: v_readfirstlane_b32 s16, v18 3118; MOVREL-NEXT: v_readfirstlane_b32 s17, v19 3119; MOVREL-NEXT: v_readfirstlane_b32 s18, v20 3120; MOVREL-NEXT: v_readfirstlane_b32 s19, v21 3121; MOVREL-NEXT: v_readfirstlane_b32 s20, v22 3122; MOVREL-NEXT: v_readfirstlane_b32 s21, v23 3123; MOVREL-NEXT: v_readfirstlane_b32 s22, v24 3124; MOVREL-NEXT: v_readfirstlane_b32 s23, v25 3125; MOVREL-NEXT: v_readfirstlane_b32 s24, v26 3126; MOVREL-NEXT: v_readfirstlane_b32 s25, v27 3127; MOVREL-NEXT: v_readfirstlane_b32 s26, v28 3128; MOVREL-NEXT: v_readfirstlane_b32 s27, v29 3129; MOVREL-NEXT: v_readfirstlane_b32 s28, v30 3130; MOVREL-NEXT: v_readfirstlane_b32 s29, v31 3131; MOVREL-NEXT: v_readfirstlane_b32 s30, v32 3132; MOVREL-NEXT: v_readfirstlane_b32 s31, v33 3133; MOVREL-NEXT: ; return to shader part epilog 3134entry: 3135 %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx 3136 ret <16 x i64> %insert 3137} 3138 3139define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inreg %vec, double %val, i32 inreg %idx) { 3140; GPRIDX-LABEL: dyn_insertelement_v16f64_s_v_s: 3141; GPRIDX: ; %bb.0: ; %entry 3142; GPRIDX-NEXT: s_mov_b32 s1, s3 3143; GPRIDX-NEXT: s_mov_b32 s3, s5 3144; GPRIDX-NEXT: s_mov_b32 s5, s7 3145; GPRIDX-NEXT: 
s_mov_b32 s7, s9 3146; GPRIDX-NEXT: s_mov_b32 s9, s11 3147; GPRIDX-NEXT: s_mov_b32 s11, s13 3148; GPRIDX-NEXT: s_mov_b32 s13, s15 3149; GPRIDX-NEXT: s_mov_b32 s15, s17 3150; GPRIDX-NEXT: s_mov_b32 s17, s19 3151; GPRIDX-NEXT: s_mov_b32 s19, s21 3152; GPRIDX-NEXT: s_mov_b32 s21, s23 3153; GPRIDX-NEXT: s_mov_b32 s23, s25 3154; GPRIDX-NEXT: s_mov_b32 s25, s27 3155; GPRIDX-NEXT: s_mov_b32 s27, s29 3156; GPRIDX-NEXT: s_mov_b32 s29, s31 3157; GPRIDX-NEXT: s_mov_b32 s31, s33 3158; GPRIDX-NEXT: s_mov_b32 s0, s2 3159; GPRIDX-NEXT: s_mov_b32 s2, s4 3160; GPRIDX-NEXT: s_mov_b32 s4, s6 3161; GPRIDX-NEXT: s_mov_b32 s6, s8 3162; GPRIDX-NEXT: s_mov_b32 s8, s10 3163; GPRIDX-NEXT: s_mov_b32 s10, s12 3164; GPRIDX-NEXT: s_mov_b32 s12, s14 3165; GPRIDX-NEXT: s_mov_b32 s14, s16 3166; GPRIDX-NEXT: s_mov_b32 s16, s18 3167; GPRIDX-NEXT: s_mov_b32 s18, s20 3168; GPRIDX-NEXT: s_mov_b32 s20, s22 3169; GPRIDX-NEXT: s_mov_b32 s22, s24 3170; GPRIDX-NEXT: s_mov_b32 s24, s26 3171; GPRIDX-NEXT: s_mov_b32 s26, s28 3172; GPRIDX-NEXT: s_mov_b32 s28, s30 3173; GPRIDX-NEXT: s_mov_b32 s30, s32 3174; GPRIDX-NEXT: v_mov_b32_e32 v33, s31 3175; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1 3176; GPRIDX-NEXT: v_mov_b32_e32 v32, s30 3177; GPRIDX-NEXT: v_mov_b32_e32 v31, s29 3178; GPRIDX-NEXT: v_mov_b32_e32 v30, s28 3179; GPRIDX-NEXT: v_mov_b32_e32 v29, s27 3180; GPRIDX-NEXT: v_mov_b32_e32 v28, s26 3181; GPRIDX-NEXT: v_mov_b32_e32 v27, s25 3182; GPRIDX-NEXT: v_mov_b32_e32 v26, s24 3183; GPRIDX-NEXT: v_mov_b32_e32 v25, s23 3184; GPRIDX-NEXT: v_mov_b32_e32 v24, s22 3185; GPRIDX-NEXT: v_mov_b32_e32 v23, s21 3186; GPRIDX-NEXT: v_mov_b32_e32 v22, s20 3187; GPRIDX-NEXT: v_mov_b32_e32 v21, s19 3188; GPRIDX-NEXT: v_mov_b32_e32 v20, s18 3189; GPRIDX-NEXT: v_mov_b32_e32 v19, s17 3190; GPRIDX-NEXT: v_mov_b32_e32 v18, s16 3191; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 3192; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 3193; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 3194; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 3195; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 
3196; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 3197; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 3198; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 3199; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 3200; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 3201; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 3202; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 3203; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 3204; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 3205; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 3206; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 3207; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) 3208; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 3209; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 3210; GPRIDX-NEXT: s_set_gpr_idx_off 3211; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 3212; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 3213; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 3214; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 3215; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 3216; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 3217; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 3218; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 3219; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 3220; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 3221; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 3222; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 3223; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 3224; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 3225; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16 3226; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17 3227; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18 3228; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19 3229; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20 3230; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21 3231; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22 3232; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23 3233; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24 3234; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25 3235; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26 3236; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27 3237; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28 3238; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29 3239; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30 3240; 
GPRIDX-NEXT: v_readfirstlane_b32 s29, v31 3241; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32 3242; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33 3243; GPRIDX-NEXT: ; return to shader part epilog 3244; 3245; MOVREL-LABEL: dyn_insertelement_v16f64_s_v_s: 3246; MOVREL: ; %bb.0: ; %entry 3247; MOVREL-NEXT: s_mov_b32 s1, s3 3248; MOVREL-NEXT: s_mov_b32 s3, s5 3249; MOVREL-NEXT: s_mov_b32 s5, s7 3250; MOVREL-NEXT: s_mov_b32 s7, s9 3251; MOVREL-NEXT: s_mov_b32 s9, s11 3252; MOVREL-NEXT: s_mov_b32 s11, s13 3253; MOVREL-NEXT: s_mov_b32 s13, s15 3254; MOVREL-NEXT: s_mov_b32 s15, s17 3255; MOVREL-NEXT: s_mov_b32 s17, s19 3256; MOVREL-NEXT: s_mov_b32 s19, s21 3257; MOVREL-NEXT: s_mov_b32 s21, s23 3258; MOVREL-NEXT: s_mov_b32 s23, s25 3259; MOVREL-NEXT: s_mov_b32 s25, s27 3260; MOVREL-NEXT: s_mov_b32 s27, s29 3261; MOVREL-NEXT: s_mov_b32 s29, s31 3262; MOVREL-NEXT: s_mov_b32 s31, s33 3263; MOVREL-NEXT: s_mov_b32 s0, s2 3264; MOVREL-NEXT: s_mov_b32 s2, s4 3265; MOVREL-NEXT: s_mov_b32 s4, s6 3266; MOVREL-NEXT: s_mov_b32 s6, s8 3267; MOVREL-NEXT: s_mov_b32 s8, s10 3268; MOVREL-NEXT: s_mov_b32 s10, s12 3269; MOVREL-NEXT: s_mov_b32 s12, s14 3270; MOVREL-NEXT: s_mov_b32 s14, s16 3271; MOVREL-NEXT: s_mov_b32 s16, s18 3272; MOVREL-NEXT: s_mov_b32 s18, s20 3273; MOVREL-NEXT: s_mov_b32 s20, s22 3274; MOVREL-NEXT: s_mov_b32 s22, s24 3275; MOVREL-NEXT: s_mov_b32 s24, s26 3276; MOVREL-NEXT: s_mov_b32 s26, s28 3277; MOVREL-NEXT: s_mov_b32 s28, s30 3278; MOVREL-NEXT: s_mov_b32 s30, s32 3279; MOVREL-NEXT: v_mov_b32_e32 v33, s31 3280; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3281; MOVREL-NEXT: s_lshl_b32 m0, s34, 1 3282; MOVREL-NEXT: v_mov_b32_e32 v32, s30 3283; MOVREL-NEXT: v_mov_b32_e32 v31, s29 3284; MOVREL-NEXT: v_mov_b32_e32 v30, s28 3285; MOVREL-NEXT: v_mov_b32_e32 v29, s27 3286; MOVREL-NEXT: v_mov_b32_e32 v28, s26 3287; MOVREL-NEXT: v_mov_b32_e32 v27, s25 3288; MOVREL-NEXT: v_mov_b32_e32 v26, s24 3289; MOVREL-NEXT: v_mov_b32_e32 v25, s23 3290; MOVREL-NEXT: v_mov_b32_e32 v24, s22 3291; MOVREL-NEXT: 
v_mov_b32_e32 v23, s21 3292; MOVREL-NEXT: v_mov_b32_e32 v22, s20 3293; MOVREL-NEXT: v_mov_b32_e32 v21, s19 3294; MOVREL-NEXT: v_mov_b32_e32 v20, s18 3295; MOVREL-NEXT: v_mov_b32_e32 v19, s17 3296; MOVREL-NEXT: v_mov_b32_e32 v18, s16 3297; MOVREL-NEXT: v_mov_b32_e32 v17, s15 3298; MOVREL-NEXT: v_mov_b32_e32 v16, s14 3299; MOVREL-NEXT: v_mov_b32_e32 v15, s13 3300; MOVREL-NEXT: v_mov_b32_e32 v14, s12 3301; MOVREL-NEXT: v_mov_b32_e32 v13, s11 3302; MOVREL-NEXT: v_mov_b32_e32 v12, s10 3303; MOVREL-NEXT: v_mov_b32_e32 v11, s9 3304; MOVREL-NEXT: v_mov_b32_e32 v10, s8 3305; MOVREL-NEXT: v_mov_b32_e32 v9, s7 3306; MOVREL-NEXT: v_mov_b32_e32 v8, s6 3307; MOVREL-NEXT: v_mov_b32_e32 v7, s5 3308; MOVREL-NEXT: v_mov_b32_e32 v6, s4 3309; MOVREL-NEXT: v_mov_b32_e32 v5, s3 3310; MOVREL-NEXT: v_mov_b32_e32 v4, s2 3311; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3312; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 3313; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 3314; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 3315; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 3316; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 3317; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 3318; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 3319; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 3320; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 3321; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 3322; MOVREL-NEXT: v_readfirstlane_b32 s8, v10 3323; MOVREL-NEXT: v_readfirstlane_b32 s9, v11 3324; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 3325; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 3326; MOVREL-NEXT: v_readfirstlane_b32 s12, v14 3327; MOVREL-NEXT: v_readfirstlane_b32 s13, v15 3328; MOVREL-NEXT: v_readfirstlane_b32 s14, v16 3329; MOVREL-NEXT: v_readfirstlane_b32 s15, v17 3330; MOVREL-NEXT: v_readfirstlane_b32 s16, v18 3331; MOVREL-NEXT: v_readfirstlane_b32 s17, v19 3332; MOVREL-NEXT: v_readfirstlane_b32 s18, v20 3333; MOVREL-NEXT: v_readfirstlane_b32 s19, v21 3334; MOVREL-NEXT: v_readfirstlane_b32 s20, v22 3335; MOVREL-NEXT: v_readfirstlane_b32 s21, v23 3336; MOVREL-NEXT: 
v_readfirstlane_b32 s22, v24 3337; MOVREL-NEXT: v_readfirstlane_b32 s23, v25 3338; MOVREL-NEXT: v_readfirstlane_b32 s24, v26 3339; MOVREL-NEXT: v_readfirstlane_b32 s25, v27 3340; MOVREL-NEXT: v_readfirstlane_b32 s26, v28 3341; MOVREL-NEXT: v_readfirstlane_b32 s27, v29 3342; MOVREL-NEXT: v_readfirstlane_b32 s28, v30 3343; MOVREL-NEXT: v_readfirstlane_b32 s29, v31 3344; MOVREL-NEXT: v_readfirstlane_b32 s30, v32 3345; MOVREL-NEXT: v_readfirstlane_b32 s31, v33 3346; MOVREL-NEXT: ; return to shader part epilog 3347entry: 3348 %insert = insertelement <16 x double> %vec, double %val, i32 %idx 3349 ret <16 x double> %insert 3350} 3351 3352define amdgpu_ps <7 x i32> @dyn_insertelement_v7i32_s_s_s(<7 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { 3353; GPRIDX-LABEL: dyn_insertelement_v7i32_s_s_s: 3354; GPRIDX: ; %bb.0: ; %entry 3355; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0 3356; GPRIDX-NEXT: s_cselect_b32 s0, s9, s2 3357; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 3358; GPRIDX-NEXT: s_cselect_b32 s1, s9, s3 3359; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 3360; GPRIDX-NEXT: s_cselect_b32 s2, s9, s4 3361; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 3362; GPRIDX-NEXT: s_cselect_b32 s3, s9, s5 3363; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 3364; GPRIDX-NEXT: s_cselect_b32 s4, s9, s6 3365; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 3366; GPRIDX-NEXT: s_cselect_b32 s5, s9, s7 3367; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 3368; GPRIDX-NEXT: s_cselect_b32 s6, s9, s8 3369; GPRIDX-NEXT: ; return to shader part epilog 3370; 3371; MOVREL-LABEL: dyn_insertelement_v7i32_s_s_s: 3372; MOVREL: ; %bb.0: ; %entry 3373; MOVREL-NEXT: s_cmp_eq_u32 s10, 0 3374; MOVREL-NEXT: s_cselect_b32 s0, s9, s2 3375; MOVREL-NEXT: s_cmp_eq_u32 s10, 1 3376; MOVREL-NEXT: s_cselect_b32 s1, s9, s3 3377; MOVREL-NEXT: s_cmp_eq_u32 s10, 2 3378; MOVREL-NEXT: s_cselect_b32 s2, s9, s4 3379; MOVREL-NEXT: s_cmp_eq_u32 s10, 3 3380; MOVREL-NEXT: s_cselect_b32 s3, s9, s5 3381; MOVREL-NEXT: s_cmp_eq_u32 s10, 4 3382; MOVREL-NEXT: s_cselect_b32 s4, s9, s6 3383; MOVREL-NEXT: 
s_cmp_eq_u32 s10, 5 3384; MOVREL-NEXT: s_cselect_b32 s5, s9, s7 3385; MOVREL-NEXT: s_cmp_eq_u32 s10, 6 3386; MOVREL-NEXT: s_cselect_b32 s6, s9, s8 3387; MOVREL-NEXT: ; return to shader part epilog 3388entry: 3389 %insert = insertelement <7 x i32> %vec, i32 %val, i32 %idx 3390 ret <7 x i32> %insert 3391} 3392 3393define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { 3394; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s: 3395; GPRIDX: ; %bb.0: ; %entry 3396; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0 3397; GPRIDX-NEXT: s_cselect_b32 s0, s9, s2 3398; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 3399; GPRIDX-NEXT: s_cselect_b32 s1, s9, s3 3400; GPRIDX-NEXT: s_cmp_eq_u32 s10, 2 3401; GPRIDX-NEXT: s_cselect_b32 s2, s9, s4 3402; GPRIDX-NEXT: s_cmp_eq_u32 s10, 3 3403; GPRIDX-NEXT: s_cselect_b32 s3, s9, s5 3404; GPRIDX-NEXT: s_cmp_eq_u32 s10, 4 3405; GPRIDX-NEXT: s_cselect_b32 s4, s9, s6 3406; GPRIDX-NEXT: s_cmp_eq_u32 s10, 5 3407; GPRIDX-NEXT: s_cselect_b32 s5, s9, s7 3408; GPRIDX-NEXT: s_cmp_eq_u32 s10, 6 3409; GPRIDX-NEXT: s_cselect_b32 s6, s9, s8 3410; GPRIDX-NEXT: ; return to shader part epilog 3411; 3412; MOVREL-LABEL: dyn_insertelement_v7p3i8_s_s_s: 3413; MOVREL: ; %bb.0: ; %entry 3414; MOVREL-NEXT: s_cmp_eq_u32 s10, 0 3415; MOVREL-NEXT: s_cselect_b32 s0, s9, s2 3416; MOVREL-NEXT: s_cmp_eq_u32 s10, 1 3417; MOVREL-NEXT: s_cselect_b32 s1, s9, s3 3418; MOVREL-NEXT: s_cmp_eq_u32 s10, 2 3419; MOVREL-NEXT: s_cselect_b32 s2, s9, s4 3420; MOVREL-NEXT: s_cmp_eq_u32 s10, 3 3421; MOVREL-NEXT: s_cselect_b32 s3, s9, s5 3422; MOVREL-NEXT: s_cmp_eq_u32 s10, 4 3423; MOVREL-NEXT: s_cselect_b32 s4, s9, s6 3424; MOVREL-NEXT: s_cmp_eq_u32 s10, 5 3425; MOVREL-NEXT: s_cselect_b32 s5, s9, s7 3426; MOVREL-NEXT: s_cmp_eq_u32 s10, 6 3427; MOVREL-NEXT: s_cselect_b32 s6, s9, s8 3428; MOVREL-NEXT: ; return to shader part epilog 3429entry: 3430 %insert = insertelement <7 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 
%idx 3431 ret <7 x i8 addrspace(3)*> %insert 3432} 3433 3434define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float %val, i32 inreg %idx) { 3435; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s: 3436; GPRIDX: ; %bb.0: ; %entry 3437; GPRIDX-NEXT: s_mov_b32 s0, s2 3438; GPRIDX-NEXT: s_mov_b32 s1, s3 3439; GPRIDX-NEXT: s_mov_b32 s2, s4 3440; GPRIDX-NEXT: s_mov_b32 s3, s5 3441; GPRIDX-NEXT: s_mov_b32 s4, s6 3442; GPRIDX-NEXT: s_mov_b32 s5, s7 3443; GPRIDX-NEXT: s_mov_b32 s6, s8 3444; GPRIDX-NEXT: v_mov_b32_e32 v14, s7 3445; GPRIDX-NEXT: v_mov_b32_e32 v7, s0 3446; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 0 3447; GPRIDX-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc 3448; GPRIDX-NEXT: v_mov_b32_e32 v8, s1 3449; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 1 3450; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v8, v0, vcc 3451; GPRIDX-NEXT: v_mov_b32_e32 v9, s2 3452; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 2 3453; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v9, v0, vcc 3454; GPRIDX-NEXT: v_mov_b32_e32 v10, s3 3455; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 3 3456; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v10, v0, vcc 3457; GPRIDX-NEXT: v_mov_b32_e32 v11, s4 3458; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 4 3459; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v11, v0, vcc 3460; GPRIDX-NEXT: v_mov_b32_e32 v12, s5 3461; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 5 3462; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v12, v0, vcc 3463; GPRIDX-NEXT: v_mov_b32_e32 v13, s6 3464; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s9, 6 3465; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v13, v0, vcc 3466; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 3467; GPRIDX-NEXT: ; return to shader part epilog 3468; 3469; MOVREL-LABEL: dyn_insertelement_v7f32_s_v_s: 3470; MOVREL: ; %bb.0: ; %entry 3471; MOVREL-NEXT: s_mov_b32 s0, s2 3472; MOVREL-NEXT: s_mov_b32 s1, s3 3473; MOVREL-NEXT: s_mov_b32 s2, s4 3474; MOVREL-NEXT: s_mov_b32 s3, s5 3475; MOVREL-NEXT: s_mov_b32 s4, s6 3476; MOVREL-NEXT: s_mov_b32 s5, s7 3477; MOVREL-NEXT: s_mov_b32 s6, s8 3478; MOVREL-NEXT: 
v_mov_b32_e32 v16, s7 3479; MOVREL-NEXT: v_mov_b32_e32 v9, s0 3480; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 0 3481; MOVREL-NEXT: v_mov_b32_e32 v10, s1 3482; MOVREL-NEXT: v_mov_b32_e32 v11, s2 3483; MOVREL-NEXT: v_mov_b32_e32 v12, s3 3484; MOVREL-NEXT: v_mov_b32_e32 v13, s4 3485; MOVREL-NEXT: v_cndmask_b32_e32 v7, v9, v0, vcc_lo 3486; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 1 3487; MOVREL-NEXT: v_mov_b32_e32 v14, s5 3488; MOVREL-NEXT: v_mov_b32_e32 v15, s6 3489; MOVREL-NEXT: v_cndmask_b32_e32 v1, v10, v0, vcc_lo 3490; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 2 3491; MOVREL-NEXT: v_cndmask_b32_e32 v2, v11, v0, vcc_lo 3492; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 3 3493; MOVREL-NEXT: v_cndmask_b32_e32 v3, v12, v0, vcc_lo 3494; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 4 3495; MOVREL-NEXT: v_cndmask_b32_e32 v4, v13, v0, vcc_lo 3496; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 5 3497; MOVREL-NEXT: v_cndmask_b32_e32 v5, v14, v0, vcc_lo 3498; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s9, 6 3499; MOVREL-NEXT: v_cndmask_b32_e32 v6, v15, v0, vcc_lo 3500; MOVREL-NEXT: v_mov_b32_e32 v0, v7 3501; MOVREL-NEXT: ; return to shader part epilog 3502entry: 3503 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 3504 ret <7 x float> %insert 3505} 3506 3507define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) { 3508; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v: 3509; GPRIDX: ; %bb.0: ; %entry 3510; GPRIDX-NEXT: s_mov_b32 s0, s2 3511; GPRIDX-NEXT: s_mov_b32 s1, s3 3512; GPRIDX-NEXT: s_mov_b32 s2, s4 3513; GPRIDX-NEXT: s_mov_b32 s3, s5 3514; GPRIDX-NEXT: s_mov_b32 s4, s6 3515; GPRIDX-NEXT: s_mov_b32 s5, s7 3516; GPRIDX-NEXT: s_mov_b32 s6, s8 3517; GPRIDX-NEXT: v_mov_b32_e32 v15, s7 3518; GPRIDX-NEXT: v_mov_b32_e32 v8, s0 3519; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 3520; GPRIDX-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc 3521; GPRIDX-NEXT: v_mov_b32_e32 v9, s1 3522; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3523; GPRIDX-NEXT: 
v_cndmask_b32_e32 v7, v9, v0, vcc 3524; GPRIDX-NEXT: v_mov_b32_e32 v10, s2 3525; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 3526; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v10, v0, vcc 3527; GPRIDX-NEXT: v_mov_b32_e32 v11, s3 3528; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 3529; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v11, v0, vcc 3530; GPRIDX-NEXT: v_mov_b32_e32 v12, s4 3531; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v1 3532; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v12, v0, vcc 3533; GPRIDX-NEXT: v_mov_b32_e32 v13, s5 3534; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v1 3535; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v13, v0, vcc 3536; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v1 3537; GPRIDX-NEXT: v_mov_b32_e32 v14, s6 3538; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v14, v0, vcc 3539; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 3540; GPRIDX-NEXT: v_mov_b32_e32 v1, v7 3541; GPRIDX-NEXT: ; return to shader part epilog 3542; 3543; MOVREL-LABEL: dyn_insertelement_v7f32_s_v_v: 3544; MOVREL: ; %bb.0: ; %entry 3545; MOVREL-NEXT: s_mov_b32 s0, s2 3546; MOVREL-NEXT: s_mov_b32 s1, s3 3547; MOVREL-NEXT: s_mov_b32 s2, s4 3548; MOVREL-NEXT: s_mov_b32 s3, s5 3549; MOVREL-NEXT: s_mov_b32 s4, s6 3550; MOVREL-NEXT: s_mov_b32 s5, s7 3551; MOVREL-NEXT: s_mov_b32 s6, s8 3552; MOVREL-NEXT: v_mov_b32_e32 v16, s7 3553; MOVREL-NEXT: v_mov_b32_e32 v9, s0 3554; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 3555; MOVREL-NEXT: v_mov_b32_e32 v10, s1 3556; MOVREL-NEXT: v_mov_b32_e32 v11, s2 3557; MOVREL-NEXT: v_mov_b32_e32 v12, s3 3558; MOVREL-NEXT: v_mov_b32_e32 v13, s4 3559; MOVREL-NEXT: v_cndmask_b32_e32 v8, v9, v0, vcc_lo 3560; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 3561; MOVREL-NEXT: v_mov_b32_e32 v14, s5 3562; MOVREL-NEXT: v_mov_b32_e32 v15, s6 3563; MOVREL-NEXT: v_cndmask_b32_e32 v7, v10, v0, vcc_lo 3564; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v1 3565; MOVREL-NEXT: v_cndmask_b32_e32 v2, v11, v0, vcc_lo 3566; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v1 3567; MOVREL-NEXT: v_cndmask_b32_e32 v3, v12, v0, vcc_lo 3568; MOVREL-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 4, v1 3569; MOVREL-NEXT: v_cndmask_b32_e32 v4, v13, v0, vcc_lo 3570; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v1 3571; MOVREL-NEXT: v_cndmask_b32_e32 v5, v14, v0, vcc_lo 3572; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v1 3573; MOVREL-NEXT: v_mov_b32_e32 v1, v7 3574; MOVREL-NEXT: v_cndmask_b32_e32 v6, v15, v0, vcc_lo 3575; MOVREL-NEXT: v_mov_b32_e32 v0, v8 3576; MOVREL-NEXT: ; return to shader part epilog 3577entry: 3578 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 3579 ret <7 x float> %insert 3580} 3581 3582define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_s(<7 x float> %vec, float %val, i32 inreg %idx) { 3583; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_s: 3584; GPRIDX: ; %bb.0: ; %entry 3585; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 3586; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3587; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 1 3588; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3589; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 2 3590; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 3591; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 3 3592; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 3593; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 4 3594; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 3595; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 5 3596; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 3597; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 6 3598; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 3599; GPRIDX-NEXT: ; return to shader part epilog 3600; 3601; MOVREL-LABEL: dyn_insertelement_v7f32_v_v_s: 3602; MOVREL: ; %bb.0: ; %entry 3603; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 3604; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3605; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 3606; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 3607; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 3608; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo 3609; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 3610; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, 
v7, vcc_lo 3611; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 3612; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo 3613; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 5 3614; MOVREL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 3615; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 6 3616; MOVREL-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo 3617; MOVREL-NEXT: ; return to shader part epilog 3618entry: 3619 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 3620 ret <7 x float> %insert 3621} 3622 3623define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_v(<7 x float> %vec, float %val, i32 %idx) { 3624; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_v: 3625; GPRIDX: ; %bb.0: ; %entry 3626; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 3627; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 3628; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8 3629; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 3630; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 2, v8 3631; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 3632; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 3, v8 3633; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 3634; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 4, v8 3635; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc 3636; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 5, v8 3637; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 3638; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 6, v8 3639; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 3640; GPRIDX-NEXT: ; return to shader part epilog 3641; 3642; MOVREL-LABEL: dyn_insertelement_v7f32_v_v_v: 3643; MOVREL: ; %bb.0: ; %entry 3644; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v8 3645; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo 3646; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 3647; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc_lo 3648; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v8 3649; MOVREL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo 3650; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v8 3651; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo 3652; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v8 
3653; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc_lo 3654; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v8 3655; MOVREL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 3656; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v8 3657; MOVREL-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc_lo 3658; MOVREL-NEXT: ; return to shader part epilog 3659entry: 3660 %insert = insertelement <7 x float> %vec, float %val, i32 %idx 3661 ret <7 x float> %insert 3662} 3663 3664define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_s_s(<7 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 3665; GPRIDX-LABEL: dyn_insertelement_v7f64_s_s_s: 3666; GPRIDX: ; %bb.0: ; %entry 3667; GPRIDX-NEXT: s_mov_b32 s0, s2 3668; GPRIDX-NEXT: s_mov_b32 s1, s3 3669; GPRIDX-NEXT: s_mov_b32 s2, s4 3670; GPRIDX-NEXT: s_mov_b32 s3, s5 3671; GPRIDX-NEXT: s_mov_b32 s4, s6 3672; GPRIDX-NEXT: s_mov_b32 s5, s7 3673; GPRIDX-NEXT: s_mov_b32 s6, s8 3674; GPRIDX-NEXT: s_mov_b32 s7, s9 3675; GPRIDX-NEXT: s_mov_b32 s8, s10 3676; GPRIDX-NEXT: s_mov_b32 s9, s11 3677; GPRIDX-NEXT: s_mov_b32 s10, s12 3678; GPRIDX-NEXT: s_mov_b32 s11, s13 3679; GPRIDX-NEXT: s_mov_b32 s12, s14 3680; GPRIDX-NEXT: s_mov_b32 s13, s15 3681; GPRIDX-NEXT: s_mov_b32 m0, s18 3682; GPRIDX-NEXT: s_nop 0 3683; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[16:17] 3684; GPRIDX-NEXT: ; return to shader part epilog 3685; 3686; MOVREL-LABEL: dyn_insertelement_v7f64_s_s_s: 3687; MOVREL: ; %bb.0: ; %entry 3688; MOVREL-NEXT: s_mov_b32 s0, s2 3689; MOVREL-NEXT: s_mov_b32 s1, s3 3690; MOVREL-NEXT: s_mov_b32 m0, s18 3691; MOVREL-NEXT: s_mov_b32 s2, s4 3692; MOVREL-NEXT: s_mov_b32 s3, s5 3693; MOVREL-NEXT: s_mov_b32 s4, s6 3694; MOVREL-NEXT: s_mov_b32 s5, s7 3695; MOVREL-NEXT: s_mov_b32 s6, s8 3696; MOVREL-NEXT: s_mov_b32 s7, s9 3697; MOVREL-NEXT: s_mov_b32 s8, s10 3698; MOVREL-NEXT: s_mov_b32 s9, s11 3699; MOVREL-NEXT: s_mov_b32 s10, s12 3700; MOVREL-NEXT: s_mov_b32 s11, s13 3701; MOVREL-NEXT: s_mov_b32 s12, s14 3702; MOVREL-NEXT: s_mov_b32 s13, s15 3703; MOVREL-NEXT: s_movreld_b64 
s[0:1], s[16:17] 3704; MOVREL-NEXT: ; return to shader part epilog 3705entry: 3706 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 3707 ret <7 x double> %insert 3708} 3709 3710define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_s(<7 x double> inreg %vec, double %val, i32 inreg %idx) { 3711; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_s: 3712; GPRIDX: ; %bb.0: ; %entry 3713; GPRIDX-NEXT: s_mov_b32 s0, s2 3714; GPRIDX-NEXT: s_mov_b32 s1, s3 3715; GPRIDX-NEXT: s_mov_b32 s2, s4 3716; GPRIDX-NEXT: s_mov_b32 s3, s5 3717; GPRIDX-NEXT: s_mov_b32 s4, s6 3718; GPRIDX-NEXT: s_mov_b32 s5, s7 3719; GPRIDX-NEXT: s_mov_b32 s6, s8 3720; GPRIDX-NEXT: s_mov_b32 s7, s9 3721; GPRIDX-NEXT: s_mov_b32 s8, s10 3722; GPRIDX-NEXT: s_mov_b32 s9, s11 3723; GPRIDX-NEXT: s_mov_b32 s10, s12 3724; GPRIDX-NEXT: s_mov_b32 s11, s13 3725; GPRIDX-NEXT: s_mov_b32 s12, s14 3726; GPRIDX-NEXT: s_mov_b32 s13, s15 3727; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 3728; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 3729; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 3730; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 3731; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 3732; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 3733; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 3734; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 3735; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 3736; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 3737; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 3738; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 3739; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 3740; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 3741; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 3742; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 3743; GPRIDX-NEXT: s_lshl_b32 s0, s16, 1 3744; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 3745; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 3746; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 3747; GPRIDX-NEXT: s_set_gpr_idx_off 3748; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 3749; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 3750; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 3751; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 3752; GPRIDX-NEXT: v_readfirstlane_b32 s4, 
v6 3753; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 3754; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 3755; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 3756; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 3757; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 3758; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 3759; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 3760; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 3761; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 3762; GPRIDX-NEXT: ; return to shader part epilog 3763; 3764; MOVREL-LABEL: dyn_insertelement_v7f64_s_v_s: 3765; MOVREL: ; %bb.0: ; %entry 3766; MOVREL-NEXT: s_mov_b32 s0, s2 3767; MOVREL-NEXT: s_mov_b32 s1, s3 3768; MOVREL-NEXT: s_mov_b32 s2, s4 3769; MOVREL-NEXT: s_mov_b32 s3, s5 3770; MOVREL-NEXT: s_mov_b32 s4, s6 3771; MOVREL-NEXT: s_mov_b32 s5, s7 3772; MOVREL-NEXT: s_mov_b32 s6, s8 3773; MOVREL-NEXT: s_mov_b32 s7, s9 3774; MOVREL-NEXT: s_mov_b32 s8, s10 3775; MOVREL-NEXT: s_mov_b32 s9, s11 3776; MOVREL-NEXT: s_mov_b32 s10, s12 3777; MOVREL-NEXT: s_mov_b32 s11, s13 3778; MOVREL-NEXT: s_mov_b32 s12, s14 3779; MOVREL-NEXT: s_mov_b32 s13, s15 3780; MOVREL-NEXT: v_mov_b32_e32 v17, s15 3781; MOVREL-NEXT: v_mov_b32_e32 v2, s0 3782; MOVREL-NEXT: s_lshl_b32 m0, s16, 1 3783; MOVREL-NEXT: v_mov_b32_e32 v16, s14 3784; MOVREL-NEXT: v_mov_b32_e32 v15, s13 3785; MOVREL-NEXT: v_mov_b32_e32 v14, s12 3786; MOVREL-NEXT: v_mov_b32_e32 v13, s11 3787; MOVREL-NEXT: v_mov_b32_e32 v12, s10 3788; MOVREL-NEXT: v_mov_b32_e32 v11, s9 3789; MOVREL-NEXT: v_mov_b32_e32 v10, s8 3790; MOVREL-NEXT: v_mov_b32_e32 v9, s7 3791; MOVREL-NEXT: v_mov_b32_e32 v8, s6 3792; MOVREL-NEXT: v_mov_b32_e32 v7, s5 3793; MOVREL-NEXT: v_mov_b32_e32 v6, s4 3794; MOVREL-NEXT: v_mov_b32_e32 v5, s3 3795; MOVREL-NEXT: v_mov_b32_e32 v4, s2 3796; MOVREL-NEXT: v_mov_b32_e32 v3, s1 3797; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 3798; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 3799; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 3800; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 3801; MOVREL-NEXT: v_readfirstlane_b32 s2, 
; Test dyn_insertelement_v7f64_s_v_v (its define follows the end of the preceding test below):
; insertelement into <7 x double> where only %vec is marked inreg. The expected code takes the
; vector from s2-s15, the 64-bit value from v0/v1 and the index from v2, copies the vector into
; VGPRs, selects every 32-bit half with v_cmp_eq_u32 (index against 0..6) plus v_cndmask_b32,
; and hands the result back in s0-s13 through v_readfirstlane_b32.
; The check lines are autogenerated by utils/update_llc_test_checks.py; regenerate, do not hand-edit.
v4 3802; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 3803; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 3804; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 3805; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 3806; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 3807; MOVREL-NEXT: v_readfirstlane_b32 s8, v10 3808; MOVREL-NEXT: v_readfirstlane_b32 s9, v11 3809; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 3810; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 3811; MOVREL-NEXT: v_readfirstlane_b32 s12, v14 3812; MOVREL-NEXT: v_readfirstlane_b32 s13, v15 3813; MOVREL-NEXT: ; return to shader part epilog 3814entry: 3815 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 3816 ret <7 x double> %insert 3817} 3818 3819define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_v(<7 x double> inreg %vec, double %val, i32 %idx) { 3820; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_v: 3821; GPRIDX: ; %bb.0: ; %entry 3822; GPRIDX-NEXT: s_mov_b32 s0, s2 3823; GPRIDX-NEXT: s_mov_b32 s1, s3 3824; GPRIDX-NEXT: s_mov_b32 s2, s4 3825; GPRIDX-NEXT: s_mov_b32 s3, s5 3826; GPRIDX-NEXT: s_mov_b32 s4, s6 3827; GPRIDX-NEXT: s_mov_b32 s5, s7 3828; GPRIDX-NEXT: s_mov_b32 s6, s8 3829; GPRIDX-NEXT: s_mov_b32 s7, s9 3830; GPRIDX-NEXT: s_mov_b32 s8, s10 3831; GPRIDX-NEXT: s_mov_b32 s9, s11 3832; GPRIDX-NEXT: s_mov_b32 s10, s12 3833; GPRIDX-NEXT: s_mov_b32 s11, s13 3834; GPRIDX-NEXT: s_mov_b32 s12, s14 3835; GPRIDX-NEXT: s_mov_b32 s13, s15 3836; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 3837; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 3838; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 3839; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 3840; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 3841; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 3842; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 3843; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 3844; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 3845; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 3846; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 3847; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 3848; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 3849; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 3850; GPRIDX-NEXT: 
v_mov_b32_e32 v4, s1 3851; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 3852; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 3853; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 3854; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2 3855; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2 3856; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v2 3857; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2 3858; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2 3859; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v5, v0, s[10:11] 3860; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v0, s[0:1] 3861; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v0, s[2:3] 3862; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v0, s[4:5] 3863; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v0, s[6:7] 3864; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc 3865; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v15, v0, s[8:9] 3866; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v1, s[10:11] 3867; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3] 3868; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v14, v1, s[6:7] 3869; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] 3870; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5] 3871; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc 3872; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v16, v1, s[8:9] 3873; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3 3874; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4 3875; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 3876; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6 3877; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5 3878; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8 3879; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 3880; GPRIDX-NEXT: v_readfirstlane_b32 s7, v10 3881; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9 3882; GPRIDX-NEXT: v_readfirstlane_b32 s9, v12 3883; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11 3884; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 3885; GPRIDX-NEXT: v_readfirstlane_b32 s12, v0 3886; GPRIDX-NEXT: v_readfirstlane_b32 s13, v1 3887; GPRIDX-NEXT: ; return to shader part epilog 3888; 3889; MOVREL-LABEL: dyn_insertelement_v7f64_s_v_v: 3890; MOVREL: ; %bb.0: ; %entry 3891; 
MOVREL-NEXT: s_mov_b32 s0, s2 3892; MOVREL-NEXT: s_mov_b32 s1, s3 3893; MOVREL-NEXT: s_mov_b32 s2, s4 3894; MOVREL-NEXT: s_mov_b32 s3, s5 3895; MOVREL-NEXT: s_mov_b32 s4, s6 3896; MOVREL-NEXT: s_mov_b32 s5, s7 3897; MOVREL-NEXT: s_mov_b32 s6, s8 3898; MOVREL-NEXT: s_mov_b32 s7, s9 3899; MOVREL-NEXT: s_mov_b32 s8, s10 3900; MOVREL-NEXT: s_mov_b32 s9, s11 3901; MOVREL-NEXT: s_mov_b32 s10, s12 3902; MOVREL-NEXT: s_mov_b32 s11, s13 3903; MOVREL-NEXT: s_mov_b32 s12, s14 3904; MOVREL-NEXT: s_mov_b32 s13, s15 3905; MOVREL-NEXT: v_mov_b32_e32 v18, s15 3906; MOVREL-NEXT: v_mov_b32_e32 v17, s14 3907; MOVREL-NEXT: v_mov_b32_e32 v16, s13 3908; MOVREL-NEXT: v_mov_b32_e32 v15, s12 3909; MOVREL-NEXT: v_mov_b32_e32 v14, s11 3910; MOVREL-NEXT: v_mov_b32_e32 v13, s10 3911; MOVREL-NEXT: v_mov_b32_e32 v12, s9 3912; MOVREL-NEXT: v_mov_b32_e32 v11, s8 3913; MOVREL-NEXT: v_mov_b32_e32 v10, s7 3914; MOVREL-NEXT: v_mov_b32_e32 v9, s6 3915; MOVREL-NEXT: v_mov_b32_e32 v8, s5 3916; MOVREL-NEXT: v_mov_b32_e32 v7, s4 3917; MOVREL-NEXT: v_mov_b32_e32 v6, s3 3918; MOVREL-NEXT: v_mov_b32_e32 v5, s2 3919; MOVREL-NEXT: v_mov_b32_e32 v4, s1 3920; MOVREL-NEXT: v_mov_b32_e32 v3, s0 3921; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 3922; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 3923; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 6, v2 3924; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 3925; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 3926; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 3927; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, v0, s0 3928; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 3929; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 3930; MOVREL-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo 3931; MOVREL-NEXT: v_cndmask_b32_e32 v8, v8, v1, vcc_lo 3932; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v2 3933; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v0, s0 3934; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v1, s0 3935; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 5, v2 3936; MOVREL-NEXT: v_readfirstlane_b32 s2, v5 3937; MOVREL-NEXT: 
; Test dyn_insertelement_v7f64_v_v_s (its define appears on the following line, after the end of
; the preceding test): only %idx is marked inreg. The expected code shifts the index held in s2
; left by one and writes the value (v14, plus v15 first copied to v16) with two indexed moves:
; s_set_gpr_idx_on/off around v_mov_b32 under the GPRIDX prefix, m0 with v_movreld_b32 under the
; MOVREL prefix. The shift is consistent with each double occupying two 32-bit registers.
; The whole vector is then read back into s0-s13 with v_readfirstlane_b32.
v_cndmask_b32_e32 v2, v12, v1, vcc_lo 3938; MOVREL-NEXT: v_cndmask_b32_e32 v11, v11, v0, vcc_lo 3939; MOVREL-NEXT: v_readfirstlane_b32 s3, v6 3940; MOVREL-NEXT: v_cndmask_b32_e64 v12, v13, v0, s0 3941; MOVREL-NEXT: v_cndmask_b32_e64 v13, v14, v1, s0 3942; MOVREL-NEXT: v_cndmask_b32_e64 v0, v15, v0, s1 3943; MOVREL-NEXT: v_cndmask_b32_e64 v1, v16, v1, s1 3944; MOVREL-NEXT: v_readfirstlane_b32 s0, v3 3945; MOVREL-NEXT: v_readfirstlane_b32 s1, v4 3946; MOVREL-NEXT: v_readfirstlane_b32 s4, v7 3947; MOVREL-NEXT: v_readfirstlane_b32 s5, v8 3948; MOVREL-NEXT: v_readfirstlane_b32 s6, v9 3949; MOVREL-NEXT: v_readfirstlane_b32 s7, v10 3950; MOVREL-NEXT: v_readfirstlane_b32 s8, v11 3951; MOVREL-NEXT: v_readfirstlane_b32 s9, v2 3952; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 3953; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 3954; MOVREL-NEXT: v_readfirstlane_b32 s12, v0 3955; MOVREL-NEXT: v_readfirstlane_b32 s13, v1 3956; MOVREL-NEXT: ; return to shader part epilog 3957entry: 3958 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 3959 ret <7 x double> %insert 3960} 3961 3962define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_s(<7 x double> %vec, double %val, i32 inreg %idx) { 3963; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_s: 3964; GPRIDX: ; %bb.0: ; %entry 3965; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 3966; GPRIDX-NEXT: v_mov_b32_e32 v16, v15 3967; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) 3968; GPRIDX-NEXT: v_mov_b32_e32 v0, v14 3969; GPRIDX-NEXT: v_mov_b32_e32 v1, v16 3970; GPRIDX-NEXT: s_set_gpr_idx_off 3971; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 3972; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 3973; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 3974; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 3975; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 3976; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 3977; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 3978; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 3979; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 3980; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 
; Test dyn_insertelement_v7f64_v_v_v (its define appears on the following line, after the end of
; the preceding test): no argument is marked inreg. The expected code finds the vector in v0-v13,
; the value in v14/v15 and the index in v16. Both check prefixes expect seven v_cmp_eq_u32
; compares (index against 0..6), each feeding a pair of v_cndmask_b32 selects for the low and
; high halves of one element, followed by v_readfirstlane_b32 of v0-v13 into s0-s13.
3981; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10 3982; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11 3983; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12 3984; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13 3985; GPRIDX-NEXT: ; return to shader part epilog 3986; 3987; MOVREL-LABEL: dyn_insertelement_v7f64_v_v_s: 3988; MOVREL: ; %bb.0: ; %entry 3989; MOVREL-NEXT: v_mov_b32_e32 v16, v15 3990; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 3991; MOVREL-NEXT: v_movreld_b32_e32 v0, v14 3992; MOVREL-NEXT: v_movreld_b32_e32 v1, v16 3993; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 3994; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 3995; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 3996; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 3997; MOVREL-NEXT: v_readfirstlane_b32 s4, v4 3998; MOVREL-NEXT: v_readfirstlane_b32 s5, v5 3999; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 4000; MOVREL-NEXT: v_readfirstlane_b32 s7, v7 4001; MOVREL-NEXT: v_readfirstlane_b32 s8, v8 4002; MOVREL-NEXT: v_readfirstlane_b32 s9, v9 4003; MOVREL-NEXT: v_readfirstlane_b32 s10, v10 4004; MOVREL-NEXT: v_readfirstlane_b32 s11, v11 4005; MOVREL-NEXT: v_readfirstlane_b32 s12, v12 4006; MOVREL-NEXT: v_readfirstlane_b32 s13, v13 4007; MOVREL-NEXT: ; return to shader part epilog 4008entry: 4009 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 4010 ret <7 x double> %insert 4011} 4012 4013define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, double %val, i32 %idx) { 4014; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v: 4015; GPRIDX: ; %bb.0: ; %entry 4016; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 4017; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v16 4018; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 2, v16 4019; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 3, v16 4020; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 4, v16 4021; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 5, v16 4022; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 6, v16 4023; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v14, s[10:11] 4024; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v15, s[10:11] 
4025; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v14, s[8:9] 4026; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9] 4027; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v14, s[6:7] 4028; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v15, s[6:7] 4029; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v14, s[4:5] 4030; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v15, s[4:5] 4031; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[2:3] 4032; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v15, s[2:3] 4033; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v14, s[0:1] 4034; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[0:1] 4035; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc 4036; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc 4037; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 4038; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 4039; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 4040; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 4041; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 4042; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 4043; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 4044; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 4045; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 4046; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 4047; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10 4048; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11 4049; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12 4050; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13 4051; GPRIDX-NEXT: ; return to shader part epilog 4052; 4053; MOVREL-LABEL: dyn_insertelement_v7f64_v_v_v: 4054; MOVREL: ; %bb.0: ; %entry 4055; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v16 4056; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 2, v16 4057; MOVREL-NEXT: v_cmp_eq_u32_e64 s2, 3, v16 4058; MOVREL-NEXT: v_cmp_eq_u32_e64 s3, 4, v16 4059; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 5, v16 4060; MOVREL-NEXT: v_cmp_eq_u32_e64 s5, 6, v16 4061; MOVREL-NEXT: v_mov_b32_e32 v19, v2 4062; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 4063; MOVREL-NEXT: v_mov_b32_e32 v18, v3 4064; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v14, s2 4065; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v14, s3 4066; 
; Test dyn_insertelement_v5f64_s_s_s (its define appears on the following line, after the end of
; the preceding test): vector, value and index are all marked inreg. The expected code is purely
; scalar and identical under both check prefixes: the index in s14 is compared against 0..4 with
; s_cmp_eq_u32, and each compare drives one s_cselect_b64 that picks either the value in s[12:13]
; or the original element, leaving the result in s[0:1] through s[8:9].
MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v14, s4 4067; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, v14, s5 4068; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, v15, s2 4069; MOVREL-NEXT: v_cndmask_b32_e64 v2, v19, v14, s0 4070; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v15, s3 4071; MOVREL-NEXT: v_cndmask_b32_e64 v3, v18, v15, s0 4072; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo 4073; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s4 4074; MOVREL-NEXT: v_cndmask_b32_e64 v4, v4, v14, s1 4075; MOVREL-NEXT: v_cndmask_b32_e64 v13, v13, v15, s5 4076; MOVREL-NEXT: v_cndmask_b32_e64 v5, v5, v15, s1 4077; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v15, vcc_lo 4078; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 4079; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 4080; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 4081; MOVREL-NEXT: v_readfirstlane_b32 s4, v4 4082; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 4083; MOVREL-NEXT: v_readfirstlane_b32 s5, v5 4084; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 4085; MOVREL-NEXT: v_readfirstlane_b32 s7, v7 4086; MOVREL-NEXT: v_readfirstlane_b32 s8, v8 4087; MOVREL-NEXT: v_readfirstlane_b32 s9, v9 4088; MOVREL-NEXT: v_readfirstlane_b32 s10, v10 4089; MOVREL-NEXT: v_readfirstlane_b32 s11, v11 4090; MOVREL-NEXT: v_readfirstlane_b32 s12, v12 4091; MOVREL-NEXT: v_readfirstlane_b32 s13, v13 4092; MOVREL-NEXT: ; return to shader part epilog 4093entry: 4094 %insert = insertelement <7 x double> %vec, double %val, i32 %idx 4095 ret <7 x double> %insert 4096} 4097 4098define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_s_s(<5 x double> inreg %vec, double inreg %val, i32 inreg %idx) { 4099; GPRIDX-LABEL: dyn_insertelement_v5f64_s_s_s: 4100; GPRIDX: ; %bb.0: ; %entry 4101; GPRIDX-NEXT: s_cmp_eq_u32 s14, 0 4102; GPRIDX-NEXT: s_cselect_b64 s[0:1], s[12:13], s[2:3] 4103; GPRIDX-NEXT: s_cmp_eq_u32 s14, 1 4104; GPRIDX-NEXT: s_cselect_b64 s[2:3], s[12:13], s[4:5] 4105; GPRIDX-NEXT: s_cmp_eq_u32 s14, 2 4106; GPRIDX-NEXT: s_cselect_b64 s[4:5], s[12:13], s[6:7] 4107; GPRIDX-NEXT: 
; Test dyn_insertelement_v5f64_s_v_s (its define appears on the following line, after the end of
; the preceding test): %vec and %idx are marked inreg, %val is not. The expected code takes the
; vector from s2-s11, the value from v0/v1 and the index from s12. It copies s0-s15 into VGPRs,
; compares s12 against 0..4 with v_cmp_eq_u32_e64, selects each 32-bit half with v_cndmask_b32,
; and returns the ten result registers in s0-s9 through v_readfirstlane_b32.
s_cmp_eq_u32 s14, 3 4108; GPRIDX-NEXT: s_cselect_b64 s[6:7], s[12:13], s[8:9] 4109; GPRIDX-NEXT: s_cmp_eq_u32 s14, 4 4110; GPRIDX-NEXT: s_cselect_b64 s[8:9], s[12:13], s[10:11] 4111; GPRIDX-NEXT: ; return to shader part epilog 4112; 4113; MOVREL-LABEL: dyn_insertelement_v5f64_s_s_s: 4114; MOVREL: ; %bb.0: ; %entry 4115; MOVREL-NEXT: s_cmp_eq_u32 s14, 0 4116; MOVREL-NEXT: s_cselect_b64 s[0:1], s[12:13], s[2:3] 4117; MOVREL-NEXT: s_cmp_eq_u32 s14, 1 4118; MOVREL-NEXT: s_cselect_b64 s[2:3], s[12:13], s[4:5] 4119; MOVREL-NEXT: s_cmp_eq_u32 s14, 2 4120; MOVREL-NEXT: s_cselect_b64 s[4:5], s[12:13], s[6:7] 4121; MOVREL-NEXT: s_cmp_eq_u32 s14, 3 4122; MOVREL-NEXT: s_cselect_b64 s[6:7], s[12:13], s[8:9] 4123; MOVREL-NEXT: s_cmp_eq_u32 s14, 4 4124; MOVREL-NEXT: s_cselect_b64 s[8:9], s[12:13], s[10:11] 4125; MOVREL-NEXT: ; return to shader part epilog 4126entry: 4127 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 4128 ret <5 x double> %insert 4129} 4130 4131define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) { 4132; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s: 4133; GPRIDX: ; %bb.0: ; %entry 4134; GPRIDX-NEXT: s_mov_b32 s0, s2 4135; GPRIDX-NEXT: s_mov_b32 s1, s3 4136; GPRIDX-NEXT: s_mov_b32 s2, s4 4137; GPRIDX-NEXT: s_mov_b32 s3, s5 4138; GPRIDX-NEXT: s_mov_b32 s4, s6 4139; GPRIDX-NEXT: s_mov_b32 s5, s7 4140; GPRIDX-NEXT: s_mov_b32 s6, s8 4141; GPRIDX-NEXT: s_mov_b32 s7, s9 4142; GPRIDX-NEXT: s_mov_b32 s8, s10 4143; GPRIDX-NEXT: s_mov_b32 s9, s11 4144; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 4145; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 4146; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 4147; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 4148; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 4149; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 4150; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 4151; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 4152; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 4153; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 4154; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 
4155; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 4156; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 4157; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 4158; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 4159; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 4160; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s12, 0 4161; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], s12, 1 4162; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], s12, 3 4163; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], s12, 2 4164; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], s12, 4 4165; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc 4166; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[0:1] 4167; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v1, s[0:1] 4168; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc 4169; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[6:7] 4170; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3] 4171; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v1, s[2:3] 4172; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v1, s[6:7] 4173; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] 4174; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] 4175; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 4176; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 4177; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 4178; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 4179; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 4180; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 4181; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 4182; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 4183; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0 4184; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1 4185; GPRIDX-NEXT: ; return to shader part epilog 4186; 4187; MOVREL-LABEL: dyn_insertelement_v5f64_s_v_s: 4188; MOVREL: ; %bb.0: ; %entry 4189; MOVREL-NEXT: s_mov_b32 s0, s2 4190; MOVREL-NEXT: s_mov_b32 s1, s3 4191; MOVREL-NEXT: s_mov_b32 s2, s4 4192; MOVREL-NEXT: s_mov_b32 s3, s5 4193; MOVREL-NEXT: s_mov_b32 s4, s6 4194; MOVREL-NEXT: s_mov_b32 s5, s7 4195; MOVREL-NEXT: s_mov_b32 s6, s8 4196; MOVREL-NEXT: s_mov_b32 s7, s9 4197; MOVREL-NEXT: s_mov_b32 s8, s10 4198; MOVREL-NEXT: s_mov_b32 s9, s11 4199; MOVREL-NEXT: 
v_mov_b32_e32 v20, s15 4200; MOVREL-NEXT: v_mov_b32_e32 v19, s14 4201; MOVREL-NEXT: v_mov_b32_e32 v18, s13 4202; MOVREL-NEXT: v_mov_b32_e32 v17, s12 4203; MOVREL-NEXT: v_mov_b32_e32 v16, s11 4204; MOVREL-NEXT: v_mov_b32_e32 v15, s10 4205; MOVREL-NEXT: v_mov_b32_e32 v14, s9 4206; MOVREL-NEXT: v_mov_b32_e32 v13, s8 4207; MOVREL-NEXT: v_mov_b32_e32 v12, s7 4208; MOVREL-NEXT: v_mov_b32_e32 v11, s6 4209; MOVREL-NEXT: v_mov_b32_e32 v10, s5 4210; MOVREL-NEXT: v_mov_b32_e32 v9, s4 4211; MOVREL-NEXT: v_mov_b32_e32 v8, s3 4212; MOVREL-NEXT: v_mov_b32_e32 v7, s2 4213; MOVREL-NEXT: v_mov_b32_e32 v6, s1 4214; MOVREL-NEXT: v_mov_b32_e32 v5, s0 4215; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0 4216; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, s12, 1 4217; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, s12, 4 4218; MOVREL-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc_lo 4219; MOVREL-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc_lo 4220; MOVREL-NEXT: v_cndmask_b32_e64 v4, v7, v0, s0 4221; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 2 4222; MOVREL-NEXT: v_cndmask_b32_e64 v5, v8, v1, s0 4223; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, s12, 3 4224; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 4225; MOVREL-NEXT: v_cndmask_b32_e32 v6, v9, v0, vcc_lo 4226; MOVREL-NEXT: v_cndmask_b32_e32 v7, v10, v1, vcc_lo 4227; MOVREL-NEXT: v_cndmask_b32_e64 v8, v11, v0, s0 4228; MOVREL-NEXT: v_cndmask_b32_e64 v9, v12, v1, s0 4229; MOVREL-NEXT: v_cndmask_b32_e64 v0, v13, v0, s1 4230; MOVREL-NEXT: v_cndmask_b32_e64 v1, v14, v1, s1 4231; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 4232; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 4233; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 4234; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 4235; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 4236; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 4237; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 4238; MOVREL-NEXT: v_readfirstlane_b32 s8, v0 4239; MOVREL-NEXT: v_readfirstlane_b32 s9, v1 4240; MOVREL-NEXT: ; return to shader part epilog 4241entry: 4242 %insert = insertelement <5 x double> %vec, double 
; Test dyn_insertelement_v5f64_s_v_v (its define appears on the following line, after the end of
; the preceding test): only %vec is marked inreg. The expected code takes the vector from s2-s11,
; the value from v0/v1 and the index from v2. It copies s0-s15 into VGPRs, compares v2 against
; 0..4 with v_cmp_eq_u32, selects each 32-bit half with v_cndmask_b32, and returns the result in
; s0-s9 through v_readfirstlane_b32.
%val, i32 %idx 4243 ret <5 x double> %insert 4244} 4245 4246define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) { 4247; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v: 4248; GPRIDX: ; %bb.0: ; %entry 4249; GPRIDX-NEXT: s_mov_b32 s0, s2 4250; GPRIDX-NEXT: s_mov_b32 s1, s3 4251; GPRIDX-NEXT: s_mov_b32 s2, s4 4252; GPRIDX-NEXT: s_mov_b32 s3, s5 4253; GPRIDX-NEXT: s_mov_b32 s4, s6 4254; GPRIDX-NEXT: s_mov_b32 s5, s7 4255; GPRIDX-NEXT: s_mov_b32 s6, s8 4256; GPRIDX-NEXT: s_mov_b32 s7, s9 4257; GPRIDX-NEXT: s_mov_b32 s8, s10 4258; GPRIDX-NEXT: s_mov_b32 s9, s11 4259; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 4260; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 4261; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 4262; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 4263; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 4264; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 4265; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 4266; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 4267; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 4268; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 4269; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 4270; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 4271; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 4272; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 4273; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 4274; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 4275; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 4276; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 4277; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v2 4278; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v2 4279; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v2 4280; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v5, v0, s[6:7] 4281; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v0, s[0:1] 4282; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v0, s[2:3] 4283; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc 4284; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v11, v0, s[4:5] 4285; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v1, s[6:7] 4286; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v10, v1, s[2:3] 4287; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] 4288; GPRIDX-NEXT: 
v_cndmask_b32_e32 v4, v4, v1, vcc 4289; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5] 4290; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3 4291; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4 4292; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 4293; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6 4294; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5 4295; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8 4296; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 4297; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 4298; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0 4299; GPRIDX-NEXT: v_readfirstlane_b32 s9, v1 4300; GPRIDX-NEXT: ; return to shader part epilog 4301; 4302; MOVREL-LABEL: dyn_insertelement_v5f64_s_v_v: 4303; MOVREL: ; %bb.0: ; %entry 4304; MOVREL-NEXT: s_mov_b32 s0, s2 4305; MOVREL-NEXT: s_mov_b32 s1, s3 4306; MOVREL-NEXT: s_mov_b32 s2, s4 4307; MOVREL-NEXT: s_mov_b32 s3, s5 4308; MOVREL-NEXT: s_mov_b32 s4, s6 4309; MOVREL-NEXT: s_mov_b32 s5, s7 4310; MOVREL-NEXT: s_mov_b32 s6, s8 4311; MOVREL-NEXT: s_mov_b32 s7, s9 4312; MOVREL-NEXT: s_mov_b32 s8, s10 4313; MOVREL-NEXT: s_mov_b32 s9, s11 4314; MOVREL-NEXT: v_mov_b32_e32 v18, s15 4315; MOVREL-NEXT: v_mov_b32_e32 v17, s14 4316; MOVREL-NEXT: v_mov_b32_e32 v16, s13 4317; MOVREL-NEXT: v_mov_b32_e32 v15, s12 4318; MOVREL-NEXT: v_mov_b32_e32 v14, s11 4319; MOVREL-NEXT: v_mov_b32_e32 v13, s10 4320; MOVREL-NEXT: v_mov_b32_e32 v12, s9 4321; MOVREL-NEXT: v_mov_b32_e32 v11, s8 4322; MOVREL-NEXT: v_mov_b32_e32 v10, s7 4323; MOVREL-NEXT: v_mov_b32_e32 v9, s6 4324; MOVREL-NEXT: v_mov_b32_e32 v8, s5 4325; MOVREL-NEXT: v_mov_b32_e32 v7, s4 4326; MOVREL-NEXT: v_mov_b32_e32 v6, s3 4327; MOVREL-NEXT: v_mov_b32_e32 v5, s2 4328; MOVREL-NEXT: v_mov_b32_e32 v4, s1 4329; MOVREL-NEXT: v_mov_b32_e32 v3, s0 4330; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 4331; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 4332; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 4, v2 4333; MOVREL-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc_lo 4334; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc_lo 4335; MOVREL-NEXT: v_cndmask_b32_e64 
; Test dyn_insertelement_v5f64_v_v_s (its define appears on the following line, after the end of
; the preceding test): only %idx is marked inreg. The expected code finds the vector in v0-v9,
; the value in v10/v11 and the index in s2. It compares s2 against 0..4 with v_cmp_eq_u32_e64,
; selects each 32-bit half with v_cndmask_b32, and returns the result in s0-s9 through
; v_readfirstlane_b32. Under the MOVREL prefix s2 is overwritten only after the last compare.
v5, v5, v0, s0 4336; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v1, s0 4337; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v2 4338; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 3, v2 4339; MOVREL-NEXT: v_readfirstlane_b32 s2, v5 4340; MOVREL-NEXT: v_readfirstlane_b32 s3, v6 4341; MOVREL-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc_lo 4342; MOVREL-NEXT: v_cndmask_b32_e64 v8, v9, v0, s0 4343; MOVREL-NEXT: v_cndmask_b32_e64 v9, v10, v1, s0 4344; MOVREL-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc_lo 4345; MOVREL-NEXT: v_cndmask_b32_e64 v0, v11, v0, s1 4346; MOVREL-NEXT: v_cndmask_b32_e64 v1, v12, v1, s1 4347; MOVREL-NEXT: v_readfirstlane_b32 s0, v3 4348; MOVREL-NEXT: v_readfirstlane_b32 s1, v4 4349; MOVREL-NEXT: v_readfirstlane_b32 s4, v7 4350; MOVREL-NEXT: v_readfirstlane_b32 s5, v2 4351; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 4352; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 4353; MOVREL-NEXT: v_readfirstlane_b32 s8, v0 4354; MOVREL-NEXT: v_readfirstlane_b32 s9, v1 4355; MOVREL-NEXT: ; return to shader part epilog 4356entry: 4357 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 4358 ret <5 x double> %insert 4359} 4360 4361define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, double %val, i32 inreg %idx) { 4362; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_s: 4363; GPRIDX: ; %bb.0: ; %entry 4364; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 4365; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, 1 4366; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], s2, 2 4367; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], s2, 3 4368; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], s2, 4 4369; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1] 4370; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1] 4371; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 4372; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 4373; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[8:9] 4374; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[4:5] 4375; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[4:5] 4376; GPRIDX-NEXT: v_cndmask_b32_e64 v5, 
v5, v11, s[8:9] 4377; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[6:7] 4378; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[6:7] 4379; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 4380; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 4381; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 4382; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 4383; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 4384; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 4385; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 4386; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 4387; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 4388; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 4389; GPRIDX-NEXT: ; return to shader part epilog 4390; 4391; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_s: 4392; MOVREL: ; %bb.0: ; %entry 4393; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 4394; MOVREL-NEXT: v_mov_b32_e32 v15, v2 4395; MOVREL-NEXT: v_mov_b32_e32 v14, v3 4396; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 4397; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 4398; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 1 4399; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 4400; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 4401; MOVREL-NEXT: v_cndmask_b32_e32 v3, v14, v11, vcc_lo 4402; MOVREL-NEXT: v_cndmask_b32_e32 v2, v15, v10, vcc_lo 4403; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 4404; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 4405; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo 4406; MOVREL-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo 4407; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 3 4408; MOVREL-NEXT: v_readfirstlane_b32 s4, v4 4409; MOVREL-NEXT: v_readfirstlane_b32 s5, v5 4410; MOVREL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo 4411; MOVREL-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo 4412; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 4 4413; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 4414; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 4415; MOVREL-NEXT: v_readfirstlane_b32 s7, v7 4416; MOVREL-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo 4417; MOVREL-NEXT: v_cndmask_b32_e32 v9, v9, v11, 
; Test dyn_insertelement_v5f64_v_v_v (its define appears on the following line, after the end of
; the preceding test): no argument is marked inreg. The expected code finds the vector in v0-v9,
; the value in v10/v11 and the index in v12. It compares v12 against 0..4 with v_cmp_eq_u32,
; selects each 32-bit half with v_cndmask_b32, and returns the result in s0-s9 through
; v_readfirstlane_b32. Under the MOVREL prefix v2/v3 are first copied to v15/v14 and every
; compare reuses vcc_lo; under the GPRIDX prefix five distinct condition registers are used.
vcc_lo 4418; MOVREL-NEXT: v_readfirstlane_b32 s8, v8 4419; MOVREL-NEXT: v_readfirstlane_b32 s9, v9 4420; MOVREL-NEXT: ; return to shader part epilog 4421entry: 4422 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 4423 ret <5 x double> %insert 4424} 4425 4426define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) { 4427; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_v: 4428; GPRIDX: ; %bb.0: ; %entry 4429; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 0, v12 4430; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v12 4431; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 2, v12 4432; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 3, v12 4433; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 4, v12 4434; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[6:7] 4435; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[6:7] 4436; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[4:5] 4437; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[4:5] 4438; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[2:3] 4439; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[2:3] 4440; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[0:1] 4441; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[0:1] 4442; GPRIDX-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 4443; GPRIDX-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 4444; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 4445; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 4446; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 4447; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 4448; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 4449; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 4450; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 4451; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 4452; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 4453; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 4454; GPRIDX-NEXT: ; return to shader part epilog 4455; 4456; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_v: 4457; MOVREL: ; %bb.0: ; %entry 4458; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12 4459; MOVREL-NEXT: v_mov_b32_e32 v15, v2 4460; MOVREL-NEXT: 
v_mov_b32_e32 v14, v3 4461; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo 4462; MOVREL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc_lo 4463; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 4464; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 4465; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 4466; MOVREL-NEXT: v_cndmask_b32_e32 v2, v15, v10, vcc_lo 4467; MOVREL-NEXT: v_cndmask_b32_e32 v3, v14, v11, vcc_lo 4468; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 4469; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 4470; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 4471; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo 4472; MOVREL-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo 4473; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v12 4474; MOVREL-NEXT: v_readfirstlane_b32 s4, v4 4475; MOVREL-NEXT: v_readfirstlane_b32 s5, v5 4476; MOVREL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo 4477; MOVREL-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc_lo 4478; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v12 4479; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 4480; MOVREL-NEXT: v_readfirstlane_b32 s7, v7 4481; MOVREL-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc_lo 4482; MOVREL-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc_lo 4483; MOVREL-NEXT: v_readfirstlane_b32 s8, v8 4484; MOVREL-NEXT: v_readfirstlane_b32 s9, v9 4485; MOVREL-NEXT: ; return to shader part epilog 4486entry: 4487 %insert = insertelement <5 x double> %vec, double %val, i32 %idx 4488 ret <5 x double> %insert 4489} 4490