; This checks support for insertelement and extractelement.
;
; Three FileCheck prefixes are used, one per group of invocations below:
;   CHECK  - p2i output without -mattr, at -O2 and at -Om1
;   SSE41  - p2i output with -mattr=sse4.1, at -O2 and at -Om1
;   MIPS32 - p2i output for --target mips32 at -O2; both halves of that
;            pipeline are guarded by
;            %if --need=target_MIPS32 --need=allow_dump
;
; Every function applies a single insertelement or extractelement with a
; constant lane index, so each group of checks pins the lowering of
; exactly one vector operation.

; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \
; RUN:   | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \
; RUN:   | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -mattr=sse4.1 \
; RUN:   | FileCheck --check-prefix=SSE41 %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -mattr=sse4.1 \
; RUN:   | FileCheck --check-prefix=SSE41 %s

; RUN: %if --need=target_MIPS32 --need=allow_dump \
; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target mips32\
; RUN:   -i %s --args -O2 \
; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
; RUN:   --command FileCheck --check-prefix MIPS32 %s

; insertelement operations

; Insert a float into lane 0 of a <4 x float>.
define internal <4 x float> @insertelement_v4f32_0(<4 x float> %vec,
                                                   float %elt) {
entry:
  %res = insertelement <4 x float> %vec, float %elt, i32 0
  ret <4 x float> %res
; CHECK-LABEL: insertelement_v4f32_0
; CHECK: movss

; SSE41-LABEL: insertelement_v4f32_0
; SSE41: insertps {{.*}},{{.*}},0x0

; *** a0 - implicit return <4 x float>
; *** a1 - unused due to alignment of %vec
; *** a2:a3:sp[16]:sp[20] - %vec
; *** sp[24] - %elt
; MIPS32-LABEL: insertelement_v4f32_0
; *** Load element 2 and 3 of %vec
; MIPS32: lw [[BV_E2:.*]],
; MIPS32: lw [[BV_E3:.*]],
; *** Load %elt
; MIPS32: lwc1 [[ELT:.*]],
; *** Insert %elt at %vec[0]
; MIPS32: mfc1 [[RV_E0:.*]],[[ELT]]
; MIPS32: move [[RET_PTR:.*]],a0
; MIPS32: sw [[RV_E0]],0([[RET_PTR]])
; MIPS32: sw a3,4([[RET_PTR]])
; MIPS32: sw [[BV_E2]],8([[RET_PTR]])
; MIPS32: sw [[BV_E3]],12([[RET_PTR]])
}

; Insert an i32 into lane 0 of a <4 x i32>.
define internal <4 x i32> @insertelement_v4i32_0(<4 x i32> %vec, i32 %elt) {
entry:
  %res = insertelement <4 x i32> %vec, i32 %elt, i32 0
  ret <4 x i32> %res
; CHECK-LABEL: insertelement_v4i32_0
; CHECK: movd xmm{{.*}},
; CHECK: movss

; SSE41-LABEL: insertelement_v4i32_0
; SSE41: pinsrd {{.*}},{{.*}},0x0

; *** a0:a1:a2:a3 - %vec
; *** sp[16] - %elt
; MIPS32-LABEL: insertelement_v4i32_0
; *** Load %elt
; MIPS32: lw v0,16(sp)
; MIPS32: move v1,a1
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}


; Insert a float into lane 1 of a <4 x float>.
define internal <4 x float> @insertelement_v4f32_1(<4 x float> %vec,
                                                   float %elt) {
entry:
  %res = insertelement <4 x float> %vec, float %elt, i32 1
  ret <4 x float> %res
; CHECK-LABEL: insertelement_v4f32_1
; CHECK: shufps
; CHECK: shufps

; SSE41-LABEL: insertelement_v4f32_1
; SSE41: insertps {{.*}},{{.*}},0x10

; MIPS32-LABEL: insertelement_v4f32_1
; MIPS32: lw [[VEC_E2:.*]],16(sp)
; MIPS32: lw [[VEC_E3:.*]],20(sp)
; MIPS32: lwc1 [[ELT:.*]],24(sp)
; MIPS32: mfc1 [[R_E1:.*]],[[ELT]]
; MIPS32: move [[PTR:.*]],a0
; MIPS32: sw a2,0([[PTR]])
; MIPS32: sw [[R_E1]],4([[PTR]])
; MIPS32: sw [[VEC_E2]],8([[PTR]])
; MIPS32: sw [[VEC_E3]],12([[PTR]])
}

; Insert an i32 into lane 1 of a <4 x i32>.
define internal <4 x i32> @insertelement_v4i32_1(<4 x i32> %vec, i32 %elt) {
entry:
  %res = insertelement <4 x i32> %vec, i32 %elt, i32 1
  ret <4 x i32> %res
; CHECK-LABEL: insertelement_v4i32_1
; CHECK: shufps
; CHECK: shufps

; SSE41-LABEL: insertelement_v4i32_1
; SSE41: pinsrd {{.*}},{{.*}},0x1

; MIPS32-LABEL: insertelement_v4i32_1
; MIPS32: lw [[ELT:.*]],16(sp)
; MIPS32: move v1,[[ELT]]
; MIPS32: move v0,a0
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; Insert an i16 (truncated from the i32 argument) into lane 1 of a <8 x i16>.
define internal <8 x i16> @insertelement_v8i16(<8 x i16> %vec, i32 %elt.arg) {
entry:
  %elt = trunc i32 %elt.arg to i16
  %res = insertelement <8 x i16> %vec, i16 %elt, i32 1
  ret <8 x i16> %res
; CHECK-LABEL: insertelement_v8i16
; CHECK: pinsrw

; SSE41-LABEL: insertelement_v8i16
; SSE41: pinsrw

; MIPS32-LABEL: insertelement_v8i16
; MIPS32: lw [[ELT:.*]],16(sp)
; MIPS32: sll [[ELT]],[[ELT]],0x10
; MIPS32: sll a0,a0,0x10
; MIPS32: srl a0,a0,0x10
; MIPS32: or v0,[[ELT]],a0
; MIPS32: move v1,a1
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; Insert an i8 (truncated from the i32 argument) into lane 1 of a <16 x i8>.
define internal <16 x i8> @insertelement_v16i8(<16 x i8> %vec, i32 %elt.arg) {
entry:
  %elt = trunc i32 %elt.arg to i8
  %res = insertelement <16 x i8> %vec, i8 %elt, i32 1
  ret <16 x i8> %res
; CHECK-LABEL: insertelement_v16i8
; CHECK: movups
; CHECK: lea
; CHECK: mov

; SSE41-LABEL: insertelement_v16i8
; SSE41: pinsrb

; MIPS32-LABEL: insertelement_v16i8
; MIPS32: lw [[ELT:.*]],16(sp)
; MIPS32: andi [[ELT]],[[ELT]],0xff
; MIPS32: sll [[ELT]],[[ELT]],0x8
; MIPS32: lui [[T:.*]],0xffff
; MIPS32: ori [[T]],[[T]],0xff
; MIPS32: and a0,a0,[[T]]
; *** NOTE(review): insertelement_v16i1 expects the first source operand of
; *** this "or" to be the ELT capture, while v0 is hard-coded here - confirm
; *** which form is intended.
; MIPS32: or v0,v0,a0
; MIPS32: move v1,a1
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; Insert an i1 (truncated from the i32 argument) into lane 0 of a <4 x i1>.
define internal <4 x i1> @insertelement_v4i1_0(<4 x i1> %vec, i32 %elt.arg) {
entry:
  %elt = trunc i32 %elt.arg to i1
  %res = insertelement <4 x i1> %vec, i1 %elt, i32 0
  ret <4 x i1> %res
; CHECK-LABEL: insertelement_v4i1_0
; CHECK: movss

; SSE41-LABEL: insertelement_v4i1_0
; SSE41: pinsrd {{.*}},{{.*}},0x0

; MIPS32-LABEL: insertelement_v4i1_0
; MIPS32: lw v0,16(sp)
; MIPS32: move v1,a1
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; Insert an i1 (truncated from the i32 argument) into lane 1 of a <4 x i1>.
define internal <4 x i1> @insertelement_v4i1_1(<4 x i1> %vec, i32 %elt.arg) {
entry:
  %elt = trunc i32 %elt.arg to i1
  %res = insertelement <4 x i1> %vec, i1 %elt, i32 1
  ret <4 x i1> %res
; CHECK-LABEL: insertelement_v4i1_1
; CHECK: shufps
; CHECK: shufps

; SSE41-LABEL: insertelement_v4i1_1
; SSE41: pinsrd {{.*}},{{.*}},0x1

; MIPS32-LABEL: insertelement_v4i1_1
; MIPS32: lw [[ELT:.*]],16(sp)
; MIPS32: move v1,[[ELT]]
; MIPS32: move v0,a0
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; Insert an i1 (truncated from the i32 argument) into lane 1 of a <8 x i1>.
define internal <8 x i1> @insertelement_v8i1(<8 x i1> %vec, i32 %elt.arg) {
entry:
  %elt = trunc i32 %elt.arg to i1
  %res = insertelement <8 x i1> %vec, i1 %elt, i32 1
  ret <8 x i1> %res
; CHECK-LABEL: insertelement_v8i1
; CHECK: pinsrw

; SSE41-LABEL: insertelement_v8i1
; SSE41: pinsrw

; MIPS32-LABEL: insertelement_v8i1
; MIPS32: lw [[ELT:.*]],16(sp)
; MIPS32: sll [[ELT]],[[ELT]],0x10
; MIPS32: sll a0,a0,0x10
; MIPS32: srl a0,a0,0x10
; MIPS32: or v0,[[ELT]],a0
; MIPS32: move v1,a1
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; Insert an i1 (truncated from the i32 argument) into lane 1 of a <16 x i1>.
define internal <16 x i1> @insertelement_v16i1(<16 x i1> %vec, i32 %elt.arg) {
entry:
  %elt = trunc i32 %elt.arg to i1
  %res = insertelement <16 x i1> %vec, i1 %elt, i32 1
  ret <16 x i1> %res
; CHECK-LABEL: insertelement_v16i1
; CHECK: movups
; CHECK: lea
; CHECK: mov

; SSE41-LABEL: insertelement_v16i1
; SSE41: pinsrb

; MIPS32-LABEL: insertelement_v16i1
; MIPS32: lw [[ELT:.*]],16(sp)
; MIPS32: andi [[ELT]],[[ELT]],0xff
; MIPS32: sll [[ELT]],[[ELT]],0x8
; MIPS32: lui [[T:.*]],0xffff
; MIPS32: ori [[T]],[[T]],0xff
; MIPS32: and a0,a0,[[T]]
; MIPS32: or v0,[[ELT]],a0
; MIPS32: move v1,a1
; MIPS32: move a0,a2
; MIPS32: move a1,a3
}

; extractelement operations

; Extract lane 1 of a <4 x float>.
define internal float @extractelement_v4f32(<4 x float> %vec) {
entry:
  %res = extractelement <4 x float> %vec, i32 1
  ret float %res
; CHECK-LABEL: extractelement_v4f32
; CHECK: pshufd

; SSE41-LABEL: extractelement_v4f32
; SSE41: pshufd

; MIPS32-LABEL: extractelement_v4f32
; MIPS32: mtc1 a1,$f0
}

; Extract lane 1 of a <4 x i32>.
define internal i32 @extractelement_v4i32(<4 x i32> %vec) {
entry:
  %res = extractelement <4 x i32> %vec, i32 1
  ret i32 %res
; CHECK-LABEL: extractelement_v4i32
; CHECK: pshufd
; CHECK: movd {{.*}},xmm

; SSE41-LABEL: extractelement_v4i32
; SSE41: pextrd

; MIPS32-LABEL: extractelement_v4i32
; MIPS32: move v0,a1
}

; Extract lane 1 of a <8 x i16> and zero-extend it to i32.
define internal i32 @extractelement_v8i16(<8 x i16> %vec) {
entry:
  %res = extractelement <8 x i16> %vec, i32 1
  %res.ext = zext i16 %res to i32
  ret i32 %res.ext
; CHECK-LABEL: extractelement_v8i16
; CHECK: pextrw

; SSE41-LABEL: extractelement_v8i16
; SSE41: pextrw

; MIPS32-LABEL: extractelement_v8i16
; MIPS32: srl a0,a0,0x10
; MIPS32: andi a0,a0,0xffff
; MIPS32: move v0,a0
}

; Extract lane 1 of a <16 x i8> and zero-extend it to i32.
define internal i32 @extractelement_v16i8(<16 x i8> %vec) {
entry:
  %res = extractelement <16 x i8> %vec, i32 1
  %res.ext = zext i8 %res to i32
  ret i32 %res.ext
; CHECK-LABEL: extractelement_v16i8
; CHECK: movups
; CHECK: lea
; CHECK: mov

; SSE41-LABEL: extractelement_v16i8
; SSE41: pextrb

; MIPS32-LABEL: extractelement_v16i8
; MIPS32: srl a0,a0,0x8
; MIPS32: andi a0,a0,0xff
; MIPS32: andi a0,a0,0xff
; MIPS32: move v0,a0
}

; Extract lane 1 of a <4 x i1> and zero-extend it to i32.
define internal i32 @extractelement_v4i1(<4 x i1> %vec) {
entry:
  %res = extractelement <4 x i1> %vec, i32 1
  %res.ext = zext i1 %res to i32
  ret i32 %res.ext
; CHECK-LABEL: extractelement_v4i1
; CHECK: pshufd

; SSE41-LABEL: extractelement_v4i1
; SSE41: pextrd

; MIPS32-LABEL: extractelement_v4i1
; MIPS32: andi a1,a1,0x1
; MIPS32: andi a1,a1,0x1
; MIPS32: move v0,a1
}

; Extract lane 1 of a <8 x i1> and zero-extend it to i32.
define internal i32 @extractelement_v8i1(<8 x i1> %vec) {
entry:
  %res = extractelement <8 x i1> %vec, i32 1
  %res.ext = zext i1 %res to i32
  ret i32 %res.ext
; CHECK-LABEL: extractelement_v8i1
; CHECK: pextrw

; SSE41-LABEL: extractelement_v8i1
; SSE41: pextrw

; MIPS32-LABEL: extractelement_v8i1
; MIPS32: srl a0,a0,0x10
; MIPS32: andi a0,a0,0x1
; MIPS32: andi a0,a0,0x1
; MIPS32: move v0,a0
}

; Extract lane 1 of a <16 x i1> and zero-extend it to i32.
define internal i32 @extractelement_v16i1(<16 x i1> %vec) {
entry:
  %res = extractelement <16 x i1> %vec, i32 1
  %res.ext = zext i1 %res to i32
  ret i32 %res.ext
; CHECK-LABEL: extractelement_v16i1
; CHECK: movups
; CHECK: lea
; CHECK: mov

; SSE41-LABEL: extractelement_v16i1
; SSE41: pextrb

; MIPS32-LABEL: extractelement_v16i1
; MIPS32: srl a0,a0,0x8
; MIPS32: andi a0,a0,0xff
; MIPS32: andi a0,a0,0x1
; MIPS32: andi a0,a0,0x1
; MIPS32: move v0,a0
}