; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX

; Every function in this group loads adjacent i8 values starting at %p0,
; zero-extends each one, and rebuilds the result vector with an insertelement
; chain. The check lines record what each opt invocation above produced; they
; are generated output and should be refreshed with the script named in the
; first line rather than edited by hand.

;
; vXi8
;

; 2 x i8 -> 2 x i64.
; Recorded output: the SSE2 and AVX prefixes expect a single <2 x i8> load
; followed by a vector zext; the SLM prefix expects the scalar code unchanged.
define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SSE2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SSE2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SSE2-NEXT:    ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SLM-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SLM-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
; SLM-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
; SLM-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SLM-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SLM-NEXT:    ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT:    ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; 4 x i8 -> 4 x i32.
; Recorded output: the SSE2 and AVX prefixes expect a single <4 x i8> load
; followed by a vector zext; the SLM prefix expects the scalar code unchanged.
define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SSE2-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; SSE2-NEXT:    [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; SSE2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; SSE2-NEXT:    [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; SSE2-NEXT:    [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; SSE2-NEXT:    [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE2-NEXT:    ret <4 x i32> [[V3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SLM-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SLM-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SLM-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SLM-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i32
; SLM-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i32
; SLM-NEXT:    [[X2:%.*]] = zext i8 [[I2]] to i32
; SLM-NEXT:    [[X3:%.*]] = zext i8 [[I3]] to i32
; SLM-NEXT:    [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
; SLM-NEXT:    [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1
; SLM-NEXT:    [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2
; SLM-NEXT:    [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3
; SLM-NEXT:    ret <4 x i32> [[V3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX-NEXT:    [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX-NEXT:    [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; AVX-NEXT:    [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX-NEXT:    ret <4 x i32> [[V3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

; 4 x i8 -> 4 x i64.
; Recorded output: the SSE2 and AVX prefixes expect a single <4 x i8> load
; followed by a vector zext; the SLM prefix expects the scalar code unchanged.
define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SSE2-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SSE2-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SSE2-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SSE2-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SSE2-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SSE2-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SSE2-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SSE2-NEXT:    ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SLM-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SLM-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SLM-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SLM-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i64
; SLM-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i64
; SLM-NEXT:    [[X2:%.*]] = zext i8 [[I2]] to i64
; SLM-NEXT:    [[X3:%.*]] = zext i8 [[I3]] to i64
; SLM-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SLM-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SLM-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SLM-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SLM-NEXT:    ret <4 x i64> [[V3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX-NEXT:    [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX-NEXT:    [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX-NEXT:    [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX-NEXT:    ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %x2 = zext i8 %i2 to i64
  %x3 = zext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

; 8 x i8 -> 8 x i16.
; Recorded output: the SSE2 and AVX prefixes expect a single <8 x i8> load
; followed by a vector zext; the SLM prefix expects the scalar code unchanged.
define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i16(
; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE2-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE2-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE2-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SSE2-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; SSE2-NEXT:    [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0
; SSE2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; SSE2-NEXT:    [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1
; SSE2-NEXT:    [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; SSE2-NEXT:    [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2
; SSE2-NEXT:    [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; SSE2-NEXT:    [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3
; SSE2-NEXT:    [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; SSE2-NEXT:    [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4
; SSE2-NEXT:    [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; SSE2-NEXT:    [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5
; SSE2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; SSE2-NEXT:    [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6
; SSE2-NEXT:    [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; SSE2-NEXT:    [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7
; SSE2-NEXT:    ret <8 x i16> [[V7]]
;
; SLM-LABEL: @loadext_8i8_to_8i16(
; SLM-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SLM-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SLM-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SLM-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SLM-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SLM-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SLM-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SLM-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SLM-NEXT:    [[I4:%.*]] = load i8, i8* [[P4]], align 1
; SLM-NEXT:    [[I5:%.*]] = load i8, i8* [[P5]], align 1
; SLM-NEXT:    [[I6:%.*]] = load i8, i8* [[P6]], align 1
; SLM-NEXT:    [[I7:%.*]] = load i8, i8* [[P7]], align 1
; SLM-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i16
; SLM-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i16
; SLM-NEXT:    [[X2:%.*]] = zext i8 [[I2]] to i16
; SLM-NEXT:    [[X3:%.*]] = zext i8 [[I3]] to i16
; SLM-NEXT:    [[X4:%.*]] = zext i8 [[I4]] to i16
; SLM-NEXT:    [[X5:%.*]] = zext i8 [[I5]] to i16
; SLM-NEXT:    [[X6:%.*]] = zext i8 [[I6]] to i16
; SLM-NEXT:    [[X7:%.*]] = zext i8 [[I7]] to i16
; SLM-NEXT:    [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[X0]], i32 0
; SLM-NEXT:    [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[X1]], i32 1
; SLM-NEXT:    [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[X2]], i32 2
; SLM-NEXT:    [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[X3]], i32 3
; SLM-NEXT:    [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[X4]], i32 4
; SLM-NEXT:    [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[X5]], i32 5
; SLM-NEXT:    [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[X6]], i32 6
; SLM-NEXT:    [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[X7]], i32 7
; SLM-NEXT:    ret <8 x i16> [[V7]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT:    [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; AVX-NEXT:    [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0
; AVX-NEXT:    [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1
; AVX-NEXT:    [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1
; AVX-NEXT:    [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2
; AVX-NEXT:    [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2
; AVX-NEXT:    [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
; AVX-NEXT:    [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3
; AVX-NEXT:    [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4
; AVX-NEXT:    [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4
; AVX-NEXT:    [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5
; AVX-NEXT:    [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5
; AVX-NEXT:    [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6
; AVX-NEXT:    [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6
; AVX-NEXT:    [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
; AVX-NEXT:    [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7
; AVX-NEXT:    ret <8 x i16> [[V7]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}

; 8 x i8 -> 8 x i32.
; Recorded output: the SSE2 and AVX prefixes expect a single <8 x i8> load
; followed by a vector zext; the SLM prefix expects the scalar code unchanged.
define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i32(
; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SSE2-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SSE2-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SSE2-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SSE2-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SSE2-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SSE2-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SSE2-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; SSE2-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; SSE2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; SSE2-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE2-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; SSE2-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE2-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; SSE2-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE2-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; SSE2-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; SSE2-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; SSE2-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; SSE2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; SSE2-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; SSE2-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; SSE2-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; SSE2-NEXT:    ret <8 x i32> [[V7]]
;
; SLM-LABEL: @loadext_8i8_to_8i32(
; SLM-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; SLM-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; SLM-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; SLM-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; SLM-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; SLM-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; SLM-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SLM-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SLM-NEXT:    [[I2:%.*]] = load i8, i8* [[P2]], align 1
; SLM-NEXT:    [[I3:%.*]] = load i8, i8* [[P3]], align 1
; SLM-NEXT:    [[I4:%.*]] = load i8, i8* [[P4]], align 1
; SLM-NEXT:    [[I5:%.*]] = load i8, i8* [[P5]], align 1
; SLM-NEXT:    [[I6:%.*]] = load i8, i8* [[P6]], align 1
; SLM-NEXT:    [[I7:%.*]] = load i8, i8* [[P7]], align 1
; SLM-NEXT:    [[X0:%.*]] = zext i8 [[I0]] to i32
; SLM-NEXT:    [[X1:%.*]] = zext i8 [[I1]] to i32
; SLM-NEXT:    [[X2:%.*]] = zext i8 [[I2]] to i32
; SLM-NEXT:    [[X3:%.*]] = zext i8 [[I3]] to i32
; SLM-NEXT:    [[X4:%.*]] = zext i8 [[I4]] to i32
; SLM-NEXT:    [[X5:%.*]] = zext i8 [[I5]] to i32
; SLM-NEXT:    [[X6:%.*]] = zext i8 [[I6]] to i32
; SLM-NEXT:    [[X7:%.*]] = zext i8 [[I7]] to i32
; SLM-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[X0]], i32 0
; SLM-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[X1]], i32 1
; SLM-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[X2]], i32 2
; SLM-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[X3]], i32 3
; SLM-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4
; SLM-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5
; SLM-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6
; SLM-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7
; SLM-NEXT:    ret <8 x i32> [[V7]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; AVX-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2
; AVX-NEXT:    [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3
; AVX-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4
; AVX-NEXT:    [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5
; AVX-NEXT:    [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6
; AVX-NEXT:    [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; AVX-NEXT:    [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; AVX-NEXT:    [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; AVX-NEXT:    [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; AVX-NEXT:    [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; AVX-NEXT:    [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; AVX-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; AVX-NEXT:    [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; AVX-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; AVX-NEXT:    [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; AVX-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; AVX-NEXT:    [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; AVX-NEXT:    ret <8 x i32> [[V7]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %x4 = zext i8 %i4 to i32
  %x5 = zext i8 %i5 to i32
  %x6 = zext i8 %i6 to i32
  %x7 = zext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

452define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { 453; SSE2-LABEL: @loadext_16i8_to_16i16( 454; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 455; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 456; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 457; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 458; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 459; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 460; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 461; SSE2-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 462; SSE2-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 463; SSE2-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 464; SSE2-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 465; SSE2-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 466; SSE2-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 467; SSE2-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 468; SSE2-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 469; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* 470; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 471; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> 472; SSE2-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 473; SSE2-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 474; SSE2-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 475; SSE2-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 476; SSE2-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 477; SSE2-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 478; SSE2-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 479; SSE2-NEXT: [[V3:%.*]] = 
insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 480; SSE2-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 481; SSE2-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 482; SSE2-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 483; SSE2-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 484; SSE2-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 485; SSE2-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 486; SSE2-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 487; SSE2-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 488; SSE2-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 489; SSE2-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 490; SSE2-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 491; SSE2-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 492; SSE2-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 493; SSE2-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 494; SSE2-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 495; SSE2-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 496; SSE2-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 497; SSE2-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 498; SSE2-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 499; SSE2-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 500; SSE2-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 501; SSE2-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 502; SSE2-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 503; SSE2-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 504; SSE2-NEXT: ret <16 x i16> [[V15]] 505; 
506; SLM-LABEL: @loadext_16i8_to_16i16( 507; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 508; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 509; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 510; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 511; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 512; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 513; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 514; SLM-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 515; SLM-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 516; SLM-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 517; SLM-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 518; SLM-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 519; SLM-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 520; SLM-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 521; SLM-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 522; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 523; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 524; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 525; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 526; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 527; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 528; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 529; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 530; SLM-NEXT: [[I8:%.*]] = load i8, i8* [[P8]], align 1 531; SLM-NEXT: [[I9:%.*]] = load i8, i8* [[P9]], align 1 532; SLM-NEXT: [[I10:%.*]] = load i8, i8* [[P10]], align 1 533; SLM-NEXT: [[I11:%.*]] = load i8, i8* [[P11]], align 1 534; SLM-NEXT: [[I12:%.*]] = load i8, i8* [[P12]], align 1 535; SLM-NEXT: [[I13:%.*]] = load i8, i8* [[P13]], align 1 536; SLM-NEXT: [[I14:%.*]] = load i8, i8* 
[[P14]], align 1 537; SLM-NEXT: [[I15:%.*]] = load i8, i8* [[P15]], align 1 538; SLM-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i16 539; SLM-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i16 540; SLM-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i16 541; SLM-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i16 542; SLM-NEXT: [[X4:%.*]] = zext i8 [[I4]] to i16 543; SLM-NEXT: [[X5:%.*]] = zext i8 [[I5]] to i16 544; SLM-NEXT: [[X6:%.*]] = zext i8 [[I6]] to i16 545; SLM-NEXT: [[X7:%.*]] = zext i8 [[I7]] to i16 546; SLM-NEXT: [[X8:%.*]] = zext i8 [[I8]] to i16 547; SLM-NEXT: [[X9:%.*]] = zext i8 [[I9]] to i16 548; SLM-NEXT: [[X10:%.*]] = zext i8 [[I10]] to i16 549; SLM-NEXT: [[X11:%.*]] = zext i8 [[I11]] to i16 550; SLM-NEXT: [[X12:%.*]] = zext i8 [[I12]] to i16 551; SLM-NEXT: [[X13:%.*]] = zext i8 [[I13]] to i16 552; SLM-NEXT: [[X14:%.*]] = zext i8 [[I14]] to i16 553; SLM-NEXT: [[X15:%.*]] = zext i8 [[I15]] to i16 554; SLM-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[X0]], i32 0 555; SLM-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[X1]], i32 1 556; SLM-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[X2]], i32 2 557; SLM-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[X3]], i32 3 558; SLM-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[X4]], i32 4 559; SLM-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[X5]], i32 5 560; SLM-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[X6]], i32 6 561; SLM-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[X7]], i32 7 562; SLM-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[X8]], i32 8 563; SLM-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[X9]], i32 9 564; SLM-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[X10]], i32 10 565; SLM-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[X11]], i32 11 566; SLM-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[X12]], i32 12 567; SLM-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 
[[X13]], i32 13 568; SLM-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[X14]], i32 14 569; SLM-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[X15]], i32 15 570; SLM-NEXT: ret <16 x i16> [[V15]] 571; 572; AVX-LABEL: @loadext_16i8_to_16i16( 573; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 574; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 575; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 576; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 577; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 578; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 579; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 580; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 581; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 582; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 583; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 584; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 585; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 586; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 587; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 588; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* 589; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 590; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> 591; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 592; AVX-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 593; AVX-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 594; AVX-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 595; AVX-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 596; AVX-NEXT: [[V2:%.*]] = 
insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 597; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 598; AVX-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 599; AVX-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 600; AVX-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 601; AVX-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 602; AVX-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 603; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 604; AVX-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 605; AVX-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 606; AVX-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 607; AVX-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 608; AVX-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 609; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 610; AVX-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 611; AVX-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 612; AVX-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 613; AVX-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 614; AVX-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 615; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 616; AVX-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 617; AVX-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 618; AVX-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 619; AVX-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 620; AVX-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 621; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 
622; AVX-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 623; AVX-NEXT: ret <16 x i16> [[V15]] 624; 625 %p1 = getelementptr inbounds i8, i8* %p0, i64 1 626 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 627 %p3 = getelementptr inbounds i8, i8* %p0, i64 3 628 %p4 = getelementptr inbounds i8, i8* %p0, i64 4 629 %p5 = getelementptr inbounds i8, i8* %p0, i64 5 630 %p6 = getelementptr inbounds i8, i8* %p0, i64 6 631 %p7 = getelementptr inbounds i8, i8* %p0, i64 7 632 %p8 = getelementptr inbounds i8, i8* %p0, i64 8 633 %p9 = getelementptr inbounds i8, i8* %p0, i64 9 634 %p10 = getelementptr inbounds i8, i8* %p0, i64 10 635 %p11 = getelementptr inbounds i8, i8* %p0, i64 11 636 %p12 = getelementptr inbounds i8, i8* %p0, i64 12 637 %p13 = getelementptr inbounds i8, i8* %p0, i64 13 638 %p14 = getelementptr inbounds i8, i8* %p0, i64 14 639 %p15 = getelementptr inbounds i8, i8* %p0, i64 15 640 %i0 = load i8, i8* %p0, align 1 641 %i1 = load i8, i8* %p1, align 1 642 %i2 = load i8, i8* %p2, align 1 643 %i3 = load i8, i8* %p3, align 1 644 %i4 = load i8, i8* %p4, align 1 645 %i5 = load i8, i8* %p5, align 1 646 %i6 = load i8, i8* %p6, align 1 647 %i7 = load i8, i8* %p7, align 1 648 %i8 = load i8, i8* %p8, align 1 649 %i9 = load i8, i8* %p9, align 1 650 %i10 = load i8, i8* %p10, align 1 651 %i11 = load i8, i8* %p11, align 1 652 %i12 = load i8, i8* %p12, align 1 653 %i13 = load i8, i8* %p13, align 1 654 %i14 = load i8, i8* %p14, align 1 655 %i15 = load i8, i8* %p15, align 1 656 %x0 = zext i8 %i0 to i16 657 %x1 = zext i8 %i1 to i16 658 %x2 = zext i8 %i2 to i16 659 %x3 = zext i8 %i3 to i16 660 %x4 = zext i8 %i4 to i16 661 %x5 = zext i8 %i5 to i16 662 %x6 = zext i8 %i6 to i16 663 %x7 = zext i8 %i7 to i16 664 %x8 = zext i8 %i8 to i16 665 %x9 = zext i8 %i9 to i16 666 %x10 = zext i8 %i10 to i16 667 %x11 = zext i8 %i11 to i16 668 %x12 = zext i8 %i12 to i16 669 %x13 = zext i8 %i13 to i16 670 %x14 = zext i8 %i14 to i16 671 %x15 = zext i8 %i15 to i16 672 %v0 = 
insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

;
; vXi16
;

; Two consecutive i16 loads (each marked align 1) are zero-extended to i64 and
; inserted into lanes 0-1 of a <2 x i64> result.
; Expected output under the SSE2 and AVX prefixes is one <2 x i16> load plus a
; vector zext, with per-lane extractelement/insertelement pairs rebuilding the
; result; under the SLM prefix the scalar code is expected to stay unchanged.
define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SLM-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i64
; SLM-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i64
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; Four consecutive i16 loads (align 1) are zero-extended to i32 and inserted
; into lanes 0-3 of a <4 x i32> result.
; The SSE2 and AVX expectations show a single <4 x i16> load feeding a vector
; zext; the SLM expectations keep the four scalar load/zext pairs.
define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SSE2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE2-NEXT: ret <4 x i32> [[V3]]
;
; SLM-LABEL: @loadext_4i16_to_4i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
; SLM-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i32
; SLM-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i32
; SLM-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i32
; SLM-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i32
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3
; SLM-NEXT: ret <4 x i32> [[V3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX-NEXT: ret <4 x i32> [[V3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

; Same input pattern as @loadext_4i16_to_4i32, but each i16 is zero-extended to
; i64 and the result is a <4 x i64>.
; The SSE2 and AVX expectations show a <4 x i16> load with a vector zext to
; <4 x i64>; the SLM expectations keep the scalar sequence.
define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SSE2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SSE2-NEXT: ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
; SLM-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i64
; SLM-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i64
; SLM-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64
; SLM-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX-NEXT: ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %x2 = zext i16 %i2 to i64
  %x3 = zext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

; Eight consecutive i16 loads (align 1) are zero-extended to i32 and inserted
; into lanes 0-7 of an <8 x i32> result.
; The SSE2 and AVX expectations show a single <8 x i16> load with a vector zext
; to <8 x i32>; the SLM expectations keep all eight scalar load/zext pairs.
define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
; SSE2-LABEL: @loadext_8i16_to_8i32(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; SSE2-NEXT: ret <8 x i32> [[V7]]
;
; SLM-LABEL: @loadext_8i16_to_8i32(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1
; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1
; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1
; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1
; SLM-NEXT: [[I4:%.*]] = load i16, i16* [[P4]], align 1
; SLM-NEXT: [[I5:%.*]] = load i16, i16* [[P5]], align 1
; SLM-NEXT: [[I6:%.*]] = load i16, i16* [[P6]], align 1
; SLM-NEXT: [[I7:%.*]] = load i16, i16* [[P7]], align 1
; SLM-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i32
; SLM-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i32
; SLM-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i32
; SLM-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i32
; SLM-NEXT: [[X4:%.*]] = zext i16 [[I4]] to i32
; SLM-NEXT: [[X5:%.*]] = zext i16 [[I5]] to i32
; SLM-NEXT: [[X6:%.*]] = zext i16 [[I6]] to i32
; SLM-NEXT: [[X7:%.*]] = zext i16 [[I7]] to i32
; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[X0]], i32 0
; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[X1]], i32 1
; SLM-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[X2]], i32 2
; SLM-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[X3]], i32 3
; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4
; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5
; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6
; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7
; SLM-NEXT: ret <8 x i32> [[V7]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4
; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2
; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2
; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3
; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3
; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4
; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4
; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5
; AVX-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5
; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6
; AVX-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6
; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7
; AVX-NEXT: ret <8 x i32> [[V7]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %i4 = load i16, i16* %p4, align 1
  %i5 = load i16, i16* %p5, align 1
  %i6 = load i16, i16* %p6, align 1
  %i7 = load i16, i16* %p7, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %x4 = zext i16 %i4 to i32
  %x5 = zext i16 %i5 to i32
  %x6 = zext i16 %i6 to i32
  %x7 = zext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

;
; vXi32
;

; Two consecutive i32 loads (align 1) are zero-extended to i64 and inserted
; into lanes 0-1 of a <2 x i64> result.
; The SSE2 and AVX expectations show a single <2 x i32> load with a vector
; zext; the SLM expectations keep the scalar loads and zexts.
define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
; SSE2-LABEL: @loadext_2i32_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i32_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SLM-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1
; SLM-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1
; SLM-NEXT: [[X0:%.*]] = zext i32 [[I0]] to i64
; SLM-NEXT: [[X1:%.*]] = zext i32 [[I1]] to i64
; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SLM-NEXT: ret <2 x i64> [[V1]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: ret <2 x i64> [[V1]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; Four consecutive i32 loads (align 1) are zero-extended to i64 and inserted
; into lanes 0-3 of a <4 x i64> result.
; The SSE2 and AVX expectations show a single <4 x i32> load with a vector zext
; to <4 x i64>; the SLM expectations keep the four scalar load/zext pairs.
define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
; SSE2-LABEL: @loadext_4i32_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SSE2-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; SSE2-NEXT: ret <4 x i64> [[V3]]
;
; SLM-LABEL: @loadext_4i32_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; SLM-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1
; SLM-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1
; SLM-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1
; SLM-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1
; SLM-NEXT: [[X0:%.*]] = zext i32 [[I0]] to i64
; SLM-NEXT: [[X1:%.*]] = zext i32 [[I1]] to i64
; SLM-NEXT: [[X2:%.*]] = zext i32 [[I2]] to i64
; SLM-NEXT: [[X3:%.*]] = zext i32 [[I3]] to i64
; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0
; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1
; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2
; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3
; SLM-NEXT: ret <4 x i64> [[V3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1
; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2
; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3
; AVX-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>*
; AVX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1
; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1
; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2
; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3
; AVX-NEXT: ret <4 x i64> [[V3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %i2 = load i32, i32* %p2, align 1
  %i3 = load i32, i32* %p3, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %x2 = zext i32 %i2 to i64
  %x3 = zext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}