; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256NODQ
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=-prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -mattr=+prefer-256-bit -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256 --check-prefix=AVX256DQ
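
; Check-prefix summary, as inferred from the RUN lines above: CHECK lines
; must hold for every run; SSE covers the baseline x86-64 run; AVX covers
; every AVX-capable CPU; AVX256 covers the runs limited to 256-bit vectors
; and AVX512 the full-width skylake-avx512 run; AVX256NODQ vs. AVX256DQ
; separates 256-bit targets without and with the AVX512DQ conversion
; instructions (skylake-avx512 with +prefer-256-bit retains DQ's direct
; i64 <-> fp conversions at 256-bit width).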

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [8 x i64] zeroinitializer, align 64
@src32 = common global [16 x i32] zeroinitializer, align 64
@src16 = common global [32 x i16] zeroinitializer, align 64
@src8 = common global [64 x i8] zeroinitializer, align 64

@dst64 = common global [8 x double] zeroinitializer, align 64
@dst32 = common global [16 x float] zeroinitializer, align 64
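
; All source and destination arrays are 64-byte aligned, so every vector
; load/store the SLP vectorizer forms is naturally aligned to its byte
; offset from the base (e.g. element offset 2 of @src64 is byte offset 16,
; hence the "align 16" in the check lines below).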

;
; UITOFP to vXf64
;

define void @uitofp_2i64_2f64() #0 {
; CHECK-LABEL: @uitofp_2i64_2f64(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i64_4f64() #0 {
; SSE-LABEL: @uitofp_4i64_4f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2) to <2 x i64>*), align 16
; SSE-NEXT:    [[TMP3:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; SSE-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i64_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i64_8f64() #0 {
; SSE-LABEL: @uitofp_8i64_8f64(
; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2) to <2 x i64>*), align 16
; SSE-NEXT:    [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <2 x i64>*), align 32
; SSE-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6) to <2 x i64>*), align 16
; SSE-NEXT:    [[TMP5:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double>
; SSE-NEXT:    [[TMP7:%.*]] = uitofp <2 x i64> [[TMP3]] to <2 x double>
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <2 x i64> [[TMP4]] to <2 x double>
; SSE-NEXT:    store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; SSE-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 16
; SSE-NEXT:    store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 32
; SSE-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i64_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4) to <4 x i64>*), align 32
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x double>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i64> [[TMP2]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i64_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to double
  %cvt1 = uitofp i64 %ld1 to double
  %cvt2 = uitofp i64 %ld2 to double
  %cvt3 = uitofp i64 %ld3 to double
  %cvt4 = uitofp i64 %ld4 to double
  %cvt5 = uitofp i64 %ld5 to double
  %cvt6 = uitofp i64 %ld6 to double
  %cvt7 = uitofp i64 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}
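
; Note how the 8 x i64 -> 8 x double case above splits along the legal
; register width: four <2 x i64> chunks on SSE, two <4 x i64> chunks on
; AVX256, and a single <8 x i64> on AVX512.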

define void @uitofp_2i32_2f64() #0 {
; SSE-LABEL: @uitofp_2i32_2f64(
; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @uitofp_2i32_2f64(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_2i32_2f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX512-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_2i32_2f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([16 x i32]* @src32 to <2 x i32>*), align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}
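
; For 2 x i32 -> 2 x double above, only the two skylake-avx512 runs (AVX512
; and AVX256DQ) vectorize; the pre-AVX512 targets have no direct
; unsigned-integer-to-fp instructions, which presumably leaves the scalar
; form cheaper for them.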

define void @uitofp_4i32_4f64() #0 {
; SSE-LABEL: @uitofp_4i32_4f64(
; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i32 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i32 [[LD3]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i32_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i32_8f64() #0 {
; SSE-LABEL: @uitofp_8i32_8f64(
; SSE-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
; SSE-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
; SSE-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
; SSE-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
; SSE-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
; SSE-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
; SSE-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i32 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i32 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i32 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i32 [[LD3]] to double
; SSE-NEXT:    [[CVT4:%.*]] = uitofp i32 [[LD4]] to double
; SSE-NEXT:    [[CVT5:%.*]] = uitofp i32 [[LD5]] to double
; SSE-NEXT:    [[CVT6:%.*]] = uitofp i32 [[LD6]] to double
; SSE-NEXT:    [[CVT7:%.*]] = uitofp i32 [[LD7]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i32_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i32> [[TMP2]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i32_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16
  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4
  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8
  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4
  %cvt0 = uitofp i32 %ld0 to double
  %cvt1 = uitofp i32 %ld1 to double
  %cvt2 = uitofp i32 %ld2 to double
  %cvt3 = uitofp i32 %ld3 to double
  %cvt4 = uitofp i32 %ld4 to double
  %cvt5 = uitofp i32 %ld5 to double
  %cvt6 = uitofp i32 %ld6 to double
  %cvt7 = uitofp i32 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}

define void @uitofp_2i16_2f64() #0 {
; CHECK-LABEL: @uitofp_2i16_2f64(
; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; CHECK-NEXT:    [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
; CHECK-NEXT:    [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
; CHECK-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; CHECK-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i16_4f64() #0 {
; SSE-LABEL: @uitofp_4i16_4f64(
; SSE-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i16 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i16 [[LD3]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i16_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i16_8f64() #0 {
; SSE-LABEL: @uitofp_8i16_8f64(
; SSE-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
; SSE-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
; SSE-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i16 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i16 [[LD3]] to double
; SSE-NEXT:    [[CVT4:%.*]] = uitofp i16 [[LD4]] to double
; SSE-NEXT:    [[CVT5:%.*]] = uitofp i16 [[LD5]] to double
; SSE-NEXT:    [[CVT6:%.*]] = uitofp i16 [[LD6]] to double
; SSE-NEXT:    [[CVT7:%.*]] = uitofp i16 [[LD7]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i16_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i16_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to double
  %cvt1 = uitofp i16 %ld1 to double
  %cvt2 = uitofp i16 %ld2 to double
  %cvt3 = uitofp i16 %ld3 to double
  %cvt4 = uitofp i16 %ld4 to double
  %cvt5 = uitofp i16 %ld5 to double
  %cvt6 = uitofp i16 %ld6 to double
  %cvt7 = uitofp i16 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}

define void @uitofp_2i8_2f64() #0 {
; SSE-LABEL: @uitofp_2i8_2f64(
; SSE-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @uitofp_2i8_2f64(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; AVX256NODQ-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_2i8_2f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; AVX512-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_2i8_2f64(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
; AVX256DQ-NEXT:    store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  ret void
}

define void @uitofp_4i8_4f64() #0 {
; SSE-LABEL: @uitofp_4i8_4f64(
; SSE-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; SSE-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i8 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i8 [[LD3]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i8_4f64(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  ret void
}

define void @uitofp_8i8_8f64() #0 {
; SSE-LABEL: @uitofp_8i8_8f64(
; SSE-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
; SSE-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
; SSE-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
; SSE-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
; SSE-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
; SSE-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
; SSE-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i8 [[LD2]] to double
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i8 [[LD3]] to double
; SSE-NEXT:    [[CVT4:%.*]] = uitofp i8 [[LD4]] to double
; SSE-NEXT:    [[CVT5:%.*]] = uitofp i8 [[LD5]] to double
; SSE-NEXT:    [[CVT6:%.*]] = uitofp i8 [[LD6]] to double
; SSE-NEXT:    [[CVT7:%.*]] = uitofp i8 [[LD7]] to double
; SSE-NEXT:    store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
; SSE-NEXT:    store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
; SSE-NEXT:    store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
; SSE-NEXT:    store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
; SSE-NEXT:    store double [[CVT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
; SSE-NEXT:    store double [[CVT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
; SSE-NEXT:    store double [[CVT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
; SSE-NEXT:    store double [[CVT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_8i8_8f64(
; AVX256-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x double>
; AVX256-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64
; AVX256-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i8_8f64(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x double>
; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to double
  %cvt1 = uitofp i8 %ld1 to double
  %cvt2 = uitofp i8 %ld2 to double
  %cvt3 = uitofp i8 %ld3 to double
  %cvt4 = uitofp i8 %ld4 to double
  %cvt5 = uitofp i8 %ld5 to double
  %cvt6 = uitofp i8 %ld6 to double
  %cvt7 = uitofp i8 %ld7 to double
  store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
  store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
  store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16
  store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
  store double %cvt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 32
  store double %cvt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
  store double %cvt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 16
  store double %cvt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
  ret void
}

;
; UITOFP to vXf32
;

define void @uitofp_2i64_2f32() #0 {
; CHECK-LABEL: @uitofp_2i64_2f32(
; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; CHECK-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; CHECK-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; CHECK-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; CHECK-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  ret void
}
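
; i64 -> float mirrors the i64 -> double cases: in the functions below only
; AVX512 and AVX256DQ vectorize the 4- and 8-wide conversions (AVX512DQ
; provides direct unsigned-i64 conversions such as vcvtuqq2ps), while SSE
; and AVX256NODQ stay scalar.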

define void @uitofp_4i64_4f32() #0 {
; SSE-LABEL: @uitofp_4i64_4f32(
; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @uitofp_4i64_4f32(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT:    [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT:    [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_4i64_4f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX512-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_4i64_4f32(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @src64 to <4 x i64>*), align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <4 x i64> [[TMP1]] to <4 x float>
; AVX256DQ-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}

define void @uitofp_8i64_8f32() #0 {
; SSE-LABEL: @uitofp_8i64_8f32(
; SSE-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; SSE-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; SSE-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; SSE-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
; SSE-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
; SSE-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
; SSE-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; SSE-NEXT:    [[CVT4:%.*]] = uitofp i64 [[LD4]] to float
; SSE-NEXT:    [[CVT5:%.*]] = uitofp i64 [[LD5]] to float
; SSE-NEXT:    [[CVT6:%.*]] = uitofp i64 [[LD6]] to float
; SSE-NEXT:    [[CVT7:%.*]] = uitofp i64 [[LD7]] to float
; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT:    store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; SSE-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT:    ret void
;
; AVX256NODQ-LABEL: @uitofp_8i64_8f32(
; AVX256NODQ-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
; AVX256NODQ-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
; AVX256NODQ-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
; AVX256NODQ-NEXT:    [[LD4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
; AVX256NODQ-NEXT:    [[LD5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
; AVX256NODQ-NEXT:    [[LD6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
; AVX256NODQ-NEXT:    [[LD7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
; AVX256NODQ-NEXT:    [[CVT0:%.*]] = uitofp i64 [[LD0]] to float
; AVX256NODQ-NEXT:    [[CVT1:%.*]] = uitofp i64 [[LD1]] to float
; AVX256NODQ-NEXT:    [[CVT2:%.*]] = uitofp i64 [[LD2]] to float
; AVX256NODQ-NEXT:    [[CVT3:%.*]] = uitofp i64 [[LD3]] to float
; AVX256NODQ-NEXT:    [[CVT4:%.*]] = uitofp i64 [[LD4]] to float
; AVX256NODQ-NEXT:    [[CVT5:%.*]] = uitofp i64 [[LD5]] to float
; AVX256NODQ-NEXT:    [[CVT6:%.*]] = uitofp i64 [[LD6]] to float
; AVX256NODQ-NEXT:    [[CVT7:%.*]] = uitofp i64 [[LD7]] to float
; AVX256NODQ-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; AVX256NODQ-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; AVX256NODQ-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; AVX256NODQ-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; AVX256NODQ-NEXT:    store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; AVX256NODQ-NEXT:    store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; AVX256NODQ-NEXT:    store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; AVX256NODQ-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; AVX256NODQ-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_8i64_8f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX512-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX512-NEXT:    ret void
;
; AVX256DQ-LABEL: @uitofp_8i64_8f32(
; AVX256DQ-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @src64 to <8 x i64>*), align 64
; AVX256DQ-NEXT:    [[TMP2:%.*]] = uitofp <8 x i64> [[TMP1]] to <8 x float>
; AVX256DQ-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256DQ-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
  %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
  %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16
  %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8
  %ld4 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 4), align 32
  %ld5 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 5), align 8
  %ld6 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 6), align 16
  %ld7 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 7), align 8
  %cvt0 = uitofp i64 %ld0 to float
  %cvt1 = uitofp i64 %ld1 to float
  %cvt2 = uitofp i64 %ld2 to float
  %cvt3 = uitofp i64 %ld3 to float
  %cvt4 = uitofp i64 %ld4 to float
  %cvt5 = uitofp i64 %ld5 to float
  %cvt6 = uitofp i64 %ld6 to float
  %cvt7 = uitofp i64 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}

define void @uitofp_4i32_4f32() #0 {
; CHECK-LABEL: @uitofp_4i32_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}
define void @uitofp_16i32_16f32() #0 {
; SSE-LABEL: @uitofp_16i32_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <4 x i32>*), align 32
; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP5:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i32> [[TMP2]] to <4 x float>
; SSE-NEXT:    [[TMP7:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float>
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i32> [[TMP4]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT:    store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i32_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8) to <8 x i32>*), align 32
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i32> [[TMP2]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i32_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([16 x i32]* @src32 to <16 x i32>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i32> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0 ), align 64
  %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1 ), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2 ), align 8
  %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3 ), align 4
  %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4 ), align 16
  %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5 ), align 4
  %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6 ), align 8
  %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7 ), align 4
  %ld8 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 8 ), align 32
  %ld9 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 9 ), align 4
  %ld10 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 10), align 8
  %ld11 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 11), align 4
  %ld12 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 12), align 16
  %ld13 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 13), align 4
  %ld14 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 14), align 8
  %ld15 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 15), align 4
  %cvt0 = uitofp i32 %ld0 to float
  %cvt1 = uitofp i32 %ld1 to float
  %cvt2 = uitofp i32 %ld2 to float
  %cvt3 = uitofp i32 %ld3 to float
  %cvt4 = uitofp i32 %ld4 to float
  %cvt5 = uitofp i32 %ld5 to float
  %cvt6 = uitofp i32 %ld6 to float
  %cvt7 = uitofp i32 %ld7 to float
  %cvt8 = uitofp i32 %ld8 to float
  %cvt9 = uitofp i32 %ld9 to float
  %cvt10 = uitofp i32 %ld10 to float
  %cvt11 = uitofp i32 %ld11 to float
  %cvt12 = uitofp i32 %ld12 to float
  %cvt13 = uitofp i32 %ld13 to float
  %cvt14 = uitofp i32 %ld14 to float
  %cvt15 = uitofp i32 %ld15 to float
  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
  ret void
}
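
; i16 sources: on SSE the 4 x i16 case stays scalar (the extend-plus-convert
; is presumably rejected by the cost model), while AVX vectorizes it as a
; single <4 x i16> to <4 x float> conversion.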
define void @uitofp_4i16_4f32() #0 {
; SSE-LABEL: @uitofp_4i16_4f32(
; SSE-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_4i16_4f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float>
; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}
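
; Same picture at 8 x i16: scalar code on SSE, one <8 x i16> uitofp on AVX.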
define void @uitofp_8i16_8f32() #0 {
; SSE-LABEL: @uitofp_8i16_8f32(
; SSE-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
; SSE-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
; SSE-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
; SSE-NEXT:    [[CVT4:%.*]] = uitofp i16 [[LD4]] to float
; SSE-NEXT:    [[CVT5:%.*]] = uitofp i16 [[LD5]] to float
; SSE-NEXT:    [[CVT6:%.*]] = uitofp i16 [[LD6]] to float
; SSE-NEXT:    [[CVT7:%.*]] = uitofp i16 [[LD7]] to float
; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT:    store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; SSE-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i16_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}
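
; 16 x i16: scalar on SSE, two <8 x i16> halves on AVX256, and a single
; <16 x i16> conversion on AVX512.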
define void @uitofp_16i16_16f32() #0 {
; SSE-LABEL: @uitofp_16i16_16f32(
; SSE-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
; SSE-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
; SSE-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4
; SSE-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2
; SSE-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 8
; SSE-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 2
; SSE-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 4
; SSE-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 2
; SSE-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8), align 16
; SSE-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9), align 2
; SSE-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
; SSE-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
; SSE-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
; SSE-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
; SSE-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
; SSE-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
; SSE-NEXT:    [[CVT0:%.*]] = uitofp i16 [[LD0]] to float
; SSE-NEXT:    [[CVT1:%.*]] = uitofp i16 [[LD1]] to float
; SSE-NEXT:    [[CVT2:%.*]] = uitofp i16 [[LD2]] to float
; SSE-NEXT:    [[CVT3:%.*]] = uitofp i16 [[LD3]] to float
; SSE-NEXT:    [[CVT4:%.*]] = uitofp i16 [[LD4]] to float
; SSE-NEXT:    [[CVT5:%.*]] = uitofp i16 [[LD5]] to float
; SSE-NEXT:    [[CVT6:%.*]] = uitofp i16 [[LD6]] to float
; SSE-NEXT:    [[CVT7:%.*]] = uitofp i16 [[LD7]] to float
; SSE-NEXT:    [[CVT8:%.*]] = uitofp i16 [[LD8]] to float
; SSE-NEXT:    [[CVT9:%.*]] = uitofp i16 [[LD9]] to float
; SSE-NEXT:    [[CVT10:%.*]] = uitofp i16 [[LD10]] to float
; SSE-NEXT:    [[CVT11:%.*]] = uitofp i16 [[LD11]] to float
; SSE-NEXT:    [[CVT12:%.*]] = uitofp i16 [[LD12]] to float
; SSE-NEXT:    [[CVT13:%.*]] = uitofp i16 [[LD13]] to float
; SSE-NEXT:    [[CVT14:%.*]] = uitofp i16 [[LD14]] to float
; SSE-NEXT:    [[CVT15:%.*]] = uitofp i16 [[LD15]] to float
; SSE-NEXT:    store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
; SSE-NEXT:    store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
; SSE-NEXT:    store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
; SSE-NEXT:    store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
; SSE-NEXT:    store float [[CVT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
; SSE-NEXT:    store float [[CVT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
; SSE-NEXT:    store float [[CVT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
; SSE-NEXT:    store float [[CVT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
; SSE-NEXT:    store float [[CVT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 32
; SSE-NEXT:    store float [[CVT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
; SSE-NEXT:    store float [[CVT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
; SSE-NEXT:    store float [[CVT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
; SSE-NEXT:    store float [[CVT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
; SSE-NEXT:    store float [[CVT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
; SSE-NEXT:    store float [[CVT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
; SSE-NEXT:    store float [[CVT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i16_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8) to <8 x i16>*), align 16
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i16_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([32 x i16]* @src16 to <16 x i16>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i16> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0 ), align 64
  %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1 ), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2 ), align 4
  %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3 ), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4 ), align 8
  %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5 ), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6 ), align 4
  %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7 ), align 2
  %ld8 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 8 ), align 16
  %ld9 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 9 ), align 2
  %ld10 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 10), align 4
  %ld11 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 11), align 2
  %ld12 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 12), align 8
  %ld13 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 13), align 2
  %ld14 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 14), align 4
  %ld15 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 15), align 2
  %cvt0 = uitofp i16 %ld0 to float
  %cvt1 = uitofp i16 %ld1 to float
  %cvt2 = uitofp i16 %ld2 to float
  %cvt3 = uitofp i16 %ld3 to float
  %cvt4 = uitofp i16 %ld4 to float
  %cvt5 = uitofp i16 %ld5 to float
  %cvt6 = uitofp i16 %ld6 to float
  %cvt7 = uitofp i16 %ld7 to float
  %cvt8 = uitofp i16 %ld8 to float
  %cvt9 = uitofp i16 %ld9 to float
  %cvt10 = uitofp i16 %ld10 to float
  %cvt11 = uitofp i16 %ld11 to float
  %cvt12 = uitofp i16 %ld12 to float
  %cvt13 = uitofp i16 %ld13 to float
  %cvt14 = uitofp i16 %ld14 to float
  %cvt15 = uitofp i16 %ld15 to float
  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
  ret void
}
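
; i8 sources vectorize on every target, so the 4 x i8 case shares a common
; CHECK block.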
define void @uitofp_4i8_4f32() #0 {
; CHECK-LABEL: @uitofp_4i8_4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; CHECK-NEXT:    [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; CHECK-NEXT:    ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  ret void
}
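
; 8 x i8: two <4 x i8> chunks on SSE, one <8 x i8> uitofp on AVX.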
define void @uitofp_8i8_8f32() #0 {
; SSE-LABEL: @uitofp_8i8_8f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
; SSE-NEXT:    [[TMP3:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    [[TMP4:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT:    ret void
;
; AVX-LABEL: @uitofp_8i8_8f32(
; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
; AVX-NEXT:    [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX-NEXT:    ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1
  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4
  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1
  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2
  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64
  store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
  store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8
  store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
  store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16
  store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
  store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8
  store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
  ret void
}
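
; 16 x i8: four <4 x i8> chunks on SSE, two <8 x i8> halves on AVX256, and a
; single <16 x i8> conversion on AVX512.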
define void @uitofp_16i8_16f32() #0 {
; SSE-LABEL: @uitofp_16i8_16f32(
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4
; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <4 x i8>*), align 8
; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12) to <4 x i8>*), align 4
; SSE-NEXT:    [[TMP5:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float>
; SSE-NEXT:    [[TMP6:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x float>
; SSE-NEXT:    [[TMP7:%.*]] = uitofp <4 x i8> [[TMP3]] to <4 x float>
; SSE-NEXT:    [[TMP8:%.*]] = uitofp <4 x i8> [[TMP4]] to <4 x float>
; SSE-NEXT:    store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64
; SSE-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16
; SSE-NEXT:    store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 32
; SSE-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 16
; SSE-NEXT:    ret void
;
; AVX256-LABEL: @uitofp_16i8_16f32(
; AVX256-NEXT:    [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64
; AVX256-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8) to <8 x i8>*), align 8
; AVX256-NEXT:    [[TMP3:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float>
; AVX256-NEXT:    [[TMP4:%.*]] = uitofp <8 x i8> [[TMP2]] to <8 x float>
; AVX256-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64
; AVX256-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 32
; AVX256-NEXT:    ret void
;
; AVX512-LABEL: @uitofp_16i8_16f32(
; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([64 x i8]* @src8 to <16 x i8>*), align 64
; AVX512-NEXT:    [[TMP2:%.*]] = uitofp <16 x i8> [[TMP1]] to <16 x float>
; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 64
; AVX512-NEXT:    ret void
;
  %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0 ), align 64
  %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1 ), align 1
  %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2 ), align 2
  %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3 ), align 1
  %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4 ), align 4
  %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5 ), align 1
  %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6 ), align 2
  %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7 ), align 1
  %ld8 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 8 ), align 8
  %ld9 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 9 ), align 1
  %ld10 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 10), align 2
  %ld11 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 11), align 1
  %ld12 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 12), align 4
  %ld13 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 13), align 1
  %ld14 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 14), align 2
  %ld15 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 15), align 1
  %cvt0 = uitofp i8 %ld0 to float
  %cvt1 = uitofp i8 %ld1 to float
  %cvt2 = uitofp i8 %ld2 to float
  %cvt3 = uitofp i8 %ld3 to float
  %cvt4 = uitofp i8 %ld4 to float
  %cvt5 = uitofp i8 %ld5 to float
  %cvt6 = uitofp i8 %ld6 to float
  %cvt7 = uitofp i8 %ld7 to float
  %cvt8 = uitofp i8 %ld8 to float
  %cvt9 = uitofp i8 %ld9 to float
  %cvt10 = uitofp i8 %ld10 to float
  %cvt11 = uitofp i8 %ld11 to float
  %cvt12 = uitofp i8 %ld12 to float
  %cvt13 = uitofp i8 %ld13 to float
  %cvt14 = uitofp i8 %ld14 to float
  %cvt15 = uitofp i8 %ld15 to float
  store float %cvt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 64
  store float %cvt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
  store float %cvt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 8
  store float %cvt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
  store float %cvt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 16
  store float %cvt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
  store float %cvt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 8
  store float %cvt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
  store float %cvt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 32
  store float %cvt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
  store float %cvt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 8
  store float %cvt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
  store float %cvt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 16
  store float %cvt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
  store float %cvt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 8
  store float %cvt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
  ret void
}

attributes #0 = { nounwind }