1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 3; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 4; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 5; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 6; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX1 7; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 8; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F 9; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW 10; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ 11 12define i32 @reduce_i64(i32 %arg) { 13; SSE2-LABEL: 'reduce_i64' 14; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 15; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 16; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 17; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 18; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 19; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 20; 21; SSSE3-LABEL: 'reduce_i64' 22; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 23; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 24; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 25; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 26; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 27; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 28; 29; SSE41-LABEL: 'reduce_i64' 30; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 31; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 32; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 33; SSE41-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 34; SSE41-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 35; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 36; 37; SSE42-LABEL: 'reduce_i64' 38; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 39; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 40; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 41; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 42; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 43; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 44; 45; AVX1-LABEL: 'reduce_i64' 46; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 47; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 48; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 49; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 50; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 51; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 52; 53; AVX2-LABEL: 'reduce_i64' 54; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 55; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 56; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 57; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 58; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 59; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 60; 61; AVX512-LABEL: 'reduce_i64' 62; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 63; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 64; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 65; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 66; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 67; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 68; 69 %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) 70 %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) 71 %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) 72 %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) 73 %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) 74 ret i32 undef 75} 76 77define i32 @reduce_i32(i32 %arg) { 78; SSE2-LABEL: 'reduce_i32' 79; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 80; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 81; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 82; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 83; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 84; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 85; 86; SSSE3-LABEL: 'reduce_i32' 87; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 88; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 89; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 90; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 91; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 92; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 93; 94; SSE4-LABEL: 'reduce_i32' 95; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 96; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 97; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 98; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 99; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 100; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 101; 102; AVX1-LABEL: 'reduce_i32' 103; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 104; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 105; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 106; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 107; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 108; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 109; 110; AVX2-LABEL: 'reduce_i32' 111; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 112; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 113; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 114; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 115; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 116; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 117; 118; AVX512-LABEL: 'reduce_i32' 119; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 120; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 121; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 122; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 123; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 124; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 125; 126 %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) 127 %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) 128 %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) 129 %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) 130 %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) 131 ret i32 undef 132} 133 134define i32 @reduce_i16(i32 %arg) { 135; SSE2-LABEL: 'reduce_i16' 136; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 137; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 138; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 139; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 140; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 141; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 142; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 143; 144; SSSE3-LABEL: 'reduce_i16' 145; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 146; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 147; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 148; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 149; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 150; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 151; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 152; 153; SSE4-LABEL: 'reduce_i16' 154; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 155; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 156; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 157; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 158; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 159; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 160; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 161; 162; AVX1-LABEL: 'reduce_i16' 163; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 164; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 165; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 166; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 167; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 168; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 169; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 170; 171; AVX2-LABEL: 'reduce_i16' 172; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 173; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 174; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 175; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 176; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 177; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 178; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 179; 180; AVX512F-LABEL: 'reduce_i16' 181; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 182; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 183; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 184; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 185; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 186; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 187; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 188; 189; AVX512BW-LABEL: 'reduce_i16' 190; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 191; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 192; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 193; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 194; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 195; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 196; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 197; 198; AVX512DQ-LABEL: 'reduce_i16' 199; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 200; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 201; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 202; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 203; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 204; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 205; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 206; 207 %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) 208 %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) 209 %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) 210 %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) 211 %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) 212 %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) 213 ret i32 undef 214} 215 216define i32 @reduce_i8(i32 %arg) { 217; SSE2-LABEL: 'reduce_i8' 218; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 219; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 220; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 221; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 222; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 223; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 224; SSE2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 225; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 226; 227; SSSE3-LABEL: 'reduce_i8' 228; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 229; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 230; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 231; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 232; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 233; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 234; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 235; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 236; 237; SSE4-LABEL: 'reduce_i8' 238; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 239; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 240; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 241; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 242; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 243; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 244; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 245; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 246; 247; AVX1-LABEL: 'reduce_i8' 248; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 249; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 250; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 251; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 252; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 253; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 254; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 255; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 256; 257; AVX2-LABEL: 'reduce_i8' 258; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 259; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 260; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 261; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 262; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 263; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 264; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 265; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 266; 267; AVX512F-LABEL: 'reduce_i8' 268; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 269; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 270; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 271; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 272; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 273; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 274; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 275; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 276; 277; AVX512BW-LABEL: 'reduce_i8' 278; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 279; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 280; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 281; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 282; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 283; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 284; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 285; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 286; 287; AVX512DQ-LABEL: 'reduce_i8' 288; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 289; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 290; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 291; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 292; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 293; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 294; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 295; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 296; 297 %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) 298 %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) 299 %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) 300 %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) 301 %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) 302 %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) 303 %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) 304 ret i32 undef 305} 306 307declare i64 @llvm.vector.reduce.smax.v1i64(<1 x i64>) 308declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) 309declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) 310declare i64 @llvm.vector.reduce.smax.v8i64(<8 x i64>) 311declare i64 @llvm.vector.reduce.smax.v16i64(<16 x i64>) 312 313declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) 314declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) 315declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) 316declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>) 317declare i32 @llvm.vector.reduce.smax.v32i32(<32 x i32>) 318 319declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) 320declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) 321declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) 322declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) 323declare i16 @llvm.vector.reduce.smax.v32i16(<32 x i16>) 324declare i16 @llvm.vector.reduce.smax.v64i16(<64 x i16>) 325 326declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>) 327declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) 328declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) 329declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) 330declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) 331declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) 332declare i8 @llvm.vector.reduce.smax.v128i8(<128 x i8>) 333