; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -codegenprepare -S | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown"

; %za is defined in entry but only consumed by the add/sub in the two
; successors. The expected output has one copy of the zext next to each user
; and no extension left in entry.
define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
; CHECK-LABEL: @sink_zext(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
; CHECK:       if.else:
; CHECK-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
; CHECK-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
; CHECK-NEXT:    ret <8 x i16> [[RES_2]]
;
entry:
  %za = zext <8 x i8> %a to <8 x i16>
  br i1 %c, label %if.then, label %if.else

if.then:
  %zb.1 = zext <8 x i8> %b to <8 x i16>
  %res.1 = add <8 x i16> %za, %zb.1
  ret <8 x i16> %res.1

if.else:
  %zb.2 = zext <8 x i8> %b to <8 x i16>
  %res.2 = sub <8 x i16> %za, %zb.2
  ret <8 x i16> %res.2
}

; Same shape as @sink_zext, using sign extension instead of zero extension.
define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
; CHECK-LABEL: @sink_sext(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
; CHECK:       if.else:
; CHECK-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
; CHECK-NEXT:    [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
; CHECK-NEXT:    ret <8 x i16> [[RES_2]]
;
entry:
  %za = sext <8 x i8> %a to <8 x i16>
  br i1 %c, label %if.then, label %if.else

if.then:
  %zb.1 = sext <8 x i8> %b to <8 x i16>
  %res.1 = add <8 x i16> %za, %zb.1
  ret <8 x i16> %res.1

if.else:
  %zb.2 = sext <8 x i8> %b to <8 x i16>
  %res.2 = sub <8 x i16> %za, %zb.2
  ret <8 x i16> %res.2
}

; %za is only used by the add in if.then; if.else returns the extended %b
; directly, with no add/sub consuming it. The expected output has the
; extension of %a next to the add and nothing left in entry.
; This variant uses zext throughout so that it is distinct from
; @do_not_sink_nonfree_sext below (it previously used sext and was an exact
; duplicate of that function).
; NOTE(review): the assertions below were derived from the zext pattern
; asserted in @sink_zext; rerun utils/update_test_checks.py to confirm.
define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
; CHECK-LABEL: @do_not_sink_nonfree_zext(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
; CHECK:       if.else:
; CHECK-NEXT:    [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[ZB_2]]
;
entry:
  %za = zext <8 x i8> %a to <8 x i16>
  br i1 %c, label %if.then, label %if.else

if.then:
  %zb.1 = zext <8 x i8> %b to <8 x i16>
  %res.1 = add <8 x i16> %za, %zb.1
  ret <8 x i16> %res.1

if.else:
  %zb.2 = zext <8 x i8> %b to <8 x i16>
  ret <8 x i16> %zb.2
}

; Sign-extension counterpart of @do_not_sink_nonfree_zext.
define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
; CHECK-LABEL: @do_not_sink_nonfree_sext(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
; CHECK-NEXT:    ret <8 x i16> [[RES_1]]
; CHECK:       if.else:
; CHECK-NEXT:    [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[ZB_2]]
;
entry:
  %za = sext <8 x i8> %a to <8 x i16>
  br i1 %c, label %if.then, label %if.else

if.then:
  %zb.1 = sext <8 x i8> %b to <8 x i16>
  %res.1 = add <8 x i16> %za, %zb.1
  ret <8 x i16> %res.1

if.else:
  %zb.2 = sext <8 x i8> %b to <8 x i16>
  ret <8 x i16> %zb.2
}

; The masks used are suitable for umull, sink shufflevector to users.
define <8 x i16> @sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @sink_shufflevector_umull(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]])
; CHECK-NEXT:    ret <8 x i16> [[VMULL0]]
; CHECK:       if.else:
; CHECK-NEXT:    [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]])
; CHECK-NEXT:    ret <8 x i16> [[VMULL1]]
;
entry:
  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  br i1 undef, label %if.then, label %if.else

if.then:
  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
  ret <8 x i16> %vmull0

if.else:
  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
  ret <8 x i16> %vmull1
}

; Both exts and their shufflevector operands can be sunk.
; NOTE(review): this function carries no FileCheck assertions, so the run only
; verifies that opt accepts it. Regenerate with utils/update_test_checks.py to
; pin the expected output.
define <8 x i16> @sink_shufflevector_ext_subadd(<16 x i8> %a, <16 x i8> %b) {
entry:
  %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z1 = zext <8 x i8> %s1 to <8 x i16>
  %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %z3 = sext <8 x i8> %s3 to <8 x i16>
  br i1 undef, label %if.then, label %if.else

if.then:
  %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z2 = zext <8 x i8> %s2 to <8 x i16>
  %res1 = add <8 x i16> %z1, %z2
  ret <8 x i16> %res1

if.else:
  %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %z4 = sext <8 x i8> %s4 to <8 x i16>
  %res2 = sub <8 x i16> %z3, %z4
  ret <8 x i16> %res2
}


declare void @user1(<8 x i16>)

; Both exts and their shufflevector operands can be sunk.
define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) {
; NOTE(review): no FileCheck assertions are attached to this function, so the
; run only verifies that opt accepts it.
entry:
  ; Lanes 0-7 of %a, zero-extended; the only user is the add in if.then.
  %a.lo = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %a.lo.ext = zext <8 x i8> %a.lo to <8 x i16>
  ; Lanes 8-15 of %a, sign-extended; used by the call to @user1 in this block
  ; as well as by the sub in if.else, i.e. it has more than one user.
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %a.hi.ext = sext <8 x i8> %a.hi to <8 x i16>
  call void @user1(<8 x i16> %a.hi.ext)
  br i1 undef, label %if.then, label %if.else

if.then:
  ; zext(lanes 0-7 of %a) + zext(lanes 0-7 of %b)
  %b.lo = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b.lo.ext = zext <8 x i8> %b.lo to <8 x i16>
  %sum = add <8 x i16> %a.lo.ext, %b.lo.ext
  ret <8 x i16> %sum

if.else:
  ; sext(lanes 8-15 of %a) - sext(lanes 8-15 of %b)
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi.ext = sext <8 x i8> %b.hi to <8 x i16>
  %diff = sub <8 x i16> %a.hi.ext, %b.hi.ext
  ret <8 x i16> %diff
}


; The masks used are not suitable for umull, do not sink.
define <8 x i16> @no_sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @no_sink_shufflevector_umull(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A_DUP_LO:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[A_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[B_LO:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[MUL_LO:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[A_DUP_LO]], <8 x i8> [[B_LO]])
; CHECK-NEXT:    ret <8 x i16> [[MUL_LO]]
; CHECK:       if.else:
; CHECK-NEXT:    [[B_DUP_HI:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[MUL_HI:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[A_HI]], <8 x i8> [[B_DUP_HI]])
; CHECK-NEXT:    ret <8 x i16> [[MUL_HI]]
;
entry:
  ; Lane 1 is repeated in the position where lane 4 would be, so this mask is
  ; not a plain extract of lanes 0-7.
  %a.dup.lo = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
  ; Plain extract of lanes 8-15; its partner in if.else is the irregular one.
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  br i1 undef, label %if.then, label %if.else

if.then:
  %b.lo = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %mul.lo = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a.dup.lo, <8 x i8> %b.lo) #3
  ret <8 x i16> %mul.lo

if.else:
  ; Lane 10 is repeated in the position where lane 11 would be, so this mask
  ; is not a plain extract of lanes 8-15.
  %b.dup.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
  %mul.hi = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a.hi, <8 x i8> %b.dup.hi) #3
  ret <8 x i16> %mul.hi
}


; Function Attrs: nounwind readnone
; NOTE(review): attribute groups #2 and #3 are referenced in this file but no
; definition is visible here; confirm that is intentional.
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) #2