; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s

; Aligned accesses must select the EVEX-encoded aligned moves (vmovdqa32/64,
; vmovaps, vmovapd), while align-1 accesses must select the unaligned forms
; (vmovdqu32/64, vmovups, vmovupd). Masked loads should lower to blends
; ({%k1}) or zero-masked moves ({%k1} {z}).

; 256-bit unmasked loads and stores.

define <8 x i32> @test_256_1(i8* %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
  ret <8 x i32> %res
}

define <8 x i32> @test_256_2(i8* %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
  ret <8 x i32> %res
}

define void @test_256_3(i8* %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %vaddr, align 32
  ret void
}

define void @test_256_4(i8* %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %vaddr, align 1
  ret void
}

define void @test_256_5(i8* %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32> %data, <8 x i32>* %vaddr, align 32
  ret void
}

define <4 x i64> @test_256_6(i8* %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
  ret <4 x i64> %res
}

define void @test_256_7(i8* %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64> %data, <4 x i64>* %vaddr, align 1
  ret void
}

define <4 x i64> @test_256_8(i8* %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
  ret <4 x i64> %res
}

define void @test_256_9(i8* %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %vaddr, align 32
  ret void
}

define <4 x double> @test_256_10(i8* %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 32
  ret <4 x double> %res
}

define void @test_256_11(i8* %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %vaddr, align 32
  ret void
}

define <8 x float> @test_256_12(i8* %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 32
  ret <8 x float> %res
}

define void @test_256_13(i8* %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double> %data, <4 x double>* %vaddr, align 1
  ret void
}

define <4 x double> @test_256_14(i8* %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 1
  ret <4 x double> %res
}

define void @test_256_15(i8* %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float> %data, <8 x float>* %vaddr, align 1
  ret void
}

define <8 x float> @test_256_16(i8* %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 1
  ret <8 x float> %res
}

; 256-bit masked loads: merge with %old via blend, or zero inactive lanes.

define <8 x i32> @test_256_17(i8* %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_18(i8* %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32> %res
}

define <8 x i32> @test_256_19(i8* %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <8 x i32> @test_256_20(i8* %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

define <4 x i64> @test_256_21(i8* %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_22(i8* %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64> %res
}

define <4 x i64> @test_256_23(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

define <4 x i64> @test_256_24(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64> %res
}

define <8 x float> @test_256_25(i8* %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_26(i8* %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float> %res
}

define <8 x float> @test_256_27(i8* %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <8 x float> @test_256_28(i8* %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float> %res
}

define <4 x double> @test_256_29(i8* %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_30(i8* %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double> %res
}

define <4 x double> @test_256_31(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

define <4 x double> @test_256_32(i8* %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double> %res
}

; 128-bit unmasked loads and stores.

define <4 x i32> @test_128_1(i8* %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
  ret <4 x i32> %res
}

define <4 x i32> @test_128_2(i8* %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
  ret <4 x i32> %res
}

define void @test_128_3(i8* %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %vaddr, align 16
  ret void
}

define void @test_128_4(i8* %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %vaddr, align 1
  ret void
}

define void @test_128_5(i8* %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32> %data, <4 x i32>* %vaddr, align 16
  ret void
}

define <2 x i64> @test_128_6(i8* %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
  ret <2 x i64> %res
}

define void @test_128_7(i8* %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64> %data, <2 x i64>* %vaddr, align 1
  ret void
}

define <2 x i64> @test_128_8(i8* %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
  ret <2 x i64> %res
}

define void @test_128_9(i8* %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %vaddr, align 16
  ret void
}

define <2 x double> @test_128_10(i8* %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 16
  ret <2 x double> %res
}

define void @test_128_11(i8* %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %vaddr, align 16
  ret void
}

define <4 x float> @test_128_12(i8* %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 16
  ret <4 x float> %res
}

define void @test_128_13(i8* %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double> %data, <2 x double>* %vaddr, align 1
  ret void
}

define <2 x double> @test_128_14(i8* %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 1
  ret <2 x double> %res
}

define void @test_128_15(i8* %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float> %data, <4 x float>* %vaddr, align 1
  ret void
}

define <4 x float> @test_128_16(i8* %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 1
  ret <4 x float> %res
}

; 128-bit masked loads.

define <4 x i32> @test_128_17(i8* %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_18(i8* %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32> %res
}

define <4 x i32> @test_128_19(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <4 x i32> @test_128_20(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

define <2 x i64> @test_128_21(i8* %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_22(i8* %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64> %res
}

define <2 x i64> @test_128_23(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <2 x i64> @test_128_24(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64> %res
}

define <4 x float> @test_128_25(i8* %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_26(i8* %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float> %res
}

define <4 x float> @test_128_27(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <4 x float> @test_128_28(i8* %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float> %res
}

define <2 x double> @test_128_29(i8* %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_30(i8* %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double> %res
}

define <2 x double> @test_128_31(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}

define <2 x double> @test_128_32(i8* %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double> %res
}