; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
  ret <8 x i32>%res
}

define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
  ret <8 x i32>%res
}

define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
  ret void
}

define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
  ret void
}

define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i32>*
  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
  ret void
}

define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
  ret <4 x i64>%res
}

define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  store <4 x i64>%data, <4 x i64>* %vaddr, align 1
  ret void
}

define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
  ret <4 x i64>%res
}
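
; FP vector types should take the same path as the integer tests above: with
; no mask, an aligned access picks vmovaps and an unaligned one vmovups, and
; both should compress from EVEX to the shorter VEX encoding.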
define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double>%data, <4 x double>* %vaddr, align 32
  ret void
}

define <4 x double> @test_256_10(i8 * %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 32
  ret <4 x double>%res
}

define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float>%data, <8 x float>* %vaddr, align 32
  ret void
}

define <8 x float> @test_256_12(i8 * %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 32
  ret <8 x float>%res
}

define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  store <4 x double>%data, <4 x double>* %vaddr, align 1
  ret void
}

define <4 x double> @test_256_14(i8 * %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x double>*
  %res = load <4 x double>, <4 x double>* %vaddr, align 1
  ret <4 x double>%res
}

define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  store <8 x float>%data, <8 x float>* %vaddr, align 1
  ret void
}

define <8 x float> @test_256_16(i8 * %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x float>*
  %res = load <8 x float>, <8 x float>* %vaddr, align 1
  ret <8 x float>%res
}
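
; Masked loads have no VEX form, so the tests below must stay EVEX-encoded.
; Alignment selects vmovdqa32 vs. vmovdqu32, and selecting against
; zeroinitializer instead of %old should produce the zero-masked {z} variant.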
define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i32>*
  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}
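
; Same pattern with 64-bit elements: the mask should come from vptestmq and
; the loads should use the q-suffixed vmovdqa64/vmovdqu64 forms.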
define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i64>*
  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}
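
; For FP destinations the mask construction differs: fcmp one on <8 x float>
; should lower to vcmpneq_oqps against a zeroed register, while the
; <4 x double> tests build the mask from an integer vptestmq.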
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x float>*
  %r = load <8 x float>, <8 x float>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x double>*
  %r = load <4 x double>, <4 x double>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}
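
; 128-bit versions of the tests above. Instruction selection should be
; identical, just with xmm registers and the 128-bit encodings.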
define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
  ret <4 x i32>%res
}

define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
  ret <4 x i32>%res
}

define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64>%data, <2 x i64>* %vaddr, align 16
  ret void
}

define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32>%data, <4 x i32>* %vaddr, align 1
  ret void
}

define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x i32>*
  store <4 x i32>%data, <4 x i32>* %vaddr, align 16
  ret void
}

define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
  ret <2 x i64>%res
}

define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  store <2 x i64>%data, <2 x i64>* %vaddr, align 1
  ret void
}

define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
  ret <2 x i64>%res
}

define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double>%data, <2 x double>* %vaddr, align 16
  ret void
}

define <2 x double> @test_128_10(i8 * %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 16
  ret <2 x double>%res
}

define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float>%data, <4 x float>* %vaddr, align 16
  ret void
}

define <4 x float> @test_128_12(i8 * %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 16
  ret <4 x float>%res
}

define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  store <2 x double>%data, <2 x double>* %vaddr, align 1
  ret void
}

define <2 x double> @test_128_14(i8 * %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <2 x double>*
  %res = load <2 x double>, <2 x double>* %vaddr, align 1
  ret <2 x double>%res
}

define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  store <4 x float>%data, <4 x float>* %vaddr, align 1
  ret void
}

define <4 x float> @test_128_16(i8 * %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <4 x float>*
  %res = load <4 x float>, <4 x float>* %vaddr, align 1
  ret <4 x float>%res
}
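
; Masked 128-bit integer loads; as with the 256-bit tests, these should stay
; EVEX-encoded because of the {%k1} operand.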
define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x i32>*
  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}
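
; 64-bit element variants: vptestmq plus vmovdqa64/vmovdqu64.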
define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}
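
; Note these FP tests build the mask with an integer icmp, so vptestmd and
; vptestmq should show up here rather than a vcmpps sequence.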
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}

define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}