; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
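
; The round immediates used throughout follow the SSE4.1 ROUNDPS/ROUNDPD imm8
; encoding: bits[1:0] select the rounding mode (01 = toward -inf, 10 = toward
; +inf, 11 = toward zero), bit 2 selects the MXCSR rounding mode instead, and
; bit 3 suppresses the precision (inexact) exception. Hence $9 = floor,
; $10 = ceil, $11 = trunc, $4 = rint (MXCSR mode, inexact reported) and
; $12 = nearbyint (MXCSR mode, inexact suppressed).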

define <2 x double> @floor_v2f64(<2 x double> %p) {
; SSE41-LABEL: floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)

define <4 x float> @floor_v4f32(<4 x float> %p) {
; SSE41-LABEL: floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
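
; The 256-bit tests below also cover type legalization: with only SSE4.1 a
; v4f64/v8f32 round is split into two 128-bit ops, while AVX performs a
; single round on the full ymm register.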

define <4 x double> @floor_v4f64(<4 x double> %p) {
; SSE41-LABEL: floor_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $9, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)

define <8 x float> @floor_v8f32(<8 x float> %p) {
; SSE41-LABEL: floor_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $9, %xmm0, %xmm0
; SSE41-NEXT:    roundps $9, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: floor_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $9, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)

define <2 x double> @ceil_v2f64(<2 x double> %p) {
; SSE41-LABEL: ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p)

define <4 x float> @ceil_v4f32(<4 x float> %p) {
; SSE41-LABEL: ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p)

define <4 x double> @ceil_v4f64(<4 x double> %p) {
; SSE41-LABEL: ceil_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $10, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p)

define <8 x float> @ceil_v8f32(<8 x float> %p) {
; SSE41-LABEL: ceil_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $10, %xmm0, %xmm0
; SSE41-NEXT:    roundps $10, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: ceil_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p)

define <2 x double> @trunc_v2f64(<2 x double> %p) {
; SSE41-LABEL: trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p)

define <4 x float> @trunc_v4f32(<4 x float> %p) {
; SSE41-LABEL: trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p)

define <4 x double> @trunc_v4f64(<4 x double> %p) {
; SSE41-LABEL: trunc_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p)

define <8 x float> @trunc_v8f32(<8 x float> %p) {
; SSE41-LABEL: trunc_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    roundps $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $11, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p)

define <2 x double> @rint_v2f64(<2 x double> %p) {
; SSE41-LABEL: rint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.rint.v2f64(<2 x double> %p)

define <4 x float> @rint_v4f32(<4 x float> %p) {
; SSE41-LABEL: rint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.rint.v4f32(<4 x float> %p)

define <4 x double> @rint_v4f64(<4 x double> %p) {
; SSE41-LABEL: rint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $4, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.rint.v4f64(<4 x double> %p)

define <8 x float> @rint_v8f32(<8 x float> %p) {
; SSE41-LABEL: rint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $4, %xmm0, %xmm0
; SSE41-NEXT:    roundps $4, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: rint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.rint.v8f32(<8 x float> %p)

define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
; SSE41-LABEL: nearbyint_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)

define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
; SSE41-LABEL: nearbyint_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)

define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
; SSE41-LABEL: nearbyint_v4f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundpd $12, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)

define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
; SSE41-LABEL: nearbyint_v8f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    roundps $12, %xmm0, %xmm0
; SSE41-NEXT:    roundps $12, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX-LABEL: nearbyint_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)

;
; Constant Folding
;
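; The calls below have constant operands, so they should be folded at compile
; time to a load of the result vector; no round instruction should be emitted.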

define <2 x double> @const_floor_v2f64() {
; SSE41-LABEL: const_floor_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_floor_v4f32() {
; SSE41-LABEL: const_floor_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_floor_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_ceil_v2f64() {
; SSE41-LABEL: const_ceil_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_ceil_v4f32() {
; SSE41-LABEL: const_ceil_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_ceil_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}

define <2 x double> @const_trunc_v2f64() {
; SSE41-LABEL: const_trunc_v2f64:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v2f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
  ret <2 x double> %t
}

define <4 x float> @const_trunc_v4f32() {
; SSE41-LABEL: const_trunc_v4f32:
; SSE41:       ## BB#0:
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; SSE41-NEXT:    retq
;
; AVX-LABEL: const_trunc_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
; AVX-NEXT:    retq
  %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
  ret <4 x float> %t
}