; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
; (these predicates are symmetric, so the memory operand can be folded into the
; compare; lt/le must keep their operand order, so the load stays in a register)
;

define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_eq
  ;SSE: cmpeqps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_eq
  ;AVX: vcmpeqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ne
  ;SSE: cmpneqps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_ne
  ;AVX: vcmpneqps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_ord
  ;SSE: cmpordps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_ord
  ;AVX: vcmpordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_uno
  ;SSE: cmpunordps (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_uno
  ;AVX: vcmpunordps (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_lt
  ;SSE: movaps (%rdi), %xmm1
  ;SSE-NEXT: cmpltps %xmm0, %xmm1
  ;SSE-NEXT: movaps %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_lt
  ;AVX: vmovaps (%rdi), %xmm1
  ;AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) #0 {
  ;SSE-LABEL: commute_cmpps_le
  ;SSE: movaps (%rdi), %xmm1
  ;SSE-NEXT: cmpleps %xmm0, %xmm1
  ;SSE-NEXT: movaps %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmpps_le
  ;AVX: vmovaps (%rdi), %xmm1
  ;AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_eq_ymm
  ;AVX: vcmpeqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ne_ymm
  ;AVX: vcmpneqps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_ord_ymm
  ;AVX: vcmpordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_uno_ymm
  ;AVX: vcmpunordps (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_lt_ymm
  ;AVX: vmovaps (%rdi), %ymm1
  ;AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
  ;AVX-LABEL: commute_cmpps_le_ymm
  ;AVX: vmovaps (%rdi), %ymm1
  ;AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_eq
  ;SSE: cmpeqpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_eq
  ;AVX: vcmpeqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ne
  ;SSE: cmpneqpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_ne
  ;AVX: vcmpneqpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_ord
  ;SSE: cmpordpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_ord
  ;AVX: vcmpordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_uno
  ;SSE: cmpunordpd (%rdi), %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_uno
  ;AVX: vcmpunordpd (%rdi), %xmm0, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_lt
  ;SSE: movapd (%rdi), %xmm1
  ;SSE-NEXT: cmpltpd %xmm0, %xmm1
  ;SSE-NEXT: movapd %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_lt
  ;AVX: vmovapd (%rdi), %xmm1
  ;AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) #0 {
  ;SSE-LABEL: commute_cmppd_le
  ;SSE: movapd (%rdi), %xmm1
  ;SSE-NEXT: cmplepd %xmm0, %xmm1
  ;SSE-NEXT: movapd %xmm1, %xmm0
  ;SSE-NEXT: retq

  ;AVX-LABEL: commute_cmppd_le
  ;AVX: vmovapd (%rdi), %xmm1
  ;AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
  ;AVX-NEXT: retq

  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_eq_ymm
  ;AVX: vcmpeqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ne_ymm
  ;AVX: vcmpneqpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_ord_ymm
  ;AVX: vcmpordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_uno_ymm
  ;AVX: vcmpunordpd (%rdi), %ymm0, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_lt_ymm
  ;AVX: vmovapd (%rdi), %ymm1
  ;AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) #0 {
  ;AVX-LABEL: commute_cmppd_le_ymm
  ;AVX: vmovapd (%rdi), %ymm1
  ;AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
  ;AVX-NEXT: retq

  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}
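
; The functions above reference attribute group #0, but no definition appears
; in this file; a minimal nounwind group is assumed here so the IR is
; self-contained (adjust or drop if a different group was intended).
attributes #0 = { nounwind }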