1 /* 2 * Vector math abstractions. 3 * 4 * Copyright (c) 2019, Arm Limited. 5 * SPDX-License-Identifier: MIT 6 */ 7 8 #ifndef _V_MATH_H 9 #define _V_MATH_H 10 11 #ifndef WANT_VMATH 12 /* Enable the build of vector math code. */ 13 # define WANT_VMATH 1 14 #endif 15 #if WANT_VMATH 16 17 /* The goal of this header is to allow vector and scalar 18 build of the same algorithm, the provided intrinsic 19 wrappers are also vector length agnostic so they can 20 be implemented for SVE too (or other simd architectures) 21 and then the code should work on those targets too. */ 22 23 #if SCALAR 24 #define V_NAME(x) __s_##x 25 #elif VPCS && __aarch64__ 26 #define V_NAME(x) __vn_##x 27 #define VPCS_ATTR __attribute__ ((aarch64_vector_pcs)) 28 #else 29 #define V_NAME(x) __v_##x 30 #endif 31 32 #ifndef VPCS_ATTR 33 #define VPCS_ATTR 34 #endif 35 #ifndef VPCS_ALIAS 36 #define VPCS_ALIAS 37 #endif 38 39 #include <stdint.h> 40 #include "math_config.h" 41 42 typedef float f32_t; 43 typedef uint32_t u32_t; 44 typedef int32_t s32_t; 45 typedef double f64_t; 46 typedef uint64_t u64_t; 47 typedef int64_t s64_t; 48 49 /* reinterpret as type1 from type2. */ 50 static inline u32_t 51 as_u32_f32 (f32_t x) 52 { 53 union { f32_t f; u32_t u; } r = {x}; 54 return r.u; 55 } 56 static inline f32_t 57 as_f32_u32 (u32_t x) 58 { 59 union { u32_t u; f32_t f; } r = {x}; 60 return r.f; 61 } 62 static inline s32_t 63 as_s32_u32 (u32_t x) 64 { 65 union { u32_t u; s32_t i; } r = {x}; 66 return r.i; 67 } 68 static inline u32_t 69 as_u32_s32 (s32_t x) 70 { 71 union { s32_t i; u32_t u; } r = {x}; 72 return r.u; 73 } 74 static inline u64_t 75 as_u64_f64 (f64_t x) 76 { 77 union { f64_t f; u64_t u; } r = {x}; 78 return r.u; 79 } 80 static inline f64_t 81 as_f64_u64 (u64_t x) 82 { 83 union { u64_t u; f64_t f; } r = {x}; 84 return r.f; 85 } 86 static inline s64_t 87 as_s64_u64 (u64_t x) 88 { 89 union { u64_t u; s64_t i; } r = {x}; 90 return r.i; 91 } 92 static inline u64_t 93 as_u64_s64 (s64_t x) 94 { 95 union { s64_t i; u64_t u; } r = {x}; 96 return r.u; 97 } 98 99 #if SCALAR 100 #define V_SUPPORTED 1 101 typedef f32_t v_f32_t; 102 typedef u32_t v_u32_t; 103 typedef s32_t v_s32_t; 104 typedef f64_t v_f64_t; 105 typedef u64_t v_u64_t; 106 typedef s64_t v_s64_t; 107 108 static inline int 109 v_lanes32 (void) 110 { 111 return 1; 112 } 113 114 static inline v_f32_t 115 v_f32 (f32_t x) 116 { 117 return x; 118 } 119 static inline v_u32_t 120 v_u32 (u32_t x) 121 { 122 return x; 123 } 124 static inline v_s32_t 125 v_s32 (s32_t x) 126 { 127 return x; 128 } 129 130 static inline f32_t 131 v_get_f32 (v_f32_t x, int i) 132 { 133 return x; 134 } 135 static inline u32_t 136 v_get_u32 (v_u32_t x, int i) 137 { 138 return x; 139 } 140 static inline s32_t 141 v_get_s32 (v_s32_t x, int i) 142 { 143 return x; 144 } 145 146 static inline void 147 v_set_f32 (v_f32_t *x, int i, f32_t v) 148 { 149 *x = v; 150 } 151 static inline void 152 v_set_u32 (v_u32_t *x, int i, u32_t v) 153 { 154 *x = v; 155 } 156 static inline void 157 v_set_s32 (v_s32_t *x, int i, s32_t v) 158 { 159 *x = v; 160 } 161 162 /* true if any elements of a v_cond result is non-zero. */ 163 static inline int 164 v_any_u32 (v_u32_t x) 165 { 166 return x != 0; 167 } 168 /* to wrap the result of relational operators. */ 169 static inline v_u32_t 170 v_cond_u32 (v_u32_t x) 171 { 172 return x ? -1 : 0; 173 } 174 static inline v_f32_t 175 v_abs_f32 (v_f32_t x) 176 { 177 return __builtin_fabsf (x); 178 } 179 static inline v_f32_t 180 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z) 181 { 182 return __builtin_fmaf (x, y, z); 183 } 184 static inline v_f32_t 185 v_round_f32 (v_f32_t x) 186 { 187 return __builtin_roundf (x); 188 } 189 static inline v_s32_t 190 v_round_s32 (v_f32_t x) 191 { 192 return __builtin_lroundf (x); /* relies on -fno-math-errno. */ 193 } 194 /* convert to type1 from type2. */ 195 static inline v_f32_t 196 v_to_f32_s32 (v_s32_t x) 197 { 198 return x; 199 } 200 static inline v_f32_t 201 v_to_f32_u32 (v_u32_t x) 202 { 203 return x; 204 } 205 /* reinterpret as type1 from type2. */ 206 static inline v_u32_t 207 v_as_u32_f32 (v_f32_t x) 208 { 209 union { v_f32_t f; v_u32_t u; } r = {x}; 210 return r.u; 211 } 212 static inline v_f32_t 213 v_as_f32_u32 (v_u32_t x) 214 { 215 union { v_u32_t u; v_f32_t f; } r = {x}; 216 return r.f; 217 } 218 static inline v_s32_t 219 v_as_s32_u32 (v_u32_t x) 220 { 221 union { v_u32_t u; v_s32_t i; } r = {x}; 222 return r.i; 223 } 224 static inline v_u32_t 225 v_as_u32_s32 (v_s32_t x) 226 { 227 union { v_s32_t i; v_u32_t u; } r = {x}; 228 return r.u; 229 } 230 static inline v_f32_t 231 v_lookup_f32 (const f32_t *tab, v_u32_t idx) 232 { 233 return tab[idx]; 234 } 235 static inline v_u32_t 236 v_lookup_u32 (const u32_t *tab, v_u32_t idx) 237 { 238 return tab[idx]; 239 } 240 static inline v_f32_t 241 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p) 242 { 243 return f (x); 244 } 245 static inline v_f32_t 246 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y, 247 v_u32_t p) 248 { 249 return f (x1, x2); 250 } 251 252 static inline int 253 v_lanes64 (void) 254 { 255 return 1; 256 } 257 static inline v_f64_t 258 v_f64 (f64_t x) 259 { 260 return x; 261 } 262 static inline v_u64_t 263 v_u64 (u64_t x) 264 { 265 return x; 266 } 267 static inline v_s64_t 268 v_s64 (s64_t x) 269 { 270 return x; 271 } 272 static inline f64_t 273 v_get_f64 (v_f64_t x, int i) 274 { 275 return x; 276 } 277 static inline void 278 v_set_f64 (v_f64_t *x, int i, f64_t v) 279 { 280 *x = v; 281 } 282 /* true if any elements of a v_cond result is non-zero. */ 283 static inline int 284 v_any_u64 (v_u64_t x) 285 { 286 return x != 0; 287 } 288 /* to wrap the result of relational operators. */ 289 static inline v_u64_t 290 v_cond_u64 (v_u64_t x) 291 { 292 return x ? -1 : 0; 293 } 294 static inline v_f64_t 295 v_abs_f64 (v_f64_t x) 296 { 297 return __builtin_fabs (x); 298 } 299 static inline v_f64_t 300 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z) 301 { 302 return __builtin_fma (x, y, z); 303 } 304 static inline v_f64_t 305 v_round_f64 (v_f64_t x) 306 { 307 return __builtin_round (x); 308 } 309 static inline v_s64_t 310 v_round_s64 (v_f64_t x) 311 { 312 return __builtin_lround (x); /* relies on -fno-math-errno. */ 313 } 314 /* convert to type1 from type2. */ 315 static inline v_f64_t 316 v_to_f64_s64 (v_s64_t x) 317 { 318 return x; 319 } 320 static inline v_f64_t 321 v_to_f64_u64 (v_u64_t x) 322 { 323 return x; 324 } 325 /* reinterpret as type1 from type2. */ 326 static inline v_u64_t 327 v_as_u64_f64 (v_f64_t x) 328 { 329 union { v_f64_t f; v_u64_t u; } r = {x}; 330 return r.u; 331 } 332 static inline v_f64_t 333 v_as_f64_u64 (v_u64_t x) 334 { 335 union { v_u64_t u; v_f64_t f; } r = {x}; 336 return r.f; 337 } 338 static inline v_s64_t 339 v_as_s64_u64 (v_u64_t x) 340 { 341 union { v_u64_t u; v_s64_t i; } r = {x}; 342 return r.i; 343 } 344 static inline v_u64_t 345 v_as_u64_s64 (v_s64_t x) 346 { 347 union { v_s64_t i; v_u64_t u; } r = {x}; 348 return r.u; 349 } 350 static inline v_f64_t 351 v_lookup_f64 (const f64_t *tab, v_u64_t idx) 352 { 353 return tab[idx]; 354 } 355 static inline v_u64_t 356 v_lookup_u64 (const u64_t *tab, v_u64_t idx) 357 { 358 return tab[idx]; 359 } 360 static inline v_f64_t 361 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p) 362 { 363 return f (x); 364 } 365 366 #elif __aarch64__ 367 #define V_SUPPORTED 1 368 #include <arm_neon.h> 369 typedef float32x4_t v_f32_t; 370 typedef uint32x4_t v_u32_t; 371 typedef int32x4_t v_s32_t; 372 typedef float64x2_t v_f64_t; 373 typedef uint64x2_t v_u64_t; 374 typedef int64x2_t v_s64_t; 375 376 static inline int 377 v_lanes32 (void) 378 { 379 return 4; 380 } 381 382 static inline v_f32_t 383 v_f32 (f32_t x) 384 { 385 return (v_f32_t){x, x, x, x}; 386 } 387 static inline v_u32_t 388 v_u32 (u32_t x) 389 { 390 return (v_u32_t){x, x, x, x}; 391 } 392 static inline v_s32_t 393 v_s32 (s32_t x) 394 { 395 return (v_s32_t){x, x, x, x}; 396 } 397 398 static inline f32_t 399 v_get_f32 (v_f32_t x, int i) 400 { 401 return x[i]; 402 } 403 static inline u32_t 404 v_get_u32 (v_u32_t x, int i) 405 { 406 return x[i]; 407 } 408 static inline s32_t 409 v_get_s32 (v_s32_t x, int i) 410 { 411 return x[i]; 412 } 413 414 static inline void 415 v_set_f32 (v_f32_t *x, int i, f32_t v) 416 { 417 (*x)[i] = v; 418 } 419 static inline void 420 v_set_u32 (v_u32_t *x, int i, u32_t v) 421 { 422 (*x)[i] = v; 423 } 424 static inline void 425 v_set_s32 (v_s32_t *x, int i, s32_t v) 426 { 427 (*x)[i] = v; 428 } 429 430 /* true if any elements of a v_cond result is non-zero. */ 431 static inline int 432 v_any_u32 (v_u32_t x) 433 { 434 /* assume elements in x are either 0 or -1u. */ 435 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0; 436 } 437 /* to wrap the result of relational operators. */ 438 static inline v_u32_t 439 v_cond_u32 (v_u32_t x) 440 { 441 return x; 442 } 443 static inline v_f32_t 444 v_abs_f32 (v_f32_t x) 445 { 446 return vabsq_f32 (x); 447 } 448 static inline v_f32_t 449 v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z) 450 { 451 return vfmaq_f32 (z, x, y); 452 } 453 static inline v_f32_t 454 v_round_f32 (v_f32_t x) 455 { 456 return vrndaq_f32 (x); 457 } 458 static inline v_s32_t 459 v_round_s32 (v_f32_t x) 460 { 461 return vcvtaq_s32_f32 (x); 462 } 463 /* convert to type1 from type2. */ 464 static inline v_f32_t 465 v_to_f32_s32 (v_s32_t x) 466 { 467 return (v_f32_t){x[0], x[1], x[2], x[3]}; 468 } 469 static inline v_f32_t 470 v_to_f32_u32 (v_u32_t x) 471 { 472 return (v_f32_t){x[0], x[1], x[2], x[3]}; 473 } 474 /* reinterpret as type1 from type2. */ 475 static inline v_u32_t 476 v_as_u32_f32 (v_f32_t x) 477 { 478 union { v_f32_t f; v_u32_t u; } r = {x}; 479 return r.u; 480 } 481 static inline v_f32_t 482 v_as_f32_u32 (v_u32_t x) 483 { 484 union { v_u32_t u; v_f32_t f; } r = {x}; 485 return r.f; 486 } 487 static inline v_s32_t 488 v_as_s32_u32 (v_u32_t x) 489 { 490 union { v_u32_t u; v_s32_t i; } r = {x}; 491 return r.i; 492 } 493 static inline v_u32_t 494 v_as_u32_s32 (v_s32_t x) 495 { 496 union { v_s32_t i; v_u32_t u; } r = {x}; 497 return r.u; 498 } 499 static inline v_f32_t 500 v_lookup_f32 (const f32_t *tab, v_u32_t idx) 501 { 502 return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; 503 } 504 static inline v_u32_t 505 v_lookup_u32 (const u32_t *tab, v_u32_t idx) 506 { 507 return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]}; 508 } 509 static inline v_f32_t 510 v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p) 511 { 512 return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1], 513 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]}; 514 } 515 static inline v_f32_t 516 v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y, 517 v_u32_t p) 518 { 519 return ( 520 v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1], 521 p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]}; 522 } 523 524 static inline int 525 v_lanes64 (void) 526 { 527 return 2; 528 } 529 static inline v_f64_t 530 v_f64 (f64_t x) 531 { 532 return (v_f64_t){x, x}; 533 } 534 static inline v_u64_t 535 v_u64 (u64_t x) 536 { 537 return (v_u64_t){x, x}; 538 } 539 static inline v_s64_t 540 v_s64 (s64_t x) 541 { 542 return (v_s64_t){x, x}; 543 } 544 static inline f64_t 545 v_get_f64 (v_f64_t x, int i) 546 { 547 return x[i]; 548 } 549 static inline void 550 v_set_f64 (v_f64_t *x, int i, f64_t v) 551 { 552 (*x)[i] = v; 553 } 554 /* true if any elements of a v_cond result is non-zero. */ 555 static inline int 556 v_any_u64 (v_u64_t x) 557 { 558 /* assume elements in x are either 0 or -1u. */ 559 return vpaddd_u64 (x) != 0; 560 } 561 /* to wrap the result of relational operators. */ 562 static inline v_u64_t 563 v_cond_u64 (v_u64_t x) 564 { 565 return x; 566 } 567 static inline v_f64_t 568 v_abs_f64 (v_f64_t x) 569 { 570 return vabsq_f64 (x); 571 } 572 static inline v_f64_t 573 v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z) 574 { 575 return vfmaq_f64 (z, x, y); 576 } 577 static inline v_f64_t 578 v_round_f64 (v_f64_t x) 579 { 580 return vrndaq_f64 (x); 581 } 582 static inline v_s64_t 583 v_round_s64 (v_f64_t x) 584 { 585 return vcvtaq_s64_f64 (x); 586 } 587 /* convert to type1 from type2. */ 588 static inline v_f64_t 589 v_to_f64_s64 (v_s64_t x) 590 { 591 return (v_f64_t){x[0], x[1]}; 592 } 593 static inline v_f64_t 594 v_to_f64_u64 (v_u64_t x) 595 { 596 return (v_f64_t){x[0], x[1]}; 597 } 598 /* reinterpret as type1 from type2. */ 599 static inline v_u64_t 600 v_as_u64_f64 (v_f64_t x) 601 { 602 union { v_f64_t f; v_u64_t u; } r = {x}; 603 return r.u; 604 } 605 static inline v_f64_t 606 v_as_f64_u64 (v_u64_t x) 607 { 608 union { v_u64_t u; v_f64_t f; } r = {x}; 609 return r.f; 610 } 611 static inline v_s64_t 612 v_as_s64_u64 (v_u64_t x) 613 { 614 union { v_u64_t u; v_s64_t i; } r = {x}; 615 return r.i; 616 } 617 static inline v_u64_t 618 v_as_u64_s64 (v_s64_t x) 619 { 620 union { v_s64_t i; v_u64_t u; } r = {x}; 621 return r.u; 622 } 623 static inline v_f64_t 624 v_lookup_f64 (const f64_t *tab, v_u64_t idx) 625 { 626 return (v_f64_t){tab[idx[0]], tab[idx[1]]}; 627 } 628 static inline v_u64_t 629 v_lookup_u64 (const u64_t *tab, v_u64_t idx) 630 { 631 return (v_u64_t){tab[idx[0]], tab[idx[1]]}; 632 } 633 static inline v_f64_t 634 v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p) 635 { 636 return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]}; 637 } 638 #endif 639 640 #endif 641 #endif 642