1 /**************************************************************************//** 2 * @file core_cmSimd.h 3 * @brief CMSIS Cortex-M SIMD Header File 4 * @version V4.00 5 * @date 22. August 2014 6 * 7 * @note 8 * 9 ******************************************************************************/ 10 /* Copyright (c) 2009 - 2014 ARM LIMITED 11 12 All rights reserved. 13 Redistribution and use in source and binary forms, with or without 14 modification, are permitted provided that the following conditions are met: 15 - Redistributions of source code must retain the above copyright 16 notice, this list of conditions and the following disclaimer. 17 - Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in the 19 documentation and/or other materials provided with the distribution. 20 - Neither the name of ARM nor the names of its contributors may be used 21 to endorse or promote products derived from this software without 22 specific prior written permission. 23 * 24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS AND CONTRIBUTORS BE 28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 POSSIBILITY OF SUCH DAMAGE. 35 ---------------------------------------------------------------------------*/ 36 37 38 #if defined ( __ICCARM__ ) 39 #pragma system_include /* treat file as system include file for MISRA check */ 40 #endif 41 42 #ifndef __CORE_CMSIMD_H 43 #define __CORE_CMSIMD_H 44 45 #ifdef __cplusplus 46 extern "C" { 47 #endif 48 49 50 /******************************************************************************* 51 * Hardware Abstraction Layer 52 ******************************************************************************/ 53 54 55 /* ################### Compiler specific Intrinsics ########################### */ 56 /** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics 57 Access to dedicated SIMD instructions 58 @{ 59 */ 60 61 #if defined ( __CC_ARM ) /*------------------RealView Compiler -----------------*/ 62 /* ARM armcc specific functions */ 63 #define __SADD8 __sadd8 64 #define __QADD8 __qadd8 65 #define __SHADD8 __shadd8 66 #define __UADD8 __uadd8 67 #define __UQADD8 __uqadd8 68 #define __UHADD8 __uhadd8 69 #define __SSUB8 __ssub8 70 #define __QSUB8 __qsub8 71 #define __SHSUB8 __shsub8 72 #define __USUB8 __usub8 73 #define __UQSUB8 __uqsub8 74 #define __UHSUB8 __uhsub8 75 #define __SADD16 __sadd16 76 #define __QADD16 __qadd16 77 #define __SHADD16 __shadd16 78 #define __UADD16 __uadd16 79 #define __UQADD16 __uqadd16 80 #define __UHADD16 __uhadd16 81 #define __SSUB16 __ssub16 82 #define __QSUB16 __qsub16 83 #define __SHSUB16 __shsub16 84 #define __USUB16 __usub16 85 #define __UQSUB16 __uqsub16 86 #define __UHSUB16 __uhsub16 87 #define __SASX __sasx 88 #define __QASX __qasx 89 #define __SHASX __shasx 90 #define __UASX __uasx 91 #define __UQASX __uqasx 92 #define __UHASX __uhasx 93 #define __SSAX __ssax 94 #define __QSAX __qsax 95 #define __SHSAX __shsax 96 #define __USAX __usax 97 #define __UQSAX __uqsax 98 #define __UHSAX __uhsax 99 #define __USAD8 __usad8 100 #define __USADA8 __usada8 101 #define __SSAT16 __ssat16 102 #define __USAT16 __usat16 103 #define __UXTB16 __uxtb16 104 #define __UXTAB16 __uxtab16 105 #define __SXTB16 __sxtb16 106 #define __SXTAB16 __sxtab16 107 #define __SMUAD __smuad 108 #define __SMUADX __smuadx 109 #define __SMLAD __smlad 110 #define __SMLADX __smladx 111 #define __SMLALD __smlald 112 #define __SMLALDX __smlaldx 113 #define __SMUSD __smusd 114 #define __SMUSDX __smusdx 115 #define __SMLSD __smlsd 116 #define __SMLSDX __smlsdx 117 #define __SMLSLD __smlsld 118 #define __SMLSLDX __smlsldx 119 #define __SEL __sel 120 #define __QADD __qadd 121 #define __QSUB __qsub 122 123 #define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ 124 ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) 125 126 #define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ 127 ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) 128 129 #define __SMMLA(ARG1,ARG2,ARG3) ( (int32_t)((((int64_t)(ARG1) * (ARG2)) + \ 130 ((int64_t)(ARG3) << 32) ) >> 32)) 131 132 133 #elif defined ( __GNUC__ ) /*------------------ GNU Compiler ---------------------*/ 134 /* GNU gcc specific functions */ 135 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD8(uint32_t op1, uint32_t op2) 136 { 137 uint32_t result; 138 139 __ASM volatile ("sadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 140 return(result); 141 } 142 143 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD8(uint32_t op1, uint32_t op2) 144 { 145 uint32_t result; 146 147 __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 148 return(result); 149 } 150 151 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2) 152 { 153 uint32_t result; 154 155 __ASM volatile ("shadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 156 return(result); 157 } 158 159 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD8(uint32_t op1, uint32_t op2) 160 { 161 uint32_t result; 162 163 __ASM volatile ("uadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 164 return(result); 165 } 166 167 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2) 168 { 169 uint32_t result; 170 171 __ASM volatile ("uqadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 172 return(result); 173 } 174 175 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2) 176 { 177 uint32_t result; 178 179 __ASM volatile ("uhadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 180 return(result); 181 } 182 183 184 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2) 185 { 186 uint32_t result; 187 188 __ASM volatile ("ssub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 189 return(result); 190 } 191 192 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2) 193 { 194 uint32_t result; 195 196 __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 197 return(result); 198 } 199 200 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2) 201 { 202 uint32_t result; 203 204 __ASM volatile ("shsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 205 return(result); 206 } 207 208 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB8(uint32_t op1, uint32_t op2) 209 { 210 uint32_t result; 211 212 __ASM volatile ("usub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 213 return(result); 214 } 215 216 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2) 217 { 218 uint32_t result; 219 220 __ASM volatile ("uqsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 221 return(result); 222 } 223 224 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2) 225 { 226 uint32_t result; 227 228 __ASM volatile ("uhsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 229 return(result); 230 } 231 232 233 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SADD16(uint32_t op1, uint32_t op2) 234 { 235 uint32_t result; 236 237 __ASM volatile ("sadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 238 return(result); 239 } 240 241 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD16(uint32_t op1, uint32_t op2) 242 { 243 uint32_t result; 244 245 __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 246 return(result); 247 } 248 249 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2) 250 { 251 uint32_t result; 252 253 __ASM volatile ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 254 return(result); 255 } 256 257 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UADD16(uint32_t op1, uint32_t op2) 258 { 259 uint32_t result; 260 261 __ASM volatile ("uadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 262 return(result); 263 } 264 265 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2) 266 { 267 uint32_t result; 268 269 __ASM volatile ("uqadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 270 return(result); 271 } 272 273 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2) 274 { 275 uint32_t result; 276 277 __ASM volatile ("uhadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 278 return(result); 279 } 280 281 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2) 282 { 283 uint32_t result; 284 285 __ASM volatile ("ssub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 286 return(result); 287 } 288 289 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2) 290 { 291 uint32_t result; 292 293 __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 294 return(result); 295 } 296 297 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2) 298 { 299 uint32_t result; 300 301 __ASM volatile ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 302 return(result); 303 } 304 305 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USUB16(uint32_t op1, uint32_t op2) 306 { 307 uint32_t result; 308 309 __ASM volatile ("usub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 310 return(result); 311 } 312 313 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2) 314 { 315 uint32_t result; 316 317 __ASM volatile ("uqsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 318 return(result); 319 } 320 321 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2) 322 { 323 uint32_t result; 324 325 __ASM volatile ("uhsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 326 return(result); 327 } 328 329 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SASX(uint32_t op1, uint32_t op2) 330 { 331 uint32_t result; 332 333 __ASM volatile ("sasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 334 return(result); 335 } 336 337 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QASX(uint32_t op1, uint32_t op2) 338 { 339 uint32_t result; 340 341 __ASM volatile ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 342 return(result); 343 } 344 345 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHASX(uint32_t op1, uint32_t op2) 346 { 347 uint32_t result; 348 349 __ASM volatile ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 350 return(result); 351 } 352 353 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UASX(uint32_t op1, uint32_t op2) 354 { 355 uint32_t result; 356 357 __ASM volatile ("uasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 358 return(result); 359 } 360 361 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQASX(uint32_t op1, uint32_t op2) 362 { 363 uint32_t result; 364 365 __ASM volatile ("uqasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 366 return(result); 367 } 368 369 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHASX(uint32_t op1, uint32_t op2) 370 { 371 uint32_t result; 372 373 __ASM volatile ("uhasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 374 return(result); 375 } 376 377 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SSAX(uint32_t op1, uint32_t op2) 378 { 379 uint32_t result; 380 381 __ASM volatile ("ssax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 382 return(result); 383 } 384 385 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSAX(uint32_t op1, uint32_t op2) 386 { 387 uint32_t result; 388 389 __ASM volatile ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 390 return(result); 391 } 392 393 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2) 394 { 395 uint32_t result; 396 397 __ASM volatile ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 398 return(result); 399 } 400 401 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAX(uint32_t op1, uint32_t op2) 402 { 403 uint32_t result; 404 405 __ASM volatile ("usax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 406 return(result); 407 } 408 409 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2) 410 { 411 uint32_t result; 412 413 __ASM volatile ("uqsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 414 return(result); 415 } 416 417 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2) 418 { 419 uint32_t result; 420 421 __ASM volatile ("uhsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 422 return(result); 423 } 424 425 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USAD8(uint32_t op1, uint32_t op2) 426 { 427 uint32_t result; 428 429 __ASM volatile ("usad8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 430 return(result); 431 } 432 433 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3) 434 { 435 uint32_t result; 436 437 __ASM volatile ("usada8 %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 438 return(result); 439 } 440 441 #define __SSAT16(ARG1,ARG2) \ 442 ({ \ 443 uint32_t __RES, __ARG1 = (ARG1); \ 444 __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ 445 __RES; \ 446 }) 447 448 #define __USAT16(ARG1,ARG2) \ 449 ({ \ 450 uint32_t __RES, __ARG1 = (ARG1); \ 451 __ASM ("usat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ 452 __RES; \ 453 }) 454 455 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTB16(uint32_t op1) 456 { 457 uint32_t result; 458 459 __ASM volatile ("uxtb16 %0, %1" : "=r" (result) : "r" (op1)); 460 return(result); 461 } 462 463 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2) 464 { 465 uint32_t result; 466 467 __ASM volatile ("uxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 468 return(result); 469 } 470 471 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTB16(uint32_t op1) 472 { 473 uint32_t result; 474 475 __ASM volatile ("sxtb16 %0, %1" : "=r" (result) : "r" (op1)); 476 return(result); 477 } 478 479 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2) 480 { 481 uint32_t result; 482 483 __ASM volatile ("sxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 484 return(result); 485 } 486 487 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2) 488 { 489 uint32_t result; 490 491 __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 492 return(result); 493 } 494 495 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2) 496 { 497 uint32_t result; 498 499 __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 500 return(result); 501 } 502 503 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3) 504 { 505 uint32_t result; 506 507 __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 508 return(result); 509 } 510 511 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3) 512 { 513 uint32_t result; 514 515 __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 516 return(result); 517 } 518 519 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc) 520 { 521 union llreg_u{ 522 uint32_t w32[2]; 523 uint64_t w64; 524 } llr; 525 llr.w64 = acc; 526 527 #ifndef __ARMEB__ // Little endian 528 __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); 529 #else // Big endian 530 __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); 531 #endif 532 533 return(llr.w64); 534 } 535 536 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc) 537 { 538 union llreg_u{ 539 uint32_t w32[2]; 540 uint64_t w64; 541 } llr; 542 llr.w64 = acc; 543 544 #ifndef __ARMEB__ // Little endian 545 __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); 546 #else // Big endian 547 __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); 548 #endif 549 550 return(llr.w64); 551 } 552 553 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2) 554 { 555 uint32_t result; 556 557 __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 558 return(result); 559 } 560 561 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2) 562 { 563 uint32_t result; 564 565 __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 566 return(result); 567 } 568 569 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSD (uint32_t op1, uint32_t op2, uint32_t op3) 570 { 571 uint32_t result; 572 573 __ASM volatile ("smlsd %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 574 return(result); 575 } 576 577 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3) 578 { 579 uint32_t result; 580 581 __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); 582 return(result); 583 } 584 585 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLSLD (uint32_t op1, uint32_t op2, uint64_t acc) 586 { 587 union llreg_u{ 588 uint32_t w32[2]; 589 uint64_t w64; 590 } llr; 591 llr.w64 = acc; 592 593 #ifndef __ARMEB__ // Little endian 594 __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); 595 #else // Big endian 596 __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); 597 #endif 598 599 return(llr.w64); 600 } 601 602 __attribute__( ( always_inline ) ) __STATIC_INLINE uint64_t __SMLSLDX (uint32_t op1, uint32_t op2, uint64_t acc) 603 { 604 union llreg_u{ 605 uint32_t w32[2]; 606 uint64_t w64; 607 } llr; 608 llr.w64 = acc; 609 610 #ifndef __ARMEB__ // Little endian 611 __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); 612 #else // Big endian 613 __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); 614 #endif 615 616 return(llr.w64); 617 } 618 619 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SEL (uint32_t op1, uint32_t op2) 620 { 621 uint32_t result; 622 623 __ASM volatile ("sel %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 624 return(result); 625 } 626 627 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QADD(uint32_t op1, uint32_t op2) 628 { 629 uint32_t result; 630 631 __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 632 return(result); 633 } 634 635 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __QSUB(uint32_t op1, uint32_t op2) 636 { 637 uint32_t result; 638 639 __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); 640 return(result); 641 } 642 643 #define __PKHBT(ARG1,ARG2,ARG3) \ 644 ({ \ 645 uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ 646 __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ 647 __RES; \ 648 }) 649 650 #define __PKHTB(ARG1,ARG2,ARG3) \ 651 ({ \ 652 uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ 653 if (ARG3 == 0) \ 654 __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \ 655 else \ 656 __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ 657 __RES; \ 658 }) 659 660 __attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3) 661 { 662 int32_t result; 663 664 __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) ); 665 return(result); 666 } 667 668 669 #elif defined ( __ICCARM__ ) /*------------------ ICC Compiler -------------------*/ 670 /* IAR iccarm specific functions */ 671 #include <cmsis_iar.h> 672 673 674 #elif defined ( __TMS470__ ) /*---------------- TI CCS Compiler ------------------*/ 675 /* TI CCS specific functions */ 676 #include <cmsis_ccs.h> 677 678 679 #elif defined ( __TASKING__ ) /*------------------ TASKING Compiler --------------*/ 680 /* TASKING carm specific functions */ 681 /* not yet supported */ 682 683 684 #elif defined ( __CSMC__ ) /*------------------ COSMIC Compiler -------------------*/ 685 /* Cosmic specific functions */ 686 #include <cmsis_csm.h> 687 688 #endif 689 690 /*@} end of group CMSIS_SIMD_intrinsics */ 691 692 693 #ifdef __cplusplus 694 } 695 #endif 696 697 #endif /* __CORE_CMSIMD_H */ 698