1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file Scatter.cpp
24  *
25  * @brief Shader support library implementation for scatter emulation
26  *
27  * Notes:
28  *
29  ******************************************************************************/
30 #include <stdarg.h>
31 #include "common/os.h"
32 #include "common/simdlib.hpp"
33 
ScatterPS_256(uint8_t * pBase,SIMD256::Integer vIndices,SIMD256::Float vSrc,uint8_t mask,uint32_t scale)34 extern "C" void ScatterPS_256(uint8_t* pBase, SIMD256::Integer vIndices, SIMD256::Float vSrc, uint8_t mask, uint32_t scale)
35 {
36     OSALIGN(float, 32) src[8];
37     OSALIGN(uint32_t, 32) indices[8];
38 
39     SIMD256::store_ps(src, vSrc);
40     SIMD256::store_si((SIMD256::Integer*)indices, vIndices);
41 
42     unsigned long index;
43     while (_BitScanForward(&index, mask))
44     {
45         mask &= ~(1 << index);
46 
47         *(float*)(pBase + indices[index] * scale) = src[index];
48     }
49 }
50