1 /****************************************************************************
2 * Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
23 
24 #ifndef __SWR_OS_H__
25 #define __SWR_OS_H__
26 
27 #include <cstddef>
28 #include "core/knobs.h"
29 
30 #if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
31 
32 #define SWR_API __cdecl
33 #define SWR_VISIBLE  __declspec(dllexport)
34 
35 #ifndef NOMINMAX
36 #define NOMINMAX
37 #include <windows.h>
38 #undef NOMINMAX
39 #else
40 #include <windows.h>
41 #endif
42 #include <intrin.h>
43 #include <cstdint>
44 
45 #if defined(MemoryFence)
46 // Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
47 #undef MemoryFence
48 #endif
49 
50 #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
51 
52 #if defined(_DEBUG)
53 // We compile Debug builds with inline function expansion enabled.  This allows
54 // functions compiled with __forceinline to be inlined even in Debug builds.
55 // The inline_depth(0) pragma below will disable inline function expansion for
56 // normal INLINE / inline functions, but not for __forceinline functions.
57 // Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
58 // Debug builds.
59 #define INLINE inline
60 #pragma inline_depth(0)
61 #else
62 #define INLINE __forceinline
63 #endif
64 #define DEBUGBREAK __debugbreak()
65 
66 #define PRAGMA_WARNING_PUSH_DISABLE(...) \
67     __pragma(warning(push));\
68     __pragma(warning(disable:__VA_ARGS__));
69 
70 #define PRAGMA_WARNING_POP() __pragma(warning(pop))
71 
AlignedMalloc(size_t _Size,size_t _Alignment)72 static inline void *AlignedMalloc(size_t _Size, size_t _Alignment)
73 {
74     return _aligned_malloc(_Size, _Alignment);
75 }
76 
AlignedFree(void * p)77 static inline void AlignedFree(void* p)
78 {
79     return _aligned_free(p);
80 }
81 
82 #if defined(_WIN64)
83 #define BitScanReverseSizeT BitScanReverse64
84 #define BitScanForwardSizeT BitScanForward64
85 #define _mm_popcount_sizeT _mm_popcnt_u64
86 #else
87 #define BitScanReverseSizeT BitScanReverse
88 #define BitScanForwardSizeT BitScanForward
89 #define _mm_popcount_sizeT _mm_popcnt_u32
90 #endif
91 
92 #elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
93 
94 #define SWR_API
95 #define SWR_VISIBLE __attribute__((visibility("default")))
96 
97 #include <stdlib.h>
98 #include <string.h>
99 #include <x86intrin.h>
100 #include <stdint.h>
101 #include <sys/types.h>
102 #include <unistd.h>
103 #include <sys/stat.h>
104 #include <stdio.h>
105 #include <limits.h>
106 
107 typedef void            VOID;
108 typedef void*           LPVOID;
109 typedef int             INT;
110 typedef unsigned int    UINT;
111 typedef void*           HANDLE;
112 typedef int             LONG;
113 typedef unsigned int    DWORD;
114 
115 #undef FALSE
116 #define FALSE 0
117 
118 #undef TRUE
119 #define TRUE 1
120 
121 #define MAX_PATH PATH_MAX
122 
123 #define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
124 #ifndef INLINE
125 #define INLINE __inline
126 #endif
127 #define DEBUGBREAK asm ("int $3")
128 
129 #if !defined(__CYGWIN__)
130 
131 #ifndef __cdecl
132 #define __cdecl
133 #endif
134 #ifndef __stdcall
135 #define __stdcall
136 #endif
137 
138 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
139     #define __declspec(x)           __declspec_##x
140     #define __declspec_align(y)     __attribute__((aligned(y)))
141     #define __declspec_deprecated   __attribute__((deprecated))
142     #define __declspec_dllexport
143     #define __declspec_dllimport
144     #define __declspec_noinline     __attribute__((__noinline__))
145     #define __declspec_nothrow      __attribute__((nothrow))
146     #define __declspec_novtable
147     #define __declspec_thread       __thread
148 #else
149     #define __declspec(X)
150 #endif
151 
152 #endif
153 
154 #define GCC_VERSION (__GNUC__ * 10000 \
155                      + __GNUC_MINOR__ * 100 \
156                      + __GNUC_PATCHLEVEL__)
157 
158 #if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
// Fallback for old gcc releases that lack the __rdtsc intrinsic: executes the
// RDTSC instruction and returns the 64-bit counter it reports.
//
// RDTSC delivers the counter as two 32-bit halves in EAX (low) and EDX (high).
// Both halves are held in unsigned 32-bit variables so that widening the low
// half to 64 bits can never sign-extend into the high half (which happened
// with a signed 32-bit 'long' whenever bit 31 of the low half was set).
inline
uint64_t __rdtsc()
{
    uint32_t low, high;
    asm volatile("rdtsc" : "=a"(low), "=d"(high));
    return ((uint64_t)low | ((uint64_t)high << 32));
}
166 #endif
167 
168 #if !defined( __clang__) && !defined(__INTEL_COMPILER)
169 // Intrinsic not defined in gcc
170 static INLINE
_mm256_storeu2_m128i(__m128i * hi,__m128i * lo,__m256i a)171 void _mm256_storeu2_m128i(__m128i *hi, __m128i *lo, __m256i a)
172 {
173     _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
174     _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
175 }
176 
// gcc prior to 4.9 doesn't have _mm*_undefined_*
// GCC_VERSION is major * 10000 + minor * 100 + patchlevel, so 4.9.0 is 40900.
// Only compilers older than that get the zero-initializing substitutes.
#if (__GNUC__) && (GCC_VERSION < 40900)
#define _mm_undefined_si128 _mm_setzero_si128
#define _mm256_undefined_ps _mm256_setzero_ps
#endif
182 #endif
183 
// Emulation of the MSVC _BitScanForward intrinsic for 'unsigned long' masks.
//
// Searches Mask from the least significant bit upwards for the first set bit.
//   Index - receives the zero-based position of that bit; set to 0 when Mask
//           has no bit set.
//   Mask  - value to search.
// Returns 1 when a set bit was found, 0 when Mask is zero.
//
// __builtin_ctzl is used (not __builtin_ctz) so that the full width of
// 'unsigned long' is scanned, and a zero Mask is handled up front because the
// builtin's result is undefined for 0.
static inline
unsigned char _BitScanForward(unsigned long *Index, unsigned long Mask)
{
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    *Index = (unsigned long)__builtin_ctzl(Mask);
    return 1;
}
190 
// Emulation of the MSVC _BitScanForward intrinsic for 'unsigned int' masks.
//
// Searches Mask from the least significant bit upwards for the first set bit.
//   Index - receives the zero-based position of that bit; set to 0 when Mask
//           has no bit set.
//   Mask  - value to search.
// Returns 1 when a set bit was found, 0 when Mask is zero.
//
// A zero Mask is handled up front because __builtin_ctz(0) is undefined.
static inline
unsigned char _BitScanForward(unsigned int *Index, unsigned int Mask)
{
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    *Index = (unsigned int)__builtin_ctz(Mask);
    return 1;
}
197 
// Emulation of the MSVC _BitScanReverse intrinsic for 'unsigned long' masks.
//
// Searches Mask from the most significant bit downwards for the first set bit.
//   Index - receives the zero-based position of that bit (0 = least
//           significant bit); set to 0 when Mask has no bit set.
//   Mask  - value to search.
// Returns 1 when a set bit was found, 0 when Mask is zero.
//
// __builtin_clzl yields the number of leading zero bits, so the bit position
// is (width - 1) - clz. A zero Mask is handled up front because the builtin's
// result is undefined for 0.
static inline
unsigned char _BitScanReverse(unsigned long *Index, unsigned long Mask)
{
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    const unsigned long topBit = (unsigned long)(sizeof(Mask) * CHAR_BIT - 1);
    *Index = topBit - (unsigned long)__builtin_clzl(Mask);
    return 1;
}
204 
// Emulation of the MSVC _BitScanReverse intrinsic for 'unsigned int' masks.
//
// Searches Mask from the most significant bit downwards for the first set bit.
//   Index - receives the zero-based position of that bit (0 = least
//           significant bit); set to 0 when Mask has no bit set.
//   Mask  - value to search.
// Returns 1 when a set bit was found, 0 when Mask is zero.
//
// __builtin_clz yields the number of leading zero bits, so the bit position
// is (width - 1) - clz. A zero Mask is handled up front because the builtin's
// result is undefined for 0.
static inline
unsigned char _BitScanReverse(unsigned int *Index, unsigned int Mask)
{
    if (Mask == 0)
    {
        *Index = 0;
        return 0;
    }

    const unsigned int topBit = (unsigned int)(sizeof(Mask) * CHAR_BIT - 1);
    *Index = topBit - (unsigned int)__builtin_clz(Mask);
    return 1;
}
211 
// Allocates 'size' bytes whose address is a multiple of 'alignment', using
// posix_memalign.
//   size      - number of bytes to allocate.
//   alignment - requested alignment in bytes.
// Returns the allocated block, or NULL when posix_memalign reports an error.
// The block is released with free().
//
// posix_memalign only accepts alignments that are a power of two AND a
// multiple of sizeof(void *). Small power-of-two requests (1, 2, 4 on 64-bit
// targets) are therefore rounded up to sizeof(void *); the stricter alignment
// still satisfies the caller's request.
static inline
void *AlignedMalloc(size_t size, size_t alignment)
{
    void *ret = NULL;

    const int isPowerOfTwo = (alignment != 0) && ((alignment & (alignment - 1)) == 0);
    if (isPowerOfTwo && alignment < sizeof(void *))
    {
        alignment = sizeof(void *);
    }

    if (posix_memalign(&ret, alignment, size) != 0)
    {
        return NULL;
    }
    return ret;
}
222 
// Releases a block by handing it to free(), which is the matching deallocator
// for the posix_memalign-based AlignedMalloc of this platform branch.
static inline void AlignedFree(void* p)
{
    free(p);
}
228 
// Number of elements in a true array (not meaningful for a pointer argument).
#define _countof(a) (sizeof(a)/sizeof(*(a)))

// Stand-ins for Windows CRT / Win32 names.
// NOTE: sprintf_s maps to plain sprintf and therefore performs no bounds check.
#define sprintf_s sprintf
// Bounded copy that always NUL-terminates dst (when size > 0). strncpy was
// used here before, but it leaves dst unterminated whenever src has 'size' or
// more characters.
#define strcpy_s(dst,size,src) snprintf((dst), (size), "%s", (src))
#define GetCurrentProcessId getpid
234 
// Win32 Interlocked* names mapped onto the GCC __sync atomic builtins.

// Argument order differs between the two APIs: Win32 passes
// (destination, new value, expected value) while the builtin takes
// (pointer, expected value, new value).
#define InterlockedCompareExchange(pDest, newValue, expected) \
    __sync_val_compare_and_swap(pDest, expected, newValue)

// Fetch-then-add form: evaluates to the value held before the addition.
#define InterlockedExchangeAdd(pAddend, amount) \
    __sync_fetch_and_add(pAddend, amount)

// Op-then-fetch forms: evaluate to the value held after the operation.
#define InterlockedDecrement(pAddend)     __sync_sub_and_fetch(pAddend, 1)
#define InterlockedDecrement64(pAddend)   __sync_sub_and_fetch(pAddend, 1)
#define InterlockedIncrement(pAddend)     __sync_add_and_fetch(pAddend, 1)
#define InterlockedAdd(pAddend, amount)   __sync_add_and_fetch(pAddend, amount)
#define InterlockedAdd64(pAddend, amount) __sync_add_and_fetch(pAddend, amount)

// Compiler-level barrier: an empty asm statement with a "memory" clobber.
#define _ReadWriteBarrier() asm volatile("" ::: "memory")
243 
244 #define PRAGMA_WARNING_PUSH_DISABLE(...)
245 #define PRAGMA_WARNING_POP()
246 
247 #define ZeroMemory(dst, size) memset(dst, 0, size)
248 #else
249 
250 #error Unsupported OS/system.
251 
252 #endif
253 
254 #define THREAD thread_local
255 
256 // Universal types
257 typedef uint8_t     KILOBYTE[1024];
258 typedef KILOBYTE    MEGABYTE[1024];
259 typedef MEGABYTE    GIGABYTE[1024];
260 
261 #define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
262 #define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
263 #if ENABLE_AVX512_SIMD16
264 #define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)
265 #endif
266 
267 #include "common/swr_assert.h"
268 
269 #ifdef __GNUC__
270 #define ATTR_UNUSED __attribute__((unused))
271 #else
272 #define ATTR_UNUSED
273 #endif
274 
275 #define SWR_FUNC(_retType, _funcName, /* args */...)   \
276    typedef _retType (SWR_API * PFN##_funcName)(__VA_ARGS__); \
277   _retType SWR_API _funcName(__VA_ARGS__);
278 
279 // Defined in os.cpp
280 void SWR_API SetCurrentThreadName(const char* pThreadName);
281 void SWR_API CreateDirectoryPath(const std::string& path);
282 
283 #endif//__SWR_OS_H__
284