1 /*******************************************************************************
2 * Copyright 2001-2018 Intel Corporation
3 * All Rights Reserved.
4 *
5 * If this  software was obtained  under the  Intel Simplified  Software License,
6 * the following terms apply:
7 *
8 * The source code,  information  and material  ("Material") contained  herein is
9 * owned by Intel Corporation or its  suppliers or licensors,  and  title to such
10 * Material remains with Intel  Corporation or its  suppliers or  licensors.  The
11 * Material  contains  proprietary  information  of  Intel or  its suppliers  and
12 * licensors.  The Material is protected by  worldwide copyright  laws and treaty
13 * provisions.  No part  of  the  Material   may  be  used,  copied,  reproduced,
14 * modified, published,  uploaded, posted, transmitted,  distributed or disclosed
15 * in any way without Intel's prior express written permission.  No license under
16 * any patent,  copyright or other  intellectual property rights  in the Material
17 * is granted to  or  conferred  upon  you,  either   expressly,  by implication,
18 * inducement,  estoppel  or  otherwise.  Any  license   under such  intellectual
19 * property rights must be express and approved by Intel in writing.
20 *
21 * Unless otherwise agreed by Intel in writing,  you may not remove or alter this
22 * notice or  any  other  notice   embedded  in  Materials  by  Intel  or Intel's
23 * suppliers or licensors in any way.
24 *
25 *
26 * If this  software  was obtained  under the  Apache License,  Version  2.0 (the
27 * "License"), the following terms apply:
28 *
29 * You may  not use this  file except  in compliance  with  the License.  You may
30 * obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
31 *
32 *
33 * Unless  required  by   applicable  law  or  agreed  to  in  writing,  software
34 * distributed under the License  is distributed  on an  "AS IS"  BASIS,  WITHOUT
35 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36 *
37 * See the   License  for the   specific  language   governing   permissions  and
38 * limitations under the License.
39 *******************************************************************************/
40 
41 #if defined( _OPENMP )
42   #include <omp.h>
43 #endif
44 
45 #include "owndefs.h"
46 #include "ippcpdefs.h"
47 #include "ippcp.h"
48 #ifdef _PCS
49 #undef _PCS
50 #define _MY_PCS_DISABLED
51 #endif
52 #include "dispatcher.h"
53 #ifdef _MY_PCS_DISABLED
54 #define _PCS
55 #endif
56 #if defined( _IPP_DATA )
57 
58 static Ipp64u cpFeatures = 0;
59 static Ipp64u cpFeaturesMask = 0;
60 
61 static int cpGetFeatures( Ipp64u* pFeaturesMask );
62 extern void IPP_CDECL cpGetReg( int* buf, int valEAX, int valECX );
63 extern int IPP_CDECL cp_is_avx_extension();
64 extern int IPP_CDECL cp_is_avx512_extension();
65 IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index );
66 
67 IPPFUN( Ipp64u, ippcpGetEnabledCpuFeatures, ( void ))
68 {
69     return cpFeaturesMask;
70 }
71 
72 /*===================================================================*/
73 IPPFUN( IppStatus, ippcpGetCpuFeatures, ( Ipp64u* pFeaturesMask ))
74 {
IPP_BAD_PTR1_RET(pFeaturesMask)75   IPP_BAD_PTR1_RET( pFeaturesMask )
76   {
77     if( 0 != cpFeatures){
78         *pFeaturesMask = cpFeatures;// & cpFeaturesMask;
79     } else {
80         int ret = cpGetFeatures( pFeaturesMask );
81         if( !ret ) return ippStsNotSupportedCpu;
82     }
83     return ippStsNoErr;
84   }
85 }
86 
87 /*===================================================================*/
88 
/* Returns 1 when every bit of Feature is set in the enabled-features mask
   (cpFeaturesMask), 0 otherwise. A zero Feature therefore yields 1. */
int cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
97 
/* "k0"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int k0_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
/* "n0"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int n0_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
/* "l9"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int l9_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
/* "e9"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int e9_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
/* "y8"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int y8_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
113 
/* "h9"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int h9_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
/* "g9"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int g9_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
/* "p8"-prefixed twin of cpGetFeature(): 1 if all bits of Feature are enabled, else 0. */
int p8_cpGetFeature( Ipp64u Feature )
{
  const Ipp64u enabledBits = cpFeaturesMask & Feature;

  return ( enabledBits == Feature ) ? 1 : 0;
}
123 
124 /*===================================================================*/
/* Single-bit masks BITnn == (1 << nn) used to test individual bits of the
   32-bit register values returned by CPUID. */
/* bits 0..7 */
#define BIT00   0x00000001
#define BIT01   0x00000002
#define BIT02   0x00000004
#define BIT03   0x00000008
#define BIT04   0x00000010
#define BIT05   0x00000020
#define BIT06   0x00000040
#define BIT07   0x00000080
/* bits 8..15 */
#define BIT08   0x00000100
#define BIT09   0x00000200
#define BIT10   0x00000400
#define BIT11   0x00000800
#define BIT12   0x00001000
#define BIT13   0x00002000
#define BIT14   0x00004000
#define BIT15   0x00008000
/* bits 16..23 */
#define BIT16   0x00010000
#define BIT17   0x00020000
#define BIT18   0x00040000
#define BIT19   0x00080000
#define BIT20   0x00100000
#define BIT21   0x00200000
#define BIT22   0x00400000
#define BIT23   0x00800000
/* bits 24..31 */
#define BIT24   0x01000000
#define BIT25   0x02000000
#define BIT26   0x04000000
#define BIT27   0x08000000
#define BIT28   0x10000000
#define BIT29   0x20000000
#define BIT30   0x40000000
#define BIT31   0x80000000
157 
158 
/*
//  Name:       cpGetFeatures
//  Purpose:    build the CPU features mask from CPUID output (via cpGetReg)
//              and from the OS-support helpers cp_is_avx_extension() and
//              cp_is_avx512_extension()
//  Returns:    always 1 (a CPUID-availability check, if ever needed, belongs
//              at the top of the function and should return 0 on failure)
//  Parameters:
//    pFeaturesMask   receives the detected features mask
//
//  Notes:      the detected mask is also stored into both cpFeatures and
//              cpFeaturesMask, i.e. all detected features become enabled
*/
static int cpGetFeatures( Ipp64u* pFeaturesMask )
{
    Ipp32u  regs[4];                    /* read below as [0]=eax, [1]=ebx, [2]=ecx, [3]=edx */
    Ipp32u  leaf1Eax, leaf1Ecx, leaf1Edx;
    Ipp32u  maxBasicLeaf, maxExtLeaf;
    Ipp32u  osAvxState;
    Ipp64u  features = 0;
    /* Intel(R) AVX2 is reported only if these 3 features are present at once:
       FMA, Intel(R) AVX 256 int & GPR BMI (bit-manipulation) */
    int     hasFMA, hasAVX2int = 0, hasBMI = 0;

    cpGetReg( (int*)regs, 0, 0 );            /* max value for basic info */
    maxBasicLeaf = regs[0];
    cpGetReg( (int*)regs, 0x80000000, 0 );   /* max value for extended info */
    maxExtLeaf = regs[0];

    /* ---- CPUID.eax = 1 ---- */
    cpGetReg( (int*)regs, 1, 0 );
    leaf1Eax = (Ipp32u)regs[0];
    leaf1Ecx = (Ipp32u)regs[2];
    leaf1Edx = (Ipp32u)regs[3];

    /* edx */
    if( leaf1Edx & BIT23 ) features |= ippCPUID_MMX;    /* edx[23] - MMX(TM) Technology */
    if( leaf1Edx & BIT25 ) features |= ippCPUID_SSE;    /* edx[25] - Intel(R) SSE */
    if( leaf1Edx & BIT26 ) features |= ippCPUID_SSE2;   /* edx[26] - Intel(R) SSE2 */

    /* ecx */
    if( leaf1Ecx & BIT00 ) features |= ippCPUID_SSE3;   /* ecx[0]  - Intel(R) SSE3 (formerly codenamed Prescott) */
    if( leaf1Ecx & BIT01 ) features |= ippCPUID_CLMUL;  /* ecx[1]  - Intel(R) instruction PCLMULQDQ */
    if( leaf1Ecx & BIT09 ) features |= ippCPUID_SSSE3;  /* ecx[9]  - SSSE3 (formerly codenamed Merom) */
    if( leaf1Ecx & BIT19 ) features |= ippCPUID_SSE41;  /* ecx[19] - Intel(R) SSE4.1 (formerly codenamed Penryn) */
    if( leaf1Ecx & BIT20 ) features |= ippCPUID_SSE42;  /* ecx[20] - Intel(R) SSE4.2 (formerly codenamed Nehalem) */
    if( leaf1Ecx & BIT22 ) features |= ippCPUID_MOVBE;  /* ecx[22] - Intel(R) instruction MOVBE (Intel Atom(R) processor) */
    if( leaf1Ecx & BIT25 ) features |= ippCPUID_AES;    /* ecx[25] - Intel(R) AES New Instructions */
    if( leaf1Ecx & BIT28 ) features |= ippCPUID_AVX;    /* ecx[28] - Intel(R) AVX (formerly codenamed Sandy Bridge) */
    if( leaf1Ecx & BIT29 ) features |= ippCPUID_F16C;   /* ecx[29] - Intel(R) instruction F16C */
    if( leaf1Ecx & BIT30 ) features |= ippCPUID_RDRAND; /* ecx[30] - Intel(R) instruction RDRAND */

    /* the OS helper is consulted only when ecx[27] and ecx[28] are both set */
    if(( leaf1Ecx & 0x18000000 ) == 0x18000000 ){
        osAvxState = (Ipp32u)cp_is_avx_extension();
        if( osAvxState & BIT00 ) features |= ippAVX_ENABLEDBYOS; /* Intel(R) AVX is supported by OS */
    }

    hasFMA = ( leaf1Ecx & BIT12 ) ? 1 : 0;              /* ecx[12] - FMA 128 & 256 bit */

    /* ---- CPUID.eax = 7 ---- */
    if( maxBasicLeaf >= 7 ){
        Ipp32u leaf7Ebx, leaf7Ecx, leaf7Edx;

        cpGetReg( (int*)regs, 0x7, 0 );
        leaf7Ebx = (Ipp32u)regs[1];
        leaf7Ecx = (Ipp32u)regs[2];
        leaf7Edx = (Ipp32u)regs[3];

        /* ebx[5] - Intel(R) AVX2 (int 256bits) */
        hasAVX2int = ( leaf7Ebx & BIT05 ) ? 1 : 0;
        /* VEX-encoded GPR instructions (GPR BMI):
           ebx[3] - enabled ANDN, BEXTR, BLSI, BLSMK, BLSR, TZCNT
           ebx[8] - enabled BZHI, MULX, PDEP, PEXT, RORX, SARX, SHLX, SHRX */
        hasBMI = (( leaf7Ebx & BIT03 )&&( leaf7Ebx & BIT08 )) ? 1 : 0;

        /* Intel(R) AVX-512 is supported by OS */
        if( cp_is_avx512_extension() ) features |= ippAVX512_ENABLEDBYOS;

        /* ebx */
        /* bitwise OR between ippCPUID_MPX & ippCPUID_AVX flags can be used to define
           that arch is GE than formerly codenamed Skylake */
        if( leaf7Ebx & BIT14 ) features |= ippCPUID_MPX;        /* ebx[14] - Intel(R) MPX */
        if( leaf7Ebx & BIT16 ) features |= ippCPUID_AVX512F;    /* ebx[16] - Intel(R) AVX-512 Foundation */
        if( leaf7Ebx & BIT17 ) features |= ippCPUID_AVX512DQ;   /* ebx[17] - Intel(R) AVX-512 Dword & Quadword */
        if( leaf7Ebx & BIT18 ) features |= ippCPUID_RDSEED;     /* ebx[18] - Intel(R) instruction RDSEED */
        if( leaf7Ebx & BIT19 ) features |= ippCPUID_ADCOX;      /* ebx[19] - Intel(R) instructions ADOX/ADCX */
        if( leaf7Ebx & BIT21 ) features |= ippCPUID_AVX512IFMA; /* ebx[21] - Intel(R) AVX-512 IFMA PMADD52 */
        if( leaf7Ebx & BIT26 ) features |= ippCPUID_AVX512PF;   /* ebx[26] - Intel(R) AVX-512 Prefetch instructions */
        if( leaf7Ebx & BIT27 ) features |= ippCPUID_AVX512ER;   /* ebx[27] - Intel(R) AVX-512 Exponential and Reciprocal instructions */
        if( leaf7Ebx & BIT28 ) features |= ippCPUID_AVX512CD;   /* ebx[28] - Intel(R) AVX-512 Conflict Detection */
        if( leaf7Ebx & BIT29 ) features |= ippCPUID_SHA;        /* ebx[29] - Intel(R) Secure Hash Algorithm Extensions */
        if( leaf7Ebx & BIT30 ) features |= ippCPUID_AVX512BW;   /* ebx[30] - Intel(R) AVX-512 Byte & Word */
        if( leaf7Ebx & BIT31 ) features |= ippCPUID_AVX512VL;   /* ebx[31] - Intel(R) AVX-512 Vector Length extensions */

        /* ecx */
        if( leaf7Ecx & BIT01 ) features |= ippCPUID_AVX512VBMI; /* ecx[1] - Intel(R) AVX-512 Vector Byte Manipulation Instructions */

        /* edx */
        if( leaf7Edx & BIT02 ) features |= ippCPUID_AVX512_4VNNIW;   /* edx[2] - Intel(R) AVX-512 deep learning enhanced word variable precision */
        if( leaf7Edx & BIT03 ) features |= ippCPUID_AVX512_4FMADDPS; /* edx[3] - Intel(R) AVX-512 deep learning floating-point single precision */
    }

    /* to separate Intel(R) AVX2 flags here */
    if( hasFMA && hasAVX2int && hasBMI ) features |= ippCPUID_AVX2;

    /* ---- CPUID.eax = 0x80000001 ---- */
    if( maxExtLeaf >= 0x80000001 ){
        Ipp32u extEcx;

        cpGetReg( (int*)regs, 0x80000001, 0 );
        extEcx = (Ipp32u)regs[2];
        if( extEcx & BIT08 ) features |= ippCPUID_PREFETCHW;    /* ecx[8] - Intel(R) instruction PREFETCHW */
    }

    /* Intel(R) architecture formerly codenamed Knights Corner:
       bits 4..11 of CPUID.1 eax are equal to 0xb1 */
    if((( leaf1Eax << 20 ) >> 24 ) == 0xb1 ){
        features |= ippCPUID_KNC;
    }

    cpFeatures     = features;
    cpFeaturesMask = features;   /* all CPU features are enabled by default */
    *pFeaturesMask = features;
    return 1;
}
247 
248 int ippcpJumpIndexForMergedLibs = -1;
249 static int cpthreads_omp_of_n_ipp = 1;
250 
251 IPPFUN( int, ippcpGetEnabledNumThreads,( void ))
252 {
253     return cpthreads_omp_of_n_ipp;
254 }
255 
256 
/* AVX3X_FEATURES means Intel(R) Xeon(R) processor */
#define AVX3X_FEATURES ( ippCPUID_AVX512F  | \
                         ippCPUID_AVX512CD | \
                         ippCPUID_AVX512VL | \
                         ippCPUID_AVX512BW | \
                         ippCPUID_AVX512DQ )
/* AVX3M_FEATURES means Intel(R) Many Integrated Core Architecture */
#define AVX3M_FEATURES ( ippCPUID_AVX512F  | \
                         ippCPUID_AVX512CD | \
                         ippCPUID_AVX512PF | \
                         ippCPUID_AVX512ER )
261 
262 
owncpFeaturesToIdx(Ipp64u * cpuFeatures,int * index)263 IppStatus owncpFeaturesToIdx(  Ipp64u* cpuFeatures, int* index )
264 {
265    IppStatus ownStatus = ippStsNoErr;
266    Ipp64u    mask = 0;
267 
268    *index = 0;
269 
270    if(( AVX3X_FEATURES  == ( *cpuFeatures & AVX3X_FEATURES  ))&&
271       ( ippAVX512_ENABLEDBYOS & cpFeatures )){                         /* Intel(R) architecture formerlySkylake ia32=S0, x64=K0 */
272          mask = AVX3X_MSK;
273          *index = LIB_AVX3X;
274    } else
275    if(( AVX3M_FEATURES  == ( *cpuFeatures & AVX3M_FEATURES  ))&&
276       ( ippAVX512_ENABLEDBYOS & cpFeatures )){                         /* Intel(R) architecture formerly codenamed Knights Landing ia32=i0, x64=N0 */
277        mask = AVX3M_MSK;
278        *index = LIB_AVX3M;
279    } else
280    if(( ippCPUID_AVX2  == ( *cpuFeatures & ippCPUID_AVX2  ))&&
281       ( ippAVX_ENABLEDBYOS & cpFeatures )){                            /* Intel(R) architecture formerly codenamed Haswell ia32=H9, x64=L9 */
282        mask = AVX2_MSK;
283        *index = LIB_AVX2;
284    } else
285    if(( ippCPUID_AVX   == ( *cpuFeatures & ippCPUID_AVX   ))&&
286       ( ippAVX_ENABLEDBYOS & cpFeatures )){                            /* Intel(R) architecture formerly codenamed Sandy Bridge ia32=G9, x64=E9 */
287        mask = AVX_MSK;
288        *index = LIB_AVX;
289    } else
290    if( ippCPUID_SSE42 == ( *cpuFeatures & ippCPUID_SSE42 )){           /* Intel(R) architecture formerly codenamed Nehalem or Intel(R) architecture formerly codenamed Westmer = Intel(R) architecture formerly codenamed Penryn + Intel(R) SSE4.2 + ?Intel(R) instruction PCLMULQDQ + ?(Intel(R) AES New Instructions) + ?(Intel(R) Secure Hash Algorithm Extensions) */
291        mask = SSE42_MSK;                                               /* or new Intel Atom(R) processor formerly codenamed Silvermont */
292        *index = LIB_SSE42;
293    } else
294    if( ippCPUID_SSE41 == ( *cpuFeatures & ippCPUID_SSE41 )){           /* Intel(R) architecture formerly codenamed Penryn ia32=P8, x64=Y8 */
295        mask = SSE41_MSK;
296        *index = LIB_SSE41;
297    } else
298    if( ippCPUID_MOVBE == ( *cpuFeatures & ippCPUID_MOVBE )) {          /* Intel Atom(R) processor formerly codenamed Silverthorne ia32=S8, x64=N8 */
299        mask = ATOM_MSK;
300        *index = LIB_ATOM;
301    } else
302    if( ippCPUID_SSSE3 == ( *cpuFeatures & ippCPUID_SSSE3 )) {          /* Intel(R) architecture formerly codenamed Merom ia32=V8, x64=U8 (letters etymology is unknown) */
303        mask = SSSE3_MSK;
304        *index = LIB_SSSE3;
305    } else
306    if( ippCPUID_SSE3  == ( *cpuFeatures & ippCPUID_SSE3  )) {          /* Intel(R) architecture formerly codenamed Prescott ia32=W7, x64=M7 */
307        mask = SSE3_MSK;
308        *index = LIB_SSE3;
309    } else
310    if( ippCPUID_SSE2  == ( *cpuFeatures & ippCPUID_SSE2  )) {          /* Intel(R) architecture formerly codenamed Willamette ia32=W7, x64=PX */
311        mask = SSE2_MSK;
312        *index = LIB_SSE2;
313    } else
314    if( ippCPUID_SSE   == ( *cpuFeatures & ippCPUID_SSE   )) {          /* Intel(R) Pentium(R) processor III ia32=PX only */
315        mask = SSE_MSK;
316        *index = LIB_SSE;
317 #if (defined( _WIN32E ) || defined( linux32e ) || defined( OSXEM64T )) && !(defined( _ARCH_LRB2 ))
318        ownStatus = ippStsNotSupportedCpu;                              /* the lowest CPU supported by Intel(R) Integrated Performance Primitives (Intel(R) IPP) must at least support Intel(R) SSE2 for x64 */
319 #endif
320    } else
321    if( ippCPUID_MMX   >= ( *cpuFeatures & ippCPUID_MMX   )) {          /* not supported, PX dispatched */
322        mask = MMX_MSK;
323        *index = LIB_MMX;
324        ownStatus = ippStsNotSupportedCpu; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
325    }
326 #if defined ( _IPP_QUARK)
327      else {
328        mask = PX_MSK;
329        *index = LIB_PX;
330        ownStatus = ippStsNoErr; /* the lowest CPU supported by Intel(R) IPP must at least support Intel(R) SSE for ia32 or Intel(R) SSE2 for x64 */
331    }
332 #endif
333 
334     if(( mask != ( *cpuFeatures & mask ))&&( ownStatus == ippStsNoErr ))
335         ownStatus = ippStsFeaturesCombination; /* warning if combination of features is incomplete */
336    *cpuFeatures |= mask;
337    return ownStatus;
338 }
339 
340 #ifdef _PCS
341 
342 extern IppStatus (IPP_STDCALL *pcpSetCpuFeatures)( Ipp64u cpuFeatures );
343 extern IppStatus (IPP_STDCALL *pcpSetNumThreads)( int numThr );
344 extern IppStatus (IPP_STDCALL *pcpGetNumThreads)( int* pNumThr );
345 
346 IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
347 {
348    IppStatus status = ippStsNoErr;
349 
350    if (pcpSetNumThreads != 0)
351    {
352       status = pcpSetNumThreads(numThr);
353       if (status == ippStsNoErr)
354       {
355           cpthreads_omp_of_n_ipp = numThr;
356       }
357    }
358    return status;
359 }
360 
361 IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
362 {
363    IppStatus status = ippStsNoErr;
364 
365    IPP_BAD_PTR1_RET( pNumThr )
366 
367    if (pcpGetNumThreads != 0)
368    {
369       status = pcpGetNumThreads(pNumThr);
370    }
371    return status;
372 }
373 #else
374 
375 
376 IPPFUN( IppStatus, ippcpSetNumThreads, ( int numThr ))
377 {
378    IppStatus status = ippStsNoErr;
379 #if defined( _OPENMP )
380    IPP_BAD_SIZE_RET( numThr )
381    cpthreads_omp_of_n_ipp = numThr;
382    status = ippStsNoErr;
383 #else
384    UNREFERENCED_PARAMETER(numThr);
385    status = ippStsNoOperation;
386 #endif
387    return status;
388 }
389 
390 IPPFUN( IppStatus, ippcpGetNumThreads, (int* pNumThr) )
391 {
392    IppStatus status = ippStsNoErr;
393    IPP_BAD_PTR1_RET( pNumThr )
394 
395 #if defined( _OPENMP )
396    *pNumThr = cpthreads_omp_of_n_ipp;
397    status =  ippStsNoErr;
398 #else
399    *pNumThr = 1;
400    status = ippStsNoOperation;
401 #endif
402    return status;
403 }
404 
405 #endif /* #ifdef _PCS */
406 
407 #ifdef _IPP_DYNAMIC
408 
409 typedef IppStatus (IPP_STDCALL *DYN_RELOAD)( int );
410 static DYN_RELOAD IppDispatcher; /* ippCP only */
411 static int currentCpu = -1;      /* control for disabling the same DLL re-loading */
412 
owncpRegisterLib(DYN_RELOAD reload)413 void owncpRegisterLib( DYN_RELOAD reload )
414 {
415     pcpSetCpuFeatures = 0;
416     pcpSetNumThreads  = 0;
417     pcpGetNumThreads  = 0;
418 
419     IppDispatcher = reload;  /* function DynReload() that is defined in ippmain.gen - */
420     return;                                                               /* therefore in each domain there is own DynReload() function */
421 }
422 
owncpUnregisterLib(void)423 void owncpUnregisterLib( void )
424 {
425    IppDispatcher = 0;
426    currentCpu = -1;
427 
428    pcpSetCpuFeatures = 0;
429    pcpSetNumThreads  = 0;
430    pcpGetNumThreads  = 0;
431 
432    return;
433 }
434 
435 IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
436 {
437    IppStatus status, ownStatus;
438    int       index = 0;
439 
440     ownStatus = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index );
441     if(( IppDispatcher )&&( currentCpu != index )) {
442         status = IppDispatcher( index );
443         currentCpu = index;
444     } else
445         status = ippStsNoErr;
446 
447 #ifdef _PCS
448     if (pcpSetCpuFeatures != 0 && status >= ippStsNoErr)
449     {
450         /* Pass down features to Waterfall dll */
451         status = pcpSetCpuFeatures(cpuFeatures);
452     }
453     if (pcpSetNumThreads != 0 && status >= ippStsNoErr)
454     {
455         /* Pass down features to Waterfall dll */
456         status = pcpSetNumThreads(cpthreads_omp_of_n_ipp);
457     }
458 #endif
459 
460     if( status != ippStsNoErr && status != ippStsNoOperation)
461         return status;
462     else
463         return ownStatus;
464 }
465 
466 IPPFUN( IppStatus, ippcpInit,( void ))
467 {
468     int index = 0;
469     IppStatus status, statusf, statusi;
470     Ipp64u    cpuFeatures;
471 
472     statusf = ippcpGetCpuFeatures( &cpuFeatures );
473     statusi = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index ); /* ownSetFeatures instead of ippSetFeatures because need unconditional initialization, */
474     if( IppDispatcher ) status = IppDispatcher( index ); /* call DynReload() function for each domain */
475     else status = ippStsNoErr;
476     currentCpu = index;
477     if( ippStsNoErr != statusf ) return statusf;
478     if( ippStsNoErr != statusi ) return statusi;
479     if( ippStsNoErr != status ) return status;
480     return ippStsNoErr;
481 }
482 
483 
484 #else /* _IPP_DYNAMIC */
485 
486 IPPFUN( IppStatus, ippcpInit,( void ))
487 {
488     Ipp64u     cpuFeatures;
489 
490 #if defined( _OPENMP )
491     ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
492 #endif
493     ippcpGetCpuFeatures( &cpuFeatures );
494     return ippcpSetCpuFeatures( cpuFeatures );
495 }
496 
497 
498 IPPFUN( IppStatus, ippcpSetCpuFeatures,( Ipp64u cpuFeatures ))
499 {
500    IppStatus ownStatus;
501    int       index = 0;
502 
503 #if defined( _OPENMP )
504     ippcpSetNumThreads( IPP_MIN( omp_get_num_procs(), omp_get_max_threads()));
505 #endif
506     ownStatus = owncpSetCpuFeaturesAndIdx( cpuFeatures, &index );
507     ippcpJumpIndexForMergedLibs = index;
508     cpFeaturesMask = cpuFeatures;
509     return ownStatus;
510 }
511 
512 #endif
513 
owncpSetCpuFeaturesAndIdx(Ipp64u cpuFeatures,int * index)514 IppStatus owncpSetCpuFeaturesAndIdx( Ipp64u cpuFeatures, int* index )
515 {
516     Ipp64u    tmp;
517     IppStatus tmpStatus;
518     *index = 0;
519 
520     if( ippCPUID_NOCHECK & cpuFeatures ){
521     // if NOCHECK is set - static variable cpFeatures is initialized unconditionally and real CPU features from CPUID are ignored;
522     // the one who uses this method of initialization must understand what and why it does and the possible unpredictable consequences.
523     // the only one known purpose for this approach - environments where CPUID instruction is disabled (for example Intel(R) Software Guard Extensions).
524         cpuFeatures &= ( IPP_MAX_64U ^ ippCPUID_NOCHECK );
525         cpFeatures = cpuFeatures;
526     } else
527 //    if( 0 == cpFeatures ) //do cpFeatures restore unconditionally - to protect from possible previous NOCHECK
528     {
529     // if library has not been initialized yet
530         cpGetFeatures( &tmp );
531     }
532     tmpStatus = owncpFeaturesToIdx( &cpuFeatures, index );
533     cpFeaturesMask = cpuFeatures;
534 
535     return tmpStatus;
536 }
537 
538 static struct {
539    int sts;
540    const char *msg;
541 } ippcpMsg[] = {
542 /* ippStatus */
543 /* -9999 */ ippStsCpuNotSupportedErr, "ippStsCpuNotSupportedErr: The target CPU is not supported",
544 /* -9702 */ MSG_NO_SHARED, "No shared libraries were found in the Waterfall procedure",
545 /* -9701 */ MSG_NO_DLL, "No DLLs were found in the Waterfall procedure",
546 /* -9700 */ MSG_LOAD_DLL_ERR, "Error at loading of %s library",
547 /* -1016 */ ippStsQuadraticNonResidueErr, "ippStsQuadraticNonResidueErr: SQRT operation on quadratic non-residue value",
548 /* -1015 */ ippStsPointAtInfinity, "ippStsPointAtInfinity: Point at infinity is detected",
549 /* -1014 */ ippStsOFBSizeErr, "ippStsOFBSizeErr: Incorrect value for crypto OFB block size",
550 /* -1013 */ ippStsIncompleteContextErr, "ippStsIncompleteContextErr: Crypto: set up of context is not complete",
551 /* -1012 */ ippStsCTRSizeErr, "ippStsCTRSizeErr: Incorrect value for crypto CTR block size",
552 /* -1011 */ ippStsEphemeralKeyErr, "ippStsEphemeralKeyErr: ECC: Invalid ephemeral key",
553 /* -1010 */ ippStsMessageErr, "ippStsMessageErr: ECC: Invalid message digest",
554 /* -1009 */ ippStsShareKeyErr, "ippStsShareKeyErr: ECC: Invalid share key",
555 /* -1008 */ ippStsIvalidPrivateKey, "ippStsIvalidPrivateKey ECC: Invalid private key",
556 /* -1007 */ ippStsOutOfECErr, "ippStsOutOfECErr: ECC: Point out of EC",
557 /* -1006 */ ippStsECCInvalidFlagErr, "ippStsECCInvalidFlagErr: ECC: Invalid Flag",
558 /* -1005 */ ippStsUnderRunErr, "ippStsUnderRunErr: Error in data under run",
559 /* -1004 */ ippStsPaddingErr, "ippStsPaddingErr: Detected padding error indicates the possible data corruption",
560 /* -1003 */ ippStsCFBSizeErr, "ippStsCFBSizeErr: Incorrect value for crypto CFB block size",
561 /* -1002 */ ippStsPaddingSchemeErr, "ippStsPaddingSchemeErr: Invalid padding scheme",
562 /* -1001 */ ippStsBadModulusErr, "ippStsBadModulusErr: Bad modulus caused a failure in module inversion",
563 /*  -216 */ ippStsUnknownStatusCodeErr, "ippStsUnknownStatusCodeErr: Unknown status code",
564 /*  -221 */ ippStsLoadDynErr, "ippStsLoadDynErr: Error when loading the dynamic library",
565 /*   -15 */ ippStsLengthErr, "ippStsLengthErr: Incorrect value for string length",
566 /*   -14 */ ippStsNotSupportedModeErr, "ippStsNotSupportedModeErr: The requested mode is currently not supported",
567 /*   -13 */ ippStsContextMatchErr, "ippStsContextMatchErr: Context parameter does not match the operation",
568 /*   -12 */ ippStsScaleRangeErr, "ippStsScaleRangeErr: Scale bounds are out of range",
569 /*   -11 */ ippStsOutOfRangeErr, "ippStsOutOfRangeErr: Argument is out of range, or point is outside the image",
570 /*   -10 */ ippStsDivByZeroErr, "ippStsDivByZeroErr: An attempt to divide by zero",
571 /*    -9 */ ippStsMemAllocErr, "ippStsMemAllocErr: Memory allocated for the operation is not enough",
572 /*    -8 */ ippStsNullPtrErr, "ippStsNullPtrErr: Null pointer error",
573 /*    -7 */ ippStsRangeErr, "ippStsRangeErr: Incorrect values for bounds: the lower bound is greater than the upper bound",
574 /*    -6 */ ippStsSizeErr, "ippStsSizeErr: Incorrect value for data size",
575 /*    -5 */ ippStsBadArgErr, "ippStsBadArgErr: Incorrect arg/param of the function",
576 /*    -4 */ ippStsNoMemErr, "ippStsNoMemErr: Not enough memory for the operation",
577 /*    -2 */ ippStsErr, "ippStsErr: Unknown/unspecified error, -2",
578 /*     0 */ ippStsNoErr, "ippStsNoErr: No errors",
579 /*     1 */ ippStsNoOperation, "ippStsNoOperation: No operation has been executed",
580 /*     2 */ ippStsDivByZero, "ippStsDivByZero: Zero value(s) for the divisor in the Div function",
581 /*    25 */ ippStsInsufficientEntropy, "ippStsInsufficientEntropy: Generation of the prime/key failed due to insufficient entropy in the random seed and stimulus bit string",
582 /*    36 */ ippStsNotSupportedCpu, "The CPU is not supported",
583 /*    36 */ ippStsFeaturesCombination, "Wrong combination of features",
584 };
585 
586 /* /////////////////////////////////////////////////////////////////////////////
587 //  Name:       ippcpGetStatusString
588 //  Purpose:    transformation of a code of a status Intel(R) IPP to string
589 //  Returns:
590 //  Parameters:
591 //    StsCode   Intel(R) IPP status code
592 //
593 //  Notes:      not necessary to release the returned string
594 */
595 IPPFUN( const char*, ippcpGetStatusString, ( IppStatus StsCode ) )
596 {
597    unsigned int i;
598    for( i=0; i<IPP_COUNT_OF( ippcpMsg ); i++ ) {
599       if( StsCode == ippcpMsg[i].sts ) {
600          return ippcpMsg[i].msg;
601       }
602    }
603    return ippcpGetStatusString( ippStsUnknownStatusCodeErr );
604 }
605 
606 extern Ipp64u IPP_CDECL cp_get_pentium_counter (void);
607 
608 /* /////////////////////////////////////////////////////////////////////////////
609 //  Name:       ippcpGetCpuClocks
610 //  Purpose:    time stamp counter (TSC) register reading
611 //  Returns:    TSC value
612 //
613 //  Note:      An hardware exception is possible if TSC reading is not supported by
614 //             the current chipset
615 */
616 IPPFUN( Ipp64u, ippcpGetCpuClocks, (void) )
617 {
618    return (Ipp64u)cp_get_pentium_counter();
619 }
620 
621 #endif /* _IPP_DATA */
622