1 /*
2  * This file derives from SFMT 1.3.3
3  * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was
4  * released under the terms of the following license:
5  *
6  *   Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
7  *   University. All rights reserved.
8  *
9  *   Redistribution and use in source and binary forms, with or without
10  *   modification, are permitted provided that the following conditions are
11  *   met:
12  *
13  *       * Redistributions of source code must retain the above copyright
14  *         notice, this list of conditions and the following disclaimer.
15  *       * Redistributions in binary form must reproduce the above
16  *         copyright notice, this list of conditions and the following
17  *         disclaimer in the documentation and/or other materials provided
18  *         with the distribution.
19  *       * Neither the name of the Hiroshima University nor the names of
20  *         its contributors may be used to endorse or promote products
21  *         derived from this software without specific prior written
22  *         permission.
23  *
24  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  */
36 /**
37  * @file  SFMT-sse2.h
38  * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2
39  *
40  * @author Mutsuo Saito (Hiroshima University)
41  * @author Makoto Matsumoto (Hiroshima University)
42  *
43  * @note We assume LITTLE ENDIAN in this file
44  *
45  * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
46  * University. All rights reserved.
47  *
48  * The new BSD License is applied to this software, see LICENSE.txt
49  */
50 
51 #ifndef SFMT_SSE2_H
52 #define SFMT_SSE2_H
53 
54 /**
55  * This function represents the recursion formula.
56  * @param a a 128-bit part of the interal state array
57  * @param b a 128-bit part of the interal state array
58  * @param c a 128-bit part of the interal state array
59  * @param d a 128-bit part of the interal state array
60  * @param mask 128-bit mask
61  * @return output
62  */
mm_recursion(__m128i * a,__m128i * b,__m128i c,__m128i d,__m128i mask)63 JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b,
64 				   __m128i c, __m128i d, __m128i mask) {
65     __m128i v, x, y, z;
66 
67     x = _mm_load_si128(a);
68     y = _mm_srli_epi32(*b, SR1);
69     z = _mm_srli_si128(c, SR2);
70     v = _mm_slli_epi32(d, SL1);
71     z = _mm_xor_si128(z, x);
72     z = _mm_xor_si128(z, v);
73     x = _mm_slli_si128(x, SL2);
74     y = _mm_and_si128(y, mask);
75     z = _mm_xor_si128(z, x);
76     z = _mm_xor_si128(z, y);
77     return z;
78 }
79 
80 /**
81  * This function fills the internal state array with pseudorandom
82  * integers.
83  */
gen_rand_all(sfmt_t * ctx)84 JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) {
85     int i;
86     __m128i r, r1, r2, mask;
87     mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
88 
89     r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
90     r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
91     for (i = 0; i < N - POS1; i++) {
92 	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2,
93 	  mask);
94 	_mm_store_si128(&ctx->sfmt[i].si, r);
95 	r1 = r2;
96 	r2 = r;
97     }
98     for (; i < N; i++) {
99 	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2,
100 	  mask);
101 	_mm_store_si128(&ctx->sfmt[i].si, r);
102 	r1 = r2;
103 	r2 = r;
104     }
105 }
106 
107 /**
108  * This function fills the user-specified array with pseudorandom
109  * integers.
110  *
111  * @param array an 128-bit array to be filled by pseudorandom numbers.
112  * @param size number of 128-bit pesudorandom numbers to be generated.
113  */
gen_rand_array(sfmt_t * ctx,w128_t * array,int size)114 JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
115     int i, j;
116     __m128i r, r1, r2, mask;
117     mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
118 
119     r1 = _mm_load_si128(&ctx->sfmt[N - 2].si);
120     r2 = _mm_load_si128(&ctx->sfmt[N - 1].si);
121     for (i = 0; i < N - POS1; i++) {
122 	r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2,
123 	  mask);
124 	_mm_store_si128(&array[i].si, r);
125 	r1 = r2;
126 	r2 = r;
127     }
128     for (; i < N; i++) {
129 	r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2,
130 	  mask);
131 	_mm_store_si128(&array[i].si, r);
132 	r1 = r2;
133 	r2 = r;
134     }
135     /* main loop */
136     for (; i < size - N; i++) {
137 	r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
138 			 mask);
139 	_mm_store_si128(&array[i].si, r);
140 	r1 = r2;
141 	r2 = r;
142     }
143     for (j = 0; j < 2 * N - size; j++) {
144 	r = _mm_load_si128(&array[j + size - N].si);
145 	_mm_store_si128(&ctx->sfmt[j].si, r);
146     }
147     for (; i < size; i++) {
148 	r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2,
149 			 mask);
150 	_mm_store_si128(&array[i].si, r);
151 	_mm_store_si128(&ctx->sfmt[j++].si, r);
152 	r1 = r2;
153 	r2 = r;
154     }
155 }
156 
157 #endif
158