1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
17 #define TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
18
19 #define _USE_MATH_DEFINES
20 #include <math.h>
21 #include <cmath>
22 #undef _USE_MATH_DEFINES
23
24 #include <string.h>
25 #include <algorithm>
26 #include <type_traits>
27
28 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
29 #include "tensorflow/core/lib/bfloat16/bfloat16.h"
30 #include "tensorflow/core/lib/random/philox_random.h"
31
32 namespace tensorflow {
33 namespace random {
34
35 // Helper function to convert a 16-bit integer to a half between [0..1).
36 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
37 // Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
38 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
39 // Helper function to convert a 32-bit integer to a float between [0..1).
40 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
41 // Helper function to convert two 32-bit integers to a double between [0..1).
42 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
43
44 // Computes a + b. Requires that the result is representable in the destination
45 // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
46 // need *not* be representable in that type. (The condition on b excludes the
47 // extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
48 // compute.)
49 template <typename Int>
SignedAdd(Int a,typename std::make_unsigned<Int>::type b)50 PHILOX_DEVICE_INLINE Int SignedAdd(Int a,
51 typename std::make_unsigned<Int>::type b) {
52 // Implementation note: both b_div_2 and b - b_div_2 are positive and
53 // representatble as Int.
54 auto b_div_2 = b >> 1;
55 return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
56 }
57
58 // A class that generates uniform distribution random numbers from the
59 // underlying random integer generator.
60 // Arguments:
61 // Generator: a generator type that returns a number of uint32 upon each
62 // invocation. It needs to define kResultElementCount for the
63 // sample count for each invocation, and ResultType for the
64 // actual returned sample type.
65 // RealType: the data type of the real numbers that will be returned by the
66 // distribution. This could be either float or double for now.
67 // This class is meant to be implemented through specialization. The default
68 // is not defined by design.
69 template <class Generator, typename RealType>
70 class UniformDistribution;
71
72 template <class Generator>
73 class UniformDistribution<Generator, Eigen::half> {
74 public:
75 // The number of elements that will be returned.
76 static const int kResultElementCount = Generator::kResultElementCount;
77 // Cost of generation of a single element (in cycles).
78 static const int kElementCost = 3;
79 // Indicate that this distribution may take variable number of samples
80 // during the runtime.
81 static const bool kVariableSamplesPerOutput = false;
82 typedef Array<Eigen::half, kResultElementCount> ResultType;
83 typedef Eigen::half ResultElementType;
84
85 PHILOX_DEVICE_INLINE
operator()86 ResultType operator()(Generator* gen) {
87 typename Generator::ResultType sample = (*gen)();
88 ResultType result;
89 for (int i = 0; i < kResultElementCount; ++i) {
90 result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits.
91 }
92 return result;
93 }
94 };
95
96 template <class Generator>
97 class UniformDistribution<Generator, bfloat16> {
98 public:
99 // The number of elements that will be returned.
100 static const int kResultElementCount = Generator::kResultElementCount;
101 // Cost of generation of a single element (in cycles).
102 static const int kElementCost = 3;
103 // Indicate that this distribution may take variable number of samples
104 // during the runtime.
105 static const bool kVariableSamplesPerOutput = false;
106 typedef Array<bfloat16, kResultElementCount> ResultType;
107 typedef bfloat16 ResultElementType;
108
109 PHILOX_DEVICE_INLINE
operator()110 ResultType operator()(Generator* gen) {
111 typename Generator::ResultType sample = (*gen)();
112 ResultType result;
113 for (int i = 0; i < kResultElementCount; ++i) {
114 result[i] = Uint16ToGfloat16(sample[i]);
115 }
116 return result;
117 }
118 };
119
120 template <class Generator>
121 class UniformDistribution<Generator, float> {
122 public:
123 // The number of elements that will be returned.
124 static const int kResultElementCount = Generator::kResultElementCount;
125 // Cost of generation of a single element (in cycles).
126 static const int kElementCost = 3;
127 // Indicate that this distribution may take variable number of samples
128 // during the runtime.
129 static const bool kVariableSamplesPerOutput = false;
130 typedef Array<float, kResultElementCount> ResultType;
131 typedef float ResultElementType;
132
133 PHILOX_DEVICE_INLINE
operator()134 ResultType operator()(Generator* gen) {
135 typename Generator::ResultType sample = (*gen)();
136 ResultType result;
137 for (int i = 0; i < kResultElementCount; ++i) {
138 result[i] = Uint32ToFloat(sample[i]);
139 }
140 return result;
141 }
142 };
143
144 template <class Generator>
145 class UniformDistribution<Generator, double> {
146 public:
147 // The number of elements that will be returned.
148 static const int kResultElementCount = Generator::kResultElementCount / 2;
149 // Cost of generation of a single element (in cycles).
150 static const int kElementCost = 3;
151 // Indicate that this distribution may take variable number of samples
152 // during the runtime.
153 static const bool kVariableSamplesPerOutput = false;
154 typedef Array<double, kResultElementCount> ResultType;
155 typedef double ResultElementType;
156
157 PHILOX_DEVICE_INLINE
operator()158 ResultType operator()(Generator* gen) {
159 typename Generator::ResultType sample = (*gen)();
160 ResultType result;
161 for (int i = 0; i < kResultElementCount; ++i) {
162 result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
163 }
164 return result;
165 }
166 };
167
168 template <class Generator>
169 class UniformDistribution<Generator, int32> {
170 public:
171 // The number of elements that will be returned.
172 static const int kResultElementCount = Generator::kResultElementCount;
173 // Cost of generation of a single element (in cycles).
174 static const int kElementCost = 3;
175 // Indicate that this distribution may take variable number of samples
176 // during the runtime.
177 static const bool kVariableSamplesPerOutput = false;
178 typedef Array<int32, kResultElementCount> ResultType;
179 typedef int32 ResultElementType;
180
181 // Must have lo < hi
UniformDistribution(int32 lo,int32 hi)182 UniformDistribution(int32 lo, int32 hi)
183 : lo_(lo), range_(static_cast<uint32>(hi) - static_cast<uint32>(lo)) {}
184
185 PHILOX_DEVICE_INLINE
operator()186 ResultType operator()(Generator* gen) {
187 typename Generator::ResultType sample = (*gen)();
188 ResultType result;
189 for (int i = 0; i < kResultElementCount; ++i) {
190 result[i] = SignedAdd(lo_, sample[i] % range_);
191 }
192 return result;
193 }
194
195 private:
196 // Note that lo_ is intentionally signed while range_ is intentionally
197 // unsigned. This is because hi - lo can overflow signed integers if
198 // lo < 0 < hi, but always fits in unsigned.
199 int32 lo_;
200 uint32 range_;
201 };
202
203 template <class Generator>
204 class UniformDistribution<Generator, int64> {
205 public:
206 // The number of elements that will be returned.
207 static const int kResultElementCount = Generator::kResultElementCount / 2;
208 // Cost of generation of a single element (in cycles).
209 static const int kElementCost = 3;
210 // Indicate that this distribution may take variable number of samples
211 // during the runtime.
212 static const bool kVariableSamplesPerOutput = false;
213 typedef Array<int64, kResultElementCount> ResultType;
214 typedef int64 ResultElementType;
215
216 // Must have lo < hi
UniformDistribution(int64 lo,int64 hi)217 UniformDistribution(int64 lo, int64 hi)
218 : lo_(lo), range_(static_cast<uint64>(hi) - static_cast<uint64>(lo)) {}
219
220 PHILOX_DEVICE_INLINE
operator()221 ResultType operator()(Generator* gen) {
222 typename Generator::ResultType sample = (*gen)();
223 ResultType result;
224 for (int i = 0; i < kResultElementCount; ++i) {
225 auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
226 result[i] = SignedAdd(lo_, bits % range_);
227 }
228 return result;
229 }
230
231 private:
232 // Note that lo_ is intentionally signed while range_ is intentionally
233 // unsigned. This is because hi - lo can overflow signed integers if
234 // lo < 0 < hi, but always fits in unsigned.
235 int64 lo_;
236 uint64 range_;
237 };
238
239 // A class that adapts the underlying native multiple samples to return a single
240 // sample at a time.
241 template <class Generator>
242 class SingleSampleAdapter {
243 public:
244 // The number of elements that will be returned.
245 static const int kResultElementCount = 1;
246 // The number of elements that will be returned by the underlying generator.
247 static const int kNativeElementCount = Generator::kResultElementCount;
248 typedef typename Generator::ResultElementType ResultType;
249 typedef typename Generator::ResultElementType ResultElementType;
250
251 PHILOX_DEVICE_INLINE
SingleSampleAdapter(Generator * gen)252 explicit SingleSampleAdapter(Generator* gen)
253 : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
254
255 PHILOX_DEVICE_INLINE
operator()256 ResultType operator()() {
257 if (used_result_index_ == Generator::kResultElementCount) {
258 unused_results_ = (*generator_)();
259 used_result_index_ = 0;
260 }
261
262 return unused_results_[used_result_index_++];
263 }
264
265 PHILOX_DEVICE_INLINE
Skip(uint64 num_skips)266 void Skip(uint64 num_skips) {
267 if (!num_skips) {
268 return;
269 }
270 int num_unused_results = kNativeElementCount - used_result_index_;
271 if (num_skips <= num_unused_results) {
272 used_result_index_ += num_skips;
273 return;
274 }
275 num_skips -= num_unused_results;
276 used_result_index_ = kNativeElementCount;
277 SkipFromGenerator(num_skips / kNativeElementCount);
278 num_skips = num_skips % kNativeElementCount;
279 if (num_skips) {
280 unused_results_ = (*generator_)();
281 used_result_index_ = num_skips;
282 }
283 }
284
285 private:
286 // This implementation iteratively skips over `num_skips` samples
287 // from `generator_`. There is an O(1) implementation for PhiloxRandom
288 // in random_distributions.cc.
289 PHILOX_DEVICE_INLINE
SkipFromGenerator(uint64 num_skips)290 void SkipFromGenerator(uint64 num_skips) {
291 while (num_skips--) {
292 (*generator_)();
293 }
294 }
295
296 Generator* generator_;
297 typename Generator::ResultType unused_results_;
298 int used_result_index_;
299 };
300
301 // A class that generates unit normal distribution random numbers from the
302 // underlying random integer generator.
303 // Arguments:
304 // Generator: a generator type that returns a number of uint32 upon each
305 // each invocation. It needs to define kResultElementCount for the
306 // sample count for each invocation, and ResultType for actual
307 // returned sample type.
308 // RealType: the data type of the real numbers that will be returned by the
309 // distribution. This could be either float or double for now.
310 // This class is meant to be implemented through specialization. The default
311 // is not defined by design.
312 template <class Generator, typename RealType>
313 class NormalDistribution;
314
315 PHILOX_DEVICE_INLINE
316 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
317
318 PHILOX_DEVICE_INLINE
319 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0,
320 double* d1);
321
322 // Exactly like the float version, except that we convert to half afterwards;
323 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
324 // gain from working in half internally.
325 template <class Generator>
326 class NormalDistribution<Generator, Eigen::half> {
327 public:
328 // The number of elements that will be returned.
329 static const int kResultElementCount = Generator::kResultElementCount;
330 // Cost of generation of a single element (in cycles).
331 static const int kElementCost = 70;
332 // Indicate that this distribution may take variable number of samples
333 // during the runtime.
334 static const bool kVariableSamplesPerOutput = false;
335 typedef Array<Eigen::half, kResultElementCount> ResultType;
336 typedef Eigen::half ResultElementType;
337
338 PHILOX_DEVICE_INLINE
operator()339 ResultType operator()(Generator* gen) {
340 typename Generator::ResultType sample = (*gen)();
341 ResultType result;
342 for (int i = 0; i < kResultElementCount; i += 2) {
343 float f[2];
344 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
345 result[i] = Eigen::half(f[0]);
346 result[i + 1] = Eigen::half(f[1]);
347 }
348 return result;
349 }
350 };
351
352 template <class Generator>
353 class NormalDistribution<Generator, bfloat16> {
354 public:
355 // The number of elements that will be returned.
356 static const int kResultElementCount = Generator::kResultElementCount;
357 // Cost of generation of a single element (in cycles).
358 static const int kElementCost = 70;
359 // Indicate that this distribution may take variable number of samples
360 // during the runtime.
361 static const bool kVariableSamplesPerOutput = false;
362 typedef Array<bfloat16, kResultElementCount> ResultType;
363 typedef bfloat16 ResultElementType;
364
365 PHILOX_DEVICE_INLINE
operator()366 ResultType operator()(Generator* gen) {
367 typename Generator::ResultType sample = (*gen)();
368 ResultType result;
369 static_assert(kResultElementCount % 2 == 0,
370 "kResultElementCount should be an even number");
371 for (int i = 0; i < kResultElementCount; i += 2) {
372 float f[2];
373 // Box-Muller transform requires processing 2 elements at a time.
374 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
375 result[i] = bfloat16(f[0]);
376 result[i + 1] = bfloat16(f[1]);
377 }
378 return result;
379 }
380 };
381
382 template <class Generator>
383 class NormalDistribution<Generator, float> {
384 public:
385 // The number of elements that will be returned.
386 static const int kResultElementCount = Generator::kResultElementCount;
387 // Cost of generation of a single element (in cycles).
388 static const int kElementCost = 70;
389 // Indicate that this distribution may take variable number of samples
390 // during the runtime.
391 static const bool kVariableSamplesPerOutput = false;
392 typedef Array<float, kResultElementCount> ResultType;
393 typedef float ResultElementType;
394
395 PHILOX_DEVICE_INLINE
operator()396 ResultType operator()(Generator* gen) {
397 typename Generator::ResultType sample = (*gen)();
398 ResultType result;
399 for (int i = 0; i < kResultElementCount; i += 2) {
400 BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
401 }
402 return result;
403 }
404 };
405
406 template <class Generator>
407 class NormalDistribution<Generator, double> {
408 public:
409 // The number of elements that will be returned.
410 static const int kResultElementCount = Generator::kResultElementCount / 2;
411 // Cost of generation of a single element (in cycles).
412 static const int kElementCost = 70;
413 // Indicate that this distribution may take variable number of samples
414 // during the runtime.
415 static const bool kVariableSamplesPerOutput = false;
416 typedef Array<double, kResultElementCount> ResultType;
417 typedef double ResultElementType;
418
419 PHILOX_DEVICE_INLINE
operator()420 ResultType operator()(Generator* gen) {
421 typename Generator::ResultType sample = (*gen)();
422 ResultType result;
423 for (int i = 0; i < kResultElementCount; i += 2) {
424 const int i2 = 2 * i;
425 BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2],
426 sample[i2 + 3], &result[i], &result[i + 1]);
427 }
428 return result;
429 }
430 };
431
432 // A class that returns standard normal distribution between
433 // [-kTruncateValue, kTruncateValue].
434 // Arguments:
435 // Generator: a generator type that returns a number of uint32 upon each
436 // each invocation. It needs to define kResultElementCount for the
437 // sample count for each invocation, and ResultType for actual
438 // returned sample type.
439 // RealType: the data type of the real numbers that will be returned by the
440 // distribution. This could be either float or double for now.
441 // This class is meant to be implemented through specialization. The default
442 // is not defined by design.
443 template <class SingleSampleGenerator, typename RealType>
444 class TruncatedNormalDistribution;
445
446 // Exactly like the float version, except that we convert to half afterwards;
447 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
448 // gain from working in half internally.
449 template <class SingleSampleGenerator>
450 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
451 public:
452 // The number of elements that will be returned.
453 static const int kResultElementCount =
454 SingleSampleGenerator::kNativeElementCount;
455 // Cost of generation of a single element (in cycles).
456 static const int kElementCost = 90;
457 // Indicate that this distribution may take variable number of samples
458 // during the runtime.
459 static const bool kVariableSamplesPerOutput = true;
460 // The threshold where the normal distribution is truncated.
461 const float kTruncateValue = 2.0f;
462
463 typedef Array<Eigen::half, kResultElementCount> ResultType;
464 typedef Eigen::half ResultElementType;
465
466 PHILOX_DEVICE_INLINE
operator()467 ResultType operator()(SingleSampleGenerator* gen) {
468 ResultType results;
469 int index = 0;
470 while (true) {
471 // Repeatedly take samples from the normal distribution, until we have
472 // the desired number of elements that fall within the pre-defined cutoff
473 // threshold.
474 const uint32 x0 = (*gen)();
475 const uint32 x1 = (*gen)();
476 float f[2];
477 BoxMullerFloat(x0, x1, &f[0], &f[1]);
478
479 for (int i = 0; i < 2; ++i) {
480 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
481 results[index++] = Eigen::half(f[i]);
482 if (index >= kResultElementCount) {
483 return results;
484 }
485 }
486 }
487 }
488 }
489 };
490
491 template <class SingleSampleGenerator>
492 class TruncatedNormalDistribution<SingleSampleGenerator, bfloat16> {
493 public:
494 // The number of elements that will be returned.
495 static const int kResultElementCount =
496 SingleSampleGenerator::kNativeElementCount;
497 // Cost of generation of a single element (in cycles).
498 static const int kElementCost = 90;
499 // Indicate that this distribution may take variable number of samples
500 // during the runtime.
501 static const bool kVariableSamplesPerOutput = true;
502 // The threshold where the normal distribution is truncated.
503 const float kTruncateValue = 2.0f;
504
505 typedef Array<bfloat16, kResultElementCount> ResultType;
506 typedef bfloat16 ResultElementType;
507
508 PHILOX_DEVICE_INLINE
operator()509 ResultType operator()(SingleSampleGenerator* gen) {
510 ResultType results;
511 int index = 0;
512 while (true) {
513 // Repeatedly take samples from the normal distribution, until we have
514 // the desired number of elements that fall within the pre-defined cutoff
515 // threshold.
516 const uint32 x0 = (*gen)();
517 const uint32 x1 = (*gen)();
518 float f[2];
519 BoxMullerFloat(x0, x1, &f[0], &f[1]);
520
521 for (int i = 0; i < 2; ++i) {
522 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
523 results[index++] = bfloat16(f[i]);
524 if (index >= kResultElementCount) {
525 return results;
526 }
527 }
528 }
529 }
530 }
531 };
532
533 // Partial specialization for float.
534 template <class SingleSampleGenerator>
535 class TruncatedNormalDistribution<SingleSampleGenerator, float> {
536 public:
537 // The number of elements that will be returned.
538 static const int kResultElementCount =
539 SingleSampleGenerator::kNativeElementCount;
540 // Cost of generation of a single element (in cycles).
541 static const int kElementCost = 90;
542 // Indicate that this distribution may take variable number of samples
543 // during the runtime.
544 static const bool kVariableSamplesPerOutput = true;
545 // The threshold where the normal distribution is truncated.
546 const float kTruncateValue = 2.0f;
547
548 typedef Array<float, kResultElementCount> ResultType;
549 typedef float ResultElementType;
550
551 PHILOX_DEVICE_INLINE
operator()552 ResultType operator()(SingleSampleGenerator* gen) {
553 ResultType results;
554 int index = 0;
555 while (true) {
556 // Repeatedly take samples from the normal distribution, until we have
557 // the desired number of elements that fall within the pre-defined cutoff
558 // threshold.
559 const uint32 x0 = (*gen)();
560 const uint32 x1 = (*gen)();
561 float f[2];
562 BoxMullerFloat(x0, x1, &f[0], &f[1]);
563
564 for (int i = 0; i < 2; ++i) {
565 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
566 results[index++] = f[i];
567 if (index >= kResultElementCount) {
568 return results;
569 }
570 }
571 }
572 }
573 }
574 };
575
576 // Partial specialization for double.
577 template <class SingleSampleGenerator>
578 class TruncatedNormalDistribution<SingleSampleGenerator, double> {
579 public:
580 // The number of elements that will be returned.
581 static const int kResultElementCount =
582 (SingleSampleGenerator::kNativeElementCount > 1)
583 ? SingleSampleGenerator::kNativeElementCount / 2
584 : 1;
585 // Cost of generation of a single element (in cycles).
586 static const int kElementCost = 90;
587 // Indicate that this distribution may take variable number of samples
588 // during the runtime.
589 static const bool kVariableSamplesPerOutput = true;
590 typedef Array<double, kResultElementCount> ResultType;
591 typedef double ResultElementType;
592 const double kTruncateValue = 2.0;
593
594 PHILOX_DEVICE_INLINE
operator()595 ResultType operator()(SingleSampleGenerator* gen) {
596 ResultType results;
597 int index = 0;
598 while (1) {
599 const uint32 x0 = (*gen)();
600 const uint32 x1 = (*gen)();
601 const uint32 x2 = (*gen)();
602 const uint32 x3 = (*gen)();
603 double d[2];
604 BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
605
606 for (int i = 0; i < 2; ++i) {
607 if (Eigen::numext::abs(d[i]) < kTruncateValue) {
608 results[index++] = d[i];
609 if (index >= kResultElementCount) {
610 return results;
611 }
612 }
613 }
614 }
615 }
616 };
617
618 // Helper function to convert two 32-bit uniform integers to two floats
619 // under the unit normal distribution.
620 PHILOX_DEVICE_INLINE
BoxMullerFloat(uint32 x0,uint32 x1,float * f0,float * f1)621 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
622 // This function implements the Box-Muller transform:
623 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
624 // Do not send a really small number to log().
625 // We cannot mark "epsilon" as "static const" because NVCC would complain
626 const float epsilon = 1.0e-7f;
627 float u1 = Uint32ToFloat(x0);
628 if (u1 < epsilon) {
629 u1 = epsilon;
630 }
631 const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
632 const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
633 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
634 *f0 = Eigen::numext::sin(v1);
635 *f1 = Eigen::numext::cos(v1);
636 #else
637 sincosf(v1, f0, f1);
638 #endif
639 *f0 *= u2;
640 *f1 *= u2;
641 }
642
643 // Helper function to convert four 32-bit uniform integers to two doubles
644 // under the unit normal distribution.
645 PHILOX_DEVICE_INLINE
BoxMullerDouble(uint32 x0,uint32 x1,uint32 x2,uint32 x3,double * d0,double * d1)646 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0,
647 double* d1) {
648 // This function implements the Box-Muller transform:
649 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
650 // Do not send a really small number to log().
651 // We cannot mark "epsilon" as "static const" because NVCC would complain
652 const double epsilon = 1.0e-7;
653 double u1 = Uint64ToDouble(x0, x1);
654 if (u1 < epsilon) {
655 u1 = epsilon;
656 }
657 const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
658 const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
659 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
660 *d0 = Eigen::numext::sin(v1);
661 *d1 = Eigen::numext::cos(v1);
662 #else
663 sincos(v1, d0, d1);
664 #endif
665 *d0 *= u2;
666 *d1 *= u2;
667 }
668
669 // Helper function to convert an 16-bit integer to a half between [0..1).
Uint16ToHalf(uint16 x)670 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
671 // IEEE754 halfs are formatted as follows (MSB first):
672 // sign(1) exponent(5) mantissa(10)
673 // Conceptually construct the following:
674 // sign == 0
675 // exponent == 15 -- an excess 15 representation of a zero exponent
676 // mantissa == 10 random bits
677 const uint16 man = x & 0x3ffu; // 10 bit mantissa
678 const uint16 exp = static_cast<uint16>(15);
679 const uint16 val = (exp << 10) | man;
680
681 Eigen::half result;
682 result.x = val;
683 return result - Eigen::half(1.0);
684 }
685
686 // Helper function to convert an 16-bit integer to a bfloat16 between [0..1).
687 // This can create a uniform distribution of values between [0..1).
Uint16ToGfloat16(uint16 x)688 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) {
689 // bfloat are formatted as follows (MSB first):
690 // sign(1) exponent(8) mantissa(7)
691 // Conceptually construct the following:
692 // sign == 0
693 // exponent == 127 -- an excess 127 representation of a zero exponent
694 // mantissa == 7 random bits
695 const uint16 man = x & 0x7fu; // 7 bit mantissa
696 const uint16 exp = static_cast<uint16>(127);
697 const uint16 val = (exp << 7) | man;
698
699 bfloat16 result;
700 memcpy(&result, &val, sizeof(val));
701 // The mantissa has an implicit leading 1, so the above code creates a value
702 // in [1, 2). The minus will not cause a rounding that makes the result 1.
703 // Instead it will just be close to 1.
704 return result - bfloat16(1.0);
705 }
706
707 // Helper function to convert an 32-bit integer to a float between [0..1).
Uint32ToFloat(uint32 x)708 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
709 // IEEE754 floats are formatted as follows (MSB first):
710 // sign(1) exponent(8) mantissa(23)
711 // Conceptually construct the following:
712 // sign == 0
713 // exponent == 127 -- an excess 127 representation of a zero exponent
714 // mantissa == 23 random bits
715 const uint32 man = x & 0x7fffffu; // 23 bit mantissa
716 const uint32 exp = static_cast<uint32>(127);
717 const uint32 val = (exp << 23) | man;
718
719 // Assumes that endian-ness is same for float and uint32.
720 float result;
721 memcpy(&result, &val, sizeof(val));
722 return result - 1.0f;
723 }
724
725 // Helper function to convert two 32-bit integers to a double between [0..1).
Uint64ToDouble(uint32 x0,uint32 x1)726 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
727 // IEEE754 doubles are formatted as follows (MSB first):
728 // sign(1) exponent(11) mantissa(52)
729 // Conceptually construct the following:
730 // sign == 0
731 // exponent == 1023 -- an excess 1023 representation of a zero exponent
732 // mantissa == 52 random bits
733 const uint32 mhi = x0 & 0xfffffu; // upper 20 bits of mantissa
734 const uint32 mlo = x1; // lower 32 bits of mantissa
735 const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo; // mantissa
736 const uint64 exp = static_cast<uint64>(1023);
737 const uint64 val = (exp << 52) | man;
738 // Assumes that endian-ness is same for double and uint64.
739 double result;
740 memcpy(&result, &val, sizeof(val));
741 return result - 1.0;
742 }
743
744 } // namespace random
745 } // namespace tensorflow
746
747 #endif // TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
748