1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
17 #define TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
18
19 // This include is needed in order to prevent a build error when including
20 // tensorflow/core/lib/bfloat16/bfloat16.h.
21 #include <iostream>
22
23 #define _USE_MATH_DEFINES
24 #include <math.h>
25
26 #include <cmath>
27 #undef _USE_MATH_DEFINES
28
29 #include <string.h>
30 #include <tensorflow/core/lib/bfloat16/bfloat16.h>
31
32 #pragma clang diagnostic push
33 #pragma clang diagnostic ignored "-Wunused-parameter"
34 #pragma clang diagnostic ignored "-Winvalid-partial-specialization"
35 #include <unsupported/Eigen/CXX11/Tensor>
36 #pragma clang diagnostic pop
37
38 #include <algorithm>
39 #include <type_traits>
40
41 #include "philox_random.h"
42
43 namespace tensorflow {
44 namespace random {
45
46 // Helper function to convert a 16-bit integer to a half between [0..1).
47 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x);
48 // Helper function to convert a 16-bit integer to a bfloat16 between [0..1).
49 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x);
50 // Helper function to convert a 32-bit integer to a float between [0..1).
51 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x);
52 // Helper function to convert two 32-bit integers to a double between [0..1).
53 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1);
54
55 // Computes a + b. Requires that the result is representable in the destination
56 // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b
57 // need *not* be representable in that type. (The condition on b excludes the
58 // extremal case INT_MIN + UINT_MAX = INT_MAX, which this function cannot
59 // compute.)
60 template <typename Int>
SignedAdd(Int a,typename std::make_unsigned<Int>::type b)61 PHILOX_DEVICE_INLINE Int SignedAdd(Int a, typename std::make_unsigned<Int>::type b) {
62 // Implementation note: both b_div_2 and b - b_div_2 are positive and
63 // representatble as Int.
64 auto b_div_2 = b >> 1;
65 return a + static_cast<Int>(b_div_2) + static_cast<Int>(b - b_div_2);
66 }
67
68 // A class that generates uniform distribution random numbers from the
69 // underlying random integer generator.
70 // Arguments:
71 // Generator: a generator type that returns a number of uint32 upon each
72 // invocation. It needs to define kResultElementCount for the
73 // sample count for each invocation, and ResultType for the
74 // actual returned sample type.
75 // RealType: the data type of the real numbers that will be returned by the
76 // distribution. This could be either float or double for now.
77 // This class is meant to be implemented through specialization. The default
78 // is not defined by design.
79 template <class Generator, typename RealType>
80 class UniformDistribution;
81
82 template <class Generator>
83 class UniformDistribution<Generator, Eigen::half> {
84 public:
85 // The number of elements that will be returned.
86 static const int kResultElementCount = Generator::kResultElementCount;
87 // Cost of generation of a single element (in cycles).
88 static const int kElementCost = 3;
89 // Indicate that this distribution may take variable number of samples
90 // during the runtime.
91 static const bool kVariableSamplesPerOutput = false;
92 typedef Array<Eigen::half, kResultElementCount> ResultType;
93 typedef Eigen::half ResultElementType;
94
95 PHILOX_DEVICE_INLINE
operator()96 ResultType operator()(Generator* gen) {
97 typename Generator::ResultType sample = (*gen)();
98 ResultType result;
99 for (int i = 0; i < kResultElementCount; ++i) {
100 result[i] = Uint16ToHalf(sample[i]); // Truncate the upper 16 bits.
101 }
102 return result;
103 }
104 };
105
106 template <class Generator>
107 class UniformDistribution<Generator, bfloat16> {
108 public:
109 // The number of elements that will be returned.
110 static const int kResultElementCount = Generator::kResultElementCount;
111 // Cost of generation of a single element (in cycles).
112 static const int kElementCost = 3;
113 // Indicate that this distribution may take variable number of samples
114 // during the runtime.
115 static const bool kVariableSamplesPerOutput = false;
116 typedef Array<bfloat16, kResultElementCount> ResultType;
117 typedef bfloat16 ResultElementType;
118
119 PHILOX_DEVICE_INLINE
operator()120 ResultType operator()(Generator* gen) {
121 typename Generator::ResultType sample = (*gen)();
122 ResultType result;
123 for (int i = 0; i < kResultElementCount; ++i) {
124 result[i] = Uint16ToGfloat16(sample[i]);
125 }
126 return result;
127 }
128 };
129
130 template <class Generator>
131 class UniformDistribution<Generator, float> {
132 public:
133 // The number of elements that will be returned.
134 static const int kResultElementCount = Generator::kResultElementCount;
135 // Cost of generation of a single element (in cycles).
136 static const int kElementCost = 3;
137 // Indicate that this distribution may take variable number of samples
138 // during the runtime.
139 static const bool kVariableSamplesPerOutput = false;
140 typedef Array<float, kResultElementCount> ResultType;
141 typedef float ResultElementType;
142
143 PHILOX_DEVICE_INLINE
operator()144 ResultType operator()(Generator* gen) {
145 typename Generator::ResultType sample = (*gen)();
146 ResultType result;
147 for (int i = 0; i < kResultElementCount; ++i) {
148 result[i] = Uint32ToFloat(sample[i]);
149 }
150 return result;
151 }
152 };
153
154 template <class Generator>
155 class UniformDistribution<Generator, double> {
156 public:
157 // The number of elements that will be returned.
158 static const int kResultElementCount = Generator::kResultElementCount / 2;
159 // Cost of generation of a single element (in cycles).
160 static const int kElementCost = 3;
161 // Indicate that this distribution may take variable number of samples
162 // during the runtime.
163 static const bool kVariableSamplesPerOutput = false;
164 typedef Array<double, kResultElementCount> ResultType;
165 typedef double ResultElementType;
166
167 PHILOX_DEVICE_INLINE
operator()168 ResultType operator()(Generator* gen) {
169 typename Generator::ResultType sample = (*gen)();
170 ResultType result;
171 for (int i = 0; i < kResultElementCount; ++i) {
172 result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]);
173 }
174 return result;
175 }
176 };
177
178 template <class Generator>
179 class UniformDistribution<Generator, int32> {
180 public:
181 // The number of elements that will be returned.
182 static const int kResultElementCount = Generator::kResultElementCount;
183 // Cost of generation of a single element (in cycles).
184 static const int kElementCost = 3;
185 // Indicate that this distribution may take variable number of samples
186 // during the runtime.
187 static const bool kVariableSamplesPerOutput = false;
188 typedef Array<int32, kResultElementCount> ResultType;
189 typedef int32 ResultElementType;
190
191 // Must have lo < hi
UniformDistribution(int32 lo,int32 hi)192 UniformDistribution(int32 lo, int32 hi)
193 : lo_(lo), range_(static_cast<uint32>(hi) - static_cast<uint32>(lo)) {}
194
195 PHILOX_DEVICE_INLINE
operator()196 ResultType operator()(Generator* gen) {
197 typename Generator::ResultType sample = (*gen)();
198 ResultType result;
199 for (int i = 0; i < kResultElementCount; ++i) {
200 result[i] = SignedAdd(lo_, sample[i] % range_);
201 }
202 return result;
203 }
204
205 private:
206 // Note that lo_ is intentionally signed while range_ is intentionally
207 // unsigned. This is because hi - lo can overflow signed integers if
208 // lo < 0 < hi, but always fits in unsigned.
209 int32 lo_;
210 uint32 range_;
211 };
212
213 template <class Generator>
214 class UniformDistribution<Generator, int64> {
215 public:
216 // The number of elements that will be returned.
217 static const int kResultElementCount = Generator::kResultElementCount / 2;
218 // Cost of generation of a single element (in cycles).
219 static const int kElementCost = 3;
220 // Indicate that this distribution may take variable number of samples
221 // during the runtime.
222 static const bool kVariableSamplesPerOutput = false;
223 typedef Array<int64, kResultElementCount> ResultType;
224 typedef int64 ResultElementType;
225
226 // Must have lo < hi
UniformDistribution(int64 lo,int64 hi)227 UniformDistribution(int64 lo, int64 hi)
228 : lo_(lo), range_(static_cast<uint64>(hi) - static_cast<uint64>(lo)) {}
229
230 PHILOX_DEVICE_INLINE
operator()231 ResultType operator()(Generator* gen) {
232 typename Generator::ResultType sample = (*gen)();
233 ResultType result;
234 for (int i = 0; i < kResultElementCount; ++i) {
235 auto bits = sample[2 * i] | static_cast<uint64>(sample[2 * i + 1]) << 32;
236 result[i] = SignedAdd(lo_, bits % range_);
237 }
238 return result;
239 }
240
241 private:
242 // Note that lo_ is intentionally signed while range_ is intentionally
243 // unsigned. This is because hi - lo can overflow signed integers if
244 // lo < 0 < hi, but always fits in unsigned.
245 int64 lo_;
246 uint64 range_;
247 };
248
249 // A class that adapts the underlying native multiple samples to return a single
250 // sample at a time.
251 template <class Generator>
252 class SingleSampleAdapter {
253 public:
254 // The number of elements that will be returned.
255 static const int kResultElementCount = 1;
256 // The number of elements that will be returned by the underlying generator.
257 static const int kNativeElementCount = Generator::kResultElementCount;
258 typedef typename Generator::ResultElementType ResultType;
259 typedef typename Generator::ResultElementType ResultElementType;
260
261 PHILOX_DEVICE_INLINE
SingleSampleAdapter(Generator * gen)262 explicit SingleSampleAdapter(Generator* gen)
263 : generator_(gen), used_result_index_(Generator::kResultElementCount) {}
264
265 PHILOX_DEVICE_INLINE
operator()266 ResultType operator()() {
267 if (used_result_index_ == Generator::kResultElementCount) {
268 unused_results_ = (*generator_)();
269 used_result_index_ = 0;
270 }
271
272 return unused_results_[used_result_index_++];
273 }
274
275 PHILOX_DEVICE_INLINE
Skip(uint64 num_skips)276 void Skip(uint64 num_skips) {
277 if (!num_skips) {
278 return;
279 }
280 int num_unused_results = kNativeElementCount - used_result_index_;
281 if (num_skips <= num_unused_results) {
282 used_result_index_ += num_skips;
283 return;
284 }
285 num_skips -= num_unused_results;
286 used_result_index_ = kNativeElementCount;
287 SkipFromGenerator(num_skips / kNativeElementCount);
288 num_skips = num_skips % kNativeElementCount;
289 if (num_skips) {
290 unused_results_ = (*generator_)();
291 used_result_index_ = num_skips;
292 }
293 }
294
295 private:
296 // This implementation iteratively skips over `num_skips` samples
297 // from `generator_`. There is an O(1) implementation for PhiloxRandom
298 // in random_distributions.cc.
299 PHILOX_DEVICE_INLINE
SkipFromGenerator(uint64 num_skips)300 void SkipFromGenerator(uint64 num_skips) {
301 while (num_skips--) {
302 (*generator_)();
303 }
304 }
305
306 Generator* generator_;
307 typename Generator::ResultType unused_results_;
308 int used_result_index_;
309 };
310
311 // A class that generates unit normal distribution random numbers from the
312 // underlying random integer generator.
313 // Arguments:
314 // Generator: a generator type that returns a number of uint32 upon each
315 // each invocation. It needs to define kResultElementCount for the
316 // sample count for each invocation, and ResultType for actual
317 // returned sample type.
318 // RealType: the data type of the real numbers that will be returned by the
319 // distribution. This could be either float or double for now.
320 // This class is meant to be implemented through specialization. The default
321 // is not defined by design.
322 template <class Generator, typename RealType>
323 class NormalDistribution;
324
325 PHILOX_DEVICE_INLINE
326 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1);
327
328 PHILOX_DEVICE_INLINE
329 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1);
330
331 // Exactly like the float version, except that we convert to half afterwards;
332 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
333 // gain from working in half internally.
334 template <class Generator>
335 class NormalDistribution<Generator, Eigen::half> {
336 public:
337 // The number of elements that will be returned.
338 static const int kResultElementCount = Generator::kResultElementCount;
339 // Cost of generation of a single element (in cycles).
340 static const int kElementCost = 70;
341 // Indicate that this distribution may take variable number of samples
342 // during the runtime.
343 static const bool kVariableSamplesPerOutput = false;
344 typedef Array<Eigen::half, kResultElementCount> ResultType;
345 typedef Eigen::half ResultElementType;
346
347 PHILOX_DEVICE_INLINE
operator()348 ResultType operator()(Generator* gen) {
349 typename Generator::ResultType sample = (*gen)();
350 ResultType result;
351 for (int i = 0; i < kResultElementCount; i += 2) {
352 float f[2];
353 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
354 result[i] = Eigen::half(f[0]);
355 result[i + 1] = Eigen::half(f[1]);
356 }
357 return result;
358 }
359 };
360
361 template <class Generator>
362 class NormalDistribution<Generator, bfloat16> {
363 public:
364 // The number of elements that will be returned.
365 static const int kResultElementCount = Generator::kResultElementCount;
366 // Cost of generation of a single element (in cycles).
367 static const int kElementCost = 70;
368 // Indicate that this distribution may take variable number of samples
369 // during the runtime.
370 static const bool kVariableSamplesPerOutput = false;
371 typedef Array<bfloat16, kResultElementCount> ResultType;
372 typedef bfloat16 ResultElementType;
373
374 PHILOX_DEVICE_INLINE
operator()375 ResultType operator()(Generator* gen) {
376 typename Generator::ResultType sample = (*gen)();
377 ResultType result;
378 static_assert(kResultElementCount % 2 == 0, "kResultElementCount should be an even number");
379 for (int i = 0; i < kResultElementCount; i += 2) {
380 float f[2];
381 // Box-Muller transform requires processing 2 elements at a time.
382 BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]);
383 result[i] = bfloat16(f[0]);
384 result[i + 1] = bfloat16(f[1]);
385 }
386 return result;
387 }
388 };
389
390 template <class Generator>
391 class NormalDistribution<Generator, float> {
392 public:
393 // The number of elements that will be returned.
394 static const int kResultElementCount = Generator::kResultElementCount;
395 // Cost of generation of a single element (in cycles).
396 static const int kElementCost = 70;
397 // Indicate that this distribution may take variable number of samples
398 // during the runtime.
399 static const bool kVariableSamplesPerOutput = false;
400 typedef Array<float, kResultElementCount> ResultType;
401 typedef float ResultElementType;
402
403 PHILOX_DEVICE_INLINE
operator()404 ResultType operator()(Generator* gen) {
405 typename Generator::ResultType sample = (*gen)();
406 ResultType result;
407 for (int i = 0; i < kResultElementCount; i += 2) {
408 BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]);
409 }
410 return result;
411 }
412 };
413
414 template <class Generator>
415 class NormalDistribution<Generator, double> {
416 public:
417 // The number of elements that will be returned.
418 static const int kResultElementCount = Generator::kResultElementCount / 2;
419 // Cost of generation of a single element (in cycles).
420 static const int kElementCost = 70;
421 // Indicate that this distribution may take variable number of samples
422 // during the runtime.
423 static const bool kVariableSamplesPerOutput = false;
424 typedef Array<double, kResultElementCount> ResultType;
425 typedef double ResultElementType;
426
427 PHILOX_DEVICE_INLINE
operator()428 ResultType operator()(Generator* gen) {
429 typename Generator::ResultType sample = (*gen)();
430 ResultType result;
431 for (int i = 0; i < kResultElementCount; i += 2) {
432 const int i2 = 2 * i;
433 BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], sample[i2 + 3], &result[i],
434 &result[i + 1]);
435 }
436 return result;
437 }
438 };
439
440 // A class that returns standard normal distribution between
441 // [-kTruncateValue, kTruncateValue].
442 // Arguments:
443 // Generator: a generator type that returns a number of uint32 upon each
444 // each invocation. It needs to define kResultElementCount for the
445 // sample count for each invocation, and ResultType for actual
446 // returned sample type.
447 // RealType: the data type of the real numbers that will be returned by the
448 // distribution. This could be either float or double for now.
449 // This class is meant to be implemented through specialization. The default
450 // is not defined by design.
451 template <class SingleSampleGenerator, typename RealType>
452 class TruncatedNormalDistribution;
453
454 // Exactly like the float version, except that we convert to half afterwards;
455 // since we don't have half-precision sin/cos even on GPUs, there's nothing to
456 // gain from working in half internally.
457 template <class SingleSampleGenerator>
458 class TruncatedNormalDistribution<SingleSampleGenerator, Eigen::half> {
459 public:
460 // The number of elements that will be returned.
461 static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
462 // Cost of generation of a single element (in cycles).
463 static const int kElementCost = 90;
464 // Indicate that this distribution may take variable number of samples
465 // during the runtime.
466 static const bool kVariableSamplesPerOutput = true;
467 // The threshold where the normal distribution is truncated.
468 const float kTruncateValue = 2.0f;
469
470 typedef Array<Eigen::half, kResultElementCount> ResultType;
471 typedef Eigen::half ResultElementType;
472
473 PHILOX_DEVICE_INLINE
operator()474 ResultType operator()(SingleSampleGenerator* gen) {
475 ResultType results;
476 int index = 0;
477 while (true) {
478 // Repeatedly take samples from the normal distribution, until we have
479 // the desired number of elements that fall within the pre-defined cutoff
480 // threshold.
481 const uint32 x0 = (*gen)();
482 const uint32 x1 = (*gen)();
483 float f[2];
484 BoxMullerFloat(x0, x1, &f[0], &f[1]);
485
486 for (int i = 0; i < 2; ++i) {
487 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
488 results[index++] = Eigen::half(f[i]);
489 if (index >= kResultElementCount) {
490 return results;
491 }
492 }
493 }
494 }
495 }
496 };
497
498 template <class SingleSampleGenerator>
499 class TruncatedNormalDistribution<SingleSampleGenerator, bfloat16> {
500 public:
501 // The number of elements that will be returned.
502 static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
503 // Cost of generation of a single element (in cycles).
504 static const int kElementCost = 90;
505 // Indicate that this distribution may take variable number of samples
506 // during the runtime.
507 static const bool kVariableSamplesPerOutput = true;
508 // The threshold where the normal distribution is truncated.
509 const float kTruncateValue = 2.0f;
510
511 typedef Array<bfloat16, kResultElementCount> ResultType;
512 typedef bfloat16 ResultElementType;
513
514 PHILOX_DEVICE_INLINE
operator()515 ResultType operator()(SingleSampleGenerator* gen) {
516 ResultType results;
517 int index = 0;
518 while (true) {
519 // Repeatedly take samples from the normal distribution, until we have
520 // the desired number of elements that fall within the pre-defined cutoff
521 // threshold.
522 const uint32 x0 = (*gen)();
523 const uint32 x1 = (*gen)();
524 float f[2];
525 BoxMullerFloat(x0, x1, &f[0], &f[1]);
526
527 for (int i = 0; i < 2; ++i) {
528 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
529 results[index++] = bfloat16(f[i]);
530 if (index >= kResultElementCount) {
531 return results;
532 }
533 }
534 }
535 }
536 }
537 };
538
539 // Partial specialization for float.
540 template <class SingleSampleGenerator>
541 class TruncatedNormalDistribution<SingleSampleGenerator, float> {
542 public:
543 // The number of elements that will be returned.
544 static const int kResultElementCount = SingleSampleGenerator::kNativeElementCount;
545 // Cost of generation of a single element (in cycles).
546 static const int kElementCost = 90;
547 // Indicate that this distribution may take variable number of samples
548 // during the runtime.
549 static const bool kVariableSamplesPerOutput = true;
550 // The threshold where the normal distribution is truncated.
551 const float kTruncateValue = 2.0f;
552
553 typedef Array<float, kResultElementCount> ResultType;
554 typedef float ResultElementType;
555
556 PHILOX_DEVICE_INLINE
operator()557 ResultType operator()(SingleSampleGenerator* gen) {
558 ResultType results;
559 int index = 0;
560 while (true) {
561 // Repeatedly take samples from the normal distribution, until we have
562 // the desired number of elements that fall within the pre-defined cutoff
563 // threshold.
564 const uint32 x0 = (*gen)();
565 const uint32 x1 = (*gen)();
566 float f[2];
567 BoxMullerFloat(x0, x1, &f[0], &f[1]);
568
569 for (int i = 0; i < 2; ++i) {
570 if (Eigen::numext::abs(f[i]) < kTruncateValue) {
571 results[index++] = f[i];
572 if (index >= kResultElementCount) {
573 return results;
574 }
575 }
576 }
577 }
578 }
579 };
580
581 // Partial specialization for double.
582 template <class SingleSampleGenerator>
583 class TruncatedNormalDistribution<SingleSampleGenerator, double> {
584 public:
585 // The number of elements that will be returned.
586 static const int kResultElementCount = (SingleSampleGenerator::kNativeElementCount > 1)
587 ? SingleSampleGenerator::kNativeElementCount / 2
588 : 1;
589 // Cost of generation of a single element (in cycles).
590 static const int kElementCost = 90;
591 // Indicate that this distribution may take variable number of samples
592 // during the runtime.
593 static const bool kVariableSamplesPerOutput = true;
594 typedef Array<double, kResultElementCount> ResultType;
595 typedef double ResultElementType;
596 const double kTruncateValue = 2.0;
597
598 PHILOX_DEVICE_INLINE
operator()599 ResultType operator()(SingleSampleGenerator* gen) {
600 ResultType results;
601 int index = 0;
602 while (1) {
603 const uint32 x0 = (*gen)();
604 const uint32 x1 = (*gen)();
605 const uint32 x2 = (*gen)();
606 const uint32 x3 = (*gen)();
607 double d[2];
608 BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]);
609
610 for (int i = 0; i < 2; ++i) {
611 if (Eigen::numext::abs(d[i]) < kTruncateValue) {
612 results[index++] = d[i];
613 if (index >= kResultElementCount) {
614 return results;
615 }
616 }
617 }
618 }
619 }
620 };
621
622 // Helper function to convert two 32-bit uniform integers to two floats
623 // under the unit normal distribution.
624 PHILOX_DEVICE_INLINE
BoxMullerFloat(uint32 x0,uint32 x1,float * f0,float * f1)625 void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) {
626 // This function implements the Box-Muller transform:
627 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
628 // Do not send a really small number to log().
629 // We cannot mark "epsilon" as "static const" because NVCC would complain
630 const float epsilon = 1.0e-7f;
631 float u1 = Uint32ToFloat(x0);
632 if (u1 < epsilon) {
633 u1 = epsilon;
634 }
635 const float v1 = 2.0f * M_PI * Uint32ToFloat(x1);
636 const float u2 = Eigen::numext::sqrt(-2.0f * Eigen::numext::log(u1));
637 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
638 *f0 = Eigen::numext::sin(v1);
639 *f1 = Eigen::numext::cos(v1);
640 #else
641 sincosf(v1, f0, f1);
642 #endif
643 *f0 *= u2;
644 *f1 *= u2;
645 }
646
647 // Helper function to convert four 32-bit uniform integers to two doubles
648 // under the unit normal distribution.
649 PHILOX_DEVICE_INLINE
BoxMullerDouble(uint32 x0,uint32 x1,uint32 x2,uint32 x3,double * d0,double * d1)650 void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, double* d1) {
651 // This function implements the Box-Muller transform:
652 // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form
653 // Do not send a really small number to log().
654 // We cannot mark "epsilon" as "static const" because NVCC would complain
655 const double epsilon = 1.0e-7;
656 double u1 = Uint64ToDouble(x0, x1);
657 if (u1 < epsilon) {
658 u1 = epsilon;
659 }
660 const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3);
661 const double u2 = Eigen::numext::sqrt(-2.0 * Eigen::numext::log(u1));
662 #if defined(TENSORFLOW_USE_SYCL) || !defined(__linux__)
663 *d0 = Eigen::numext::sin(v1);
664 *d1 = Eigen::numext::cos(v1);
665 #else
666 sincos(v1, d0, d1);
667 #endif
668 *d0 *= u2;
669 *d1 *= u2;
670 }
671
672 // Helper function to convert an 16-bit integer to a half between [0..1).
Uint16ToHalf(uint16 x)673 PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) {
674 // IEEE754 halfs are formatted as follows (MSB first):
675 // sign(1) exponent(5) mantissa(10)
676 // Conceptually construct the following:
677 // sign == 0
678 // exponent == 15 -- an excess 15 representation of a zero exponent
679 // mantissa == 10 random bits
680 const uint16 man = x & 0x3ffu; // 10 bit mantissa
681 const uint16 exp = static_cast<uint16>(15);
682 const uint16 val = (exp << 10) | man;
683
684 Eigen::half result;
685 result.x = val;
686 return result - Eigen::half(1.0);
687 }
688
689 // Helper function to convert an 16-bit integer to a bfloat16 between [0..1).
690 // This can create a uniform distribution of values between [0..1).
Uint16ToGfloat16(uint16 x)691 PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) {
692 // bfloat are formatted as follows (MSB first):
693 // sign(1) exponent(8) mantissa(7)
694 // Conceptually construct the following:
695 // sign == 0
696 // exponent == 127 -- an excess 127 representation of a zero exponent
697 // mantissa == 7 random bits
698 const uint16 man = x & 0x7fu; // 7 bit mantissa
699 const uint16 exp = static_cast<uint16>(127);
700 const uint16 val = (exp << 7) | man;
701
702 bfloat16 result;
703 memcpy(&result, &val, sizeof(val));
704 // The mantissa has an implicit leading 1, so the above code creates a value
705 // in [1, 2). The minus will not cause a rounding that makes the result 1.
706 // Instead it will just be close to 1.
707 return result - bfloat16(1.0);
708 }
709
710 // Helper function to convert an 32-bit integer to a float between [0..1).
Uint32ToFloat(uint32 x)711 PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) {
712 // IEEE754 floats are formatted as follows (MSB first):
713 // sign(1) exponent(8) mantissa(23)
714 // Conceptually construct the following:
715 // sign == 0
716 // exponent == 127 -- an excess 127 representation of a zero exponent
717 // mantissa == 23 random bits
718 const uint32 man = x & 0x7fffffu; // 23 bit mantissa
719 const uint32 exp = static_cast<uint32>(127);
720 const uint32 val = (exp << 23) | man;
721
722 // Assumes that endian-ness is same for float and uint32.
723 float result;
724 memcpy(&result, &val, sizeof(val));
725 return result - 1.0f;
726 }
727
728 // Helper function to convert two 32-bit integers to a double between [0..1).
Uint64ToDouble(uint32 x0,uint32 x1)729 PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) {
730 // IEEE754 doubles are formatted as follows (MSB first):
731 // sign(1) exponent(11) mantissa(52)
732 // Conceptually construct the following:
733 // sign == 0
734 // exponent == 1023 -- an excess 1023 representation of a zero exponent
735 // mantissa == 52 random bits
736 const uint32 mhi = x0 & 0xfffffu; // upper 20 bits of mantissa
737 const uint32 mlo = x1; // lower 32 bits of mantissa
738 const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo; // mantissa
739 const uint64 exp = static_cast<uint64>(1023);
740 const uint64 val = (exp << 52) | man;
741 // Assumes that endian-ness is same for double and uint64.
742 double result;
743 memcpy(&result, &val, sizeof(val));
744 return result - 1.0;
745 }
746
747 } // namespace random
748 } // namespace tensorflow
749
750 #endif // TENSORFLOW_CORE_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_
751