1 // Copyright 2015 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
// bit_depth.h: defines the BitDepth template, the RoundingMode enum,
// the rounding strategies and the default bit-depth parameter structs.
16 
17 #ifndef GEMMLOWP_PUBLIC_BIT_DEPTH_H_
18 #define GEMMLOWP_PUBLIC_BIT_DEPTH_H_
19 
20 namespace gemmlowp {
21 
// A specific bit depth to requantize an operand (Lhs or Rhs) to.
// The case tBits==8 means no requantization, since at the moment
// we only accept 8-bit input data.
//
// Template parameter:
//   tBits: the target bit depth; must lie in [1, 8], otherwise
//          instantiating the struct fails to compile.
template <int tBits>
struct BitDepth {
  static_assert(tBits >= 1 && tBits <= 8, "bad bit depth");

  // The bit depth, exposed as a compile-time constant.
  // Declared constexpr rather than plain `static const`: since C++17 a
  // constexpr static data member is implicitly inline, so odr-using it
  // (e.g. binding it to a `const int&` parameter) no longer requires a
  // separate out-of-class definition. Under C++11/14 the meaning is
  // the same as the former `static const int`.
  static constexpr int kBits = tBits;
};
30 
// Rounding modes available when requantizing an operand.
//
// Requantization computes
//   dst = (src * maxval + rounding_offset) / 255;
// with src and dst being uint8 and maxval = 2^(dstbits)-1. Intermediate
// values are computed as uint16s, so no overflow occurs.
// rounding_offset is a value in [0..254]; each enumerator below states
// how the corresponding mode determines it.
enum class RoundingMode {
  // No rounding, do nothing. Use with bit_depth == 8.
  Exact = 0,
  // rounding_offset = 127
  Nearest = 1,
  // rounding_offset given by 8-bit Xorshift PRNG
  ProbabilisticXorshift = 2,
  // rounding_offset given by 8-bit add/mod LDSG
  ProbabilisticAddmod = 3
};
45 
46 // A rounding strategy is a heuristic for choosing a rounding mode.
47 // When the bit depth is 8 bit like the source, there is no
48 // quantization to be done, so this is moot. In this case, we use
49 // the following "no-op" "strategy",
50 struct ExactRoundingStrategyFor8Bit {
51   static const RoundingMode kRoundingModeForSmallSizes = RoundingMode::Exact;
52   static const RoundingMode kRoundingModeForLargeSizes = RoundingMode::Exact;
53   static const int kRoundingModeSizeThreshold = 0;
54 };
55 
56 // Default rounding strategy when actually requantizing to less than 8 bit.
57 // Round-to-nearest tends to give the best results for small enough
58 // accumulation sizes (i.e. accumulation depth, but we refrain from using
59 // the word "depth" here as it gets confusing with "bit depth").
60 // Some flavor of probabilistic tends to perform better for larger sizes.
61 // See doc/less-than-8-bit.txt for details.
62 struct DefaultRoundingStrategyForLessThan8Bit {
63   static const RoundingMode kRoundingModeForSmallSizes = RoundingMode::Nearest;
64   static const RoundingMode kRoundingModeForLargeSizes =
65       RoundingMode::ProbabilisticAddmod;
66 
67   // The threshold on the depth dimension at which we switch to
68   // probabilistic rounding instead of rounding-to-nearest when
69   // requantizing input data. Indeed, both statistical theory and
70   // empirical measurements show that for given input data and bit depth,
71   // probabilistic rounding gives more accurate results for large enough
72   // depth, while rounding-to-nearest does for smaller depth. This threshold
73   // is naively determined from some experiments with Inception at 7bit/5bit
74   // on a set of 10,000 images with 8-bit Xorshift probabilistic rounding:
75   //
76   //   7 bit weights, 5 bit activations, switch at 64:   59.82% top-1 accuracy
77   //   7 bit weights, 5 bit activations, switch at 128:  59.58% top-1 accuracy
78   //   7 bit weights, 5 bit activations, switch at 192:  63.37% top-1 accuracy
79   //   7 bit weights, 5 bit activations, switch at 256:  63.47% top-1 accuracy
80   //   7 bit weights, 5 bit activations, switch at 320:  63.71% top-1 accuracy
81   //   7 bit weights, 5 bit activations, switch at 384:  63.71% top-1 accuracy
82   //   7 bit weights, 5 bit activations, switch at 448:  63.58% top-1 accuracy
83   //   7 bit weights, 5 bit activations, switch at 512:  64.10% top-1 accuracy
84   //   7 bit weights, 5 bit activations, switch at 640:  62.49% top-1 accuracy
85   //   7 bit weights, 5 bit activations, switch at 768:  62.49% top-1 accuracy
86   //   7 bit weights, 5 bit activations, switch at 1024: 58.96% top-1 accuracy
87   //
88   // So here, 384 looks comfortably in the middle of a plateau of good values,
89   // and it's a roundish number (3/2 * 256) so let's stick with that for now.
90   // It would be nice to work out the theory of this, and understand how this
91   // should depend on the distribution of inputs and the bit depth.
92   //
93   // Repeating the same evaluation with AddMod:
94   //   7 bit weights, 5 bit activations, switch at 64:   62.65% top-1 accuracy
95   //   7 bit weights, 5 bit activations, switch at 128:  62.65% top-1 accuracy
96   //   7 bit weights, 5 bit activations, switch at 192:  63.81% top-1 accuracy
97   //   7 bit weights, 5 bit activations, switch at 256:  64.23% top-1 accuracy
98   //   7 bit weights, 5 bit activations, switch at 320:  64.16% top-1 accuracy
99   //   7 bit weights, 5 bit activations, switch at 384:  64.16% top-1 accuracy
100   //   7 bit weights, 5 bit activations, switch at 448:  64.16% top-1 accuracy
101   //   7 bit weights, 5 bit activations, switch at 512:  64.52% top-1 accuracy
102   //   7 bit weights, 5 bit activations, switch at 640:  62.74% top-1 accuracy
103   //   7 bit weights, 5 bit activations, switch at 768:  62.74% top-1 accuracy
104   //   7 bit weights, 5 bit activations, switch at 1024: 59.74% top-1 accuracy
105   //
106   // The behavior is similar, so 384 remains a good choice.
107 
108   static const int kRoundingModeSizeThreshold = 384;
109 };
110 
111 struct DefaultL8R8BitDepthParams {
112   typedef BitDepth<8> LhsBitDepth;
113   typedef BitDepth<8> RhsBitDepth;
114   typedef ExactRoundingStrategyFor8Bit RoundingStrategy;
115 };
116 
117 struct DefaultL7R5BitDepthParams {
118   typedef BitDepth<7> LhsBitDepth;
119   typedef BitDepth<5> RhsBitDepth;
120   typedef DefaultRoundingStrategyForLessThan8Bit RoundingStrategy;
121 };
122 
123 }  // namespace gemmlowp
124 
125 #endif  // GEMMLOWP_PUBLIC_BIT_DEPTH_H_
126