1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Block split point selection utilities. */
8 
9 #include "./block_splitter.h"
10 
11 #include <string.h>  /* memcpy, memset */
12 
13 #include "../common/platform.h"
14 #include "./bit_cost.h"
15 #include "./cluster.h"
16 #include "./command.h"
17 #include "./fast_log.h"
18 #include "./histogram.h"
19 #include "./memory.h"
20 #include "./quality.h"
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
/* Parameters handed to SplitByteVectorLiteral by BrotliSplitBlock. */
static const size_t kMaxLiteralHistograms = 100;
static const size_t kLiteralStrideLength = 70;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const double kLiteralBlockSwitchCost = 28.1;

/* Parameters handed to SplitByteVectorCommand by BrotliSplitBlock. The
   histogram cap and the stride length are also passed to the distance
   splitter. */
static const size_t kMaxCommandHistograms = 50;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerCommandHistogram = 530;
static const double kCommandBlockSwitchCost = 13.5;

/* Parameters handed only to SplitByteVectorDistance by BrotliSplitBlock. */
static const size_t kSymbolsPerDistanceHistogram = 544;
static const double kDistanceBlockSwitchCost = 14.6;

/* Not referenced directly in this file; presumably consumed by the code
   pulled in from block_splitter_inc.h -- confirm there. */
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
39 
CountLiterals(const Command * cmds,const size_t num_commands)40 static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
41   /* Count how many we have. */
42   size_t total_length = 0;
43   size_t i;
44   for (i = 0; i < num_commands; ++i) {
45     total_length += cmds[i].insert_len_;
46   }
47   return total_length;
48 }
49 
CopyLiteralsToByteArray(const Command * cmds,const size_t num_commands,const uint8_t * data,const size_t offset,const size_t mask,uint8_t * literals)50 static void CopyLiteralsToByteArray(const Command* cmds,
51                                     const size_t num_commands,
52                                     const uint8_t* data,
53                                     const size_t offset,
54                                     const size_t mask,
55                                     uint8_t* literals) {
56   size_t pos = 0;
57   size_t from_pos = offset & mask;
58   size_t i;
59   for (i = 0; i < num_commands; ++i) {
60     size_t insert_len = cmds[i].insert_len_;
61     if (from_pos + insert_len > mask) {
62       size_t head_size = mask + 1 - from_pos;
63       memcpy(literals + pos, data + from_pos, head_size);
64       from_pos = 0;
65       pos += head_size;
66       insert_len -= head_size;
67     }
68     if (insert_len > 0) {
69       memcpy(literals + pos, data + from_pos, insert_len);
70       pos += insert_len;
71     }
72     from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
73   }
74 }
75 
MyRand(uint32_t * seed)76 static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
77   /* Initial seed should be 7. In this case, loop length is (1 << 29). */
78   *seed *= 16807U;
79   return *seed;
80 }
81 
BitCost(size_t count)82 static BROTLI_INLINE double BitCost(size_t count) {
83   return count == 0 ? -2.0 : FastLog2(count);
84 }
85 
86 #define HISTOGRAMS_PER_BATCH 64
87 #define CLUSTERS_PER_BATCH 16
88 
89 #define FN(X) X ## Literal
90 #define DataType uint8_t
91 /* NOLINTNEXTLINE(build/include) */
92 #include "./block_splitter_inc.h"
93 #undef DataType
94 #undef FN
95 
96 #define FN(X) X ## Command
97 #define DataType uint16_t
98 /* NOLINTNEXTLINE(build/include) */
99 #include "./block_splitter_inc.h"
100 #undef FN
101 
102 #define FN(X) X ## Distance
103 /* NOLINTNEXTLINE(build/include) */
104 #include "./block_splitter_inc.h"
105 #undef DataType
106 #undef FN
107 
/* Puts |self| into the empty state: both arrays are null with a recorded
   allocation size of zero, and the type and block counters are zero.
   Nothing is freed here, so any previous contents of |self| are simply
   overwritten. */
void BrotliInitBlockSplit(BlockSplit* self) {
  /* No storage attached. */
  self->types = NULL;
  self->types_alloc_size = 0;
  self->lengths = NULL;
  self->lengths_alloc_size = 0;
  /* Nothing recorded. */
  self->num_blocks = 0;
  self->num_types = 0;
}
116 
/* Releases the |types| and |lengths| arrays of |self| through memory manager
   |m|. The counters and the *_alloc_size fields of |self| are not modified
   by this function. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}
121 
/* Computes three block splits for a sequence of commands:
     - literal_split:         over the inserted (literal) bytes;
     - insert_and_copy_split: over the cmd_prefix_ value of every command;
     - dist_split:            over the distance prefixes of the commands that
                              have a non-zero copy length and a cmd_prefix_
                              of at least 128.

   Each stage builds a temporary symbol array with |m|, runs the matching
   SplitByteVector* routine on it and frees the array again.

   m:            memory manager used for the temporary arrays.
   cmds:         commands to analyse (num_commands entries).
   data, pos, mask: source bytes, start position and index mask, forwarded
                 to CopyLiteralsToByteArray.
   params:       encoder parameters, forwarded to the splitters.

   The function returns early whenever BROTLI_IS_OOM(m) reports a failure;
   splits not yet computed are left untouched in that case.
   NOTE(review): an early return after a SplitByteVector* call skips the
   BROTLI_FREE of that stage's temporary array; presumably the memory manager
   reclaims it on OOM -- confirm. */
void BrotliSplitBlock(MemoryManager* m,
                      const Command* cmds,
                      const size_t num_commands,
                      const uint8_t* data,
                      const size_t pos,
                      const size_t mask,
                      const BrotliEncoderParams* params,
                      BlockSplit* literal_split,
                      BlockSplit* insert_and_copy_split,
                      BlockSplit* dist_split) {
  /* Stage 1: literals. */
  {
    const size_t num_literals = CountLiterals(cmds, num_commands);
    uint8_t* literal_bytes = BROTLI_ALLOC(m, uint8_t, num_literals);
    if (BROTLI_IS_OOM(m)) return;
    /* Flatten all inserted bytes into one contiguous array. */
    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask,
                            literal_bytes);
    /* Split the literal array.
       Literal histograms have alphabet size 256. */
    SplitByteVectorLiteral(
        m, literal_bytes, num_literals,
        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
        literal_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, literal_bytes);
  }

  /* Stage 2: insert-and-copy command prefixes. */
  {
    uint16_t* cmd_symbols = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t idx;
    if (BROTLI_IS_OOM(m)) return;
    /* One symbol per command: its prefix code. */
    for (idx = 0; idx < num_commands; ++idx) {
      cmd_symbols[idx] = cmds[idx].cmd_prefix_;
    }
    SplitByteVectorCommand(
        m, cmd_symbols, num_commands,
        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kCommandBlockSwitchCost, params,
        insert_and_copy_split);
    if (BROTLI_IS_OOM(m)) return;
    /* TODO: reuse for distances? */
    BROTLI_FREE(m, cmd_symbols);
  }

  /* Stage 3: distance prefixes. */
  {
    /* Sized for the worst case of one distance symbol per command. */
    uint16_t* dist_symbols = BROTLI_ALLOC(m, uint16_t, num_commands);
    size_t num_dist_symbols = 0;
    size_t idx;
    if (BROTLI_IS_OOM(m)) return;
    for (idx = 0; idx < num_commands; ++idx) {
      const Command* cmd = &cmds[idx];
      /* Only commands with a non-zero copy length and a command prefix of
         at least 128 contribute a distance symbol. */
      if (!CommandCopyLen(cmd)) continue;
      if (cmd->cmd_prefix_ < 128) continue;
      /* Keep the low 10 bits of the distance prefix. */
      dist_symbols[num_dist_symbols] = cmd->dist_prefix_ & 0x3FF;
      ++num_dist_symbols;
    }
    /* The distance split reuses the command histogram cap and stride. */
    SplitByteVectorDistance(
        m, dist_symbols, num_dist_symbols,
        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
        kCommandStrideLength, kDistanceBlockSwitchCost, params,
        dist_split);
    if (BROTLI_IS_OOM(m)) return;
    BROTLI_FREE(m, dist_symbols);
  }
}
190 
191 
192 #if defined(__cplusplus) || defined(c_plusplus)
193 }  /* extern "C" */
194 #endif
195