1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 #include "./transform.h"
8 
9 #if defined(__cplusplus) || defined(c_plusplus)
10 extern "C" {
11 #endif
12 
/* RFC 7932 transforms string data.

   Every prefix/suffix string used by the transforms, packed back to back into
   one blob. Each entry is a single length byte (written as an octal escape,
   e.g. "\10" is 8) followed by exactly that many payload bytes; entries are
   not NUL-terminated. The last entry, "\0", is the empty string.

   The comment line under each piece of the literal is an offset ruler: the
   leading "Nx" is the high hex digit of the offset and each "_D" marks the low
   hex digit at which an entry's length byte starts. Those start offsets are
   the values stored in kPrefixSuffixMap, so any change here must be mirrored
   there (and in the explicit array size). */
static const char kPrefixSuffix[217] =
      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
      "that \1\'\6 with \6 from \4 by \1(\6. T"
/* 4x       _5_ _7      _E      _5    _A _C */
      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
      " not \3er \3al \4ful \4ive \5less \4es"
/* Ax       _5   _9   _D    _2    _7     _D */
      "t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
/* Cx    _2    _7___ ___ _A    _F     _5  _8 */
29 
/* Start offsets of the 50 length-prefixed entries inside kPrefixSuffix:
   element i is the byte offset of entry i's length byte. The values match the
   offset ruler comments interleaved with the kPrefixSuffix literal. Entry 49
   (offset 0xD8) is the final "\0" entry, i.e. the empty string. */
static const uint16_t kPrefixSuffixMap[50] = {
  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
};
37 
/* RFC 7932 transforms.

   A flat array of byte triplets, one triplet (one source row) per transform;
   the number of transforms is derived from the array size divided by 3 where
   kBrotliTransforms is initialized. The middle byte is a BROTLI_TRANSFORM_*
   type. The first and third bytes are presumably the prefix id and suffix id
   (indices into kPrefixSuffixMap) as decoded by the BROTLI_TRANSFORM_PREFIX /
   BROTLI_TRANSFORM_SUFFIX macros in transform.h -- confirm there. Id 49 is
   the empty string.

   Row order is significant: a row's position is its transform index, and
   kBrotliTransforms stores several row indices by number. Do not reorder. */
static const uint8_t kTransformsData[] = {
  49, BROTLI_TRANSFORM_IDENTITY, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 0,
   0, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 47,
   0, BROTLI_TRANSFORM_IDENTITY, 49,
   4, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 3,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 6,
  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
   1, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 1,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 7,
  49, BROTLI_TRANSFORM_IDENTITY, 9,
  48, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 8,
  49, BROTLI_TRANSFORM_IDENTITY, 5,
  49, BROTLI_TRANSFORM_IDENTITY, 10,
  49, BROTLI_TRANSFORM_IDENTITY, 11,
  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 13,
  49, BROTLI_TRANSFORM_IDENTITY, 14,
  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 15,
  49, BROTLI_TRANSFORM_IDENTITY, 16,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 12,
   5, BROTLI_TRANSFORM_IDENTITY, 49,
   0, BROTLI_TRANSFORM_IDENTITY, 1,
  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 18,
  49, BROTLI_TRANSFORM_IDENTITY, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 19,
  49, BROTLI_TRANSFORM_IDENTITY, 20,
  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
  47, BROTLI_TRANSFORM_IDENTITY, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 22,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 23,
  49, BROTLI_TRANSFORM_IDENTITY, 24,
  49, BROTLI_TRANSFORM_IDENTITY, 25,
  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
  49, BROTLI_TRANSFORM_IDENTITY, 27,
  49, BROTLI_TRANSFORM_IDENTITY, 28,
   0, BROTLI_TRANSFORM_IDENTITY, 12,
  49, BROTLI_TRANSFORM_IDENTITY, 29,
  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 31,
  49, BROTLI_TRANSFORM_IDENTITY, 32,
  47, BROTLI_TRANSFORM_IDENTITY, 3,
  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
   5, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
  49, BROTLI_TRANSFORM_IDENTITY, 30,
   0, BROTLI_TRANSFORM_IDENTITY, 5,
  35, BROTLI_TRANSFORM_IDENTITY, 49,
  47, BROTLI_TRANSFORM_IDENTITY, 2,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 36,
  49, BROTLI_TRANSFORM_IDENTITY, 33,
   5, BROTLI_TRANSFORM_IDENTITY, 0,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  49, BROTLI_TRANSFORM_IDENTITY, 37,
   0, BROTLI_TRANSFORM_IDENTITY, 30,
  49, BROTLI_TRANSFORM_IDENTITY, 38,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  49, BROTLI_TRANSFORM_IDENTITY, 39,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  49, BROTLI_TRANSFORM_IDENTITY, 34,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
   0, BROTLI_TRANSFORM_IDENTITY, 21,
  49, BROTLI_TRANSFORM_IDENTITY, 40,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
  49, BROTLI_TRANSFORM_IDENTITY, 41,
  49, BROTLI_TRANSFORM_IDENTITY, 42,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
  49, BROTLI_TRANSFORM_IDENTITY, 43,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
   0, BROTLI_TRANSFORM_IDENTITY, 34,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
  49, BROTLI_TRANSFORM_IDENTITY, 44,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  45, BROTLI_TRANSFORM_IDENTITY, 49,
   0, BROTLI_TRANSFORM_IDENTITY, 33,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
  49, BROTLI_TRANSFORM_IDENTITY, 46,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
};
162 
163 static BrotliTransforms kBrotliTransforms = {
164   sizeof(kPrefixSuffix),
165   (const uint8_t*)kPrefixSuffix,
166   kPrefixSuffixMap,
167   sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
168   kTransformsData,
169   {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
170 };
171 
BrotliGetTransforms(void)172 const BrotliTransforms* BrotliGetTransforms(void) {
173   return &kBrotliTransforms;
174 }
175 
/* Uppercases, in place, the single character starting at |p| using a
   deliberately simplified UTF-8 model, and returns how many bytes that
   character is considered to occupy (1, 2 or 3).

   The character class is decided from the lead byte p[0] alone:
     - p[0] <  0xC0: one byte; ASCII 'a'..'z' are mapped to 'A'..'Z', any
                     other value is left untouched.
     - p[0] <  0xE0: two bytes; p[1] is XOR-ed with 32.
     - otherwise:    three bytes; p[2] is XOR-ed with 5.

   No length is passed in, so the caller must guarantee that up to three bytes
   starting at |p| are accessible. */
static int ToUpperCase(uint8_t* p) {
  const uint8_t lead = p[0];

  if (lead >= 0xE0) {
    /* An arbitrary transform for three byte characters. */
    p[2] ^= 5;
    return 3;
  }
  if (lead >= 0xC0) {
    /* An overly simplified uppercasing model for UTF-8. */
    p[1] ^= 32;
    return 2;
  }
  /* Single-byte character: only lowercase ASCII letters change. */
  if (lead >= 'a' && lead <= 'z') {
    p[0] = (uint8_t)(lead ^ 32);
  }
  return 1;
}
192 
BrotliTransformDictionaryWord(uint8_t * dst,const uint8_t * word,int len,const BrotliTransforms * transforms,int transform_idx)193 int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
194     const BrotliTransforms* transforms, int transform_idx) {
195   int idx = 0;
196   const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
197   uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
198   const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
199   {
200     int prefix_len = *prefix++;
201     while (prefix_len--) { dst[idx++] = *prefix++; }
202   }
203   {
204     const int t = type;
205     int i = 0;
206     if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
207       len -= t;
208     } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
209         && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
210       int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
211       word += skip;
212       len -= skip;
213     }
214     while (i < len) { dst[idx++] = word[i++]; }
215     if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
216       ToUpperCase(&dst[idx - len]);
217     } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
218       uint8_t* uppercase = &dst[idx - len];
219       while (len > 0) {
220         int step = ToUpperCase(uppercase);
221         uppercase += step;
222         len -= step;
223       }
224     }
225   }
226   {
227     int suffix_len = *suffix++;
228     while (suffix_len--) { dst[idx++] = *suffix++; }
229     return idx;
230   }
231 }
232 
233 #if defined(__cplusplus) || defined(c_plusplus)
234 }  /* extern "C" */
235 #endif
236