1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/utf8_fix.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 
20 namespace protobuf_mutator {
21 
22 namespace {
23 
// Encodes |code| as a |size|-byte UTF-8 sequence that ends right before |e|,
// i.e. the bytes [e - size, e) are overwritten. Filling goes backwards: every
// byte except the first becomes a continuation byte (10xxxxxx) carrying the
// next six low bits of |code|; the first byte is |prefix| combined with
// whatever bits of |code| are left.
void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
  char* out = e;
  // All bytes but the leading one are continuation bytes.
  const uint8_t continuation_count = static_cast<uint8_t>(size - 1);
  for (uint8_t i = 0; i != continuation_count; ++i) {
    out -= 1;
    *out = 0x80 | (code & 0x3F);
    code = code >> 6;
  }
  // Leading byte: length marker plus the remaining high bits.
  out[-1] = prefix | code;
}
31 
FixCode(char * b,const char * e,RandomEngine * random)32 char* FixCode(char* b, const char* e, RandomEngine* random) {
33   const char* start = b;
34   assert(b < e);
35 
36   e = std::min<const char*>(e, b + 4);
37   char32_t c = *b++;
38   for (; b < e && (*b & 0xC0) == 0x80; ++b) {
39     c = (c << 6) + (*b & 0x3F);
40   }
41   uint8_t size = b - start;
42   switch (size) {
43     case 1:
44       c &= 0x7F;
45       StoreCode(b, c, size, 0);
46       break;
47     case 2:
48       c &= 0x7FF;
49       if (c < 0x80) {
50         // Use uint32_t because uniform_int_distribution does not support
51         // char32_t on Windows.
52         c = std::uniform_int_distribution<uint32_t>(0x80, 0x7FF)(*random);
53       }
54       StoreCode(b, c, size, 0xC0);
55       break;
56     case 3:
57       c &= 0xFFFF;
58 
59       // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves.
60       if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
61         uint32_t halves = 0xE000 - 0xD800;
62         c = std::uniform_int_distribution<uint32_t>(0x800,
63                                                     0xFFFF - halves)(*random);
64         if (c >= 0xD800) c += halves;
65       }
66       StoreCode(b, c, size, 0xE0);
67       break;
68     case 4:
69       c &= 0x1FFFFF;
70       if (c < 0x10000 || c > 0x10FFFF) {
71         c = std::uniform_int_distribution<uint32_t>(0x10000, 0x10FFFF)(*random);
72       }
73       StoreCode(b, c, size, 0xF0);
74       break;
75     default:
76       assert(false && "Unexpected size of UTF-8 sequence");
77   }
78   return b;
79 }
80 
81 }  // namespace
82 
FixUtf8String(std::string * str,RandomEngine * random)83 void FixUtf8String(std::string* str, RandomEngine* random) {
84   if (str->empty()) return;
85   char* b = &(*str)[0];
86   const char* e = b + str->size();
87   while (b < e) {
88     b = FixCode(b, e, random);
89   }
90 }
91 
92 }  // namespace protobuf_mutator
93