1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/string_escape.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <limits>
11 #include <string>
12 
13 #include "base/strings/string_util.h"
14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversion_utils.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/third_party/icu/icu_utf.h"
18 
19 namespace base {
20 
21 namespace {
22 
// printf-style format that renders one value as a JSON \uXXXX escape
// (four upper-case hex digits, zero padded).
constexpr char kU16EscapeFormat[] = "\\u%04X";

// Code point substituted for any input that cannot be decoded or is not a
// valid character.
constexpr uint32_t kReplacementCodePoint = 0xFFFD;

// EscapeSpecialCodePoint() hard-codes the escape for '<' as \u003C, so make
// sure the character really has that value.
static_assert('<' == 0x3C, "less than sign must be 0x3c");
31 
// Appends a dedicated escape sequence for |code_point| to |dest| when the
// code point is one of the known special characters. Returns true if an
// escape was appended; returns false and leaves |dest| untouched otherwise.
// These short forms are not required by the spec, but they are easier for
// humans to read.
bool EscapeSpecialCodePoint(uint32_t code_point, std::string* dest) {
  // WARNING: if you add a new case here, you need to update the reader as well.
  // Note: \v is in the reader, but not here since the JSON spec doesn't
  // allow it.
  const char* escaped = nullptr;
  switch (code_point) {
    case '\b':
      escaped = "\\b";
      break;
    case '\f':
      escaped = "\\f";
      break;
    case '\n':
      escaped = "\\n";
      break;
    case '\r':
      escaped = "\\r";
      break;
    case '\t':
      escaped = "\\t";
      break;
    case '\\':
      escaped = "\\\\";
      break;
    case '"':
      escaped = "\\\"";
      break;
    // '<' is escaped to prevent script execution. '>' is deliberately left
    // alone: escaping it is unnecessary and skipping it saves a few bytes.
    case '<':
      escaped = "\\u003C";
      break;
    // "Line Separator" and "Paragraph Separator" should be treated like the
    // new line characters \r and \n, so they are escaped as well.
    case 0x2028:
      escaped = "\\u2028";
      break;
    case 0x2029:
      escaped = "\\u2029";
      break;
    default:
      // Not a special character; the caller decides how to emit it.
      return false;
  }
  dest->append(escaped);
  return true;
}
79 
80 template <typename S>
EscapeJSONStringImpl(const S & str,bool put_in_quotes,std::string * dest)81 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
82   bool did_replacement = false;
83 
84   if (put_in_quotes)
85     dest->push_back('"');
86 
87   // Casting is necessary because ICU uses int32_t. Try and do so safely.
88   CHECK_LE(str.length(),
89            static_cast<size_t>(std::numeric_limits<int32_t>::max()));
90   const int32_t length = static_cast<int32_t>(str.length());
91 
92   for (int32_t i = 0; i < length; ++i) {
93     uint32_t code_point;
94     if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
95         code_point == static_cast<decltype(code_point)>(CBU_SENTINEL) ||
96         !IsValidCharacter(code_point)) {
97       code_point = kReplacementCodePoint;
98       did_replacement = true;
99     }
100 
101     if (EscapeSpecialCodePoint(code_point, dest))
102       continue;
103 
104     // Escape non-printing characters.
105     if (code_point < 32)
106       base::StringAppendF(dest, kU16EscapeFormat, code_point);
107     else
108       WriteUnicodeCharacter(code_point, dest);
109   }
110 
111   if (put_in_quotes)
112     dest->push_back('"');
113 
114   return !did_replacement;
115 }
116 
117 }  // namespace
118 
EscapeJSONString(StringPiece str,bool put_in_quotes,std::string * dest)119 bool EscapeJSONString(StringPiece str, bool put_in_quotes, std::string* dest) {
120   return EscapeJSONStringImpl(str, put_in_quotes, dest);
121 }
122 
EscapeJSONString(StringPiece16 str,bool put_in_quotes,std::string * dest)123 bool EscapeJSONString(StringPiece16 str,
124                       bool put_in_quotes,
125                       std::string* dest) {
126   return EscapeJSONStringImpl(str, put_in_quotes, dest);
127 }
128 
GetQuotedJSONString(StringPiece str)129 std::string GetQuotedJSONString(StringPiece str) {
130   std::string dest;
131   bool ok = EscapeJSONStringImpl(str, true, &dest);
132   DCHECK(ok);
133   return dest;
134 }
135 
GetQuotedJSONString(StringPiece16 str)136 std::string GetQuotedJSONString(StringPiece16 str) {
137   std::string dest;
138   bool ok = EscapeJSONStringImpl(str, true, &dest);
139   DCHECK(ok);
140   return dest;
141 }
142 
EscapeBytesAsInvalidJSONString(StringPiece str,bool put_in_quotes)143 std::string EscapeBytesAsInvalidJSONString(StringPiece str,
144                                            bool put_in_quotes) {
145   std::string dest;
146 
147   if (put_in_quotes)
148     dest.push_back('"');
149 
150   for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
151     unsigned char c = *it;
152     if (EscapeSpecialCodePoint(c, &dest))
153       continue;
154 
155     if (c < 32 || c > 126)
156       base::StringAppendF(&dest, kU16EscapeFormat, c);
157     else
158       dest.push_back(*it);
159   }
160 
161   if (put_in_quotes)
162     dest.push_back('"');
163 
164   return dest;
165 }
166 
167 }  // namespace base
168