1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/utf_string_conversions.h"
6 
7 #include <stdint.h>
8 
9 #include "base/strings/string_piece.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversion_utils.h"
12 #include "build/build_config.h"
13 
14 namespace base {
15 
16 namespace {
17 
18 // Generalized Unicode converter -----------------------------------------------
19 
// Transcodes |src_len| code units starting at |src| into |output|, one code
// point at a time. Each code point is decoded with ReadUnicodeCharacter() and
// re-encoded with WriteUnicodeCharacter(). Whenever decoding fails, U+FFFD is
// written in place of the bad input and conversion continues.
//
// Returns true only if every code point decoded successfully.
//
// NOTE(review): |src_len| is narrowed to int32_t below, so inputs longer than
// INT32_MAX code units are not handled correctly here -- confirm callers never
// pass such lengths.
template <typename SRC_CHAR, typename DEST_STRING>
bool ConvertUnicode(const SRC_CHAR* src, size_t src_len, DEST_STRING* output) {
  // The reader works with 32-bit indices (ICU requires 32-bit numbers).
  const int32_t length = static_cast<int32_t>(src_len);
  bool all_valid = true;

  int32_t pos = 0;
  while (pos < length) {
    uint32_t decoded;
    // ReadUnicodeCharacter() receives |pos| by pointer so it can move the
    // index within a multi-unit sequence; the increment below then steps to
    // the start of the next character.
    if (!ReadUnicodeCharacter(src, length, &pos, &decoded)) {
      WriteUnicodeCharacter(0xFFFD, output);
      all_valid = false;
    } else {
      WriteUnicodeCharacter(decoded, output);
    }
    pos++;
  }

  return all_valid;
}
43 
44 }  // namespace
45 
46 // UTF-8 <-> Wide --------------------------------------------------------------
47 
WideToUTF8(const wchar_t * src,size_t src_len,std::string * output)48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
49   if (IsStringASCII(std::wstring(src, src_len))) {
50     output->assign(src, src + src_len);
51     return true;
52   } else {
53     PrepareForUTF8Output(src, src_len, output);
54     return ConvertUnicode(src, src_len, output);
55   }
56 }
57 
WideToUTF8(const std::wstring & wide)58 std::string WideToUTF8(const std::wstring& wide) {
59   if (IsStringASCII(wide)) {
60     return std::string(wide.data(), wide.data() + wide.length());
61   }
62 
63   std::string ret;
64   PrepareForUTF8Output(wide.data(), wide.length(), &ret);
65   ConvertUnicode(wide.data(), wide.length(), &ret);
66   return ret;
67 }
68 
UTF8ToWide(const char * src,size_t src_len,std::wstring * output)69 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
70   if (IsStringASCII(StringPiece(src, src_len))) {
71     output->assign(src, src + src_len);
72     return true;
73   } else {
74     PrepareForUTF16Or32Output(src, src_len, output);
75     return ConvertUnicode(src, src_len, output);
76   }
77 }
78 
UTF8ToWide(StringPiece utf8)79 std::wstring UTF8ToWide(StringPiece utf8) {
80   if (IsStringASCII(utf8)) {
81     return std::wstring(utf8.begin(), utf8.end());
82   }
83 
84   std::wstring ret;
85   PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
86   ConvertUnicode(utf8.data(), utf8.length(), &ret);
87   return ret;
88 }
89 
90 // UTF-16 <-> Wide -------------------------------------------------------------
91 
92 #if defined(WCHAR_T_IS_UTF16)
93 
// When wchar_t is UTF-16, no transcoding is needed: these conversions simply
// copy the data.
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)95 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
96   output->assign(src, src_len);
97   return true;
98 }
99 
WideToUTF16(const std::wstring & wide)100 string16 WideToUTF16(const std::wstring& wide) {
101   return wide;
102 }
103 
// Copies the |src_len| UTF-16 code units at |src| into |output| unchanged.
// No transcoding is performed in this configuration, so this always returns
// true.
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
  output->assign(src, src + src_len);
  return true;
}
108 
UTF16ToWide(const string16 & utf16)109 std::wstring UTF16ToWide(const string16& utf16) {
110   return utf16;
111 }
112 
113 #elif defined(WCHAR_T_IS_UTF32)
114 
WideToUTF16(const wchar_t * src,size_t src_len,string16 * output)115 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
116   output->clear();
117   // Assume that normally we won't have any non-BMP characters so the counts
118   // will be the same.
119   output->reserve(src_len);
120   return ConvertUnicode(src, src_len, output);
121 }
122 
WideToUTF16(const std::wstring & wide)123 string16 WideToUTF16(const std::wstring& wide) {
124   string16 ret;
125   WideToUTF16(wide.data(), wide.length(), &ret);
126   return ret;
127 }
128 
UTF16ToWide(const char16 * src,size_t src_len,std::wstring * output)129 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
130   output->clear();
131   // Assume that normally we won't have any non-BMP characters so the counts
132   // will be the same.
133   output->reserve(src_len);
134   return ConvertUnicode(src, src_len, output);
135 }
136 
UTF16ToWide(const string16 & utf16)137 std::wstring UTF16ToWide(const string16& utf16) {
138   std::wstring ret;
139   UTF16ToWide(utf16.data(), utf16.length(), &ret);
140   return ret;
141 }
142 
143 #endif  // defined(WCHAR_T_IS_UTF32)
144 
145 // UTF16 <-> UTF8 --------------------------------------------------------------
146 
147 #if defined(WCHAR_T_IS_UTF32)
148 
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)149 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
150   if (IsStringASCII(StringPiece(src, src_len))) {
151     output->assign(src, src + src_len);
152     return true;
153   } else {
154     PrepareForUTF16Or32Output(src, src_len, output);
155     return ConvertUnicode(src, src_len, output);
156   }
157 }
158 
UTF8ToUTF16(StringPiece utf8)159 string16 UTF8ToUTF16(StringPiece utf8) {
160   if (IsStringASCII(utf8)) {
161     return string16(utf8.begin(), utf8.end());
162   }
163 
164   string16 ret;
165   PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret);
166   // Ignore the success flag of this call, it will do the best it can for
167   // invalid input, which is what we want here.
168   ConvertUnicode(utf8.data(), utf8.length(), &ret);
169   return ret;
170 }
171 
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)172 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
173   if (IsStringASCII(StringPiece16(src, src_len))) {
174     output->assign(src, src + src_len);
175     return true;
176   } else {
177     PrepareForUTF8Output(src, src_len, output);
178     return ConvertUnicode(src, src_len, output);
179   }
180 }
181 
UTF16ToUTF8(StringPiece16 utf16)182 std::string UTF16ToUTF8(StringPiece16 utf16) {
183   if (IsStringASCII(utf16)) {
184     return std::string(utf16.begin(), utf16.end());
185   }
186 
187   std::string ret;
188   // Ignore the success flag of this call, it will do the best it can for
189   // invalid input, which is what we want here.
190   UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
191   return ret;
192 }
193 
194 #elif defined(WCHAR_T_IS_UTF16)
195 // Easy case since we can use the "wide" versions we already wrote above.
196 
UTF8ToUTF16(const char * src,size_t src_len,string16 * output)197 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
198   return UTF8ToWide(src, src_len, output);
199 }
200 
UTF8ToUTF16(StringPiece utf8)201 string16 UTF8ToUTF16(StringPiece utf8) {
202   return UTF8ToWide(utf8);
203 }
204 
UTF16ToUTF8(const char16 * src,size_t src_len,std::string * output)205 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
206   return WideToUTF8(src, src_len, output);
207 }
208 
UTF16ToUTF8(StringPiece16 utf16)209 std::string UTF16ToUTF8(StringPiece16 utf16) {
210   if (IsStringASCII(utf16))
211     return std::string(utf16.data(), utf16.data() + utf16.length());
212 
213   std::string ret;
214   PrepareForUTF8Output(utf16.data(), utf16.length(), &ret);
215   ConvertUnicode(utf16.data(), utf16.length(), &ret);
216   return ret;
217 }
218 
219 #endif
220 
ASCIIToUTF16(StringPiece ascii)221 string16 ASCIIToUTF16(StringPiece ascii) {
222   DCHECK(IsStringASCII(ascii)) << ascii;
223   return string16(ascii.begin(), ascii.end());
224 }
225 
UTF16ToASCII(StringPiece16 utf16)226 std::string UTF16ToASCII(StringPiece16 utf16) {
227   DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
228   return std::string(utf16.begin(), utf16.end());
229 }
230 
231 }  // namespace base
232