1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/sys_string_conversions.h"
6
7 #include <stddef.h>
8 #include <wchar.h>
9
10 #include "base/strings/string_piece.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "build/build_config.h"
13
14 namespace base {
15
SysWideToUTF8(const std::wstring & wide)16 std::string SysWideToUTF8(const std::wstring& wide) {
17 // In theory this should be using the system-provided conversion rather
18 // than our ICU, but this will do for now.
19 return WideToUTF8(wide);
20 }
SysUTF8ToWide(const StringPiece & utf8)21 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
22 // In theory this should be using the system-provided conversion rather
23 // than our ICU, but this will do for now.
24 std::wstring out;
25 UTF8ToWide(utf8.data(), utf8.size(), &out);
26 return out;
27 }
28
29 #if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
30 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
31 // support and a better understanding of what calls these routines.
32
SysWideToNativeMB(const std::wstring & wide)33 std::string SysWideToNativeMB(const std::wstring& wide) {
34 return WideToUTF8(wide);
35 }
36
SysNativeMBToWide(const StringPiece & native_mb)37 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
38 return SysUTF8ToWide(native_mb);
39 }
40
41 #else
42
// Converts |wide| to the multi-byte encoding of the current C locale, one
// character at a time, using wcrtomb().
//
// Returns the converted bytes. Returns an empty string if |wide| is empty or
// if wcrtomb() reports an error for any character. Embedded NUL wide
// characters are preserved as single NUL bytes in the result.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // A value-initialized mbstate_t describes the initial conversion state.
  mbstate_t ps = mbstate_t();

  std::string out;
  out.reserve(wide.size());  // Lower bound: at least one byte per character.

  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];

    // Convert into a scratch buffer and then append exactly the bytes that
    // were produced, so the output never has to be sized in a separate pass.
    // NOTE(review): assumes a single character never needs more than 16 bytes
    // (i.e. MB_CUR_MAX <= 16) -- confirm for the target platforms.
    char buf[16];

    // Skip NULs to avoid wcrtomb's special handling of them.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;

    // Any conversion error discards the partial result.
    if (res == static_cast<size_t>(-1))
      return std::string();

    if (res == 0) {
      // We hit an embedded null, keep going and emit a single NUL byte.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
102
SysNativeMBToWide(const StringPiece & native_mb)103 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
104 mbstate_t ps;
105
106 // Calculate the number of wide characters. We walk through the string
107 // without writing the output, counting the number of wide characters.
108 size_t num_out_chars = 0;
109 memset(&ps, 0, sizeof(ps));
110 for (size_t i = 0; i < native_mb.size(); ) {
111 const char* src = native_mb.data() + i;
112 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
113 switch (res) {
114 // Handle any errors and return an empty string.
115 case static_cast<size_t>(-2):
116 case static_cast<size_t>(-1):
117 return std::wstring();
118 break;
119 case 0:
120 // We hit an embedded null byte, keep going.
121 i += 1; // Fall through.
122 default:
123 i += res;
124 ++num_out_chars;
125 break;
126 }
127 }
128
129 if (num_out_chars == 0)
130 return std::wstring();
131
132 std::wstring out;
133 out.resize(num_out_chars);
134
135 memset(&ps, 0, sizeof(ps)); // Clear the shift state.
136 // We walk the input string again, with |i| tracking the index of the
137 // multi-byte input, and |j| tracking the wide output.
138 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
139 const char* src = native_mb.data() + i;
140 wchar_t* dst = &out[j];
141 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
142 switch (res) {
143 // Handle any errors and return an empty string.
144 case static_cast<size_t>(-2):
145 case static_cast<size_t>(-1):
146 return std::wstring();
147 break;
148 case 0:
149 i += 1; // Skip null byte.
150 break;
151 default:
152 i += res;
153 break;
154 }
155 }
156
157 return out;
158 }
159
#endif  // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
161
162 } // namespace base
163