1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/sys_string_conversions.h"
6 
7 #include <stddef.h>
8 #include <wchar.h>
9 
10 #include "base/strings/string_piece.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "build/build_config.h"
13 
14 namespace base {
15 
SysWideToUTF8(const std::wstring & wide)16 std::string SysWideToUTF8(const std::wstring& wide) {
17   // In theory this should be using the system-provided conversion rather
18   // than our ICU, but this will do for now.
19   return WideToUTF8(wide);
20 }
SysUTF8ToWide(const StringPiece & utf8)21 std::wstring SysUTF8ToWide(const StringPiece& utf8) {
22   // In theory this should be using the system-provided conversion rather
23   // than our ICU, but this will do for now.
24   std::wstring out;
25   UTF8ToWide(utf8.data(), utf8.size(), &out);
26   return out;
27 }
28 
29 #if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
30 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
31 // support and a better understanding of what calls these routines.
32 
SysWideToNativeMB(const std::wstring & wide)33 std::string SysWideToNativeMB(const std::wstring& wide) {
34   return WideToUTF8(wide);
35 }
36 
SysNativeMBToWide(const StringPiece & native_mb)37 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
38   return SysUTF8ToWide(native_mb);
39 }
40 
41 #else
42 
// Converts |wide| to the multi-byte encoding of the current C locale, one
// character at a time, using wcrtomb().
//
// Embedded NUL wide characters are passed through as a single NUL byte each.
// Returns an empty string when |wide| is empty or when wcrtomb() reports that
// some character cannot be represented.
//
// The conversion is done in a single pass: each character is converted into a
// small scratch buffer and appended to the result, rather than converting the
// whole input once to measure it and a second time to write it.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // A value-initialized mbstate_t is all zeros, which is the initial
  // conversion state.
  mbstate_t ps = mbstate_t();

  std::string out;
  // Every input character produces at least one output byte, so this is a
  // lower bound that avoids most reallocations for ASCII-heavy input.
  out.reserve(wide.size());

  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t src = wide[i];
    if (!src) {
      // Do not hand NULs to wcrtomb(), to avoid its special handling of them;
      // emit the NUL byte ourselves and keep going.
      out.push_back('\0');
      continue;
    }

    // Scratch space for one converted character.
    // NOTE(review): assumes 16 >= MB_CUR_MAX on every supported platform --
    // confirm against MB_LEN_MAX.
    char buf[16];
    const size_t res = wcrtomb(buf, src, &ps);

    // Handle any errors and return an empty string.
    if (res == static_cast<size_t>(-1))
      return std::string();

    if (res == 0) {
      // Treated like an embedded NUL: account for it as one zero byte.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
102 
SysNativeMBToWide(const StringPiece & native_mb)103 std::wstring SysNativeMBToWide(const StringPiece& native_mb) {
104   mbstate_t ps;
105 
106   // Calculate the number of wide characters.  We walk through the string
107   // without writing the output, counting the number of wide characters.
108   size_t num_out_chars = 0;
109   memset(&ps, 0, sizeof(ps));
110   for (size_t i = 0; i < native_mb.size(); ) {
111     const char* src = native_mb.data() + i;
112     size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps);
113     switch (res) {
114       // Handle any errors and return an empty string.
115       case static_cast<size_t>(-2):
116       case static_cast<size_t>(-1):
117         return std::wstring();
118         break;
119       case 0:
120         // We hit an embedded null byte, keep going.
121         i += 1;  // Fall through.
122       default:
123         i += res;
124         ++num_out_chars;
125         break;
126     }
127   }
128 
129   if (num_out_chars == 0)
130     return std::wstring();
131 
132   std::wstring out;
133   out.resize(num_out_chars);
134 
135   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
136   // We walk the input string again, with |i| tracking the index of the
137   // multi-byte input, and |j| tracking the wide output.
138   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
139     const char* src = native_mb.data() + i;
140     wchar_t* dst = &out[j];
141     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
142     switch (res) {
143       // Handle any errors and return an empty string.
144       case static_cast<size_t>(-2):
145       case static_cast<size_t>(-1):
146         return std::wstring();
147         break;
148       case 0:
149         i += 1;  // Skip null byte.
150         break;
151       default:
152         i += res;
153         break;
154     }
155   }
156 
157   return out;
158 }
159 
#endif  // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
161 
162 }  // namespace base
163