1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include <string.h>
31 
32 #include "common/convert_UTF.h"
33 #include "common/scoped_ptr.h"
34 #include "common/string_conversion.h"
35 #include "common/using_std_string.h"
36 
37 namespace google_breakpad {
38 
39 using std::vector;
40 
UTF8ToUTF16(const char * in,vector<uint16_t> * out)41 void UTF8ToUTF16(const char *in, vector<uint16_t> *out) {
42   size_t source_length = strlen(in);
43   const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in);
44   const UTF8 *source_end_ptr = source_ptr + source_length;
45   // Erase the contents and zero fill to the expected size
46   out->clear();
47   out->insert(out->begin(), source_length, 0);
48   uint16_t *target_ptr = &(*out)[0];
49   uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t);
50   ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
51                                                &target_ptr, target_end_ptr,
52                                                strictConversion);
53 
54   // Resize to be the size of the # of converted characters + NULL
55   out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
56 }
57 
UTF8ToUTF16Char(const char * in,int in_length,uint16_t out[2])58 int UTF8ToUTF16Char(const char *in, int in_length, uint16_t out[2]) {
59   const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in);
60   const UTF8 *source_end_ptr = source_ptr + sizeof(char);
61   uint16_t *target_ptr = out;
62   uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t);
63   out[0] = out[1] = 0;
64 
65   // Process one character at a time
66   while (1) {
67     ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
68                                                  &target_ptr, target_end_ptr,
69                                                  strictConversion);
70 
71     if (result == conversionOK)
72       return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in));
73 
74     // Add another character to the input stream and try again
75     source_ptr = reinterpret_cast<const UTF8 *>(in);
76     ++source_end_ptr;
77 
78     if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length)
79       break;
80   }
81 
82   return 0;
83 }
84 
UTF32ToUTF16(const wchar_t * in,vector<uint16_t> * out)85 void UTF32ToUTF16(const wchar_t *in, vector<uint16_t> *out) {
86   size_t source_length = wcslen(in);
87   const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in);
88   const UTF32 *source_end_ptr = source_ptr + source_length;
89   // Erase the contents and zero fill to the expected size
90   out->clear();
91   out->insert(out->begin(), source_length, 0);
92   uint16_t *target_ptr = &(*out)[0];
93   uint16_t *target_end_ptr = target_ptr + out->capacity() * sizeof(uint16_t);
94   ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
95                                                 &target_ptr, target_end_ptr,
96                                                 strictConversion);
97 
98   // Resize to be the size of the # of converted characters + NULL
99   out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
100 }
101 
UTF32ToUTF16Char(wchar_t in,uint16_t out[2])102 void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
103   const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in);
104   const UTF32 *source_end_ptr = source_ptr + 1;
105   uint16_t *target_ptr = out;
106   uint16_t *target_end_ptr = target_ptr + 2 * sizeof(uint16_t);
107   out[0] = out[1] = 0;
108   ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
109                                                 &target_ptr, target_end_ptr,
110                                                 strictConversion);
111 
112   if (result != conversionOK) {
113     out[0] = out[1] = 0;
114   }
115 }
116 
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value << 8);
  return static_cast<uint16_t>(former_low_byte | former_high_byte);
}
120 
UTF16ToUTF8(const vector<uint16_t> & in,bool swap)121 string UTF16ToUTF8(const vector<uint16_t> &in, bool swap) {
122   const UTF16 *source_ptr = &in[0];
123   scoped_array<uint16_t> source_buffer;
124 
125   // If we're to swap, we need to make a local copy and swap each byte pair
126   if (swap) {
127     int idx = 0;
128     source_buffer.reset(new uint16_t[in.size()]);
129     UTF16 *source_buffer_ptr = source_buffer.get();
130     for (vector<uint16_t>::const_iterator it = in.begin();
131          it != in.end(); ++it, ++idx)
132       source_buffer_ptr[idx] = Swap(*it);
133 
134     source_ptr = source_buffer.get();
135   }
136 
137   // The maximum expansion would be 4x the size of the input string.
138   const UTF16 *source_end_ptr = source_ptr + in.size();
139   size_t target_capacity = in.size() * 4;
140   scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
141   UTF8 *target_ptr = target_buffer.get();
142   UTF8 *target_end_ptr = target_ptr + target_capacity;
143   ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
144                                                &target_ptr, target_end_ptr,
145                                                strictConversion);
146 
147   if (result == conversionOK) {
148     const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get());
149     return targetPtr;
150   }
151 
152   return "";
153 }
154 
155 }  // namespace google_breakpad
156