1 // UTFConvert.cpp
2
3 #include "StdAfx.h"
4
5 #include "MyTypes.h"
6 #include "UTFConvert.h"
7
// Lead-byte thresholds shared by the UTF-8 routines in this file.
// A lead byte c >= 0xC0 is taken to introduce n continuation bytes, where n is
// the first index in 1..4 with c < kUtf8Limits[n] (n is 5 if there is none).
// kUtf8Limits[n - 1] is the marker value that decoders subtract from the lead
// byte and that the encoder adds to the top payload bits.
static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
9
CheckUTF8(const char * src)10 bool CheckUTF8(const char *src) throw()
11 {
12 for (;;)
13 {
14 Byte c;
15 unsigned numAdds;
16 c = *src++;
17 if (c == 0)
18 return true;
19
20 if (c < 0x80)
21 continue;
22 if (c < 0xC0)
23 return false;
24 for (numAdds = 1; numAdds < 5; numAdds++)
25 if (c < kUtf8Limits[numAdds])
26 break;
27 UInt32 value = (c - kUtf8Limits[numAdds - 1]);
28
29 do
30 {
31 Byte c2 = *src++;
32 if (c2 < 0x80 || c2 >= 0xC0)
33 return false;
34 value <<= 6;
35 value |= (c2 - 0x80);
36 }
37 while (--numAdds);
38
39 if (value >= 0x110000)
40 return false;
41 }
42 }
43
44
Utf8_To_Utf16(wchar_t * dest,size_t * destLen,const char * src,size_t srcLen)45 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen) throw()
46 {
47 size_t destPos = 0, srcPos = 0;
48 for (;;)
49 {
50 Byte c;
51 unsigned numAdds;
52 if (srcPos == srcLen)
53 {
54 *destLen = destPos;
55 return True;
56 }
57 c = (Byte)src[srcPos++];
58
59 if (c < 0x80)
60 {
61 if (dest)
62 dest[destPos] = (wchar_t)c;
63 destPos++;
64 continue;
65 }
66 if (c < 0xC0)
67 break;
68 for (numAdds = 1; numAdds < 5; numAdds++)
69 if (c < kUtf8Limits[numAdds])
70 break;
71 UInt32 value = (c - kUtf8Limits[numAdds - 1]);
72
73 do
74 {
75 Byte c2;
76 if (srcPos == srcLen)
77 break;
78 c2 = (Byte)src[srcPos++];
79 if (c2 < 0x80 || c2 >= 0xC0)
80 break;
81 value <<= 6;
82 value |= (c2 - 0x80);
83 }
84 while (--numAdds);
85
86 if (value < 0x10000)
87 {
88 if (dest)
89 dest[destPos] = (wchar_t)value;
90 destPos++;
91 }
92 else
93 {
94 value -= 0x10000;
95 if (value >= 0x100000)
96 break;
97 if (dest)
98 {
99 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
100 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
101 }
102 destPos += 2;
103 }
104 }
105 *destLen = destPos;
106 return False;
107 }
108
Utf16_To_Utf8(char * dest,size_t * destLen,const wchar_t * src,size_t srcLen)109 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
110 {
111 size_t destPos = 0, srcPos = 0;
112 for (;;)
113 {
114 unsigned numAdds;
115 UInt32 value;
116 if (srcPos == srcLen)
117 {
118 *destLen = destPos;
119 return True;
120 }
121 value = src[srcPos++];
122 if (value < 0x80)
123 {
124 if (dest)
125 dest[destPos] = (char)value;
126 destPos++;
127 continue;
128 }
129 if (value >= 0xD800 && value < 0xE000)
130 {
131 UInt32 c2;
132 if (value >= 0xDC00 || srcPos == srcLen)
133 break;
134 c2 = src[srcPos++];
135 if (c2 < 0xDC00 || c2 >= 0xE000)
136 break;
137 value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
138 }
139 for (numAdds = 1; numAdds < 5; numAdds++)
140 if (value < (((UInt32)1) << (numAdds * 5 + 6)))
141 break;
142 if (dest)
143 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
144 destPos++;
145 do
146 {
147 numAdds--;
148 if (dest)
149 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
150 destPos++;
151 }
152 while (numAdds != 0);
153 }
154 *destLen = destPos;
155 return False;
156 }
157
ConvertUTF8ToUnicode(const AString & src,UString & dest)158 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
159 {
160 dest.Empty();
161 size_t destLen = 0;
162 Utf8_To_Utf16(NULL, &destLen, src, src.Len());
163 Bool res = Utf8_To_Utf16(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
164 dest.ReleaseBuffer((unsigned)destLen);
165 return res ? true : false;
166 }
167
ConvertUnicodeToUTF8(const UString & src,AString & dest)168 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
169 {
170 dest.Empty();
171 size_t destLen = 0;
172 Utf16_To_Utf8(NULL, &destLen, src, src.Len());
173 Bool res = Utf16_To_Utf8(dest.GetBuffer((unsigned)destLen), &destLen, src, src.Len());
174 dest.ReleaseBuffer((unsigned)destLen);
175 return res ? true : false;
176 }
177