1 // UTFConvert.cpp
2
3 #include "StdAfx.h"
4
5 #include "UTFConvert.h"
6 #include "Types.h"
7
8 static const Byte kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
9
Utf8_To_Utf16(wchar_t * dest,size_t * destLen,const char * src,size_t srcLen)10 static Bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, size_t srcLen)
11 {
12 size_t destPos = 0, srcPos = 0;
13 for (;;)
14 {
15 Byte c;
16 int numAdds;
17 if (srcPos == srcLen)
18 {
19 *destLen = destPos;
20 return True;
21 }
22 c = (Byte)src[srcPos++];
23
24 if (c < 0x80)
25 {
26 if (dest)
27 dest[destPos] = (wchar_t)c;
28 destPos++;
29 continue;
30 }
31 if (c < 0xC0)
32 break;
33 for (numAdds = 1; numAdds < 5; numAdds++)
34 if (c < kUtf8Limits[numAdds])
35 break;
36 UInt32 value = (c - kUtf8Limits[numAdds - 1]);
37
38 do
39 {
40 Byte c2;
41 if (srcPos == srcLen)
42 break;
43 c2 = (Byte)src[srcPos++];
44 if (c2 < 0x80 || c2 >= 0xC0)
45 break;
46 value <<= 6;
47 value |= (c2 - 0x80);
48 }
49 while (--numAdds != 0);
50
51 if (value < 0x10000)
52 {
53 if (dest)
54 dest[destPos] = (wchar_t)value;
55 destPos++;
56 }
57 else
58 {
59 value -= 0x10000;
60 if (value >= 0x100000)
61 break;
62 if (dest)
63 {
64 dest[destPos + 0] = (wchar_t)(0xD800 + (value >> 10));
65 dest[destPos + 1] = (wchar_t)(0xDC00 + (value & 0x3FF));
66 }
67 destPos += 2;
68 }
69 }
70 *destLen = destPos;
71 return False;
72 }
73
Utf16_To_Utf8(char * dest,size_t * destLen,const wchar_t * src,size_t srcLen)74 static Bool Utf16_To_Utf8(char *dest, size_t *destLen, const wchar_t *src, size_t srcLen)
75 {
76 size_t destPos = 0, srcPos = 0;
77 for (;;)
78 {
79 unsigned numAdds;
80 UInt32 value;
81 if (srcPos == srcLen)
82 {
83 *destLen = destPos;
84 return True;
85 }
86 value = src[srcPos++];
87 if (value < 0x80)
88 {
89 if (dest)
90 dest[destPos] = (char)value;
91 destPos++;
92 continue;
93 }
94 if (value >= 0xD800 && value < 0xE000)
95 {
96 UInt32 c2;
97 if (value >= 0xDC00 || srcPos == srcLen)
98 break;
99 c2 = src[srcPos++];
100 if (c2 < 0xDC00 || c2 >= 0xE000)
101 break;
102 value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
103 }
104 for (numAdds = 1; numAdds < 5; numAdds++)
105 if (value < (((UInt32)1) << (numAdds * 5 + 6)))
106 break;
107 if (dest)
108 dest[destPos] = (char)(kUtf8Limits[numAdds - 1] + (value >> (6 * numAdds)));
109 destPos++;
110 do
111 {
112 numAdds--;
113 if (dest)
114 dest[destPos] = (char)(0x80 + ((value >> (6 * numAdds)) & 0x3F));
115 destPos++;
116 }
117 while (numAdds != 0);
118 }
119 *destLen = destPos;
120 return False;
121 }
122
ConvertUTF8ToUnicode(const AString & src,UString & dest)123 bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
124 {
125 dest.Empty();
126 size_t destLen = 0;
127 Utf8_To_Utf16(NULL, &destLen, src, src.Length());
128 wchar_t *p = dest.GetBuffer((int)destLen);
129 Bool res = Utf8_To_Utf16(p, &destLen, src, src.Length());
130 p[destLen] = 0;
131 dest.ReleaseBuffer();
132 return res ? true : false;
133 }
134
ConvertUnicodeToUTF8(const UString & src,AString & dest)135 bool ConvertUnicodeToUTF8(const UString &src, AString &dest)
136 {
137 dest.Empty();
138 size_t destLen = 0;
139 Utf16_To_Utf8(NULL, &destLen, src, src.Length());
140 char *p = dest.GetBuffer((int)destLen);
141 Bool res = Utf16_To_Utf8(p, &destLen, src, src.Length());
142 p[destLen] = 0;
143 dest.ReleaseBuffer();
144 return res ? true : false;
145 }
146