1 /** @file
2   Implementation of translation upon VT-UTF8.
3 
4 Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>
5 This program and the accompanying materials
6 are licensed and made available under the terms and conditions of the BSD License
7 which accompanies this distribution.  The full text of the license may be found at
8 http://opensource.org/licenses/bsd-license.php
9 
10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12 
13 **/
14 
15 #include "Terminal.h"
16 
17 /**
18   Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
19   and insert them into Unicode FIFO.
20 
21   @param TerminalDevice          The terminal device.
22 
23 **/
24 VOID
VTUTF8RawDataToUnicode(IN TERMINAL_DEV * TerminalDevice)25 VTUTF8RawDataToUnicode (
26   IN  TERMINAL_DEV    *TerminalDevice
27   )
28 {
29   UTF8_CHAR Utf8Char;
30   UINT8     ValidBytes;
31   UINT16    UnicodeChar;
32 
33   ValidBytes = 0;
34   //
35   // pop the raw data out from the raw fifo,
36   // and translate it into unicode, then push
37   // the unicode into unicode fifo, until the raw fifo is empty.
38   //
39   while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) {
40 
41     GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
42 
43     if (ValidBytes < 1 || ValidBytes > 3) {
44       continue;
45     }
46 
47     Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
48 
49     UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
50   }
51 }
52 
53 /**
54   Get one valid VT-UTF8 characters set from Raw Data FIFO.
55 
56   @param  Utf8Device          The terminal device.
57   @param  Utf8Char            Returned valid VT-UTF8 characters set.
58   @param  ValidBytes          The count of returned VT-VTF8 characters.
59                               If ValidBytes is zero, no valid VT-UTF8 returned.
60 
61 **/
62 VOID
GetOneValidUtf8Char(IN TERMINAL_DEV * Utf8Device,OUT UTF8_CHAR * Utf8Char,OUT UINT8 * ValidBytes)63 GetOneValidUtf8Char (
64   IN  TERMINAL_DEV      *Utf8Device,
65   OUT UTF8_CHAR         *Utf8Char,
66   OUT UINT8             *ValidBytes
67   )
68 {
69   UINT8   Temp;
70   UINT8   Index;
71   BOOLEAN FetchFlag;
72 
73   Temp      = 0;
74   Index     = 0;
75   FetchFlag = TRUE;
76 
77   //
78   // if no valid Utf8 char is found in the RawFiFo,
79   // then *ValidBytes will be zero.
80   //
81   *ValidBytes = 0;
82 
83   while (!IsRawFiFoEmpty (Utf8Device)) {
84 
85     RawFiFoRemoveOneKey (Utf8Device, &Temp);
86 
87     switch (*ValidBytes) {
88 
89     case 0:
90       if ((Temp & 0x80) == 0) {
91         //
92         // one-byte utf8 char
93         //
94         *ValidBytes       = 1;
95 
96         Utf8Char->Utf8_1  = Temp;
97 
98         FetchFlag         = FALSE;
99 
100       } else if ((Temp & 0xe0) == 0xc0) {
101         //
102         // two-byte utf8 char
103         //
104         *ValidBytes         = 2;
105 
106         Utf8Char->Utf8_2[1] = Temp;
107 
108       } else if ((Temp & 0xf0) == 0xe0) {
109         //
110         // three-byte utf8 char
111         //
112         *ValidBytes         = 3;
113 
114         Utf8Char->Utf8_3[2] = Temp;
115 
116         Index++;
117 
118       } else {
119         //
120         // reset *ValidBytes to zero, let valid utf8 char search restart
121         //
122         *ValidBytes = 0;
123       }
124 
125       break;
126 
127     case 2:
128       //
129       // two-byte utf8 char go on
130       //
131       if ((Temp & 0xc0) == 0x80) {
132 
133         Utf8Char->Utf8_2[0] = Temp;
134 
135         FetchFlag           = FALSE;
136 
137       } else {
138 
139         *ValidBytes = 0;
140       }
141       break;
142 
143     case 3:
144       //
145       // three-byte utf8 char go on
146       //
147       if ((Temp & 0xc0) == 0x80) {
148         if (Index == 1) {
149           Utf8Char->Utf8_3[1] = Temp;
150           Index++;
151         } else {
152           Utf8Char->Utf8_3[0] = Temp;
153           FetchFlag = FALSE;
154         }
155       } else {
156         //
157         // reset *ValidBytes and Index to zero, let valid utf8 char search restart
158         //
159         *ValidBytes = 0;
160         Index       = 0;
161       }
162       break;
163 
164     default:
165       break;
166     }
167 
168     if (!FetchFlag) {
169       break;
170     }
171   }
172 
173   return ;
174 }
175 
176 /**
177   Translate VT-UTF8 characters into one Unicode character.
178 
179   UTF8 Encoding Table
180   Bits per Character | Unicode Character Range | Unicode Binary  Encoding |	UTF8 Binary Encoding
181         0-7	         |     0x0000 - 0x007F	   |     00000000 0xxxxxxx	  |   0xxxxxxx
182         8-11 	       |     0x0080 - 0x07FF	   |     00000xxx xxxxxxxx 	  |   110xxxxx 10xxxxxx
183        12-16	       |     0x0800 - 0xFFFF	   |     xxxxxxxx xxxxxxxx	  |   1110xxxx 10xxxxxx 10xxxxxx
184 
185 
186   @param  Utf8Char         VT-UTF8 character set needs translating.
187   @param  ValidBytes       The count of valid VT-UTF8 characters.
188   @param  UnicodeChar      Returned unicode character.
189 
190 **/
191 VOID
Utf8ToUnicode(IN UTF8_CHAR Utf8Char,IN UINT8 ValidBytes,OUT CHAR16 * UnicodeChar)192 Utf8ToUnicode (
193   IN  UTF8_CHAR       Utf8Char,
194   IN  UINT8           ValidBytes,
195   OUT CHAR16          *UnicodeChar
196   )
197 {
198   UINT8 UnicodeByte0;
199   UINT8 UnicodeByte1;
200   UINT8 Byte0;
201   UINT8 Byte1;
202   UINT8 Byte2;
203 
204   *UnicodeChar = 0;
205 
206   //
207   // translate utf8 code to unicode, in terminal standard,
208   // up to 3 bytes utf8 code is supported.
209   //
210   switch (ValidBytes) {
211   case 1:
212     //
213     // one-byte utf8 code
214     //
215     *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
216     break;
217 
218   case 2:
219     //
220     // two-byte utf8 code
221     //
222     Byte0         = Utf8Char.Utf8_2[0];
223     Byte1         = Utf8Char.Utf8_2[1];
224 
225     UnicodeByte0  = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
226     UnicodeByte1  = (UINT8) ((Byte1 >> 2) & 0x07);
227     *UnicodeChar  = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
228     break;
229 
230   case 3:
231     //
232     // three-byte utf8 code
233     //
234     Byte0         = Utf8Char.Utf8_3[0];
235     Byte1         = Utf8Char.Utf8_3[1];
236     Byte2         = Utf8Char.Utf8_3[2];
237 
238     UnicodeByte0  = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
239     UnicodeByte1  = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
240     *UnicodeChar  = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
241 
242   default:
243     break;
244   }
245 
246   return ;
247 }
248 
249 /**
250   Translate one Unicode character into VT-UTF8 characters.
251 
252   UTF8 Encoding Table
253   Bits per Character | Unicode Character Range | Unicode Binary  Encoding |	UTF8 Binary Encoding
254         0-7	         |     0x0000 - 0x007F	   |     00000000 0xxxxxxx	  |   0xxxxxxx
255         8-11 	       |     0x0080 - 0x07FF	   |     00000xxx xxxxxxxx 	  |   110xxxxx 10xxxxxx
256        12-16	       |     0x0800 - 0xFFFF	   |     xxxxxxxx xxxxxxxx	  |   1110xxxx 10xxxxxx 10xxxxxx
257 
258 
259   @param  Unicode          Unicode character need translating.
260   @param  Utf8Char         Return VT-UTF8 character set.
261   @param  ValidBytes       The count of valid VT-UTF8 characters. If
262                            ValidBytes is zero, no valid VT-UTF8 returned.
263 
264 **/
265 VOID
UnicodeToUtf8(IN CHAR16 Unicode,OUT UTF8_CHAR * Utf8Char,OUT UINT8 * ValidBytes)266 UnicodeToUtf8 (
267   IN  CHAR16      Unicode,
268   OUT UTF8_CHAR   *Utf8Char,
269   OUT UINT8       *ValidBytes
270   )
271 {
272   UINT8 UnicodeByte0;
273   UINT8 UnicodeByte1;
274   //
275   // translate unicode to utf8 code
276   //
277   UnicodeByte0  = (UINT8) Unicode;
278   UnicodeByte1  = (UINT8) (Unicode >> 8);
279 
280   if (Unicode < 0x0080) {
281 
282     Utf8Char->Utf8_1  = (UINT8) (UnicodeByte0 & 0x7f);
283     *ValidBytes       = 1;
284 
285   } else if (Unicode < 0x0800) {
286     //
287     // byte sequence: high -> low
288     //                Utf8_2[0], Utf8_2[1]
289     //
290     Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
291     Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
292 
293     *ValidBytes         = 2;
294 
295   } else {
296     //
297     // byte sequence: high -> low
298     //                Utf8_3[0], Utf8_3[1], Utf8_3[2]
299     //
300     Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
301     Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
302     Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
303 
304     *ValidBytes         = 3;
305   }
306 }
307 
308 
309 /**
310   Check if input string is valid VT-UTF8 string.
311 
312   @param  TerminalDevice          The terminal device.
313   @param  WString                 The input string.
314 
315   @retval EFI_SUCCESS             If all input characters are valid.
316 
317 **/
318 EFI_STATUS
VTUTF8TestString(IN TERMINAL_DEV * TerminalDevice,IN CHAR16 * WString)319 VTUTF8TestString (
320   IN  TERMINAL_DEV    *TerminalDevice,
321   IN  CHAR16          *WString
322   )
323 {
324   //
325   // to utf8, all kind of characters are supported.
326   //
327   return EFI_SUCCESS;
328 }
329