1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/include/fxcrt/fx_basic.h"
8 
Clear()9 void CFX_UTF8Decoder::Clear() {
10   m_Buffer.Clear();
11   m_PendingBytes = 0;
12 }
AppendChar(FX_DWORD ch)13 void CFX_UTF8Decoder::AppendChar(FX_DWORD ch) {
14   m_Buffer.AppendChar((FX_WCHAR)ch);
15 }
Input(uint8_t byte)16 void CFX_UTF8Decoder::Input(uint8_t byte) {
17   if (byte < 0x80) {
18     m_PendingBytes = 0;
19     m_Buffer.AppendChar(byte);
20   } else if (byte < 0xc0) {
21     if (m_PendingBytes == 0) {
22       return;
23     }
24     m_PendingBytes--;
25     m_PendingChar |= (byte & 0x3f) << (m_PendingBytes * 6);
26     if (m_PendingBytes == 0) {
27       AppendChar(m_PendingChar);
28     }
29   } else if (byte < 0xe0) {
30     m_PendingBytes = 1;
31     m_PendingChar = (byte & 0x1f) << 6;
32   } else if (byte < 0xf0) {
33     m_PendingBytes = 2;
34     m_PendingChar = (byte & 0x0f) << 12;
35   } else if (byte < 0xf8) {
36     m_PendingBytes = 3;
37     m_PendingChar = (byte & 0x07) << 18;
38   } else if (byte < 0xfc) {
39     m_PendingBytes = 4;
40     m_PendingChar = (byte & 0x03) << 24;
41   } else if (byte < 0xfe) {
42     m_PendingBytes = 5;
43     m_PendingChar = (byte & 0x01) << 30;
44   }
45 }
Input(FX_WCHAR unicode)46 void CFX_UTF8Encoder::Input(FX_WCHAR unicode) {
47   if ((FX_DWORD)unicode < 0x80) {
48     m_Buffer.AppendChar(unicode);
49   } else {
50     if ((FX_DWORD)unicode >= 0x80000000) {
51       return;
52     }
53     int nbytes = 0;
54     if ((FX_DWORD)unicode < 0x800) {
55       nbytes = 2;
56     } else if ((FX_DWORD)unicode < 0x10000) {
57       nbytes = 3;
58     } else if ((FX_DWORD)unicode < 0x200000) {
59       nbytes = 4;
60     } else if ((FX_DWORD)unicode < 0x4000000) {
61       nbytes = 5;
62     } else {
63       nbytes = 6;
64     }
65     static uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
66     int order = 1 << ((nbytes - 1) * 6);
67     int code = unicode;
68     m_Buffer.AppendChar(prefix[nbytes - 2] | (code / order));
69     for (int i = 0; i < nbytes - 1; i++) {
70       code = code % order;
71       order >>= 6;
72       m_Buffer.AppendChar(0x80 | (code / order));
73     }
74   }
75 }
FX_UTF8Encode(const FX_WCHAR * pwsStr,FX_STRSIZE len)76 CFX_ByteString FX_UTF8Encode(const FX_WCHAR* pwsStr, FX_STRSIZE len) {
77   FXSYS_assert(pwsStr);
78   if (len < 0) {
79     len = FXSYS_wcslen(pwsStr);
80   }
81   CFX_UTF8Encoder encoder;
82   while (len-- > 0) {
83     encoder.Input(*pwsStr++);
84   }
85   return encoder.GetResult();
86 }
87