// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #ifndef _FX_CODEPAGE
8 #define _FX_CODEPAGE
// Forward declaration of the code page interface defined later in this header.
class IFX_CodePage;
// Code page identifiers (FX_CODEPAGE_*).
// NOTE(review): the numeric values look like Windows code page identifiers
// (e.g. 1252, 65001) -- confirm against the implementation before relying on
// that correspondence.

// Symbolic selectors rather than concrete code pages.
// NOTE(review): presumably mirror CP_ACP / CP_OEMCP / CP_MACCP /
// CP_THREAD_ACP / CP_SYMBOL -- confirm.
#define FX_CODEPAGE_DefANSI 0
#define FX_CODEPAGE_DefOEM 1
#define FX_CODEPAGE_DefMAC 2
#define FX_CODEPAGE_Thread 3
#define FX_CODEPAGE_Symbol 42

// MS-DOS and Arabic (ASMO / Transparent / Nafitha) code pages.
#define FX_CODEPAGE_MSDOS_US 437
#define FX_CODEPAGE_Arabic_ASMO708 708
#define FX_CODEPAGE_Arabic_ASMO449Plus 709
#define FX_CODEPAGE_Arabic_Transparent 710
#define FX_CODEPAGE_Arabic_NafithaEnhanced 711
#define FX_CODEPAGE_Arabic_TransparentASMO 720
#define FX_CODEPAGE_MSDOS_Greek1 737
#define FX_CODEPAGE_MSDOS_Baltic 775
#define FX_CODEPAGE_MSWin31_WesternEuropean 819
#define FX_CODEPAGE_MSDOS_WesternEuropean 850
#define FX_CODEPAGE_MSDOS_EasternEuropean 852
#define FX_CODEPAGE_MSDOS_Latin3 853
#define FX_CODEPAGE_MSDOS_Cyrillic 855
#define FX_CODEPAGE_MSDOS_Turkish 857
#define FX_CODEPAGE_MSDOS_Latin1Euro 858
#define FX_CODEPAGE_MSDOS_Portuguese 860
#define FX_CODEPAGE_MSDOS_Icelandic 861
#define FX_CODEPAGE_MSDOS_Hebrew 862
#define FX_CODEPAGE_MSDOS_FrenchCanadian 863
#define FX_CODEPAGE_MSDOS_Arabic 864
#define FX_CODEPAGE_MSDOS_Norwegian 865
#define FX_CODEPAGE_MSDOS_Russian 866
#define FX_CODEPAGE_MSDOS_Greek2 869
#define FX_CODEPAGE_MSDOS_Thai 874
#define FX_CODEPAGE_MSDOS_KamenickyCS 895

// East Asian code pages.
#define FX_CODEPAGE_ShiftJIS 932
#define FX_CODEPAGE_ChineseSimplified 936
#define FX_CODEPAGE_Korean 949
#define FX_CODEPAGE_ChineseTraditional 950

// UTF-16, little- and big-endian.
#define FX_CODEPAGE_UTF16LE 1200
#define FX_CODEPAGE_UTF16BE 1201

// MS Windows 125x code pages.
#define FX_CODEPAGE_MSWin_EasternEuropean 1250
#define FX_CODEPAGE_MSWin_Cyrillic 1251
#define FX_CODEPAGE_MSWin_WesternEuropean 1252
#define FX_CODEPAGE_MSWin_Greek 1253
#define FX_CODEPAGE_MSWin_Turkish 1254
#define FX_CODEPAGE_MSWin_Hebrew 1255
#define FX_CODEPAGE_MSWin_Arabic 1256
#define FX_CODEPAGE_MSWin_Baltic 1257
#define FX_CODEPAGE_MSWin_Vietnamese 1258
#define FX_CODEPAGE_Johab 1361

// Macintosh code pages.
#define FX_CODEPAGE_MAC_Roman 10000
#define FX_CODEPAGE_MAC_ShiftJIS 10001
#define FX_CODEPAGE_MAC_ChineseTraditional 10002
#define FX_CODEPAGE_MAC_Korean 10003
#define FX_CODEPAGE_MAC_Arabic 10004
#define FX_CODEPAGE_MAC_Hebrew 10005
#define FX_CODEPAGE_MAC_Greek 10006
#define FX_CODEPAGE_MAC_Cyrillic 10007
#define FX_CODEPAGE_MAC_ChineseSimplified 10008
#define FX_CODEPAGE_MAC_Thai 10021
#define FX_CODEPAGE_MAC_EasternEuropean 10029
#define FX_CODEPAGE_MAC_Turkish 10081

// UTF-32, little- and big-endian.
#define FX_CODEPAGE_UTF32LE 12000
#define FX_CODEPAGE_UTF32BE 12001

// ISO 8859 parts 1 through 16.
#define FX_CODEPAGE_ISO8859_1 28591
#define FX_CODEPAGE_ISO8859_2 28592
#define FX_CODEPAGE_ISO8859_3 28593
#define FX_CODEPAGE_ISO8859_4 28594
#define FX_CODEPAGE_ISO8859_5 28595
#define FX_CODEPAGE_ISO8859_6 28596
#define FX_CODEPAGE_ISO8859_7 28597
#define FX_CODEPAGE_ISO8859_8 28598
#define FX_CODEPAGE_ISO8859_9 28599
#define FX_CODEPAGE_ISO8859_10 28600
#define FX_CODEPAGE_ISO8859_11 28601
#define FX_CODEPAGE_ISO8859_12 28602
#define FX_CODEPAGE_ISO8859_13 28603
#define FX_CODEPAGE_ISO8859_14 28604
#define FX_CODEPAGE_ISO8859_15 28605
#define FX_CODEPAGE_ISO8859_16 28606

// ISCII code pages.
#define FX_CODEPAGE_ISCII_Devanagari 57002
#define FX_CODEPAGE_ISCII_Bengali 57003
#define FX_CODEPAGE_ISCII_Tamil 57004
#define FX_CODEPAGE_ISCII_Telugu 57005
#define FX_CODEPAGE_ISCII_Assamese 57006
#define FX_CODEPAGE_ISCII_Oriya 57007
#define FX_CODEPAGE_ISCII_Kannada 57008
#define FX_CODEPAGE_ISCII_Malayalam 57009
#define FX_CODEPAGE_ISCII_Gujarati 57010
#define FX_CODEPAGE_ISCII_Punjabi 57011

// UTF-7 and UTF-8.
#define FX_CODEPAGE_UTF7 65000
#define FX_CODEPAGE_UTF8 65001
// Character set identifiers (FX_CHARSET_*).
// NOTE(review): the values resemble the Windows LOGFONT lfCharSet constants
// (e.g. 128 = Shift-JIS, 204 = Cyrillic) -- confirm before relying on that.
#define FX_CHARSET_ANSI 0
#define FX_CHARSET_Default 1
#define FX_CHARSET_Symbol 2

// Macintosh character sets.
#define FX_CHARSET_MAC_Roman 77
#define FX_CHARSET_MAC_ShiftJIS 78
#define FX_CHARSET_MAC_Korean 79
#define FX_CHARSET_MAC_ChineseSimplified 80
#define FX_CHARSET_MAC_ChineseTriditional 81
#define FX_CHARSET_MAC_Johab 82
#define FX_CHARSET_MAC_Hebrew 83
#define FX_CHARSET_MAC_Arabic 84
#define FX_CHARSET_MAC_Greek 85
#define FX_CHARSET_MAC_Turkish 86
#define FX_CHARSET_MAC_Thai 87
#define FX_CHARSET_MAC_EasternEuropean 88
#define FX_CHARSET_MAC_Cyrillic 89

// East Asian character sets.
#define FX_CHARSET_ShiftJIS 128
#define FX_CHARSET_Korean 129
#define FX_CHARSET_Johab 130
#define FX_CHARSET_ChineseSimplified 134
#define FX_CHARSET_ChineseTriditional 136

// MS Windows and remaining character sets.
#define FX_CHARSET_MSWin_Greek 161
#define FX_CHARSET_MSWin_Turkish 162
#define FX_CHARSET_MSWin_Vietnamese 163
#define FX_CHARSET_MSWin_Hebrew 177
#define FX_CHARSET_MSWin_Arabic 178
#define FX_CHARSET_ArabicTraditional 179
#define FX_CHARSET_ArabicUser 180
#define FX_CHARSET_HebrewUser 181
#define FX_CHARSET_MSWin_Baltic 186
#define FX_CHARSET_MSWin_Cyrillic 204
#define FX_CHARSET_Thai 222
#define FX_CHARSET_MSWin_EasterEuropean 238
#define FX_CHARSET_US 254
#define FX_CHARSET_OEM 255

// Correctly spelled aliases. The misspelled names above ("Triditional",
// "EasterEuropean") are kept because existing callers may reference them;
// new code should prefer the spellings below.
#define FX_CHARSET_MAC_ChineseTraditional FX_CHARSET_MAC_ChineseTriditional
#define FX_CHARSET_ChineseTraditional FX_CHARSET_ChineseTriditional
#define FX_CHARSET_MSWin_EasternEuropean FX_CHARSET_MSWin_EasterEuropean
133 FX_WORD FX_GetCodePageFromCharset(uint8_t charset);
134 FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage);
135 FX_WORD FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength);
136 FX_WORD FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength);
137 FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage);
138 void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength);
139 void FX_SwapByteOrderCopy(const FX_WCHAR* pSrc,
140                           FX_WCHAR* pDst,
141                           int32_t iLength);
142 void FX_UTF16ToWChar(void* pBuffer, int32_t iLength);
143 void FX_UTF16ToWCharCopy(const FX_WORD* pUTF16,
144                          FX_WCHAR* pWChar,
145                          int32_t iLength);
146 void FX_WCharToUTF16(void* pBuffer, int32_t iLength);
147 void FX_WCharToUTF16Copy(const FX_WCHAR* pWChar,
148                          FX_WORD* pUTF16,
149                          int32_t iLength);
150 int32_t FX_DecodeString(FX_WORD wCodePage,
151                         const FX_CHAR* pSrc,
152                         int32_t* pSrcLen,
153                         FX_WCHAR* pDst,
154                         int32_t* pDstLen,
155                         FX_BOOL bErrBreak = FALSE);
156 int32_t FX_UTF8Decode(const FX_CHAR* pSrc,
157                       int32_t* pSrcLen,
158                       FX_WCHAR* pDst,
159                       int32_t* pDstLen);
// Kind of coding system used by a code page.
// NOTE(review): the enumerator names presumably stand for multi-byte,
// single-byte and double-byte character sets -- confirm.
enum FX_CODESYSTEM {
  FX_MBCS = 0,
  FX_SBCS,
  FX_DBCS,
};
165 typedef struct _FX_CODEPAGE_HEADER {
166   uint16_t uCPID;
167   uint8_t uMinCharBytes;
168   uint8_t uMaxCharBytes;
169   FX_CODESYSTEM eCPType;
170   FX_BOOL bHasLeadByte;
171   FX_WCHAR wMinChar;
172   FX_WCHAR wMaxChar;
173   FX_WCHAR wDefChar;
174   FX_WCHAR wMinUnicode;
175   FX_WCHAR wMaxUnicode;
176   FX_WCHAR wDefUnicode;
177 } FX_CODEPAGE_HEADER;
// Mapping types.
// NOTE(review): presumably the values stored in the uMapType fields of the
// map-table structs in this header -- confirm.
#define FX_CPMAPTYPE_Consecution 1
#define FX_CPMAPTYPE_Strict 2
#define FX_CPMAPTYPE_NoMapping 3
#define FX_CPMAPTYPE_Delta 4
// First-level map table entry.
// NOTE(review): "CPCU" presumably means code page -> Unicode -- confirm.
typedef struct _FX_CPCU_MAPTABLE1 {
  uint16_t uMapType;  // Presumably one of the FX_CPMAPTYPE_* values.
  // "uUniocde" is a typo for "uUnicode"; the name is kept because renaming
  // the field would break existing users of this struct.
  uint16_t uUniocde;
} FX_CPCU_MAPTABLE1;
// Second-level map table entry.
// NOTE(review): presumably keyed by trail byte for multi-byte code pages --
// confirm against the lookup code.
typedef struct _FX_CPCU_MAPTABLE2 {
  uint8_t uTrailByte;
  uint8_t uMapType;  // Presumably one of the FX_CPMAPTYPE_* values.
  uint16_t uOffset;  // NOTE(review): presumably an offset into map data.
} FX_CPCU_MAPTABLE2;
191 typedef struct _FX_CPCU_MAPINFO {
192   FX_CPCU_MAPTABLE1* pMapTable1;
193   FX_CPCU_MAPTABLE2* pMapTable2;
194   const uint8_t* pMapData;
195 } FX_CPCU_MAPINFO;
// Map table entry covering the Unicode range uStartUnicode..uEndUnicode.
// NOTE(review): "CPUC" presumably means Unicode -> code page, and the range is
// presumably inclusive -- confirm against the lookup code.
typedef struct _FX_CPUC_MAPTABLE {
  uint16_t uStartUnicode;
  uint16_t uEndUnicode;
  uint16_t uMapType;  // Presumably one of the FX_CPMAPTYPE_* values.
  uint16_t uOffset;   // NOTE(review): presumably an offset into map data.
} FX_CPUC_MAPTABLE;
202 typedef struct _FX_CPUC_MAPINFO {
203   uint32_t uMapCount;
204   FX_CPUC_MAPTABLE* pMapTable;
205   const uint8_t* pMapData;
206 } FX_CPUC_MAPINFO;
207 typedef struct _FX_CODEPAGE {
208   FX_CODEPAGE_HEADER const* pCPHeader;
209   FX_CPCU_MAPINFO const* pCPCUMapInfo;
210   FX_CPUC_MAPINFO const* pCPUCMapInfo;
211 } FX_CODEPAGE, *FX_LPCODEPAGE;
212 typedef FX_CODEPAGE const* FX_LPCCODEPAGE;
// Pairs a 32-bit hash with a code page.
// NOTE(review): presumably the hash of an encoding-name string, used for
// name -> code page lookup -- confirm.
typedef struct _FX_STR2CPHASH {
  uint32_t uHash;
  uint32_t uCodePage;
} FX_STR2CPHASH;
// Pairs a charset with a code page.
// NOTE(review): presumably FX_CHARSET_* and FX_CODEPAGE_* values -- confirm.
typedef struct _FX_CHARSET_MAP {
  uint16_t charset;
  uint16_t codepage;
} FX_CHARSET_MAP;
221 typedef struct _FX_LANG2CPMAP {
222   FX_WORD wLanguage;
223   FX_WORD wCodepage;
224 } FX_LANG2CPMAP;
225 
226 class IFX_CodePage {
227  public:
228   static IFX_CodePage* Create(FX_WORD wCodePage);
~IFX_CodePage()229   virtual ~IFX_CodePage() {}
230   virtual void Release() = 0;
231   virtual FX_WORD GetCodePageNumber() const = 0;
232   virtual FX_CODESYSTEM GetCodeSystemType() const = 0;
233   virtual FX_BOOL HasLeadByte() const = 0;
234   virtual FX_BOOL IsLeadByte(uint8_t byte) const = 0;
235   virtual int32_t GetMinBytesPerChar() const = 0;
236   virtual int32_t GetMaxBytesPerChar() const = 0;
237   virtual FX_WCHAR GetMinCharcode() const = 0;
238   virtual FX_WCHAR GetMaxCharcode() const = 0;
239   virtual FX_WCHAR GetDefCharcode() const = 0;
240   virtual FX_WCHAR GetMinUnicode() const = 0;
241   virtual FX_WCHAR GetMaxUnicode() const = 0;
242   virtual FX_WCHAR GetDefUnicode() const = 0;
243   virtual FX_BOOL IsValidCharcode(FX_WORD wCharcode) const = 0;
244   virtual FX_WCHAR GetUnicode(FX_WORD wCharcode) const = 0;
245   virtual FX_BOOL IsValidUnicode(FX_WCHAR wUnicode) const = 0;
246   virtual FX_WORD GetCharcode(FX_WCHAR wUnicode) const = 0;
247 };
248 #endif
249