1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "fpdfsdk/include/pdfwindow/PWL_FontMap.h"
8
9 #include "core/include/fpdfapi/fpdf_module.h"
10 #include "fpdfsdk/include/pdfwindow/PWL_Wnd.h"
11
namespace {

// Font name that CPWL_FontMap::Initialize() registers.
const char kDefaultFontName[] = "Helvetica";

// Font names that CPWL_FontMap::IsStandardFont() accepts. Names are compared
// exactly, so spelling and case matter.
const char* const g_sDEStandardFontName[] = {
    // Courier family.
    "Courier", "Courier-Bold", "Courier-BoldOblique", "Courier-Oblique",
    // Helvetica family.
    "Helvetica", "Helvetica-Bold", "Helvetica-BoldOblique",
    "Helvetica-Oblique",
    // Times family.
    "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic",
    // Symbolic fonts.
    "Symbol", "ZapfDingbats"};

}  // namespace
32
CPWL_FontMap(IFX_SystemHandler * pSystemHandler)33 CPWL_FontMap::CPWL_FontMap(IFX_SystemHandler* pSystemHandler)
34 : m_pPDFDoc(NULL), m_pSystemHandler(pSystemHandler) {
35 ASSERT(m_pSystemHandler);
36 }
37
~CPWL_FontMap()38 CPWL_FontMap::~CPWL_FontMap() {
39 delete m_pPDFDoc;
40 m_pPDFDoc = NULL;
41
42 Empty();
43 }
44
SetSystemHandler(IFX_SystemHandler * pSystemHandler)45 void CPWL_FontMap::SetSystemHandler(IFX_SystemHandler* pSystemHandler) {
46 m_pSystemHandler = pSystemHandler;
47 }
48
GetDocument()49 CPDF_Document* CPWL_FontMap::GetDocument() {
50 if (!m_pPDFDoc) {
51 if (CPDF_ModuleMgr::Get()) {
52 m_pPDFDoc = new CPDF_Document;
53 m_pPDFDoc->CreateNewDoc();
54 }
55 }
56
57 return m_pPDFDoc;
58 }
59
GetPDFFont(int32_t nFontIndex)60 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
61 if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
62 if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
63 return pData->pFont;
64 }
65 }
66
67 return NULL;
68 }
69
GetPDFFontAlias(int32_t nFontIndex)70 CFX_ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
71 if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
72 if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
73 return pData->sFontName;
74 }
75 }
76
77 return "";
78 }
79
KnowWord(int32_t nFontIndex,FX_WORD word)80 FX_BOOL CPWL_FontMap::KnowWord(int32_t nFontIndex, FX_WORD word) {
81 if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
82 if (m_aData.GetAt(nFontIndex)) {
83 return CharCodeFromUnicode(nFontIndex, word) >= 0;
84 }
85 }
86
87 return FALSE;
88 }
89
GetWordFontIndex(FX_WORD word,int32_t nCharset,int32_t nFontIndex)90 int32_t CPWL_FontMap::GetWordFontIndex(FX_WORD word,
91 int32_t nCharset,
92 int32_t nFontIndex) {
93 if (nFontIndex > 0) {
94 if (KnowWord(nFontIndex, word))
95 return nFontIndex;
96 } else {
97 if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
98 if (nCharset == DEFAULT_CHARSET || pData->nCharset == SYMBOL_CHARSET ||
99 nCharset == pData->nCharset) {
100 if (KnowWord(0, word))
101 return 0;
102 }
103 }
104 }
105
106 int32_t nNewFontIndex =
107 GetFontIndex(GetNativeFontName(nCharset), nCharset, TRUE);
108 if (nNewFontIndex >= 0) {
109 if (KnowWord(nNewFontIndex, word))
110 return nNewFontIndex;
111 }
112 nNewFontIndex = GetFontIndex("Arial Unicode MS", DEFAULT_CHARSET, FALSE);
113 if (nNewFontIndex >= 0) {
114 if (KnowWord(nNewFontIndex, word))
115 return nNewFontIndex;
116 }
117 return -1;
118 }
119
CharCodeFromUnicode(int32_t nFontIndex,FX_WORD word)120 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, FX_WORD word) {
121 if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
122 if (pData->pFont) {
123 if (pData->pFont->IsUnicodeCompatible()) {
124 int nCharCode = pData->pFont->CharCodeFromUnicode(word);
125 pData->pFont->GlyphFromCharCode(nCharCode);
126 return nCharCode;
127 }
128 if (word < 0xFF)
129 return word;
130 }
131 }
132 return -1;
133 }
134
GetNativeFontName(int32_t nCharset)135 CFX_ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
136 // searching native font is slow, so we must save time
137 for (int32_t i = 0, sz = m_aNativeFont.GetSize(); i < sz; i++) {
138 if (CPWL_FontMap_Native* pData = m_aNativeFont.GetAt(i)) {
139 if (pData->nCharset == nCharset)
140 return pData->sFontName;
141 }
142 }
143
144 CFX_ByteString sNew = GetNativeFont(nCharset);
145
146 if (!sNew.IsEmpty()) {
147 CPWL_FontMap_Native* pNewData = new CPWL_FontMap_Native;
148 pNewData->nCharset = nCharset;
149 pNewData->sFontName = sNew;
150
151 m_aNativeFont.Add(pNewData);
152 }
153
154 return sNew;
155 }
156
Empty()157 void CPWL_FontMap::Empty() {
158 {
159 for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++)
160 delete m_aData.GetAt(i);
161
162 m_aData.RemoveAll();
163 }
164 {
165 for (int32_t i = 0, sz = m_aNativeFont.GetSize(); i < sz; i++)
166 delete m_aNativeFont.GetAt(i);
167
168 m_aNativeFont.RemoveAll();
169 }
170 }
171
Initialize()172 void CPWL_FontMap::Initialize() {
173 GetFontIndex(kDefaultFontName, ANSI_CHARSET, FALSE);
174 }
175
IsStandardFont(const CFX_ByteString & sFontName)176 FX_BOOL CPWL_FontMap::IsStandardFont(const CFX_ByteString& sFontName) {
177 for (int32_t i = 0; i < FX_ArraySize(g_sDEStandardFontName); ++i) {
178 if (sFontName == g_sDEStandardFontName[i])
179 return TRUE;
180 }
181
182 return FALSE;
183 }
184
FindFont(const CFX_ByteString & sFontName,int32_t nCharset)185 int32_t CPWL_FontMap::FindFont(const CFX_ByteString& sFontName,
186 int32_t nCharset) {
187 for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++) {
188 if (CPWL_FontMap_Data* pData = m_aData.GetAt(i)) {
189 if (nCharset == DEFAULT_CHARSET || nCharset == pData->nCharset) {
190 if (sFontName.IsEmpty() || pData->sFontName == sFontName)
191 return i;
192 }
193 }
194 }
195
196 return -1;
197 }
198
GetFontIndex(const CFX_ByteString & sFontName,int32_t nCharset,FX_BOOL bFind)199 int32_t CPWL_FontMap::GetFontIndex(const CFX_ByteString& sFontName,
200 int32_t nCharset,
201 FX_BOOL bFind) {
202 int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
203 if (nFontIndex >= 0)
204 return nFontIndex;
205
206 CFX_ByteString sAlias;
207 CPDF_Font* pFont = NULL;
208 if (bFind)
209 pFont = FindFontSameCharset(sAlias, nCharset);
210
211 if (!pFont) {
212 CFX_ByteString sTemp = sFontName;
213 pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
214 sAlias = EncodeFontAlias(sTemp, nCharset);
215 }
216 AddedFont(pFont, sAlias);
217 return AddFontData(pFont, sAlias, nCharset);
218 }
219
GetPWLFontIndex(FX_WORD word,int32_t nCharset)220 int32_t CPWL_FontMap::GetPWLFontIndex(FX_WORD word, int32_t nCharset) {
221 int32_t nFind = -1;
222
223 for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++) {
224 if (CPWL_FontMap_Data* pData = m_aData.GetAt(i)) {
225 if (pData->nCharset == nCharset) {
226 nFind = i;
227 break;
228 }
229 }
230 }
231
232 CPDF_Font* pNewFont = GetPDFFont(nFind);
233
234 if (!pNewFont)
235 return -1;
236
237 CFX_ByteString sAlias = EncodeFontAlias("Arial_Chrome", nCharset);
238 AddedFont(pNewFont, sAlias);
239
240 return AddFontData(pNewFont, sAlias, nCharset);
241 }
242
FindFontSameCharset(CFX_ByteString & sFontAlias,int32_t nCharset)243 CPDF_Font* CPWL_FontMap::FindFontSameCharset(CFX_ByteString& sFontAlias,
244 int32_t nCharset) {
245 return NULL;
246 }
247
AddFontData(CPDF_Font * pFont,const CFX_ByteString & sFontAlias,int32_t nCharset)248 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
249 const CFX_ByteString& sFontAlias,
250 int32_t nCharset) {
251 CPWL_FontMap_Data* pNewData = new CPWL_FontMap_Data;
252 pNewData->pFont = pFont;
253 pNewData->sFontName = sFontAlias;
254 pNewData->nCharset = nCharset;
255
256 m_aData.Add(pNewData);
257
258 return m_aData.GetSize() - 1;
259 }
260
AddedFont(CPDF_Font * pFont,const CFX_ByteString & sFontAlias)261 void CPWL_FontMap::AddedFont(CPDF_Font* pFont,
262 const CFX_ByteString& sFontAlias) {}
263
GetFontName(int32_t nFontIndex)264 CFX_ByteString CPWL_FontMap::GetFontName(int32_t nFontIndex) {
265 if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
266 if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
267 return pData->sFontName;
268 }
269 }
270
271 return "";
272 }
273
GetNativeFont(int32_t nCharset)274 CFX_ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
275 if (nCharset == DEFAULT_CHARSET)
276 nCharset = GetNativeCharset();
277
278 CFX_ByteString sFontName = GetDefaultFontByCharset(nCharset);
279 if (m_pSystemHandler) {
280 if (m_pSystemHandler->FindNativeTrueTypeFont(nCharset, sFontName))
281 return sFontName;
282
283 sFontName = m_pSystemHandler->GetNativeTrueTypeFont(nCharset);
284 }
285 return sFontName;
286 }
287
AddFontToDocument(CPDF_Document * pDoc,CFX_ByteString & sFontName,uint8_t nCharset)288 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
289 CFX_ByteString& sFontName,
290 uint8_t nCharset) {
291 if (IsStandardFont(sFontName))
292 return AddStandardFont(pDoc, sFontName);
293
294 return AddSystemFont(pDoc, sFontName, nCharset);
295 }
296
AddStandardFont(CPDF_Document * pDoc,CFX_ByteString & sFontName)297 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
298 CFX_ByteString& sFontName) {
299 if (!pDoc)
300 return NULL;
301
302 CPDF_Font* pFont = NULL;
303
304 if (sFontName == "ZapfDingbats") {
305 pFont = pDoc->AddStandardFont(sFontName, NULL);
306 } else {
307 CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
308 pFont = pDoc->AddStandardFont(sFontName, &fe);
309 }
310
311 return pFont;
312 }
313
AddSystemFont(CPDF_Document * pDoc,CFX_ByteString & sFontName,uint8_t nCharset)314 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
315 CFX_ByteString& sFontName,
316 uint8_t nCharset) {
317 if (!pDoc)
318 return NULL;
319
320 if (sFontName.IsEmpty())
321 sFontName = GetNativeFont(nCharset);
322 if (nCharset == DEFAULT_CHARSET)
323 nCharset = GetNativeCharset();
324
325 if (m_pSystemHandler)
326 return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
327 nCharset);
328
329 return NULL;
330 }
331
// Builds the alias for a font name / charset pair: the space-stripped font
// name followed by "_" and the charset as at least two upper-case hex digits.
CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName,
                                             int32_t nCharset) {
  CFX_ByteString sCharsetSuffix;
  sCharsetSuffix.Format("_%02X", nCharset);

  const CFX_ByteString sBaseAlias = EncodeFontAlias(sFontName);
  return sBaseAlias + sCharsetSuffix;
}
338
// Returns a copy of |sFontName| with every space character removed.
CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName) {
  CFX_ByteString sAlias(sFontName);
  sAlias.Remove(' ');
  return sAlias;
}
344
GetFontMapCount() const345 int32_t CPWL_FontMap::GetFontMapCount() const {
346 return m_aData.GetSize();
347 }
348
GetFontMapData(int32_t nIndex) const349 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
350 if (nIndex >= 0 && nIndex < m_aData.GetSize()) {
351 return m_aData.GetAt(nIndex);
352 }
353
354 return NULL;
355 }
356
GetNativeCharset()357 int32_t CPWL_FontMap::GetNativeCharset() {
358 uint8_t nCharset = ANSI_CHARSET;
359 int32_t iCodePage = FXSYS_GetACP();
360 switch (iCodePage) {
361 case 932: // Japan
362 nCharset = SHIFTJIS_CHARSET;
363 break;
364 case 936: // Chinese (PRC, Singapore)
365 nCharset = GB2312_CHARSET;
366 break;
367 case 950: // Chinese (Taiwan; Hong Kong SAR, PRC)
368 nCharset = GB2312_CHARSET;
369 break;
370 case 1252: // Windows 3.1 Latin 1 (US, Western Europe)
371 nCharset = ANSI_CHARSET;
372 break;
373 case 874: // Thai
374 nCharset = THAI_CHARSET;
375 break;
376 case 949: // Korean
377 nCharset = HANGUL_CHARSET;
378 break;
379 case 1200: // Unicode (BMP of ISO 10646)
380 nCharset = ANSI_CHARSET;
381 break;
382 case 1250: // Windows 3.1 Eastern European
383 nCharset = EASTEUROPE_CHARSET;
384 break;
385 case 1251: // Windows 3.1 Cyrillic
386 nCharset = RUSSIAN_CHARSET;
387 break;
388 case 1253: // Windows 3.1 Greek
389 nCharset = GREEK_CHARSET;
390 break;
391 case 1254: // Windows 3.1 Turkish
392 nCharset = TURKISH_CHARSET;
393 break;
394 case 1255: // Hebrew
395 nCharset = HEBREW_CHARSET;
396 break;
397 case 1256: // Arabic
398 nCharset = ARABIC_CHARSET;
399 break;
400 case 1257: // Baltic
401 nCharset = BALTIC_CHARSET;
402 break;
403 case 1258: // Vietnamese
404 nCharset = VIETNAMESE_CHARSET;
405 break;
406 case 1361: // Korean(Johab)
407 nCharset = JOHAB_CHARSET;
408 break;
409 }
410 return nCharset;
411 }
412
413 const CPWL_FontMap::CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
414 {ANSI_CHARSET, "Helvetica"}, {GB2312_CHARSET, "SimSun"},
415 {CHINESEBIG5_CHARSET, "MingLiU"}, {SHIFTJIS_CHARSET, "MS Gothic"},
416 {HANGUL_CHARSET, "Batang"}, {RUSSIAN_CHARSET, "Arial"},
417 #if _FXM_PLATFORM_ == _FXM_PLATFORM_LINUX_ || \
418 _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_
419 {EASTEUROPE_CHARSET, "Arial"},
420 #else
421 {EASTEUROPE_CHARSET, "Tahoma"},
422 #endif
423 {ARABIC_CHARSET, "Arial"}, {-1, NULL}};
424
GetDefaultFontByCharset(int32_t nCharset)425 CFX_ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
426 int i = 0;
427 while (defaultTTFMap[i].charset != -1) {
428 if (nCharset == defaultTTFMap[i].charset)
429 return defaultTTFMap[i].fontname;
430 ++i;
431 }
432 return "";
433 }
434
CharSetFromUnicode(FX_WORD word,int32_t nOldCharset)435 int32_t CPWL_FontMap::CharSetFromUnicode(FX_WORD word, int32_t nOldCharset) {
436 if (m_pSystemHandler && (-1 != m_pSystemHandler->GetCharSet()))
437 return m_pSystemHandler->GetCharSet();
438 // to avoid CJK Font to show ASCII
439 if (word < 0x7F)
440 return ANSI_CHARSET;
441 // follow the old charset
442 if (nOldCharset != DEFAULT_CHARSET)
443 return nOldCharset;
444
445 // find new charset
446 if ((word >= 0x4E00 && word <= 0x9FA5) ||
447 (word >= 0xE7C7 && word <= 0xE7F3) ||
448 (word >= 0x3000 && word <= 0x303F) ||
449 (word >= 0x2000 && word <= 0x206F)) {
450 return GB2312_CHARSET;
451 }
452
453 if (((word >= 0x3040) && (word <= 0x309F)) ||
454 ((word >= 0x30A0) && (word <= 0x30FF)) ||
455 ((word >= 0x31F0) && (word <= 0x31FF)) ||
456 ((word >= 0xFF00) && (word <= 0xFFEF))) {
457 return SHIFTJIS_CHARSET;
458 }
459
460 if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
461 ((word >= 0x1100) && (word <= 0x11FF)) ||
462 ((word >= 0x3130) && (word <= 0x318F))) {
463 return HANGUL_CHARSET;
464 }
465
466 if (word >= 0x0E00 && word <= 0x0E7F)
467 return THAI_CHARSET;
468
469 if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
470 return GREEK_CHARSET;
471
472 if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
473 return ARABIC_CHARSET;
474
475 if (word >= 0x0590 && word <= 0x05FF)
476 return HEBREW_CHARSET;
477
478 if (word >= 0x0400 && word <= 0x04FF)
479 return RUSSIAN_CHARSET;
480
481 if (word >= 0x0100 && word <= 0x024F)
482 return EASTEUROPE_CHARSET;
483
484 if (word >= 0x1E00 && word <= 0x1EFF)
485 return VIETNAMESE_CHARSET;
486
487 return ANSI_CHARSET;
488 }
489
CPWL_DocFontMap(IFX_SystemHandler * pSystemHandler,CPDF_Document * pAttachedDoc)490 CPWL_DocFontMap::CPWL_DocFontMap(IFX_SystemHandler* pSystemHandler,
491 CPDF_Document* pAttachedDoc)
492 : CPWL_FontMap(pSystemHandler), m_pAttachedDoc(pAttachedDoc) {}
493
~CPWL_DocFontMap()494 CPWL_DocFontMap::~CPWL_DocFontMap() {}
495
GetDocument()496 CPDF_Document* CPWL_DocFontMap::GetDocument() {
497 return m_pAttachedDoc;
498 }
499