1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "fpdfsdk/include/pdfwindow/PWL_FontMap.h"
8 
9 #include "core/include/fpdfapi/fpdf_module.h"
10 #include "fpdfsdk/include/pdfwindow/PWL_Wnd.h"
11 
namespace {

// Font name passed to GetFontIndex() by CPWL_FontMap::Initialize().
const char kDefaultFontName[] = "Helvetica";

// Names that CPWL_FontMap::IsStandardFont() accepts, grouped by family.
const char* const g_sDEStandardFontName[] = {
    // Courier family.
    "Courier", "Courier-Bold", "Courier-BoldOblique", "Courier-Oblique",
    // Helvetica family.
    "Helvetica", "Helvetica-Bold", "Helvetica-BoldOblique",
    "Helvetica-Oblique",
    // Times family.
    "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic",
    // Remaining names.
    "Symbol", "ZapfDingbats"};

}  // namespace
32 
CPWL_FontMap(IFX_SystemHandler * pSystemHandler)33 CPWL_FontMap::CPWL_FontMap(IFX_SystemHandler* pSystemHandler)
34     : m_pPDFDoc(NULL), m_pSystemHandler(pSystemHandler) {
35   ASSERT(m_pSystemHandler);
36 }
37 
~CPWL_FontMap()38 CPWL_FontMap::~CPWL_FontMap() {
39   delete m_pPDFDoc;
40   m_pPDFDoc = NULL;
41 
42   Empty();
43 }
44 
SetSystemHandler(IFX_SystemHandler * pSystemHandler)45 void CPWL_FontMap::SetSystemHandler(IFX_SystemHandler* pSystemHandler) {
46   m_pSystemHandler = pSystemHandler;
47 }
48 
GetDocument()49 CPDF_Document* CPWL_FontMap::GetDocument() {
50   if (!m_pPDFDoc) {
51     if (CPDF_ModuleMgr::Get()) {
52       m_pPDFDoc = new CPDF_Document;
53       m_pPDFDoc->CreateNewDoc();
54     }
55   }
56 
57   return m_pPDFDoc;
58 }
59 
GetPDFFont(int32_t nFontIndex)60 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
61   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
62     if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
63       return pData->pFont;
64     }
65   }
66 
67   return NULL;
68 }
69 
GetPDFFontAlias(int32_t nFontIndex)70 CFX_ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
71   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
72     if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
73       return pData->sFontName;
74     }
75   }
76 
77   return "";
78 }
79 
KnowWord(int32_t nFontIndex,FX_WORD word)80 FX_BOOL CPWL_FontMap::KnowWord(int32_t nFontIndex, FX_WORD word) {
81   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
82     if (m_aData.GetAt(nFontIndex)) {
83       return CharCodeFromUnicode(nFontIndex, word) >= 0;
84     }
85   }
86 
87   return FALSE;
88 }
89 
GetWordFontIndex(FX_WORD word,int32_t nCharset,int32_t nFontIndex)90 int32_t CPWL_FontMap::GetWordFontIndex(FX_WORD word,
91                                        int32_t nCharset,
92                                        int32_t nFontIndex) {
93   if (nFontIndex > 0) {
94     if (KnowWord(nFontIndex, word))
95       return nFontIndex;
96   } else {
97     if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
98       if (nCharset == DEFAULT_CHARSET || pData->nCharset == SYMBOL_CHARSET ||
99           nCharset == pData->nCharset) {
100         if (KnowWord(0, word))
101           return 0;
102       }
103     }
104   }
105 
106   int32_t nNewFontIndex =
107       GetFontIndex(GetNativeFontName(nCharset), nCharset, TRUE);
108   if (nNewFontIndex >= 0) {
109     if (KnowWord(nNewFontIndex, word))
110       return nNewFontIndex;
111   }
112   nNewFontIndex = GetFontIndex("Arial Unicode MS", DEFAULT_CHARSET, FALSE);
113   if (nNewFontIndex >= 0) {
114     if (KnowWord(nNewFontIndex, word))
115       return nNewFontIndex;
116   }
117   return -1;
118 }
119 
CharCodeFromUnicode(int32_t nFontIndex,FX_WORD word)120 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, FX_WORD word) {
121   if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
122     if (pData->pFont) {
123       if (pData->pFont->IsUnicodeCompatible()) {
124         int nCharCode = pData->pFont->CharCodeFromUnicode(word);
125         pData->pFont->GlyphFromCharCode(nCharCode);
126         return nCharCode;
127       }
128       if (word < 0xFF)
129         return word;
130     }
131   }
132   return -1;
133 }
134 
GetNativeFontName(int32_t nCharset)135 CFX_ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
136   // searching native font is slow, so we must save time
137   for (int32_t i = 0, sz = m_aNativeFont.GetSize(); i < sz; i++) {
138     if (CPWL_FontMap_Native* pData = m_aNativeFont.GetAt(i)) {
139       if (pData->nCharset == nCharset)
140         return pData->sFontName;
141     }
142   }
143 
144   CFX_ByteString sNew = GetNativeFont(nCharset);
145 
146   if (!sNew.IsEmpty()) {
147     CPWL_FontMap_Native* pNewData = new CPWL_FontMap_Native;
148     pNewData->nCharset = nCharset;
149     pNewData->sFontName = sNew;
150 
151     m_aNativeFont.Add(pNewData);
152   }
153 
154   return sNew;
155 }
156 
Empty()157 void CPWL_FontMap::Empty() {
158   {
159     for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++)
160       delete m_aData.GetAt(i);
161 
162     m_aData.RemoveAll();
163   }
164   {
165     for (int32_t i = 0, sz = m_aNativeFont.GetSize(); i < sz; i++)
166       delete m_aNativeFont.GetAt(i);
167 
168     m_aNativeFont.RemoveAll();
169   }
170 }
171 
Initialize()172 void CPWL_FontMap::Initialize() {
173   GetFontIndex(kDefaultFontName, ANSI_CHARSET, FALSE);
174 }
175 
IsStandardFont(const CFX_ByteString & sFontName)176 FX_BOOL CPWL_FontMap::IsStandardFont(const CFX_ByteString& sFontName) {
177   for (int32_t i = 0; i < FX_ArraySize(g_sDEStandardFontName); ++i) {
178     if (sFontName == g_sDEStandardFontName[i])
179       return TRUE;
180   }
181 
182   return FALSE;
183 }
184 
FindFont(const CFX_ByteString & sFontName,int32_t nCharset)185 int32_t CPWL_FontMap::FindFont(const CFX_ByteString& sFontName,
186                                int32_t nCharset) {
187   for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++) {
188     if (CPWL_FontMap_Data* pData = m_aData.GetAt(i)) {
189       if (nCharset == DEFAULT_CHARSET || nCharset == pData->nCharset) {
190         if (sFontName.IsEmpty() || pData->sFontName == sFontName)
191           return i;
192       }
193     }
194   }
195 
196   return -1;
197 }
198 
GetFontIndex(const CFX_ByteString & sFontName,int32_t nCharset,FX_BOOL bFind)199 int32_t CPWL_FontMap::GetFontIndex(const CFX_ByteString& sFontName,
200                                    int32_t nCharset,
201                                    FX_BOOL bFind) {
202   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
203   if (nFontIndex >= 0)
204     return nFontIndex;
205 
206   CFX_ByteString sAlias;
207   CPDF_Font* pFont = NULL;
208   if (bFind)
209     pFont = FindFontSameCharset(sAlias, nCharset);
210 
211   if (!pFont) {
212     CFX_ByteString sTemp = sFontName;
213     pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
214     sAlias = EncodeFontAlias(sTemp, nCharset);
215   }
216   AddedFont(pFont, sAlias);
217   return AddFontData(pFont, sAlias, nCharset);
218 }
219 
GetPWLFontIndex(FX_WORD word,int32_t nCharset)220 int32_t CPWL_FontMap::GetPWLFontIndex(FX_WORD word, int32_t nCharset) {
221   int32_t nFind = -1;
222 
223   for (int32_t i = 0, sz = m_aData.GetSize(); i < sz; i++) {
224     if (CPWL_FontMap_Data* pData = m_aData.GetAt(i)) {
225       if (pData->nCharset == nCharset) {
226         nFind = i;
227         break;
228       }
229     }
230   }
231 
232   CPDF_Font* pNewFont = GetPDFFont(nFind);
233 
234   if (!pNewFont)
235     return -1;
236 
237   CFX_ByteString sAlias = EncodeFontAlias("Arial_Chrome", nCharset);
238   AddedFont(pNewFont, sAlias);
239 
240   return AddFontData(pNewFont, sAlias, nCharset);
241 }
242 
FindFontSameCharset(CFX_ByteString & sFontAlias,int32_t nCharset)243 CPDF_Font* CPWL_FontMap::FindFontSameCharset(CFX_ByteString& sFontAlias,
244                                              int32_t nCharset) {
245   return NULL;
246 }
247 
AddFontData(CPDF_Font * pFont,const CFX_ByteString & sFontAlias,int32_t nCharset)248 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
249                                   const CFX_ByteString& sFontAlias,
250                                   int32_t nCharset) {
251   CPWL_FontMap_Data* pNewData = new CPWL_FontMap_Data;
252   pNewData->pFont = pFont;
253   pNewData->sFontName = sFontAlias;
254   pNewData->nCharset = nCharset;
255 
256   m_aData.Add(pNewData);
257 
258   return m_aData.GetSize() - 1;
259 }
260 
AddedFont(CPDF_Font * pFont,const CFX_ByteString & sFontAlias)261 void CPWL_FontMap::AddedFont(CPDF_Font* pFont,
262                              const CFX_ByteString& sFontAlias) {}
263 
GetFontName(int32_t nFontIndex)264 CFX_ByteString CPWL_FontMap::GetFontName(int32_t nFontIndex) {
265   if (nFontIndex >= 0 && nFontIndex < m_aData.GetSize()) {
266     if (CPWL_FontMap_Data* pData = m_aData.GetAt(nFontIndex)) {
267       return pData->sFontName;
268     }
269   }
270 
271   return "";
272 }
273 
GetNativeFont(int32_t nCharset)274 CFX_ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
275   if (nCharset == DEFAULT_CHARSET)
276     nCharset = GetNativeCharset();
277 
278   CFX_ByteString sFontName = GetDefaultFontByCharset(nCharset);
279   if (m_pSystemHandler) {
280     if (m_pSystemHandler->FindNativeTrueTypeFont(nCharset, sFontName))
281       return sFontName;
282 
283     sFontName = m_pSystemHandler->GetNativeTrueTypeFont(nCharset);
284   }
285   return sFontName;
286 }
287 
AddFontToDocument(CPDF_Document * pDoc,CFX_ByteString & sFontName,uint8_t nCharset)288 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
289                                            CFX_ByteString& sFontName,
290                                            uint8_t nCharset) {
291   if (IsStandardFont(sFontName))
292     return AddStandardFont(pDoc, sFontName);
293 
294   return AddSystemFont(pDoc, sFontName, nCharset);
295 }
296 
AddStandardFont(CPDF_Document * pDoc,CFX_ByteString & sFontName)297 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
298                                          CFX_ByteString& sFontName) {
299   if (!pDoc)
300     return NULL;
301 
302   CPDF_Font* pFont = NULL;
303 
304   if (sFontName == "ZapfDingbats") {
305     pFont = pDoc->AddStandardFont(sFontName, NULL);
306   } else {
307     CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
308     pFont = pDoc->AddStandardFont(sFontName, &fe);
309   }
310 
311   return pFont;
312 }
313 
AddSystemFont(CPDF_Document * pDoc,CFX_ByteString & sFontName,uint8_t nCharset)314 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
315                                        CFX_ByteString& sFontName,
316                                        uint8_t nCharset) {
317   if (!pDoc)
318     return NULL;
319 
320   if (sFontName.IsEmpty())
321     sFontName = GetNativeFont(nCharset);
322   if (nCharset == DEFAULT_CHARSET)
323     nCharset = GetNativeCharset();
324 
325   if (m_pSystemHandler)
326     return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
327                                                         nCharset);
328 
329   return NULL;
330 }
331 
// Builds the alias for a font/charset pair: the space-stripped name
// followed by "_" and the charset formatted with "%02X".
CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName,
                                             int32_t nCharset) {
  CFX_ByteString sSuffix;
  sSuffix.Format("_%02X", nCharset);

  const CFX_ByteString sBase = EncodeFontAlias(sFontName);
  return sBase + sSuffix;
}
338 
// Returns a copy of |sFontName| with every space character removed.
CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName) {
  CFX_ByteString sStripped(sFontName);
  sStripped.Remove(' ');
  return sStripped;
}
344 
GetFontMapCount() const345 int32_t CPWL_FontMap::GetFontMapCount() const {
346   return m_aData.GetSize();
347 }
348 
GetFontMapData(int32_t nIndex) const349 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
350   if (nIndex >= 0 && nIndex < m_aData.GetSize()) {
351     return m_aData.GetAt(nIndex);
352   }
353 
354   return NULL;
355 }
356 
GetNativeCharset()357 int32_t CPWL_FontMap::GetNativeCharset() {
358   uint8_t nCharset = ANSI_CHARSET;
359   int32_t iCodePage = FXSYS_GetACP();
360   switch (iCodePage) {
361     case 932:  // Japan
362       nCharset = SHIFTJIS_CHARSET;
363       break;
364     case 936:  // Chinese (PRC, Singapore)
365       nCharset = GB2312_CHARSET;
366       break;
367     case 950:  // Chinese (Taiwan; Hong Kong SAR, PRC)
368       nCharset = GB2312_CHARSET;
369       break;
370     case 1252:  // Windows 3.1 Latin 1 (US, Western Europe)
371       nCharset = ANSI_CHARSET;
372       break;
373     case 874:  // Thai
374       nCharset = THAI_CHARSET;
375       break;
376     case 949:  // Korean
377       nCharset = HANGUL_CHARSET;
378       break;
379     case 1200:  // Unicode (BMP of ISO 10646)
380       nCharset = ANSI_CHARSET;
381       break;
382     case 1250:  // Windows 3.1 Eastern European
383       nCharset = EASTEUROPE_CHARSET;
384       break;
385     case 1251:  // Windows 3.1 Cyrillic
386       nCharset = RUSSIAN_CHARSET;
387       break;
388     case 1253:  // Windows 3.1 Greek
389       nCharset = GREEK_CHARSET;
390       break;
391     case 1254:  // Windows 3.1 Turkish
392       nCharset = TURKISH_CHARSET;
393       break;
394     case 1255:  // Hebrew
395       nCharset = HEBREW_CHARSET;
396       break;
397     case 1256:  // Arabic
398       nCharset = ARABIC_CHARSET;
399       break;
400     case 1257:  // Baltic
401       nCharset = BALTIC_CHARSET;
402       break;
403     case 1258:  // Vietnamese
404       nCharset = VIETNAMESE_CHARSET;
405       break;
406     case 1361:  // Korean(Johab)
407       nCharset = JOHAB_CHARSET;
408       break;
409   }
410   return nCharset;
411 }
412 
413 const CPWL_FontMap::CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
414     {ANSI_CHARSET, "Helvetica"},      {GB2312_CHARSET, "SimSun"},
415     {CHINESEBIG5_CHARSET, "MingLiU"}, {SHIFTJIS_CHARSET, "MS Gothic"},
416     {HANGUL_CHARSET, "Batang"},       {RUSSIAN_CHARSET, "Arial"},
417 #if _FXM_PLATFORM_ == _FXM_PLATFORM_LINUX_ || \
418     _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_
419     {EASTEUROPE_CHARSET, "Arial"},
420 #else
421     {EASTEUROPE_CHARSET, "Tahoma"},
422 #endif
423     {ARABIC_CHARSET, "Arial"},        {-1, NULL}};
424 
GetDefaultFontByCharset(int32_t nCharset)425 CFX_ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
426   int i = 0;
427   while (defaultTTFMap[i].charset != -1) {
428     if (nCharset == defaultTTFMap[i].charset)
429       return defaultTTFMap[i].fontname;
430     ++i;
431   }
432   return "";
433 }
434 
CharSetFromUnicode(FX_WORD word,int32_t nOldCharset)435 int32_t CPWL_FontMap::CharSetFromUnicode(FX_WORD word, int32_t nOldCharset) {
436   if (m_pSystemHandler && (-1 != m_pSystemHandler->GetCharSet()))
437     return m_pSystemHandler->GetCharSet();
438   // to avoid CJK Font to show ASCII
439   if (word < 0x7F)
440     return ANSI_CHARSET;
441   // follow the old charset
442   if (nOldCharset != DEFAULT_CHARSET)
443     return nOldCharset;
444 
445   // find new charset
446   if ((word >= 0x4E00 && word <= 0x9FA5) ||
447       (word >= 0xE7C7 && word <= 0xE7F3) ||
448       (word >= 0x3000 && word <= 0x303F) ||
449       (word >= 0x2000 && word <= 0x206F)) {
450     return GB2312_CHARSET;
451   }
452 
453   if (((word >= 0x3040) && (word <= 0x309F)) ||
454       ((word >= 0x30A0) && (word <= 0x30FF)) ||
455       ((word >= 0x31F0) && (word <= 0x31FF)) ||
456       ((word >= 0xFF00) && (word <= 0xFFEF))) {
457     return SHIFTJIS_CHARSET;
458   }
459 
460   if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
461       ((word >= 0x1100) && (word <= 0x11FF)) ||
462       ((word >= 0x3130) && (word <= 0x318F))) {
463     return HANGUL_CHARSET;
464   }
465 
466   if (word >= 0x0E00 && word <= 0x0E7F)
467     return THAI_CHARSET;
468 
469   if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
470     return GREEK_CHARSET;
471 
472   if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
473     return ARABIC_CHARSET;
474 
475   if (word >= 0x0590 && word <= 0x05FF)
476     return HEBREW_CHARSET;
477 
478   if (word >= 0x0400 && word <= 0x04FF)
479     return RUSSIAN_CHARSET;
480 
481   if (word >= 0x0100 && word <= 0x024F)
482     return EASTEUROPE_CHARSET;
483 
484   if (word >= 0x1E00 && word <= 0x1EFF)
485     return VIETNAMESE_CHARSET;
486 
487   return ANSI_CHARSET;
488 }
489 
CPWL_DocFontMap(IFX_SystemHandler * pSystemHandler,CPDF_Document * pAttachedDoc)490 CPWL_DocFontMap::CPWL_DocFontMap(IFX_SystemHandler* pSystemHandler,
491                                  CPDF_Document* pAttachedDoc)
492     : CPWL_FontMap(pSystemHandler), m_pAttachedDoc(pAttachedDoc) {}
493 
~CPWL_DocFontMap()494 CPWL_DocFontMap::~CPWL_DocFontMap() {}
495 
GetDocument()496 CPDF_Document* CPWL_DocFontMap::GetDocument() {
497   return m_pAttachedDoc;
498 }
499