1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "fpdfsdk/pdfwindow/PWL_FontMap.h"
8 
9 #include <utility>
10 
11 #include "core/fpdfapi/cpdf_modulemgr.h"
12 #include "core/fpdfapi/font/cpdf_font.h"
13 #include "core/fpdfapi/font/cpdf_fontencoding.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_parser.h"
16 #include "core/fpdfdoc/ipvt_fontmap.h"
17 #include "fpdfsdk/pdfwindow/PWL_Wnd.h"
18 #include "third_party/base/ptr_util.h"
19 #include "third_party/base/stl_util.h"
20 
namespace {

// Font registered by CPWL_FontMap::Initialize().
const char kDefaultFontName[] = "Helvetica";

// Font names that CPWL_FontMap::IsStandardFont() treats as standard fonts;
// these are added through CPDF_Document::AddStandardFont() rather than being
// looked up as system fonts.
const char* const g_sDEStandardFontName[] = {
    "Courier",     "Courier-Bold",   "Courier-BoldOblique",   "Courier-Oblique",
    "Helvetica",   "Helvetica-Bold", "Helvetica-BoldOblique", "Helvetica-Oblique",
    "Times-Roman", "Times-Bold",     "Times-Italic",          "Times-BoldItalic",
    "Symbol",      "ZapfDingbats"};

}  // namespace
41 
CPWL_FontMap(CFX_SystemHandler * pSystemHandler)42 CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler)
43     : m_pSystemHandler(pSystemHandler) {
44   ASSERT(m_pSystemHandler);
45 }
46 
~CPWL_FontMap()47 CPWL_FontMap::~CPWL_FontMap() {
48   Empty();
49 }
50 
GetDocument()51 CPDF_Document* CPWL_FontMap::GetDocument() {
52   if (!m_pPDFDoc) {
53     if (CPDF_ModuleMgr::Get()) {
54       m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
55       m_pPDFDoc->CreateNewDoc();
56     }
57   }
58 
59   return m_pPDFDoc.get();
60 }
61 
GetPDFFont(int32_t nFontIndex)62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
63   if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) {
64     if (m_Data[nFontIndex])
65       return m_Data[nFontIndex]->pFont;
66   }
67   return nullptr;
68 }
69 
GetPDFFontAlias(int32_t nFontIndex)70 CFX_ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
71   if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) {
72     if (m_Data[nFontIndex])
73       return m_Data[nFontIndex]->sFontName;
74   }
75   return CFX_ByteString();
76 }
77 
KnowWord(int32_t nFontIndex,uint16_t word)78 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) {
79   if (nFontIndex >= 0 && nFontIndex < pdfium::CollectionSize<int32_t>(m_Data)) {
80     if (m_Data[nFontIndex])
81       return CharCodeFromUnicode(nFontIndex, word) >= 0;
82   }
83   return false;
84 }
85 
GetWordFontIndex(uint16_t word,int32_t nCharset,int32_t nFontIndex)86 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
87                                        int32_t nCharset,
88                                        int32_t nFontIndex) {
89   if (nFontIndex > 0) {
90     if (KnowWord(nFontIndex, word))
91       return nFontIndex;
92   } else {
93     if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
94       if (nCharset == FXFONT_DEFAULT_CHARSET ||
95           pData->nCharset == FXFONT_SYMBOL_CHARSET ||
96           nCharset == pData->nCharset) {
97         if (KnowWord(0, word))
98           return 0;
99       }
100     }
101   }
102 
103   int32_t nNewFontIndex =
104       GetFontIndex(GetNativeFontName(nCharset), nCharset, true);
105   if (nNewFontIndex >= 0) {
106     if (KnowWord(nNewFontIndex, word))
107       return nNewFontIndex;
108   }
109   nNewFontIndex =
110       GetFontIndex("Arial Unicode MS", FXFONT_DEFAULT_CHARSET, false);
111   if (nNewFontIndex >= 0) {
112     if (KnowWord(nNewFontIndex, word))
113       return nNewFontIndex;
114   }
115   return -1;
116 }
117 
CharCodeFromUnicode(int32_t nFontIndex,uint16_t word)118 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) {
119   if (nFontIndex < 0 || nFontIndex >= pdfium::CollectionSize<int32_t>(m_Data))
120     return -1;
121 
122   CPWL_FontMap_Data* pData = m_Data[nFontIndex].get();
123   if (!pData || !pData->pFont)
124     return -1;
125 
126   if (pData->pFont->IsUnicodeCompatible())
127     return pData->pFont->CharCodeFromUnicode(word);
128 
129   return word < 0xFF ? word : -1;
130 }
131 
GetNativeFontName(int32_t nCharset)132 CFX_ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
133   for (const auto& pData : m_NativeFont) {
134     if (pData && pData->nCharset == nCharset)
135       return pData->sFontName;
136   }
137 
138   CFX_ByteString sNew = GetNativeFont(nCharset);
139   if (sNew.IsEmpty())
140     return CFX_ByteString();
141 
142   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>();
143   pNewData->nCharset = nCharset;
144   pNewData->sFontName = sNew;
145   m_NativeFont.push_back(std::move(pNewData));
146   return sNew;
147 }
148 
Empty()149 void CPWL_FontMap::Empty() {
150   m_Data.clear();
151   m_NativeFont.clear();
152 }
153 
Initialize()154 void CPWL_FontMap::Initialize() {
155   GetFontIndex(kDefaultFontName, FXFONT_ANSI_CHARSET, false);
156 }
157 
IsStandardFont(const CFX_ByteString & sFontName)158 bool CPWL_FontMap::IsStandardFont(const CFX_ByteString& sFontName) {
159   for (size_t i = 0; i < FX_ArraySize(g_sDEStandardFontName); ++i) {
160     if (sFontName == g_sDEStandardFontName[i])
161       return true;
162   }
163 
164   return false;
165 }
166 
FindFont(const CFX_ByteString & sFontName,int32_t nCharset)167 int32_t CPWL_FontMap::FindFont(const CFX_ByteString& sFontName,
168                                int32_t nCharset) {
169   int32_t i = 0;
170   for (const auto& pData : m_Data) {
171     if (pData &&
172         (nCharset == FXFONT_DEFAULT_CHARSET || nCharset == pData->nCharset) &&
173         (sFontName.IsEmpty() || pData->sFontName == sFontName)) {
174       return i;
175     }
176     ++i;
177   }
178   return -1;
179 }
180 
GetFontIndex(const CFX_ByteString & sFontName,int32_t nCharset,bool bFind)181 int32_t CPWL_FontMap::GetFontIndex(const CFX_ByteString& sFontName,
182                                    int32_t nCharset,
183                                    bool bFind) {
184   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
185   if (nFontIndex >= 0)
186     return nFontIndex;
187 
188   CFX_ByteString sAlias;
189   CPDF_Font* pFont = nullptr;
190   if (bFind)
191     pFont = FindFontSameCharset(sAlias, nCharset);
192 
193   if (!pFont) {
194     CFX_ByteString sTemp = sFontName;
195     pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
196     sAlias = EncodeFontAlias(sTemp, nCharset);
197   }
198   AddedFont(pFont, sAlias);
199   return AddFontData(pFont, sAlias, nCharset);
200 }
201 
FindFontSameCharset(CFX_ByteString & sFontAlias,int32_t nCharset)202 CPDF_Font* CPWL_FontMap::FindFontSameCharset(CFX_ByteString& sFontAlias,
203                                              int32_t nCharset) {
204   return nullptr;
205 }
206 
AddFontData(CPDF_Font * pFont,const CFX_ByteString & sFontAlias,int32_t nCharset)207 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
208                                   const CFX_ByteString& sFontAlias,
209                                   int32_t nCharset) {
210   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>();
211   pNewData->pFont = pFont;
212   pNewData->sFontName = sFontAlias;
213   pNewData->nCharset = nCharset;
214   m_Data.push_back(std::move(pNewData));
215   return pdfium::CollectionSize<int32_t>(m_Data) - 1;
216 }
217 
AddedFont(CPDF_Font * pFont,const CFX_ByteString & sFontAlias)218 void CPWL_FontMap::AddedFont(CPDF_Font* pFont,
219                              const CFX_ByteString& sFontAlias) {}
220 
GetNativeFont(int32_t nCharset)221 CFX_ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
222   if (nCharset == FXFONT_DEFAULT_CHARSET)
223     nCharset = GetNativeCharset();
224 
225   CFX_ByteString sFontName = GetDefaultFontByCharset(nCharset);
226   if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
227     return CFX_ByteString();
228 
229   return sFontName;
230 }
231 
AddFontToDocument(CPDF_Document * pDoc,CFX_ByteString & sFontName,uint8_t nCharset)232 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
233                                            CFX_ByteString& sFontName,
234                                            uint8_t nCharset) {
235   if (IsStandardFont(sFontName))
236     return AddStandardFont(pDoc, sFontName);
237 
238   return AddSystemFont(pDoc, sFontName, nCharset);
239 }
240 
AddStandardFont(CPDF_Document * pDoc,CFX_ByteString & sFontName)241 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
242                                          CFX_ByteString& sFontName) {
243   if (!pDoc)
244     return nullptr;
245 
246   CPDF_Font* pFont = nullptr;
247 
248   if (sFontName == "ZapfDingbats") {
249     pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr);
250   } else {
251     CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
252     pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe);
253   }
254 
255   return pFont;
256 }
257 
AddSystemFont(CPDF_Document * pDoc,CFX_ByteString & sFontName,uint8_t nCharset)258 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
259                                        CFX_ByteString& sFontName,
260                                        uint8_t nCharset) {
261   if (!pDoc)
262     return nullptr;
263 
264   if (sFontName.IsEmpty())
265     sFontName = GetNativeFont(nCharset);
266   if (nCharset == FXFONT_DEFAULT_CHARSET)
267     nCharset = GetNativeCharset();
268 
269   return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
270                                                       nCharset);
271 }
272 
// Builds the alias used as the lookup key for a font: the space-stripped
// font name followed by "_" and the charset as at least two upper-case hex
// digits (e.g. "_00").
CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName,
                                             int32_t nCharset) {
  CFX_ByteString sCharsetSuffix;
  sCharsetSuffix.Format("_%02X", nCharset);

  const CFX_ByteString sBaseAlias = EncodeFontAlias(sFontName);
  return sBaseAlias + sCharsetSuffix;
}
279 
// Returns a copy of |sFontName| with every space character removed.
CFX_ByteString CPWL_FontMap::EncodeFontAlias(const CFX_ByteString& sFontName) {
  CFX_ByteString sAlias(sFontName);
  sAlias.Remove(' ');
  return sAlias;
}
285 
GetFontMapData(int32_t nIndex) const286 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
287   if (nIndex < 0 || nIndex >= pdfium::CollectionSize<int32_t>(m_Data))
288     return nullptr;
289 
290   return m_Data[nIndex].get();
291 }
292 
GetNativeCharset()293 int32_t CPWL_FontMap::GetNativeCharset() {
294   uint8_t nCharset = FXFONT_ANSI_CHARSET;
295   int32_t iCodePage = FXSYS_GetACP();
296   switch (iCodePage) {
297     case 932:  // Japan
298       nCharset = FXFONT_SHIFTJIS_CHARSET;
299       break;
300     case 936:  // Chinese (PRC, Singapore)
301       nCharset = FXFONT_GB2312_CHARSET;
302       break;
303     case 950:  // Chinese (Taiwan; Hong Kong SAR, PRC)
304       nCharset = FXFONT_GB2312_CHARSET;
305       break;
306     case 1252:  // Windows 3.1 Latin 1 (US, Western Europe)
307       nCharset = FXFONT_ANSI_CHARSET;
308       break;
309     case 874:  // Thai
310       nCharset = FXFONT_THAI_CHARSET;
311       break;
312     case 949:  // Korean
313       nCharset = FXFONT_HANGUL_CHARSET;
314       break;
315     case 1200:  // Unicode (BMP of ISO 10646)
316       nCharset = FXFONT_ANSI_CHARSET;
317       break;
318     case 1250:  // Windows 3.1 Eastern European
319       nCharset = FXFONT_EASTEUROPE_CHARSET;
320       break;
321     case 1251:  // Windows 3.1 Cyrillic
322       nCharset = FXFONT_RUSSIAN_CHARSET;
323       break;
324     case 1253:  // Windows 3.1 Greek
325       nCharset = FXFONT_GREEK_CHARSET;
326       break;
327     case 1254:  // Windows 3.1 Turkish
328       nCharset = FXFONT_TURKISH_CHARSET;
329       break;
330     case 1255:  // Hebrew
331       nCharset = FXFONT_HEBREW_CHARSET;
332       break;
333     case 1256:  // Arabic
334       nCharset = FXFONT_ARABIC_CHARSET;
335       break;
336     case 1257:  // Baltic
337       nCharset = FXFONT_BALTIC_CHARSET;
338       break;
339     case 1258:  // Vietnamese
340       nCharset = FXFONT_VIETNAMESE_CHARSET;
341       break;
342     case 1361:  // Korean(Johab)
343       nCharset = FXFONT_JOHAB_CHARSET;
344       break;
345   }
346   return nCharset;
347 }
348 
349 const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
350     {FXFONT_ANSI_CHARSET, "Helvetica"},
351     {FXFONT_GB2312_CHARSET, "SimSun"},
352     {FXFONT_CHINESEBIG5_CHARSET, "MingLiU"},
353     {FXFONT_SHIFTJIS_CHARSET, "MS Gothic"},
354     {FXFONT_HANGUL_CHARSET, "Batang"},
355     {FXFONT_RUSSIAN_CHARSET, "Arial"},
356 #if _FXM_PLATFORM_ == _FXM_PLATFORM_LINUX_ || \
357     _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_
358     {FXFONT_EASTEUROPE_CHARSET, "Arial"},
359 #else
360     {FXFONT_EASTEUROPE_CHARSET, "Tahoma"},
361 #endif
362     {FXFONT_ARABIC_CHARSET, "Arial"},
363     {-1, nullptr}};
364 
GetDefaultFontByCharset(int32_t nCharset)365 CFX_ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
366   int i = 0;
367   while (defaultTTFMap[i].charset != -1) {
368     if (nCharset == defaultTTFMap[i].charset)
369       return defaultTTFMap[i].fontname;
370     ++i;
371   }
372   return "";
373 }
374 
CharSetFromUnicode(uint16_t word,int32_t nOldCharset)375 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
376   // to avoid CJK Font to show ASCII
377   if (word < 0x7F)
378     return FXFONT_ANSI_CHARSET;
379   // follow the old charset
380   if (nOldCharset != FXFONT_DEFAULT_CHARSET)
381     return nOldCharset;
382 
383   // find new charset
384   if ((word >= 0x4E00 && word <= 0x9FA5) ||
385       (word >= 0xE7C7 && word <= 0xE7F3) ||
386       (word >= 0x3000 && word <= 0x303F) ||
387       (word >= 0x2000 && word <= 0x206F)) {
388     return FXFONT_GB2312_CHARSET;
389   }
390 
391   if (((word >= 0x3040) && (word <= 0x309F)) ||
392       ((word >= 0x30A0) && (word <= 0x30FF)) ||
393       ((word >= 0x31F0) && (word <= 0x31FF)) ||
394       ((word >= 0xFF00) && (word <= 0xFFEF))) {
395     return FXFONT_SHIFTJIS_CHARSET;
396   }
397 
398   if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
399       ((word >= 0x1100) && (word <= 0x11FF)) ||
400       ((word >= 0x3130) && (word <= 0x318F))) {
401     return FXFONT_HANGUL_CHARSET;
402   }
403 
404   if (word >= 0x0E00 && word <= 0x0E7F)
405     return FXFONT_THAI_CHARSET;
406 
407   if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
408     return FXFONT_GREEK_CHARSET;
409 
410   if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
411     return FXFONT_ARABIC_CHARSET;
412 
413   if (word >= 0x0590 && word <= 0x05FF)
414     return FXFONT_HEBREW_CHARSET;
415 
416   if (word >= 0x0400 && word <= 0x04FF)
417     return FXFONT_RUSSIAN_CHARSET;
418 
419   if (word >= 0x0100 && word <= 0x024F)
420     return FXFONT_EASTEUROPE_CHARSET;
421 
422   if (word >= 0x1E00 && word <= 0x1EFF)
423     return FXFONT_VIETNAMESE_CHARSET;
424 
425   return FXFONT_ANSI_CHARSET;
426 }
427