1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "fpdfsdk/pwl/cpwl_font_map.h"
8 
9 #include <utility>
10 
11 #include "core/fpdfapi/cpdf_modulemgr.h"
12 #include "core/fpdfapi/font/cpdf_font.h"
13 #include "core/fpdfapi/font/cpdf_fontencoding.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_parser.h"
16 #include "core/fpdfdoc/ipvt_fontmap.h"
17 #include "core/fxcrt/fx_codepage.h"
18 #include "fpdfsdk/pwl/cpwl_wnd.h"
19 #include "third_party/base/ptr_util.h"
20 #include "third_party/base/stl_util.h"
21 
namespace {

// Font requested by CPWL_FontMap::Initialize().
constexpr char kDefaultFontName[] = "Helvetica";

// Names that CPWL_FontMap::IsStandardFont() compares a font name against.
constexpr const char* g_sDEStandardFontName[] = {
    "Courier",      "Courier-Bold",   "Courier-BoldOblique",
    "Courier-Oblique",
    "Helvetica",    "Helvetica-Bold", "Helvetica-BoldOblique",
    "Helvetica-Oblique",
    "Times-Roman",  "Times-Bold",     "Times-Italic",
    "Times-BoldItalic",
    "Symbol",       "ZapfDingbats"};

}  // namespace
42 
CPWL_FontMap(CFX_SystemHandler * pSystemHandler)43 CPWL_FontMap::CPWL_FontMap(CFX_SystemHandler* pSystemHandler)
44     : m_pSystemHandler(pSystemHandler) {
45   ASSERT(m_pSystemHandler);
46 }
47 
~CPWL_FontMap()48 CPWL_FontMap::~CPWL_FontMap() {
49   Empty();
50 }
51 
GetDocument()52 CPDF_Document* CPWL_FontMap::GetDocument() {
53   if (!m_pPDFDoc) {
54     if (CPDF_ModuleMgr::Get()) {
55       m_pPDFDoc = pdfium::MakeUnique<CPDF_Document>(nullptr);
56       m_pPDFDoc->CreateNewDoc();
57     }
58   }
59   return m_pPDFDoc.get();
60 }
61 
GetPDFFont(int32_t nFontIndex)62 CPDF_Font* CPWL_FontMap::GetPDFFont(int32_t nFontIndex) {
63   if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex])
64     return m_Data[nFontIndex]->pFont;
65 
66   return nullptr;
67 }
68 
GetPDFFontAlias(int32_t nFontIndex)69 ByteString CPWL_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
70   if (pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex])
71     return m_Data[nFontIndex]->sFontName;
72 
73   return ByteString();
74 }
75 
KnowWord(int32_t nFontIndex,uint16_t word)76 bool CPWL_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) {
77   return pdfium::IndexInBounds(m_Data, nFontIndex) && m_Data[nFontIndex] &&
78          CharCodeFromUnicode(nFontIndex, word) >= 0;
79 }
80 
GetWordFontIndex(uint16_t word,int32_t nCharset,int32_t nFontIndex)81 int32_t CPWL_FontMap::GetWordFontIndex(uint16_t word,
82                                        int32_t nCharset,
83                                        int32_t nFontIndex) {
84   if (nFontIndex > 0) {
85     if (KnowWord(nFontIndex, word))
86       return nFontIndex;
87   } else {
88     if (const CPWL_FontMap_Data* pData = GetFontMapData(0)) {
89       if (nCharset == FX_CHARSET_Default ||
90           pData->nCharset == FX_CHARSET_Symbol || nCharset == pData->nCharset) {
91         if (KnowWord(0, word))
92           return 0;
93       }
94     }
95   }
96 
97   int32_t nNewFontIndex =
98       GetFontIndex(GetNativeFontName(nCharset), nCharset, true);
99   if (nNewFontIndex >= 0) {
100     if (KnowWord(nNewFontIndex, word))
101       return nNewFontIndex;
102   }
103   nNewFontIndex = GetFontIndex("Arial Unicode MS", FX_CHARSET_Default, false);
104   if (nNewFontIndex >= 0) {
105     if (KnowWord(nNewFontIndex, word))
106       return nNewFontIndex;
107   }
108   return -1;
109 }
110 
CharCodeFromUnicode(int32_t nFontIndex,uint16_t word)111 int32_t CPWL_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) {
112   if (!pdfium::IndexInBounds(m_Data, nFontIndex))
113     return -1;
114 
115   CPWL_FontMap_Data* pData = m_Data[nFontIndex].get();
116   if (!pData || !pData->pFont)
117     return -1;
118 
119   if (pData->pFont->IsUnicodeCompatible())
120     return pData->pFont->CharCodeFromUnicode(word);
121 
122   return word < 0xFF ? word : -1;
123 }
124 
GetNativeFontName(int32_t nCharset)125 ByteString CPWL_FontMap::GetNativeFontName(int32_t nCharset) {
126   for (const auto& pData : m_NativeFont) {
127     if (pData && pData->nCharset == nCharset)
128       return pData->sFontName;
129   }
130 
131   ByteString sNew = GetNativeFont(nCharset);
132   if (sNew.IsEmpty())
133     return ByteString();
134 
135   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Native>();
136   pNewData->nCharset = nCharset;
137   pNewData->sFontName = sNew;
138   m_NativeFont.push_back(std::move(pNewData));
139   return sNew;
140 }
141 
Empty()142 void CPWL_FontMap::Empty() {
143   m_Data.clear();
144   m_NativeFont.clear();
145 }
146 
Initialize()147 void CPWL_FontMap::Initialize() {
148   GetFontIndex(kDefaultFontName, FX_CHARSET_ANSI, false);
149 }
150 
IsStandardFont(const ByteString & sFontName)151 bool CPWL_FontMap::IsStandardFont(const ByteString& sFontName) {
152   for (const char* name : g_sDEStandardFontName) {
153     if (sFontName == name)
154       return true;
155   }
156 
157   return false;
158 }
159 
FindFont(const ByteString & sFontName,int32_t nCharset)160 int32_t CPWL_FontMap::FindFont(const ByteString& sFontName, int32_t nCharset) {
161   int32_t i = 0;
162   for (const auto& pData : m_Data) {
163     if (pData &&
164         (nCharset == FX_CHARSET_Default || nCharset == pData->nCharset) &&
165         (sFontName.IsEmpty() || pData->sFontName == sFontName)) {
166       return i;
167     }
168     ++i;
169   }
170   return -1;
171 }
172 
GetFontIndex(const ByteString & sFontName,int32_t nCharset,bool bFind)173 int32_t CPWL_FontMap::GetFontIndex(const ByteString& sFontName,
174                                    int32_t nCharset,
175                                    bool bFind) {
176   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
177   if (nFontIndex >= 0)
178     return nFontIndex;
179 
180   ByteString sAlias;
181   CPDF_Font* pFont = bFind ? FindFontSameCharset(&sAlias, nCharset) : nullptr;
182   if (!pFont) {
183     ByteString sTemp = sFontName;
184     pFont = AddFontToDocument(GetDocument(), sTemp, nCharset);
185     sAlias = EncodeFontAlias(sTemp, nCharset);
186   }
187   AddedFont(pFont, sAlias);
188   return AddFontData(pFont, sAlias, nCharset);
189 }
190 
FindFontSameCharset(ByteString * sFontAlias,int32_t nCharset)191 CPDF_Font* CPWL_FontMap::FindFontSameCharset(ByteString* sFontAlias,
192                                              int32_t nCharset) {
193   return nullptr;
194 }
195 
AddFontData(CPDF_Font * pFont,const ByteString & sFontAlias,int32_t nCharset)196 int32_t CPWL_FontMap::AddFontData(CPDF_Font* pFont,
197                                   const ByteString& sFontAlias,
198                                   int32_t nCharset) {
199   auto pNewData = pdfium::MakeUnique<CPWL_FontMap_Data>();
200   pNewData->pFont = pFont;
201   pNewData->sFontName = sFontAlias;
202   pNewData->nCharset = nCharset;
203   m_Data.push_back(std::move(pNewData));
204   return pdfium::CollectionSize<int32_t>(m_Data) - 1;
205 }
206 
AddedFont(CPDF_Font * pFont,const ByteString & sFontAlias)207 void CPWL_FontMap::AddedFont(CPDF_Font* pFont, const ByteString& sFontAlias) {}
208 
GetNativeFont(int32_t nCharset)209 ByteString CPWL_FontMap::GetNativeFont(int32_t nCharset) {
210   if (nCharset == FX_CHARSET_Default)
211     nCharset = GetNativeCharset();
212 
213   ByteString sFontName = GetDefaultFontByCharset(nCharset);
214   if (!m_pSystemHandler->FindNativeTrueTypeFont(sFontName))
215     return ByteString();
216 
217   return sFontName;
218 }
219 
AddFontToDocument(CPDF_Document * pDoc,ByteString & sFontName,uint8_t nCharset)220 CPDF_Font* CPWL_FontMap::AddFontToDocument(CPDF_Document* pDoc,
221                                            ByteString& sFontName,
222                                            uint8_t nCharset) {
223   if (IsStandardFont(sFontName))
224     return AddStandardFont(pDoc, sFontName);
225 
226   return AddSystemFont(pDoc, sFontName, nCharset);
227 }
228 
AddStandardFont(CPDF_Document * pDoc,ByteString & sFontName)229 CPDF_Font* CPWL_FontMap::AddStandardFont(CPDF_Document* pDoc,
230                                          ByteString& sFontName) {
231   if (!pDoc)
232     return nullptr;
233 
234   CPDF_Font* pFont = nullptr;
235 
236   if (sFontName == "ZapfDingbats") {
237     pFont = pDoc->AddStandardFont(sFontName.c_str(), nullptr);
238   } else {
239     CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
240     pFont = pDoc->AddStandardFont(sFontName.c_str(), &fe);
241   }
242 
243   return pFont;
244 }
245 
AddSystemFont(CPDF_Document * pDoc,ByteString & sFontName,uint8_t nCharset)246 CPDF_Font* CPWL_FontMap::AddSystemFont(CPDF_Document* pDoc,
247                                        ByteString& sFontName,
248                                        uint8_t nCharset) {
249   if (!pDoc)
250     return nullptr;
251 
252   if (sFontName.IsEmpty())
253     sFontName = GetNativeFont(nCharset);
254   if (nCharset == FX_CHARSET_Default)
255     nCharset = GetNativeCharset();
256 
257   return m_pSystemHandler->AddNativeTrueTypeFontToPDF(pDoc, sFontName,
258                                                       nCharset);
259 }
260 
// Builds the alias for a font/charset pair: the space-stripped font name
// followed by '_' and |nCharset| formatted with "%02X".
ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName,
                                         int32_t nCharset) {
  const ByteString sBase = EncodeFontAlias(sFontName);
  const ByteString sSuffix = ByteString::Format("_%02X", nCharset);
  return sBase + sSuffix;
}
265 
// Returns a copy of |sFontName| after calling Remove(' ') on it.
ByteString CPWL_FontMap::EncodeFontAlias(const ByteString& sFontName) {
  ByteString sStripped(sFontName);
  sStripped.Remove(' ');
  return sStripped;
}
271 
GetFontMapData(int32_t nIndex) const272 const CPWL_FontMap_Data* CPWL_FontMap::GetFontMapData(int32_t nIndex) const {
273   return pdfium::IndexInBounds(m_Data, nIndex) ? m_Data[nIndex].get() : nullptr;
274 }
275 
GetNativeCharset()276 int32_t CPWL_FontMap::GetNativeCharset() {
277   uint8_t nCharset = FX_CHARSET_ANSI;
278   int32_t iCodePage = FXSYS_GetACP();
279   switch (iCodePage) {
280     case FX_CODEPAGE_ShiftJIS:
281       nCharset = FX_CHARSET_ShiftJIS;
282       break;
283     case FX_CODEPAGE_ChineseSimplified:
284       nCharset = FX_CHARSET_ChineseSimplified;
285       break;
286     case FX_CODEPAGE_ChineseTraditional:
287       nCharset = FX_CHARSET_ChineseTraditional;
288       break;
289     case FX_CODEPAGE_MSWin_WesternEuropean:
290       nCharset = FX_CHARSET_ANSI;
291       break;
292     case FX_CODEPAGE_MSDOS_Thai:
293       nCharset = FX_CHARSET_Thai;
294       break;
295     case FX_CODEPAGE_Hangul:
296       nCharset = FX_CHARSET_Hangul;
297       break;
298     case FX_CODEPAGE_UTF16LE:
299       nCharset = FX_CHARSET_ANSI;
300       break;
301     case FX_CODEPAGE_MSWin_EasternEuropean:
302       nCharset = FX_CHARSET_MSWin_EasternEuropean;
303       break;
304     case FX_CODEPAGE_MSWin_Cyrillic:
305       nCharset = FX_CHARSET_MSWin_Cyrillic;
306       break;
307     case FX_CODEPAGE_MSWin_Greek:
308       nCharset = FX_CHARSET_MSWin_Greek;
309       break;
310     case FX_CODEPAGE_MSWin_Turkish:
311       nCharset = FX_CHARSET_MSWin_Turkish;
312       break;
313     case FX_CODEPAGE_MSWin_Hebrew:
314       nCharset = FX_CHARSET_MSWin_Hebrew;
315       break;
316     case FX_CODEPAGE_MSWin_Arabic:
317       nCharset = FX_CHARSET_MSWin_Arabic;
318       break;
319     case FX_CODEPAGE_MSWin_Baltic:
320       nCharset = FX_CHARSET_MSWin_Baltic;
321       break;
322     case FX_CODEPAGE_MSWin_Vietnamese:
323       nCharset = FX_CHARSET_MSWin_Vietnamese;
324       break;
325     case FX_CODEPAGE_Johab:
326       nCharset = FX_CHARSET_Johab;
327       break;
328   }
329   return nCharset;
330 }
331 
332 const FPDF_CharsetFontMap CPWL_FontMap::defaultTTFMap[] = {
333     {FX_CHARSET_ANSI, "Helvetica"},
334     {FX_CHARSET_ChineseSimplified, "SimSun"},
335     {FX_CHARSET_ChineseTraditional, "MingLiU"},
336     {FX_CHARSET_ShiftJIS, "MS Gothic"},
337     {FX_CHARSET_Hangul, "Batang"},
338     {FX_CHARSET_MSWin_Cyrillic, "Arial"},
339 #if _FX_PLATFORM_ == _FX_PLATFORM_LINUX_ || _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
340     {FX_CHARSET_MSWin_EasternEuropean, "Arial"},
341 #else
342     {FX_CHARSET_MSWin_EasternEuropean, "Tahoma"},
343 #endif
344     {FX_CHARSET_MSWin_Arabic, "Arial"},
345     {-1, nullptr}};
346 
GetDefaultFontByCharset(int32_t nCharset)347 ByteString CPWL_FontMap::GetDefaultFontByCharset(int32_t nCharset) {
348   int i = 0;
349   while (defaultTTFMap[i].charset != -1) {
350     if (nCharset == defaultTTFMap[i].charset)
351       return defaultTTFMap[i].fontname;
352     ++i;
353   }
354   return "";
355 }
356 
CharSetFromUnicode(uint16_t word,int32_t nOldCharset)357 int32_t CPWL_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
358   // to avoid CJK Font to show ASCII
359   if (word < 0x7F)
360     return FX_CHARSET_ANSI;
361   // follow the old charset
362   if (nOldCharset != FX_CHARSET_Default)
363     return nOldCharset;
364 
365   // find new charset
366   if ((word >= 0x4E00 && word <= 0x9FA5) ||
367       (word >= 0xE7C7 && word <= 0xE7F3) ||
368       (word >= 0x3000 && word <= 0x303F) ||
369       (word >= 0x2000 && word <= 0x206F)) {
370     return FX_CHARSET_ChineseSimplified;
371   }
372 
373   if (((word >= 0x3040) && (word <= 0x309F)) ||
374       ((word >= 0x30A0) && (word <= 0x30FF)) ||
375       ((word >= 0x31F0) && (word <= 0x31FF)) ||
376       ((word >= 0xFF00) && (word <= 0xFFEF))) {
377     return FX_CHARSET_ShiftJIS;
378   }
379 
380   if (((word >= 0xAC00) && (word <= 0xD7AF)) ||
381       ((word >= 0x1100) && (word <= 0x11FF)) ||
382       ((word >= 0x3130) && (word <= 0x318F))) {
383     return FX_CHARSET_Hangul;
384   }
385 
386   if (word >= 0x0E00 && word <= 0x0E7F)
387     return FX_CHARSET_Thai;
388 
389   if ((word >= 0x0370 && word <= 0x03FF) || (word >= 0x1F00 && word <= 0x1FFF))
390     return FX_CHARSET_MSWin_Greek;
391 
392   if ((word >= 0x0600 && word <= 0x06FF) || (word >= 0xFB50 && word <= 0xFEFC))
393     return FX_CHARSET_MSWin_Arabic;
394 
395   if (word >= 0x0590 && word <= 0x05FF)
396     return FX_CHARSET_MSWin_Hebrew;
397 
398   if (word >= 0x0400 && word <= 0x04FF)
399     return FX_CHARSET_MSWin_Cyrillic;
400 
401   if (word >= 0x0100 && word <= 0x024F)
402     return FX_CHARSET_MSWin_EasternEuropean;
403 
404   if (word >= 0x1E00 && word <= 0x1EFF)
405     return FX_CHARSET_MSWin_Vietnamese;
406 
407   return FX_CHARSET_ANSI;
408 }
409