1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cba_fontmap.h"
8 
9 #include <memory>
10 #include <utility>
11 
12 #include "constants/annotation_common.h"
13 #include "core/fpdfapi/font/cpdf_font.h"
14 #include "core/fpdfapi/font/cpdf_fontencoding.h"
15 #include "core/fpdfapi/page/cpdf_docpagedata.h"
16 #include "core/fpdfapi/page/cpdf_page.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_document.h"
19 #include "core/fpdfapi/parser/cpdf_parser.h"
20 #include "core/fpdfapi/parser/cpdf_reference.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
23 #include "core/fpdfdoc/cpdf_defaultappearance.h"
24 #include "core/fpdfdoc/cpdf_formfield.h"
25 #include "core/fpdfdoc/ipvt_fontmap.h"
26 #include "core/fxcrt/fx_codepage.h"
27 #include "core/fxge/cfx_fontmapper.h"
28 #include "core/fxge/cfx_fontmgr.h"
29 #include "core/fxge/cfx_gemodule.h"
30 #include "core/fxge/cfx_substfont.h"
31 #include "third_party/base/ptr_util.h"
32 #include "third_party/base/stl_util.h"
33 
34 namespace {
35 
FindNativeTrueTypeFont(ByteString sFontFaceName)36 bool FindNativeTrueTypeFont(ByteString sFontFaceName) {
37   CFX_FontMgr* pFontMgr = CFX_GEModule::Get()->GetFontMgr();
38   if (!pFontMgr)
39     return false;
40 
41   CFX_FontMapper* pFontMapper = pFontMgr->GetBuiltinMapper();
42   pFontMapper->LoadInstalledFonts();
43 
44   for (const auto& font : pFontMapper->m_InstalledTTFonts) {
45     if (font.Compare(sFontFaceName.AsStringView()))
46       return true;
47   }
48   for (const auto& fontPair : pFontMapper->m_LocalizedTTFonts) {
49     if (fontPair.first.Compare(sFontFaceName.AsStringView()))
50       return true;
51   }
52   return false;
53 }
54 
AddNativeTrueTypeFontToPDF(CPDF_Document * pDoc,ByteString sFontFaceName,uint8_t nCharset)55 RetainPtr<CPDF_Font> AddNativeTrueTypeFontToPDF(CPDF_Document* pDoc,
56                                                 ByteString sFontFaceName,
57                                                 uint8_t nCharset) {
58   if (!pDoc)
59     return nullptr;
60 
61   auto pFXFont = pdfium::MakeUnique<CFX_Font>();
62   pFXFont->LoadSubst(sFontFaceName, true, 0, 0, 0,
63                      FX_GetCodePageFromCharset(nCharset), false);
64 
65   auto* pDocPageData = CPDF_DocPageData::FromDocument(pDoc);
66   return pDocPageData->AddFont(std::move(pFXFont), nCharset);
67 }
68 
69 }  // namespace
70 
// Out-of-line defaulted constructor and destructor for the per-font record
// (font, alias and charset) that is stored in |m_Data|.
CBA_FontMap::Data::Data() = default;

CBA_FontMap::Data::~Data() = default;
74 
// Binds the font map to |pDocument| and |pAnnotDict| and immediately runs
// Initialize() to populate the font table. |pAnnotDict| is not null-checked
// and is dereferenced during initialization.
// NOTE(review): Initialize() runs before any SetAPType() call, so |m_sAPType|
// still holds its initial value at this point - confirm this is intended.
CBA_FontMap::CBA_FontMap(CPDF_Document* pDocument, CPDF_Dictionary* pAnnotDict)
    : m_pDocument(pDocument), m_pAnnotDict(pAnnotDict) {
  Initialize();
}
79 
// Empties the font table and the native-font-name cache via Clear().
CBA_FontMap::~CBA_FontMap() {
  Clear();
}
83 
GetPDFFont(int32_t nFontIndex)84 RetainPtr<CPDF_Font> CBA_FontMap::GetPDFFont(int32_t nFontIndex) {
85   if (pdfium::IndexInBounds(m_Data, nFontIndex))
86     return m_Data[nFontIndex]->pFont;
87   return nullptr;
88 }
89 
GetPDFFontAlias(int32_t nFontIndex)90 ByteString CBA_FontMap::GetPDFFontAlias(int32_t nFontIndex) {
91   if (pdfium::IndexInBounds(m_Data, nFontIndex))
92     return m_Data[nFontIndex]->sFontName;
93   return ByteString();
94 }
95 
GetWordFontIndex(uint16_t word,int32_t nCharset,int32_t nFontIndex)96 int32_t CBA_FontMap::GetWordFontIndex(uint16_t word,
97                                       int32_t nCharset,
98                                       int32_t nFontIndex) {
99   if (nFontIndex > 0) {
100     if (KnowWord(nFontIndex, word))
101       return nFontIndex;
102   } else {
103     if (!m_Data.empty()) {
104       const Data* pData = m_Data.front().get();
105       if (nCharset == FX_CHARSET_Default ||
106           pData->nCharset == FX_CHARSET_Symbol || nCharset == pData->nCharset) {
107         if (KnowWord(0, word))
108           return 0;
109       }
110     }
111   }
112 
113   int32_t nNewFontIndex =
114       GetFontIndex(GetCachedNativeFontName(nCharset), nCharset, true);
115   if (nNewFontIndex >= 0) {
116     if (KnowWord(nNewFontIndex, word))
117       return nNewFontIndex;
118   }
119   nNewFontIndex = GetFontIndex(CFX_Font::kUniversalDefaultFontName,
120                                FX_CHARSET_Default, false);
121   if (nNewFontIndex >= 0) {
122     if (KnowWord(nNewFontIndex, word))
123       return nNewFontIndex;
124   }
125   return -1;
126 }
127 
CharCodeFromUnicode(int32_t nFontIndex,uint16_t word)128 int32_t CBA_FontMap::CharCodeFromUnicode(int32_t nFontIndex, uint16_t word) {
129   if (!pdfium::IndexInBounds(m_Data, nFontIndex))
130     return -1;
131 
132   Data* pData = m_Data[nFontIndex].get();
133   if (!pData->pFont)
134     return -1;
135 
136   if (pData->pFont->IsUnicodeCompatible())
137     return pData->pFont->CharCodeFromUnicode(word);
138 
139   return word < 0xFF ? word : -1;
140 }
141 
CharSetFromUnicode(uint16_t word,int32_t nOldCharset)142 int32_t CBA_FontMap::CharSetFromUnicode(uint16_t word, int32_t nOldCharset) {
143   // to avoid CJK Font to show ASCII
144   if (word < 0x7F)
145     return FX_CHARSET_ANSI;
146 
147   // follow the old charset
148   if (nOldCharset != FX_CHARSET_Default)
149     return nOldCharset;
150 
151   return CFX_Font::GetCharSetFromUnicode(word);
152 }
153 
GetNativeCharset()154 int32_t CBA_FontMap::GetNativeCharset() {
155   return FX_GetCharsetFromCodePage(FXSYS_GetACP());
156 }
157 
// Returns the map to an empty state: drops the font table and the native font
// cache (via Clear()), and forgets the default font together with its alias so
// that a later Initialize() looks the default font up again.
void CBA_FontMap::Reset() {
  Clear();
  m_pDefaultFont = nullptr;
  m_sDefaultFontName.clear();
}
163 
// Stores |sAPType| as the appearance key used by AddFontToAnnotDict() and
// rebuilds the whole font map from scratch. The new type is stored first so
// that the fonts added by Initialize() are attached under that key.
void CBA_FontMap::SetAPType(const ByteString& sAPType) {
  m_sAPType = sAPType;

  Reset();
  Initialize();
}
170 
Initialize()171 void CBA_FontMap::Initialize() {
172   int32_t nCharset = FX_CHARSET_Default;
173 
174   if (!m_pDefaultFont) {
175     m_pDefaultFont = GetAnnotDefaultFont(&m_sDefaultFontName);
176     if (m_pDefaultFont) {
177       if (const CFX_SubstFont* pSubstFont = m_pDefaultFont->GetSubstFont()) {
178         nCharset = pSubstFont->m_Charset;
179       } else {
180         if (m_sDefaultFontName == "Wingdings" ||
181             m_sDefaultFontName == "Wingdings2" ||
182             m_sDefaultFontName == "Wingdings3" ||
183             m_sDefaultFontName == "Webdings")
184           nCharset = FX_CHARSET_Symbol;
185         else
186           nCharset = FX_CHARSET_ANSI;
187       }
188       AddFontData(m_pDefaultFont, m_sDefaultFontName, nCharset);
189       AddFontToAnnotDict(m_pDefaultFont, m_sDefaultFontName);
190     }
191   }
192 
193   if (nCharset != FX_CHARSET_ANSI)
194     GetFontIndex(CFX_Font::kDefaultAnsiFontName, FX_CHARSET_ANSI, false);
195 }
196 
FindFontSameCharset(ByteString * sFontAlias,int32_t nCharset)197 RetainPtr<CPDF_Font> CBA_FontMap::FindFontSameCharset(ByteString* sFontAlias,
198                                                       int32_t nCharset) {
199   if (m_pAnnotDict->GetStringFor(pdfium::annotation::kSubtype) != "Widget")
200     return nullptr;
201 
202   const CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
203   if (!pRootDict)
204     return nullptr;
205 
206   const CPDF_Dictionary* pAcroFormDict = pRootDict->GetDictFor("AcroForm");
207   if (!pAcroFormDict)
208     return nullptr;
209 
210   const CPDF_Dictionary* pDRDict = pAcroFormDict->GetDictFor("DR");
211   if (!pDRDict)
212     return nullptr;
213 
214   return FindResFontSameCharset(pDRDict, sFontAlias, nCharset);
215 }
216 
FindResFontSameCharset(const CPDF_Dictionary * pResDict,ByteString * sFontAlias,int32_t nCharset)217 RetainPtr<CPDF_Font> CBA_FontMap::FindResFontSameCharset(
218     const CPDF_Dictionary* pResDict,
219     ByteString* sFontAlias,
220     int32_t nCharset) {
221   if (!pResDict)
222     return nullptr;
223 
224   const CPDF_Dictionary* pFonts = pResDict->GetDictFor("Font");
225   if (!pFonts)
226     return nullptr;
227 
228   RetainPtr<CPDF_Font> pFind;
229   CPDF_DictionaryLocker locker(pFonts);
230   for (const auto& it : locker) {
231     const ByteString& csKey = it.first;
232     if (!it.second)
233       continue;
234 
235     CPDF_Dictionary* pElement = ToDictionary(it.second->GetDirect());
236     if (!pElement || pElement->GetStringFor("Type") != "Font")
237       continue;
238 
239     auto* pData = CPDF_DocPageData::FromDocument(m_pDocument.Get());
240     RetainPtr<CPDF_Font> pFont = pData->GetFont(pElement);
241     if (!pFont)
242       continue;
243 
244     const CFX_SubstFont* pSubst = pFont->GetSubstFont();
245     if (!pSubst)
246       continue;
247 
248     if (pSubst->m_Charset == nCharset) {
249       *sFontAlias = csKey;
250       pFind = std::move(pFont);
251     }
252   }
253   return pFind;
254 }
255 
GetAnnotDefaultFont(ByteString * sAlias)256 RetainPtr<CPDF_Font> CBA_FontMap::GetAnnotDefaultFont(ByteString* sAlias) {
257   CPDF_Dictionary* pAcroFormDict = nullptr;
258   const bool bWidget =
259       (m_pAnnotDict->GetStringFor(pdfium::annotation::kSubtype) == "Widget");
260   if (bWidget) {
261     CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
262     if (pRootDict)
263       pAcroFormDict = pRootDict->GetDictFor("AcroForm");
264   }
265 
266   ByteString sDA;
267   const CPDF_Object* pObj =
268       CPDF_FormField::GetFieldAttr(m_pAnnotDict.Get(), "DA");
269   if (pObj)
270     sDA = pObj->GetString();
271 
272   if (bWidget) {
273     if (sDA.IsEmpty()) {
274       pObj = CPDF_FormField::GetFieldAttr(pAcroFormDict, "DA");
275       sDA = pObj ? pObj->GetString() : ByteString();
276     }
277   }
278   if (sDA.IsEmpty())
279     return nullptr;
280 
281   CPDF_DefaultAppearance appearance(sDA);
282   float font_size;
283   Optional<ByteString> font = appearance.GetFont(&font_size);
284   *sAlias = font.value_or(ByteString());
285 
286   CPDF_Dictionary* pFontDict = nullptr;
287   if (CPDF_Dictionary* pAPDict =
288           m_pAnnotDict->GetDictFor(pdfium::annotation::kAP)) {
289     if (CPDF_Dictionary* pNormalDict = pAPDict->GetDictFor("N")) {
290       if (CPDF_Dictionary* pNormalResDict =
291               pNormalDict->GetDictFor("Resources")) {
292         if (CPDF_Dictionary* pResFontDict = pNormalResDict->GetDictFor("Font"))
293           pFontDict = pResFontDict->GetDictFor(*sAlias);
294       }
295     }
296   }
297   if (bWidget && !pFontDict && pAcroFormDict) {
298     if (CPDF_Dictionary* pDRDict = pAcroFormDict->GetDictFor("DR")) {
299       if (CPDF_Dictionary* pDRFontDict = pDRDict->GetDictFor("Font"))
300         pFontDict = pDRFontDict->GetDictFor(*sAlias);
301     }
302   }
303   if (!pFontDict)
304     return nullptr;
305 
306   return CPDF_DocPageData::FromDocument(m_pDocument.Get())->GetFont(pFontDict);
307 }
308 
AddFontToAnnotDict(const RetainPtr<CPDF_Font> & pFont,const ByteString & sAlias)309 void CBA_FontMap::AddFontToAnnotDict(const RetainPtr<CPDF_Font>& pFont,
310                                      const ByteString& sAlias) {
311   if (!pFont)
312     return;
313 
314   CPDF_Dictionary* pAPDict = m_pAnnotDict->GetDictFor(pdfium::annotation::kAP);
315   if (!pAPDict)
316     pAPDict = m_pAnnotDict->SetNewFor<CPDF_Dictionary>(pdfium::annotation::kAP);
317 
318   // to avoid checkbox and radiobutton
319   if (ToDictionary(pAPDict->GetObjectFor(m_sAPType)))
320     return;
321 
322   CPDF_Stream* pStream = pAPDict->GetStreamFor(m_sAPType);
323   if (!pStream) {
324     pStream = m_pDocument->NewIndirect<CPDF_Stream>();
325     pAPDict->SetNewFor<CPDF_Reference>(m_sAPType, m_pDocument.Get(),
326                                        pStream->GetObjNum());
327   }
328 
329   CPDF_Dictionary* pStreamDict = pStream->GetDict();
330   if (!pStreamDict) {
331     auto pOwnedDict = m_pDocument->New<CPDF_Dictionary>();
332     pStreamDict = pOwnedDict.Get();
333     pStream->InitStream({}, std::move(pOwnedDict));
334   }
335 
336   CPDF_Dictionary* pStreamResList = pStreamDict->GetDictFor("Resources");
337   if (!pStreamResList)
338     pStreamResList = pStreamDict->SetNewFor<CPDF_Dictionary>("Resources");
339   CPDF_Dictionary* pStreamResFontList = pStreamResList->GetDictFor("Font");
340   if (!pStreamResFontList) {
341     pStreamResFontList = m_pDocument->NewIndirect<CPDF_Dictionary>();
342     pStreamResList->SetNewFor<CPDF_Reference>("Font", m_pDocument.Get(),
343                                               pStreamResFontList->GetObjNum());
344   }
345   if (!pStreamResFontList->KeyExist(sAlias)) {
346     CPDF_Dictionary* pFontDict = pFont->GetFontDict();
347     RetainPtr<CPDF_Object> pObject =
348         pFontDict->IsInline() ? pFontDict->Clone()
349                               : pFontDict->MakeReference(m_pDocument.Get());
350     pStreamResFontList->SetFor(sAlias, std::move(pObject));
351   }
352 }
353 
KnowWord(int32_t nFontIndex,uint16_t word)354 bool CBA_FontMap::KnowWord(int32_t nFontIndex, uint16_t word) {
355   return pdfium::IndexInBounds(m_Data, nFontIndex) &&
356          CharCodeFromUnicode(nFontIndex, word) >= 0;
357 }
358 
// Empties both the font table (|m_Data|) and the cache of native font names
// (|m_NativeFont|). The default font members are left untouched; Reset()
// handles those.
void CBA_FontMap::Clear() {
  m_Data.clear();
  m_NativeFont.clear();
}
363 
GetFontIndex(const ByteString & sFontName,int32_t nCharset,bool bFind)364 int32_t CBA_FontMap::GetFontIndex(const ByteString& sFontName,
365                                   int32_t nCharset,
366                                   bool bFind) {
367   int32_t nFontIndex = FindFont(EncodeFontAlias(sFontName, nCharset), nCharset);
368   if (nFontIndex >= 0)
369     return nFontIndex;
370 
371   ByteString sAlias;
372   RetainPtr<CPDF_Font> pFont =
373       bFind ? FindFontSameCharset(&sAlias, nCharset) : nullptr;
374   if (!pFont) {
375     ByteString sTemp = sFontName;
376     pFont = AddFontToDocument(sTemp, nCharset);
377     sAlias = EncodeFontAlias(sTemp, nCharset);
378   }
379   AddFontToAnnotDict(pFont, sAlias);
380   return AddFontData(pFont, sAlias, nCharset);
381 }
382 
AddFontData(const RetainPtr<CPDF_Font> & pFont,const ByteString & sFontAlias,int32_t nCharset)383 int32_t CBA_FontMap::AddFontData(const RetainPtr<CPDF_Font>& pFont,
384                                  const ByteString& sFontAlias,
385                                  int32_t nCharset) {
386   auto pNewData = pdfium::MakeUnique<Data>();
387   pNewData->pFont = pFont;
388   pNewData->sFontName = sFontAlias;
389   pNewData->nCharset = nCharset;
390   m_Data.push_back(std::move(pNewData));
391   return pdfium::CollectionSize<int32_t>(m_Data) - 1;
392 }
393 
// Builds the alias for a font/charset pair: |sFontName| with spaces removed,
// followed by an underscore and |nCharset| formatted as at least two
// upper-case hexadecimal digits.
ByteString CBA_FontMap::EncodeFontAlias(const ByteString& sFontName,
                                        int32_t nCharset) {
  const ByteString sBase = EncodeFontAlias(sFontName);
  const ByteString sSuffix = ByteString::Format("_%02X", nCharset);
  return sBase + sSuffix;
}
398 
// Returns a copy of |sFontName| with every space character removed.
ByteString CBA_FontMap::EncodeFontAlias(const ByteString& sFontName) {
  ByteString sStripped(sFontName);
  sStripped.Remove(' ');
  return sStripped;
}
404 
FindFont(const ByteString & sFontName,int32_t nCharset)405 int32_t CBA_FontMap::FindFont(const ByteString& sFontName, int32_t nCharset) {
406   int32_t i = 0;
407   for (const auto& pData : m_Data) {
408     if ((nCharset == FX_CHARSET_Default || nCharset == pData->nCharset) &&
409         (sFontName.IsEmpty() || pData->sFontName == sFontName)) {
410       return i;
411     }
412     ++i;
413   }
414   return -1;
415 }
416 
GetNativeFontName(int32_t nCharset)417 ByteString CBA_FontMap::GetNativeFontName(int32_t nCharset) {
418   if (nCharset == FX_CHARSET_Default)
419     nCharset = GetNativeCharset();
420 
421   ByteString sFontName = CFX_Font::GetDefaultFontNameByCharset(nCharset);
422   if (!FindNativeTrueTypeFont(sFontName))
423     return ByteString();
424 
425   return sFontName;
426 }
427 
GetCachedNativeFontName(int32_t nCharset)428 ByteString CBA_FontMap::GetCachedNativeFontName(int32_t nCharset) {
429   for (const auto& pData : m_NativeFont) {
430     if (pData && pData->nCharset == nCharset)
431       return pData->sFontName;
432   }
433 
434   ByteString sNew = GetNativeFontName(nCharset);
435   if (sNew.IsEmpty())
436     return ByteString();
437 
438   auto pNewData = pdfium::MakeUnique<Native>();
439   pNewData->nCharset = nCharset;
440   pNewData->sFontName = sNew;
441   m_NativeFont.push_back(std::move(pNewData));
442   return sNew;
443 }
444 
AddFontToDocument(ByteString sFontName,uint8_t nCharset)445 RetainPtr<CPDF_Font> CBA_FontMap::AddFontToDocument(ByteString sFontName,
446                                                     uint8_t nCharset) {
447   if (IsStandardFont(sFontName))
448     return AddStandardFont(sFontName);
449 
450   return AddSystemFont(sFontName, nCharset);
451 }
452 
IsStandardFont(const ByteString & sFontName)453 bool CBA_FontMap::IsStandardFont(const ByteString& sFontName) {
454   static const char* const kStandardFontNames[] = {"Courier",
455                                                    "Courier-Bold",
456                                                    "Courier-BoldOblique",
457                                                    "Courier-Oblique",
458                                                    "Helvetica",
459                                                    "Helvetica-Bold",
460                                                    "Helvetica-BoldOblique",
461                                                    "Helvetica-Oblique",
462                                                    "Times-Roman",
463                                                    "Times-Bold",
464                                                    "Times-Italic",
465                                                    "Times-BoldItalic",
466                                                    "Symbol",
467                                                    "ZapfDingbats"};
468   for (const char* name : kStandardFontNames) {
469     if (sFontName == name)
470       return true;
471   }
472   return false;
473 }
474 
AddStandardFont(ByteString sFontName)475 RetainPtr<CPDF_Font> CBA_FontMap::AddStandardFont(ByteString sFontName) {
476   auto* pPageData = CPDF_DocPageData::FromDocument(m_pDocument.Get());
477   if (sFontName == "ZapfDingbats")
478     return pPageData->AddStandardFont(sFontName, nullptr);
479 
480   static const CPDF_FontEncoding fe(PDFFONT_ENCODING_WINANSI);
481   return pPageData->AddStandardFont(sFontName, &fe);
482 }
483 
AddSystemFont(ByteString sFontName,uint8_t nCharset)484 RetainPtr<CPDF_Font> CBA_FontMap::AddSystemFont(ByteString sFontName,
485                                                 uint8_t nCharset) {
486   if (sFontName.IsEmpty())
487     sFontName = GetNativeFontName(nCharset);
488 
489   if (nCharset == FX_CHARSET_Default)
490     nCharset = GetNativeCharset();
491 
492   return AddNativeTrueTypeFontToPDF(m_pDocument.Get(), sFontName, nCharset);
493 }
494