1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_font.h"
8 
9 #include <limits>
10 #include <memory>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/cpdf_modulemgr.h"
15 #include "core/fpdfapi/font/cpdf_fontencoding.h"
16 #include "core/fpdfapi/font/cpdf_truetypefont.h"
17 #include "core/fpdfapi/font/cpdf_type1font.h"
18 #include "core/fpdfapi/font/cpdf_type3font.h"
19 #include "core/fpdfapi/page/cpdf_docpagedata.h"
20 #include "core/fpdfapi/page/cpdf_pagemodule.h"
21 #include "core/fpdfapi/parser/cpdf_array.h"
22 #include "core/fpdfapi/parser/cpdf_dictionary.h"
23 #include "core/fpdfapi/parser/cpdf_document.h"
24 #include "core/fpdfapi/parser/cpdf_name.h"
25 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
26 #include "core/fxcrt/fx_memory.h"
27 #include "core/fxge/fx_freetype.h"
28 #include "third_party/base/logging.h"
29 #include "third_party/base/ptr_util.h"
30 #include "third_party/base/stl_util.h"
31 
32 namespace {
33 
// Four-byte BaseFont prefixes (each row NUL-terminated) that CPDF_Font::Create()
// compares against the first four bytes of a TrueType font's BaseFont name.
// NOTE(review): the bytes look like GBK-encoded Chinese font names (SimSun,
// KaiTi, SimHei, FangSong, NSimSun) -- confirm before relying on this.
constexpr uint8_t kChineseFontNames[][5] = {
    {0xCB, 0xCE, 0xCC, 0xE5, 0x00},
    {0xBF, 0xAC, 0xCC, 0xE5, 0x00},
    {0xBA, 0xDA, 0xCC, 0xE5, 0x00},
    {0xB7, 0xC2, 0xCB, 0xCE, 0x00},
    {0xD0, 0xC2, 0xCB, 0xCE, 0x00},
};
39 
GetPredefinedEncoding(const ByteString & value,int * basemap)40 void GetPredefinedEncoding(const ByteString& value, int* basemap) {
41   if (value == "WinAnsiEncoding")
42     *basemap = PDFFONT_ENCODING_WINANSI;
43   else if (value == "MacRomanEncoding")
44     *basemap = PDFFONT_ENCODING_MACROMAN;
45   else if (value == "MacExpertEncoding")
46     *basemap = PDFFONT_ENCODING_MACEXPERT;
47   else if (value == "PDFDocEncoding")
48     *basemap = PDFFONT_ENCODING_PDFDOC;
49 }
50 
51 }  // namespace
52 
CPDF_Font()53 CPDF_Font::CPDF_Font()
54     : m_pFontFile(nullptr),
55       m_pFontDict(nullptr),
56       m_bToUnicodeLoaded(false),
57       m_Flags(0),
58       m_StemV(0),
59       m_Ascent(0),
60       m_Descent(0),
61       m_ItalicAngle(0) {}
62 
~CPDF_Font()63 CPDF_Font::~CPDF_Font() {
64   if (m_pFontFile) {
65     auto* pPageData = m_pDocument->GetPageData();
66     if (pPageData) {
67       pPageData->MaybePurgeFontFileStreamAcc(
68           m_pFontFile->GetStream()->AsStream());
69     }
70   }
71 }
72 
IsType1Font() const73 bool CPDF_Font::IsType1Font() const {
74   return false;
75 }
76 
IsTrueTypeFont() const77 bool CPDF_Font::IsTrueTypeFont() const {
78   return false;
79 }
80 
IsType3Font() const81 bool CPDF_Font::IsType3Font() const {
82   return false;
83 }
84 
IsCIDFont() const85 bool CPDF_Font::IsCIDFont() const {
86   return false;
87 }
88 
AsType1Font() const89 const CPDF_Type1Font* CPDF_Font::AsType1Font() const {
90   return nullptr;
91 }
92 
AsType1Font()93 CPDF_Type1Font* CPDF_Font::AsType1Font() {
94   return nullptr;
95 }
96 
AsTrueTypeFont() const97 const CPDF_TrueTypeFont* CPDF_Font::AsTrueTypeFont() const {
98   return nullptr;
99 }
100 
AsTrueTypeFont()101 CPDF_TrueTypeFont* CPDF_Font::AsTrueTypeFont() {
102   return nullptr;
103 }
104 
AsType3Font() const105 const CPDF_Type3Font* CPDF_Font::AsType3Font() const {
106   return nullptr;
107 }
108 
AsType3Font()109 CPDF_Type3Font* CPDF_Font::AsType3Font() {
110   return nullptr;
111 }
112 
AsCIDFont() const113 const CPDF_CIDFont* CPDF_Font::AsCIDFont() const {
114   return nullptr;
115 }
116 
AsCIDFont()117 CPDF_CIDFont* CPDF_Font::AsCIDFont() {
118   return nullptr;
119 }
120 
IsUnicodeCompatible() const121 bool CPDF_Font::IsUnicodeCompatible() const {
122   return false;
123 }
124 
CountChar(const char * pString,int size) const125 int CPDF_Font::CountChar(const char* pString, int size) const {
126   return size;
127 }
128 
GlyphFromCharCodeExt(uint32_t charcode)129 int CPDF_Font::GlyphFromCharCodeExt(uint32_t charcode) {
130   return GlyphFromCharCode(charcode, nullptr);
131 }
132 
IsVertWriting() const133 bool CPDF_Font::IsVertWriting() const {
134   const CPDF_CIDFont* pCIDFont = AsCIDFont();
135   return pCIDFont ? pCIDFont->IsVertWriting() : m_Font.IsVertical();
136 }
137 
AppendChar(char * buf,uint32_t charcode) const138 int CPDF_Font::AppendChar(char* buf, uint32_t charcode) const {
139   *buf = static_cast<char>(charcode);
140   return 1;
141 }
142 
AppendChar(ByteString * str,uint32_t charcode) const143 void CPDF_Font::AppendChar(ByteString* str, uint32_t charcode) const {
144   char buf[4];
145   int len = AppendChar(buf, charcode);
146   *str += ByteStringView(buf, len);
147 }
148 
UnicodeFromCharCode(uint32_t charcode) const149 WideString CPDF_Font::UnicodeFromCharCode(uint32_t charcode) const {
150   if (!m_bToUnicodeLoaded)
151     LoadUnicodeMap();
152 
153   return m_pToUnicodeMap ? m_pToUnicodeMap->Lookup(charcode) : WideString();
154 }
155 
CharCodeFromUnicode(wchar_t unicode) const156 uint32_t CPDF_Font::CharCodeFromUnicode(wchar_t unicode) const {
157   if (!m_bToUnicodeLoaded)
158     LoadUnicodeMap();
159 
160   return m_pToUnicodeMap ? m_pToUnicodeMap->ReverseLookup(unicode) : 0;
161 }
162 
HasFontWidths() const163 bool CPDF_Font::HasFontWidths() const {
164   return true;
165 }
166 
LoadFontDescriptor(CPDF_Dictionary * pFontDesc)167 void CPDF_Font::LoadFontDescriptor(CPDF_Dictionary* pFontDesc) {
168   m_Flags = pFontDesc->GetIntegerFor("Flags", FXFONT_NONSYMBOLIC);
169   int ItalicAngle = 0;
170   bool bExistItalicAngle = false;
171   if (pFontDesc->KeyExist("ItalicAngle")) {
172     ItalicAngle = pFontDesc->GetIntegerFor("ItalicAngle");
173     bExistItalicAngle = true;
174   }
175   if (ItalicAngle < 0) {
176     m_Flags |= FXFONT_ITALIC;
177     m_ItalicAngle = ItalicAngle;
178   }
179   bool bExistStemV = false;
180   if (pFontDesc->KeyExist("StemV")) {
181     m_StemV = pFontDesc->GetIntegerFor("StemV");
182     bExistStemV = true;
183   }
184   bool bExistAscent = false;
185   if (pFontDesc->KeyExist("Ascent")) {
186     m_Ascent = pFontDesc->GetIntegerFor("Ascent");
187     bExistAscent = true;
188   }
189   bool bExistDescent = false;
190   if (pFontDesc->KeyExist("Descent")) {
191     m_Descent = pFontDesc->GetIntegerFor("Descent");
192     bExistDescent = true;
193   }
194   bool bExistCapHeight = false;
195   if (pFontDesc->KeyExist("CapHeight"))
196     bExistCapHeight = true;
197   if (bExistItalicAngle && bExistAscent && bExistCapHeight && bExistDescent &&
198       bExistStemV) {
199     m_Flags |= FXFONT_USEEXTERNATTR;
200   }
201   if (m_Descent > 10)
202     m_Descent = -m_Descent;
203   CPDF_Array* pBBox = pFontDesc->GetArrayFor("FontBBox");
204   if (pBBox) {
205     m_FontBBox.left = pBBox->GetIntegerAt(0);
206     m_FontBBox.bottom = pBBox->GetIntegerAt(1);
207     m_FontBBox.right = pBBox->GetIntegerAt(2);
208     m_FontBBox.top = pBBox->GetIntegerAt(3);
209   }
210 
211   CPDF_Stream* pFontFile = pFontDesc->GetStreamFor("FontFile");
212   if (!pFontFile)
213     pFontFile = pFontDesc->GetStreamFor("FontFile2");
214   if (!pFontFile)
215     pFontFile = pFontDesc->GetStreamFor("FontFile3");
216   if (!pFontFile)
217     return;
218 
219   m_pFontFile = m_pDocument->LoadFontFile(pFontFile);
220   if (!m_pFontFile)
221     return;
222 
223   const uint8_t* pFontData = m_pFontFile->GetData();
224   uint32_t dwFontSize = m_pFontFile->GetSize();
225   if (!m_Font.LoadEmbedded(pFontData, dwFontSize)) {
226     m_pDocument->GetPageData()->MaybePurgeFontFileStreamAcc(
227         m_pFontFile->GetStream()->AsStream());
228     m_pFontFile = nullptr;
229   }
230 }
231 
CheckFontMetrics()232 void CPDF_Font::CheckFontMetrics() {
233   if (m_FontBBox.top == 0 && m_FontBBox.bottom == 0 && m_FontBBox.left == 0 &&
234       m_FontBBox.right == 0) {
235     FXFT_Face face = m_Font.GetFace();
236     if (face) {
237       m_FontBBox.left = TT2PDF(FXFT_Get_Face_xMin(face), face);
238       m_FontBBox.bottom = TT2PDF(FXFT_Get_Face_yMin(face), face);
239       m_FontBBox.right = TT2PDF(FXFT_Get_Face_xMax(face), face);
240       m_FontBBox.top = TT2PDF(FXFT_Get_Face_yMax(face), face);
241       m_Ascent = TT2PDF(FXFT_Get_Face_Ascender(face), face);
242       m_Descent = TT2PDF(FXFT_Get_Face_Descender(face), face);
243     } else {
244       bool bFirst = true;
245       for (int i = 0; i < 256; i++) {
246         FX_RECT rect = GetCharBBox(i);
247         if (rect.left == rect.right) {
248           continue;
249         }
250         if (bFirst) {
251           m_FontBBox = rect;
252           bFirst = false;
253         } else {
254           if (m_FontBBox.top < rect.top) {
255             m_FontBBox.top = rect.top;
256           }
257           if (m_FontBBox.right < rect.right) {
258             m_FontBBox.right = rect.right;
259           }
260           if (m_FontBBox.left > rect.left) {
261             m_FontBBox.left = rect.left;
262           }
263           if (m_FontBBox.bottom > rect.bottom) {
264             m_FontBBox.bottom = rect.bottom;
265           }
266         }
267       }
268     }
269   }
270   if (m_Ascent == 0 && m_Descent == 0) {
271     FX_RECT rect = GetCharBBox('A');
272     m_Ascent = rect.bottom == rect.top ? m_FontBBox.top : rect.top;
273     rect = GetCharBBox('g');
274     m_Descent = rect.bottom == rect.top ? m_FontBBox.bottom : rect.bottom;
275   }
276 }
277 
LoadUnicodeMap() const278 void CPDF_Font::LoadUnicodeMap() const {
279   m_bToUnicodeLoaded = true;
280   CPDF_Stream* pStream = m_pFontDict->GetStreamFor("ToUnicode");
281   if (!pStream) {
282     return;
283   }
284   m_pToUnicodeMap = pdfium::MakeUnique<CPDF_ToUnicodeMap>();
285   m_pToUnicodeMap->Load(pStream);
286 }
287 
GetStringWidth(const char * pString,int size)288 int CPDF_Font::GetStringWidth(const char* pString, int size) {
289   int offset = 0;
290   int width = 0;
291   while (offset < size) {
292     uint32_t charcode = GetNextChar(pString, size, offset);
293     width += GetCharWidthF(charcode);
294   }
295   return width;
296 }
297 
298 // static
GetStockFont(CPDF_Document * pDoc,const ByteStringView & name)299 CPDF_Font* CPDF_Font::GetStockFont(CPDF_Document* pDoc,
300                                    const ByteStringView& name) {
301   ByteString fontname(name);
302   int font_id = PDF_GetStandardFontName(&fontname);
303   if (font_id < 0)
304     return nullptr;
305 
306   CPDF_FontGlobals* pFontGlobals =
307       CPDF_ModuleMgr::Get()->GetPageModule()->GetFontGlobals();
308   CPDF_Font* pFont = pFontGlobals->Find(pDoc, font_id);
309   if (pFont)
310     return pFont;
311 
312   CPDF_Dictionary* pDict = new CPDF_Dictionary(pDoc->GetByteStringPool());
313   pDict->SetNewFor<CPDF_Name>("Type", "Font");
314   pDict->SetNewFor<CPDF_Name>("Subtype", "Type1");
315   pDict->SetNewFor<CPDF_Name>("BaseFont", fontname);
316   pDict->SetNewFor<CPDF_Name>("Encoding", "WinAnsiEncoding");
317   return pFontGlobals->Set(pDoc, font_id, CPDF_Font::Create(nullptr, pDict));
318 }
319 
Create(CPDF_Document * pDoc,CPDF_Dictionary * pFontDict)320 std::unique_ptr<CPDF_Font> CPDF_Font::Create(CPDF_Document* pDoc,
321                                              CPDF_Dictionary* pFontDict) {
322   ByteString type = pFontDict->GetStringFor("Subtype");
323   std::unique_ptr<CPDF_Font> pFont;
324   if (type == "TrueType") {
325     ByteString tag = pFontDict->GetStringFor("BaseFont").Left(4);
326     for (size_t i = 0; i < FX_ArraySize(kChineseFontNames); ++i) {
327       if (tag == ByteString(kChineseFontNames[i], 4)) {
328         CPDF_Dictionary* pFontDesc = pFontDict->GetDictFor("FontDescriptor");
329         if (!pFontDesc || !pFontDesc->KeyExist("FontFile2"))
330           pFont = pdfium::MakeUnique<CPDF_CIDFont>();
331         break;
332       }
333     }
334     if (!pFont)
335       pFont = pdfium::MakeUnique<CPDF_TrueTypeFont>();
336   } else if (type == "Type3") {
337     pFont = pdfium::MakeUnique<CPDF_Type3Font>();
338   } else if (type == "Type0") {
339     pFont = pdfium::MakeUnique<CPDF_CIDFont>();
340   } else {
341     pFont = pdfium::MakeUnique<CPDF_Type1Font>();
342   }
343   pFont->m_pFontDict = pFontDict;
344   pFont->m_pDocument = pDoc;
345   pFont->m_BaseFont = pFontDict->GetStringFor("BaseFont");
346   return pFont->Load() ? std::move(pFont) : nullptr;
347 }
348 
GetNextChar(const char * pString,int nStrLen,int & offset) const349 uint32_t CPDF_Font::GetNextChar(const char* pString,
350                                 int nStrLen,
351                                 int& offset) const {
352   if (offset < 0 || nStrLen < 1) {
353     return 0;
354   }
355   uint8_t ch = offset < nStrLen ? pString[offset++] : pString[nStrLen - 1];
356   return static_cast<uint32_t>(ch);
357 }
358 
LoadPDFEncoding(CPDF_Object * pEncoding,int & iBaseEncoding,std::vector<ByteString> * pCharNames,bool bEmbedded,bool bTrueType)359 void CPDF_Font::LoadPDFEncoding(CPDF_Object* pEncoding,
360                                 int& iBaseEncoding,
361                                 std::vector<ByteString>* pCharNames,
362                                 bool bEmbedded,
363                                 bool bTrueType) {
364   if (!pEncoding) {
365     if (m_BaseFont == "Symbol") {
366       iBaseEncoding = bTrueType ? PDFFONT_ENCODING_MS_SYMBOL
367                                 : PDFFONT_ENCODING_ADOBE_SYMBOL;
368     } else if (!bEmbedded && iBaseEncoding == PDFFONT_ENCODING_BUILTIN) {
369       iBaseEncoding = PDFFONT_ENCODING_WINANSI;
370     }
371     return;
372   }
373   if (pEncoding->IsName()) {
374     if (iBaseEncoding == PDFFONT_ENCODING_ADOBE_SYMBOL ||
375         iBaseEncoding == PDFFONT_ENCODING_ZAPFDINGBATS) {
376       return;
377     }
378     if (FontStyleIsSymbolic(m_Flags) && m_BaseFont == "Symbol") {
379       if (!bTrueType)
380         iBaseEncoding = PDFFONT_ENCODING_ADOBE_SYMBOL;
381       return;
382     }
383     ByteString bsEncoding = pEncoding->GetString();
384     if (bsEncoding.Compare("MacExpertEncoding") == 0) {
385       bsEncoding = "WinAnsiEncoding";
386     }
387     GetPredefinedEncoding(bsEncoding, &iBaseEncoding);
388     return;
389   }
390 
391   CPDF_Dictionary* pDict = pEncoding->AsDictionary();
392   if (!pDict)
393     return;
394 
395   if (iBaseEncoding != PDFFONT_ENCODING_ADOBE_SYMBOL &&
396       iBaseEncoding != PDFFONT_ENCODING_ZAPFDINGBATS) {
397     ByteString bsEncoding = pDict->GetStringFor("BaseEncoding");
398     if (bTrueType && bsEncoding.Compare("MacExpertEncoding") == 0)
399       bsEncoding = "WinAnsiEncoding";
400     GetPredefinedEncoding(bsEncoding, &iBaseEncoding);
401   }
402   if ((!bEmbedded || bTrueType) && iBaseEncoding == PDFFONT_ENCODING_BUILTIN)
403     iBaseEncoding = PDFFONT_ENCODING_STANDARD;
404 
405   CPDF_Array* pDiffs = pDict->GetArrayFor("Differences");
406   if (!pDiffs)
407     return;
408 
409   pCharNames->resize(256);
410   uint32_t cur_code = 0;
411   for (uint32_t i = 0; i < pDiffs->GetCount(); i++) {
412     CPDF_Object* pElement = pDiffs->GetDirectObjectAt(i);
413     if (!pElement)
414       continue;
415 
416     if (CPDF_Name* pName = pElement->AsName()) {
417       if (cur_code < 256)
418         (*pCharNames)[cur_code] = pName->GetString();
419       cur_code++;
420     } else {
421       cur_code = pElement->GetInteger();
422     }
423   }
424 }
425 
IsStandardFont() const426 bool CPDF_Font::IsStandardFont() const {
427   if (!IsType1Font())
428     return false;
429   if (m_pFontFile)
430     return false;
431   if (AsType1Font()->GetBase14Font() < 0)
432     return false;
433   return true;
434 }
435 
GetAdobeCharName(int iBaseEncoding,const std::vector<ByteString> & charnames,int charcode)436 const char* CPDF_Font::GetAdobeCharName(
437     int iBaseEncoding,
438     const std::vector<ByteString>& charnames,
439     int charcode) {
440   if (charcode < 0 || charcode >= 256) {
441     NOTREACHED();
442     return nullptr;
443   }
444 
445   if (!charnames.empty() && !charnames[charcode].IsEmpty())
446     return charnames[charcode].c_str();
447 
448   const char* name = nullptr;
449   if (iBaseEncoding)
450     name = PDF_CharNameFromPredefinedCharSet(iBaseEncoding, charcode);
451   return name && name[0] ? name : nullptr;
452 }
453 
FallbackFontFromCharcode(uint32_t charcode)454 uint32_t CPDF_Font::FallbackFontFromCharcode(uint32_t charcode) {
455   if (m_FontFallbacks.empty()) {
456     m_FontFallbacks.push_back(pdfium::MakeUnique<CFX_Font>());
457     m_FontFallbacks[0]->LoadSubst("Arial", IsTrueTypeFont(), m_Flags,
458                                   m_StemV * 5, m_ItalicAngle, 0,
459                                   IsVertWriting());
460   }
461   return 0;
462 }
463 
FallbackGlyphFromCharcode(int fallbackFont,uint32_t charcode)464 int CPDF_Font::FallbackGlyphFromCharcode(int fallbackFont, uint32_t charcode) {
465   if (!pdfium::IndexInBounds(m_FontFallbacks, fallbackFont))
466     return -1;
467 
468   WideString str = UnicodeFromCharCode(charcode);
469   uint32_t unicode = !str.IsEmpty() ? str[0] : charcode;
470   int glyph =
471       FXFT_Get_Char_Index(m_FontFallbacks[fallbackFont]->GetFace(), unicode);
472   if (glyph == 0)
473     return -1;
474 
475   return glyph;
476 }
477 
GetFontFallback(int position)478 CFX_Font* CPDF_Font::GetFontFallback(int position) {
479   if (position < 0 || static_cast<size_t>(position) >= m_FontFallbacks.size())
480     return nullptr;
481   return m_FontFallbacks[position].get();
482 }
483 
484 // static
TT2PDF(int m,FXFT_Face face)485 int CPDF_Font::TT2PDF(int m, FXFT_Face face) {
486   int upm = FXFT_Get_Face_UnitsPerEM(face);
487   if (upm == 0)
488     return m;
489 
490   return static_cast<int>(
491       pdfium::clamp((m * 1000.0 + upm / 2) / upm,
492                     static_cast<double>(std::numeric_limits<int>::min()),
493                     static_cast<double>(std::numeric_limits<int>::max())));
494 }
495 
496 // static
FT_UseTTCharmap(FXFT_Face face,int platform_id,int encoding_id)497 bool CPDF_Font::FT_UseTTCharmap(FXFT_Face face,
498                                 int platform_id,
499                                 int encoding_id) {
500   auto** pCharMap = FXFT_Get_Face_Charmaps(face);
501   for (int i = 0; i < FXFT_Get_Face_CharmapCount(face); i++) {
502     if (FXFT_Get_Charmap_PlatformID(pCharMap[i]) == platform_id &&
503         FXFT_Get_Charmap_EncodingID(pCharMap[i]) == encoding_id) {
504       FXFT_Set_Charmap(face, pCharMap[i]);
505       return true;
506     }
507   }
508   return false;
509 }
510