1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_cmapparser.h"
8 
9 #include <vector>
10 
11 #include "core/fpdfapi/cmaps/fpdf_cmaps.h"
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
15 #include "core/fxcrt/fx_extension.h"
16 #include "core/fxcrt/fx_safe_types.h"
17 #include "core/fxge/fx_freetype.h"
18 #include "third_party/base/logging.h"
19 
20 namespace {
21 
CMap_GetString(ByteStringView word)22 ByteStringView CMap_GetString(ByteStringView word) {
23   if (word.GetLength() <= 2)
24     return ByteStringView();
25   return word.Last(word.GetLength() - 2);
26 }
27 
28 }  // namespace
29 
CPDF_CMapParser(CPDF_CMap * pCMap)30 CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap) : m_pCMap(pCMap) {}
31 
~CPDF_CMapParser()32 CPDF_CMapParser::~CPDF_CMapParser() {
33   m_pCMap->SetAdditionalMappings(std::move(m_AdditionalCharcodeToCIDMappings));
34   m_pCMap->SetMixedFourByteLeadingRanges(std::move(m_Ranges));
35 }
36 
ParseWord(ByteStringView word)37 void CPDF_CMapParser::ParseWord(ByteStringView word) {
38   ASSERT(!word.IsEmpty());
39 
40   if (word == "begincidchar") {
41     m_Status = kProcessingCidChar;
42     m_CodeSeq = 0;
43   } else if (word == "begincidrange") {
44     m_Status = kProcessingCidRange;
45     m_CodeSeq = 0;
46   } else if (word == "endcidrange" || word == "endcidchar") {
47     m_Status = kStart;
48   } else if (word == "/WMode") {
49     m_Status = kProcessingWMode;
50   } else if (word == "/Registry") {
51     m_Status = kProcessingRegistry;
52   } else if (word == "/Ordering") {
53     m_Status = kProcessingOrdering;
54   } else if (word == "/Supplement") {
55     m_Status = kProcessingSupplement;
56   } else if (word == "begincodespacerange") {
57     m_Status = kProcessingCodeSpaceRange;
58     m_CodeSeq = 0;
59   } else if (word == "usecmap") {
60   } else if (m_Status == kProcessingCidChar) {
61     HandleCid(word);
62   } else if (m_Status == kProcessingCidRange) {
63     HandleCid(word);
64   } else if (m_Status == kProcessingRegistry) {
65     m_Status = kStart;
66   } else if (m_Status == kProcessingOrdering) {
67     m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
68     m_Status = kStart;
69   } else if (m_Status == kProcessingSupplement) {
70     m_Status = kStart;
71   } else if (m_Status == kProcessingWMode) {
72     m_pCMap->SetVertical(GetCode(word) != 0);
73     m_Status = kStart;
74   } else if (m_Status == kProcessingCodeSpaceRange) {
75     HandleCodeSpaceRange(word);
76   }
77   m_LastWord = word;
78 }
79 
HandleCid(ByteStringView word)80 void CPDF_CMapParser::HandleCid(ByteStringView word) {
81   ASSERT(m_Status == kProcessingCidChar || m_Status == kProcessingCidRange);
82   bool bChar = m_Status == kProcessingCidChar;
83 
84   m_CodePoints[m_CodeSeq] = GetCode(word);
85   m_CodeSeq++;
86   int nRequiredCodePoints = bChar ? 2 : 3;
87   if (m_CodeSeq < nRequiredCodePoints)
88     return;
89 
90   uint32_t StartCode = m_CodePoints[0];
91   uint32_t EndCode;
92   uint16_t StartCID;
93   if (bChar) {
94     EndCode = StartCode;
95     StartCID = static_cast<uint16_t>(m_CodePoints[1]);
96   } else {
97     EndCode = m_CodePoints[1];
98     StartCID = static_cast<uint16_t>(m_CodePoints[2]);
99   }
100   if (EndCode < 0x10000) {
101     for (uint32_t code = StartCode; code <= EndCode; code++) {
102       m_pCMap->SetDirectCharcodeToCIDTable(
103           code, static_cast<uint16_t>(StartCID + code - StartCode));
104     }
105   } else {
106     m_AdditionalCharcodeToCIDMappings.push_back({StartCode, EndCode, StartCID});
107   }
108   m_CodeSeq = 0;
109 }
110 
HandleCodeSpaceRange(ByteStringView word)111 void CPDF_CMapParser::HandleCodeSpaceRange(ByteStringView word) {
112   if (word != "endcodespacerange") {
113     if (word.IsEmpty() || word[0] != '<')
114       return;
115 
116     if (m_CodeSeq % 2) {
117       Optional<CPDF_CMap::CodeRange> range =
118           GetCodeRange(m_LastWord.AsStringView(), word);
119       if (range.has_value())
120         m_PendingRanges.push_back(range.value());
121     }
122     m_CodeSeq++;
123     return;
124   }
125 
126   size_t nSegs = m_Ranges.size() + m_PendingRanges.size();
127   if (nSegs == 1) {
128     const auto& first_range =
129         !m_Ranges.empty() ? m_Ranges[0] : m_PendingRanges[0];
130     m_pCMap->SetCodingScheme(first_range.m_CharSize == 2 ? CPDF_CMap::TwoBytes
131                                                          : CPDF_CMap::OneByte);
132   } else if (nSegs > 1) {
133     m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
134     m_Ranges.reserve(nSegs);
135     std::move(m_PendingRanges.begin(), m_PendingRanges.end(),
136               std::back_inserter(m_Ranges));
137     m_PendingRanges.clear();
138   }
139   m_Status = kStart;
140 }
141 
142 // static
GetCode(ByteStringView word)143 uint32_t CPDF_CMapParser::GetCode(ByteStringView word) {
144   if (word.IsEmpty())
145     return 0;
146 
147   FX_SAFE_UINT32 num = 0;
148   if (word[0] == '<') {
149     for (size_t i = 1; i < word.GetLength() && std::isxdigit(word[i]); ++i) {
150       num = num * 16 + FXSYS_HexCharToInt(word[i]);
151       if (!num.IsValid())
152         return 0;
153     }
154     return num.ValueOrDie();
155   }
156 
157   for (size_t i = 0; i < word.GetLength() && std::isdigit(word[i]); ++i) {
158     num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
159     if (!num.IsValid())
160       return 0;
161   }
162   return num.ValueOrDie();
163 }
164 
165 // static
GetCodeRange(ByteStringView first,ByteStringView second)166 Optional<CPDF_CMap::CodeRange> CPDF_CMapParser::GetCodeRange(
167     ByteStringView first,
168     ByteStringView second) {
169   if (first.IsEmpty() || first[0] != '<')
170     return pdfium::nullopt;
171 
172   size_t i;
173   for (i = 1; i < first.GetLength(); ++i) {
174     if (first[i] == '>')
175       break;
176   }
177   size_t char_size = (i - 1) / 2;
178   if (char_size > 4)
179     return pdfium::nullopt;
180 
181   CPDF_CMap::CodeRange range;
182   range.m_CharSize = char_size;
183   for (i = 0; i < range.m_CharSize; ++i) {
184     uint8_t digit1 = first[i * 2 + 1];
185     uint8_t digit2 = first[i * 2 + 2];
186     range.m_Lower[i] =
187         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
188   }
189 
190   size_t size = second.GetLength();
191   for (i = 0; i < range.m_CharSize; ++i) {
192     size_t i1 = i * 2 + 1;
193     size_t i2 = i1 + 1;
194     uint8_t digit1 = i1 < size ? second[i1] : '0';
195     uint8_t digit2 = i2 < size ? second[i2] : '0';
196     range.m_Upper[i] =
197         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
198   }
199   return range;
200 }
201 
202 // static
CharsetFromOrdering(ByteStringView ordering)203 CIDSet CPDF_CMapParser::CharsetFromOrdering(ByteStringView ordering) {
204   static const char* const kCharsetNames[CIDSET_NUM_SETS] = {
205       nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"};
206   static_assert(FX_ArraySize(kCharsetNames) == CIDSET_NUM_SETS,
207                 "Too many CID sets");
208 
209   for (size_t charset = 1; charset < FX_ArraySize(kCharsetNames); ++charset) {
210     if (ordering == kCharsetNames[charset])
211       return static_cast<CIDSet>(charset);
212   }
213   return CIDSET_UNKNOWN;
214 }
215