1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/ctypeset.h"
8
9 #include <algorithm>
10
11 #include "core/fpdfdoc/cline.h"
12 #include "core/fpdfdoc/cpvt_wordinfo.h"
13 #include "core/fpdfdoc/csection.h"
14
namespace {

// Bit flags stored in |special_chars|. Each table entry is the bitwise OR of
// the flags that apply to the corresponding ASCII code point.
const uint8_t kCharFlagLatin = 0x01;            // 'A'-'Z' and 'a'-'z'.
const uint8_t kCharFlagOpenPunctuation = 0x04;  // e.g. '(' '[' '{'.
const uint8_t kCharFlagPunctuation = 0x08;      // e.g. '!' ')' ',' ':' '?'.
const uint8_t kCharFlagConnective = 0x20;       // e.g. '\'' ',' '-' '.' '/'.
// The table additionally carries 0x02 (set for '0'-'9') and 0x10 (set for
// '$'); none of the helpers in this file tests those two bits.

// Classification table indexed by code point, covering 0x00-0x7F only.
const uint8_t special_chars[128] = {
    0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x00,
    0x10, 0x00, 0x00, 0x28, 0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x08, 0x08,
    0x00, 0x00, 0x00, 0x08, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,
};

// Returns true if |word| counts as a Latin letter: for ASCII this is decided
// by the table, above ASCII by membership in the ranges listed below.
bool IsLatin(uint16_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & kCharFlagLatin);

  return ((word >= 0x00C0 && word <= 0x00FF) ||
          (word >= 0x0100 && word <= 0x024F) ||
          (word >= 0x1E00 && word <= 0x1EFF) ||
          (word >= 0x2C60 && word <= 0x2C7F) ||
          (word >= 0xA720 && word <= 0xA7FF) ||
          (word >= 0xFF21 && word <= 0xFF3A) ||
          (word >= 0xFF41 && word <= 0xFF5A));
}

// Returns true only for the ASCII digits '0'-'9'.
bool IsDigit(uint32_t word) {
  return word >= 0x0030 && word <= 0x0039;
}

// Returns true if |word| lies in one of the CJK ranges below. Inside
// 0x3000-0x303F only the explicitly listed code points qualify.
bool IsCJK(uint32_t word) {
  if ((word >= 0x1100 && word <= 0x11FF) ||
      (word >= 0x2E80 && word <= 0x2FFF) ||
      (word >= 0x3040 && word <= 0x9FBF) ||
      (word >= 0xAC00 && word <= 0xD7AF) ||
      (word >= 0xF900 && word <= 0xFAFF) ||
      (word >= 0xFE30 && word <= 0xFE4F) ||
      (word >= 0x20000 && word <= 0x2A6DF) ||
      (word >= 0x2F800 && word <= 0x2FA1F)) {
    return true;
  }
  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3005 || word == 0x3006 || word == 0x3021 || word == 0x3022 ||
        word == 0x3023 || word == 0x3024 || word == 0x3025 || word == 0x3026 ||
        word == 0x3027 || word == 0x3028 || word == 0x3029 || word == 0x3031 ||
        word == 0x3032 || word == 0x3033 || word == 0x3034 || word == 0x3035);
  }
  return word >= 0xFF66 && word <= 0xFF9D;
}

// Returns true if |word| is treated as punctuation. ASCII is decided by the
// table; every other block accepts only the code points listed for it.
bool IsPunctuation(uint32_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & kCharFlagPunctuation);

  if (word >= 0x0080 && word <= 0x00FF) {
    // 0x0094 is matched exactly. A range test here would swallow every code
    // point from 0x0080 up, including 0x0080 which IsCurrencySymbol() handles.
    return (word == 0x0082 || word == 0x0084 || word == 0x0085 ||
            word == 0x0091 || word == 0x0092 || word == 0x0093 ||
            word == 0x0094 || word == 0x0096 || word == 0x00B4 ||
            word == 0x00B8);
  }

  if (word >= 0x2000 && word <= 0x206F) {
    return (
        word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 ||
        word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B ||
        word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F ||
        word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 ||
        word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D ||
        word == 0x203E || word == 0x2044);
  }

  if (word >= 0x3000 && word <= 0x303F) {
    return (
        word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 ||
        word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C ||
        word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 ||
        word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 ||
        word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A ||
        word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F);
  }

  if (word >= 0xFE50 && word <= 0xFE6F)
    return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63;

  if (word >= 0xFF00 && word <= 0xFFEF) {
    return (
        word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 ||
        word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F ||
        word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B ||
        word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C ||
        word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 ||
        word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F);
  }

  return false;
}

// Returns true if |word| is an ASCII code point flagged as connective in the
// table. Nothing above 0x7F is ever connective.
bool IsConnectiveSymbol(uint32_t word) {
  return word <= 0x007F && !!(special_chars[word] & kCharFlagConnective);
}

// Returns true if |word| is opening punctuation: ASCII via the table, plus
// the CJK and fullwidth opening brackets listed below.
bool IsOpenStylePunctuation(uint32_t word) {
  if (word <= 0x007F)
    return !!(special_chars[word] & kCharFlagOpenPunctuation);

  return (word == 0x300A || word == 0x300C || word == 0x300E ||
          word == 0x3010 || word == 0x3014 || word == 0x3016 ||
          word == 0x3018 || word == 0x301A || word == 0xFF08 ||
          word == 0xFF3B || word == 0xFF5B || word == 0xFF62);
}

// Returns true if |word| is one of the currency code points listed below.
bool IsCurrencySymbol(uint16_t word) {
  return (word == 0x0024 || word == 0x0080 || word == 0x00A2 ||
          word == 0x00A3 || word == 0x00A4 || word == 0x00A5 ||
          (word >= 0x20A0 && word <= 0x20CF) || word == 0xFE69 ||
          word == 0xFF04 || word == 0xFFE0 || word == 0xFFE1 ||
          word == 0xFFE5 || word == 0xFFE6);
}

// Returns true for currency symbols and for 0x2116.
bool IsPrefixSymbol(uint16_t word) {
  return IsCurrencySymbol(word) || word == 0x2116;
}

// Returns true for the ASCII space (0x0020) and for 0x3000.
bool IsSpace(uint16_t word) {
  return word == 0x0020 || word == 0x3000;
}

// Decides whether a new "full word" starts at |curWord| given the word before
// it. The rules are evaluated in order and the first match wins:
//   1. letter/digit followed by letter/digit       -> no division
//   2. current word is a space or punctuation      -> no division
//   3. either word is a connective symbol          -> no division
//   4. previous word is a space or punctuation     -> division
//   5. previous word is a prefix symbol            -> no division
//   6. current word is a prefix symbol or CJK      -> division
//   7. previous word is CJK                        -> division
// Anything else is not divided.
bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
  if ((IsLatin(prevWord) || IsDigit(prevWord)) &&
      (IsLatin(curWord) || IsDigit(curWord))) {
    return false;
  }
  if (IsSpace(curWord) || IsPunctuation(curWord)) {
    return false;
  }
  if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) {
    return false;
  }
  if (IsSpace(prevWord) || IsPunctuation(prevWord)) {
    return true;
  }
  if (IsPrefixSymbol(prevWord)) {
    return false;
  }
  if (IsPrefixSymbol(curWord) || IsCJK(curWord)) {
    return true;
  }
  if (IsCJK(prevWord)) {
    return true;
  }
  return false;
}

}  // namespace
173
// Binds the typesetter to |pSection| and to the variable-text object that the
// section points at (|pSection->m_pVT|). The result rectangle starts out as
// all zeros. |pSection| is dereferenced here, so it must not be null.
CTypeset::CTypeset(CSection* pSection)
    : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
      m_pVT(pSection->m_pVT),
      m_pSection(pSection) {}
178
~CTypeset()179 CTypeset::~CTypeset() {}
180
CharArray()181 CPVT_FloatRect CTypeset::CharArray() {
182 ASSERT(m_pSection);
183 FX_FLOAT fLineAscent =
184 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
185 FX_FLOAT fLineDescent =
186 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
187 m_rcRet.Default();
188 FX_FLOAT x = 0.0f, y = 0.0f;
189 FX_FLOAT fNextWidth;
190 int32_t nStart = 0;
191 FX_FLOAT fNodeWidth = m_pVT->GetPlateWidth() /
192 (m_pVT->m_nCharArray <= 0 ? 1 : m_pVT->m_nCharArray);
193 if (CLine* pLine = m_pSection->m_LineArray.GetAt(0)) {
194 x = 0.0f;
195 y += m_pVT->GetLineLeading(m_pSection->m_SecInfo);
196 y += fLineAscent;
197 nStart = 0;
198 switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) {
199 case 0:
200 pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
201 break;
202 case 1:
203 nStart = (m_pVT->m_nCharArray - m_pSection->m_WordArray.GetSize()) / 2;
204 pLine->m_LineInfo.fLineX =
205 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
206 break;
207 case 2:
208 nStart = m_pVT->m_nCharArray - m_pSection->m_WordArray.GetSize();
209 pLine->m_LineInfo.fLineX =
210 fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
211 break;
212 }
213 for (int32_t w = 0, sz = m_pSection->m_WordArray.GetSize(); w < sz; w++) {
214 if (w >= m_pVT->m_nCharArray) {
215 break;
216 }
217 fNextWidth = 0;
218 if (CPVT_WordInfo* pNextWord = m_pSection->m_WordArray.GetAt(w + 1)) {
219 pNextWord->fWordTail = 0;
220 fNextWidth = m_pVT->GetWordWidth(*pNextWord);
221 }
222 if (CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(w)) {
223 pWord->fWordTail = 0;
224 FX_FLOAT fWordWidth = m_pVT->GetWordWidth(*pWord);
225 FX_FLOAT fWordAscent = m_pVT->GetWordAscent(*pWord);
226 FX_FLOAT fWordDescent = m_pVT->GetWordDescent(*pWord);
227 x = (FX_FLOAT)(fNodeWidth * (w + nStart + 0.5) -
228 fWordWidth * VARIABLETEXT_HALF);
229 pWord->fWordX = x;
230 pWord->fWordY = y;
231 if (w == 0) {
232 pLine->m_LineInfo.fLineX = x;
233 }
234 if (w != m_pSection->m_WordArray.GetSize() - 1) {
235 pWord->fWordTail =
236 (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0
237 ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF
238 : 0);
239 } else {
240 pWord->fWordTail = 0;
241 }
242 x += fWordWidth;
243 fLineAscent = std::max(fLineAscent, fWordAscent);
244 fLineDescent = std::min(fLineDescent, fWordDescent);
245 }
246 }
247 pLine->m_LineInfo.nBeginWordIndex = 0;
248 pLine->m_LineInfo.nEndWordIndex = m_pSection->m_WordArray.GetSize() - 1;
249 pLine->m_LineInfo.fLineY = y;
250 pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
251 pLine->m_LineInfo.fLineAscent = fLineAscent;
252 pLine->m_LineInfo.fLineDescent = fLineDescent;
253 y -= fLineDescent;
254 }
255 return m_rcRet = CPVT_FloatRect(0, 0, x, y);
256 }
257
GetEditSize(FX_FLOAT fFontSize)258 CFX_SizeF CTypeset::GetEditSize(FX_FLOAT fFontSize) {
259 ASSERT(m_pSection);
260 ASSERT(m_pVT);
261 SplitLines(false, fFontSize);
262 return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
263 }
264
Typeset()265 CPVT_FloatRect CTypeset::Typeset() {
266 ASSERT(m_pVT);
267 m_pSection->m_LineArray.Empty();
268 SplitLines(true, 0.0f);
269 m_pSection->m_LineArray.Clear();
270 OutputLines();
271 return m_rcRet;
272 }
273
SplitLines(bool bTypeset,FX_FLOAT fFontSize)274 void CTypeset::SplitLines(bool bTypeset, FX_FLOAT fFontSize) {
275 ASSERT(m_pVT);
276 ASSERT(m_pSection);
277 int32_t nLineHead = 0;
278 int32_t nLineTail = 0;
279 FX_FLOAT fMaxX = 0.0f, fMaxY = 0.0f;
280 FX_FLOAT fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
281 FX_FLOAT fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
282 FX_FLOAT fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
283 int32_t nWordStartPos = 0;
284 bool bFullWord = false;
285 int32_t nLineFullWordIndex = 0;
286 int32_t nCharIndex = 0;
287 CPVT_LineInfo line;
288 FX_FLOAT fWordWidth = 0;
289 FX_FLOAT fTypesetWidth = std::max(
290 m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(m_pSection->m_SecInfo),
291 0.0f);
292 int32_t nTotalWords = m_pSection->m_WordArray.GetSize();
293 bool bOpened = false;
294 if (nTotalWords > 0) {
295 int32_t i = 0;
296 while (i < nTotalWords) {
297 CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(i);
298 CPVT_WordInfo* pOldWord = pWord;
299 if (i > 0) {
300 pOldWord = m_pSection->m_WordArray.GetAt(i - 1);
301 }
302 if (pWord) {
303 if (bTypeset) {
304 fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
305 fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
306 fWordWidth = m_pVT->GetWordWidth(*pWord);
307 } else {
308 fLineAscent =
309 std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
310 fLineDescent =
311 std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
312 fWordWidth = m_pVT->GetWordWidth(
313 pWord->nFontIndex, pWord->Word, m_pVT->m_wSubWord,
314 m_pVT->m_fCharSpace, m_pVT->m_nHorzScale, fFontSize,
315 pWord->fWordTail);
316 }
317 if (!bOpened) {
318 if (IsOpenStylePunctuation(pWord->Word)) {
319 bOpened = true;
320 bFullWord = true;
321 } else if (pOldWord) {
322 if (NeedDivision(pOldWord->Word, pWord->Word)) {
323 bFullWord = true;
324 }
325 }
326 } else {
327 if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
328 bOpened = false;
329 }
330 }
331 if (bFullWord) {
332 bFullWord = false;
333 if (nCharIndex > 0) {
334 nLineFullWordIndex++;
335 }
336 nWordStartPos = i;
337 fBackupLineWidth = fLineWidth;
338 fBackupLineAscent = fLineAscent;
339 fBackupLineDescent = fLineDescent;
340 }
341 nCharIndex++;
342 }
343 if (m_pVT->m_bLimitWidth && fTypesetWidth > 0 &&
344 fLineWidth + fWordWidth > fTypesetWidth) {
345 if (nLineFullWordIndex > 0) {
346 i = nWordStartPos;
347 fLineWidth = fBackupLineWidth;
348 fLineAscent = fBackupLineAscent;
349 fLineDescent = fBackupLineDescent;
350 }
351 if (nCharIndex == 1) {
352 fLineWidth = fWordWidth;
353 i++;
354 }
355 nLineTail = i - 1;
356 if (bTypeset) {
357 line.nBeginWordIndex = nLineHead;
358 line.nEndWordIndex = nLineTail;
359 line.nTotalWord = nLineTail - nLineHead + 1;
360 line.fLineWidth = fLineWidth;
361 line.fLineAscent = fLineAscent;
362 line.fLineDescent = fLineDescent;
363 m_pSection->AddLine(line);
364 }
365 fMaxY += (fLineAscent + m_pVT->GetLineLeading(m_pSection->m_SecInfo));
366 fMaxY -= fLineDescent;
367 fMaxX = std::max(fLineWidth, fMaxX);
368 nLineHead = i;
369 fLineWidth = 0.0f;
370 fLineAscent = 0.0f;
371 fLineDescent = 0.0f;
372 nCharIndex = 0;
373 nLineFullWordIndex = 0;
374 bFullWord = false;
375 } else {
376 fLineWidth += fWordWidth;
377 i++;
378 }
379 }
380 if (nLineHead <= nTotalWords - 1) {
381 nLineTail = nTotalWords - 1;
382 if (bTypeset) {
383 line.nBeginWordIndex = nLineHead;
384 line.nEndWordIndex = nLineTail;
385 line.nTotalWord = nLineTail - nLineHead + 1;
386 line.fLineWidth = fLineWidth;
387 line.fLineAscent = fLineAscent;
388 line.fLineDescent = fLineDescent;
389 m_pSection->AddLine(line);
390 }
391 fMaxY += (fLineAscent + m_pVT->GetLineLeading(m_pSection->m_SecInfo));
392 fMaxY -= fLineDescent;
393 fMaxX = std::max(fLineWidth, fMaxX);
394 }
395 } else {
396 if (bTypeset) {
397 fLineAscent = m_pVT->GetLineAscent(m_pSection->m_SecInfo);
398 fLineDescent = m_pVT->GetLineDescent(m_pSection->m_SecInfo);
399 } else {
400 fLineAscent =
401 m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
402 fLineDescent =
403 m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
404 }
405 if (bTypeset) {
406 line.nBeginWordIndex = -1;
407 line.nEndWordIndex = -1;
408 line.nTotalWord = 0;
409 line.fLineWidth = 0;
410 line.fLineAscent = fLineAscent;
411 line.fLineDescent = fLineDescent;
412 m_pSection->AddLine(line);
413 }
414 fMaxY += m_pVT->GetLineLeading(m_pSection->m_SecInfo) + fLineAscent -
415 fLineDescent;
416 }
417 m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
418 }
419
OutputLines()420 void CTypeset::OutputLines() {
421 ASSERT(m_pVT);
422 ASSERT(m_pSection);
423 FX_FLOAT fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
424 FX_FLOAT fPosX = 0.0f, fPosY = 0.0f;
425 FX_FLOAT fLineIndent = m_pVT->GetLineIndent(m_pSection->m_SecInfo);
426 FX_FLOAT fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
427 switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) {
428 default:
429 case 0:
430 fMinX = 0.0f;
431 break;
432 case 1:
433 fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
434 break;
435 case 2:
436 fMinX = fTypesetWidth - m_rcRet.Width();
437 break;
438 }
439 fMaxX = fMinX + m_rcRet.Width();
440 fMinY = 0.0f;
441 fMaxY = m_rcRet.Height();
442 int32_t nTotalLines = m_pSection->m_LineArray.GetSize();
443 if (nTotalLines > 0) {
444 m_pSection->m_SecInfo.nTotalLine = nTotalLines;
445 for (int32_t l = 0; l < nTotalLines; l++) {
446 if (CLine* pLine = m_pSection->m_LineArray.GetAt(l)) {
447 switch (m_pVT->GetAlignment(m_pSection->m_SecInfo)) {
448 default:
449 case 0:
450 fPosX = 0;
451 break;
452 case 1:
453 fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
454 VARIABLETEXT_HALF;
455 break;
456 case 2:
457 fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
458 break;
459 }
460 fPosX += fLineIndent;
461 fPosY += m_pVT->GetLineLeading(m_pSection->m_SecInfo);
462 fPosY += pLine->m_LineInfo.fLineAscent;
463 pLine->m_LineInfo.fLineX = fPosX - fMinX;
464 pLine->m_LineInfo.fLineY = fPosY - fMinY;
465 for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
466 w <= pLine->m_LineInfo.nEndWordIndex; w++) {
467 if (CPVT_WordInfo* pWord = m_pSection->m_WordArray.GetAt(w)) {
468 pWord->fWordX = fPosX - fMinX;
469 if (pWord->pWordProps) {
470 switch (pWord->pWordProps->nScriptType) {
471 default:
472 case CPDF_VariableText::ScriptType::Normal:
473 pWord->fWordY = fPosY - fMinY;
474 break;
475 case CPDF_VariableText::ScriptType::Super:
476 pWord->fWordY = fPosY - m_pVT->GetWordAscent(*pWord) - fMinY;
477 break;
478 case CPDF_VariableText::ScriptType::Sub:
479 pWord->fWordY = fPosY - m_pVT->GetWordDescent(*pWord) - fMinY;
480 break;
481 }
482 } else {
483 pWord->fWordY = fPosY - fMinY;
484 }
485 fPosX += m_pVT->GetWordWidth(*pWord);
486 }
487 }
488 fPosY -= pLine->m_LineInfo.fLineDescent;
489 }
490 }
491 }
492 m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
493 }
494