1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/ctypeset.h"
8 
9 #include <algorithm>
10 
11 #include "core/fpdfdoc/cline.h"
12 #include "core/fpdfdoc/cpdf_variabletext.h"
13 #include "core/fpdfdoc/cpvt_wordinfo.h"
14 #include "core/fpdfdoc/csection.h"
15 #include "third_party/base/stl_util.h"
16 
17 namespace {
18 
// Classification flags for the 128 ASCII code points, indexed by code point.
// Bits consulted by the helpers in this file:
//   0x01 - Latin letter            (IsLatin)
//   0x04 - opening-style punctuation (IsOpenStylePunctuation)
//   0x08 - punctuation             (IsPunctuation)
//   0x20 - connective symbol       (IsConnectiveSymbol)
// Bits 0x02 (set on '0'..'9') and 0x10 (set on '$') are present in the table
// but are not read by any helper in this file.
const uint8_t special_chars[128] = {
    0x00, 0x0C, 0x08, 0x0C, 0x08, 0x00, 0x20, 0x00,  // 0x00 - 0x07
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x08 - 0x0F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x10 - 0x17
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x18 - 0x1F
    0x00, 0x08, 0x08, 0x00, 0x10, 0x00, 0x00, 0x28,  // 0x20 - 0x27
    0x0C, 0x08, 0x00, 0x00, 0x28, 0x28, 0x28, 0x28,  // 0x28 - 0x2F
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,  // 0x30 - 0x37
    0x02, 0x02, 0x08, 0x08, 0x00, 0x00, 0x00, 0x08,  // 0x38 - 0x3F
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x40 - 0x47
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x48 - 0x4F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x50 - 0x57
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,  // 0x58 - 0x5F
    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x60 - 0x67
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x68 - 0x6F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x70 - 0x77
    0x01, 0x01, 0x01, 0x0C, 0x00, 0x08, 0x00, 0x00,  // 0x78 - 0x7F
};
32 
IsLatin(uint16_t word)33 bool IsLatin(uint16_t word) {
34   if (word <= 0x007F)
35     return !!(special_chars[word] & 0x01);
36 
37   return ((word >= 0x00C0 && word <= 0x00FF) ||
38           (word >= 0x0100 && word <= 0x024F) ||
39           (word >= 0x1E00 && word <= 0x1EFF) ||
40           (word >= 0x2C60 && word <= 0x2C7F) ||
41           (word >= 0xA720 && word <= 0xA7FF) ||
42           (word >= 0xFF21 && word <= 0xFF3A) ||
43           (word >= 0xFF41 && word <= 0xFF5A));
44 }
45 
// Returns true only for the ASCII decimal digits 0x30 ('0') through 0x39
// ('9'); digits from other scripts are not recognized.
bool IsDigit(uint32_t word) {
  const bool bBelowZero = word < 0x0030;
  const bool bAboveNine = word > 0x0039;
  return !bBelowZero && !bAboveNine;
}
49 
// Returns true when |word| falls in one of the code-point ranges this file
// treats as CJK text. All ranges are inclusive and mutually disjoint, so the
// lookup order does not matter.
bool IsCJK(uint32_t word) {
  struct Range {
    uint32_t first;
    uint32_t last;
  };
  static const Range kCJKRanges[] = {
      {0x1100, 0x11FF},
      {0x2E80, 0x2FFF},
      // Inside 0x3000-0x303F only these three sub-ranges qualify; the rest
      // of that block is not CJK text for the purposes of this function.
      {0x3005, 0x3006},
      {0x3021, 0x3029},
      {0x3031, 0x3035},
      {0x3040, 0x9FBF},
      {0xAC00, 0xD7AF},
      {0xF900, 0xFAFF},
      {0xFE30, 0xFE4F},
      {0xFF66, 0xFF9D},
      {0x20000, 0x2A6DF},
      {0x2F800, 0x2FA1F},
  };
  for (const Range& range : kCJKRanges) {
    if (word >= range.first && word <= range.last)
      return true;
  }
  return false;
}
70 
IsPunctuation(uint32_t word)71 bool IsPunctuation(uint32_t word) {
72   if (word <= 0x007F)
73     return !!(special_chars[word] & 0x08);
74 
75   if (word >= 0x0080 && word <= 0x00FF) {
76     return (word == 0x0082 || word == 0x0084 || word == 0x0085 ||
77             word == 0x0091 || word == 0x0092 || word == 0x0093 ||
78             word <= 0x0094 || word == 0x0096 || word == 0x00B4 ||
79             word == 0x00B8);
80   }
81 
82   if (word >= 0x2000 && word <= 0x206F) {
83     return (
84         word == 0x2010 || word == 0x2011 || word == 0x2012 || word == 0x2013 ||
85         word == 0x2018 || word == 0x2019 || word == 0x201A || word == 0x201B ||
86         word == 0x201C || word == 0x201D || word == 0x201E || word == 0x201F ||
87         word == 0x2032 || word == 0x2033 || word == 0x2034 || word == 0x2035 ||
88         word == 0x2036 || word == 0x2037 || word == 0x203C || word == 0x203D ||
89         word == 0x203E || word == 0x2044);
90   }
91 
92   if (word >= 0x3000 && word <= 0x303F) {
93     return (
94         word == 0x3001 || word == 0x3002 || word == 0x3003 || word == 0x3005 ||
95         word == 0x3009 || word == 0x300A || word == 0x300B || word == 0x300C ||
96         word == 0x300D || word == 0x300F || word == 0x300E || word == 0x3010 ||
97         word == 0x3011 || word == 0x3014 || word == 0x3015 || word == 0x3016 ||
98         word == 0x3017 || word == 0x3018 || word == 0x3019 || word == 0x301A ||
99         word == 0x301B || word == 0x301D || word == 0x301E || word == 0x301F);
100   }
101 
102   if (word >= 0xFE50 && word <= 0xFE6F)
103     return (word >= 0xFE50 && word <= 0xFE5E) || word == 0xFE63;
104 
105   if (word >= 0xFF00 && word <= 0xFFEF) {
106     return (
107         word == 0xFF01 || word == 0xFF02 || word == 0xFF07 || word == 0xFF08 ||
108         word == 0xFF09 || word == 0xFF0C || word == 0xFF0E || word == 0xFF0F ||
109         word == 0xFF1A || word == 0xFF1B || word == 0xFF1F || word == 0xFF3B ||
110         word == 0xFF3D || word == 0xFF40 || word == 0xFF5B || word == 0xFF5C ||
111         word == 0xFF5D || word == 0xFF61 || word == 0xFF62 || word == 0xFF63 ||
112         word == 0xFF64 || word == 0xFF65 || word == 0xFF9E || word == 0xFF9F);
113   }
114 
115   return false;
116 }
117 
IsConnectiveSymbol(uint32_t word)118 bool IsConnectiveSymbol(uint32_t word) {
119   return word <= 0x007F && (special_chars[word] & 0x20);
120 }
121 
IsOpenStylePunctuation(uint32_t word)122 bool IsOpenStylePunctuation(uint32_t word) {
123   if (word <= 0x007F)
124     return !!(special_chars[word] & 0x04);
125 
126   return (word == 0x300A || word == 0x300C || word == 0x300E ||
127           word == 0x3010 || word == 0x3014 || word == 0x3016 ||
128           word == 0x3018 || word == 0x301A || word == 0xFF08 ||
129           word == 0xFF3B || word == 0xFF5B || word == 0xFF62);
130 }
131 
// Returns true when |word| is one of the individually listed currency code
// points or lies in the inclusive range 0x20A0-0x20CF.
bool IsCurrencySymbol(uint16_t word) {
  switch (word) {
    case 0x0024:
    case 0x0080:
    case 0x00A2:
    case 0x00A3:
    case 0x00A4:
    case 0x00A5:
    case 0xFE69:
    case 0xFF04:
    case 0xFFE0:
    case 0xFFE1:
    case 0xFFE5:
    case 0xFFE6:
      return true;
    default:
      return word >= 0x20A0 && word <= 0x20CF;
  }
}
139 
IsPrefixSymbol(uint16_t word)140 bool IsPrefixSymbol(uint16_t word) {
141   return IsCurrencySymbol(word) || word == 0x2116;
142 }
143 
// Returns true for the two code points this file treats as a space:
// 0x0020 and 0x3000. No other whitespace code point is recognized.
bool IsSpace(uint16_t word) {
  switch (word) {
    case 0x0020:
    case 0x3000:
      return true;
    default:
      return false;
  }
}
147 
NeedDivision(uint16_t prevWord,uint16_t curWord)148 bool NeedDivision(uint16_t prevWord, uint16_t curWord) {
149   if ((IsLatin(prevWord) || IsDigit(prevWord)) &&
150       (IsLatin(curWord) || IsDigit(curWord))) {
151     return false;
152   }
153   if (IsSpace(curWord) || IsPunctuation(curWord)) {
154     return false;
155   }
156   if (IsConnectiveSymbol(prevWord) || IsConnectiveSymbol(curWord)) {
157     return false;
158   }
159   if (IsSpace(prevWord) || IsPunctuation(prevWord)) {
160     return true;
161   }
162   if (IsPrefixSymbol(prevWord)) {
163     return false;
164   }
165   if (IsPrefixSymbol(curWord) || IsCJK(curWord)) {
166     return true;
167   }
168   if (IsCJK(prevWord)) {
169     return true;
170   }
171   return false;
172 }
173 
174 }  // namespace
175 
// Binds the typesetter to |pSection| and to that section's m_pVT, and starts
// with an all-zero result rectangle. |pSection| is dereferenced in the
// initializer list, so it must not be null.
CTypeset::CTypeset(CSection* pSection)
    : m_rcRet(0.0f, 0.0f, 0.0f, 0.0f),
      m_pVT(pSection->m_pVT),
      m_pSection(pSection) {}
180 
~CTypeset()181 CTypeset::~CTypeset() {}
182 
CharArray()183 CPVT_FloatRect CTypeset::CharArray() {
184   m_rcRet = CPVT_FloatRect(0, 0, 0, 0);
185   if (m_pSection->m_LineArray.empty())
186     return m_rcRet;
187 
188   float fNodeWidth = m_pVT->GetPlateWidth() /
189                      (m_pVT->GetCharArray() <= 0 ? 1 : m_pVT->GetCharArray());
190   float fLineAscent =
191       m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
192   float fLineDescent =
193       m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), m_pVT->GetFontSize());
194   float x = 0.0f;
195   float y = m_pVT->GetLineLeading() + fLineAscent;
196   int32_t nStart = 0;
197   CLine* pLine = m_pSection->m_LineArray.front().get();
198   switch (m_pVT->GetAlignment()) {
199     case 0:
200       pLine->m_LineInfo.fLineX = fNodeWidth * VARIABLETEXT_HALF;
201       break;
202     case 1:
203       nStart = (m_pVT->GetCharArray() -
204                 pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray)) /
205                2;
206       pLine->m_LineInfo.fLineX =
207           fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
208       break;
209     case 2:
210       nStart = m_pVT->GetCharArray() -
211                pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
212       pLine->m_LineInfo.fLineX =
213           fNodeWidth * nStart - fNodeWidth * VARIABLETEXT_HALF;
214       break;
215   }
216   for (int32_t w = 0,
217                sz = pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
218        w < sz; w++) {
219     if (w >= m_pVT->GetCharArray())
220       break;
221 
222     float fNextWidth = 0;
223     if (pdfium::IndexInBounds(m_pSection->m_WordArray, w + 1)) {
224       CPVT_WordInfo* pNextWord = m_pSection->m_WordArray[w + 1].get();
225       pNextWord->fWordTail = 0;
226       fNextWidth = m_pVT->GetWordWidth(*pNextWord);
227     }
228     CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
229     pWord->fWordTail = 0;
230     float fWordWidth = m_pVT->GetWordWidth(*pWord);
231     float fWordAscent = m_pVT->GetWordAscent(*pWord);
232     float fWordDescent = m_pVT->GetWordDescent(*pWord);
233     x = (float)(fNodeWidth * (w + nStart + 0.5) -
234                 fWordWidth * VARIABLETEXT_HALF);
235     pWord->fWordX = x;
236     pWord->fWordY = y;
237     if (w == 0) {
238       pLine->m_LineInfo.fLineX = x;
239     }
240     if (w != pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1) {
241       pWord->fWordTail =
242           (fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF > 0
243                ? fNodeWidth - (fWordWidth + fNextWidth) * VARIABLETEXT_HALF
244                : 0);
245     } else {
246       pWord->fWordTail = 0;
247     }
248     x += fWordWidth;
249     fLineAscent = std::max(fLineAscent, fWordAscent);
250     fLineDescent = std::min(fLineDescent, fWordDescent);
251   }
252   pLine->m_LineInfo.nBeginWordIndex = 0;
253   pLine->m_LineInfo.nEndWordIndex =
254       pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray) - 1;
255   pLine->m_LineInfo.fLineY = y;
256   pLine->m_LineInfo.fLineWidth = x - pLine->m_LineInfo.fLineX;
257   pLine->m_LineInfo.fLineAscent = fLineAscent;
258   pLine->m_LineInfo.fLineDescent = fLineDescent;
259   m_rcRet = CPVT_FloatRect(0, 0, x, y - fLineDescent);
260   return m_rcRet;
261 }
262 
GetEditSize(float fFontSize)263 CFX_SizeF CTypeset::GetEditSize(float fFontSize) {
264   ASSERT(m_pSection);
265   ASSERT(m_pVT);
266   SplitLines(false, fFontSize);
267   return CFX_SizeF(m_rcRet.Width(), m_rcRet.Height());
268 }
269 
Typeset()270 CPVT_FloatRect CTypeset::Typeset() {
271   ASSERT(m_pVT);
272   m_pSection->m_LineArray.clear();
273   SplitLines(true, 0.0f);
274   OutputLines();
275   return m_rcRet;
276 }
277 
// Splits the section's words into lines and stores the overall extent in
// m_rcRet as (0, 0, widest line, accumulated height).
//
// |bTypeset| - when true, word metrics are taken via the single-argument
//              m_pVT->GetWord*() overloads and every finished line is
//              appended to the section with AddLine(). When false, metrics
//              are computed for |fFontSize| and no lines are added; only the
//              extent is measured.
// |fFontSize| - font size used for measuring; only read when |bTypeset| is
//               false.
void CTypeset::SplitLines(bool bTypeset, float fFontSize) {
  ASSERT(m_pVT);
  ASSERT(m_pSection);
  // Index of the first / last word of the line currently being built.
  int32_t nLineHead = 0;
  int32_t nLineTail = 0;
  float fMaxX = 0.0f, fMaxY = 0.0f;
  // Running metrics of the current line, plus a snapshot taken at the most
  // recent word start so the line can be rewound to it when wrapping.
  float fLineWidth = 0.0f, fBackupLineWidth = 0.0f;
  float fLineAscent = 0.0f, fBackupLineAscent = 0.0f;
  float fLineDescent = 0.0f, fBackupLineDescent = 0.0f;
  // Index at which the most recent breakable unit ("full word") started.
  int32_t nWordStartPos = 0;
  bool bFullWord = false;
  // Number of unit starts seen on the current line after its first entry.
  int32_t nLineFullWordIndex = 0;
  // Number of entries consumed on the current line so far.
  int32_t nCharIndex = 0;
  CPVT_LineInfo line;
  float fWordWidth = 0;
  // Width available for text: plate width minus line indent, never negative.
  float fTypesetWidth =
      std::max(m_pVT->GetPlateWidth() - m_pVT->GetLineIndent(), 0.0f);
  int32_t nTotalWords =
      pdfium::CollectionSize<int32_t>(m_pSection->m_WordArray);
  // True after opening-style punctuation until the next entry that is
  // neither a space nor opening-style punctuation; NeedDivision() is not
  // consulted while it is set.
  bool bOpened = false;
  if (nTotalWords > 0) {
    int32_t i = 0;
    while (i < nTotalWords) {
      CPVT_WordInfo* pWord = m_pSection->m_WordArray[i].get();
      // For the very first entry the "previous" word is the entry itself.
      CPVT_WordInfo* pOldWord = pWord;
      if (i > 0) {
        pOldWord = m_pSection->m_WordArray[i - 1].get();
      }
      if (pWord) {
        // Fold this entry's metrics into the line's running ascent/descent
        // and fetch its width.
        if (bTypeset) {
          fLineAscent = std::max(fLineAscent, m_pVT->GetWordAscent(*pWord));
          fLineDescent = std::min(fLineDescent, m_pVT->GetWordDescent(*pWord));
          fWordWidth = m_pVT->GetWordWidth(*pWord);
        } else {
          fLineAscent =
              std::max(fLineAscent, m_pVT->GetWordAscent(*pWord, fFontSize));
          fLineDescent =
              std::min(fLineDescent, m_pVT->GetWordDescent(*pWord, fFontSize));
          fWordWidth = m_pVT->GetWordWidth(
              pWord->nFontIndex, pWord->Word, m_pVT->GetSubWord(),
              m_pVT->GetCharSpace(), m_pVT->GetHorzScale(), fFontSize,
              pWord->fWordTail);
        }
        // Decide whether this entry begins a new breakable unit.
        if (!bOpened) {
          if (IsOpenStylePunctuation(pWord->Word)) {
            bOpened = true;
            bFullWord = true;
          } else if (pOldWord) {
            if (NeedDivision(pOldWord->Word, pWord->Word)) {
              bFullWord = true;
            }
          }
        } else {
          if (!IsSpace(pWord->Word) && !IsOpenStylePunctuation(pWord->Word)) {
            bOpened = false;
          }
        }
        if (bFullWord) {
          bFullWord = false;
          // A unit starting at the very first entry of a line is not a
          // usable wrap position, so it is not counted.
          if (nCharIndex > 0) {
            nLineFullWordIndex++;
          }
          // Remember where the unit starts and snapshot the line metrics so
          // a later wrap can rewind to this point. fLineWidth does not yet
          // include this entry; ascent/descent already do.
          nWordStartPos = i;
          fBackupLineWidth = fLineWidth;
          fBackupLineAscent = fLineAscent;
          fBackupLineDescent = fLineDescent;
        }
        nCharIndex++;
      }
      // Wrap when auto-return is on, there is a usable width, and adding
      // this entry would overflow it.
      if (m_pVT->IsAutoReturn() && fTypesetWidth > 0 &&
          fLineWidth + fWordWidth > fTypesetWidth) {
        // If the line has a wrap position, rewind to the start of the last
        // unit and restore the metrics captured there.
        if (nLineFullWordIndex > 0) {
          i = nWordStartPos;
          fLineWidth = fBackupLineWidth;
          fLineAscent = fBackupLineAscent;
          fLineDescent = fBackupLineDescent;
        }
        // The overflowing entry is the only one on the line: keep it on this
        // line anyway and move on, so the loop always makes progress.
        if (nCharIndex == 1) {
          fLineWidth = fWordWidth;
          i++;
        }
        nLineTail = i - 1;
        if (bTypeset) {
          line.nBeginWordIndex = nLineHead;
          line.nEndWordIndex = nLineTail;
          line.nTotalWord = nLineTail - nLineHead + 1;
          line.fLineWidth = fLineWidth;
          line.fLineAscent = fLineAscent;
          line.fLineDescent = fLineDescent;
          m_pSection->AddLine(line);
        }
        // Grow the overall extent by this line's height and width.
        fMaxY += (fLineAscent + m_pVT->GetLineLeading());
        fMaxY -= fLineDescent;
        fMaxX = std::max(fLineWidth, fMaxX);
        // Start a fresh line at entry |i|; |i| is not advanced here, so that
        // entry is processed again as the first entry of the new line.
        nLineHead = i;
        fLineWidth = 0.0f;
        fLineAscent = 0.0f;
        fLineDescent = 0.0f;
        nCharIndex = 0;
        nLineFullWordIndex = 0;
        bFullWord = false;
      } else {
        fLineWidth += fWordWidth;
        i++;
      }
    }
    // Flush whatever remains after the last wrap as the final line.
    if (nLineHead <= nTotalWords - 1) {
      nLineTail = nTotalWords - 1;
      if (bTypeset) {
        line.nBeginWordIndex = nLineHead;
        line.nEndWordIndex = nLineTail;
        line.nTotalWord = nLineTail - nLineHead + 1;
        line.fLineWidth = fLineWidth;
        line.fLineAscent = fLineAscent;
        line.fLineDescent = fLineDescent;
        m_pSection->AddLine(line);
      }
      fMaxY += (fLineAscent + m_pVT->GetLineLeading());
      fMaxY -= fLineDescent;
      fMaxX = std::max(fLineWidth, fMaxX);
    }
  } else {
    // No words at all: the section still occupies one empty line whose
    // height comes from the default line/font metrics.
    if (bTypeset) {
      fLineAscent = m_pVT->GetLineAscent();
      fLineDescent = m_pVT->GetLineDescent();
    } else {
      fLineAscent =
          m_pVT->GetFontAscent(m_pVT->GetDefaultFontIndex(), fFontSize);
      fLineDescent =
          m_pVT->GetFontDescent(m_pVT->GetDefaultFontIndex(), fFontSize);
    }
    if (bTypeset) {
      // Word indexes of -1 mark the line as containing no words.
      line.nBeginWordIndex = -1;
      line.nEndWordIndex = -1;
      line.nTotalWord = 0;
      line.fLineWidth = 0;
      line.fLineAscent = fLineAscent;
      line.fLineDescent = fLineDescent;
      m_pSection->AddLine(line);
    }
    fMaxY += m_pVT->GetLineLeading() + fLineAscent - fLineDescent;
  }
  m_rcRet = CPVT_FloatRect(0, 0, fMaxX, fMaxY);
}
422 
OutputLines()423 void CTypeset::OutputLines() {
424   ASSERT(m_pVT);
425   ASSERT(m_pSection);
426   float fMinX = 0.0f, fMinY = 0.0f, fMaxX = 0.0f, fMaxY = 0.0f;
427   float fPosX = 0.0f, fPosY = 0.0f;
428   float fLineIndent = m_pVT->GetLineIndent();
429   float fTypesetWidth = std::max(m_pVT->GetPlateWidth() - fLineIndent, 0.0f);
430   switch (m_pVT->GetAlignment()) {
431     default:
432     case 0:
433       fMinX = 0.0f;
434       break;
435     case 1:
436       fMinX = (fTypesetWidth - m_rcRet.Width()) * VARIABLETEXT_HALF;
437       break;
438     case 2:
439       fMinX = fTypesetWidth - m_rcRet.Width();
440       break;
441   }
442   fMaxX = fMinX + m_rcRet.Width();
443   fMinY = 0.0f;
444   fMaxY = m_rcRet.Height();
445   int32_t nTotalLines =
446       pdfium::CollectionSize<int32_t>(m_pSection->m_LineArray);
447   if (nTotalLines > 0) {
448     for (int32_t l = 0; l < nTotalLines; l++) {
449       CLine* pLine = m_pSection->m_LineArray[l].get();
450       switch (m_pVT->GetAlignment()) {
451         default:
452         case 0:
453           fPosX = 0;
454           break;
455         case 1:
456           fPosX = (fTypesetWidth - pLine->m_LineInfo.fLineWidth) *
457                   VARIABLETEXT_HALF;
458           break;
459         case 2:
460           fPosX = fTypesetWidth - pLine->m_LineInfo.fLineWidth;
461           break;
462       }
463       fPosX += fLineIndent;
464       fPosY += m_pVT->GetLineLeading();
465       fPosY += pLine->m_LineInfo.fLineAscent;
466       pLine->m_LineInfo.fLineX = fPosX - fMinX;
467       pLine->m_LineInfo.fLineY = fPosY - fMinY;
468       for (int32_t w = pLine->m_LineInfo.nBeginWordIndex;
469            w <= pLine->m_LineInfo.nEndWordIndex; w++) {
470         if (pdfium::IndexInBounds(m_pSection->m_WordArray, w)) {
471           CPVT_WordInfo* pWord = m_pSection->m_WordArray[w].get();
472           pWord->fWordX = fPosX - fMinX;
473           pWord->fWordY = fPosY - fMinY;
474 
475           fPosX += m_pVT->GetWordWidth(*pWord);
476         }
477       }
478       fPosY -= pLine->m_LineInfo.fLineDescent;
479     }
480   }
481   m_rcRet = CPVT_FloatRect(fMinX, fMinY, fMaxX, fMaxY);
482 }
483