1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/src/foxitlib.h"
8 #include "fx_wordbreak_impl.h"
9 #define FX_IsOdd(a) ((a)&1)
FX_GetWordBreakProperty(FX_WCHAR wcCodePoint)10 FX_WordBreakProp FX_GetWordBreakProperty(FX_WCHAR wcCodePoint) {
11   FX_DWORD dwProperty =
12       (FX_DWORD)gs_FX_WordBreak_CodePointProperties[wcCodePoint >> 1];
13   return (FX_WordBreakProp)(FX_IsOdd(wcCodePoint) ? (dwProperty & 0x0F)
14                                                   : (dwProperty >> 4));
15 }
CFX_CharIter(const CFX_WideString & wsText)16 CFX_CharIter::CFX_CharIter(const CFX_WideString& wsText)
17     : m_wsText(wsText), m_nIndex(0) {
18   FXSYS_assert(!wsText.IsEmpty());
19 }
~CFX_CharIter()20 CFX_CharIter::~CFX_CharIter() {}
Release()21 void CFX_CharIter::Release() {
22   delete this;
23 }
Next(FX_BOOL bPrev)24 FX_BOOL CFX_CharIter::Next(FX_BOOL bPrev) {
25   if (bPrev) {
26     if (m_nIndex <= 0) {
27       return FALSE;
28     }
29     m_nIndex--;
30   } else {
31     if (m_nIndex + 1 >= m_wsText.GetLength()) {
32       return FALSE;
33     }
34     m_nIndex++;
35   }
36   return TRUE;
37 }
GetChar()38 FX_WCHAR CFX_CharIter::GetChar() {
39   return m_wsText.GetAt(m_nIndex);
40 }
SetAt(int32_t nIndex)41 void CFX_CharIter::SetAt(int32_t nIndex) {
42   if (nIndex < 0 || nIndex >= m_wsText.GetLength()) {
43     return;
44   }
45   m_nIndex = nIndex;
46 }
GetAt() const47 int32_t CFX_CharIter::GetAt() const {
48   return m_nIndex;
49 }
IsEOF(FX_BOOL bTail) const50 FX_BOOL CFX_CharIter::IsEOF(FX_BOOL bTail) const {
51   return bTail ? (m_nIndex + 1 == m_wsText.GetLength()) : (m_nIndex == 0);
52 }
Clone()53 IFX_CharIter* CFX_CharIter::Clone() {
54   CFX_CharIter* pIter = new CFX_CharIter(m_wsText);
55   pIter->m_nIndex = m_nIndex;
56   return pIter;
57 }
CFX_WordBreak()58 CFX_WordBreak::CFX_WordBreak() : m_pPreIter(NULL), m_pCurIter(NULL) {}
~CFX_WordBreak()59 CFX_WordBreak::~CFX_WordBreak() {
60   if (m_pPreIter) {
61     m_pPreIter->Release();
62     m_pPreIter = NULL;
63   }
64   if (m_pCurIter) {
65     m_pCurIter->Release();
66     m_pCurIter = NULL;
67   }
68 }
Release()69 void CFX_WordBreak::Release() {
70   delete this;
71 }
Attach(IFX_CharIter * pIter)72 void CFX_WordBreak::Attach(IFX_CharIter* pIter) {
73   FXSYS_assert(pIter);
74   m_pCurIter = pIter;
75 }
Attach(const CFX_WideString & wsText)76 void CFX_WordBreak::Attach(const CFX_WideString& wsText) {
77   m_pCurIter = new CFX_CharIter(wsText);
78 }
Next(FX_BOOL bPrev)79 FX_BOOL CFX_WordBreak::Next(FX_BOOL bPrev) {
80   IFX_CharIter* pIter = bPrev ? m_pPreIter->Clone() : m_pCurIter->Clone();
81   if (pIter->IsEOF(!bPrev)) {
82     return FALSE;
83   }
84   pIter->Next(bPrev);
85   if (!FindNextBreakPos(pIter, bPrev, TRUE)) {
86     pIter->Release();
87     return FALSE;
88   }
89   if (bPrev) {
90     m_pCurIter->Release();
91     m_pCurIter = m_pPreIter;
92     m_pCurIter->Next(TRUE);
93     m_pPreIter = pIter;
94   } else {
95     m_pPreIter->Release();
96     m_pPreIter = m_pCurIter;
97     m_pPreIter->Next();
98     m_pCurIter = pIter;
99   }
100   return TRUE;
101 }
SetAt(int32_t nIndex)102 void CFX_WordBreak::SetAt(int32_t nIndex) {
103   if (m_pPreIter) {
104     m_pPreIter->Release();
105     m_pPreIter = NULL;
106   }
107   m_pCurIter->SetAt(nIndex);
108   FindNextBreakPos(m_pCurIter, TRUE, FALSE);
109   m_pPreIter = m_pCurIter;
110   m_pCurIter = m_pPreIter->Clone();
111   FindNextBreakPos(m_pCurIter, FALSE, FALSE);
112 }
GetWordPos() const113 int32_t CFX_WordBreak::GetWordPos() const {
114   return m_pPreIter->GetAt();
115 }
GetWordLength() const116 int32_t CFX_WordBreak::GetWordLength() const {
117   return m_pCurIter->GetAt() - m_pPreIter->GetAt() + 1;
118 }
GetWord(CFX_WideString & wsWord) const119 void CFX_WordBreak::GetWord(CFX_WideString& wsWord) const {
120   int32_t nWordLength = GetWordLength();
121   if (nWordLength <= 0) {
122     return;
123   }
124   FX_WCHAR* lpBuf = wsWord.GetBuffer(nWordLength);
125   IFX_CharIter* pTempIter = m_pPreIter->Clone();
126   int32_t i = 0;
127   while (pTempIter->GetAt() <= m_pCurIter->GetAt()) {
128     lpBuf[i++] = pTempIter->GetChar();
129     FX_BOOL bEnd = pTempIter->Next();
130     if (!bEnd) {
131       break;
132     }
133   }
134   pTempIter->Release();
135   wsWord.ReleaseBuffer(nWordLength);
136 }
IsEOF(FX_BOOL bTail) const137 FX_BOOL CFX_WordBreak::IsEOF(FX_BOOL bTail) const {
138   return m_pCurIter->IsEOF(bTail);
139 }
// Moves |pIter| in one direction until the next word-break position, as
// decided by gs_FX_WordBreak_Table plus special handling of MidLetter /
// MidNum / MidNumLet characters, and leaves it on the last character before
// that break (or on the boundary character of the text).
//
// pIter:     iterator to move; it is modified in place.
// bPrev:     direction passed to pIter->Next(): TRUE walks toward the start
//            of the text, FALSE toward the end.
// bFromNext: when TRUE, the character behind the starting position is not
//            inspected, so ePreType starts as FX_WordBreakProp_None.
//
// Returns TRUE on every path.
//
// NOTE(review): the Mid* handling presumably implements the UAX #29 rules
// that keep sequences such as letter-MidLetter-letter and
// digit-MidNum-digit together -- confirm against the specification.
FX_BOOL CFX_WordBreak::FindNextBreakPos(IFX_CharIter* pIter,
                                        FX_BOOL bPrev,
                                        FX_BOOL bFromNext) {
  // Properties of the character behind, at, and ahead of the iterator
  // (relative to the direction of travel).
  FX_WordBreakProp ePreType = FX_WordBreakProp_None;
  FX_WordBreakProp eCurType = FX_WordBreakProp_None;
  FX_WordBreakProp eNextType = FX_WordBreakProp_None;
  // Already on the boundary character in the direction of travel: nothing to
  // scan.
  if (pIter->IsEOF(!bPrev)) {
    return TRUE;
  }
  // Unless told to skip it, or there is no character behind us, peek one
  // character backward to learn ePreType, then step forward again.
  if (!(bFromNext || pIter->IsEOF(bPrev))) {
    pIter->Next(!bPrev);
    FX_WCHAR wcTemp = pIter->GetChar();
    ePreType = FX_GetWordBreakProperty(wcTemp);
    pIter->Next(bPrev);
  }
  FX_WCHAR wcTemp = pIter->GetChar();
  eCurType = FX_GetWordBreakProperty(wcTemp);
  // TRUE only during the first pass through the loop below.
  FX_BOOL bFirst = TRUE;
  do {
    pIter->Next(bPrev);
    // Note: this declaration shadows the outer wcTemp for the loop body.
    FX_WCHAR wcTemp = pIter->GetChar();
    eNextType = FX_GetWordBreakProperty(wcTemp);
    // Non-zero when bit |eNextType| is set in the table entry for
    // |eCurType|; treated below as a candidate break between the two
    // characters.
    FX_WORD wBreak =
        gs_FX_WordBreak_Table[eCurType] & ((FX_WORD)(1 << eNextType));
    if (wBreak) {
      // Candidate break with the iterator on the boundary character: step
      // back onto the last character before the break and stop.
      if (pIter->IsEOF(!bPrev)) {
        pIter->Next(!bPrev);
        return TRUE;
      }
      if (bFirst) {
        // First pass only: the scan may have started on a Mid* character.
        // nFlags records what follows it: 1 = ALetter, 2 = Numberic.
        int32_t nFlags = 0;
        if (eCurType == FX_WordBreakProp_MidLetter) {
          if (eNextType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          }
        } else if (eCurType == FX_WordBreakProp_MidNum) {
          if (eNextType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        } else if (eCurType == FX_WordBreakProp_MidNumLet) {
          if (eNextType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          } else if (eNextType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        }
        if (nFlags > 0) {
          FXSYS_assert(nFlags <= 2);
          // The character behind the Mid* character must be of the same kind
          // as the one after it; otherwise the break stands and the iterator
          // is stepped back onto the Mid* character.
          if (!((nFlags == 1 && ePreType == FX_WordBreakProp_ALetter) ||
                (nFlags == 2 && ePreType == FX_WordBreakProp_Numberic))) {
            pIter->Next(!bPrev);
            return TRUE;
          }
          // Same kind on both sides: advance one more character and cancel
          // the candidate break.
          pIter->Next(bPrev);
          wBreak = FALSE;
        }
        bFirst = FALSE;
      }
      if (wBreak) {
        // The next character may be a Mid* character following a letter or
        // digit. nFlags records the current character: 1 = ALetter,
        // 2 = Numberic.
        int32_t nFlags = 0;
        if (eNextType == FX_WordBreakProp_MidLetter) {
          if (eCurType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          }
        } else if (eNextType == FX_WordBreakProp_MidNum) {
          if (eCurType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        } else if (eNextType == FX_WordBreakProp_MidNumLet) {
          if (eCurType == FX_WordBreakProp_ALetter) {
            nFlags = 1;
          } else if (eCurType == FX_WordBreakProp_Numberic) {
            nFlags = 2;
          }
        }
        // Not such a sequence: the break stands; step back onto the last
        // character before it.
        if (nFlags <= 0) {
          pIter->Next(!bPrev);
          return TRUE;
        }
        FXSYS_assert(nFlags <= 2);
        // Look one character past the Mid* character. eNextType now holds
        // that look-ahead character's property.
        pIter->Next(bPrev);
        wcTemp = pIter->GetChar();
        eNextType = (FX_WordBreakProp)FX_GetWordBreakProperty(wcTemp);
        // If it is not of the same kind as the character before the Mid*
        // character, the break stands: undo both steps and stop.
        if (!((nFlags == 1 && eNextType == FX_WordBreakProp_ALetter) ||
              (nFlags == 2 && eNextType == FX_WordBreakProp_Numberic))) {
          pIter->Next(!bPrev);
          pIter->Next(!bPrev);
          return TRUE;
        }
      }
    }
    // No break here: slide the property window forward by one.
    ePreType = eCurType;
    eCurType = eNextType;
    bFirst = FALSE;
  } while (!pIter->IsEOF(!bPrev));
  return TRUE;
}
FX_WordBreak_Create()237 IFX_WordBreak* FX_WordBreak_Create() {
238   return new CFX_WordBreak;
239 }
240