1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * A module for breaking paragraphs into lines, supporting high quality
19  * hyphenation and justification.
20  */
21 
22 #ifndef MINIKIN_LINE_BREAKER_H
23 #define MINIKIN_LINE_BREAKER_H
24 
25 #include "unicode/brkiter.h"
26 #include "unicode/locid.h"
27 #include <cmath>
28 #include <vector>
29 #include "minikin/Hyphenator.h"
30 
31 namespace android {
32 
33 enum BreakStrategy {
34     kBreakStrategy_Greedy = 0,
35     kBreakStrategy_HighQuality = 1,
36     kBreakStrategy_Balanced = 2
37 };
38 
39 enum HyphenationFrequency {
40     kHyphenationFrequency_None = 0,
41     kHyphenationFrequency_Normal = 1,
42     kHyphenationFrequency_Full = 2
43 };
44 
45 // TODO: want to generalize to be able to handle array of line widths
46 class LineWidths {
47     public:
setWidths(float firstWidth,int firstWidthLineCount,float restWidth)48         void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
49             mFirstWidth = firstWidth;
50             mFirstWidthLineCount = firstWidthLineCount;
51             mRestWidth = restWidth;
52         }
setIndents(const std::vector<float> & indents)53         void setIndents(const std::vector<float>& indents) {
54             mIndents = indents;
55         }
isConstant()56         bool isConstant() const {
57             // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
58             return mRestWidth == mFirstWidth && mIndents.empty();
59         }
getLineWidth(int line)60         float getLineWidth(int line) const {
61             float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
62             if (!mIndents.empty()) {
63                 if ((size_t)line < mIndents.size()) {
64                     width -= mIndents[line];
65                 } else {
66                     width -= mIndents.back();
67                 }
68             }
69             return width;
70         }
71     private:
72         float mFirstWidth;
73         int mFirstWidthLineCount;
74         float mRestWidth;
75         std::vector<float> mIndents;
76 };
77 
78 class TabStops {
79     public:
set(const int * stops,size_t nStops,int tabWidth)80         void set(const int* stops, size_t nStops, int tabWidth) {
81             if (stops != nullptr) {
82                 mStops.assign(stops, stops + nStops);
83             } else {
84                 mStops.clear();
85             }
86             mTabWidth = tabWidth;
87         }
nextTab(float widthSoFar)88         float nextTab(float widthSoFar) const {
89             for (size_t i = 0; i < mStops.size(); i++) {
90                 if (mStops[i] > widthSoFar) {
91                     return mStops[i];
92                 }
93             }
94             return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
95         }
96     private:
97         std::vector<int> mStops;
98         int mTabWidth;
99 };
100 
101 class LineBreaker {
102     public:
103         const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
104 
~LineBreaker()105         ~LineBreaker() {
106             utext_close(&mUText);
107             delete mBreakIterator;
108         }
109 
110         // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
111         // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
112         // be set on the first invocation, but callers are encouraged not to call again unless
113         // locale has actually changed.
114         // That logic could be here but it's better for performance that it's upstream because of
115         // the cost of constructing and comparing the ICU Locale object.
116         // Note: caller is responsible for managing lifetime of hyphenator
117         void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
118 
resize(size_t size)119         void resize(size_t size) {
120             mTextBuf.resize(size);
121             mCharWidths.resize(size);
122         }
123 
size()124         size_t size() const {
125             return mTextBuf.size();
126         }
127 
buffer()128         uint16_t* buffer() {
129             return mTextBuf.data();
130         }
131 
charWidths()132         float* charWidths() {
133             return mCharWidths.data();
134         }
135 
136         // set text to current contents of buffer
137         void setText();
138 
139         void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
140 
141         void setIndents(const std::vector<float>& indents);
142 
setTabStops(const int * stops,size_t nStops,int tabWidth)143         void setTabStops(const int* stops, size_t nStops, int tabWidth) {
144             mTabStops.set(stops, nStops, tabWidth);
145         }
146 
getStrategy()147         BreakStrategy getStrategy() const { return mStrategy; }
148 
setStrategy(BreakStrategy strategy)149         void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
150 
getHyphenationFrequency()151         HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
152 
setHyphenationFrequency(HyphenationFrequency frequency)153         void setHyphenationFrequency(HyphenationFrequency frequency) {
154             mHyphenationFrequency = frequency;
155         }
156 
157         // TODO: this class is actually fairly close to being general and not tied to using
158         // Minikin to do the shaping of the strings. The main thing that would need to be changed
159         // is having some kind of callback (or virtual class, or maybe even template), which could
160         // easily be instantiated with Minikin's Layout. Future work for when needed.
161         float addStyleRun(MinikinPaint* paint, const FontCollection* typeface, FontStyle style,
162                 size_t start, size_t end, bool isRtl);
163 
164         void addReplacement(size_t start, size_t end, float width);
165 
166         size_t computeBreaks();
167 
getBreaks()168         const int* getBreaks() const {
169             return mBreaks.data();
170         }
171 
getWidths()172         const float* getWidths() const {
173             return mWidths.data();
174         }
175 
getFlags()176         const int* getFlags() const {
177             return mFlags.data();
178         }
179 
180         void finish();
181 
182     private:
183         // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
184         // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
185         // that float is used extensively on the Java side for this. This is a typedef so that
186         // we can easily change it based on performance/accuracy tradeoff.
187         typedef double ParaWidth;
188 
189         // A single candidate break
190         struct Candidate {
191             size_t offset;  // offset to text buffer, in code units
192             size_t prev;  // index to previous break
193             ParaWidth preBreak;
194             ParaWidth postBreak;
195             float penalty;  // penalty of this break (for example, hyphen penalty)
196             float score;  // best score found for this break
197             size_t lineNumber;  // only updated for non-constant line widths
198             uint8_t hyphenEdit;
199         };
200 
201         float currentLineWidth() const;
202 
203         void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
204                 uint8_t hyph);
205 
206         void addCandidate(Candidate cand);
207 
208         // push an actual break to the output. Takes care of setting flags for tab
209         void pushBreak(int offset, float width, uint8_t hyph);
210 
211         void computeBreaksGreedy();
212 
213         void computeBreaksOptimal(bool isRectangular);
214 
215         void finishBreaksOptimal();
216 
217         icu::BreakIterator* mBreakIterator = nullptr;
218         UText mUText = UTEXT_INITIALIZER;
219         std::vector<uint16_t>mTextBuf;
220         std::vector<float>mCharWidths;
221 
222         Hyphenator* mHyphenator;
223         std::vector<uint8_t> mHyphBuf;
224 
225         // layout parameters
226         BreakStrategy mStrategy = kBreakStrategy_Greedy;
227         HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
228         LineWidths mLineWidths;
229         TabStops mTabStops;
230 
231         // result of line breaking
232         std::vector<int> mBreaks;
233         std::vector<float> mWidths;
234         std::vector<int> mFlags;
235 
236         ParaWidth mWidth = 0;
237         std::vector<Candidate> mCandidates;
238         float mLinePenalty = 0.0f;
239 
240         // the following are state for greedy breaker (updated while adding style runs)
241         size_t mLastBreak;
242         size_t mBestBreak;
243         float mBestScore;
244         ParaWidth mPreBreak;  // prebreak of last break
245         int mFirstTabIndex;
246 };
247 
248 }  // namespace android
249 
250 #endif  // MINIKIN_LINE_BREAKER_H
251