1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * A module for breaking paragraphs into lines, supporting high quality
19  * hyphenation and justification.
20  */
21 
22 #ifndef MINIKIN_LINE_BREAKER_H
23 #define MINIKIN_LINE_BREAKER_H
24 
25 #include "unicode/brkiter.h"
26 #include "unicode/locid.h"
27 #include <cmath>
28 #include <vector>
29 #include "minikin/Hyphenator.h"
30 #include "minikin/WordBreaker.h"
31 
32 namespace minikin {
33 
34 enum BreakStrategy {
35     kBreakStrategy_Greedy = 0,
36     kBreakStrategy_HighQuality = 1,
37     kBreakStrategy_Balanced = 2
38 };
39 
40 enum HyphenationFrequency {
41     kHyphenationFrequency_None = 0,
42     kHyphenationFrequency_Normal = 1,
43     kHyphenationFrequency_Full = 2
44 };
45 
46 // TODO: want to generalize to be able to handle array of line widths
47 class LineWidths {
48     public:
setWidths(float firstWidth,int firstWidthLineCount,float restWidth)49         void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
50             mFirstWidth = firstWidth;
51             mFirstWidthLineCount = firstWidthLineCount;
52             mRestWidth = restWidth;
53         }
setIndents(const std::vector<float> & indents)54         void setIndents(const std::vector<float>& indents) {
55             mIndents = indents;
56         }
isConstant()57         bool isConstant() const {
58             // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen
59             return mRestWidth == mFirstWidth && mIndents.empty();
60         }
getLineWidth(int line)61         float getLineWidth(int line) const {
62             float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth;
63             if (!mIndents.empty()) {
64                 if ((size_t)line < mIndents.size()) {
65                     width -= mIndents[line];
66                 } else {
67                     width -= mIndents.back();
68                 }
69             }
70             return width;
71         }
clear()72         void clear() {
73             mIndents.clear();
74         }
75     private:
76         float mFirstWidth;
77         int mFirstWidthLineCount;
78         float mRestWidth;
79         std::vector<float> mIndents;
80 };
81 
82 class TabStops {
83     public:
set(const int * stops,size_t nStops,int tabWidth)84         void set(const int* stops, size_t nStops, int tabWidth) {
85             if (stops != nullptr) {
86                 mStops.assign(stops, stops + nStops);
87             } else {
88                 mStops.clear();
89             }
90             mTabWidth = tabWidth;
91         }
nextTab(float widthSoFar)92         float nextTab(float widthSoFar) const {
93             for (size_t i = 0; i < mStops.size(); i++) {
94                 if (mStops[i] > widthSoFar) {
95                     return mStops[i];
96                 }
97             }
98             return floor(widthSoFar / mTabWidth + 1) * mTabWidth;
99         }
100     private:
101         std::vector<int> mStops;
102         int mTabWidth;
103 };
104 
105 class LineBreaker {
106     public:
107         const static int kTab_Shift = 29;  // keep synchronized with TAB_MASK in StaticLayout.java
108 
109         // Note: Locale persists across multiple invocations (it is not cleaned up by finish()),
110         // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always
111         // be set on the first invocation, but callers are encouraged not to call again unless
112         // locale has actually changed.
113         // That logic could be here but it's better for performance that it's upstream because of
114         // the cost of constructing and comparing the ICU Locale object.
115         // Note: caller is responsible for managing lifetime of hyphenator
116         void setLocale(const icu::Locale& locale, Hyphenator* hyphenator);
117 
resize(size_t size)118         void resize(size_t size) {
119             mTextBuf.resize(size);
120             mCharWidths.resize(size);
121         }
122 
size()123         size_t size() const {
124             return mTextBuf.size();
125         }
126 
buffer()127         uint16_t* buffer() {
128             return mTextBuf.data();
129         }
130 
charWidths()131         float* charWidths() {
132             return mCharWidths.data();
133         }
134 
135         // set text to current contents of buffer
136         void setText();
137 
138         void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth);
139 
140         void setIndents(const std::vector<float>& indents);
141 
setTabStops(const int * stops,size_t nStops,int tabWidth)142         void setTabStops(const int* stops, size_t nStops, int tabWidth) {
143             mTabStops.set(stops, nStops, tabWidth);
144         }
145 
getStrategy()146         BreakStrategy getStrategy() const { return mStrategy; }
147 
setStrategy(BreakStrategy strategy)148         void setStrategy(BreakStrategy strategy) { mStrategy = strategy; }
149 
setJustified(bool justified)150         void setJustified(bool justified) { mJustified = justified; }
151 
getHyphenationFrequency()152         HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; }
153 
setHyphenationFrequency(HyphenationFrequency frequency)154         void setHyphenationFrequency(HyphenationFrequency frequency) {
155             mHyphenationFrequency = frequency;
156         }
157 
158         // TODO: this class is actually fairly close to being general and not tied to using
159         // Minikin to do the shaping of the strings. The main thing that would need to be changed
160         // is having some kind of callback (or virtual class, or maybe even template), which could
161         // easily be instantiated with Minikin's Layout. Future work for when needed.
162         float addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
163                 FontStyle style, size_t start, size_t end, bool isRtl);
164 
165         void addReplacement(size_t start, size_t end, float width);
166 
167         size_t computeBreaks();
168 
getBreaks()169         const int* getBreaks() const {
170             return mBreaks.data();
171         }
172 
getWidths()173         const float* getWidths() const {
174             return mWidths.data();
175         }
176 
getFlags()177         const int* getFlags() const {
178             return mFlags.data();
179         }
180 
181         void finish();
182 
183     private:
184         // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for
185         // very large paragraphs, accuracy could degrade using only 32-bit float. Note however
186         // that float is used extensively on the Java side for this. This is a typedef so that
187         // we can easily change it based on performance/accuracy tradeoff.
188         typedef double ParaWidth;
189 
190         // A single candidate break
191         struct Candidate {
192             size_t offset;  // offset to text buffer, in code units
193             size_t prev;  // index to previous break
194             ParaWidth preBreak;  // width of text until this point, if we decide to not break here
195             ParaWidth postBreak;  // width of text until this point, if we decide to break here
196             float penalty;  // penalty of this break (for example, hyphen penalty)
197             float score;  // best score found for this break
198             size_t lineNumber;  // only updated for non-constant line widths
199             size_t preSpaceCount;  // preceding space count before breaking
200             size_t postSpaceCount;  // preceding space count after breaking
201             HyphenationType hyphenType;
202         };
203 
204         float currentLineWidth() const;
205 
206         void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
207                 size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph);
208 
209         void addCandidate(Candidate cand);
210         void pushGreedyBreak();
211 
212         // push an actual break to the output. Takes care of setting flags for tab
213         void pushBreak(int offset, float width, uint8_t hyphenEdit);
214 
215         float getSpaceWidth() const;
216 
217         void computeBreaksGreedy();
218 
219         void computeBreaksOptimal(bool isRectangular);
220 
221         void finishBreaksOptimal();
222 
223         WordBreaker mWordBreaker;
224         icu::Locale mLocale;
225         std::vector<uint16_t>mTextBuf;
226         std::vector<float>mCharWidths;
227 
228         Hyphenator* mHyphenator;
229         std::vector<HyphenationType> mHyphBuf;
230 
231         // layout parameters
232         BreakStrategy mStrategy = kBreakStrategy_Greedy;
233         HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal;
234         bool mJustified;
235         LineWidths mLineWidths;
236         TabStops mTabStops;
237 
238         // result of line breaking
239         std::vector<int> mBreaks;
240         std::vector<float> mWidths;
241         std::vector<int> mFlags;
242 
243         ParaWidth mWidth = 0;
244         std::vector<Candidate> mCandidates;
245         float mLinePenalty = 0.0f;
246 
247         // the following are state for greedy breaker (updated while adding style runs)
248         size_t mLastBreak;
249         size_t mBestBreak;
250         float mBestScore;
251         ParaWidth mPreBreak;  // prebreak of last break
252         uint32_t mLastHyphenation;  // hyphen edit of last break kept for next line
253         int mFirstTabIndex;
254         size_t mSpaceCount;
255 };
256 
257 }  // namespace minikin
258 
259 #endif  // MINIKIN_LINE_BREAKER_H
260