1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "GreedyLineBreak"
18 
19 #include "minikin/Characters.h"
20 #include "minikin/LineBreaker.h"
21 #include "minikin/MeasuredText.h"
22 #include "minikin/Range.h"
23 #include "minikin/U16StringPiece.h"
24 
25 #include "HyphenatorMap.h"
26 #include "LineBreakerUtil.h"
27 #include "Locale.h"
28 #include "LocaleListCache.h"
29 #include "WordBreaker.h"
30 
31 namespace minikin {
32 
33 namespace {
34 
35 constexpr uint32_t NOWHERE = 0xFFFFFFFF;
36 
37 class GreedyLineBreaker {
38 public:
39     // User of this class must keep measured, lineWidthLimit, tabStop alive until the instance is
40     // destructed.
GreedyLineBreaker(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)41     GreedyLineBreaker(const U16StringPiece& textBuf, const MeasuredText& measured,
42                       const LineWidth& lineWidthLimits, const TabStops& tabStops,
43                       bool enableHyphenation)
44             : mLineWidthLimit(lineWidthLimits.getAt(0)),
45               mTextBuf(textBuf),
46               mMeasuredText(measured),
47               mLineWidthLimits(lineWidthLimits),
48               mTabStops(tabStops),
49               mEnableHyphenation(enableHyphenation) {}
50 
51     void process();
52 
53     LineBreakResult getResult() const;
54 
55 private:
56     struct BreakPoint {
BreakPointminikin::__anondb74f7790111::GreedyLineBreaker::BreakPoint57         BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
58                    EndHyphenEdit endHyphen)
59                 : offset(offset),
60                   lineWidth(lineWidth),
61                   hyphenEdit(packHyphenEdit(startHyphen, endHyphen)) {}
62 
63         uint32_t offset;
64         float lineWidth;
65         HyphenEdit hyphenEdit;
66     };
67 
getPrevLineBreakOffset()68     inline uint32_t getPrevLineBreakOffset() {
69         return mBreakPoints.empty() ? 0 : mBreakPoints.back().offset;
70     }
71 
72     // Registers the break point and prepares for next line computation.
73     void breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
74                      float remainingNextSumOfCharWidths, EndHyphenEdit thisLineEndHyphen,
75                      StartHyphenEdit nextLineStartHyphen);
76 
77     // Update current line width.
78     void updateLineWidth(uint16_t c, float width);
79 
80     // Break line if current line exceeds the line limit.
81     void processLineBreak(uint32_t offset, WordBreaker* breaker);
82 
83     // Try to break with previous word boundary.
84     // Returns false if unable to break by word boundary.
85     bool tryLineBreakWithWordBreak();
86 
87     // Try to break with hyphenation.
88     // Returns false if unable to hyphenate.
89     //
90     // This method keeps hyphenation until the line width after line break meets the line width
91     // limit.
92     bool tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker);
93 
94     // Do line break with each characters.
95     //
96     // This method only breaks at the first offset which has the longest width for the line width
97     // limit. This method don't keep line breaking even if the rest of the word exceeds the line
98     // width limit.
99     void doLineBreakWithGraphemeBounds(const Range& range);
100 
101     // Info about the line currently processing.
102     uint32_t mLineNum = 0;
103     double mLineWidth = 0;
104     double mSumOfCharWidths = 0;
105     double mLineWidthLimit;
106     StartHyphenEdit mStartHyphenEdit = StartHyphenEdit::NO_EDIT;
107 
108     // Previous word break point info.
109     uint32_t mPrevWordBoundsOffset = NOWHERE;
110     double mLineWidthAtPrevWordBoundary = 0;
111     double mSumOfCharWidthsAtPrevWordBoundary = 0;
112     bool mIsPrevWordBreakIsInEmailOrUrl = false;
113 
114     // The hyphenator currently used.
115     const Hyphenator* mHyphenator = nullptr;
116 
117     // Input parameters.
118     const U16StringPiece& mTextBuf;
119     const MeasuredText& mMeasuredText;
120     const LineWidth& mLineWidthLimits;
121     const TabStops& mTabStops;
122     bool mEnableHyphenation;
123 
124     // The result of line breaking.
125     std::vector<BreakPoint> mBreakPoints;
126 
127     MINIKIN_PREVENT_COPY_ASSIGN_AND_MOVE(GreedyLineBreaker);
128 };
129 
breakLineAt(uint32_t offset,float lineWidth,float remainingNextLineWidth,float remainingNextSumOfCharWidths,EndHyphenEdit thisLineEndHyphen,StartHyphenEdit nextLineStartHyphen)130 void GreedyLineBreaker::breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
131                                     float remainingNextSumOfCharWidths,
132                                     EndHyphenEdit thisLineEndHyphen,
133                                     StartHyphenEdit nextLineStartHyphen) {
134     // First, push the break to result.
135     mBreakPoints.emplace_back(offset, lineWidth, mStartHyphenEdit, thisLineEndHyphen);
136 
137     // Update the current line info.
138     mLineWidthLimit = mLineWidthLimits.getAt(++mLineNum);
139     mLineWidth = remainingNextLineWidth;
140     mSumOfCharWidths = remainingNextSumOfCharWidths;
141     mStartHyphenEdit = nextLineStartHyphen;
142     mPrevWordBoundsOffset = NOWHERE;
143     mLineWidthAtPrevWordBoundary = 0;
144     mSumOfCharWidthsAtPrevWordBoundary = 0;
145     mIsPrevWordBreakIsInEmailOrUrl = false;
146 }
147 
tryLineBreakWithWordBreak()148 bool GreedyLineBreaker::tryLineBreakWithWordBreak() {
149     if (mPrevWordBoundsOffset == NOWHERE) {
150         return false;  // No word break point before..
151     }
152 
153     breakLineAt(mPrevWordBoundsOffset,                            // break offset
154                 mLineWidthAtPrevWordBoundary,                     // line width
155                 mLineWidth - mSumOfCharWidthsAtPrevWordBoundary,  // remaining next line width
156                 // remaining next sum of char widths.
157                 mSumOfCharWidths - mSumOfCharWidthsAtPrevWordBoundary, EndHyphenEdit::NO_EDIT,
158                 StartHyphenEdit::NO_EDIT);  // No hyphen modification.
159     return true;
160 }
161 
tryLineBreakWithHyphenation(const Range & range,WordBreaker * breaker)162 bool GreedyLineBreaker::tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker) {
163     if (!mEnableHyphenation || mHyphenator == nullptr) {
164         return false;
165     }
166 
167     Run* targetRun = nullptr;
168     for (const auto& run : mMeasuredText.runs) {
169         if (run->getRange().contains(range)) {
170             targetRun = run.get();
171         }
172     }
173 
174     if (targetRun == nullptr) {
175         return false;  // The target range may lay on multiple run. Unable to hyphenate.
176     }
177 
178     const Range targetRange = breaker->wordRange();
179     if (!range.contains(targetRange)) {
180         return false;
181     }
182 
183     const std::vector<HyphenationType> hyphenResult =
184             hyphenate(mTextBuf.substr(targetRange), *mHyphenator);
185     Range contextRange = range;
186     uint32_t prevOffset = NOWHERE;
187     float prevWidth = 0;
188 
189     // Look up the hyphenation point from the begining.
190     for (uint32_t i = targetRange.getStart(); i < targetRange.getEnd(); ++i) {
191         const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(i)];
192         if (hyph == HyphenationType::DONT_BREAK) {
193             continue;  // Not a hyphenation point.
194         }
195 
196         const float width = targetRun->measureHyphenPiece(mTextBuf, contextRange.split(i).first,
197                                                           mStartHyphenEdit, editForThisLine(hyph),
198                                                           nullptr /* advances */, nullptr);
199 
200         if (width <= mLineWidthLimit) {
201             // There are still space, remember current offset and look up next hyphenation point.
202             prevOffset = i;
203             prevWidth = width;
204             continue;
205         }
206 
207         if (prevOffset == NOWHERE) {
208             // Even with hyphenation, the piece is too long for line. Give up and break in
209             // character bounds.
210             doLineBreakWithGraphemeBounds(contextRange);
211         } else {
212             // Previous offset is the longest hyphenation piece. Break with it.
213             const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
214             const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
215             const float remainingCharWidths = targetRun->measureHyphenPiece(
216                     mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
217                     EndHyphenEdit::NO_EDIT, nullptr /* advances */, nullptr);
218             breakLineAt(prevOffset, prevWidth,
219                         remainingCharWidths - (mSumOfCharWidths - mLineWidth), remainingCharWidths,
220                         editForThisLine(hyph), nextLineStartHyphenEdit);
221         }
222 
223         if (mLineWidth <= mLineWidthLimit) {
224             // The remaining hyphenation piece is less than line width. No more hyphenation is
225             // needed. Go to next word.
226             return true;
227         }
228 
229         // Even after line break, the remaining hyphenation piece is still too long for the limit.
230         // Keep hyphenating for the rest.
231         i = getPrevLineBreakOffset();
232         contextRange.setStart(i);  // Update the hyphenation start point.
233         prevOffset = NOWHERE;
234     }
235 
236     // Do the same line break at the end of text.
237     // TODO: Remove code duplication. This is the same as in the for loop but extracting function
238     //       may not clear.
239     if (prevOffset == NOWHERE) {
240         doLineBreakWithGraphemeBounds(contextRange);
241     } else {
242         const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
243         const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
244         const float remainingCharWidths = targetRun->measureHyphenPiece(
245                 mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
246                 EndHyphenEdit::NO_EDIT, nullptr /* advances */, nullptr);
247 
248         breakLineAt(prevOffset, prevWidth, remainingCharWidths - (mSumOfCharWidths - mLineWidth),
249                     remainingCharWidths, editForThisLine(hyph), nextLineStartHyphenEdit);
250     }
251 
252     return true;
253 }
254 
255 // TODO: Respect trailing line end spaces.
doLineBreakWithGraphemeBounds(const Range & range)256 void GreedyLineBreaker::doLineBreakWithGraphemeBounds(const Range& range) {
257     double width = mMeasuredText.widths[range.getStart()];
258 
259     // Starting from + 1 since at least one character needs to be assigned to a line.
260     for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
261         const float w = mMeasuredText.widths[i];
262         if (w == 0) {
263             continue;  // w == 0 means here is not a grapheme bounds. Don't break here.
264         }
265         if (width + w > mLineWidthLimit) {
266             // Okay, here is the longest position.
267             breakLineAt(i, width, mLineWidth - width, mSumOfCharWidths - width,
268                         EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
269 
270             // This method only breaks at the first longest offset, since we may want to hyphenate
271             // the rest of the word.
272             return;
273         } else {
274             width += w;
275         }
276     }
277 
278     // Reaching here means even one character (or cluster) doesn't fit the line.
279     // Give up and break at the end of this range.
280     breakLineAt(range.getEnd(), mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
281 }
282 
updateLineWidth(uint16_t c,float width)283 void GreedyLineBreaker::updateLineWidth(uint16_t c, float width) {
284     if (c == CHAR_TAB) {
285         mSumOfCharWidths = mTabStops.nextTab(mSumOfCharWidths);
286         mLineWidth = mSumOfCharWidths;
287     } else {
288         mSumOfCharWidths += width;
289         if (!isLineEndSpace(c)) {
290             mLineWidth = mSumOfCharWidths;
291         }
292     }
293 }
294 
processLineBreak(uint32_t offset,WordBreaker * breaker)295 void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker) {
296     while (mLineWidth > mLineWidthLimit) {
297         const Range lineRange(getPrevLineBreakOffset(), offset);  // The range we need to address.
298         if (tryLineBreakWithWordBreak()) {
299             continue;  // The word in the new line may still be too long for the line limit.
300         } else if (tryLineBreakWithHyphenation(lineRange, breaker)) {
301             continue;  // TODO: we may be able to return here.
302         } else {
303             doLineBreakWithGraphemeBounds(lineRange);
304         }
305     }
306 
307     // There is still spaces, remember current word break point as a candidate and wait next word.
308     const bool isInEmailOrUrl = breaker->breakBadness() != 0;
309     if (mPrevWordBoundsOffset == NOWHERE || mIsPrevWordBreakIsInEmailOrUrl | !isInEmailOrUrl) {
310         mPrevWordBoundsOffset = offset;
311         mLineWidthAtPrevWordBoundary = mLineWidth;
312         mSumOfCharWidthsAtPrevWordBoundary = mSumOfCharWidths;
313         mIsPrevWordBreakIsInEmailOrUrl = isInEmailOrUrl;
314     }
315 }
316 
process()317 void GreedyLineBreaker::process() {
318     WordBreaker wordBreaker;
319     wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
320 
321     // Following two will be initialized after the first iteration.
322     uint32_t localeListId = LocaleListCache::kInvalidListId;
323     uint32_t nextWordBoundaryOffset = 0;
324     for (const auto& run : mMeasuredText.runs) {
325         const Range range = run->getRange();
326 
327         // Update locale if necessary.
328         uint32_t newLocaleListId = run->getLocaleListId();
329         if (localeListId != newLocaleListId) {
330             Locale locale = getEffectiveLocale(newLocaleListId);
331             nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, range.getStart());
332             mHyphenator = HyphenatorMap::lookup(locale);
333             localeListId = newLocaleListId;
334         }
335 
336         for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
337             updateLineWidth(mTextBuf[i], mMeasuredText.widths[i]);
338 
339             if ((i + 1) == nextWordBoundaryOffset) {
340                 // Only process line break at word boundary.
341                 processLineBreak(i + 1, &wordBreaker);
342                 nextWordBoundaryOffset = wordBreaker.next();
343             }
344         }
345     }
346 
347     if (getPrevLineBreakOffset() != mTextBuf.size() && mPrevWordBoundsOffset != NOWHERE) {
348         // The remaining words in the last line.
349         breakLineAt(mPrevWordBoundsOffset, mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT,
350                     StartHyphenEdit::NO_EDIT);
351     }
352 }
353 
getResult() const354 LineBreakResult GreedyLineBreaker::getResult() const {
355     constexpr int TAB_BIT = 1 << 29;  // Must be the same in StaticLayout.java
356 
357     LineBreakResult out;
358     uint32_t prevBreakOffset = 0;
359     for (const auto& breakPoint : mBreakPoints) {
360         // TODO: compute these during line breaking if these takes longer time.
361         bool hasTabChar = false;
362         MinikinExtent extent = {0.0, 0.0, 0.0};
363         for (uint32_t i = prevBreakOffset; i < breakPoint.offset; ++i) {
364             const uint16_t c = mTextBuf[i];
365             hasTabChar |= c == CHAR_TAB;
366             if (!isLineSpaceExcludeChar(c)) {
367                 extent.extendBy(mMeasuredText.extents[i]);
368             }
369         }
370 
371         out.breakPoints.push_back(breakPoint.offset);
372         out.widths.push_back(breakPoint.lineWidth);
373         out.ascents.push_back(extent.ascent);
374         out.descents.push_back(extent.descent);
375         out.flags.push_back((hasTabChar ? TAB_BIT : 0) | static_cast<int>(breakPoint.hyphenEdit));
376 
377         prevBreakOffset = breakPoint.offset;
378     }
379     return out;
380 }
381 
382 }  // namespace
383 
breakLineGreedy(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)384 LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredText& measured,
385                                 const LineWidth& lineWidthLimits, const TabStops& tabStops,
386                                 bool enableHyphenation) {
387     if (textBuf.size() == 0) {
388         return LineBreakResult();
389     }
390     GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation);
391     lineBreaker.process();
392     return lineBreaker.getResult();
393 }
394 
395 }  // namespace minikin
396