1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "GreedyLineBreak"
18
19 #include "minikin/Characters.h"
20 #include "minikin/LineBreaker.h"
21 #include "minikin/MeasuredText.h"
22 #include "minikin/Range.h"
23 #include "minikin/U16StringPiece.h"
24
25 #include "HyphenatorMap.h"
26 #include "LineBreakerUtil.h"
27 #include "Locale.h"
28 #include "LocaleListCache.h"
29 #include "WordBreaker.h"
30
31 namespace minikin {
32
33 namespace {
34
35 constexpr uint32_t NOWHERE = 0xFFFFFFFF;
36
37 class GreedyLineBreaker {
38 public:
39 // User of this class must keep measured, lineWidthLimit, tabStop alive until the instance is
40 // destructed.
GreedyLineBreaker(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)41 GreedyLineBreaker(const U16StringPiece& textBuf, const MeasuredText& measured,
42 const LineWidth& lineWidthLimits, const TabStops& tabStops,
43 bool enableHyphenation)
44 : mLineWidthLimit(lineWidthLimits.getAt(0)),
45 mTextBuf(textBuf),
46 mMeasuredText(measured),
47 mLineWidthLimits(lineWidthLimits),
48 mTabStops(tabStops),
49 mEnableHyphenation(enableHyphenation) {}
50
51 void process();
52
53 LineBreakResult getResult() const;
54
55 private:
56 struct BreakPoint {
BreakPointminikin::__anondb74f7790111::GreedyLineBreaker::BreakPoint57 BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
58 EndHyphenEdit endHyphen)
59 : offset(offset),
60 lineWidth(lineWidth),
61 hyphenEdit(packHyphenEdit(startHyphen, endHyphen)) {}
62
63 uint32_t offset;
64 float lineWidth;
65 HyphenEdit hyphenEdit;
66 };
67
getPrevLineBreakOffset()68 inline uint32_t getPrevLineBreakOffset() {
69 return mBreakPoints.empty() ? 0 : mBreakPoints.back().offset;
70 }
71
72 // Registers the break point and prepares for next line computation.
73 void breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
74 float remainingNextSumOfCharWidths, EndHyphenEdit thisLineEndHyphen,
75 StartHyphenEdit nextLineStartHyphen);
76
77 // Update current line width.
78 void updateLineWidth(uint16_t c, float width);
79
80 // Break line if current line exceeds the line limit.
81 void processLineBreak(uint32_t offset, WordBreaker* breaker);
82
83 // Try to break with previous word boundary.
84 // Returns false if unable to break by word boundary.
85 bool tryLineBreakWithWordBreak();
86
87 // Try to break with hyphenation.
88 // Returns false if unable to hyphenate.
89 //
90 // This method keeps hyphenation until the line width after line break meets the line width
91 // limit.
92 bool tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker);
93
94 // Do line break with each characters.
95 //
96 // This method only breaks at the first offset which has the longest width for the line width
97 // limit. This method don't keep line breaking even if the rest of the word exceeds the line
98 // width limit.
99 void doLineBreakWithGraphemeBounds(const Range& range);
100
101 // Info about the line currently processing.
102 uint32_t mLineNum = 0;
103 double mLineWidth = 0;
104 double mSumOfCharWidths = 0;
105 double mLineWidthLimit;
106 StartHyphenEdit mStartHyphenEdit = StartHyphenEdit::NO_EDIT;
107
108 // Previous word break point info.
109 uint32_t mPrevWordBoundsOffset = NOWHERE;
110 double mLineWidthAtPrevWordBoundary = 0;
111 double mSumOfCharWidthsAtPrevWordBoundary = 0;
112 bool mIsPrevWordBreakIsInEmailOrUrl = false;
113
114 // The hyphenator currently used.
115 const Hyphenator* mHyphenator = nullptr;
116
117 // Input parameters.
118 const U16StringPiece& mTextBuf;
119 const MeasuredText& mMeasuredText;
120 const LineWidth& mLineWidthLimits;
121 const TabStops& mTabStops;
122 bool mEnableHyphenation;
123
124 // The result of line breaking.
125 std::vector<BreakPoint> mBreakPoints;
126
127 MINIKIN_PREVENT_COPY_ASSIGN_AND_MOVE(GreedyLineBreaker);
128 };
129
breakLineAt(uint32_t offset,float lineWidth,float remainingNextLineWidth,float remainingNextSumOfCharWidths,EndHyphenEdit thisLineEndHyphen,StartHyphenEdit nextLineStartHyphen)130 void GreedyLineBreaker::breakLineAt(uint32_t offset, float lineWidth, float remainingNextLineWidth,
131 float remainingNextSumOfCharWidths,
132 EndHyphenEdit thisLineEndHyphen,
133 StartHyphenEdit nextLineStartHyphen) {
134 // First, push the break to result.
135 mBreakPoints.emplace_back(offset, lineWidth, mStartHyphenEdit, thisLineEndHyphen);
136
137 // Update the current line info.
138 mLineWidthLimit = mLineWidthLimits.getAt(++mLineNum);
139 mLineWidth = remainingNextLineWidth;
140 mSumOfCharWidths = remainingNextSumOfCharWidths;
141 mStartHyphenEdit = nextLineStartHyphen;
142 mPrevWordBoundsOffset = NOWHERE;
143 mLineWidthAtPrevWordBoundary = 0;
144 mSumOfCharWidthsAtPrevWordBoundary = 0;
145 mIsPrevWordBreakIsInEmailOrUrl = false;
146 }
147
tryLineBreakWithWordBreak()148 bool GreedyLineBreaker::tryLineBreakWithWordBreak() {
149 if (mPrevWordBoundsOffset == NOWHERE) {
150 return false; // No word break point before..
151 }
152
153 breakLineAt(mPrevWordBoundsOffset, // break offset
154 mLineWidthAtPrevWordBoundary, // line width
155 mLineWidth - mSumOfCharWidthsAtPrevWordBoundary, // remaining next line width
156 // remaining next sum of char widths.
157 mSumOfCharWidths - mSumOfCharWidthsAtPrevWordBoundary, EndHyphenEdit::NO_EDIT,
158 StartHyphenEdit::NO_EDIT); // No hyphen modification.
159 return true;
160 }
161
tryLineBreakWithHyphenation(const Range & range,WordBreaker * breaker)162 bool GreedyLineBreaker::tryLineBreakWithHyphenation(const Range& range, WordBreaker* breaker) {
163 if (!mEnableHyphenation || mHyphenator == nullptr) {
164 return false;
165 }
166
167 Run* targetRun = nullptr;
168 for (const auto& run : mMeasuredText.runs) {
169 if (run->getRange().contains(range)) {
170 targetRun = run.get();
171 }
172 }
173
174 if (targetRun == nullptr) {
175 return false; // The target range may lay on multiple run. Unable to hyphenate.
176 }
177
178 const Range targetRange = breaker->wordRange();
179 if (!range.contains(targetRange)) {
180 return false;
181 }
182
183 const std::vector<HyphenationType> hyphenResult =
184 hyphenate(mTextBuf.substr(targetRange), *mHyphenator);
185 Range contextRange = range;
186 uint32_t prevOffset = NOWHERE;
187 float prevWidth = 0;
188
189 // Look up the hyphenation point from the begining.
190 for (uint32_t i = targetRange.getStart(); i < targetRange.getEnd(); ++i) {
191 const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(i)];
192 if (hyph == HyphenationType::DONT_BREAK) {
193 continue; // Not a hyphenation point.
194 }
195
196 const float width = targetRun->measureHyphenPiece(mTextBuf, contextRange.split(i).first,
197 mStartHyphenEdit, editForThisLine(hyph),
198 nullptr /* advances */, nullptr);
199
200 if (width <= mLineWidthLimit) {
201 // There are still space, remember current offset and look up next hyphenation point.
202 prevOffset = i;
203 prevWidth = width;
204 continue;
205 }
206
207 if (prevOffset == NOWHERE) {
208 // Even with hyphenation, the piece is too long for line. Give up and break in
209 // character bounds.
210 doLineBreakWithGraphemeBounds(contextRange);
211 } else {
212 // Previous offset is the longest hyphenation piece. Break with it.
213 const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
214 const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
215 const float remainingCharWidths = targetRun->measureHyphenPiece(
216 mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
217 EndHyphenEdit::NO_EDIT, nullptr /* advances */, nullptr);
218 breakLineAt(prevOffset, prevWidth,
219 remainingCharWidths - (mSumOfCharWidths - mLineWidth), remainingCharWidths,
220 editForThisLine(hyph), nextLineStartHyphenEdit);
221 }
222
223 if (mLineWidth <= mLineWidthLimit) {
224 // The remaining hyphenation piece is less than line width. No more hyphenation is
225 // needed. Go to next word.
226 return true;
227 }
228
229 // Even after line break, the remaining hyphenation piece is still too long for the limit.
230 // Keep hyphenating for the rest.
231 i = getPrevLineBreakOffset();
232 contextRange.setStart(i); // Update the hyphenation start point.
233 prevOffset = NOWHERE;
234 }
235
236 // Do the same line break at the end of text.
237 // TODO: Remove code duplication. This is the same as in the for loop but extracting function
238 // may not clear.
239 if (prevOffset == NOWHERE) {
240 doLineBreakWithGraphemeBounds(contextRange);
241 } else {
242 const HyphenationType hyph = hyphenResult[targetRange.toRangeOffset(prevOffset)];
243 const StartHyphenEdit nextLineStartHyphenEdit = editForNextLine(hyph);
244 const float remainingCharWidths = targetRun->measureHyphenPiece(
245 mTextBuf, contextRange.split(prevOffset).second, nextLineStartHyphenEdit,
246 EndHyphenEdit::NO_EDIT, nullptr /* advances */, nullptr);
247
248 breakLineAt(prevOffset, prevWidth, remainingCharWidths - (mSumOfCharWidths - mLineWidth),
249 remainingCharWidths, editForThisLine(hyph), nextLineStartHyphenEdit);
250 }
251
252 return true;
253 }
254
255 // TODO: Respect trailing line end spaces.
doLineBreakWithGraphemeBounds(const Range & range)256 void GreedyLineBreaker::doLineBreakWithGraphemeBounds(const Range& range) {
257 double width = mMeasuredText.widths[range.getStart()];
258
259 // Starting from + 1 since at least one character needs to be assigned to a line.
260 for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
261 const float w = mMeasuredText.widths[i];
262 if (w == 0) {
263 continue; // w == 0 means here is not a grapheme bounds. Don't break here.
264 }
265 if (width + w > mLineWidthLimit) {
266 // Okay, here is the longest position.
267 breakLineAt(i, width, mLineWidth - width, mSumOfCharWidths - width,
268 EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
269
270 // This method only breaks at the first longest offset, since we may want to hyphenate
271 // the rest of the word.
272 return;
273 } else {
274 width += w;
275 }
276 }
277
278 // Reaching here means even one character (or cluster) doesn't fit the line.
279 // Give up and break at the end of this range.
280 breakLineAt(range.getEnd(), mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT, StartHyphenEdit::NO_EDIT);
281 }
282
updateLineWidth(uint16_t c,float width)283 void GreedyLineBreaker::updateLineWidth(uint16_t c, float width) {
284 if (c == CHAR_TAB) {
285 mSumOfCharWidths = mTabStops.nextTab(mSumOfCharWidths);
286 mLineWidth = mSumOfCharWidths;
287 } else {
288 mSumOfCharWidths += width;
289 if (!isLineEndSpace(c)) {
290 mLineWidth = mSumOfCharWidths;
291 }
292 }
293 }
294
processLineBreak(uint32_t offset,WordBreaker * breaker)295 void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker) {
296 while (mLineWidth > mLineWidthLimit) {
297 const Range lineRange(getPrevLineBreakOffset(), offset); // The range we need to address.
298 if (tryLineBreakWithWordBreak()) {
299 continue; // The word in the new line may still be too long for the line limit.
300 } else if (tryLineBreakWithHyphenation(lineRange, breaker)) {
301 continue; // TODO: we may be able to return here.
302 } else {
303 doLineBreakWithGraphemeBounds(lineRange);
304 }
305 }
306
307 // There is still spaces, remember current word break point as a candidate and wait next word.
308 const bool isInEmailOrUrl = breaker->breakBadness() != 0;
309 if (mPrevWordBoundsOffset == NOWHERE || mIsPrevWordBreakIsInEmailOrUrl | !isInEmailOrUrl) {
310 mPrevWordBoundsOffset = offset;
311 mLineWidthAtPrevWordBoundary = mLineWidth;
312 mSumOfCharWidthsAtPrevWordBoundary = mSumOfCharWidths;
313 mIsPrevWordBreakIsInEmailOrUrl = isInEmailOrUrl;
314 }
315 }
316
process()317 void GreedyLineBreaker::process() {
318 WordBreaker wordBreaker;
319 wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
320
321 // Following two will be initialized after the first iteration.
322 uint32_t localeListId = LocaleListCache::kInvalidListId;
323 uint32_t nextWordBoundaryOffset = 0;
324 for (const auto& run : mMeasuredText.runs) {
325 const Range range = run->getRange();
326
327 // Update locale if necessary.
328 uint32_t newLocaleListId = run->getLocaleListId();
329 if (localeListId != newLocaleListId) {
330 Locale locale = getEffectiveLocale(newLocaleListId);
331 nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, range.getStart());
332 mHyphenator = HyphenatorMap::lookup(locale);
333 localeListId = newLocaleListId;
334 }
335
336 for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
337 updateLineWidth(mTextBuf[i], mMeasuredText.widths[i]);
338
339 if ((i + 1) == nextWordBoundaryOffset) {
340 // Only process line break at word boundary.
341 processLineBreak(i + 1, &wordBreaker);
342 nextWordBoundaryOffset = wordBreaker.next();
343 }
344 }
345 }
346
347 if (getPrevLineBreakOffset() != mTextBuf.size() && mPrevWordBoundsOffset != NOWHERE) {
348 // The remaining words in the last line.
349 breakLineAt(mPrevWordBoundsOffset, mLineWidth, 0, 0, EndHyphenEdit::NO_EDIT,
350 StartHyphenEdit::NO_EDIT);
351 }
352 }
353
getResult() const354 LineBreakResult GreedyLineBreaker::getResult() const {
355 constexpr int TAB_BIT = 1 << 29; // Must be the same in StaticLayout.java
356
357 LineBreakResult out;
358 uint32_t prevBreakOffset = 0;
359 for (const auto& breakPoint : mBreakPoints) {
360 // TODO: compute these during line breaking if these takes longer time.
361 bool hasTabChar = false;
362 MinikinExtent extent = {0.0, 0.0, 0.0};
363 for (uint32_t i = prevBreakOffset; i < breakPoint.offset; ++i) {
364 const uint16_t c = mTextBuf[i];
365 hasTabChar |= c == CHAR_TAB;
366 if (!isLineSpaceExcludeChar(c)) {
367 extent.extendBy(mMeasuredText.extents[i]);
368 }
369 }
370
371 out.breakPoints.push_back(breakPoint.offset);
372 out.widths.push_back(breakPoint.lineWidth);
373 out.ascents.push_back(extent.ascent);
374 out.descents.push_back(extent.descent);
375 out.flags.push_back((hasTabChar ? TAB_BIT : 0) | static_cast<int>(breakPoint.hyphenEdit));
376
377 prevBreakOffset = breakPoint.offset;
378 }
379 return out;
380 }
381
382 } // namespace
383
breakLineGreedy(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidthLimits,const TabStops & tabStops,bool enableHyphenation)384 LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredText& measured,
385 const LineWidth& lineWidthLimits, const TabStops& tabStops,
386 bool enableHyphenation) {
387 if (textBuf.size() == 0) {
388 return LineBreakResult();
389 }
390 GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation);
391 lineBreaker.process();
392 return lineBreaker.getResult();
393 }
394
395 } // namespace minikin
396