1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define VERBOSE_DEBUG 0
18 
19 #define LOG_TAG "Minikin"
20 
21 #include <limits>
22 
23 #include <log/log.h>
24 
25 #include "LayoutUtils.h"
26 #include <minikin/Layout.h>
27 #include <minikin/LineBreaker.h>
28 
29 using std::vector;
30 
31 namespace minikin {
32 
33 const int CHAR_TAB = 0x0009;
34 
35 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
36 // constants are larger than any reasonable actual width score.
37 const float SCORE_INFTY = std::numeric_limits<float>::max();
38 const float SCORE_OVERFULL = 1e12f;
39 const float SCORE_DESPERATE = 1e10f;
40 
41 // Multiplier for hyphen penalty on last line.
42 const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
43 // Penalty assigned to each line break (to try to minimize number of lines)
44 // TODO: when we implement full justification (so spaces can shrink and stretch), this is
45 // probably not the most appropriate method.
46 const float LINE_PENALTY_MULTIPLIER = 2.0f;
47 
48 // Penalty assigned to shrinking the whitepsace.
49 const float SHRINK_PENALTY_MULTIPLIER = 4.0f;
50 
51 // Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
52 // unreasonably long words. This is somewhat of a heuristic because extremely long words
53 // are possible in some languages. This does mean that very long real words can get
54 // broken by desperate breaks, with no hyphens.
55 const size_t LONGEST_HYPHENATED_WORD = 45;
56 
57 // When the text buffer is within this limit, capacity of vectors is retained at finish(),
58 // to avoid allocation.
59 const size_t MAX_TEXT_BUF_RETAIN = 32678;
60 
61 // Maximum amount that spaces can shrink, in justified text.
62 const float SHRINKABILITY = 1.0 / 3.0;
63 
setLocale(const icu::Locale & locale,Hyphenator * hyphenator)64 void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
65     mWordBreaker.setLocale(locale);
66     mLocale = locale;
67     mHyphenator = hyphenator;
68 }
69 
setText()70 void LineBreaker::setText() {
71     mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
72 
73     // handle initial break here because addStyleRun may never be called
74     mWordBreaker.next();
75     mCandidates.clear();
76     Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK};
77     mCandidates.push_back(cand);
78 
79     // reset greedy breaker state
80     mBreaks.clear();
81     mWidths.clear();
82     mFlags.clear();
83     mLastBreak = 0;
84     mBestBreak = 0;
85     mBestScore = SCORE_INFTY;
86     mPreBreak = 0;
87     mLastHyphenation = HyphenEdit::NO_EDIT;
88     mFirstTabIndex = INT_MAX;
89     mSpaceCount = 0;
90 }
91 
setLineWidths(float firstWidth,int firstWidthLineCount,float restWidth)92 void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
93     mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
94 }
95 
96 
setIndents(const std::vector<float> & indents)97 void LineBreaker::setIndents(const std::vector<float>& indents) {
98     mLineWidths.setIndents(indents);
99 }
100 
101 // This function determines whether a character is a space that disappears at end of line.
102 // It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
103 // plus '\n'.
104 // Note: all such characters are in the BMP, so it's ok to use code units for this.
isLineEndSpace(uint16_t c)105 static bool isLineEndSpace(uint16_t c) {
106     return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
107             c == 0x205F || c == 0x3000;
108 }
109 
110 // Ordinarily, this method measures the text in the range given. However, when paint
111 // is nullptr, it assumes the widths have already been calculated and stored in the
112 // width buffer.
113 // This method finds the candidate word breaks (using the ICU break iterator) and sends them
114 // to addCandidate.
addStyleRun(MinikinPaint * paint,const std::shared_ptr<FontCollection> & typeface,FontStyle style,size_t start,size_t end,bool isRtl)115 float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr<FontCollection>& typeface,
116         FontStyle style, size_t start, size_t end, bool isRtl) {
117     float width = 0.0f;
118     int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
119 
120     float hyphenPenalty = 0.0;
121     if (paint != nullptr) {
122         width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags,
123                 style, *paint, typeface, mCharWidths.data() + start);
124 
125         // a heuristic that seems to perform well
126         hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
127         if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
128             hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
129         }
130 
131         if (mJustified) {
132             // Make hyphenation more aggressive for fully justified text (so that "normal" in
133             // justified mode is the same as "full" in ragged-right).
134             hyphenPenalty *= 0.25;
135         } else {
136             // Line penalty is zero for justified text.
137             mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
138         }
139     }
140 
141     size_t current = (size_t)mWordBreaker.current();
142     size_t afterWord = start;
143     size_t lastBreak = start;
144     ParaWidth lastBreakWidth = mWidth;
145     ParaWidth postBreak = mWidth;
146     size_t postSpaceCount = mSpaceCount;
147     for (size_t i = start; i < end; i++) {
148         uint16_t c = mTextBuf[i];
149         if (c == CHAR_TAB) {
150             mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
151             if (mFirstTabIndex == INT_MAX) {
152                 mFirstTabIndex = (int)i;
153             }
154             // fall back to greedy; other modes don't know how to deal with tabs
155             mStrategy = kBreakStrategy_Greedy;
156         } else {
157             if (isWordSpace(c)) mSpaceCount += 1;
158             mWidth += mCharWidths[i];
159             if (!isLineEndSpace(c)) {
160                 postBreak = mWidth;
161                 postSpaceCount = mSpaceCount;
162                 afterWord = i + 1;
163             }
164         }
165         if (i + 1 == current) {
166             size_t wordStart = mWordBreaker.wordStart();
167             size_t wordEnd = mWordBreaker.wordEnd();
168             if (paint != nullptr && mHyphenator != nullptr &&
169                     mHyphenationFrequency != kHyphenationFrequency_None &&
170                     wordStart >= start && wordEnd > wordStart &&
171                     wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
172                 mHyphenator->hyphenate(&mHyphBuf,
173                         &mTextBuf[wordStart],
174                         wordEnd - wordStart,
175                         mLocale);
176 #if VERBOSE_DEBUG
177                 std::string hyphenatedString;
178                 for (size_t j = wordStart; j < wordEnd; j++) {
179                     if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) {
180                         hyphenatedString.push_back('-');
181                     }
182                     // Note: only works with ASCII, should do UTF-8 conversion here
183                     hyphenatedString.push_back(buffer()[j]);
184                 }
185                 ALOGD("hyphenated string: %s", hyphenatedString.c_str());
186 #endif
187 
188                 // measure hyphenated substrings
189                 for (size_t j = wordStart; j < wordEnd; j++) {
190                     HyphenationType hyph = mHyphBuf[j - wordStart];
191                     if (hyph != HyphenationType::DONT_BREAK) {
192                         paint->hyphenEdit = HyphenEdit::editForThisLine(hyph);
193                         const float firstPartWidth = Layout::measureText(mTextBuf.data(),
194                                 lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
195                                 *paint, typeface, nullptr);
196                         ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
197 
198                         paint->hyphenEdit = HyphenEdit::editForNextLine(hyph);
199                         const float secondPartWidth = Layout::measureText(mTextBuf.data(), j,
200                                 afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
201                                 typeface, nullptr);
202                         ParaWidth hyphPreBreak = postBreak - secondPartWidth;
203 
204                         addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount,
205                                 hyphenPenalty, hyph);
206 
207                         paint->hyphenEdit = HyphenEdit::NO_EDIT;
208                     }
209                 }
210             }
211 
212             // Skip break for zero-width characters inside replacement span
213             if (paint != nullptr || current == end || mCharWidths[current] > 0) {
214                 float penalty = hyphenPenalty * mWordBreaker.breakBadness();
215                 addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty,
216                         HyphenationType::DONT_BREAK);
217             }
218             lastBreak = current;
219             lastBreakWidth = mWidth;
220             current = (size_t)mWordBreaker.next();
221         }
222     }
223 
224     return width;
225 }
226 
227 // add a word break (possibly for a hyphenated fragment), and add desperate breaks if
228 // needed (ie when word exceeds current line width)
addWordBreak(size_t offset,ParaWidth preBreak,ParaWidth postBreak,size_t preSpaceCount,size_t postSpaceCount,float penalty,HyphenationType hyph)229 void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
230         size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) {
231     Candidate cand;
232     ParaWidth width = mCandidates.back().preBreak;
233     if (postBreak - width > currentLineWidth()) {
234         // Add desperate breaks.
235         // Note: these breaks are based on the shaping of the (non-broken) original text; they
236         // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
237         size_t i = mCandidates.back().offset;
238         width += mCharWidths[i++];
239         for (; i < offset; i++) {
240             float w = mCharWidths[i];
241             if (w > 0) {
242                 cand.offset = i;
243                 cand.preBreak = width;
244                 cand.postBreak = width;
245                 // postSpaceCount doesn't include trailing spaces
246                 cand.preSpaceCount = postSpaceCount;
247                 cand.postSpaceCount = postSpaceCount;
248                 cand.penalty = SCORE_DESPERATE;
249                 cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN;
250 #if VERBOSE_DEBUG
251                 ALOGD("desperate cand: %zd %g:%g",
252                         mCandidates.size(), cand.postBreak, cand.preBreak);
253 #endif
254                 addCandidate(cand);
255                 width += w;
256             }
257         }
258     }
259 
260     cand.offset = offset;
261     cand.preBreak = preBreak;
262     cand.postBreak = postBreak;
263     cand.penalty = penalty;
264     cand.preSpaceCount = preSpaceCount;
265     cand.postSpaceCount = postSpaceCount;
266     cand.hyphenType = hyph;
267 #if VERBOSE_DEBUG
268     ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
269 #endif
270     addCandidate(cand);
271 }
272 
273 // Helper method for addCandidate()
pushGreedyBreak()274 void LineBreaker::pushGreedyBreak() {
275     const Candidate& bestCandidate = mCandidates[mBestBreak];
276     pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak,
277             mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType));
278     mBestScore = SCORE_INFTY;
279 #if VERBOSE_DEBUG
280     ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
281 #endif
282     mLastBreak = mBestBreak;
283     mPreBreak = bestCandidate.preBreak;
284     mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType);
285 }
286 
287 // TODO performance: could avoid populating mCandidates if greedy only
addCandidate(Candidate cand)288 void LineBreaker::addCandidate(Candidate cand) {
289     const size_t candIndex = mCandidates.size();
290     mCandidates.push_back(cand);
291 
292     // mLastBreak is the index of the last line break we decided to do in mCandidates,
293     // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate
294     // we have found since then, and mBestScore is its penalty.
295     if (cand.postBreak - mPreBreak > currentLineWidth()) {
296         // This break would create an overfull line, pick the best break and break there (greedy)
297         if (mBestBreak == mLastBreak) {
298             // No good break has been found since last break. Break here.
299             mBestBreak = candIndex;
300         }
301         pushGreedyBreak();
302     }
303 
304     while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) {
305         // We should rarely come here. But if we are here, we have broken the line, but the
306         // remaining part still doesn't fit. We now need to break at the second best place after the
307         // last break, but we have not kept that information, so we need to go back and find it.
308         //
309         // In some really rare cases, postBreak - preBreak of a candidate itself may be over the
310         // current line width. We protect ourselves against an infinite loop in that case by
311         // checking that we have not broken the line at this candidate already.
312         for (size_t i = mLastBreak + 1; i < candIndex; i++) {
313             const float penalty = mCandidates[i].penalty;
314             if (penalty <= mBestScore) {
315                 mBestBreak = i;
316                 mBestScore = penalty;
317             }
318         }
319         if (mBestBreak == mLastBreak) {
320             // We didn't find anything good. Break here.
321             mBestBreak = candIndex;
322         }
323         pushGreedyBreak();
324     }
325 
326     if (cand.penalty <= mBestScore) {
327         mBestBreak = candIndex;
328         mBestScore = cand.penalty;
329     }
330 }
331 
pushBreak(int offset,float width,uint8_t hyphenEdit)332 void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) {
333     mBreaks.push_back(offset);
334     mWidths.push_back(width);
335     int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
336     flags |= hyphenEdit;
337     mFlags.push_back(flags);
338     mFirstTabIndex = INT_MAX;
339 }
340 
addReplacement(size_t start,size_t end,float width)341 void LineBreaker::addReplacement(size_t start, size_t end, float width) {
342     mCharWidths[start] = width;
343     std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
344     addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
345 }
346 
347 // Get the width of a space. May return 0 if there are no spaces.
348 // Note: if there are multiple different widths for spaces (for example, because of mixing of
349 // fonts), it's only guaranteed to pick one.
getSpaceWidth() const350 float LineBreaker::getSpaceWidth() const {
351     for (size_t i = 0; i < mTextBuf.size(); i++) {
352         if (isWordSpace(mTextBuf[i])) {
353             return mCharWidths[i];
354         }
355     }
356     return 0.0f;
357 }
358 
currentLineWidth() const359 float LineBreaker::currentLineWidth() const {
360     return mLineWidths.getLineWidth(mBreaks.size());
361 }
362 
computeBreaksGreedy()363 void LineBreaker::computeBreaksGreedy() {
364     // All breaks but the last have been added in addCandidate already.
365     size_t nCand = mCandidates.size();
366     if (nCand == 1 || mLastBreak != nCand - 1) {
367         pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak,
368                 mLastHyphenation);
369         // don't need to update mBestScore, because we're done
370 #if VERBOSE_DEBUG
371         ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
372 #endif
373     }
374 }
375 
376 // Follow "prev" links in mCandidates array, and copy to result arrays.
finishBreaksOptimal()377 void LineBreaker::finishBreaksOptimal() {
378     // clear existing greedy break result
379     mBreaks.clear();
380     mWidths.clear();
381     mFlags.clear();
382     size_t nCand = mCandidates.size();
383     size_t prev;
384     for (size_t i = nCand - 1; i > 0; i = prev) {
385         prev = mCandidates[i].prev;
386         mBreaks.push_back(mCandidates[i].offset);
387         mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
388         int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType);
389         if (prev > 0) {
390             flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType);
391         }
392         mFlags.push_back(flags);
393     }
394     std::reverse(mBreaks.begin(), mBreaks.end());
395     std::reverse(mWidths.begin(), mWidths.end());
396     std::reverse(mFlags.begin(), mFlags.end());
397 }
398 
computeBreaksOptimal(bool isRectangle)399 void LineBreaker::computeBreaksOptimal(bool isRectangle) {
400     size_t active = 0;
401     size_t nCand = mCandidates.size();
402     float width = mLineWidths.getLineWidth(0);
403     float shortLineFactor = mJustified ? 0.75f : 0.5f;
404     float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f;
405 
406     // "i" iterates through candidates for the end of the line.
407     for (size_t i = 1; i < nCand; i++) {
408         bool atEnd = i == nCand - 1;
409         float best = SCORE_INFTY;
410         size_t bestPrev = 0;
411         size_t lineNumberLast = 0;
412 
413         if (!isRectangle) {
414             size_t lineNumberLast = mCandidates[active].lineNumber;
415             width = mLineWidths.getLineWidth(lineNumberLast);
416         }
417         ParaWidth leftEdge = mCandidates[i].postBreak - width;
418         float bestHope = 0;
419 
420         // "j" iterates through candidates for the beginning of the line.
421         for (size_t j = active; j < i; j++) {
422             if (!isRectangle) {
423                 size_t lineNumber = mCandidates[j].lineNumber;
424                 if (lineNumber != lineNumberLast) {
425                     float widthNew = mLineWidths.getLineWidth(lineNumber);
426                     if (widthNew != width) {
427                         leftEdge = mCandidates[i].postBreak - width;
428                         bestHope = 0;
429                         width = widthNew;
430                     }
431                     lineNumberLast = lineNumber;
432                 }
433             }
434             float jScore = mCandidates[j].score;
435             if (jScore + bestHope >= best) continue;
436             float delta = mCandidates[j].preBreak - leftEdge;
437 
438             // compute width score for line
439 
440             // Note: the "bestHope" optimization makes the assumption that, when delta is
441             // non-negative, widthScore will increase monotonically as successive candidate
442             // breaks are considered.
443             float widthScore = 0.0f;
444             float additionalPenalty = 0.0f;
445             if ((atEnd || !mJustified) && delta < 0) {
446                 widthScore = SCORE_OVERFULL;
447             } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
448                 // increase penalty for hyphen on last line
449                 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
450                 // Penalize very short (< 1 - shortLineFactor of total width) lines.
451                 float underfill = delta - shortLineFactor * width;
452                 widthScore = underfill > 0 ? underfill * underfill : 0;
453             } else {
454                 widthScore = delta * delta;
455                 if (delta < 0) {
456                     if (-delta < maxShrink *
457                             (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) {
458                         widthScore *= SHRINK_PENALTY_MULTIPLIER;
459                     } else {
460                         widthScore = SCORE_OVERFULL;
461                     }
462                 }
463             }
464 
465             if (delta < 0) {
466                 active = j + 1;
467             } else {
468                 bestHope = widthScore;
469             }
470 
471             float score = jScore + widthScore + additionalPenalty;
472             if (score <= best) {
473                 best = score;
474                 bestPrev = j;
475             }
476         }
477         mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
478         mCandidates[i].prev = bestPrev;
479         mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
480 #if VERBOSE_DEBUG
481         ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev);
482 #endif
483     }
484     finishBreaksOptimal();
485 }
486 
computeBreaks()487 size_t LineBreaker::computeBreaks() {
488     if (mStrategy == kBreakStrategy_Greedy) {
489         computeBreaksGreedy();
490     } else {
491         computeBreaksOptimal(mLineWidths.isConstant());
492     }
493     return mBreaks.size();
494 }
495 
finish()496 void LineBreaker::finish() {
497     mWordBreaker.finish();
498     mWidth = 0;
499     mLineWidths.clear();
500     mCandidates.clear();
501     mBreaks.clear();
502     mWidths.clear();
503     mFlags.clear();
504     if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
505         mTextBuf.clear();
506         mTextBuf.shrink_to_fit();
507         mCharWidths.clear();
508         mCharWidths.shrink_to_fit();
509         mHyphBuf.clear();
510         mHyphBuf.shrink_to_fit();
511         mCandidates.shrink_to_fit();
512         mBreaks.shrink_to_fit();
513         mWidths.shrink_to_fit();
514         mFlags.shrink_to_fit();
515     }
516     mStrategy = kBreakStrategy_Greedy;
517     mHyphenationFrequency = kHyphenationFrequency_Normal;
518     mLinePenalty = 0.0f;
519     mJustified = false;
520 }
521 
522 }  // namespace minikin
523