1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define VERBOSE_DEBUG 0
18 
19 #include <limits>
20 
21 #define LOG_TAG "Minikin"
22 #include <cutils/log.h>
23 
24 #include <minikin/Layout.h>
25 #include <minikin/LineBreaker.h>
26 
27 using std::vector;
28 
29 namespace android {
30 
31 const int CHAR_TAB = 0x0009;
32 
33 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
34 // constants are larger than any reasonable actual width score.
35 const float SCORE_INFTY = std::numeric_limits<float>::max();
36 const float SCORE_OVERFULL = 1e12f;
37 const float SCORE_DESPERATE = 1e10f;
38 
39 // Multiplier for hyphen penalty on last line.
40 const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
41 // Penalty assigned to each line break (to try to minimize number of lines)
42 // TODO: when we implement full justification (so spaces can shrink and stretch), this is
43 // probably not the most appropriate method.
44 const float LINE_PENALTY_MULTIPLIER = 2.0f;
45 
46 // Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for
47 // unreasonably long words. This is somewhat of a heuristic because extremely long words
48 // are possible in some languages. This does mean that very long real words can get
49 // broken by desperate breaks, with no hyphens.
50 const size_t LONGEST_HYPHENATED_WORD = 45;
51 
52 // When the text buffer is within this limit, capacity of vectors is retained at finish(),
53 // to avoid allocation.
54 const size_t MAX_TEXT_BUF_RETAIN = 32678;
55 
setLocale(const icu::Locale & locale,Hyphenator * hyphenator)56 void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) {
57     mWordBreaker.setLocale(locale);
58 
59     mHyphenator = hyphenator;
60 }
61 
setText()62 void LineBreaker::setText() {
63     mWordBreaker.setText(mTextBuf.data(), mTextBuf.size());
64 
65     // handle initial break here because addStyleRun may never be called
66     mWordBreaker.next();
67     mCandidates.clear();
68     Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0};
69     mCandidates.push_back(cand);
70 
71     // reset greedy breaker state
72     mBreaks.clear();
73     mWidths.clear();
74     mFlags.clear();
75     mLastBreak = 0;
76     mBestBreak = 0;
77     mBestScore = SCORE_INFTY;
78     mPreBreak = 0;
79     mFirstTabIndex = INT_MAX;
80 }
81 
setLineWidths(float firstWidth,int firstWidthLineCount,float restWidth)82 void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) {
83     mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth);
84 }
85 
86 
setIndents(const std::vector<float> & indents)87 void LineBreaker::setIndents(const std::vector<float>& indents) {
88     mLineWidths.setIndents(indents);
89 }
90 
91 // This function determines whether a character is a space that disappears at end of line.
92 // It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]],
93 // plus '\n'.
94 // Note: all such characters are in the BMP, so it's ok to use code units for this.
isLineEndSpace(uint16_t c)95 static bool isLineEndSpace(uint16_t c) {
96     return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) ||
97             c == 0x205F || c == 0x3000;
98 }
99 
100 // This function determines whether a character is like U+2010 HYPHEN in
101 // line breaking and usage: a character immediately after which line breaks
102 // are allowed, but words containing it should not be automatically
103 // hyphenated. This is a curated set, created by manually inspecting all
104 // the characters that have the Unicode line breaking property of BA or HY
105 // and seeing which ones are hyphens.
isLineBreakingHyphen(uint16_t c)106 static bool isLineBreakingHyphen(uint16_t c) {
107     return (c == 0x002D || // HYPHEN-MINUS
108             c == 0x058A || // ARMENIAN HYPHEN
109             c == 0x05BE || // HEBREW PUNCTUATION MAQAF
110             c == 0x1400 || // CANADIAN SYLLABICS HYPHEN
111             c == 0x2010 || // HYPHEN
112             c == 0x2013 || // EN DASH
113             c == 0x2027 || // HYPHENATION POINT
114             c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN
115             c == 0x2E40);  // DOUBLE HYPHEN
116 }
117 
118 // Ordinarily, this method measures the text in the range given. However, when paint
119 // is nullptr, it assumes the widths have already been calculated and stored in the
120 // width buffer.
121 // This method finds the candidate word breaks (using the ICU break iterator) and sends them
122 // to addCandidate.
addStyleRun(MinikinPaint * paint,const FontCollection * typeface,FontStyle style,size_t start,size_t end,bool isRtl)123 float LineBreaker::addStyleRun(MinikinPaint* paint, const FontCollection* typeface,
124         FontStyle style, size_t start, size_t end, bool isRtl) {
125     float width = 0.0f;
126     int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR;
127 
128     float hyphenPenalty = 0.0;
129     if (paint != nullptr) {
130         width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags,
131                 style, *paint, typeface, mCharWidths.data() + start);
132 
133         // a heuristic that seems to perform well
134         hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0);
135         if (mHyphenationFrequency == kHyphenationFrequency_Normal) {
136             hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing
137         }
138 
139         mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER);
140     }
141 
142     size_t current = (size_t)mWordBreaker.current();
143     size_t afterWord = start;
144     size_t lastBreak = start;
145     ParaWidth lastBreakWidth = mWidth;
146     ParaWidth postBreak = mWidth;
147     bool temporarilySkipHyphenation = false;
148     for (size_t i = start; i < end; i++) {
149         uint16_t c = mTextBuf[i];
150         if (c == CHAR_TAB) {
151             mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak);
152             if (mFirstTabIndex == INT_MAX) {
153                 mFirstTabIndex = (int)i;
154             }
155             // fall back to greedy; other modes don't know how to deal with tabs
156             mStrategy = kBreakStrategy_Greedy;
157         } else {
158             mWidth += mCharWidths[i];
159             if (!isLineEndSpace(c)) {
160                 postBreak = mWidth;
161                 afterWord = i + 1;
162             }
163         }
164         if (i + 1 == current) {
165             // TODO: Add a new type of HyphenEdit for breaks whose hyphen already exists, so
166             // we can pass the whole word down to Hyphenator like the soft hyphen case.
167             bool wordEndsInHyphen = isLineBreakingHyphen(c);
168             size_t wordStart = mWordBreaker.wordStart();
169             size_t wordEnd = mWordBreaker.wordEnd();
170             if (paint != nullptr && mHyphenator != nullptr &&
171                     mHyphenationFrequency != kHyphenationFrequency_None &&
172                     !wordEndsInHyphen && !temporarilySkipHyphenation &&
173                     wordStart >= start && wordEnd > wordStart &&
174                     wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) {
175                 mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[wordStart], wordEnd - wordStart);
176 #if VERBOSE_DEBUG
177                 std::string hyphenatedString;
178                 for (size_t j = wordStart; j < wordEnd; j++) {
179                     if (mHyphBuf[j - wordStart]) hyphenatedString.push_back('-');
180                     // Note: only works with ASCII, should do UTF-8 conversion here
181                     hyphenatedString.push_back(buffer()[j]);
182                 }
183                 ALOGD("hyphenated string: %s", hyphenatedString.c_str());
184 #endif
185 
186                 // measure hyphenated substrings
187                 for (size_t j = wordStart; j < wordEnd; j++) {
188                     uint8_t hyph = mHyphBuf[j - wordStart];
189                     if (hyph) {
190                         paint->hyphenEdit = hyph;
191 
192                         const float firstPartWidth = Layout::measureText(mTextBuf.data(),
193                                 lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style,
194                                 *paint, typeface, nullptr);
195                         ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth;
196                         paint->hyphenEdit = 0;
197 
198                         const float secondPartWith = Layout::measureText(mTextBuf.data(), j,
199                                 afterWord - j, mTextBuf.size(), bidiFlags, style, *paint,
200                                 typeface, nullptr);
201                         ParaWidth hyphPreBreak = postBreak - secondPartWith;
202                         addWordBreak(j, hyphPreBreak, hyphPostBreak, hyphenPenalty, hyph);
203                     }
204                 }
205             }
206             // Skip hyphenating the next word if and only if the present word ends in a hyphen
207             temporarilySkipHyphenation = wordEndsInHyphen;
208 
209             // Skip break for zero-width characters inside replacement span
210             if (paint != nullptr || current == end || mCharWidths[current] > 0) {
211                 float penalty = hyphenPenalty * mWordBreaker.breakBadness();
212                 addWordBreak(current, mWidth, postBreak, penalty, 0);
213             }
214             lastBreak = current;
215             lastBreakWidth = mWidth;
216             current = (size_t)mWordBreaker.next();
217         }
218     }
219 
220     return width;
221 }
222 
223 // add a word break (possibly for a hyphenated fragment), and add desperate breaks if
224 // needed (ie when word exceeds current line width)
addWordBreak(size_t offset,ParaWidth preBreak,ParaWidth postBreak,float penalty,uint8_t hyph)225 void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak,
226         float penalty, uint8_t hyph) {
227     Candidate cand;
228     ParaWidth width = mCandidates.back().preBreak;
229     if (postBreak - width > currentLineWidth()) {
230         // Add desperate breaks.
231         // Note: these breaks are based on the shaping of the (non-broken) original text; they
232         // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping.
233         size_t i = mCandidates.back().offset;
234         width += mCharWidths[i++];
235         for (; i < offset; i++) {
236             float w = mCharWidths[i];
237             if (w > 0) {
238                 cand.offset = i;
239                 cand.preBreak = width;
240                 cand.postBreak = width;
241                 cand.penalty = SCORE_DESPERATE;
242                 cand.hyphenEdit = 0;
243 #if VERBOSE_DEBUG
244                 ALOGD("desperate cand: %zd %g:%g",
245                         mCandidates.size(), cand.postBreak, cand.preBreak);
246 #endif
247                 addCandidate(cand);
248                 width += w;
249             }
250         }
251     }
252 
253     cand.offset = offset;
254     cand.preBreak = preBreak;
255     cand.postBreak = postBreak;
256     cand.penalty = penalty;
257     cand.hyphenEdit = hyph;
258 #if VERBOSE_DEBUG
259     ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak);
260 #endif
261     addCandidate(cand);
262 }
263 
264 // TODO performance: could avoid populating mCandidates if greedy only
addCandidate(Candidate cand)265 void LineBreaker::addCandidate(Candidate cand) {
266     size_t candIndex = mCandidates.size();
267     mCandidates.push_back(cand);
268     if (cand.postBreak - mPreBreak > currentLineWidth()) {
269         // This break would create an overfull line, pick the best break and break there (greedy)
270         if (mBestBreak == mLastBreak) {
271             mBestBreak = candIndex;
272         }
273         pushBreak(mCandidates[mBestBreak].offset, mCandidates[mBestBreak].postBreak - mPreBreak,
274                 mCandidates[mBestBreak].hyphenEdit);
275         mBestScore = SCORE_INFTY;
276 #if VERBOSE_DEBUG
277         ALOGD("break: %d %g", mBreaks.back(), mWidths.back());
278 #endif
279         mLastBreak = mBestBreak;
280         mPreBreak = mCandidates[mBestBreak].preBreak;
281     }
282     if (cand.penalty <= mBestScore) {
283         mBestBreak = candIndex;
284         mBestScore = cand.penalty;
285     }
286 }
287 
pushBreak(int offset,float width,uint8_t hyph)288 void LineBreaker::pushBreak(int offset, float width, uint8_t hyph) {
289     mBreaks.push_back(offset);
290     mWidths.push_back(width);
291     int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift;
292     flags |= hyph;
293     mFlags.push_back(flags);
294     mFirstTabIndex = INT_MAX;
295 }
296 
addReplacement(size_t start,size_t end,float width)297 void LineBreaker::addReplacement(size_t start, size_t end, float width) {
298     mCharWidths[start] = width;
299     std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f);
300     addStyleRun(nullptr, nullptr, FontStyle(), start, end, false);
301 }
302 
currentLineWidth() const303 float LineBreaker::currentLineWidth() const {
304     return mLineWidths.getLineWidth(mBreaks.size());
305 }
306 
computeBreaksGreedy()307 void LineBreaker::computeBreaksGreedy() {
308     // All breaks but the last have been added in addCandidate already.
309     size_t nCand = mCandidates.size();
310     if (nCand == 1 || mLastBreak != nCand - 1) {
311         pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, 0);
312         // don't need to update mBestScore, because we're done
313 #if VERBOSE_DEBUG
314         ALOGD("final break: %d %g", mBreaks.back(), mWidths.back());
315 #endif
316     }
317 }
318 
319 // Follow "prev" links in mCandidates array, and copy to result arrays.
finishBreaksOptimal()320 void LineBreaker::finishBreaksOptimal() {
321     // clear existing greedy break result
322     mBreaks.clear();
323     mWidths.clear();
324     mFlags.clear();
325     size_t nCand = mCandidates.size();
326     size_t prev;
327     for (size_t i = nCand - 1; i > 0; i = prev) {
328         prev = mCandidates[i].prev;
329         mBreaks.push_back(mCandidates[i].offset);
330         mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak);
331         mFlags.push_back(mCandidates[i].hyphenEdit);
332     }
333     std::reverse(mBreaks.begin(), mBreaks.end());
334     std::reverse(mWidths.begin(), mWidths.end());
335     std::reverse(mFlags.begin(), mFlags.end());
336 }
337 
computeBreaksOptimal(bool isRectangle)338 void LineBreaker::computeBreaksOptimal(bool isRectangle) {
339     size_t active = 0;
340     size_t nCand = mCandidates.size();
341     float width = mLineWidths.getLineWidth(0);
342     for (size_t i = 1; i < nCand; i++) {
343         bool atEnd = i == nCand - 1;
344         float best = SCORE_INFTY;
345         size_t bestPrev = 0;
346         size_t lineNumberLast = 0;
347 
348         if (!isRectangle) {
349             size_t lineNumberLast = mCandidates[active].lineNumber;
350             width = mLineWidths.getLineWidth(lineNumberLast);
351         }
352         ParaWidth leftEdge = mCandidates[i].postBreak - width;
353         float bestHope = 0;
354 
355         for (size_t j = active; j < i; j++) {
356             if (!isRectangle) {
357                 size_t lineNumber = mCandidates[j].lineNumber;
358                 if (lineNumber != lineNumberLast) {
359                     float widthNew = mLineWidths.getLineWidth(lineNumber);
360                     if (widthNew != width) {
361                         leftEdge = mCandidates[i].postBreak - width;
362                         bestHope = 0;
363                         width = widthNew;
364                     }
365                     lineNumberLast = lineNumber;
366                 }
367             }
368             float jScore = mCandidates[j].score;
369             if (jScore + bestHope >= best) continue;
370             float delta = mCandidates[j].preBreak - leftEdge;
371 
372             // compute width score for line
373 
374             // Note: the "bestHope" optimization makes the assumption that, when delta is
375             // non-negative, widthScore will increase monotonically as successive candidate
376             // breaks are considered.
377             float widthScore = 0.0f;
378             float additionalPenalty = 0.0f;
379             if (delta < 0) {
380                 widthScore = SCORE_OVERFULL;
381             } else if (atEnd && mStrategy != kBreakStrategy_Balanced) {
382                 // increase penalty for hyphen on last line
383                 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty;
384             } else {
385                 widthScore = delta * delta;
386             }
387 
388             if (delta < 0) {
389                 active = j + 1;
390             } else {
391                 bestHope = widthScore;
392             }
393 
394             float score = jScore + widthScore + additionalPenalty;
395             if (score <= best) {
396                 best = score;
397                 bestPrev = j;
398             }
399         }
400         mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty;
401         mCandidates[i].prev = bestPrev;
402         mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1;
403 #if VERBOSE_DEBUG
404         ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev);
405 #endif
406     }
407     finishBreaksOptimal();
408 }
409 
computeBreaks()410 size_t LineBreaker::computeBreaks() {
411     if (mStrategy == kBreakStrategy_Greedy) {
412         computeBreaksGreedy();
413     } else {
414         computeBreaksOptimal(mLineWidths.isConstant());
415     }
416     return mBreaks.size();
417 }
418 
finish()419 void LineBreaker::finish() {
420     mWordBreaker.finish();
421     mWidth = 0;
422     mLineWidths.clear();
423     mCandidates.clear();
424     mBreaks.clear();
425     mWidths.clear();
426     mFlags.clear();
427     if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) {
428         mTextBuf.clear();
429         mTextBuf.shrink_to_fit();
430         mCharWidths.clear();
431         mCharWidths.shrink_to_fit();
432         mHyphBuf.clear();
433         mHyphBuf.shrink_to_fit();
434         mCandidates.shrink_to_fit();
435         mBreaks.shrink_to_fit();
436         mWidths.shrink_to_fit();
437         mFlags.shrink_to_fit();
438     }
439     mStrategy = kBreakStrategy_Greedy;
440     mHyphenationFrequency = kHyphenationFrequency_Normal;
441     mLinePenalty = 0.0f;
442 }
443 
444 }  // namespace android
445