1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "OptimalLineBreaker.h"
18 
19 #include <algorithm>
20 #include <limits>
21 
22 #include "minikin/Characters.h"
23 #include "minikin/Layout.h"
24 #include "minikin/Range.h"
25 #include "minikin/U16StringPiece.h"
26 
27 #include "HyphenatorMap.h"
28 #include "LayoutUtils.h"
29 #include "LineBreakerUtil.h"
30 #include "Locale.h"
31 #include "LocaleListCache.h"
32 #include "MinikinInternal.h"
33 #include "WordBreaker.h"
34 
35 namespace minikin {
36 
37 namespace {
38 
39 // Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these
40 // constants are larger than any reasonable actual width score.
41 constexpr float SCORE_INFTY = std::numeric_limits<float>::max();
42 constexpr float SCORE_OVERFULL = 1e12f;
43 constexpr float SCORE_DESPERATE = 1e10f;
44 
45 // Multiplier for hyphen penalty on last line.
46 constexpr float LAST_LINE_PENALTY_MULTIPLIER = 4.0f;
47 // Penalty assigned to each line break (to try to minimize number of lines)
48 // TODO: when we implement full justification (so spaces can shrink and stretch), this is
49 // probably not the most appropriate method.
50 constexpr float LINE_PENALTY_MULTIPLIER = 2.0f;
51 
52 // Penalty assigned to shrinking the whitepsace.
53 constexpr float SHRINK_PENALTY_MULTIPLIER = 4.0f;
54 
55 // Maximum amount that spaces can shrink, in justified text.
56 constexpr float SHRINKABILITY = 1.0 / 3.0;
57 
58 // A single candidate break
59 struct Candidate {
60     uint32_t offset;  // offset to text buffer, in code units
61 
62     ParaWidth preBreak;       // width of text until this point, if we decide to not break here:
63                               // preBreak is used as an optimized way to calculate the width
64                               // between two candidates. The line width between two line break
65                               // candidates i and j is calculated as postBreak(j) - preBreak(i).
66     ParaWidth postBreak;      // width of text until this point, if we decide to break here
67     float penalty;            // penalty of this break (for example, hyphen penalty)
68     uint32_t preSpaceCount;   // preceding space count before breaking
69     uint32_t postSpaceCount;  // preceding space count after breaking
70     HyphenationType hyphenType;
71     bool isRtl;  // The direction of the bidi run containing or ending in this candidate
72 
Candidateminikin::__anon6b174eaf0111::Candidate73     Candidate(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak, float penalty,
74               uint32_t preSpaceCount, uint32_t postSpaceCount, HyphenationType hyphenType,
75               bool isRtl)
76             : offset(offset),
77               preBreak(preBreak),
78               postBreak(postBreak),
79               penalty(penalty),
80               preSpaceCount(preSpaceCount),
81               postSpaceCount(postSpaceCount),
82               hyphenType(hyphenType),
83               isRtl(isRtl) {}
84 };
85 
86 // A context of line break optimization.
87 struct OptimizeContext {
88     // The break candidates.
89     std::vector<Candidate> candidates;
90 
91     // The penalty for the number of lines.
92     float linePenalty = 0.0f;
93 
94     // The width of a space. May be 0 if there are no spaces.
95     // Note: if there are multiple different widths for spaces (for example, because of mixing of
96     // fonts), it's only guaranteed to pick one.
97     float spaceWidth = 0.0f;
98 
99     // Append desperate break point to the candidates.
pushDesperateminikin::__anon6b174eaf0111::OptimizeContext100     inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, uint32_t spaceCount,
101                               bool isRtl) {
102         candidates.emplace_back(offset, sumOfCharWidths, sumOfCharWidths, SCORE_DESPERATE,
103                                 spaceCount, spaceCount,
104                                 HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, isRtl);
105     }
106 
107     // Append hyphenation break point to the candidates.
pushHyphenationminikin::__anon6b174eaf0111::OptimizeContext108     inline void pushHyphenation(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
109                                 float penalty, uint32_t spaceCount, HyphenationType type,
110                                 bool isRtl) {
111         candidates.emplace_back(offset, preBreak, postBreak, penalty, spaceCount, spaceCount, type,
112                                 isRtl);
113     }
114 
115     // Append word break point to the candidates.
pushWordBreakminikin::__anon6b174eaf0111::OptimizeContext116     inline void pushWordBreak(uint32_t offset, ParaWidth preBreak, ParaWidth postBreak,
117                               float penalty, uint32_t preSpaceCount, uint32_t postSpaceCount,
118                               bool isRtl) {
119         candidates.emplace_back(offset, preBreak, postBreak, penalty, preSpaceCount, postSpaceCount,
120                                 HyphenationType::DONT_BREAK, isRtl);
121     }
122 
OptimizeContextminikin::__anon6b174eaf0111::OptimizeContext123     OptimizeContext() {
124         candidates.emplace_back(0, 0.0f, 0.0f, 0.0f, 0, 0, HyphenationType::DONT_BREAK, false);
125     }
126 };
127 
128 // Compute the penalty for the run and returns penalty for hyphenation and number of lines.
computePenalties(const Run & run,const LineWidth & lineWidth,HyphenationFrequency frequency,bool justified)129 std::pair<float, float> computePenalties(const Run& run, const LineWidth& lineWidth,
130                                          HyphenationFrequency frequency, bool justified) {
131     float linePenalty = 0.0;
132     const MinikinPaint* paint = run.getPaint();
133     // a heuristic that seems to perform well
134     float hyphenPenalty = 0.5 * paint->size * paint->scaleX * lineWidth.getAt(0);
135     if (frequency == HyphenationFrequency::Normal) {
136         hyphenPenalty *= 4.0;  // TODO: Replace with a better value after some testing
137     }
138 
139     if (justified) {
140         // Make hyphenation more aggressive for fully justified text (so that "normal" in
141         // justified mode is the same as "full" in ragged-right).
142         hyphenPenalty *= 0.25;
143     } else {
144         // Line penalty is zero for justified text.
145         linePenalty = hyphenPenalty * LINE_PENALTY_MULTIPLIER;
146     }
147 
148     return std::make_pair(hyphenPenalty, linePenalty);
149 }
150 
151 // Represents a desperate break point.
152 struct DesperateBreak {
153     // The break offset.
154     uint32_t offset;
155 
156     // The sum of the character width from the beginning of the word.
157     ParaWidth sumOfChars;
158 
DesperateBreakminikin::__anon6b174eaf0111::DesperateBreak159     DesperateBreak(uint32_t offset, ParaWidth sumOfChars)
160             : offset(offset), sumOfChars(sumOfChars){};
161 };
162 
163 // Retrieves desperate break points from a word.
populateDesperatePoints(const MeasuredText & measured,const Range & range)164 std::vector<DesperateBreak> populateDesperatePoints(const MeasuredText& measured,
165                                                     const Range& range) {
166     std::vector<DesperateBreak> out;
167     ParaWidth width = measured.widths[range.getStart()];
168     for (uint32_t i = range.getStart() + 1; i < range.getEnd(); ++i) {
169         const float w = measured.widths[i];
170         if (w == 0) {
171             continue;  // w == 0 means here is not a grapheme bounds. Don't break here.
172         }
173         out.emplace_back(i, width);
174         width += w;
175     }
176     return out;
177 }
178 
179 // Append hyphenation break points and desperate break points.
180 // If an offset is a both candidate for hyphenation and desperate break points, place desperate
181 // break candidate first and hyphenation break points second since the result width of the desperate
182 // break is shorter than hyphenation break.
183 // This is important since DP in computeBreaksOptimal assumes that the result line width is
184 // increased by break offset.
appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,std::vector<HyphenBreak>::const_iterator endHyIter,const std::vector<DesperateBreak> & desperates,const CharProcessor & proc,float hyphenPenalty,bool isRtl,OptimizeContext * out)185 void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,
186                        std::vector<HyphenBreak>::const_iterator endHyIter,
187                        const std::vector<DesperateBreak>& desperates, const CharProcessor& proc,
188                        float hyphenPenalty, bool isRtl, OptimizeContext* out) {
189     auto d = desperates.begin();
190     while (hyIter != endHyIter || d != desperates.end()) {
191         // If both hyphen breaks and desperate breaks point to the same offset, push desperate
192         // breaks first.
193         if (d != desperates.end() && (hyIter == endHyIter || d->offset <= hyIter->offset)) {
194             out->pushDesperate(d->offset, proc.sumOfCharWidthsAtPrevWordBreak + d->sumOfChars,
195                                proc.effectiveSpaceCount, isRtl);
196             d++;
197         } else {
198             out->pushHyphenation(hyIter->offset, proc.sumOfCharWidths - hyIter->second,
199                                  proc.sumOfCharWidthsAtPrevWordBreak + hyIter->first, hyphenPenalty,
200                                  proc.effectiveSpaceCount, hyIter->type, isRtl);
201             hyIter++;
202         }
203     }
204 }
205 
206 // Enumerate all line break candidates.
populateCandidates(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidth,HyphenationFrequency frequency,bool isJustified)207 OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured,
208                                    const LineWidth& lineWidth, HyphenationFrequency frequency,
209                                    bool isJustified) {
210     const ParaWidth minLineWidth = lineWidth.getMin();
211     CharProcessor proc(textBuf);
212 
213     OptimizeContext result;
214 
215     const bool doHyphenation = frequency != HyphenationFrequency::None;
216     auto hyIter = std::begin(measured.hyphenBreaks);
217 
218     for (const auto& run : measured.runs) {
219         const bool isRtl = run->isRtl();
220         const Range& range = run->getRange();
221 
222         // Compute penalty parameters.
223         float hyphenPenalty = 0.0f;
224         if (run->canHyphenate()) {
225             auto penalties = computePenalties(*run, lineWidth, frequency, isJustified);
226             hyphenPenalty = penalties.first;
227             result.linePenalty = std::max(penalties.second, result.linePenalty);
228         }
229 
230         proc.updateLocaleIfNecessary(*run);
231 
232         for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
233             MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker");
234             proc.feedChar(i, textBuf[i], measured.widths[i]);
235 
236             const uint32_t nextCharOffset = i + 1;
237             if (nextCharOffset != proc.nextWordBreak) {
238                 continue;  // Wait until word break point.
239             }
240 
241             // Add hyphenation and desperate break points.
242             std::vector<HyphenBreak> hyphenedBreaks;
243             std::vector<DesperateBreak> desperateBreaks;
244             const Range contextRange = proc.contextRange();
245 
246             auto beginHyIter = hyIter;
247             while (hyIter != std::end(measured.hyphenBreaks) &&
248                    hyIter->offset < contextRange.getEnd()) {
249                 hyIter++;
250             }
251             if (proc.widthFromLastWordBreak() > minLineWidth) {
252                 desperateBreaks = populateDesperatePoints(measured, contextRange);
253             }
254             appendWithMerging(beginHyIter, doHyphenation ? hyIter : beginHyIter, desperateBreaks,
255                               proc, hyphenPenalty, isRtl, &result);
256 
257             // We skip breaks for zero-width characters inside replacement spans.
258             if (nextCharOffset == range.getEnd() || measured.widths[nextCharOffset] > 0) {
259                 const float penalty = hyphenPenalty * proc.wordBreakPenalty();
260                 result.pushWordBreak(nextCharOffset, proc.sumOfCharWidths, proc.effectiveWidth,
261                                      penalty, proc.rawSpaceCount, proc.effectiveSpaceCount, isRtl);
262             }
263         }
264     }
265     result.spaceWidth = proc.spaceWidth;
266     return result;
267 }
268 
269 class LineBreakOptimizer {
270 public:
LineBreakOptimizer()271     LineBreakOptimizer() {}
272 
273     LineBreakResult computeBreaks(const OptimizeContext& context, const U16StringPiece& textBuf,
274                                   const MeasuredText& measuredText, const LineWidth& lineWidth,
275                                   BreakStrategy strategy, bool justified);
276 
277 private:
278     // Data used to compute optimal line breaks
279     struct OptimalBreaksData {
280         float score;          // best score found for this break
281         uint32_t prev;        // index to previous break
282         uint32_t lineNumber;  // the computed line number of the candidate
283     };
284     LineBreakResult finishBreaksOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
285                                         const std::vector<OptimalBreaksData>& breaksData,
286                                         const std::vector<Candidate>& candidates);
287 
288     MinikinExtent computeMaxExtent(const U16StringPiece& textBuf, const MeasuredText& measured,
289                                    uint32_t start, uint32_t end) const;
290 };
291 
292 // Find the needed extent between the start and end ranges. start is inclusive and end is exclusive.
293 // Both are indices of the source string.
computeMaxExtent(const U16StringPiece & textBuf,const MeasuredText & measured,uint32_t start,uint32_t end) const294 MinikinExtent LineBreakOptimizer::computeMaxExtent(const U16StringPiece& textBuf,
295                                                    const MeasuredText& measured, uint32_t start,
296                                                    uint32_t end) const {
297     MinikinExtent res = {0.0, 0.0, 0.0};
298     for (uint32_t j = start; j < end; j++) {
299         if (!isLineSpaceExcludeChar(textBuf[j])) {
300             res.extendBy(measured.extents[j]);
301         }
302     }
303     return res;
304 }
305 
306 // Follow "prev" links in candidates array, and copy to result arrays.
finishBreaksOptimal(const U16StringPiece & textBuf,const MeasuredText & measured,const std::vector<OptimalBreaksData> & breaksData,const std::vector<Candidate> & candidates)307 LineBreakResult LineBreakOptimizer::finishBreaksOptimal(
308         const U16StringPiece& textBuf, const MeasuredText& measured,
309         const std::vector<OptimalBreaksData>& breaksData,
310         const std::vector<Candidate>& candidates) {
311     LineBreakResult result;
312     const uint32_t nCand = candidates.size();
313     uint32_t prevIndex;
314     for (uint32_t i = nCand - 1; i > 0; i = prevIndex) {
315         prevIndex = breaksData[i].prev;
316         const Candidate& cand = candidates[i];
317         const Candidate& prev = candidates[prevIndex];
318 
319         result.breakPoints.push_back(cand.offset);
320         result.widths.push_back(cand.postBreak - prev.preBreak);
321         MinikinExtent extent = computeMaxExtent(textBuf, measured, prev.offset, cand.offset);
322         result.ascents.push_back(extent.ascent);
323         result.descents.push_back(extent.descent);
324 
325         const HyphenEdit edit =
326                 packHyphenEdit(editForNextLine(prev.hyphenType), editForThisLine(cand.hyphenType));
327         result.flags.push_back(static_cast<int>(edit));
328     }
329     result.reverse();
330     return result;
331 }
332 
computeBreaks(const OptimizeContext & context,const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidth,BreakStrategy strategy,bool justified)333 LineBreakResult LineBreakOptimizer::computeBreaks(const OptimizeContext& context,
334                                                   const U16StringPiece& textBuf,
335                                                   const MeasuredText& measured,
336                                                   const LineWidth& lineWidth,
337                                                   BreakStrategy strategy, bool justified) {
338     const std::vector<Candidate>& candidates = context.candidates;
339     uint32_t active = 0;
340     const uint32_t nCand = candidates.size();
341     const float maxShrink = justified ? SHRINKABILITY * context.spaceWidth : 0.0f;
342 
343     std::vector<OptimalBreaksData> breaksData;
344     breaksData.reserve(nCand);
345     breaksData.push_back({0.0, 0, 0});  // The first candidate is always at the first line.
346 
347     // "i" iterates through candidates for the end of the line.
348     for (uint32_t i = 1; i < nCand; i++) {
349         const bool atEnd = i == nCand - 1;
350         float best = SCORE_INFTY;
351         uint32_t bestPrev = 0;
352 
353         uint32_t lineNumberLast = breaksData[active].lineNumber;
354         float width = lineWidth.getAt(lineNumberLast);
355 
356         ParaWidth leftEdge = candidates[i].postBreak - width;
357         float bestHope = 0;
358 
359         // "j" iterates through candidates for the beginning of the line.
360         for (uint32_t j = active; j < i; j++) {
361             const uint32_t lineNumber = breaksData[j].lineNumber;
362             if (lineNumber != lineNumberLast) {
363                 const float widthNew = lineWidth.getAt(lineNumber);
364                 if (widthNew != width) {
365                     leftEdge = candidates[i].postBreak - width;
366                     bestHope = 0;
367                     width = widthNew;
368                 }
369                 lineNumberLast = lineNumber;
370             }
371             const float jScore = breaksData[j].score;
372             if (jScore + bestHope >= best) continue;
373             const float delta = candidates[j].preBreak - leftEdge;
374 
375             // compute width score for line
376 
377             // Note: the "bestHope" optimization makes the assumption that, when delta is
378             // non-negative, widthScore will increase monotonically as successive candidate
379             // breaks are considered.
380             float widthScore = 0.0f;
381             float additionalPenalty = 0.0f;
382             if ((atEnd || !justified) && delta < 0) {
383                 widthScore = SCORE_OVERFULL;
384             } else if (atEnd && strategy != BreakStrategy::Balanced) {
385                 // increase penalty for hyphen on last line
386                 additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * candidates[j].penalty;
387             } else {
388                 widthScore = delta * delta;
389                 if (delta < 0) {
390                     if (-delta <
391                         maxShrink * (candidates[i].postSpaceCount - candidates[j].preSpaceCount)) {
392                         widthScore *= SHRINK_PENALTY_MULTIPLIER;
393                     } else {
394                         widthScore = SCORE_OVERFULL;
395                     }
396                 }
397             }
398 
399             if (delta < 0) {
400                 active = j + 1;
401             } else {
402                 bestHope = widthScore;
403             }
404 
405             const float score = jScore + widthScore + additionalPenalty;
406             if (score <= best) {
407                 best = score;
408                 bestPrev = j;
409             }
410         }
411         breaksData.push_back({best + candidates[i].penalty + context.linePenalty,  // score
412                               bestPrev,                                            // prev
413                               breaksData[bestPrev].lineNumber + 1});               // lineNumber
414     }
415     return finishBreaksOptimal(textBuf, measured, breaksData, candidates);
416 }
417 
418 }  // namespace
419 
breakLineOptimal(const U16StringPiece & textBuf,const MeasuredText & measured,const LineWidth & lineWidth,BreakStrategy strategy,HyphenationFrequency frequency,bool justified)420 LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredText& measured,
421                                  const LineWidth& lineWidth, BreakStrategy strategy,
422                                  HyphenationFrequency frequency, bool justified) {
423     if (textBuf.size() == 0) {
424         return LineBreakResult();
425     }
426     const OptimizeContext context =
427             populateCandidates(textBuf, measured, lineWidth, frequency, justified);
428     LineBreakOptimizer optimizer;
429     return optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy, justified);
430 }
431 
432 }  // namespace minikin
433