1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINIKIN_LINE_BREAKER_UTIL_H
18 #define MINIKIN_LINE_BREAKER_UTIL_H
19
20 #include <vector>
21
22 #include "minikin/Hyphenator.h"
23 #include "minikin/MeasuredText.h"
24 #include "minikin/U16StringPiece.h"
25
26 #include "HyphenatorMap.h"
27 #include "LayoutUtils.h"
28 #include "Locale.h"
29 #include "LocaleListCache.h"
30 #include "MinikinInternal.h"
31 #include "WordBreaker.h"
32
33 namespace minikin {
34
// ParaWidth holds a cumulative width measured from the beginning of the paragraph. For very large
// paragraphs a 32-bit float could lose accuracy, although float is used extensively on the Java
// side for this. The type is kept behind an alias so that it can easily be changed based on the
// performance/accuracy tradeoff.
using ParaWidth = double;
40
41 // Hyphenates a string potentially containing non-breaking spaces.
42 std::vector<HyphenationType> hyphenate(const U16StringPiece& string, const Hyphenator& hypenator);
43
// Tells whether a character is a space that disappears at the end of a line.
// The accepted set is the Unicode set
// [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'.
// Note: every such character lives in the BMP, so testing UTF-16 code units is sufficient.
inline bool isLineEndSpace(uint16_t c) {
    switch (c) {
        case '\n':    // LINE FEED
        case ' ':     // SPACE
        case 0x1680:  // OGHAM SPACE MARK
        case 0x205F:  // MEDIUM MATHEMATICAL SPACE
        case 0x3000:  // IDEOGRAPHIC SPACE
            return true;
        default:
            break;
    }
    // U+2000..U+200A: EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE,
    // FOUR-PER-EM SPACE, SIX-PER-EM SPACE, PUNCTUATION SPACE, THIN SPACE, HAIR SPACE.
    // U+2007 sits inside that range but is deliberately left out.
    const bool inGeneralPunctuationSpaces = c >= 0x2000 && c <= 0x200A;
    return inGeneralPunctuationSpaces && c != 0x2007;
}
57
58 // Returns true if the character needs to be excluded for the line spacing.
isLineSpaceExcludeChar(uint16_t c)59 inline bool isLineSpaceExcludeChar(uint16_t c) {
60 return c == CHAR_LINE_FEED || c == CHAR_CARRIAGE_RETURN;
61 }
62
getEffectiveLocale(uint32_t localeListId)63 inline Locale getEffectiveLocale(uint32_t localeListId) {
64 const LocaleList& localeList = LocaleListCache::getById(localeListId);
65 return localeList.empty() ? Locale() : localeList[0];
66 }
67
68 // Retrieves hyphenation break points from a word.
populateHyphenationPoints(const U16StringPiece & textBuf,const Run & run,const Hyphenator & hyphenator,const Range & contextRange,const Range & hyphenationTargetRange,std::vector<HyphenBreak> * out,LayoutPieces * pieces)69 inline void populateHyphenationPoints(
70 const U16StringPiece& textBuf, // A text buffer.
71 const Run& run, // A run of this region.
72 const Hyphenator& hyphenator, // A hyphenator to be used for hyphenation.
73 const Range& contextRange, // A context range for measuring hyphenated piece.
74 const Range& hyphenationTargetRange, // An actual range for the hyphenation target.
75 std::vector<HyphenBreak>* out, // An output to be appended.
76 LayoutPieces* pieces) { // An output of layout pieces. Maybe null.
77 if (!run.getRange().contains(contextRange) || !contextRange.contains(hyphenationTargetRange)) {
78 return;
79 }
80
81 const std::vector<HyphenationType> hyphenResult =
82 hyphenate(textBuf.substr(hyphenationTargetRange), hyphenator);
83 for (uint32_t i = hyphenationTargetRange.getStart(); i < hyphenationTargetRange.getEnd(); ++i) {
84 const HyphenationType hyph = hyphenResult[hyphenationTargetRange.toRangeOffset(i)];
85 if (hyph == HyphenationType::DONT_BREAK) {
86 continue; // Not a hyphenation point.
87 }
88
89 auto hyphenPart = contextRange.split(i);
90 U16StringPiece firstText = textBuf.substr(hyphenPart.first);
91 U16StringPiece secondText = textBuf.substr(hyphenPart.second);
92 const float first = run.measureHyphenPiece(firstText, Range(0, firstText.size()),
93 StartHyphenEdit::NO_EDIT /* start hyphen edit */,
94 editForThisLine(hyph) /* end hyphen edit */,
95 nullptr /* advances */, pieces);
96 const float second = run.measureHyphenPiece(secondText, Range(0, secondText.size()),
97 editForNextLine(hyph) /* start hyphen edit */,
98 EndHyphenEdit::NO_EDIT /* end hyphen edit */,
99 nullptr /* advances */, pieces);
100
101 out->emplace_back(i, hyph, first, second);
102 }
103 }
104
105 // Processes and retrieve informations from characters in the paragraph.
106 struct CharProcessor {
107 // The number of spaces.
108 uint32_t rawSpaceCount = 0;
109
110 // The number of spaces minus trailing spaces.
111 uint32_t effectiveSpaceCount = 0;
112
113 // The sum of character width from the paragraph start.
114 ParaWidth sumOfCharWidths = 0.0;
115
116 // The sum of character width from the paragraph start minus trailing line end spaces.
117 // This means that the line width from the paragraph start if we decided break now.
118 ParaWidth effectiveWidth = 0.0;
119
120 // The total amount of character widths at the previous word break point.
121 ParaWidth sumOfCharWidthsAtPrevWordBreak = 0.0;
122
123 // The next word break offset.
124 uint32_t nextWordBreak = 0;
125
126 // The previous word break offset.
127 uint32_t prevWordBreak = 0;
128
129 // The width of a space. May be 0 if there are no spaces.
130 // Note: if there are multiple different widths for spaces (for example, because of mixing of
131 // fonts), it's only guaranteed to pick one.
132 float spaceWidth = 0.0f;
133
134 // The current hyphenator.
135 const Hyphenator* hyphenator = nullptr;
136
137 // Retrieve the current word range.
wordRangeCharProcessor138 inline Range wordRange() const { return breaker.wordRange(); }
139
140 // Retrieve the current context range.
contextRangeCharProcessor141 inline Range contextRange() const { return Range(prevWordBreak, nextWordBreak); }
142
143 // Returns the width from the last word break point.
widthFromLastWordBreakCharProcessor144 inline ParaWidth widthFromLastWordBreak() const {
145 return effectiveWidth - sumOfCharWidthsAtPrevWordBreak;
146 }
147
148 // Returns the break penalty for the current word break point.
wordBreakPenaltyCharProcessor149 inline int wordBreakPenalty() const { return breaker.breakBadness(); }
150
CharProcessorCharProcessor151 CharProcessor(const U16StringPiece& text) { breaker.setText(text.data(), text.size()); }
152
153 // The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one
154 // time before feeding characters.
updateLocaleIfNecessaryCharProcessor155 void updateLocaleIfNecessary(const Run& run) {
156 // Update locale if necessary.
157 uint32_t newLocaleListId = run.getLocaleListId();
158 if (localeListId != newLocaleListId) {
159 Locale locale = getEffectiveLocale(newLocaleListId);
160 nextWordBreak = breaker.followingWithLocale(locale, run.getRange().getStart());
161 hyphenator = HyphenatorMap::lookup(locale);
162 localeListId = newLocaleListId;
163 }
164 }
165
166 // Process one character.
feedCharCharProcessor167 void feedChar(uint32_t idx, uint16_t c, float w) {
168 if (idx == nextWordBreak) {
169 prevWordBreak = nextWordBreak;
170 nextWordBreak = breaker.next();
171 sumOfCharWidthsAtPrevWordBreak = sumOfCharWidths;
172 }
173 if (isWordSpace(c)) {
174 rawSpaceCount += 1;
175 spaceWidth = w;
176 }
177 sumOfCharWidths += w;
178 if (isLineEndSpace(c)) {
179 // If we break a line on a line-ending space, that space goes away. So postBreak
180 // and postSpaceCount, which keep the width and number of spaces if we decide to
181 // break at this point, don't need to get adjusted.
182 } else {
183 effectiveSpaceCount = rawSpaceCount;
184 effectiveWidth = sumOfCharWidths;
185 }
186 }
187
188 private:
189 // The current locale list id.
190 uint32_t localeListId = LocaleListCache::kInvalidListId;
191
192 WordBreaker breaker;
193 };
194 } // namespace minikin
195
196 #endif // MINIKIN_LINE_BREAKER_UTIL_H
197