1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include "unicode/brkiter.h" 27 #include <memory> 28 29 namespace minikin { 30 31 class WordBreaker { 32 public: ~WordBreaker()33 ~WordBreaker() { 34 finish(); 35 } 36 37 void setLocale(const icu::Locale& locale); 38 39 void setText(const uint16_t* data, size_t size); 40 41 // Advance iterator to next word break. Return offset, or -1 if EOT 42 ssize_t next(); 43 44 // Current offset of iterator, equal to 0 at BOT or last return from next() 45 ssize_t current() const; 46 47 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 48 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 49 ssize_t wordStart() const; 50 51 ssize_t wordEnd() const; 52 53 int breakBadness() const; 54 55 void finish(); 56 57 private: 58 int32_t iteratorNext(); 59 void detectEmailOrUrl(); 60 ssize_t findNextBreakInEmailOrUrl(); 61 62 std::unique_ptr<icu::BreakIterator> mBreakIterator; 63 UText mUText = UTEXT_INITIALIZER; 64 const uint16_t* mText = nullptr; 65 size_t mTextSize; 66 ssize_t mLast; 67 ssize_t mCurrent; 68 bool mIteratorWasReset; 69 70 // state for the email address / url detector 71 ssize_t mScanOffset; 72 bool mInEmailOrUrl; 73 }; 74 75 } // namespace minikin 76 77 #endif // MINIKIN_WORD_BREAKER_H 78