1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * A wrapper around ICU's line break iterator, that gives customized line 19 * break opportunities, as well as identifying words for the purpose of 20 * hyphenation. 21 */ 22 23 #ifndef MINIKIN_WORD_BREAKER_H 24 #define MINIKIN_WORD_BREAKER_H 25 26 #include <list> 27 #include <mutex> 28 29 #include <unicode/brkiter.h> 30 31 #include "minikin/Macros.h" 32 #include "minikin/Range.h" 33 34 #include "Locale.h" 35 36 namespace minikin { 37 38 // A class interface for providing pooling implementation of ICU's line breaker. 39 // The implementation can be customized for testing purposes. 40 class ICULineBreakerPool { 41 public: 42 struct Slot { SlotSlot43 Slot() : localeId(0), breaker(nullptr) {} SlotSlot44 Slot(uint64_t localeId, std::unique_ptr<icu::BreakIterator>&& breaker) 45 : localeId(localeId), breaker(std::move(breaker)) {} 46 47 Slot(Slot&& other) = default; 48 Slot& operator=(Slot&& other) = default; 49 50 // Forbid copy and assignment. 51 Slot(const Slot&) = delete; 52 Slot& operator=(const Slot&) = delete; 53 54 uint64_t localeId; 55 std::unique_ptr<icu::BreakIterator> breaker; 56 }; ~ICULineBreakerPool()57 virtual ~ICULineBreakerPool() {} 58 virtual Slot acquire(const Locale& locale) = 0; 59 virtual void release(Slot&& slot) = 0; 60 }; 61 62 // An singleton implementation of the ICU line breaker pool. 63 // Since creating ICU line breaker instance takes some time. Pool it for later use. 64 class ICULineBreakerPoolImpl : public ICULineBreakerPool { 65 public: 66 Slot acquire(const Locale& locale) override; 67 void release(Slot&& slot) override; 68 getInstance()69 static ICULineBreakerPoolImpl& getInstance() { 70 static ICULineBreakerPoolImpl pool; 71 return pool; 72 } 73 74 protected: 75 // protected for testing purposes. 76 static constexpr size_t MAX_POOL_SIZE = 4; ICULineBreakerPoolImpl()77 ICULineBreakerPoolImpl(){}; // singleton. getPoolSize()78 size_t getPoolSize() const { 79 std::lock_guard<std::mutex> lock(mMutex); 80 return mPool.size(); 81 } 82 83 private: 84 std::list<Slot> mPool GUARDED_BY(mMutex); 85 mutable std::mutex mMutex; 86 }; 87 88 class WordBreaker { 89 public: ~WordBreaker()90 virtual ~WordBreaker() { finish(); } 91 92 WordBreaker(); 93 94 void setText(const uint16_t* data, size_t size); 95 96 // Advance iterator to next word break with current locale. Return offset, or -1 if EOT 97 ssize_t next(); 98 99 // Advance iterator to the break just after "from" with using the new provided locale. 100 // Return offset, or -1 if EOT 101 ssize_t followingWithLocale(const Locale& locale, size_t from); 102 103 // Current offset of iterator, equal to 0 at BOT or last return from next() 104 ssize_t current() const; 105 106 // After calling next(), wordStart() and wordEnd() are offsets defining the previous 107 // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. 108 ssize_t wordStart() const; 109 110 ssize_t wordEnd() const; 111 112 // Returns the range from wordStart() to wordEnd(). 113 // If wordEnd() <= wordStart(), returns empty range. wordRange()114 inline Range wordRange() const { 115 const uint32_t start = wordStart(); 116 const uint32_t end = wordEnd(); 117 return start < end ? Range(start, end) : Range(end, end); 118 } 119 120 int breakBadness() const; 121 122 void finish(); 123 124 protected: 125 // protected virtual for testing purpose. 126 // Caller must release the pool. 127 WordBreaker(ICULineBreakerPool* pool); 128 129 private: 130 int32_t iteratorNext(); 131 void detectEmailOrUrl(); 132 ssize_t findNextBreakInEmailOrUrl(); 133 134 // Doesn't take ownership. Must not be nullptr. Must be set in constructor. 135 ICULineBreakerPool* mPool; 136 137 ICULineBreakerPool::Slot mIcuBreaker; 138 139 UText mUText = UTEXT_INITIALIZER; 140 const uint16_t* mText = nullptr; 141 size_t mTextSize; 142 ssize_t mLast; 143 ssize_t mCurrent; 144 145 // state for the email address / url detector 146 ssize_t mScanOffset; 147 bool mInEmailOrUrl; 148 }; 149 150 } // namespace minikin 151 152 #endif // MINIKIN_WORD_BREAKER_H 153