1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_PATRICIA_TRIE_POLICY_H 18 #define LATINIME_PATRICIA_TRIE_POLICY_H 19 20 #include <cstdint> 21 #include <vector> 22 23 #include "defines.h" 24 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 25 #include "suggest/policyimpl/dictionary/header/header_policy.h" 26 #include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h" 27 #include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h" 28 #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h" 29 #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h" 30 #include "suggest/policyimpl/dictionary/utils/format_utils.h" 31 #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" 32 #include "utils/byte_array_view.h" 33 34 namespace latinime { 35 36 class DicNode; 37 class DicNodeVector; 38 39 class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { 40 public: PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)41 PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer) 42 : mMmappedBuffer(std::move(mmappedBuffer)), 43 mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(), 44 FormatUtils::VERSION_2), 45 mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data() 46 + mHeaderPolicy.getSize()), 47 mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size() 48 - mHeaderPolicy.getSize()), 49 mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot), 50 mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy), 51 mPtNodeArrayReader(mDictRoot, mDictBufferSize), 52 mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {} 53 getRootPosition()54 AK_FORCE_INLINE int getRootPosition() const { 55 return 0; 56 } 57 58 void createAndGetAllChildDicNodes(const DicNode *const dicNode, 59 DicNodeVector *const childDicNodes) const; 60 61 int getCodePointsAndProbabilityAndReturnCodePointCount( 62 const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints, 63 int *const outUnigramProbability) const; 64 65 int getTerminalPtNodePositionOfWord(const int *const inWord, 66 const int length, const bool forceLowerCaseSearch) const; 67 68 int getProbability(const int unigramProbability, const int bigramProbability) const; 69 70 int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const; 71 72 void iterateNgramEntries(const int *const prevWordsPtNodePos, 73 NgramListener *const listener) const; 74 75 int getShortcutPositionOfPtNode(const int ptNodePos) const; 76 getHeaderStructurePolicy()77 const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { 78 return &mHeaderPolicy; 79 } 80 getShortcutsStructurePolicy()81 const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { 82 return &mShortcutListPolicy; 83 } 84 addUnigramEntry(const int * const word,const int length,const UnigramProperty * const unigramProperty)85 bool addUnigramEntry(const int *const word, const int length, 86 const UnigramProperty *const unigramProperty) { 87 // This method should not be called for non-updatable dictionary. 88 AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary."); 89 return false; 90 } 91 removeUnigramEntry(const int * const word,const int length)92 bool removeUnigramEntry(const int *const word, const int length) { 93 // This method should not be called for non-updatable dictionary. 94 AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary."); 95 return false; 96 } 97 addNgramEntry(const PrevWordsInfo * const prevWordsInfo,const BigramProperty * const bigramProperty)98 bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo, 99 const BigramProperty *const bigramProperty) { 100 // This method should not be called for non-updatable dictionary. 101 AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); 102 return false; 103 } 104 removeNgramEntry(const PrevWordsInfo * const prevWordsInfo,const int * const word,const int length)105 bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word, 106 const int length) { 107 // This method should not be called for non-updatable dictionary. 108 AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary."); 109 return false; 110 } 111 flush(const char * const filePath)112 bool flush(const char *const filePath) { 113 // This method should not be called for non-updatable dictionary. 114 AKLOGI("Warning: flush() is called for non-updatable dictionary."); 115 return false; 116 } 117 flushWithGC(const char * const filePath)118 bool flushWithGC(const char *const filePath) { 119 // This method should not be called for non-updatable dictionary. 120 AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary."); 121 return false; 122 } 123 needsToRunGC(const bool mindsBlockByGC)124 bool needsToRunGC(const bool mindsBlockByGC) const { 125 // This method should not be called for non-updatable dictionary. 126 AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary."); 127 return false; 128 } 129 getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)130 void getProperty(const char *const query, const int queryLength, char *const outResult, 131 const int maxResultLength) { 132 // getProperty is not supported for this class. 133 if (maxResultLength > 0) { 134 outResult[0] = '\0'; 135 } 136 } 137 138 const WordProperty getWordProperty(const int *const codePoints, 139 const int codePointCount) const; 140 141 int getNextWordAndNextToken(const int token, int *const outCodePoints, 142 int *const outCodePointCount); 143 isCorrupted()144 bool isCorrupted() const { 145 return mIsCorrupted; 146 } 147 148 private: 149 DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy); 150 151 const MmappedBuffer::MmappedBufferPtr mMmappedBuffer; 152 const HeaderPolicy mHeaderPolicy; 153 const uint8_t *const mDictRoot; 154 const int mDictBufferSize; 155 const BigramListPolicy mBigramListPolicy; 156 const ShortcutListPolicy mShortcutListPolicy; 157 const Ver2ParticiaTrieNodeReader mPtNodeReader; 158 const Ver2PtNodeArrayReader mPtNodeArrayReader; 159 std::vector<int> mTerminalPtNodePositionsForIteratingWords; 160 mutable bool mIsCorrupted; 161 162 int getBigramsPositionOfPtNode(const int ptNodePos) const; 163 int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, 164 DicNodeVector *const childDicNodes) const; 165 }; 166 } // namespace latinime 167 #endif // LATINIME_PATRICIA_TRIE_POLICY_H 168