1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_PATRICIA_TRIE_POLICY_H
18 #define LATINIME_PATRICIA_TRIE_POLICY_H
19 
20 #include <cstdint>
21 #include <vector>
22 
23 #include "defines.h"
24 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
25 #include "suggest/policyimpl/dictionary/header/header_policy.h"
26 #include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
27 #include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
28 #include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
29 #include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
30 #include "suggest/policyimpl/dictionary/utils/format_utils.h"
31 #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
32 #include "utils/byte_array_view.h"
33 
34 namespace latinime {
35 
36 class DicNode;
37 class DicNodeVector;
38 
39 class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
40  public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)41     PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
42             : mMmappedBuffer(std::move(mmappedBuffer)),
43               mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
44                       FormatUtils::VERSION_2),
45               mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
46                       + mHeaderPolicy.getSize()),
47               mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
48                       - mHeaderPolicy.getSize()),
49               mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
50               mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
51               mPtNodeArrayReader(mDictRoot, mDictBufferSize),
52               mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
53 
getRootPosition()54     AK_FORCE_INLINE int getRootPosition() const {
55         return 0;
56     }
57 
58     void createAndGetAllChildDicNodes(const DicNode *const dicNode,
59             DicNodeVector *const childDicNodes) const;
60 
61     int getCodePointsAndProbabilityAndReturnCodePointCount(
62             const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
63             int *const outUnigramProbability) const;
64 
65     int getTerminalPtNodePositionOfWord(const int *const inWord,
66             const int length, const bool forceLowerCaseSearch) const;
67 
68     int getProbability(const int unigramProbability, const int bigramProbability) const;
69 
70     int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
71 
72     void iterateNgramEntries(const int *const prevWordsPtNodePos,
73             NgramListener *const listener) const;
74 
75     int getShortcutPositionOfPtNode(const int ptNodePos) const;
76 
getHeaderStructurePolicy()77     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
78         return &mHeaderPolicy;
79     }
80 
getShortcutsStructurePolicy()81     const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
82         return &mShortcutListPolicy;
83     }
84 
addUnigramEntry(const int * const word,const int length,const UnigramProperty * const unigramProperty)85     bool addUnigramEntry(const int *const word, const int length,
86             const UnigramProperty *const unigramProperty) {
87         // This method should not be called for non-updatable dictionary.
88         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
89         return false;
90     }
91 
removeUnigramEntry(const int * const word,const int length)92     bool removeUnigramEntry(const int *const word, const int length) {
93         // This method should not be called for non-updatable dictionary.
94         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
95         return false;
96     }
97 
addNgramEntry(const PrevWordsInfo * const prevWordsInfo,const BigramProperty * const bigramProperty)98     bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
99             const BigramProperty *const bigramProperty) {
100         // This method should not be called for non-updatable dictionary.
101         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
102         return false;
103     }
104 
removeNgramEntry(const PrevWordsInfo * const prevWordsInfo,const int * const word,const int length)105     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
106             const int length) {
107         // This method should not be called for non-updatable dictionary.
108         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
109         return false;
110     }
111 
flush(const char * const filePath)112     bool flush(const char *const filePath) {
113         // This method should not be called for non-updatable dictionary.
114         AKLOGI("Warning: flush() is called for non-updatable dictionary.");
115         return false;
116     }
117 
flushWithGC(const char * const filePath)118     bool flushWithGC(const char *const filePath) {
119         // This method should not be called for non-updatable dictionary.
120         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
121         return false;
122     }
123 
needsToRunGC(const bool mindsBlockByGC)124     bool needsToRunGC(const bool mindsBlockByGC) const {
125         // This method should not be called for non-updatable dictionary.
126         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
127         return false;
128     }
129 
getProperty(const char * const query,const int queryLength,char * const outResult,const int maxResultLength)130     void getProperty(const char *const query, const int queryLength, char *const outResult,
131             const int maxResultLength) {
132         // getProperty is not supported for this class.
133         if (maxResultLength > 0) {
134             outResult[0] = '\0';
135         }
136     }
137 
138     const WordProperty getWordProperty(const int *const codePoints,
139             const int codePointCount) const;
140 
141     int getNextWordAndNextToken(const int token, int *const outCodePoints,
142             int *const outCodePointCount);
143 
isCorrupted()144     bool isCorrupted() const {
145         return mIsCorrupted;
146     }
147 
148  private:
149     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
150 
151     const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
152     const HeaderPolicy mHeaderPolicy;
153     const uint8_t *const mDictRoot;
154     const int mDictBufferSize;
155     const BigramListPolicy mBigramListPolicy;
156     const ShortcutListPolicy mShortcutListPolicy;
157     const Ver2ParticiaTrieNodeReader mPtNodeReader;
158     const Ver2PtNodeArrayReader mPtNodeArrayReader;
159     std::vector<int> mTerminalPtNodePositionsForIteratingWords;
160     mutable bool mIsCorrupted;
161 
162     int getBigramsPositionOfPtNode(const int ptNodePos) const;
163     int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
164             DicNodeVector *const childDicNodes) const;
165 };
166 } // namespace latinime
167 #endif // LATINIME_PATRICIA_TRIE_POLICY_H
168