1 /*
2  * Copyright (C) 2014, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "suggest/core/dictionary/dictionary_utils.h"
18 
19 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
20 #include "dictionary/property/ngram_context.h"
21 #include "suggest/core/dicnode/dic_node.h"
22 #include "suggest/core/dicnode/dic_node_priority_queue.h"
23 #include "suggest/core/dicnode/dic_node_vector.h"
24 #include "suggest/core/dictionary/dictionary.h"
25 #include "suggest/core/dictionary/digraph_utils.h"
26 #include "utils/int_array_view.h"
27 
28 namespace latinime {
29 
getMaxProbabilityOfExactMatches(const DictionaryStructureWithBufferPolicy * const dictionaryStructurePolicy,const CodePointArrayView codePoints)30 /* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
31         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
32         const CodePointArrayView codePoints) {
33     std::vector<DicNode> current;
34     std::vector<DicNode> next;
35 
36     // No ngram context.
37     NgramContext emptyNgramContext;
38     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
39     const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
40             dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
41     current.emplace_back();
42     DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
43     for (const int codePoint : codePoints) {
44         // The base-lower input is used to ignore case errors and accent errors.
45         const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint);
46         for (const DicNode &dicNode : current) {
47             if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) {
48                 next.emplace_back(dicNode);
49                 next.back().advanceDigraphIndex();
50                 continue;
51             }
52             processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode, &next);
53         }
54         current.clear();
55         current.swap(next);
56     }
57 
58     int maxProbability = NOT_A_PROBABILITY;
59     for (const DicNode &dicNode : current) {
60         if (!dicNode.isTerminalDicNode()) {
61             continue;
62         }
63         const WordAttributes wordAttributes =
64                 dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(),
65                         dicNode.getWordId(), nullptr /* multiBigramMap */);
66         // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
67         maxProbability = std::max(maxProbability, wordAttributes.getProbability());
68     }
69     return maxProbability;
70 }
71 
processChildDicNodes(const DictionaryStructureWithBufferPolicy * const dictionaryStructurePolicy,const int inputCodePoint,const DicNode * const parentDicNode,std::vector<DicNode> * const outDicNodes)72 /* static */ void DictionaryUtils::processChildDicNodes(
73         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
74         const int inputCodePoint, const DicNode *const parentDicNode,
75         std::vector<DicNode> *const outDicNodes) {
76     DicNodeVector childDicNodes;
77     DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
78     for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
79         DicNode *const childDicNode = childDicNodes[childIndex];
80         const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
81         if (inputCodePoint == codePoint) {
82             outDicNodes->emplace_back(*childDicNode);
83         }
84         if (childDicNode->canBeIntentionalOmission()) {
85             processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
86                     outDicNodes);
87         }
88         if (DigraphUtils::hasDigraphForCodePoint(
89                 dictionaryStructurePolicy->getHeaderStructurePolicy(),
90                 childDicNode->getNodeCodePoint())) {
91             childDicNode->advanceDigraphIndex();
92             if (childDicNode->getNodeCodePoint() == codePoint) {
93                 childDicNode->advanceDigraphIndex();
94                 outDicNodes->emplace_back(*childDicNode);
95             }
96         }
97     }
98 }
99 
100 } // namespace latinime
101