1 /*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "suggest/core/dictionary/dictionary_utils.h"
18
19 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
20 #include "dictionary/property/ngram_context.h"
21 #include "suggest/core/dicnode/dic_node.h"
22 #include "suggest/core/dicnode/dic_node_priority_queue.h"
23 #include "suggest/core/dicnode/dic_node_vector.h"
24 #include "suggest/core/dictionary/dictionary.h"
25 #include "suggest/core/dictionary/digraph_utils.h"
26 #include "utils/int_array_view.h"
27
28 namespace latinime {
29
getMaxProbabilityOfExactMatches(const DictionaryStructureWithBufferPolicy * const dictionaryStructurePolicy,const CodePointArrayView codePoints)30 /* static */ int DictionaryUtils::getMaxProbabilityOfExactMatches(
31 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
32 const CodePointArrayView codePoints) {
33 std::vector<DicNode> current;
34 std::vector<DicNode> next;
35
36 // No ngram context.
37 NgramContext emptyNgramContext;
38 WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
39 const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
40 dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
41 current.emplace_back();
42 DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, ¤t.front());
43 for (const int codePoint : codePoints) {
44 // The base-lower input is used to ignore case errors and accent errors.
45 const int baseLowerCodePoint = CharUtils::toBaseLowerCase(codePoint);
46 for (const DicNode &dicNode : current) {
47 if (dicNode.isInDigraph() && dicNode.getNodeCodePoint() == baseLowerCodePoint) {
48 next.emplace_back(dicNode);
49 next.back().advanceDigraphIndex();
50 continue;
51 }
52 processChildDicNodes(dictionaryStructurePolicy, baseLowerCodePoint, &dicNode, &next);
53 }
54 current.clear();
55 current.swap(next);
56 }
57
58 int maxProbability = NOT_A_PROBABILITY;
59 for (const DicNode &dicNode : current) {
60 if (!dicNode.isTerminalDicNode()) {
61 continue;
62 }
63 const WordAttributes wordAttributes =
64 dictionaryStructurePolicy->getWordAttributesInContext(dicNode.getPrevWordIds(),
65 dicNode.getWordId(), nullptr /* multiBigramMap */);
66 // dicNode can contain case errors, accent errors, intentional omissions or digraphs.
67 maxProbability = std::max(maxProbability, wordAttributes.getProbability());
68 }
69 return maxProbability;
70 }
71
processChildDicNodes(const DictionaryStructureWithBufferPolicy * const dictionaryStructurePolicy,const int inputCodePoint,const DicNode * const parentDicNode,std::vector<DicNode> * const outDicNodes)72 /* static */ void DictionaryUtils::processChildDicNodes(
73 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
74 const int inputCodePoint, const DicNode *const parentDicNode,
75 std::vector<DicNode> *const outDicNodes) {
76 DicNodeVector childDicNodes;
77 DicNodeUtils::getAllChildDicNodes(parentDicNode, dictionaryStructurePolicy, &childDicNodes);
78 for (int childIndex = 0; childIndex < childDicNodes.getSizeAndLock(); ++childIndex) {
79 DicNode *const childDicNode = childDicNodes[childIndex];
80 const int codePoint = CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint());
81 if (inputCodePoint == codePoint) {
82 outDicNodes->emplace_back(*childDicNode);
83 }
84 if (childDicNode->canBeIntentionalOmission()) {
85 processChildDicNodes(dictionaryStructurePolicy, inputCodePoint, childDicNode,
86 outDicNodes);
87 }
88 if (DigraphUtils::hasDigraphForCodePoint(
89 dictionaryStructurePolicy->getHeaderStructurePolicy(),
90 childDicNode->getNodeCodePoint())) {
91 childDicNode->advanceDigraphIndex();
92 if (childDicNode->getNodeCodePoint() == codePoint) {
93 childDicNode->advanceDigraphIndex();
94 outDicNodes->emplace_back(*childDicNode);
95 }
96 }
97 }
98 }
99
100 } // namespace latinime
101