1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_NGRAM_CONTEXT_H
18 #define LATINIME_NGRAM_CONTEXT_H
19 
20 #include <array>
21 
22 #include "defines.h"
23 #include "utils/int_array_view.h"
24 
25 namespace latinime {
26 
27 class DictionaryStructureWithBufferPolicy;
28 
29 class NgramContext {
30  public:
31     // No prev word information.
32     NgramContext();
33     // Copy constructor to use this class with std::vector and use this class as a return value.
34     NgramContext(const NgramContext &ngramContext);
35     // Construct from previous words.
36     NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
37             const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
38             const size_t prevWordCount);
39     // Construct from a previous word.
40     NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
41             const bool isBeginningOfSentence);
42 
getPrevWordCount()43     size_t getPrevWordCount() const {
44         return mPrevWordCount;
45     }
46     bool isValid() const;
47 
48     template<size_t N>
getPrevWordIds(const DictionaryStructureWithBufferPolicy * const dictStructurePolicy,WordIdArray<N> * const prevWordIdBuffer,const bool tryLowerCaseSearch)49     const WordIdArrayView getPrevWordIds(
50             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
51             WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
52         for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
53             prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
54                     mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
55         }
56         return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
57     }
58 
59     // n is 1-indexed.
60     const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
61     // n is 1-indexed.
62     bool isNthPrevWordBeginningOfSentence(const size_t n) const;
63 
64  private:
65     DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
66 
67     static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
68             const int *const wordCodePoints, const int wordCodePointCount,
69             const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
70     void clear();
71 
72     const size_t mPrevWordCount;
73     int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
74     int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
75     bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
76 };
77 } // namespace latinime
78 #endif // LATINIME_NGRAM_CONTEXT_H
79