1 /*
2  * Copyright (C) 2013, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_PROBABILITY_ENTRY_H
18 #define LATINIME_PROBABILITY_ENTRY_H
19 
20 #include <climits>
21 #include <cstdint>
22 
23 #include "defines.h"
24 #include "dictionary/property/historical_info.h"
25 #include "dictionary/property/ngram_property.h"
26 #include "dictionary/property/unigram_property.h"
27 #include "dictionary/structure/v4/ver4_dict_constants.h"
28 
29 namespace latinime {
30 
31 class ProbabilityEntry {
32  public:
ProbabilityEntry(const ProbabilityEntry & probabilityEntry)33     ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
34             : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
35               mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
36 
37     // Dummy entry
ProbabilityEntry()38     ProbabilityEntry()
39             : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
40               mHistoricalInfo() {}
41 
42     // Entry without historical information
ProbabilityEntry(const int flags,const int probability)43     ProbabilityEntry(const int flags, const int probability)
44             : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
45 
46     // Entry with historical information.
ProbabilityEntry(const int flags,const HistoricalInfo * const historicalInfo)47     ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo)
48             : mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {}
49 
50     // Create from unigram property.
ProbabilityEntry(const UnigramProperty * const unigramProperty)51     ProbabilityEntry(const UnigramProperty *const unigramProperty)
52             : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
53                     unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
54                     unigramProperty->isPossiblyOffensive())),
55               mProbability(unigramProperty->getProbability()),
56               mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
57 
58     // Create from ngram property.
59     // TODO: Set flags.
ProbabilityEntry(const NgramProperty * const ngramProperty)60     ProbabilityEntry(const NgramProperty *const ngramProperty)
61             : mFlags(0), mProbability(ngramProperty->getProbability()),
62               mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
63 
isValid()64     bool isValid() const {
65         return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
66     }
67 
hasHistoricalInfo()68     bool hasHistoricalInfo() const {
69         return mHistoricalInfo.isValid();
70     }
71 
getFlags()72     uint8_t getFlags() const {
73         return mFlags;
74     }
75 
getProbability()76     int getProbability() const {
77         return mProbability;
78     }
79 
getHistoricalInfo()80     const HistoricalInfo *getHistoricalInfo() const {
81         return &mHistoricalInfo;
82     }
83 
representsBeginningOfSentence()84     bool representsBeginningOfSentence() const {
85         return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
86     }
87 
isNotAWord()88     bool isNotAWord() const {
89         return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0;
90     }
91 
isBlacklisted()92     bool isBlacklisted() const {
93         return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0;
94     }
95 
isPossiblyOffensive()96     bool isPossiblyOffensive() const {
97         return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0;
98     }
99 
encode(const bool hasHistoricalInfo)100     uint64_t encode(const bool hasHistoricalInfo) const {
101         uint64_t encodedEntry = static_cast<uint8_t>(mFlags);
102         if (hasHistoricalInfo) {
103             encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
104                     | static_cast<uint32_t>(mHistoricalInfo.getTimestamp());
105             encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
106                     | static_cast<uint8_t>(mHistoricalInfo.getLevel());
107             encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
108                     | static_cast<uint16_t>(mHistoricalInfo.getCount());
109         } else {
110             encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
111                     | static_cast<uint8_t>(mProbability);
112         }
113         return encodedEntry;
114     }
115 
decode(const uint64_t encodedEntry,const bool hasHistoricalInfo)116     static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
117         if (hasHistoricalInfo) {
118             const int flags = readFromEncodedEntry(encodedEntry,
119                     Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
120                     Ver4DictConstants::TIME_STAMP_FIELD_SIZE
121                             + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
122                             + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
123             const int timestamp = readFromEncodedEntry(encodedEntry,
124                     Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
125                     Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
126                             + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
127             const int level = readFromEncodedEntry(encodedEntry,
128                     Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
129                     Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
130             const int count = readFromEncodedEntry(encodedEntry,
131                     Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
132             const HistoricalInfo historicalInfo(timestamp, level, count);
133             return ProbabilityEntry(flags, &historicalInfo);
134         } else {
135             const int flags = readFromEncodedEntry(encodedEntry,
136                     Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
137                     Ver4DictConstants::PROBABILITY_SIZE);
138             const int probability = readFromEncodedEntry(encodedEntry,
139                     Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
140             return ProbabilityEntry(flags, probability);
141         }
142     }
143 
144  private:
145     // Copy constructor is public to use this class as a type of return value.
146     DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
147 
148     const uint8_t mFlags;
149     const int mProbability;
150     const HistoricalInfo mHistoricalInfo;
151 
readFromEncodedEntry(const uint64_t encodedEntry,const int size,const int pos)152     static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
153         return static_cast<int>(
154                 (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
155     }
156 
createFlags(const bool representsBeginningOfSentence,const bool isNotAWord,const bool isBlacklisted,const bool isPossiblyOffensive)157     static uint8_t createFlags(const bool representsBeginningOfSentence,
158             const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
159         uint8_t flags = 0;
160         if (representsBeginningOfSentence) {
161             flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
162         }
163         if (isNotAWord) {
164             flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
165         }
166         if (isBlacklisted) {
167             flags |= Ver4DictConstants::FLAG_BLACKLISTED;
168         }
169         if (isPossiblyOffensive) {
170             flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
171         }
172         return flags;
173     }
174 };
175 } // namespace latinime
176 #endif /* LATINIME_PROBABILITY_ENTRY_H */
177