1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_VOCAB_VOCAB_LEVEL_TABLE_H_
18 #define LIBTEXTCLASSIFIER_ANNOTATOR_VOCAB_VOCAB_LEVEL_TABLE_H_
19 
20 #include "annotator/model_generated.h"
21 #include "annotator/types.h"
22 #include "utils/container/bit-vector.h"
23 #include "marisa/trie.h"
24 
25 namespace libtextclassifier3 {
26 
// Per-vocab flags produced by a vocab level table lookup.
//
// Both flags default to false so that a default-constructed LookupResult never
// exposes indeterminate values.
struct LookupResult {
  // Whether to trigger define for users of beginner proficiency.
  bool beginner_level = false;
  // Whether to avoid triggering define if the leading character is in upper
  // case.
  bool do_not_trigger_in_upper_case = false;
};
34 
35 // A table of vocabs and their levels which is backed by a marisa trie.
36 // See http://www.s-yata.jp/marisa-trie/docs/readme.en.html.
37 class VocabLevelTable {
38  public:
39   static std::unique_ptr<VocabLevelTable> Create(const VocabModel* model);
40 
41   Optional<LookupResult> Lookup(const std::string& vocab) const;
42 
43  private:
44   explicit VocabLevelTable(const VocabModel* model,
45                            std::unique_ptr<marisa::Trie> vocab_trie,
46                            const BitVector beginner_level,
47                            const BitVector do_not_trigger_in_upper_case);
48   static const VocabModel* LoadAndVerifyModel();
49 
50   const VocabModel* model_;
51   const std::unique_ptr<marisa::Trie> vocab_trie_;
52   const BitVector beginner_level_;
53   const BitVector do_not_trigger_in_upper_case_;
54 };
55 
56 }  // namespace libtextclassifier3
57 
58 #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_VOCAB_VOCAB_LEVEL_TABLE_H_
59