1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "utils/resources.h"
18 #include "utils/base/logging.h"
19 #include "utils/zlib/buffer_generated.h"
20 #include "utils/zlib/zlib.h"
21 
22 namespace libtextclassifier3 {
23 namespace {
isWildcardMatch(const flatbuffers::String * left,const std::string & right)24 bool isWildcardMatch(const flatbuffers::String* left,
25                      const std::string& right) {
26   return (left == nullptr || right.empty());
27 }
28 
isExactMatch(const flatbuffers::String * left,const std::string & right)29 bool isExactMatch(const flatbuffers::String* left, const std::string& right) {
30   if (left == nullptr) {
31     return right.empty();
32   }
33   return left->str() == right;
34 }
35 
36 }  // namespace
37 
LocaleMatch(const Locale & locale,const LanguageTag * entry_locale) const38 int Resources::LocaleMatch(const Locale& locale,
39                            const LanguageTag* entry_locale) const {
40   int match = LOCALE_NO_MATCH;
41   if (isExactMatch(entry_locale->language(), locale.Language())) {
42     match |= LOCALE_LANGUAGE_MATCH;
43   } else if (isWildcardMatch(entry_locale->language(), locale.Language())) {
44     match |= LOCALE_LANGUAGE_WILDCARD_MATCH;
45   }
46 
47   if (isExactMatch(entry_locale->script(), locale.Script())) {
48     match |= LOCALE_SCRIPT_MATCH;
49   } else if (isWildcardMatch(entry_locale->script(), locale.Script())) {
50     match |= LOCALE_SCRIPT_WILDCARD_MATCH;
51   }
52 
53   if (isExactMatch(entry_locale->region(), locale.Region())) {
54     match |= LOCALE_REGION_MATCH;
55   } else if (isWildcardMatch(entry_locale->region(), locale.Region())) {
56     match |= LOCALE_REGION_WILDCARD_MATCH;
57   }
58 
59   return match;
60 }
61 
FindResource(const StringPiece resource_name) const62 const ResourceEntry* Resources::FindResource(
63     const StringPiece resource_name) const {
64   if (resources_ == nullptr || resources_->resource_entry() == nullptr) {
65     TC3_LOG(ERROR) << "No resources defined.";
66     return nullptr;
67   }
68   const ResourceEntry* entry =
69       resources_->resource_entry()->LookupByKey(resource_name.data());
70   if (entry == nullptr) {
71     TC3_LOG(ERROR) << "Resource " << resource_name.ToString() << " not found";
72     return nullptr;
73   }
74   return entry;
75 }
76 
BestResourceForLocales(const ResourceEntry * resource,const std::vector<Locale> & locales) const77 int Resources::BestResourceForLocales(
78     const ResourceEntry* resource, const std::vector<Locale>& locales) const {
79   // Find best match based on locale.
80   int resource_id = -1;
81   int locale_match = LOCALE_NO_MATCH;
82   const auto* resources = resource->resource();
83   for (int user_locale = 0; user_locale < locales.size(); user_locale++) {
84     if (!locales[user_locale].IsValid()) {
85       continue;
86     }
87     for (int i = 0; i < resources->size(); i++) {
88       for (const int locale_id : *resources->Get(i)->locale()) {
89         const int candidate_match = LocaleMatch(
90             locales[user_locale], resources_->locale()->Get(locale_id));
91 
92         // Only consider if at least the language matches.
93         if ((candidate_match & LOCALE_LANGUAGE_MATCH) == 0 &&
94             (candidate_match & LOCALE_LANGUAGE_WILDCARD_MATCH) == 0) {
95           continue;
96         }
97 
98         if (candidate_match > locale_match) {
99           locale_match = candidate_match;
100           resource_id = i;
101         }
102       }
103     }
104 
105     // If the language matches exactly, we are already finished.
106     // We found an exact language match.
107     if (locale_match & LOCALE_LANGUAGE_MATCH) {
108       return resource_id;
109     }
110   }
111   return resource_id;
112 }
113 
GetResourceContent(const std::vector<Locale> & locales,const StringPiece resource_name,std::string * result) const114 bool Resources::GetResourceContent(const std::vector<Locale>& locales,
115                                    const StringPiece resource_name,
116                                    std::string* result) const {
117   const ResourceEntry* entry = FindResource(resource_name);
118   if (entry == nullptr || entry->resource() == nullptr) {
119     return false;
120   }
121 
122   int resource_id = BestResourceForLocales(entry, locales);
123   if (resource_id < 0) {
124     return false;
125   }
126   const auto* resource = entry->resource()->Get(resource_id);
127   if (resource->content() != nullptr) {
128     *result = resource->content()->str();
129     return true;
130   } else if (resource->compressed_content() != nullptr) {
131     std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance(
132         resources_->compression_dictionary()->data(),
133         resources_->compression_dictionary()->size());
134     if (decompressor != nullptr &&
135         decompressor->MaybeDecompress(resource->compressed_content(), result)) {
136       return true;
137     }
138   }
139   return false;
140 }
141 
CompressResources(ResourcePoolT * resources,const bool build_compression_dictionary,const int dictionary_sample_every)142 bool CompressResources(ResourcePoolT* resources,
143                        const bool build_compression_dictionary,
144                        const int dictionary_sample_every) {
145   std::vector<unsigned char> dictionary;
146   if (build_compression_dictionary) {
147     {
148       // Build up a compression dictionary.
149       std::unique_ptr<ZlibCompressor> compressor = ZlibCompressor::Instance();
150       int i = 0;
151       for (auto& entry : resources->resource_entry) {
152         for (auto& resource : entry->resource) {
153           if (resource->content.empty()) {
154             continue;
155           }
156           i++;
157 
158           // Use a sample of the entries to build up a custom compression
159           // dictionary. Using all entries will generally not give a benefit
160           // for small data sizes, so we subsample here.
161           if (i % dictionary_sample_every != 0) {
162             continue;
163           }
164           CompressedBufferT compressed_content;
165           compressor->Compress(resource->content, &compressed_content);
166         }
167       }
168       compressor->GetDictionary(&dictionary);
169       resources->compression_dictionary.assign(
170           dictionary.data(), dictionary.data() + dictionary.size());
171     }
172   }
173 
174   for (auto& entry : resources->resource_entry) {
175     for (auto& resource : entry->resource) {
176       if (resource->content.empty()) {
177         continue;
178       }
179       // Try compressing the data.
180       std::unique_ptr<ZlibCompressor> compressor =
181           build_compression_dictionary
182               ? ZlibCompressor::Instance(dictionary.data(), dictionary.size())
183               : ZlibCompressor::Instance();
184       if (!compressor) {
185         TC3_LOG(ERROR) << "Cannot create zlib compressor.";
186         return false;
187       }
188 
189       CompressedBufferT compressed_content;
190       compressor->Compress(resource->content, &compressed_content);
191 
192       // Only keep compressed version if smaller.
193       if (compressed_content.uncompressed_size >
194           compressed_content.buffer.size()) {
195         resource->content.clear();
196         resource->compressed_content.reset(new CompressedBufferT);
197         *resource->compressed_content = compressed_content;
198       }
199     }
200   }
201   return true;
202 }
203 
CompressSerializedResources(const std::string & resources,const int dictionary_sample_every)204 std::string CompressSerializedResources(const std::string& resources,
205                                         const int dictionary_sample_every) {
206   std::unique_ptr<ResourcePoolT> unpacked_resources(
207       flatbuffers::GetRoot<ResourcePool>(resources.data())->UnPack());
208   TC3_CHECK(unpacked_resources != nullptr);
209   TC3_CHECK(
210       CompressResources(unpacked_resources.get(), dictionary_sample_every));
211   flatbuffers::FlatBufferBuilder builder;
212   builder.Finish(ResourcePool::Pack(builder, unpacked_resources.get()));
213   return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
214                      builder.GetSize());
215 }
216 
217 }  // namespace libtextclassifier3
218