1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "utils/i18n/locale.h"
18 
19 #include <string>
20 
21 #include "utils/strings/split.h"
22 
23 namespace libtextclassifier3 {
24 
25 namespace {
// Wildcard value: a supported locale whose language, script or region equals
// this string matches any value of that component.
constexpr const char* kAnyMatch{"*"};

// Language subtag that BCP 47 reserves for "Undetermined Language".
constexpr const char* kUnknownLanguageCode{"und"};
30 
CheckLanguage(StringPiece language)31 bool CheckLanguage(StringPiece language) {
32   if (language.size() == 1 && language.data()[0] == '*') {
33     return true;
34   }
35 
36   if (language.size() != 2 && language.size() != 3) {
37     return false;
38   }
39 
40   // Needs to be all lowercase.
41   for (int i = 0; i < language.size(); ++i) {
42     if (!std::islower(language[i])) {
43       return false;
44     }
45   }
46 
47   return true;
48 }
49 
CheckScript(StringPiece script)50 bool CheckScript(StringPiece script) {
51   if (script.size() != 4) {
52     return false;
53   }
54 
55   if (!std::isupper(script[0])) {
56     return false;
57   }
58 
59   // Needs to be all lowercase.
60   for (int i = 1; i < script.size(); ++i) {
61     if (!std::islower(script[i])) {
62       return false;
63     }
64   }
65 
66   return true;
67 }
68 
CheckRegion(StringPiece region)69 bool CheckRegion(StringPiece region) {
70   if (region.size() == 2) {
71     return std::isupper(region[0]) && std::isupper(region[1]);
72   } else if (region.size() == 3) {
73     return std::isdigit(region[0]) && std::isdigit(region[1]) &&
74            std::isdigit(region[2]);
75   } else {
76     return false;
77   }
78 }
79 
80 }  // namespace
81 
FromBCP47(const std::string & locale_tag)82 Locale Locale::FromBCP47(const std::string& locale_tag) {
83   std::vector<StringPiece> parts = strings::Split(locale_tag, '-');
84   if (parts.empty()) {
85     return Locale::Invalid();
86   }
87 
88   auto parts_it = parts.begin();
89   StringPiece language = *parts_it;
90   if (!CheckLanguage(language)) {
91     return Locale::Invalid();
92   }
93   ++parts_it;
94 
95   StringPiece script;
96   if (parts_it != parts.end()) {
97     script = *parts_it;
98     if (!CheckScript(script)) {
99       script = "";
100     } else {
101       ++parts_it;
102     }
103   }
104 
105   StringPiece region;
106   if (parts_it != parts.end()) {
107     region = *parts_it;
108     if (!CheckRegion(region)) {
109       region = "";
110     } else {
111       ++parts_it;
112     }
113   }
114 
115   // NOTE: We don't parse the rest of the BCP47 tag here even if specified.
116 
117   return Locale(language.ToString(), script.ToString(), region.ToString());
118 }
119 
FromLanguageTag(const LanguageTag * language_tag)120 Locale Locale::FromLanguageTag(const LanguageTag* language_tag) {
121   if (language_tag == nullptr || language_tag->language() == nullptr) {
122     return Locale::Invalid();
123   }
124 
125   StringPiece language = language_tag->language()->c_str();
126   if (!CheckLanguage(language)) {
127     return Locale::Invalid();
128   }
129 
130   StringPiece script;
131   if (language_tag->script() != nullptr) {
132     script = language_tag->script()->c_str();
133     if (!CheckScript(script)) {
134       script = "";
135     }
136   }
137 
138   StringPiece region;
139   if (language_tag->region() != nullptr) {
140     region = language_tag->region()->c_str();
141     if (!CheckRegion(region)) {
142       region = "";
143     }
144   }
145   return Locale(language.ToString(), script.ToString(), region.ToString());
146 }
147 
IsUnknown() const148 bool Locale::IsUnknown() const {
149   return is_valid_ && language_ == kUnknownLanguageCode;
150 }
151 
IsLocaleSupported(const Locale & locale,const std::vector<Locale> & supported_locales,bool default_value)152 bool Locale::IsLocaleSupported(const Locale& locale,
153                                const std::vector<Locale>& supported_locales,
154                                bool default_value) {
155   if (!locale.IsValid()) {
156     return false;
157   }
158   if (locale.IsUnknown()) {
159     return default_value;
160   }
161   for (const Locale& supported_locale : supported_locales) {
162     if (!supported_locale.IsValid()) {
163       continue;
164     }
165     const bool language_matches =
166         supported_locale.Language().empty() ||
167         supported_locale.Language() == kAnyMatch ||
168         supported_locale.Language() == locale.Language();
169     const bool script_matches = supported_locale.Script().empty() ||
170                                 supported_locale.Script() == kAnyMatch ||
171                                 locale.Script().empty() ||
172                                 supported_locale.Script() == locale.Script();
173     const bool region_matches = supported_locale.Region().empty() ||
174                                 supported_locale.Region() == kAnyMatch ||
175                                 locale.Region().empty() ||
176                                 supported_locale.Region() == locale.Region();
177     if (language_matches && script_matches && region_matches) {
178       return true;
179     }
180   }
181   return false;
182 }
183 
IsAnyLocaleSupported(const std::vector<Locale> & locales,const std::vector<Locale> & supported_locales,bool default_value)184 bool Locale::IsAnyLocaleSupported(const std::vector<Locale>& locales,
185                                   const std::vector<Locale>& supported_locales,
186                                   bool default_value) {
187   if (locales.empty()) {
188     return default_value;
189   }
190   if (supported_locales.empty()) {
191     return default_value;
192   }
193   for (const Locale& locale : locales) {
194     if (IsLocaleSupported(locale, supported_locales, default_value)) {
195       return true;
196     }
197   }
198   return false;
199 }
200 
operator ==(const Locale & locale) const201 bool Locale::operator==(const Locale& locale) const {
202   return language_ == locale.language_ && region_ == locale.region_ &&
203          script_ == locale.script_;
204 }
205 
operator <(const Locale & locale) const206 bool Locale::operator<(const Locale& locale) const {
207   return std::tie(language_, region_, script_) <
208          std::tie(locale.language_, locale.region_, locale.script_);
209 }
210 
operator !=(const Locale & locale) const211 bool Locale::operator!=(const Locale& locale) const {
212   return !(*this == locale);
213 }
214 
operator <<(logging::LoggingStringStream & stream,const Locale & locale)215 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
216                                          const Locale& locale) {
217   return stream << "Locale(language=" << locale.Language()
218                 << ", script=" << locale.Script()
219                 << ", region=" << locale.Region()
220                 << ", is_valid=" << locale.IsValid()
221                 << ", is_unknown=" << locale.IsUnknown() << ")";
222 }
223 
ParseLocales(StringPiece locales_list,std::vector<Locale> * locales)224 bool ParseLocales(StringPiece locales_list, std::vector<Locale>* locales) {
225   for (const auto& locale_str : strings::Split(locales_list, ',')) {
226     const Locale locale = Locale::FromBCP47(locale_str.ToString());
227     if (!locale.IsValid()) {
228       TC3_LOG(ERROR) << "Invalid locale " << locale_str.ToString();
229       return false;
230     }
231     locales->push_back(locale);
232   }
233   return true;
234 }
235 
236 }  // namespace libtextclassifier3
237