/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_ #define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_ #include "utils/base/integral_types.h" #include "utils/utf8/unicodetext.h" namespace libtextclassifier3 { bool IsOpeningBracket(char32 codepoint); bool IsClosingBracket(char32 codepoint); bool IsWhitespace(char32 codepoint); bool IsBidirectional(char32 codepoint); bool IsDigit(char32 codepoint); bool IsLower(char32 codepoint); bool IsUpper(char32 codepoint); bool IsPunctuation(char32 codepoint); bool IsPercentage(char32 codepoint); bool IsSlash(char32 codepoint); bool IsMinus(char32 codepoint); bool IsNumberSign(char32 codepoint); bool IsDot(char32 codepoint); bool IsApostrophe(char32 codepoint); bool IsQuotation(char32 codepoint); bool IsAmpersand(char32 codepoint); bool IsLatinLetter(char32 codepoint); bool IsArabicLetter(char32 codepoint); bool IsCyrillicLetter(char32 codepoint); bool IsChineseLetter(char32 codepoint); bool IsJapaneseLetter(char32 codepoint); bool IsKoreanLetter(char32 codepoint); bool IsThaiLetter(char32 codepoint); bool IsLetter(char32 codepoint); bool IsCJTletter(char32 codepoint); char32 ToLower(char32 codepoint); char32 ToUpper(char32 codepoint); char32 GetPairedBracket(char32 codepoint); // Checks if the text format is not likely to be a number. Used to avoid most of // the java exceptions thrown when fail to parse. template bool PassesIntPreChesks(const UnicodeText& text, const T result) { if (text.empty() || (std::is_same::value && text.size_codepoints() > 10) || (std::is_same::value && text.size_codepoints() > 19)) { return false; } for (auto it = text.begin(); it != text.end(); ++it) { if (!IsDigit(*it)) { return false; } } return true; } } // namespace libtextclassifier3 #endif // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_