Home
last modified time | relevance | path

Searched refs:UnicodeText (Results 1 – 25 of 72) sorted by relevance

123

/external/libtextclassifier/native/utils/utf8/
Dunicodetext.cc32 UnicodeText::Repr& UnicodeText::Repr::operator=(Repr&& src) { in operator =()
42 void UnicodeText::Repr::PointTo(const char* data, int size) { in PointTo()
50 void UnicodeText::Repr::Copy(const char* data, int size) { in Copy()
55 void UnicodeText::Repr::resize(int new_size) { in resize()
67 void UnicodeText::Repr::reserve(int new_capacity) { in reserve()
85 void UnicodeText::Repr::append(const char* bytes, int byte_length) { in append()
91 void UnicodeText::Repr::clear() { in clear()
100 UnicodeText::UnicodeText() {} in UnicodeText() function in libtextclassifier3::UnicodeText
102 UnicodeText::UnicodeText(const UnicodeText& src, bool do_copy) { in UnicodeText() function in libtextclassifier3::UnicodeText
110 UnicodeText& UnicodeText::operator=(UnicodeText&& src) { in operator =()
[all …]
Dunicodetext.h69 class UnicodeText {
73 UnicodeText(); // Create an empty text.
74 UnicodeText(const UnicodeText& src, bool do_copy = true);
75 UnicodeText& operator=(UnicodeText&& src);
76 ~UnicodeText();
140 friend class UnicodeText;
165 bool operator==(const UnicodeText& other) const;
170 UnicodeText& Copy(const UnicodeText& src);
171 UnicodeText& PointToUTF8(const char* utf8_buffer, int byte_length);
172 UnicodeText& CopyUTF8(const char* utf8_buffer, int byte_length);
[all …]
Dunicodetext_test.cc35 UnicodeText empty_text_;
36 UnicodeText text_;
40 UnicodeText text = UTF8ToUnicodeText("1234��hello", /*do_copy=*/false); in TEST()
41 EXPECT_EQ(UnicodeText(text).ToUTF8String(), "1234��hello"); in TEST()
42 EXPECT_EQ(UnicodeText(text, /*do_copy=*/false).ToUTF8String(), "1234��hello"); in TEST()
47 UnicodeText text = UTF8ToUnicodeText("1234��hello", /*do_copy=*/false); in TEST()
61 UnicodeText text = in TEST()
75 UnicodeText text = UTF8ToUnicodeText("1234��hello", /*do_copy=*/false); in TEST()
78 UnicodeText::Substring(std::next(text.begin(), 4), in TEST()
82 UnicodeText::Substring(std::next(text.begin(), 4), in TEST()
[all …]
Dunilib-javaicu.h44 bool ParseInt32(const UnicodeText& text, int32* result) const;
45 bool ParseInt64(const UnicodeText& text, int64* result) const;
46 bool ParseDouble(const UnicodeText& text, double* result) const;
60 StatusOr<int32> Length(const UnicodeText& text) const;
111 UnicodeText Group(int* status) const;
116 UnicodeText Group(int group_idx, int* status) const;
145 std::unique_ptr<RegexMatcher> Matcher(const UnicodeText& context) const;
149 RegexPattern(const JniCache* jni_cache, const UnicodeText& pattern,
163 mutable UnicodeText pattern_text_;
174 BreakIterator(const JniCache* jni_cache, const UnicodeText& text);
[all …]
Dunilib.h44 UnicodeText ToLowerText(const UnicodeText& text) const { in ToLowerText()
45 UnicodeText result; in ToLowerText()
53 UnicodeText ToUpperText(const UnicodeText& text) const { in ToUpperText()
54 UnicodeText result; in ToUpperText()
61 bool IsLowerText(const UnicodeText& text) const { in IsLowerText()
70 bool IsUpperText(const UnicodeText& text) const { in IsUpperText()
79 bool IsDigits(const UnicodeText& text) const { in IsDigits()
156 bool IsValidUtf8(const UnicodeText& text) const { in IsValidUtf8()
Dunilib-javaicu.cc84 StatusOr<int32> UniLibBase::Length(const UnicodeText& text) const { in Length()
98 bool UniLibBase::ParseInt32(const UnicodeText& text, int32* result) const { in ParseInt32()
102 bool UniLibBase::ParseInt64(const UnicodeText& text, int64* result) const { in ParseInt64()
106 bool UniLibBase::ParseDouble(const UnicodeText& text, double* result) const { in ParseDouble()
116 if (!ParseInt(UnicodeText::Substring(text.begin(), it_dot, /*do_copy=*/false), in ParseDouble()
124 UnicodeText::Substring(++it_dot, text.end(), /*do_copy=*/false), in ParseDouble()
140 const UnicodeText& regex) const { in CreateRegexPattern()
146 const UnicodeText& regex) const { in CreateLazyRegexPattern()
152 const UnicodeText& pattern, bool lazy) in RegexPattern()
194 const UnicodeText& context) const { in Matcher()
[all …]
/external/libtextclassifier/native/annotator/datetime/
Dextractor.h69 bool ExtractType(const UnicodeText& input,
71 UnicodeText* match_result = nullptr) const;
73 bool GroupTextFromMatch(int group_id, UnicodeText* result) const;
81 bool MapInput(const UnicodeText& input,
85 bool ParseDigits(const UnicodeText& input, int* parsed_digits) const;
86 bool ParseWrittenNumber(const UnicodeText& input, int* parsed_number) const;
87 bool ParseYear(const UnicodeText& input, int* parsed_year) const;
88 bool ParseMonth(const UnicodeText& input, int* parsed_month) const;
89 bool ParseMeridiem(const UnicodeText& input, int* parsed_meridiem) const;
91 const UnicodeText& input,
[all …]
Dextractor.cc42 UnicodeText group_text; in Extract()
221 bool DatetimeExtractor::ExtractType(const UnicodeText& input, in ExtractType()
223 UnicodeText* match_result) const { in ExtractType()
250 UnicodeText* result) const { in GroupTextFromMatch()
282 const UnicodeText& input, in MapInput()
294 bool DatetimeExtractor::ParseWrittenNumber(const UnicodeText& input, in ParseWrittenNumber()
376 bool DatetimeExtractor::ParseDigits(const UnicodeText& input, in ParseDigits()
378 UnicodeText digit; in ParseDigits()
389 bool DatetimeExtractor::ParseYear(const UnicodeText& input, in ParseYear()
399 bool DatetimeExtractor::ParseMonth(const UnicodeText& input, in ParseMonth()
[all …]
/external/libtextclassifier/native/annotator/
Dfeature-processor.h96 const UnicodeText& unicode_text, const CodepointSpan& span);
127 std::vector<Token> Tokenize(const UnicodeText& text_unicode) const;
155 void RetokenizeAndFindClick(const UnicodeText& context_unicode,
156 const UnicodeText::const_iterator& span_begin,
157 const UnicodeText::const_iterator& span_end,
196 const UnicodeText& context_unicode,
206 CodepointSpan StripBoundaryCodepoints(const UnicodeText& context_unicode,
211 const UnicodeText::const_iterator& span_begin,
212 const UnicodeText::const_iterator& span_end,
262 const UnicodeText::const_iterator& span_start,
[all …]
Dfeature-processor.cc88 const UnicodeText token_word = in SplitTokensOnSelectionBoundaries()
93 std::vector<UnicodeText::const_iterator> split_points; in SplitTokensOnSelectionBoundaries()
142 const UnicodeText context_unicode = UTF8ToUnicodeText(context, in StripTokensFromOtherLines()
151 const UnicodeText& context_unicode, in StripTokensFromOtherLines()
152 const UnicodeText::const_iterator& span_begin, in StripTokensFromOtherLines()
153 const UnicodeText::const_iterator& span_end, const CodepointSpan& span, in StripTokensFromOtherLines()
194 const UnicodeText& text_unicode) const { in Tokenize()
223 const UnicodeText token_begin_unicode = in LabelToSpan()
225 UnicodeText::const_iterator token_begin = token_begin_unicode.begin(); in LabelToSpan()
226 const UnicodeText token_end_unicode = in LabelToSpan()
[all …]
Dstrip-unpaired-brackets.cc27 const UnicodeText::const_iterator& span_begin, in StripUnpairedBrackets()
28 const UnicodeText::const_iterator& span_end, CodepointSpan span, in StripUnpairedBrackets()
34 UnicodeText::const_iterator begin = span_begin; in StripUnpairedBrackets()
35 const UnicodeText::const_iterator end = span_end; in StripUnpairedBrackets()
69 CodepointSpan StripUnpairedBrackets(const UnicodeText& context, in StripUnpairedBrackets()
74 const UnicodeText span_text = UnicodeText::Substring( in StripUnpairedBrackets()
Dstrip-unpaired-brackets.h31 const UnicodeText::const_iterator& span_begin,
32 const UnicodeText::const_iterator& span_end, CodepointSpan span,
36 CodepointSpan StripUnpairedBrackets(const UnicodeText& context,
/external/libtextclassifier/native/annotator/translate/
Dtranslate.cc33 const UnicodeText& context, CodepointSpan selection_indices, in ClassifyText()
103 const UnicodeText& context, CodepointSpan selection_indices) const { in BackoffDetectLanguages()
111 const UnicodeText entity = in BackoffDetectLanguages()
112 UnicodeText::Substring(context, selection_indices.first, in BackoffDetectLanguages()
121 const UnicodeText entity_with_context = TokenAlignedSubstringAroundSpan( in BackoffDetectLanguages()
153 UnicodeText::const_iterator
155 const UnicodeText& text, int start_index, int direction) const { in FindIndexOfNextWhitespaceOrPunctuation()
168 UnicodeText TranslateAnnotator::TokenAlignedSubstringAroundSpan( in TokenAlignedSubstringAroundSpan()
169 const UnicodeText& text, CodepointSpan indices, int minimum_length) const { in TokenAlignedSubstringAroundSpan()
172 return UnicodeText(text, /*do_copy=*/false); in TokenAlignedSubstringAroundSpan()
[all …]
Dtranslate.h39 bool ClassifyText(const UnicodeText& context, CodepointSpan selection_indices,
54 const UnicodeText& context, CodepointSpan selection_indices) const;
59 UnicodeText::const_iterator FindIndexOfNextWhitespaceOrPunctuation(
60 const UnicodeText& text, int start_index, int direction) const;
65 UnicodeText TokenAlignedSubstringAroundSpan(const UnicodeText& text,
/external/libtextclassifier/native/annotator/duration/
Dduration_test.cc112 std::vector<Token> Tokenize(const UnicodeText& text) { in Tokenize()
151 const UnicodeText text = UTF8ToUnicodeText("Wake me up in 15 minutes ok?"); in TEST_F()
169 const UnicodeText text = in TEST_F()
188 const UnicodeText text = in TEST_F()
207 const UnicodeText text = UTF8ToUnicodeText( in TEST_F()
227 const UnicodeText text = UTF8ToUnicodeText("Set a timer for half an hour"); in TEST_F()
245 const UnicodeText text = in TEST_F()
264 const UnicodeText text = in TEST_F()
284 const UnicodeText text = in TEST_F()
303 const UnicodeText text = UTF8ToUnicodeText( in TEST_F()
[all …]
/external/libtextclassifier/native/utils/
Dtokenizer.cc98 UnicodeText text_unicode = UTF8ToUnicodeText(text, /*do_copy=*/false); in Tokenize()
102 std::vector<Token> Tokenizer::Tokenize(const UnicodeText& text_unicode) const { in Tokenize()
131 void AppendCodepointToToken(UnicodeText::const_iterator it, Token* token) { in AppendCodepointToToken()
137 const UnicodeText& text_unicode) const { in InternalTokenize()
177 void Tokenizer::TokenizeSubstring(const UnicodeText& unicode_text, in TokenizeSubstring()
186 UnicodeText text = UnicodeText::Substring(unicode_text, span.first, in TokenizeSubstring()
202 void Tokenizer::InternalRetokenize(const UnicodeText& unicode_text, in InternalRetokenize()
207 const UnicodeText unicode_token_value = in InternalRetokenize()
234 bool Tokenizer::ICUTokenize(const UnicodeText& context_unicode, in ICUTokenize()
280 bool Tokenizer::NumberTokenize(const UnicodeText& text_unicode, in NumberTokenize()
[all …]
Dnormalization.cc24 UnicodeText NormalizeText(const UniLib& unilib, in NormalizeText()
26 const UnicodeText& text) { in NormalizeText()
31 UnicodeText NormalizeTextCodepointWise(const UniLib& unilib, in NormalizeTextCodepointWise()
33 const UnicodeText& text) { in NormalizeTextCodepointWise()
40 UnicodeText result; in NormalizeTextCodepointWise()
Dtokenizer.h96 std::vector<Token> Tokenize(const UnicodeText& text_unicode) const;
112 void TokenizeSubstring(const UnicodeText& unicode_text, CodepointSpan span,
115 std::vector<Token> InternalTokenize(const UnicodeText& text_unicode) const;
119 void InternalRetokenize(const UnicodeText& unicode_text,
123 bool ICUTokenize(const UnicodeText& context_unicode,
127 bool NumberTokenize(const UnicodeText& text_unicode,
Dnormalization.h30 UnicodeText NormalizeText(const UniLib& unilib,
32 const UnicodeText& text);
37 UnicodeText NormalizeTextCodepointWise(const UniLib& unilib,
39 const UnicodeText& text);
Dtoken-feature-extractor.cc51 const UniLib& unilib, UnicodeText* remapped) { in RemapTokenUnicode()
57 UnicodeText word = UTF8ToUnicodeText(token, /*do_copy=*/false); in RemapTokenUnicode()
110 UnicodeText token_unicode = in ExtractDenseFeatures()
140 UnicodeText token_unicode = in ExtractDenseFeatures()
237 UnicodeText word = UTF8ToUnicodeText(token.value, /*do_copy=*/false); in ExtractCharactergramFeaturesUnicode()
265 const UnicodeText feature_word_unicode = in ExtractCharactergramFeaturesUnicode()
276 UnicodeText::const_iterator it_start = feature_word_unicode.begin(); in ExtractCharactergramFeaturesUnicode()
277 UnicodeText::const_iterator it_end = feature_word_unicode.end(); in ExtractCharactergramFeaturesUnicode()
283 UnicodeText::const_iterator it_chargram_start = it_start; in ExtractCharactergramFeaturesUnicode()
284 UnicodeText::const_iterator it_chargram_end = it_start; in ExtractCharactergramFeaturesUnicode()
Dtokenizer-utils.h69 const UnicodeText input_unicode = UTF8ToUnicodeText(input, /*do_copy=*/false); in TokenizeWithFilter()
71 UnicodeText::const_iterator start_it = input_unicode.begin(); in TokenizeWithFilter()
79 const std::string token_text = UnicodeText::UTF8Substring(start_it, it); in TokenizeWithFilter()
86 UnicodeText::UTF8Substring(it, std::next(it)); in TokenizeWithFilter()
97 UnicodeText::UTF8Substring(start_it, input_unicode.end()); in TokenizeWithFilter()
/external/libtextclassifier/native/annotator/number/
Dnumber.h55 bool ClassifyText(const UnicodeText& context, CodepointSpan selection_indices,
60 bool FindAll(const UnicodeText& context_unicode,
73 void FindPercentages(const UnicodeText& context,
102 bool TryParseNumber(const UnicodeText& token_text, bool is_negative,
107 bool IsCJTterm(UnicodeText::const_iterator token_begin_it,
/external/libtextclassifier/native/annotator/pod_ner/
Dpod-ner-impl.h40 bool Annotate(const UnicodeText &context,
46 bool SuggestSelection(const UnicodeText &context, CodepointSpan click,
49 bool ClassifyText(const UnicodeText &context, CodepointSpan click,
65 bool PrepareText(const UnicodeText &text_unicode,
70 bool AnnotateAroundSpanOfInterest(const UnicodeText &context,
/external/libtextclassifier/native/actions/
Dutils.h44 const UnicodeText& match_text, const std::string& smart_reply_action_type,
48 UnicodeText NormalizeMatchText(
53 UnicodeText NormalizeMatchText(
56 const UnicodeText match_text);
/external/libtextclassifier/native/annotator/vocab/
Dvocab-annotator-impl.cc61 const UnicodeText& context, in Annotate()
79 const UnicodeText& context, CodepointSpan click, in ClassifyText()
89 const UnicodeText& context, const CodepointSpan click, in ClassifyTextInternal()
105 const UnicodeText stripped_token = UnicodeText::Substring( in ClassifyTextInternal()

123