1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "annotator/types.h"
18 
19 #include <vector>
20 
21 #include "utils/optional.h"
22 
23 namespace libtextclassifier3 {
24 
25 const CodepointSpan CodepointSpan::kInvalid =
26     CodepointSpan(kInvalidIndex, kInvalidIndex);
27 
28 const TokenSpan TokenSpan::kInvalid = TokenSpan(kInvalidIndex, kInvalidIndex);
29 
operator <<(logging::LoggingStringStream & stream,const CodepointSpan & span)30 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
31                                          const CodepointSpan& span) {
32   return stream << "CodepointSpan(" << span.first << ", " << span.second << ")";
33 }
34 
operator <<(logging::LoggingStringStream & stream,const TokenSpan & span)35 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
36                                          const TokenSpan& span) {
37   return stream << "TokenSpan(" << span.first << ", " << span.second << ")";
38 }
39 
operator <<(logging::LoggingStringStream & stream,const Token & token)40 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
41                                          const Token& token) {
42   if (!token.is_padding) {
43     return stream << "Token(\"" << token.value << "\", " << token.start << ", "
44                   << token.end << ")";
45   } else {
46     return stream << "Token()";
47   }
48 }
49 
ShouldRoundToGranularity() const50 bool DatetimeComponent::ShouldRoundToGranularity() const {
51   // Don't round to the granularity for relative expressions that specify the
52   // distance. So that, e.g. "in 2 hours" when it's 8:35:03 will result in
53   // 10:35:03.
54   if (relative_qualifier == RelativeQualifier::UNSPECIFIED) {
55     return false;
56   }
57   if (relative_qualifier == RelativeQualifier::NEXT ||
58       relative_qualifier == RelativeQualifier::TOMORROW ||
59       relative_qualifier == RelativeQualifier::YESTERDAY ||
60       relative_qualifier == RelativeQualifier::LAST ||
61       relative_qualifier == RelativeQualifier::THIS ||
62       relative_qualifier == RelativeQualifier::NOW) {
63     return true;
64   }
65   return false;
66 }
67 
68 namespace {
FormatMillis(int64 time_ms_utc)69 std::string FormatMillis(int64 time_ms_utc) {
70   long time_seconds = time_ms_utc / 1000;  // NOLINT
71   char buffer[512];
72   strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z",
73            localtime(&time_seconds));
74   return std::string(buffer);
75 }
76 }  // namespace
77 
ComponentTypeToString(const DatetimeComponent::ComponentType & component_type)78 std::string ComponentTypeToString(
79     const DatetimeComponent::ComponentType& component_type) {
80   switch (component_type) {
81     case DatetimeComponent::ComponentType::UNSPECIFIED:
82       return "UNSPECIFIED";
83     case DatetimeComponent::ComponentType::YEAR:
84       return "YEAR";
85     case DatetimeComponent::ComponentType::MONTH:
86       return "MONTH";
87     case DatetimeComponent::ComponentType::WEEK:
88       return "WEEK";
89     case DatetimeComponent::ComponentType::DAY_OF_WEEK:
90       return "DAY_OF_WEEK";
91     case DatetimeComponent::ComponentType::DAY_OF_MONTH:
92       return "DAY_OF_MONTH";
93     case DatetimeComponent::ComponentType::HOUR:
94       return "HOUR";
95     case DatetimeComponent::ComponentType::MINUTE:
96       return "MINUTE";
97     case DatetimeComponent::ComponentType::SECOND:
98       return "SECOND";
99     case DatetimeComponent::ComponentType::MERIDIEM:
100       return "MERIDIEM";
101     case DatetimeComponent::ComponentType::ZONE_OFFSET:
102       return "ZONE_OFFSET";
103     case DatetimeComponent::ComponentType::DST_OFFSET:
104       return "DST_OFFSET";
105     default:
106       return "";
107   }
108 }
109 
RelativeQualifierToString(const DatetimeComponent::RelativeQualifier & relative_qualifier)110 std::string RelativeQualifierToString(
111     const DatetimeComponent::RelativeQualifier& relative_qualifier) {
112   switch (relative_qualifier) {
113     case DatetimeComponent::RelativeQualifier::UNSPECIFIED:
114       return "UNSPECIFIED";
115     case DatetimeComponent::RelativeQualifier::NEXT:
116       return "NEXT";
117     case DatetimeComponent::RelativeQualifier::THIS:
118       return "THIS";
119     case DatetimeComponent::RelativeQualifier::LAST:
120       return "LAST";
121     case DatetimeComponent::RelativeQualifier::NOW:
122       return "NOW";
123     case DatetimeComponent::RelativeQualifier::TOMORROW:
124       return "TOMORROW";
125     case DatetimeComponent::RelativeQualifier::YESTERDAY:
126       return "YESTERDAY";
127     case DatetimeComponent::RelativeQualifier::PAST:
128       return "PAST";
129     case DatetimeComponent::RelativeQualifier::FUTURE:
130       return "FUTURE";
131     default:
132       return "";
133   }
134 }
135 
operator <<(logging::LoggingStringStream & stream,const DatetimeParseResultSpan & value)136 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
137                                          const DatetimeParseResultSpan& value) {
138   stream << "DatetimeParseResultSpan({" << value.span.first << ", "
139          << value.span.second << "}, "
140          << "/*target_classification_score=*/ "
141          << value.target_classification_score << "/*priority_score=*/"
142          << value.priority_score << " {";
143   for (const DatetimeParseResult& data : value.data) {
144     stream << "{/*time_ms_utc=*/ " << data.time_ms_utc << " /* "
145            << FormatMillis(data.time_ms_utc) << " */, /*granularity=*/ "
146            << data.granularity << ", /*datetime_components=*/ ";
147     for (const DatetimeComponent& datetime_comp : data.datetime_components) {
148       stream << "{/*component_type=*/ "
149              << ComponentTypeToString(datetime_comp.component_type)
150              << " /*relative_qualifier=*/ "
151              << RelativeQualifierToString(datetime_comp.relative_qualifier)
152              << " /*value=*/ " << datetime_comp.value << " /*relative_count=*/ "
153              << datetime_comp.relative_count << "}, ";
154     }
155     stream << "}, ";
156   }
157   stream << "})";
158   return stream;
159 }
160 
operator ==(const ClassificationResult & other) const161 bool ClassificationResult::operator==(const ClassificationResult& other) const {
162   return ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(
163              *this, other) &&
164          fabs(score - other.score) < 0.001 &&
165          fabs(priority_score - other.priority_score) < 0.001 &&
166          serialized_entity_data == other.serialized_entity_data;
167 }
168 
ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(const ClassificationResult & a,const ClassificationResult & b)169 bool ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(
170     const ClassificationResult& a, const ClassificationResult& b) {
171   return a.collection == b.collection &&
172          a.datetime_parse_result == b.datetime_parse_result &&
173          a.serialized_knowledge_result == b.serialized_knowledge_result &&
174          a.contact_pointer == b.contact_pointer &&
175          a.contact_name == b.contact_name &&
176          a.contact_given_name == b.contact_given_name &&
177          a.contact_family_name == b.contact_family_name &&
178          a.contact_nickname == b.contact_nickname &&
179          a.contact_email_address == b.contact_email_address &&
180          a.contact_phone_number == b.contact_phone_number &&
181          a.contact_id == b.contact_id &&
182          a.app_package_name == b.app_package_name &&
183          a.numeric_value == b.numeric_value &&
184          fabs(a.numeric_double_value - b.numeric_double_value) < 0.001 &&
185          a.duration_ms == b.duration_ms;
186 }
187 
operator <<(logging::LoggingStringStream & stream,const ClassificationResult & result)188 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
189                                          const ClassificationResult& result) {
190   return stream << "ClassificationResult(" << result.collection
191                 << ", /*score=*/ " << result.score << ", /*priority_score=*/ "
192                 << result.priority_score << ")";
193 }
194 
operator <<(logging::LoggingStringStream & stream,const std::vector<ClassificationResult> & results)195 logging::LoggingStringStream& operator<<(
196     logging::LoggingStringStream& stream,
197     const std::vector<ClassificationResult>& results) {
198   stream = stream << "{\n";
199   for (const ClassificationResult& result : results) {
200     stream = stream << "    " << result << "\n";
201   }
202   stream = stream << "}";
203   return stream;
204 }
205 
operator <<(logging::LoggingStringStream & stream,const AnnotatedSpan & span)206 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
207                                          const AnnotatedSpan& span) {
208   std::string best_class;
209   float best_score = -1;
210   if (!span.classification.empty()) {
211     best_class = span.classification[0].collection;
212     best_score = span.classification[0].score;
213   }
214   return stream << "Span(" << span.span.first << ", " << span.span.second
215                 << ", " << best_class << ", " << best_score << ")";
216 }
217 
operator <<(logging::LoggingStringStream & stream,const DatetimeParsedData & data)218 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
219                                          const DatetimeParsedData& data) {
220   std::vector<DatetimeComponent> date_time_components;
221   data.GetDatetimeComponents(&date_time_components);
222   stream = stream << "DatetimeParsedData { \n";
223   for (const DatetimeComponent& c : date_time_components) {
224     stream = stream << " DatetimeComponent { \n";
225     stream = stream << "  Component Type:" << static_cast<int>(c.component_type)
226                     << "\n";
227     stream = stream << "  Value:" << c.value << "\n";
228     stream = stream << "  Relative Qualifier:"
229                     << static_cast<int>(c.relative_qualifier) << "\n";
230     stream = stream << "  Relative Count:" << c.relative_count << "\n";
231     stream = stream << " } \n";
232   }
233   stream = stream << "}";
234   return stream;
235 }
236 
SetAbsoluteValue(const DatetimeComponent::ComponentType & field_type,int value)237 void DatetimeParsedData::SetAbsoluteValue(
238     const DatetimeComponent::ComponentType& field_type, int value) {
239   GetOrCreateDatetimeComponent(field_type).value = value;
240 }
241 
SetRelativeValue(const DatetimeComponent::ComponentType & field_type,const DatetimeComponent::RelativeQualifier & relative_value)242 void DatetimeParsedData::SetRelativeValue(
243     const DatetimeComponent::ComponentType& field_type,
244     const DatetimeComponent::RelativeQualifier& relative_value) {
245   GetOrCreateDatetimeComponent(field_type).relative_qualifier = relative_value;
246 }
247 
SetRelativeCount(const DatetimeComponent::ComponentType & field_type,int relative_count)248 void DatetimeParsedData::SetRelativeCount(
249     const DatetimeComponent::ComponentType& field_type, int relative_count) {
250   GetOrCreateDatetimeComponent(field_type).relative_count = relative_count;
251 }
252 
AddDatetimeComponents(const std::vector<DatetimeComponent> & datetime_components)253 void DatetimeParsedData::AddDatetimeComponents(
254     const std::vector<DatetimeComponent>& datetime_components) {
255   for (const DatetimeComponent& datetime_component : datetime_components) {
256     date_time_components_.insert(
257         {datetime_component.component_type, datetime_component});
258   }
259 }
260 
HasFieldType(const DatetimeComponent::ComponentType & field_type) const261 bool DatetimeParsedData::HasFieldType(
262     const DatetimeComponent::ComponentType& field_type) const {
263   if (date_time_components_.find(field_type) == date_time_components_.end()) {
264     return false;
265   }
266   return true;
267 }
268 
GetFieldValue(const DatetimeComponent::ComponentType & field_type,int * field_value) const269 bool DatetimeParsedData::GetFieldValue(
270     const DatetimeComponent::ComponentType& field_type,
271     int* field_value) const {
272   if (HasFieldType(field_type)) {
273     *field_value = date_time_components_.at(field_type).value;
274     return true;
275   }
276   return false;
277 }
278 
GetRelativeValue(const DatetimeComponent::ComponentType & field_type,DatetimeComponent::RelativeQualifier * relative_value) const279 bool DatetimeParsedData::GetRelativeValue(
280     const DatetimeComponent::ComponentType& field_type,
281     DatetimeComponent::RelativeQualifier* relative_value) const {
282   if (HasFieldType(field_type)) {
283     *relative_value = date_time_components_.at(field_type).relative_qualifier;
284     return true;
285   }
286   return false;
287 }
288 
HasRelativeValue(const DatetimeComponent::ComponentType & field_type) const289 bool DatetimeParsedData::HasRelativeValue(
290     const DatetimeComponent::ComponentType& field_type) const {
291   if (HasFieldType(field_type)) {
292     return date_time_components_.at(field_type).relative_qualifier !=
293            DatetimeComponent::RelativeQualifier::UNSPECIFIED;
294   }
295   return false;
296 }
297 
HasAbsoluteValue(const DatetimeComponent::ComponentType & field_type) const298 bool DatetimeParsedData::HasAbsoluteValue(
299     const DatetimeComponent::ComponentType& field_type) const {
300   return HasFieldType(field_type) && !HasRelativeValue(field_type);
301 }
302 
IsEmpty() const303 bool DatetimeParsedData::IsEmpty() const {
304   return date_time_components_.empty();
305 }
306 
GetRelativeDatetimeComponents(std::vector<DatetimeComponent> * date_time_components) const307 void DatetimeParsedData::GetRelativeDatetimeComponents(
308     std::vector<DatetimeComponent>* date_time_components) const {
309   for (auto it = date_time_components_.begin();
310        it != date_time_components_.end(); it++) {
311     if (it->second.relative_qualifier !=
312         DatetimeComponent::RelativeQualifier::UNSPECIFIED) {
313       date_time_components->push_back(it->second);
314     }
315   }
316 }
317 
GetDatetimeComponents(std::vector<DatetimeComponent> * date_time_components) const318 void DatetimeParsedData::GetDatetimeComponents(
319     std::vector<DatetimeComponent>* date_time_components) const {
320   for (auto it = date_time_components_.begin();
321        it != date_time_components_.end(); it++) {
322     date_time_components->push_back(it->second);
323   }
324 }
325 
// Returns a mutable reference to the component stored for `component_type`.
// If none exists yet, one constructed from (component_type, UNSPECIFIED, 0, 0)
// is inserted first; an already stored component is returned unchanged.
DatetimeComponent& DatetimeParsedData::GetOrCreateDatetimeComponent(
    const DatetimeComponent::ComponentType& component_type) {
  const DatetimeComponent blank_component(
      component_type, DatetimeComponent::RelativeQualifier::UNSPECIFIED, 0, 0);
  // insert() yields an iterator to the entry for this key whether or not the
  // insertion actually took place.
  const auto position =
      date_time_components_.insert({component_type, blank_component}).first;
  return position->second;
}
338 
339 namespace {
GetFinestGranularityFromComponentTypes(const std::vector<DatetimeComponent::ComponentType> & datetime_component_types)340 DatetimeGranularity GetFinestGranularityFromComponentTypes(
341     const std::vector<DatetimeComponent::ComponentType>&
342         datetime_component_types) {
343   DatetimeGranularity granularity = DatetimeGranularity::GRANULARITY_UNKNOWN;
344   for (const auto& component_type : datetime_component_types) {
345     switch (component_type) {
346       case DatetimeComponent::ComponentType::YEAR:
347         if (granularity < DatetimeGranularity::GRANULARITY_YEAR) {
348           granularity = DatetimeGranularity::GRANULARITY_YEAR;
349         }
350         break;
351 
352       case DatetimeComponent::ComponentType::MONTH:
353         if (granularity < DatetimeGranularity::GRANULARITY_MONTH) {
354           granularity = DatetimeGranularity::GRANULARITY_MONTH;
355         }
356         break;
357 
358       case DatetimeComponent::ComponentType::WEEK:
359         if (granularity < DatetimeGranularity::GRANULARITY_WEEK) {
360           granularity = DatetimeGranularity::GRANULARITY_WEEK;
361         }
362         break;
363 
364       case DatetimeComponent::ComponentType::DAY_OF_WEEK:
365       case DatetimeComponent::ComponentType::DAY_OF_MONTH:
366         if (granularity < DatetimeGranularity::GRANULARITY_DAY) {
367           granularity = DatetimeGranularity::GRANULARITY_DAY;
368         }
369         break;
370 
371       case DatetimeComponent::ComponentType::HOUR:
372         if (granularity < DatetimeGranularity::GRANULARITY_HOUR) {
373           granularity = DatetimeGranularity::GRANULARITY_HOUR;
374         }
375         break;
376 
377       case DatetimeComponent::ComponentType::MINUTE:
378         if (granularity < DatetimeGranularity::GRANULARITY_MINUTE) {
379           granularity = DatetimeGranularity::GRANULARITY_MINUTE;
380         }
381         break;
382 
383       case DatetimeComponent::ComponentType::SECOND:
384         if (granularity < DatetimeGranularity::GRANULARITY_SECOND) {
385           granularity = DatetimeGranularity::GRANULARITY_SECOND;
386         }
387         break;
388 
389       case DatetimeComponent::ComponentType::MERIDIEM:
390       case DatetimeComponent::ComponentType::ZONE_OFFSET:
391       case DatetimeComponent::ComponentType::DST_OFFSET:
392       default:
393         break;
394     }
395   }
396   return granularity;
397 }
398 }  // namespace
399 
GetFinestGranularity() const400 DatetimeGranularity DatetimeParsedData::GetFinestGranularity() const {
401   std::vector<DatetimeComponent::ComponentType> component_types;
402   std::transform(date_time_components_.begin(), date_time_components_.end(),
403                  std::back_inserter(component_types),
404                  [](const std::map<DatetimeComponent::ComponentType,
405                                    DatetimeComponent>::value_type& pair) {
406                    return pair.first;
407                  });
408   return GetFinestGranularityFromComponentTypes(component_types);
409 }
410 
GetDatetimeComponent(const std::vector<DatetimeComponent> & datetime_components,const DatetimeComponent::ComponentType & component_type)411 Optional<DatetimeComponent> GetDatetimeComponent(
412     const std::vector<DatetimeComponent>& datetime_components,
413     const DatetimeComponent::ComponentType& component_type) {
414   for (auto datetime_component : datetime_components) {
415     if (datetime_component.component_type == component_type) {
416       return Optional<DatetimeComponent>(datetime_component);
417     }
418   }
419   return Optional<DatetimeComponent>();
420 }
421 
422 // Returns the granularity of the DatetimeComponents.
GetFinestGranularity(const std::vector<DatetimeComponent> & datetime_component)423 DatetimeGranularity GetFinestGranularity(
424     const std::vector<DatetimeComponent>& datetime_component) {
425   std::vector<DatetimeComponent::ComponentType> component_types;
426   std::transform(datetime_component.begin(), datetime_component.end(),
427                  std::back_inserter(component_types),
428                  [](const DatetimeComponent& component) {
429                    return component.component_type;
430                  });
431   return GetFinestGranularityFromComponentTypes(component_types);
432 }
433 
434 }  // namespace libtextclassifier3
435