/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_ #define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_ #include #include #include #include #include #include "annotator/datetime/extractor.h" #include "annotator/model_generated.h" #include "annotator/types.h" #include "utils/base/integral_types.h" #include "utils/calendar/calendar.h" #include "utils/utf8/unilib.h" #include "utils/zlib/zlib.h" namespace libtextclassifier3 { // Parses datetime expressions in the input and resolves them to actual absolute // time. class DatetimeParser { public: static std::unique_ptr Instance( const DatetimeModel* model, const UniLib& unilib, const CalendarLib& calendarlib, ZlibDecompressor* decompressor); // Parses the dates in 'input' and fills result. Makes sure that the results // do not overlap. // If 'anchor_start_end' is true the extracted results need to start at the // beginning of 'input' and end at the end of it. bool Parse(const std::string& input, int64 reference_time_ms_utc, const std::string& reference_timezone, const std::string& locales, ModeFlag mode, AnnotationUsecase annotation_usecase, bool anchor_start_end, std::vector* results) const; // Same as above but takes UnicodeText. bool Parse(const UnicodeText& input, int64 reference_time_ms_utc, const std::string& reference_timezone, const std::string& locales, ModeFlag mode, AnnotationUsecase annotation_usecase, bool anchor_start_end, std::vector* results) const; #ifdef TC3_TEST_ONLY void TestOnlySetGenerateAlternativeInterpretationsWhenAmbiguous(bool value) { generate_alternative_interpretations_when_ambiguous_ = value; } #endif // TC3_TEST_ONLY protected: DatetimeParser(const DatetimeModel* model, const UniLib& unilib, const CalendarLib& calendarlib, ZlibDecompressor* decompressor); // Returns a list of locale ids for given locale spec string (comma-separated // locale names). Assigns the first parsed locale to reference_locale. std::vector ParseAndExpandLocales(const std::string& locales, std::string* reference_locale) const; // Helper function that finds datetime spans, only using the rules associated // with the given locales. bool FindSpansUsingLocales( const std::vector& locale_ids, const UnicodeText& input, const int64 reference_time_ms_utc, const std::string& reference_timezone, ModeFlag mode, AnnotationUsecase annotation_usecase, bool anchor_start_end, const std::string& reference_locale, std::unordered_set* executed_rules, std::vector* found_spans) const; bool ParseWithRule(const CompiledRule& rule, const UnicodeText& input, int64 reference_time_ms_utc, const std::string& reference_timezone, const std::string& reference_locale, const int locale_id, bool anchor_start_end, std::vector* result) const; void FillInterpretations(const DateParseData& parse, std::vector* interpretations) const; // Converts the current match in 'matcher' into DatetimeParseResult. bool ExtractDatetime(const CompiledRule& rule, const UniLib::RegexMatcher& matcher, int64 reference_time_ms_utc, const std::string& reference_timezone, const std::string& reference_locale, int locale_id, std::vector* results, CodepointSpan* result_span) const; // Parse and extract information from current match in 'matcher'. bool HandleParseMatch(const CompiledRule& rule, const UniLib::RegexMatcher& matcher, int64 reference_time_ms_utc, const std::string& reference_timezone, const std::string& reference_locale, int locale_id, std::vector* result) const; private: bool initialized_; const UniLib& unilib_; const CalendarLib& calendarlib_; std::vector rules_; std::unordered_map> locale_to_rules_; std::vector> extractor_rules_; std::unordered_map> type_and_locale_to_extractor_rule_; std::unordered_map locale_string_to_id_; std::vector default_locale_ids_; bool use_extractors_for_locating_; bool generate_alternative_interpretations_when_ambiguous_; }; } // namespace libtextclassifier3 #endif // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_