1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <time.h>
18 #include <fstream>
19 #include <iostream>
20 #include <memory>
21 #include <string>
22 
23 #include "gmock/gmock.h"
24 #include "gtest/gtest.h"
25 
26 #include "annotator/annotator.h"
27 #include "annotator/datetime/parser.h"
28 #include "annotator/model_generated.h"
29 #include "annotator/types-test-util.h"
30 #include "utils/testing/annotator.h"
31 
32 using testing::ElementsAreArray;
33 
34 namespace libtextclassifier3 {
35 namespace {
36 
GetModelPath()37 std::string GetModelPath() {
38   return TC3_TEST_DATA_DIR;
39 }
40 
// Reads the whole file `file_name` and returns its contents as a string.
//
// The stream is opened in binary mode: the caller in this file loads a ".fb"
// model buffer, and a text-mode stream may translate line endings on some
// platforms, which would corrupt such a payload.
//
// Returns an empty string when the file cannot be opened.
std::string ReadFile(const std::string& file_name) {
  std::ifstream file_stream(file_name, std::ios::in | std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file_stream),
                     std::istreambuf_iterator<char>());
}
45 
46 class ParserTest : public testing::Test {
47  public:
SetUp()48   void SetUp() override {
49     // Loads default unmodified model. Individual tests can call LoadModel to
50     // make changes.
51     LoadModel([](ModelT* model) {});
52   }
53 
54   template <typename Fn>
LoadModel(Fn model_visitor_fn)55   void LoadModel(Fn model_visitor_fn) {
56     std::string model_buffer = ReadFile(GetModelPath() + "test_model.fb");
57     model_buffer_ = ModifyAnnotatorModel(model_buffer, model_visitor_fn);
58     classifier_ = Annotator::FromUnownedBuffer(model_buffer_.data(),
59                                                model_buffer_.size(), &unilib_);
60     TC3_CHECK(classifier_);
61     parser_ = classifier_->DatetimeParserForTests();
62     TC3_CHECK(parser_);
63   }
64 
HasNoResult(const std::string & text,bool anchor_start_end=false,const std::string & timezone="Europe/Zurich",AnnotationUsecase annotation_usecase=AnnotationUsecase_ANNOTATION_USECASE_SMART)65   bool HasNoResult(const std::string& text, bool anchor_start_end = false,
66                    const std::string& timezone = "Europe/Zurich",
67                    AnnotationUsecase annotation_usecase =
68                        AnnotationUsecase_ANNOTATION_USECASE_SMART) {
69     std::vector<DatetimeParseResultSpan> results;
70     if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
71                         annotation_usecase, anchor_start_end, &results)) {
72       TC3_LOG(ERROR) << text;
73       TC3_CHECK(false);
74     }
75     return results.empty();
76   }
77 
ParsesCorrectly(const std::string & marked_text,const std::vector<int64> & expected_ms_utcs,DatetimeGranularity expected_granularity,bool anchor_start_end=false,const std::string & timezone="Europe/Zurich",const std::string & locales="en-US",AnnotationUsecase annotation_usecase=AnnotationUsecase_ANNOTATION_USECASE_SMART)78   bool ParsesCorrectly(const std::string& marked_text,
79                        const std::vector<int64>& expected_ms_utcs,
80                        DatetimeGranularity expected_granularity,
81                        bool anchor_start_end = false,
82                        const std::string& timezone = "Europe/Zurich",
83                        const std::string& locales = "en-US",
84                        AnnotationUsecase annotation_usecase =
85                            AnnotationUsecase_ANNOTATION_USECASE_SMART) {
86     const UnicodeText marked_text_unicode =
87         UTF8ToUnicodeText(marked_text, /*do_copy=*/false);
88     auto brace_open_it =
89         std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '{');
90     auto brace_end_it =
91         std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '}');
92     TC3_CHECK(brace_open_it != marked_text_unicode.end());
93     TC3_CHECK(brace_end_it != marked_text_unicode.end());
94 
95     std::string text;
96     text +=
97         UnicodeText::UTF8Substring(marked_text_unicode.begin(), brace_open_it);
98     text += UnicodeText::UTF8Substring(std::next(brace_open_it), brace_end_it);
99     text += UnicodeText::UTF8Substring(std::next(brace_end_it),
100                                        marked_text_unicode.end());
101 
102     std::vector<DatetimeParseResultSpan> results;
103 
104     if (!parser_->Parse(text, 0, timezone, locales, ModeFlag_ANNOTATION,
105                         annotation_usecase, anchor_start_end, &results)) {
106       TC3_LOG(ERROR) << text;
107       TC3_CHECK(false);
108     }
109     if (results.empty()) {
110       TC3_LOG(ERROR) << "No results.";
111       return false;
112     }
113 
114     const int expected_start_index =
115         std::distance(marked_text_unicode.begin(), brace_open_it);
116     // The -1 bellow is to account for the opening bracket character.
117     const int expected_end_index =
118         std::distance(marked_text_unicode.begin(), brace_end_it) - 1;
119 
120     std::vector<DatetimeParseResultSpan> filtered_results;
121     for (const DatetimeParseResultSpan& result : results) {
122       if (SpansOverlap(result.span,
123                        {expected_start_index, expected_end_index})) {
124         filtered_results.push_back(result);
125       }
126     }
127 
128     std::vector<DatetimeParseResultSpan> expected{
129         {{expected_start_index, expected_end_index},
130          {},
131          /*target_classification_score=*/1.0,
132          /*priority_score=*/0.1}};
133     expected[0].data.resize(expected_ms_utcs.size());
134     for (int i = 0; i < expected_ms_utcs.size(); i++) {
135       expected[0].data[i] = {expected_ms_utcs[i], expected_granularity};
136     }
137 
138     const bool matches =
139         testing::Matches(ElementsAreArray(expected))(filtered_results);
140     if (!matches) {
141       TC3_LOG(ERROR) << "Expected: " << expected[0];
142       if (filtered_results.empty()) {
143         TC3_LOG(ERROR) << "But got no results.";
144       }
145       TC3_LOG(ERROR) << "Actual: " << filtered_results[0];
146     }
147 
148     return matches;
149   }
150 
ParsesCorrectly(const std::string & marked_text,const int64 expected_ms_utc,DatetimeGranularity expected_granularity,bool anchor_start_end=false,const std::string & timezone="Europe/Zurich",const std::string & locales="en-US",AnnotationUsecase annotation_usecase=AnnotationUsecase_ANNOTATION_USECASE_SMART)151   bool ParsesCorrectly(const std::string& marked_text,
152                        const int64 expected_ms_utc,
153                        DatetimeGranularity expected_granularity,
154                        bool anchor_start_end = false,
155                        const std::string& timezone = "Europe/Zurich",
156                        const std::string& locales = "en-US",
157                        AnnotationUsecase annotation_usecase =
158                            AnnotationUsecase_ANNOTATION_USECASE_SMART) {
159     return ParsesCorrectly(marked_text, std::vector<int64>{expected_ms_utc},
160                            expected_granularity, anchor_start_end, timezone,
161                            locales, annotation_usecase);
162   }
163 
ParsesCorrectlyGerman(const std::string & marked_text,const std::vector<int64> & expected_ms_utcs,DatetimeGranularity expected_granularity)164   bool ParsesCorrectlyGerman(const std::string& marked_text,
165                              const std::vector<int64>& expected_ms_utcs,
166                              DatetimeGranularity expected_granularity) {
167     return ParsesCorrectly(marked_text, expected_ms_utcs, expected_granularity,
168                            /*anchor_start_end=*/false,
169                            /*timezone=*/"Europe/Zurich", /*locales=*/"de");
170   }
171 
ParsesCorrectlyGerman(const std::string & marked_text,const int64 expected_ms_utc,DatetimeGranularity expected_granularity)172   bool ParsesCorrectlyGerman(const std::string& marked_text,
173                              const int64 expected_ms_utc,
174                              DatetimeGranularity expected_granularity) {
175     return ParsesCorrectly(marked_text, expected_ms_utc, expected_granularity,
176                            /*anchor_start_end=*/false,
177                            /*timezone=*/"Europe/Zurich", /*locales=*/"de");
178   }
179 
180  protected:
181   std::string model_buffer_;
182   std::unique_ptr<Annotator> classifier_;
183   const DatetimeParser* parser_;
184   UniLib unilib_;
185 };
186 
187 // Test with just a few cases to make debugging of general failures easier.
TEST_F(ParserTest,ParseShort)188 TEST_F(ParserTest, ParseShort) {
189   EXPECT_TRUE(
190       ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
191 }
192 
TEST_F(ParserTest,Parse)193 TEST_F(ParserTest, Parse) {
194   EXPECT_TRUE(
195       ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
196   EXPECT_TRUE(
197       ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY));
198   EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
199                               GRANULARITY_DAY));
200   EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000,
201                               GRANULARITY_SECOND));
202   EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000,
203                               GRANULARITY_SECOND));
204   EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000,
205                               GRANULARITY_SECOND));
206   EXPECT_TRUE(ParsesCorrectly("{Mar 16 08:12:04}", {6419524000, 6462724000},
207                               GRANULARITY_SECOND));
208   EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29}",
209                               {1277512289000, 1277555489000},
210                               GRANULARITY_SECOND));
211   EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}",
212                               {1137899465000, 1137942665000},
213                               GRANULARITY_SECOND));
214   EXPECT_TRUE(
215       ParsesCorrectly("{11:42:35}", {38555000, 81755000}, GRANULARITY_SECOND));
216   EXPECT_TRUE(ParsesCorrectly("{23/Apr 11:42:35}", {9715355000, 9758555000},
217                               GRANULARITY_SECOND));
218   EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}",
219                               {1429782155000, 1429825355000},
220                               GRANULARITY_SECOND));
221   EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}",
222                               {1429782155000, 1429825355000},
223                               GRANULARITY_SECOND));
224   EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}",
225                               {1429782155000, 1429825355000},
226                               GRANULARITY_SECOND));
227   EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}",
228                               {1429782155000, 1429825355000},
229                               GRANULARITY_SECOND));
230   EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}",
231                               {1429782155000, 1429825355000},
232                               GRANULARITY_SECOND));
233   EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
234                               GRANULARITY_SECOND));
235   EXPECT_TRUE(ParsesCorrectly(
236       "Are sentiments apartments decisively the especially alteration. "
237       "Thrown shy denote ten ladies though ask saw. Or by to he going "
238       "think order event music. Incommode so intention defective at "
239       "convinced. Led income months itself and houses you. After nor "
240       "you leave might share court balls. {19/apr/2010 06:36:15} Are "
241       "sentiments apartments decisively the especially alteration. "
242       "Thrown shy denote ten ladies though ask saw. Or by to he going "
243       "think order event music. Incommode so intention defective at "
244       "convinced. Led income months itself and houses you. After nor "
245       "you leave might share court balls. ",
246       {1271651775000, 1271694975000}, GRANULARITY_SECOND));
247   EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}",
248                               {1514777400000, 1514820600000},
249                               GRANULARITY_MINUTE));
250   EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30 am}", 1514777400000,
251                               GRANULARITY_MINUTE));
252   EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
253                               GRANULARITY_HOUR));
254 
255   EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", {-3600000, 39600000},
256                               GRANULARITY_MINUTE));
257   EXPECT_TRUE(ParsesCorrectly(
258       "{today at 0:00}", {-57600000, -14400000}, GRANULARITY_MINUTE,
259       /*anchor_start_end=*/false, "America/Los_Angeles"));
260   EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4:00}", {97200000, 140400000},
261                               GRANULARITY_MINUTE));
262   EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4am}", 97200000, GRANULARITY_HOUR));
263   EXPECT_TRUE(
264       ParsesCorrectly("{wednesday at 4am}", 529200000, GRANULARITY_HOUR));
265   EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000,
266                               GRANULARITY_MINUTE));
267   EXPECT_TRUE(ParsesCorrectly("set an alarm for {7am tomorrow}", 108000000,
268                               GRANULARITY_HOUR));
269   EXPECT_TRUE(
270       ParsesCorrectly("set an alarm for {7 a.m}", 21600000, GRANULARITY_HOUR));
271 }
272 
// Exercises the anchor_start_end flag: a date that makes up the whole text is
// found with and without anchoring, while a date surrounded by other words is
// found only when anchoring is off.
TEST_F(ParserTest, ParseWithAnchor) {
  const int64 jan_1_1988_ms = 567990000000;
  const int64 jan_1_2018_ms = 1514761200000;

  // The date is the entire text.
  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", jan_1_1988_ms,
                              GRANULARITY_DAY, /*anchor_start_end=*/false));
  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", jan_1_1988_ms,
                              GRANULARITY_DAY, /*anchor_start_end=*/true));

  // The date is embedded in a longer text.
  EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", jan_1_2018_ms,
                              GRANULARITY_DAY, /*anchor_start_end=*/false));
  EXPECT_TRUE(HasNoResult("lorem 1 january 2018 ipsum",
                          /*anchor_start_end=*/true));
}
283 
// Relative expressions are expected to be annotated for the RAW annotation
// usecase, while "{tomorrow}" is expected to yield nothing for the SMART
// usecase.
TEST_F(ParserTest, ParseWithRawUsecase) {
  // Forwards to ParsesCorrectly with every optional argument spelled out and
  // the usecase set to RAW.
  const auto parses_for_raw_usecase =
      [this](const std::string& marked_text, const int64 expected_ms_utc,
             DatetimeGranularity expected_granularity) {
        return ParsesCorrectly(
            marked_text, expected_ms_utc, expected_granularity,
            /*anchor_start_end=*/false,
            /*timezone=*/"Europe/Zurich", /*locales=*/"en-US",
            /*annotation_usecase=*/AnnotationUsecase_ANNOTATION_USECASE_RAW);
      };

  // Annotated for RAW usecase.
  EXPECT_TRUE(parses_for_raw_usecase("{tomorrow}", 82800000, GRANULARITY_DAY));
  EXPECT_TRUE(parses_for_raw_usecase("call me {in two hours}", 7200000,
                                     GRANULARITY_HOUR));
  EXPECT_TRUE(parses_for_raw_usecase("call me {next month}", 2674800000,
                                     GRANULARITY_MONTH));
  EXPECT_TRUE(parses_for_raw_usecase("what's the time {now}", -3600000,
                                     GRANULARITY_DAY));
  EXPECT_TRUE(parses_for_raw_usecase("call me on {Saturday}", 169200000,
                                     GRANULARITY_DAY));

  // Not annotated for Smart usecase. Note that HasNoResult does not strip the
  // braces, so they are part of the parsed text here.
  EXPECT_TRUE(HasNoResult(
      "{tomorrow}", /*anchor_start_end=*/false,
      /*timezone=*/"Europe/Zurich",
      /*annotation_usecase=*/AnnotationUsecase_ANNOTATION_USECASE_SMART));
}
320 
// "12:30am" and "12:30pm" on the same day must map to two expected values
// that are 12 hours (43200000 ms) apart.
TEST_F(ParserTest, ParsesNoonAndMidnightCorrectly) {
  const int64 half_past_midnight_ms = 567991800000;
  const int64 half_past_noon_ms = 568035000000;

  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988 12:30am}",
                              half_past_midnight_ms, GRANULARITY_MINUTE));
  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988 12:30pm}", half_past_noon_ms,
                              GRANULARITY_MINUTE));
}
327 
TEST_F(ParserTest,ParseGerman)328 TEST_F(ParserTest, ParseGerman) {
329   EXPECT_TRUE(
330       ParsesCorrectlyGerman("{Januar 1 2018}", 1514761200000, GRANULARITY_DAY));
331   EXPECT_TRUE(
332       ParsesCorrectlyGerman("{1 2 2018}", 1517439600000, GRANULARITY_DAY));
333   EXPECT_TRUE(ParsesCorrectlyGerman("lorem {1 Januar 2018} ipsum",
334                                     1514761200000, GRANULARITY_DAY));
335   EXPECT_TRUE(ParsesCorrectlyGerman("{19/Apr/2010:06:36:15}",
336                                     {1271651775000, 1271694975000},
337                                     GRANULARITY_SECOND));
338   EXPECT_TRUE(ParsesCorrectlyGerman("{09/März/2004 22:02:40}", 1078866160000,
339                                     GRANULARITY_SECOND));
340   EXPECT_TRUE(ParsesCorrectlyGerman("{Dez 2, 2010 2:39:58}",
341                                     {1291253998000, 1291297198000},
342                                     GRANULARITY_SECOND));
343   EXPECT_TRUE(ParsesCorrectlyGerman("{Juni 09 2011 15:28:14}", 1307626094000,
344                                     GRANULARITY_SECOND));
345   EXPECT_TRUE(ParsesCorrectlyGerman(
346       "{März 16 08:12:04}", {6419524000, 6462724000}, GRANULARITY_SECOND));
347   EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29}",
348                                     {1277512289000, 1277555489000},
349                                     GRANULARITY_SECOND));
350   EXPECT_TRUE(ParsesCorrectlyGerman("{2006/01/22 04:11:05}",
351                                     {1137899465000, 1137942665000},
352                                     GRANULARITY_SECOND));
353   EXPECT_TRUE(ParsesCorrectlyGerman("{11:42:35}", {38555000, 81755000},
354                                     GRANULARITY_SECOND));
355   EXPECT_TRUE(ParsesCorrectlyGerman(
356       "{23/Apr 11:42:35}", {9715355000, 9758555000}, GRANULARITY_SECOND));
357   EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015:11:42:35}",
358                                     {1429782155000, 1429825355000},
359                                     GRANULARITY_SECOND));
360   EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015 11:42:35}",
361                                     {1429782155000, 1429825355000},
362                                     GRANULARITY_SECOND));
363   EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}",
364                                     {1429782155000, 1429825355000},
365                                     GRANULARITY_SECOND));
366   EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}",
367                                     {1429782155000, 1429825355000},
368                                     GRANULARITY_SECOND));
369   EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/15 11:42:35}",
370                                     {1429782155000, 1429825355000},
371                                     GRANULARITY_SECOND));
372   EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}",
373                                     {1429782155000, 1429825355000},
374                                     GRANULARITY_SECOND));
375   EXPECT_TRUE(ParsesCorrectlyGerman("{19/apr/2010:06:36:15}",
376                                     {1271651775000, 1271694975000},
377                                     GRANULARITY_SECOND));
378   EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30}",
379                                     {1514777400000, 1514820600000},
380                                     GRANULARITY_MINUTE));
381   EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30 nachm}",
382                                     1514820600000, GRANULARITY_MINUTE));
383   EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4 nachm}", 1514818800000,
384                                     GRANULARITY_HOUR));
385   EXPECT_TRUE(
386       ParsesCorrectlyGerman("{14.03.2017}", 1489446000000, GRANULARITY_DAY));
387   EXPECT_TRUE(ParsesCorrectlyGerman("{morgen 0:00}", {82800000, 126000000},
388                                     GRANULARITY_MINUTE));
389   EXPECT_TRUE(ParsesCorrectlyGerman("{morgen um 4:00}", {97200000, 140400000},
390                                     GRANULARITY_MINUTE));
391   EXPECT_TRUE(
392       ParsesCorrectlyGerman("{morgen um 4 vorm}", 97200000, GRANULARITY_HOUR));
393 }
394 
// For the "en-GB" and plain "en" locale lists, "1/5/15" is expected to
// produce the same value. The US test for the same text expects a different
// one.
TEST_F(ParserTest, ParseNonUs) {
  const int64 expected_ms_utc = 1430431200000;

  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", expected_ms_utc, GRANULARITY_DAY,
                              /*anchor_start_end=*/false,
                              /*timezone=*/"Europe/Zurich",
                              /*locales=*/"en-GB"));
  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", expected_ms_utc, GRANULARITY_DAY,
                              /*anchor_start_end=*/false,
                              /*timezone=*/"Europe/Zurich",
                              /*locales=*/"en"));
}
404 
// For the "en-US" and "es-US" locale lists, "1/5/15" is expected to produce
// the same value. The non-US test for the same text expects a different one.
TEST_F(ParserTest, ParseUs) {
  const int64 expected_ms_utc = 1420412400000;

  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", expected_ms_utc, GRANULARITY_DAY,
                              /*anchor_start_end=*/false,
                              /*timezone=*/"Europe/Zurich",
                              /*locales=*/"en-US"));
  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", expected_ms_utc, GRANULARITY_DAY,
                              /*anchor_start_end=*/false,
                              /*timezone=*/"Europe/Zurich",
                              /*locales=*/"es-US"));
}
415 
// A numeric date must still be found when the locale list is "xx".
TEST_F(ParserTest, ParseUnknownLanguage) {
  const std::string marked_text = "bylo to {31. 12. 2015} v 6 hodin";
  const int64 expected_ms_utc = 1451516400000;

  EXPECT_TRUE(ParsesCorrectly(marked_text, expected_ms_utc, GRANULARITY_DAY,
                              /*anchor_start_end=*/false,
                              /*timezone=*/"Europe/Zurich", /*locales=*/"xx"));
}
422 
// With generate_alternative_interpretations_when_ambiguous switched on, a
// time without an am/pm marker is expected to yield two values, while a time
// with "pm" yields a single one.
TEST_F(ParserTest, WhenAlternativesEnabledGeneratesAlternatives) {
  const auto enable_alternatives = [](ModelT* model) {
    model->datetime_model->generate_alternative_interpretations_when_ambiguous =
        true;
  };
  LoadModel(enable_alternatives);

  // No am/pm marker: two expected values.
  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}",
                              {1514777400000, 1514820600000},
                              GRANULARITY_MINUTE));
  // Explicit "pm": one expected value.
  EXPECT_TRUE(ParsesCorrectly("{monday 3pm}", 396000000, GRANULARITY_HOUR));
  // No am/pm marker: two expected values.
  EXPECT_TRUE(ParsesCorrectly("{monday 3:00}", {352800000, 396000000},
                              GRANULARITY_MINUTE));
}
436 
// With generate_alternative_interpretations_when_ambiguous switched off, a
// time without an am/pm marker is expected to yield a single value.
TEST_F(ParserTest, WhenAlternativesDisabledDoesNotGenerateAlternatives) {
  const auto disable_alternatives = [](ModelT* model) {
    model->datetime_model->generate_alternative_interpretations_when_ambiguous =
        false;
  };
  LoadModel(disable_alternatives);

  const int64 expected_ms_utc = 1514777400000;
  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", expected_ms_utc,
                              GRANULARITY_MINUTE));
}
446 
447 class ParserLocaleTest : public testing::Test {
448  public:
449   void SetUp() override;
450   bool HasResult(const std::string& input, const std::string& locales);
451 
452  protected:
453   UniLib unilib_;
454   CalendarLib calendarlib_;
455   flatbuffers::FlatBufferBuilder builder_;
456   std::unique_ptr<DatetimeParser> parser_;
457 };
458 
AddPattern(const std::string & regex,int locale,std::vector<std::unique_ptr<DatetimeModelPatternT>> * patterns)459 void AddPattern(const std::string& regex, int locale,
460                 std::vector<std::unique_ptr<DatetimeModelPatternT>>* patterns) {
461   patterns->emplace_back(new DatetimeModelPatternT);
462   patterns->back()->regexes.emplace_back(new DatetimeModelPattern_::RegexT);
463   patterns->back()->regexes.back()->pattern = regex;
464   patterns->back()->regexes.back()->groups.push_back(
465       DatetimeGroupType_GROUP_UNUSED);
466   patterns->back()->locales.push_back(locale);
467 }
468 
SetUp()469 void ParserLocaleTest::SetUp() {
470   DatetimeModelT model;
471   model.use_extractors_for_locating = false;
472   model.locales.clear();
473   model.locales.push_back("en-US");
474   model.locales.push_back("en-CH");
475   model.locales.push_back("zh-Hant");
476   model.locales.push_back("en-*");
477   model.locales.push_back("zh-Hant-*");
478   model.locales.push_back("*-CH");
479   model.locales.push_back("default");
480   model.default_locales.push_back(6);
481 
482   AddPattern(/*regex=*/"en-US", /*locale=*/0, &model.patterns);
483   AddPattern(/*regex=*/"en-CH", /*locale=*/1, &model.patterns);
484   AddPattern(/*regex=*/"zh-Hant", /*locale=*/2, &model.patterns);
485   AddPattern(/*regex=*/"en-all", /*locale=*/3, &model.patterns);
486   AddPattern(/*regex=*/"zh-Hant-all", /*locale=*/4, &model.patterns);
487   AddPattern(/*regex=*/"all-CH", /*locale=*/5, &model.patterns);
488   AddPattern(/*regex=*/"default", /*locale=*/6, &model.patterns);
489 
490   builder_.Finish(DatetimeModel::Pack(builder_, &model));
491   const DatetimeModel* model_fb =
492       flatbuffers::GetRoot<DatetimeModel>(builder_.GetBufferPointer());
493   ASSERT_TRUE(model_fb);
494 
495   parser_ = DatetimeParser::Instance(model_fb, unilib_, calendarlib_,
496                                      /*decompressor=*/nullptr);
497   ASSERT_TRUE(parser_);
498 }
499 
HasResult(const std::string & input,const std::string & locales)500 bool ParserLocaleTest::HasResult(const std::string& input,
501                                  const std::string& locales) {
502   std::vector<DatetimeParseResultSpan> results;
503   EXPECT_TRUE(parser_->Parse(
504       input, /*reference_time_ms_utc=*/0,
505       /*reference_timezone=*/"", locales, ModeFlag_ANNOTATION,
506       AnnotationUsecase_ANNOTATION_USECASE_SMART, false, &results));
507   return results.size() == 1;
508 }
509 
// The input text is the literal regex of one pattern from SetUp, so a result
// means that pattern was active for the requested locale list.
TEST_F(ParserLocaleTest, English) {
  // Requesting "en-US" activates the en-US pattern but not the en-CH one.
  EXPECT_TRUE(HasResult(/*input=*/"en-US", /*locales=*/"en-US"));
  EXPECT_FALSE(HasResult(/*input=*/"en-CH", /*locales=*/"en-US"));

  // Requesting "en-CH" activates the en-CH pattern but not the en-US one.
  EXPECT_FALSE(HasResult(/*input=*/"en-US", /*locales=*/"en-CH"));
  EXPECT_TRUE(HasResult(/*input=*/"en-CH", /*locales=*/"en-CH"));

  // The default pattern is expected to be active as well.
  EXPECT_TRUE(HasResult(/*input=*/"default", /*locales=*/"en-CH"));
}
517 
// The "zh-Hant-all" pattern is registered for the "zh-Hant-*" locale in
// SetUp. It is expected to be active for "zh-Hant" with or without a region,
// and inactive when the script is missing.
TEST_F(ParserLocaleTest, TraditionalChinese) {
  // Script present.
  EXPECT_TRUE(HasResult(/*input=*/"zh-Hant-all", /*locales=*/"zh-Hant"));
  EXPECT_TRUE(HasResult(/*input=*/"zh-Hant-all", /*locales=*/"zh-Hant-TW"));
  EXPECT_TRUE(HasResult(/*input=*/"zh-Hant-all", /*locales=*/"zh-Hant-SG"));

  // Script missing.
  EXPECT_FALSE(HasResult(/*input=*/"zh-Hant-all", /*locales=*/"zh-SG"));
  EXPECT_FALSE(HasResult(/*input=*/"zh-Hant-all", /*locales=*/"zh"));

  // The default pattern is expected to be active in both situations.
  EXPECT_TRUE(HasResult(/*input=*/"default", /*locales=*/"zh"));
  EXPECT_TRUE(HasResult(/*input=*/"default", /*locales=*/"zh-Hant-SG"));
}
527 
// The "all-CH" pattern is registered for the "*-CH" locale and the "en-all"
// pattern for the "en-*" locale in SetUp. Both wildcards are expected to
// apply to "en-CH"; "*-CH" must not apply to "de-DE".
TEST_F(ParserLocaleTest, SwissEnglish) {
  // Region wildcard.
  EXPECT_TRUE(HasResult(/*input=*/"all-CH", /*locales=*/"de-CH"));
  EXPECT_TRUE(HasResult(/*input=*/"all-CH", /*locales=*/"en-CH"));

  // Language wildcard.
  EXPECT_TRUE(HasResult(/*input=*/"en-all", /*locales=*/"en-CH"));

  // Different region.
  EXPECT_FALSE(HasResult(/*input=*/"all-CH", /*locales=*/"de-DE"));

  // The default pattern is expected to be active for both Swiss locales.
  EXPECT_TRUE(HasResult(/*input=*/"default", /*locales=*/"de-CH"));
  EXPECT_TRUE(HasResult(/*input=*/"default", /*locales=*/"en-CH"));
}
536 
537 }  // namespace
538 }  // namespace libtextclassifier3
539