1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "annotator/pod_ner/pod-ner-impl.h"
18 
19 #include <iostream>
20 #include <memory>
21 #include <thread>  // NOLINT(build/c++11)
22 
23 #include "annotator/model_generated.h"
24 #include "annotator/types.h"
25 #include "utils/jvm-test-utils.h"
26 #include "utils/test-data-test-utils.h"
27 #include "utils/tokenizer-utils.h"
28 #include "utils/utf8/unicodetext.h"
29 #include "utils/utf8/unilib.h"
30 #include "gmock/gmock.h"
31 #include "gtest/gtest.h"
32 
33 namespace libtextclassifier3 {
34 namespace {
35 
36 using ::testing::IsEmpty;
37 using ::testing::Not;
38 
39 using PodNerModel_::Label_::BoiseType;
40 using PodNerModel_::Label_::BoiseType_BEGIN;
41 using PodNerModel_::Label_::BoiseType_END;
42 using PodNerModel_::Label_::BoiseType_INTERMEDIATE;
43 using PodNerModel_::Label_::BoiseType_O;
44 using PodNerModel_::Label_::BoiseType_SINGLE;
45 using PodNerModel_::Label_::MentionType;
46 using PodNerModel_::Label_::MentionType_NAM;
47 using PodNerModel_::Label_::MentionType_NOM;
48 using PodNerModel_::Label_::MentionType_UNDEFINED;
49 
// Values written into the PodNerModelT that the PodNerTest fixture packs; the
// default priority score is also what AnnotateDefaultCollections expects back.
constexpr int kMinNumberOfTokens{1};
constexpr int kMinNumberOfWordpieces{1};
constexpr float kDefaultPriorityScore{0.5f};
53 
54 class PodNerTest : public testing::Test {
55  protected:
PodNerTest()56   PodNerTest() {
57     PodNerModelT model;
58 
59     model.min_number_of_tokens = kMinNumberOfTokens;
60     model.min_number_of_wordpieces = kMinNumberOfWordpieces;
61     model.priority_score = kDefaultPriorityScore;
62 
63     const std::string tflite_model_buffer =
64         GetTestFileContent("annotator/pod_ner/test_data/tflite_model.tflite");
65     model.tflite_model = std::vector<uint8_t>(tflite_model_buffer.begin(),
66                                               tflite_model_buffer.end());
67     const std::string word_piece_vocab_buffer =
68         GetTestFileContent("annotator/pod_ner/test_data/vocab.txt");
69     model.word_piece_vocab = std::vector<uint8_t>(
70         word_piece_vocab_buffer.begin(), word_piece_vocab_buffer.end());
71 
72     flatbuffers::FlatBufferBuilder builder;
73     builder.Finish(PodNerModel::Pack(builder, &model));
74 
75     model_buffer_ =
76         std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
77                     builder.GetSize());
78     model_ = static_cast<const PodNerModel*>(
79         flatbuffers::GetRoot<PodNerModel>(model_buffer_.data()));
80 
81     model.append_final_period = true;
82     flatbuffers::FlatBufferBuilder builder_append_final_period;
83     builder_append_final_period.Finish(
84         PodNerModel::Pack(builder_append_final_period, &model));
85 
86     model_buffer_append_final_period_ =
87         std::string(reinterpret_cast<const char*>(
88                         builder_append_final_period.GetBufferPointer()),
89                     builder_append_final_period.GetSize());
90     model_append_final_period_ =
91         static_cast<const PodNerModel*>(flatbuffers::GetRoot<PodNerModel>(
92             model_buffer_append_final_period_.data()));
93 
94     unilib_ = CreateUniLibForTesting();
95   }
96 
97   std::string model_buffer_;
98   const PodNerModel* model_;
99   std::string model_buffer_append_final_period_;
100   const PodNerModel* model_append_final_period_;
101   std::unique_ptr<UniLib> unilib_;
102 };
103 
// Smoke test: both sample sentences must be annotated successfully and yield
// at least one annotation each.
TEST_F(PodNerTest, AnnotateSmokeTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  for (const char* sentence :
       {"Google New York , in New York",
        "Jamie I'm in the first picture and Cameron and Zach are in the "
        "second picture."}) {
    // A fresh output vector per sentence keeps the two checks independent.
    std::vector<AnnotatedSpan> spans;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(sentence), &spans));
    EXPECT_THAT(spans, Not(IsEmpty()));
  }
}
126 
// Annotating the empty string must succeed and produce no annotations.
TEST_F(PodNerTest, AnnotateEmptyInput) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::vector<AnnotatedSpan> spans;
  const bool annotated = annotator->Annotate(UTF8ToUnicodeText(""), &spans);
  ASSERT_TRUE(annotated);
  EXPECT_THAT(spans, IsEmpty());
}
138 
FillCollections(const std::vector<std::string> & collection_names,const std::vector<float> & single_token_priority_scores,const std::vector<float> & multi_token_priority_scores,std::vector<std::unique_ptr<PodNerModel_::CollectionT>> * collections)139 void FillCollections(
140     const std::vector<std::string>& collection_names,
141     const std::vector<float>& single_token_priority_scores,
142     const std::vector<float>& multi_token_priority_scores,
143     std::vector<std::unique_ptr<PodNerModel_::CollectionT>>* collections) {
144   ASSERT_TRUE(collection_names.size() == single_token_priority_scores.size() &&
145               collection_names.size() == multi_token_priority_scores.size());
146   collections->clear();
147   for (int i = 0; i < collection_names.size(); ++i) {
148     collections->push_back(std::make_unique<PodNerModel_::CollectionT>());
149     collections->back()->name = collection_names[i];
150     collections->back()->single_token_priority_score =
151         single_token_priority_scores[i];
152     collections->back()->multi_token_priority_score =
153         multi_token_priority_scores[i];
154   }
155 }
156 
EmplaceToLabelVector(BoiseType boise_type,MentionType mention_type,int collection_id,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)157 void EmplaceToLabelVector(
158     BoiseType boise_type, MentionType mention_type, int collection_id,
159     std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
160   labels->push_back(std::make_unique<PodNerModel_::LabelT>());
161   labels->back()->boise_type = boise_type;
162   labels->back()->mention_type = mention_type;
163   labels->back()->collection_id = collection_id;
164 }
165 
FillLabels(int num_collections,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)166 void FillLabels(int num_collections,
167                 std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
168   labels->clear();
169   for (auto boise_type :
170        {BoiseType_BEGIN, BoiseType_END, BoiseType_INTERMEDIATE}) {
171     for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
172       for (int i = 0; i < num_collections - 1; ++i) {  // skip undefined
173         EmplaceToLabelVector(boise_type, mention_type, i, labels);
174       }
175     }
176   }
177   EmplaceToLabelVector(BoiseType_O, MentionType_UNDEFINED, num_collections - 1,
178                        labels);
179   for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
180     for (int i = 0; i < num_collections - 1; ++i) {  // skip undefined
181       EmplaceToLabelVector(BoiseType_SINGLE, mention_type, i, labels);
182     }
183   }
184 }
185 
// With the fixture's default model, both a multi-word and a single-word
// location must be annotated as "location" with the default priority score.
TEST_F(PodNerTest, AnnotateDefaultCollections) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";
  {
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                    &annotations));
    // Fatal checks: the element accesses below would read out of bounds if
    // either container were empty.
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_FALSE(annotations[0].classification.empty());
    EXPECT_EQ(annotations[0].classification[0].collection, "location");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              kDefaultPriorityScore);

    annotations.clear();
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                    &annotations));
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_FALSE(annotations[0].classification.empty());
    EXPECT_EQ(annotations[0].classification[0].collection, "location");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              kDefaultPriorityScore);
  }
}
211 
// Repacks the fixture model with a custom collection list in which the fourth
// entry is named "xxx" and is the only one with non-zero priority scores, then
// checks that annotations report that name together with the multi-token
// score for "New York" and the single-token score for "Zurich".
TEST_F(PodNerTest, AnnotateConfigurableCollections) {
  std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
  ASSERT_TRUE(unpacked_model != nullptr);

  const float xxx_single_token_priority = 0.9f;
  const float xxx_multi_token_priority = 1.7f;
  const std::vector<std::string> collection_names = {
      "art",          "consumer_good", "event",  "xxx",
      "organization", "ner_entity",    "person", "undefined"};
  // Named vectors rather than inline brace lists: the commas of a brace list
  // would be split into separate arguments by the assertion macro below.
  const std::vector<float> single_token_priority_scores = {
      0.f, 0.f, 0.f, xxx_single_token_priority, 0.f, 0.f, 0.f, 0.f};
  const std::vector<float> multi_token_priority_scores = {
      0.f, 0.f, 0.f, xxx_multi_token_priority, 0.f, 0.f, 0.f, 0.f};
  // FillCollections reports size mismatches with a fatal assertion that only
  // returns from the helper; propagate it so the test stops here as well.
  ASSERT_NO_FATAL_FAILURE(FillCollections(
      collection_names, single_token_priority_scores,
      multi_token_priority_scores, &(unpacked_model->collections)));
  FillLabels(static_cast<int>(collection_names.size()),
             &(unpacked_model->labels));

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
  // `model_buffer` backs the model handed to the annotator below.
  std::string model_buffer =
      std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
                  builder.GetSize());
  std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
      static_cast<const PodNerModel*>(
          flatbuffers::GetRoot<PodNerModel>(model_buffer.data())),
      *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";
  {
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                    &annotations));
    // Fatal checks: the element accesses below would read out of bounds if
    // either container were empty.
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_FALSE(annotations[0].classification.empty());
    EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              xxx_multi_token_priority);

    annotations.clear();
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                    &annotations));
    ASSERT_THAT(annotations, Not(IsEmpty()));
    ASSERT_FALSE(annotations[0].classification.empty());
    EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
    EXPECT_EQ(annotations[0].classification[0].priority_score,
              xxx_single_token_priority);
  }
}
259 
// "in New York" is annotated by the fixture model (min_number_of_tokens == 1)
// but must yield nothing once the model is repacked with the minimum raised
// to 4.
TEST_F(PodNerTest, AnnotateMinNumTokens) {
  const std::string text = "in New York";

  // Baseline: the fixture model annotates the text.
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  std::vector<AnnotatedSpan> baseline_spans;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &baseline_spans));
  EXPECT_THAT(baseline_spans, Not(IsEmpty()));

  // Repack the same model with a stricter token minimum.
  std::unique_ptr<PodNerModelT> strict_config(model_->UnPack());
  ASSERT_TRUE(strict_config != nullptr);
  strict_config->min_number_of_tokens = 4;

  flatbuffers::FlatBufferBuilder fbb;
  fbb.Finish(PodNerModel::Pack(fbb, strict_config.get()));
  std::string strict_buffer(
      reinterpret_cast<const char*>(fbb.GetBufferPointer()), fbb.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(strict_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::vector<AnnotatedSpan> strict_spans;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &strict_spans));
  EXPECT_THAT(strict_spans, IsEmpty());
}
293 
// "in New York" is annotated by the fixture model (min_number_of_wordpieces
// == 1) but must yield nothing once the model is repacked with the minimum
// raised to 10.
TEST_F(PodNerTest, AnnotateMinNumWordpieces) {
  const std::string text = "in New York";

  // Baseline: the fixture model annotates the text.
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  std::vector<AnnotatedSpan> baseline_spans;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &baseline_spans));
  EXPECT_THAT(baseline_spans, Not(IsEmpty()));

  // Repack the same model with a stricter wordpiece minimum.
  std::unique_ptr<PodNerModelT> strict_config(model_->UnPack());
  ASSERT_TRUE(strict_config != nullptr);
  strict_config->min_number_of_wordpieces = 10;

  flatbuffers::FlatBufferBuilder fbb;
  fbb.Finish(PodNerModel::Pack(fbb, strict_config.get()));
  std::string strict_buffer(
      reinterpret_cast<const char*>(fbb.GetBufferPointer()), fbb.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(strict_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::vector<AnnotatedSpan> strict_spans;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &strict_spans));
  EXPECT_THAT(strict_spans, IsEmpty());
}
327 
// A long, URL-like string without natural-language words must be processed
// successfully and produce no annotations.
TEST_F(PodNerTest, AnnotateNonstandardText) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string gibberish =
      "abcNxCDU1RWNvbXByLXI4NS8xNzcwLzE3NzA4NDY2L3J1Ymluby1raWRzLXJlY2xpbmVyLXd"
      "pdGgtY3VwLWhvbGRlci5qcGc=/"
      "UnViaW5vIEtpZHMgUmVjbGluZXIgd2l0aCBDdXAgSG9sZGVyIGJ5IEhhcnJpZXQgQmVl."
      "html>";

  std::vector<AnnotatedSpan> spans;
  const bool annotated =
      annotator->Annotate(UTF8ToUnicodeText(gibberish), &spans);
  ASSERT_TRUE(annotated);
  EXPECT_THAT(spans, IsEmpty());
}
343 
// A control character directly after a name (with and without a following
// space) must not prevent the name "Kuba" at codepoints [11, 15) from being
// annotated first.
//
// NOTE(review): despite the test name, "\x09" is a horizontal tab, not a line
// feed ("\x0A") - confirm which character is intended.
TEST_F(PodNerTest, AnnotateTextWithLinefeed) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Built in two steps so that the 'a' of "and" is not parsed as part of the
  // hexadecimal escape sequence.
  std::string nonstandard_text = "My name is Kuba\x09";
  nonstandard_text += "and this is a test.";
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  // Fatal: annotations[0] below must not be read from an empty vector.
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));

  nonstandard_text = "My name is Kuba\x09 and this is a test.";
  // Reset the output, as the other tests in this file do before reusing a
  // vector, so the checks below cannot pass on results left over from the
  // first call.
  annotations.clear();
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));
}
363 
// Appends a phrase with non-ASCII characters (named "unknown wordpieces" by
// the test) to a short and to a long text: the short text is expected to
// yield no annotations, the long one at least one.
TEST_F(PodNerTest, AnnotateWithUnknownWordpieces) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string unknown_suffix = "před chvílí";
  const std::string short_input =
      "Google New York , in New York. " + unknown_suffix;
  const std::string long_prefix =
      "It is easy to spend a fun and exciting day in Seattle without a car.  "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free.  In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle.";
  const std::string long_input = long_prefix + " " + unknown_suffix;

  // The same output vector is passed to both calls, without clearing it in
  // between.
  std::vector<AnnotatedSpan> spans;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(short_input), &spans));
  EXPECT_THAT(spans, IsEmpty());

  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_input), &spans));
  EXPECT_THAT(spans, Not(IsEmpty()));
}
393 
394 class PodNerTestWithOrWithoutFinalPeriod
395     : public PodNerTest,
396       public testing::WithParamInterface<bool> {};
397 
398 INSTANTIATE_TEST_SUITE_P(TestAnnotateLongText,
399                          PodNerTestWithOrWithoutFinalPeriod,
400                          testing::Values(true, false));
401 
TEST_P(PodNerTestWithOrWithoutFinalPeriod,AnnotateLongText)402 TEST_P(PodNerTestWithOrWithoutFinalPeriod, AnnotateLongText) {
403   std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
404       GetParam() ? model_append_final_period_ : model_, *unilib_);
405   ASSERT_TRUE(annotator != nullptr);
406 
407   const std::string long_text =
408       "It is easy to spend a fun and exciting day in Seattle without a car.  "
409       "There are lots of ways to modify this itinerary. Add a ferry ride "
410       "from the waterfront. Spending the day at the Seattle Center or at the "
411       "aquarium could easily extend this from one to several days. Take the "
412       "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
413       "Museum which is fun and free.  In the summer months you can ride the "
414       "passenger-only Water Taxi from the waterfront to West Seattle and "
415       "Alki Beach. Here's a sample one day itinerary: Start at the Space "
416       "Needle by taking the Seattle Monorail from downtown. Look around the "
417       "Seattle Center or go to the Space Needle. If you're interested in "
418       "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
419       "is located at the foot of the Space Needle.  It has a lot of rock'n "
420       "roll memorabilia that you may find interesting.  The Chihuly Garden "
421       "and Glass musuem is near the Space Needle and you can get a "
422       "combination ticket for both.  It gets really good reviews.  If you're "
423       "interested, then the Bill & Melinda Gates Foundation is across from "
424       "the EMP and has a visitors center that is free.  Come see how Bill "
425       "Gates is giving away his millions. Take the Monorail back downtown.  "
426       "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
427       "Place Market. Look around then head for the Pike Place hill climb "
428       "which is a series of steps that walk down to the waterfront. You will "
429       "end up across the street from the Seattle Aquarium. Plenty of things "
430       "to do on the waterfront, boat cruises, seafood restaurants, the "
431       "Aquarium, or your typical tourist activities. You can walk or take "
432       "the waterfront trolley bus.  Note that waterfront construction has "
433       "relocated the  trolley Metro bus route 99 that will take you from "
434       "Pioneer Square all the way to the end of the waterfront where you can "
435       "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
436       "Pier 70. The route goes thru Chinatown/International District, "
437       "through Pioneer Square, up 1st ave past the Pike Place Market and to "
438       "1st and Cedar which is walking distance to the Space Needle.  It then "
439       "goes down Broad Street toward the Olympic Sculpture Garden.   It runs "
440       "approximately every 30 minutes during the day and early evening.";
441   std::vector<AnnotatedSpan> annotations;
442   ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_text), &annotations));
443   EXPECT_THAT(annotations, Not(IsEmpty()));
444 
445   const std::string location_from_beginning = "Seattle";
446   int start_span_location_from_beginning =
447       long_text.find(location_from_beginning);
448   EXPECT_EQ(annotations[0].span,
449             CodepointSpan(start_span_location_from_beginning,
450                           start_span_location_from_beginning +
451                               location_from_beginning.length()));
452 
453   const std::string location_from_end = "Olympic Sculpture Garden";
454   int start_span_location_from_end = long_text.find(location_from_end);
455   const AnnotatedSpan& last_annotation = *annotations.rbegin();
456   EXPECT_EQ(
457       last_annotation.span,
458       CodepointSpan(start_span_location_from_end,
459                     start_span_location_from_end + location_from_end.length()));
460 }
461 
TEST_F(PodNerTest,SuggestSelectionLongText)462 TEST_F(PodNerTest, SuggestSelectionLongText) {
463   std::unique_ptr<PodNerAnnotator> annotator =
464       PodNerAnnotator::Create(model_, *unilib_);
465   ASSERT_TRUE(annotator != nullptr);
466 
467   const std::string long_text =
468       "It is easy to spend a fun and exciting day in Seattle without a car.  "
469       "There are lots of ways to modify this itinerary. Add a ferry ride "
470       "from the waterfront. Spending the day at the Seattle Center or at the "
471       "aquarium could easily extend this from one to several days. Take the "
472       "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
473       "Museum which is fun and free.  In the summer months you can ride the "
474       "passenger-only Water Taxi from the waterfront to West Seattle and "
475       "Alki Beach. Here's a sample one day itinerary: Start at the Space "
476       "Needle by taking the Seattle Monorail from downtown. Look around the "
477       "Seattle Center or go to the Space Needle. If you're interested in "
478       "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
479       "is located at the foot of the Space Needle.  It has a lot of rock'n "
480       "roll memorabilia that you may find interesting.  The Chihuly Garden "
481       "and Glass musuem is near the Space Needle and you can get a "
482       "combination ticket for both.  It gets really good reviews.  If you're "
483       "interested, then the Bill & Melinda Gates Foundation is across from "
484       "the EMP and has a visitors center that is free.  Come see how Bill "
485       "Gates is giving away his millions. Take the Monorail back downtown.  "
486       "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
487       "Place Market. Look around then head for the Pike Place hill climb "
488       "which is a series of steps that walk down to the waterfront. You will "
489       "end up across the street from the Seattle Aquarium. Plenty of things "
490       "to do on the waterfront, boat cruises, seafood restaurants, the "
491       "Aquarium, or your typical tourist activities. You can walk or take "
492       "the waterfront trolley bus.  Note that waterfront construction has "
493       "relocated the  trolley Metro bus route 99 that will take you from "
494       "Pioneer Square all the way to the end of the waterfront where you can "
495       "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
496       "Pier 70. The route goes thru Chinatown/International District, "
497       "through Pioneer Square, up 1st ave past the Pike Place Market and to "
498       "1st and Cedar which is walking distance to the Space Needle.  It then "
499       "goes down Broad Street toward the Olympic Sculpture Garden.   It runs "
500       "approximately every 30 minutes during the day and early evening.";
501   const std::string klondike = "Klondike Gold Rush Museum";
502   int klondike_start = long_text.find(klondike);
503 
504   AnnotatedSpan suggested_span;
505   EXPECT_TRUE(annotator->SuggestSelection(UTF8ToUnicodeText(long_text),
506                                           {klondike_start, klondike_start + 8},
507                                           &suggested_span));
508   EXPECT_EQ(suggested_span.span,
509             CodepointSpan(klondike_start, klondike_start + klondike.length()));
510 }
511 
// A click at [7, 10) must grow to [7, 15), while a click at [17, 19) yields
// no suggestion and leaves an invalid span in the result.
TEST_F(PodNerTest, SuggestSelectionTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  AnnotatedSpan suggestion;

  // Click inside an entity: the selection is expanded.
  const bool expanded = annotator->SuggestSelection(
      UTF8ToUnicodeText("Google New York, in New York"), {7, 10}, &suggestion);
  EXPECT_TRUE(expanded);
  EXPECT_EQ(suggestion.span, CodepointSpan(7, 15));

  // Click outside any entity: no suggestion, and the span is reset.
  const bool expanded_again = annotator->SuggestSelection(
      UTF8ToUnicodeText("Google New York, in New York"), {17, 19},
      &suggestion);
  EXPECT_FALSE(expanded_again);
  EXPECT_EQ(suggestion.span, CodepointSpan(kInvalidIndex, kInvalidIndex));
}
527 
// Classifying the span [10, 18) of "We met in New York" must succeed and
// report the "location" collection.
TEST_F(PodNerTest, ClassifyTextTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  ClassificationResult classification;
  const bool classified = annotator->ClassifyText(
      UTF8ToUnicodeText("We met in New York"), {10, 18}, &classification);
  ASSERT_TRUE(classified);
  EXPECT_EQ(classification.collection, "location");
}
538 
// Calls SuggestSelection on one shared annotator from 20 threads at once.
// When run with --config=tsan, this should fire if there's a problem.
TEST_F(PodNerTest, ThreadSafety) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  constexpr int kNumThreads = 20;

  // Work done by every thread. `annotator` is captured by reference, which is
  // fine because all threads are joined before this function returns.
  const auto suggest_once = [&annotator]() {
    AnnotatedSpan suggestion;
    EXPECT_TRUE(annotator->SuggestSelection(
        UTF8ToUnicodeText("Google New York, in New York"), {7, 10},
        &suggestion));
    EXPECT_EQ(suggestion.span, CodepointSpan(7, 15));
  };

  std::vector<std::thread> workers;
  workers.reserve(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    workers.emplace_back(suggest_once);
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
}
560 
561 }  // namespace
562 }  // namespace libtextclassifier3
563