1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "annotator/pod_ner/pod-ner-impl.h"

#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <thread>  // NOLINT(build/c++11)
#include <vector>

#include "annotator/model_generated.h"
#include "annotator/types.h"
#include "utils/jvm-test-utils.h"
#include "utils/test-data-test-utils.h"
#include "utils/tokenizer-utils.h"
#include "utils/utf8/unicodetext.h"
#include "utils/utf8/unilib.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
32
33 namespace libtextclassifier3 {
34 namespace {
35
36 using ::testing::IsEmpty;
37 using ::testing::Not;
38
39 using PodNerModel_::Label_::BoiseType;
40 using PodNerModel_::Label_::BoiseType_BEGIN;
41 using PodNerModel_::Label_::BoiseType_END;
42 using PodNerModel_::Label_::BoiseType_INTERMEDIATE;
43 using PodNerModel_::Label_::BoiseType_O;
44 using PodNerModel_::Label_::BoiseType_SINGLE;
45 using PodNerModel_::Label_::MentionType;
46 using PodNerModel_::Label_::MentionType_NAM;
47 using PodNerModel_::Label_::MentionType_NOM;
48 using PodNerModel_::Label_::MentionType_UNDEFINED;
49
50 constexpr int kMinNumberOfTokens = 1;
51 constexpr int kMinNumberOfWordpieces = 1;
52 constexpr float kDefaultPriorityScore = 0.5;
53
54 class PodNerTest : public testing::Test {
55 protected:
PodNerTest()56 PodNerTest() {
57 PodNerModelT model;
58
59 model.min_number_of_tokens = kMinNumberOfTokens;
60 model.min_number_of_wordpieces = kMinNumberOfWordpieces;
61 model.priority_score = kDefaultPriorityScore;
62
63 const std::string tflite_model_buffer =
64 GetTestFileContent("annotator/pod_ner/test_data/tflite_model.tflite");
65 model.tflite_model = std::vector<uint8_t>(tflite_model_buffer.begin(),
66 tflite_model_buffer.end());
67 const std::string word_piece_vocab_buffer =
68 GetTestFileContent("annotator/pod_ner/test_data/vocab.txt");
69 model.word_piece_vocab = std::vector<uint8_t>(
70 word_piece_vocab_buffer.begin(), word_piece_vocab_buffer.end());
71
72 flatbuffers::FlatBufferBuilder builder;
73 builder.Finish(PodNerModel::Pack(builder, &model));
74
75 model_buffer_ =
76 std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
77 builder.GetSize());
78 model_ = static_cast<const PodNerModel*>(
79 flatbuffers::GetRoot<PodNerModel>(model_buffer_.data()));
80
81 model.append_final_period = true;
82 flatbuffers::FlatBufferBuilder builder_append_final_period;
83 builder_append_final_period.Finish(
84 PodNerModel::Pack(builder_append_final_period, &model));
85
86 model_buffer_append_final_period_ =
87 std::string(reinterpret_cast<const char*>(
88 builder_append_final_period.GetBufferPointer()),
89 builder_append_final_period.GetSize());
90 model_append_final_period_ =
91 static_cast<const PodNerModel*>(flatbuffers::GetRoot<PodNerModel>(
92 model_buffer_append_final_period_.data()));
93
94 unilib_ = CreateUniLibForTesting();
95 }
96
97 std::string model_buffer_;
98 const PodNerModel* model_;
99 std::string model_buffer_append_final_period_;
100 const PodNerModel* model_append_final_period_;
101 std::unique_ptr<UniLib> unilib_;
102 };
103
// Smoke test: annotating two ordinary English sentences must succeed and
// produce at least one annotation each.
TEST_F(PodNerTest, AnnotateSmokeTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  for (const char* text :
       {"Google New York , in New York",
        "Jamie I'm in the first picture and Cameron and Zach "
        "are in the second "
        "picture."}) {
    SCOPED_TRACE(text);
    // A fresh result vector per input, so no annotation carries over.
    std::vector<AnnotatedSpan> annotations;
    ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(text), &annotations));
    EXPECT_THAT(annotations, Not(IsEmpty()));
  }
}
126
// Annotating the empty string must succeed and yield no annotations.
TEST_F(PodNerTest, AnnotateEmptyInput) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  std::vector<AnnotatedSpan> results;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(""), &results));
  EXPECT_THAT(results, IsEmpty());
}
138
FillCollections(const std::vector<std::string> & collection_names,const std::vector<float> & single_token_priority_scores,const std::vector<float> & multi_token_priority_scores,std::vector<std::unique_ptr<PodNerModel_::CollectionT>> * collections)139 void FillCollections(
140 const std::vector<std::string>& collection_names,
141 const std::vector<float>& single_token_priority_scores,
142 const std::vector<float>& multi_token_priority_scores,
143 std::vector<std::unique_ptr<PodNerModel_::CollectionT>>* collections) {
144 ASSERT_TRUE(collection_names.size() == single_token_priority_scores.size() &&
145 collection_names.size() == multi_token_priority_scores.size());
146 collections->clear();
147 for (int i = 0; i < collection_names.size(); ++i) {
148 collections->push_back(std::make_unique<PodNerModel_::CollectionT>());
149 collections->back()->name = collection_names[i];
150 collections->back()->single_token_priority_score =
151 single_token_priority_scores[i];
152 collections->back()->multi_token_priority_score =
153 multi_token_priority_scores[i];
154 }
155 }
156
EmplaceToLabelVector(BoiseType boise_type,MentionType mention_type,int collection_id,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)157 void EmplaceToLabelVector(
158 BoiseType boise_type, MentionType mention_type, int collection_id,
159 std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
160 labels->push_back(std::make_unique<PodNerModel_::LabelT>());
161 labels->back()->boise_type = boise_type;
162 labels->back()->mention_type = mention_type;
163 labels->back()->collection_id = collection_id;
164 }
165
FillLabels(int num_collections,std::vector<std::unique_ptr<PodNerModel_::LabelT>> * labels)166 void FillLabels(int num_collections,
167 std::vector<std::unique_ptr<PodNerModel_::LabelT>>* labels) {
168 labels->clear();
169 for (auto boise_type :
170 {BoiseType_BEGIN, BoiseType_END, BoiseType_INTERMEDIATE}) {
171 for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
172 for (int i = 0; i < num_collections - 1; ++i) { // skip undefined
173 EmplaceToLabelVector(boise_type, mention_type, i, labels);
174 }
175 }
176 }
177 EmplaceToLabelVector(BoiseType_O, MentionType_UNDEFINED, num_collections - 1,
178 labels);
179 for (auto mention_type : {MentionType_NAM, MentionType_NOM}) {
180 for (int i = 0; i < num_collections - 1; ++i) { // skip undefined
181 EmplaceToLabelVector(BoiseType_SINGLE, mention_type, i, labels);
182 }
183 }
184 }
185
// With the unmodified fixture model, both a multi-word and a single-word
// location are expected to be reported as "location" with the model-wide
// default priority score.
TEST_F(PodNerTest, AnnotateDefaultCollections) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";

  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                  &annotations));
  // Fatal assertions: the expectations below index into both containers, so
  // continuing after an empty result would read out of bounds.
  ASSERT_THAT(annotations, Not(IsEmpty()));
  ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].classification[0].collection, "location");
  EXPECT_EQ(annotations[0].classification[0].priority_score,
            kDefaultPriorityScore);

  annotations.clear();
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                  &annotations));
  ASSERT_THAT(annotations, Not(IsEmpty()));
  ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].classification[0].collection, "location");
  EXPECT_EQ(annotations[0].classification[0].priority_score,
            kDefaultPriorityScore);
}
211
// Overrides the collections and labels of the fixture model so that the fourth
// collection is named "xxx" and carries distinct single-token and multi-token
// priority scores, then checks that annotations report that name together
// with the score matching the number of tokens in the entity.
TEST_F(PodNerTest, AnnotateConfigurableCollections) {
  std::unique_ptr<PodNerModelT> unpacked_model(model_->UnPack());
  ASSERT_TRUE(unpacked_model != nullptr);

  const float xxx_single_token_priority = 0.9;
  const float xxx_multi_token_priority = 1.7;
  const std::vector<std::string> collection_names = {
      "art",          "consumer_good", "event",  "xxx",
      "organization", "ner_entity",    "person", "undefined"};
  // FillCollections reports size mismatches with fatal assertions; propagate
  // them so the test does not go on with a half-configured model.
  ASSERT_NO_FATAL_FAILURE(FillCollections(
      collection_names,
      /*single_token_priority_scores=*/
      {0., 0., 0., xxx_single_token_priority, 0., 0., 0., 0.},
      /*multi_token_priority_scores=*/
      {0., 0., 0., xxx_multi_token_priority, 0., 0., 0., 0.},
      &(unpacked_model->collections)));
  FillLabels(static_cast<int>(collection_names.size()),
             &(unpacked_model->labels));

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, unpacked_model.get()));
  // The annotator is created from a pointer into this buffer, so the buffer
  // is kept alive for the rest of the test.
  const std::string model_buffer =
      std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
                  builder.GetSize());
  std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(model_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string multi_word_location = "I live in New York";
  const std::string single_word_location = "I live in Zurich";

  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(multi_word_location),
                                  &annotations));
  // Fatal assertions: the expectations below index into both containers.
  ASSERT_THAT(annotations, Not(IsEmpty()));
  ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
  EXPECT_EQ(annotations[0].classification[0].priority_score,
            xxx_multi_token_priority);

  annotations.clear();
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(single_word_location),
                                  &annotations));
  ASSERT_THAT(annotations, Not(IsEmpty()));
  ASSERT_THAT(annotations[0].classification, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].classification[0].collection, "xxx");
  EXPECT_EQ(annotations[0].classification[0].priority_score,
            xxx_single_token_priority);
}
259
// The same short text is annotated by the fixture model but yields nothing
// once `min_number_of_tokens` is raised to 4.
TEST_F(PodNerTest, AnnotateMinNumTokens) {
  const std::string text = "in New York";

  // Baseline: the fixture model (min_number_of_tokens == kMinNumberOfTokens).
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> baseline_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &baseline_annotations));
    EXPECT_THAT(baseline_annotations, Not(IsEmpty()));
  }

  // Re-pack the model with a stricter token threshold.
  std::unique_ptr<PodNerModelT> stricter_model(model_->UnPack());
  ASSERT_TRUE(stricter_model != nullptr);
  stricter_model->min_number_of_tokens = 4;

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, stricter_model.get()));
  std::string model_buffer(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(model_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> filtered_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &filtered_annotations));
    EXPECT_THAT(filtered_annotations, IsEmpty());
  }
}
293
// The same short text is annotated by the fixture model but yields nothing
// once `min_number_of_wordpieces` is raised to 10.
TEST_F(PodNerTest, AnnotateMinNumWordpieces) {
  const std::string text = "in New York";

  // Baseline: the fixture model
  // (min_number_of_wordpieces == kMinNumberOfWordpieces).
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> baseline_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &baseline_annotations));
    EXPECT_THAT(baseline_annotations, Not(IsEmpty()));
  }

  // Re-pack the model with a stricter wordpiece threshold.
  std::unique_ptr<PodNerModelT> stricter_model(model_->UnPack());
  ASSERT_TRUE(stricter_model != nullptr);
  stricter_model->min_number_of_wordpieces = 10;

  flatbuffers::FlatBufferBuilder builder;
  builder.Finish(PodNerModel::Pack(builder, stricter_model.get()));
  std::string model_buffer(
      reinterpret_cast<const char*>(builder.GetBufferPointer()),
      builder.GetSize());

  annotator = PodNerAnnotator::Create(
      flatbuffers::GetRoot<PodNerModel>(model_buffer.data()), *unilib_);
  ASSERT_TRUE(annotator != nullptr);
  {
    std::vector<AnnotatedSpan> filtered_annotations;
    ASSERT_TRUE(
        annotator->Annotate(UTF8ToUnicodeText(text), &filtered_annotations));
    EXPECT_THAT(filtered_annotations, IsEmpty());
  }
}
327
// A long run of non-natural-language characters (no spaces, ending in
// ".html>") must be processed successfully without producing annotations.
TEST_F(PodNerTest, AnnotateNonstandardText) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string nonstandard_text =
      "abcNxCDU1RWNvbXByLXI4NS8xNzcwLzE3NzA4NDY2L3J1Ymluby1raWRzLXJlY2xpbmVyLXd"
      "pdGgtY3VwLWhvbGRlci5qcGc=/"
      "UnViaW5vIEtpZHMgUmVjbGluZXIgd2l0aCBDdXAgSG9sZGVyIGJ5IEhhcnJpZXQgQmVl."
      "html>";
  const UnicodeText unicode_text = UTF8ToUnicodeText(nonstandard_text);

  std::vector<AnnotatedSpan> results;
  ASSERT_TRUE(annotator->Annotate(unicode_text, &results));
  EXPECT_THAT(results, IsEmpty());
}
343
// Checks that a control character directly after an entity does not shift the
// reported span: "Kuba" must be found at codepoints [11, 15) both when the
// control character is immediately followed by text and when it is followed
// by a space.
//
// Note: the control character used is \x09 (horizontal tab).
TEST_F(PodNerTest, AnnotateTextWithLinefeed) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // The literal is split after \x09 because a hex escape consumes every hex
  // digit that follows it; "\x09and" would otherwise be parsed as \x09a + "nd".
  std::string nonstandard_text = "My name is Kuba\x09";
  nonstandard_text += "and this is a test.";
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  // Fatal assertion: the next line indexes into the result.
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));

  // Start from an empty result so the checks below cannot be satisfied by
  // annotations left over from the first call.
  annotations.clear();
  nonstandard_text = "My name is Kuba\x09 and this is a test.";
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(nonstandard_text), &annotations));
  ASSERT_THAT(annotations, Not(IsEmpty()));
  EXPECT_EQ(annotations[0].span, CodepointSpan(11, 15));
}
363
// Appends a Czech phrase (expected to contain wordpieces missing from the
// test vocabulary) to a short and to a long English text. The short text is
// expected to produce no annotations, the long one at least one.
TEST_F(PodNerTest, AnnotateWithUnknownWordpieces) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string text_with_unknown_wordpieces = "před chvílí";
  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car. "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free. In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle.";

  const std::string short_input =
      "Google New York , in New York. " + text_with_unknown_wordpieces;
  const std::string long_input = long_text + " " + text_with_unknown_wordpieces;

  // One result vector is shared by both calls, as the second expectation
  // follows directly on the state left by the first.
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(
      annotator->Annotate(UTF8ToUnicodeText(short_input), &annotations));
  EXPECT_THAT(annotations, IsEmpty());

  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_input), &annotations));
  EXPECT_THAT(annotations, Not(IsEmpty()));
}
393
// Parameterized variant of the PodNerTest fixture. The bool parameter is read
// by the tests via GetParam(); in AnnotateLongText, `true` selects
// `model_append_final_period_` and `false` selects `model_`.
class PodNerTestWithOrWithoutFinalPeriod
    : public PodNerTest,
      public testing::WithParamInterface<bool> {};

// Runs every TEST_P of the fixture once with `true` and once with `false`.
INSTANTIATE_TEST_SUITE_P(TestAnnotateLongText,
                         PodNerTestWithOrWithoutFinalPeriod,
                         testing::Values(true, false));
401
// Annotates a long multi-sentence text, using either the model with
// `append_final_period` set (parameter true) or the plain model (parameter
// false), and checks that the first annotation covers the first "Seattle" and
// the last annotation covers "Olympic Sculpture Garden".
TEST_P(PodNerTestWithOrWithoutFinalPeriod, AnnotateLongText) {
  std::unique_ptr<PodNerAnnotator> annotator = PodNerAnnotator::Create(
      GetParam() ? model_append_final_period_ : model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car. "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free. In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle. If you're interested in "
      "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
      "is located at the foot of the Space Needle. It has a lot of rock'n "
      "roll memorabilia that you may find interesting. The Chihuly Garden "
      "and Glass musuem is near the Space Needle and you can get a "
      "combination ticket for both. It gets really good reviews. If you're "
      "interested, then the Bill & Melinda Gates Foundation is across from "
      "the EMP and has a visitors center that is free. Come see how Bill "
      "Gates is giving away his millions. Take the Monorail back downtown. "
      "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
      "Place Market. Look around then head for the Pike Place hill climb "
      "which is a series of steps that walk down to the waterfront. You will "
      "end up across the street from the Seattle Aquarium. Plenty of things "
      "to do on the waterfront, boat cruises, seafood restaurants, the "
      "Aquarium, or your typical tourist activities. You can walk or take "
      "the waterfront trolley bus. Note that waterfront construction has "
      "relocated the trolley Metro bus route 99 that will take you from "
      "Pioneer Square all the way to the end of the waterfront where you can "
      "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
      "Pier 70. The route goes thru Chinatown/International District, "
      "through Pioneer Square, up 1st ave past the Pike Place Market and to "
      "1st and Cedar which is walking distance to the Space Needle. It then "
      "goes down Broad Street toward the Olympic Sculpture Garden. It runs "
      "approximately every 30 minutes during the day and early evening.";
  std::vector<AnnotatedSpan> annotations;
  ASSERT_TRUE(annotator->Annotate(UTF8ToUnicodeText(long_text), &annotations));
  // Fatal assertion: the first and last elements are dereferenced below.
  ASSERT_THAT(annotations, Not(IsEmpty()));

  // The text is pure ASCII, so byte offsets returned by std::string::find can
  // be used directly as codepoint offsets.
  const std::string location_from_beginning = "Seattle";
  const std::string::size_type begin_pos =
      long_text.find(location_from_beginning);
  ASSERT_NE(begin_pos, std::string::npos);
  const int start_span_location_from_beginning = static_cast<int>(begin_pos);
  EXPECT_EQ(annotations.front().span,
            CodepointSpan(
                start_span_location_from_beginning,
                start_span_location_from_beginning +
                    static_cast<int>(location_from_beginning.length())));

  const std::string location_from_end = "Olympic Sculpture Garden";
  const std::string::size_type end_pos = long_text.find(location_from_end);
  ASSERT_NE(end_pos, std::string::npos);
  const int start_span_location_from_end = static_cast<int>(end_pos);
  const AnnotatedSpan& last_annotation = annotations.back();
  EXPECT_EQ(last_annotation.span,
            CodepointSpan(start_span_location_from_end,
                          start_span_location_from_end +
                              static_cast<int>(location_from_end.length())));
}
461
// Selecting only the first 8 characters ("Klondike") of "Klondike Gold Rush
// Museum" inside a long text must expand the selection to the whole phrase.
TEST_F(PodNerTest, SuggestSelectionLongText) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const std::string long_text =
      "It is easy to spend a fun and exciting day in Seattle without a car. "
      "There are lots of ways to modify this itinerary. Add a ferry ride "
      "from the waterfront. Spending the day at the Seattle Center or at the "
      "aquarium could easily extend this from one to several days. Take the "
      "Underground Tour in Pioneer Square. Visit the Klondike Gold Rush "
      "Museum which is fun and free. In the summer months you can ride the "
      "passenger-only Water Taxi from the waterfront to West Seattle and "
      "Alki Beach. Here's a sample one day itinerary: Start at the Space "
      "Needle by taking the Seattle Monorail from downtown. Look around the "
      "Seattle Center or go to the Space Needle. If you're interested in "
      "music the EMP-SFM (Experience Music Project - Science Fiction Musuem) "
      "is located at the foot of the Space Needle. It has a lot of rock'n "
      "roll memorabilia that you may find interesting. The Chihuly Garden "
      "and Glass musuem is near the Space Needle and you can get a "
      "combination ticket for both. It gets really good reviews. If you're "
      "interested, then the Bill & Melinda Gates Foundation is across from "
      "the EMP and has a visitors center that is free. Come see how Bill "
      "Gates is giving away his millions. Take the Monorail back downtown. "
      "You will be at 5th and Pine (Westlake Center). Head west to the Pike "
      "Place Market. Look around then head for the Pike Place hill climb "
      "which is a series of steps that walk down to the waterfront. You will "
      "end up across the street from the Seattle Aquarium. Plenty of things "
      "to do on the waterfront, boat cruises, seafood restaurants, the "
      "Aquarium, or your typical tourist activities. You can walk or take "
      "the waterfront trolley bus. Note that waterfront construction has "
      "relocated the trolley Metro bus route 99 that will take you from "
      "Pioneer Square all the way to the end of the waterfront where you can "
      "visit the Seattle Art Musuem's XXX Sculpture Garden just north of "
      "Pier 70. The route goes thru Chinatown/International District, "
      "through Pioneer Square, up 1st ave past the Pike Place Market and to "
      "1st and Cedar which is walking distance to the Space Needle. It then "
      "goes down Broad Street toward the Olympic Sculpture Garden. It runs "
      "approximately every 30 minutes during the day and early evening.";
  const std::string klondike = "Klondike Gold Rush Museum";
  const int klondike_start = long_text.find(klondike);
  // The initial selection spans just the first word, "Klondike".
  const int click_end = klondike_start + 8;

  AnnotatedSpan suggested_span;
  const bool suggestion_found = annotator->SuggestSelection(
      UTF8ToUnicodeText(long_text), {klondike_start, click_end},
      &suggested_span);
  EXPECT_TRUE(suggestion_found);
  EXPECT_EQ(suggested_span.span,
            CodepointSpan(klondike_start, klondike_start + klondike.length()));
}
511
// A selection inside an entity ("New" at [7, 10)) is expanded to the entity
// span [7, 15); a selection on a non-entity word ([17, 19)) is rejected and
// the output span is set to the invalid span.
TEST_F(PodNerTest, SuggestSelectionTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  const char* const text = "Google New York, in New York";
  AnnotatedSpan suggested_span;

  const bool entity_click_ok = annotator->SuggestSelection(
      UTF8ToUnicodeText(text), {7, 10}, &suggested_span);
  EXPECT_TRUE(entity_click_ok);
  EXPECT_EQ(suggested_span.span, CodepointSpan(7, 15));

  // The same output object is reused on purpose: the failed call is expected
  // to overwrite the previous span with the invalid one.
  const bool non_entity_click_ok = annotator->SuggestSelection(
      UTF8ToUnicodeText(text), {17, 19}, &suggested_span);
  EXPECT_FALSE(non_entity_click_ok);
  EXPECT_EQ(suggested_span.span, CodepointSpan(kInvalidIndex, kInvalidIndex));
}
527
// Classifying the span [10, 18) ("New York") of a short sentence must succeed
// and report the "location" collection.
TEST_F(PodNerTest, ClassifyTextTest) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  ClassificationResult classification;
  const bool classified = annotator->ClassifyText(
      UTF8ToUnicodeText("We met in New York"), {10, 18}, &classification);
  ASSERT_TRUE(classified);
  EXPECT_EQ(classification.collection, "location");
}
538
// Calls SuggestSelection on one shared annotator from 20 threads at once.
TEST_F(PodNerTest, ThreadSafety) {
  std::unique_ptr<PodNerAnnotator> annotator =
      PodNerAnnotator::Create(model_, *unilib_);
  ASSERT_TRUE(annotator != nullptr);

  // Do inference in 20 threads. When run with --config=tsan, this should fire
  // if there's a problem.
  constexpr int kNumThreads = 20;
  std::vector<std::thread> workers;
  workers.reserve(kNumThreads);
  for (int i = 0; i < kNumThreads; ++i) {
    // Capturing by reference is safe: every worker is joined before
    // `annotator` goes out of scope.
    workers.emplace_back([&annotator]() {
      AnnotatedSpan suggested_span;
      EXPECT_TRUE(annotator->SuggestSelection(
          UTF8ToUnicodeText("Google New York, in New York"), {7, 10},
          &suggested_span));
      EXPECT_EQ(suggested_span.span, CodepointSpan(7, 15));
    });
  }
  for (std::thread& worker : workers) {
    worker.join();
  }
}
560
561 } // namespace
562 } // namespace libtextclassifier3
563