1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lang_id/lang-id-wrapper.h"
18 
19 #include <fcntl.h>
20 
21 #include "lang_id/fb_model/lang-id-from-fb.h"
22 #include "lang_id/lang-id.h"
23 
24 namespace libtextclassifier3 {
25 
26 namespace langid {
27 
LoadFromPath(const std::string & langid_model_path)28 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromPath(
29     const std::string& langid_model_path) {
30   std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
31       libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFile(langid_model_path);
32   return langid_model;
33 }
34 
LoadFromDescriptor(const int langid_fd)35 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromDescriptor(
36     const int langid_fd) {
37   std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
38       libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFileDescriptor(
39           langid_fd);
40   return langid_model;
41 }
42 
LoadFromUnownedBuffer(const char * buffer,int size)43 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromUnownedBuffer(
44     const char* buffer, int size) {
45   std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
46       libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferBytes(buffer, size);
47   return langid_model;
48 }
49 
GetPredictions(const libtextclassifier3::mobile::lang_id::LangId * model,const std::string & text)50 std::vector<std::pair<std::string, float>> GetPredictions(
51     const libtextclassifier3::mobile::lang_id::LangId* model, const std::string& text) {
52   return GetPredictions(model, text.data(), text.size());
53 }
54 
GetPredictions(const libtextclassifier3::mobile::lang_id::LangId * model,const char * text,int text_size)55 std::vector<std::pair<std::string, float>> GetPredictions(
56     const libtextclassifier3::mobile::lang_id::LangId* model, const char* text,
57     int text_size) {
58   std::vector<std::pair<std::string, float>> prediction_results;
59   if (model == nullptr) {
60     return prediction_results;
61   }
62 
63   const float noise_threshold =
64       model->GetFloatProperty("text_classifier_langid_noise_threshold", -1.0f);
65 
66   // Speed up the things by specifying the max results we want. For example, if
67   // the noise threshold is 0.1, we don't need more than 10 results.
68   const int max_results =
69       noise_threshold < 0.01
70           ? -1  // -1 means FindLanguages returns all predictions
71           : static_cast<int>(1 / noise_threshold) + 1;
72 
73   libtextclassifier3::mobile::lang_id::LangIdResult langid_result;
74   model->FindLanguages(text, text_size, &langid_result, max_results);
75   for (int i = 0; i < langid_result.predictions.size(); i++) {
76     const auto& prediction = langid_result.predictions[i];
77     if (prediction.second >= noise_threshold && prediction.first != "und") {
78       prediction_results.push_back({prediction.first, prediction.second});
79     }
80   }
81   return prediction_results;
82 }
83 
GetLanguageTags(const libtextclassifier3::mobile::lang_id::LangId * model,const std::string & text)84 std::string GetLanguageTags(const libtextclassifier3::mobile::lang_id::LangId* model,
85                             const std::string& text) {
86   const std::vector<std::pair<std::string, float>>& predictions =
87       GetPredictions(model, text);
88   const float threshold =
89       model->GetFloatProperty("text_classifier_langid_threshold", -1.0f);
90   std::string detected_language_tags = "";
91   bool first_accepted_language = true;
92   for (int i = 0; i < predictions.size(); i++) {
93     const auto& prediction = predictions[i];
94     if (threshold >= 0.f && prediction.second < threshold) {
95       continue;
96     }
97     if (first_accepted_language) {
98       first_accepted_language = false;
99     } else {
100       detected_language_tags += ",";
101     }
102     detected_language_tags += prediction.first;
103   }
104   return detected_language_tags;
105 }
106 
107 }  // namespace langid
108 
109 }  // namespace libtextclassifier3
110