1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "common/feature-extractor.h"
18 
19 #include "common/feature-types.h"
20 #include "common/fml-parser.h"
21 #include "util/base/integral_types.h"
22 #include "util/base/logging.h"
23 #include "util/gtl/stl_util.h"
24 #include "util/strings/numbers.h"
25 
26 namespace libtextclassifier {
27 namespace nlp_core {
28 
// Out-of-line definition of the static constexpr member kNone. Before C++17
// such a definition is required whenever the constant is odr-used (for
// example, bound to a const reference). The value itself comes from the
// in-class declaration, which is not visible in this file.
constexpr FeatureValue GenericFeatureFunction::kNone;
30 
GenericFeatureExtractor()31 GenericFeatureExtractor::GenericFeatureExtractor() {}
32 
~GenericFeatureExtractor()33 GenericFeatureExtractor::~GenericFeatureExtractor() {}
34 
Parse(const std::string & source)35 bool GenericFeatureExtractor::Parse(const std::string &source) {
36   // Parse feature specification into descriptor.
37   FMLParser parser;
38   if (!parser.Parse(source, mutable_descriptor())) return false;
39 
40   // Initialize feature extractor from descriptor.
41   if (!InitializeFeatureFunctions()) return false;
42   return true;
43 }
44 
InitializeFeatureTypes()45 bool GenericFeatureExtractor::InitializeFeatureTypes() {
46   // Register all feature types.
47   GetFeatureTypes(&feature_types_);
48   for (size_t i = 0; i < feature_types_.size(); ++i) {
49     FeatureType *ft = feature_types_[i];
50     ft->set_base(i);
51 
52     // Check for feature space overflow.
53     double domain_size = ft->GetDomainSize();
54     if (domain_size < 0) {
55       TC_LOG(ERROR) << "Illegal domain size for feature " << ft->name() << ": "
56                     << domain_size;
57       return false;
58     }
59   }
60   return true;
61 }
62 
GetDomainSize() const63 FeatureValue GenericFeatureExtractor::GetDomainSize() const {
64   // Domain size of the set of features is equal to:
65   //   [largest domain size of any feature types] * [number of feature types]
66   FeatureValue max_feature_type_dsize = 0;
67   for (size_t i = 0; i < feature_types_.size(); ++i) {
68     FeatureType *ft = feature_types_[i];
69     const FeatureValue feature_type_dsize = ft->GetDomainSize();
70     if (feature_type_dsize > max_feature_type_dsize) {
71       max_feature_type_dsize = feature_type_dsize;
72     }
73   }
74 
75   return max_feature_type_dsize * feature_types_.size();
76 }
77 
GetParameter(const std::string & name) const78 std::string GenericFeatureFunction::GetParameter(
79     const std::string &name) const {
80   // Find named parameter in feature descriptor.
81   for (int i = 0; i < descriptor_->parameter_size(); ++i) {
82     if (name == descriptor_->parameter(i).name()) {
83       return descriptor_->parameter(i).value();
84     }
85   }
86   return "";
87 }
88 
GenericFeatureFunction()89 GenericFeatureFunction::GenericFeatureFunction() {}
90 
~GenericFeatureFunction()91 GenericFeatureFunction::~GenericFeatureFunction() { delete feature_type_; }
92 
GetIntParameter(const std::string & name,int default_value) const93 int GenericFeatureFunction::GetIntParameter(const std::string &name,
94                                             int default_value) const {
95   int32 parsed_value = default_value;
96   std::string value = GetParameter(name);
97   if (!value.empty()) {
98     if (!ParseInt32(value.c_str(), &parsed_value)) {
99       // A parameter value has been specified, but it can't be parsed as an int.
100       // We don't crash: instead, we long an error and return the default value.
101       TC_LOG(ERROR) << "Value of param " << name << " is not an int: " << value;
102     }
103   }
104   return parsed_value;
105 }
106 
GetBoolParameter(const std::string & name,bool default_value) const107 bool GenericFeatureFunction::GetBoolParameter(const std::string &name,
108                                               bool default_value) const {
109   std::string value = GetParameter(name);
110   if (value.empty()) return default_value;
111   if (value == "true") return true;
112   if (value == "false") return false;
113   TC_LOG(ERROR) << "Illegal value '" << value << "' for bool parameter '"
114                 << name << "'"
115                 << " will assume default " << default_value;
116   return default_value;
117 }
118 
GetFeatureTypes(std::vector<FeatureType * > * types) const119 void GenericFeatureFunction::GetFeatureTypes(
120     std::vector<FeatureType *> *types) const {
121   if (feature_type_ != nullptr) types->push_back(feature_type_);
122 }
123 
GetFeatureType() const124 FeatureType *GenericFeatureFunction::GetFeatureType() const {
125   // If a single feature type has been registered return it.
126   if (feature_type_ != nullptr) return feature_type_;
127 
128   // Get feature types for function.
129   std::vector<FeatureType *> types;
130   GetFeatureTypes(&types);
131 
132   // If there is exactly one feature type return this, else return null.
133   if (types.size() == 1) return types[0];
134   return nullptr;
135 }
136 
name() const137 std::string GenericFeatureFunction::name() const {
138   std::string output;
139   if (descriptor_->name().empty()) {
140     if (!prefix_.empty()) {
141       output.append(prefix_);
142       output.append(".");
143     }
144     ToFML(*descriptor_, &output);
145   } else {
146     output = descriptor_->name();
147   }
148   return output;
149 }
150 
151 }  // namespace nlp_core
152 }  // namespace libtextclassifier
153