1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lang_id/common/fel/fel-parser.h"
18 
19 #include <ctype.h>
20 
21 #include <string>
22 
23 #include "lang_id/common/lite_base/logging.h"
24 #include "lang_id/common/lite_strings/numbers.h"
25 
26 namespace libtextclassifier3 {
27 namespace mobile {
28 
29 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// an alphabetic character, '_' or '/'.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF).  Plain char may be signed, so convert before calling isalpha();
  // otherwise a byte >= 0x80 would be passed as a negative int.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isalpha(uc) || (c == '_') || (c == '/');
}
33 
// Returns true iff character c can appear inside an identifier: an
// alphanumeric character, '_', '-' or '/'.
inline bool IsValidCharInsideIdentifier(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF).  Plain char may be signed, so convert before calling isalnum();
  // otherwise a byte >= 0x80 would be passed as a negative int.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isalnum(uc) || (c == '_') || (c == '-') || (c == '/');
}
38 
// Returns true iff character c can appear at the beginning of a number: a
// decimal digit or a sign ('+' / '-').
inline bool IsValidCharAtStartOfNumber(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF).  Plain char may be signed, so convert before calling isdigit();
  // otherwise a byte >= 0x80 would be passed as a negative int.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isdigit(uc) || (c == '+') || (c == '-');
}
43 
// Returns true iff character c can appear inside a number: a decimal digit or
// '.'.
inline bool IsValidCharInsideNumber(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF).  Plain char may be signed, so convert before calling isdigit();
  // otherwise a byte >= 0x80 would be passed as a negative int.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isdigit(uc) || (c == '.');
}
48 }  // namespace
49 
Initialize(const std::string & source)50 bool FELParser::Initialize(const std::string &source) {
51   // Initialize parser state.
52   source_ = source;
53   current_ = source_.begin();
54   item_start_ = line_start_ = current_;
55   line_number_ = item_line_number_ = 1;
56 
57   // Read first input item.
58   return NextItem();
59 }
60 
ReportError(const std::string & error_message)61 void FELParser::ReportError(const std::string &error_message) {
62   const int position = item_start_ - line_start_ + 1;
63   const std::string line(line_start_, current_);
64 
65   SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
66                    << ", position " << position << ": " << error_message
67                    << "\n    " << line << " <--HERE";
68 }
69 
Next()70 void FELParser::Next() {
71   // Move to the next input character. If we are at a line break update line
72   // number and line start position.
73   if (CurrentChar() == '\n') {
74     ++line_number_;
75     ++current_;
76     line_start_ = current_;
77   } else {
78     ++current_;
79   }
80 }
81 
NextItem()82 bool FELParser::NextItem() {
83   // Skip white space and comments.
84   while (!eos()) {
85     if (CurrentChar() == '#') {
86       // Skip comment.
87       while (!eos() && CurrentChar() != '\n') Next();
88     } else if (isspace(CurrentChar())) {
89       // Skip whitespace.
90       while (!eos() && isspace(CurrentChar())) Next();
91     } else {
92       break;
93     }
94   }
95 
96   // Record start position for next item.
97   item_start_ = current_;
98   item_line_number_ = line_number_;
99 
100   // Check for end of input.
101   if (eos()) {
102     item_type_ = END;
103     return true;
104   }
105 
106   // Parse number.
107   if (IsValidCharAtStartOfNumber(CurrentChar())) {
108     std::string::iterator start = current_;
109     Next();
110     while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
111     item_text_.assign(start, current_);
112     item_type_ = NUMBER;
113     return true;
114   }
115 
116   // Parse string.
117   if (CurrentChar() == '"') {
118     Next();
119     std::string::iterator start = current_;
120     while (CurrentChar() != '"') {
121       if (eos()) {
122         ReportError("Unterminated string");
123         return false;
124       }
125       Next();
126     }
127     item_text_.assign(start, current_);
128     item_type_ = STRING;
129     Next();
130     return true;
131   }
132 
133   // Parse identifier name.
134   if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
135     std::string::iterator start = current_;
136     while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
137       Next();
138     }
139     item_text_.assign(start, current_);
140     item_type_ = NAME;
141     return true;
142   }
143 
144   // Single character item.
145   item_type_ = CurrentChar();
146   Next();
147   return true;
148 }
149 
Parse(const std::string & source,FeatureExtractorDescriptor * result)150 bool FELParser::Parse(const std::string &source,
151                       FeatureExtractorDescriptor *result) {
152   // Initialize parser.
153   if (!Initialize(source)) {
154     return false;
155   }
156 
157   while (item_type_ != END) {
158     // Current item should be a feature name.
159     if (item_type_ != NAME) {
160       ReportError("Feature type name expected");
161       return false;
162     }
163     std::string name = item_text_;
164     if (!NextItem()) {
165       return false;
166     }
167 
168     if (item_type_ == '=') {
169       ReportError("Invalid syntax: feature expected");
170       return false;
171     } else {
172       // Parse feature.
173       FeatureFunctionDescriptor *descriptor = result->add_feature();
174       descriptor->set_type(name);
175       if (!ParseFeature(descriptor)) {
176         return false;
177       }
178     }
179   }
180 
181   return true;
182 }
183 
ParseFeature(FeatureFunctionDescriptor * result)184 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
185   // Parse argument and parameters.
186   if (item_type_ == '(') {
187     if (!NextItem()) return false;
188     if (!ParseParameter(result)) return false;
189     while (item_type_ == ',') {
190       if (!NextItem()) return false;
191       if (!ParseParameter(result)) return false;
192     }
193 
194     if (item_type_ != ')') {
195       ReportError(") expected");
196       return false;
197     }
198     if (!NextItem()) return false;
199   }
200 
201   // Parse feature name.
202   if (item_type_ == ':') {
203     if (!NextItem()) return false;
204     if (item_type_ != NAME && item_type_ != STRING) {
205       ReportError("Feature name expected");
206       return false;
207     }
208     std::string name = item_text_;
209     if (!NextItem()) return false;
210 
211     // Set feature name.
212     result->set_name(name);
213   }
214 
215   // Parse sub-features.
216   if (item_type_ == '.') {
217     // Parse dotted sub-feature.
218     if (!NextItem()) return false;
219     if (item_type_ != NAME) {
220       ReportError("Feature type name expected");
221       return false;
222     }
223     std::string type = item_text_;
224     if (!NextItem()) return false;
225 
226     // Parse sub-feature.
227     FeatureFunctionDescriptor *subfeature = result->add_feature();
228     subfeature->set_type(type);
229     if (!ParseFeature(subfeature)) return false;
230   } else if (item_type_ == '{') {
231     // Parse sub-feature block.
232     if (!NextItem()) return false;
233     while (item_type_ != '}') {
234       if (item_type_ != NAME) {
235         ReportError("Feature type name expected");
236         return false;
237       }
238       std::string type = item_text_;
239       if (!NextItem()) return false;
240 
241       // Parse sub-feature.
242       FeatureFunctionDescriptor *subfeature = result->add_feature();
243       subfeature->set_type(type);
244       if (!ParseFeature(subfeature)) return false;
245     }
246     if (!NextItem()) return false;
247   }
248   return true;
249 }
250 
ParseParameter(FeatureFunctionDescriptor * result)251 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
252   if (item_type_ == NUMBER) {
253     int argument;
254     if (!LiteAtoi(item_text_, &argument)) {
255       ReportError("Unable to parse number");
256       return false;
257     }
258     if (!NextItem()) return false;
259 
260     // Set default argument for feature.
261     result->set_argument(argument);
262   } else if (item_type_ == NAME) {
263     std::string name = item_text_;
264     if (!NextItem()) return false;
265     if (item_type_ != '=') {
266       ReportError("= expected");
267       return false;
268     }
269     if (!NextItem()) return false;
270     if (item_type_ >= END) {
271       ReportError("Parameter value expected");
272       return false;
273     }
274     std::string value = item_text_;
275     if (!NextItem()) return false;
276 
277     // Add parameter to feature.
278     Parameter *parameter;
279     parameter = result->add_parameter();
280     parameter->set_name(name);
281     parameter->set_value(value);
282   } else {
283     ReportError("Syntax error in parameter list");
284     return false;
285   }
286   return true;
287 }
288 
289 }  // namespace mobile
}  // namespace libtextclassifier3
291