1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lang_id/common/fel/fel-parser.h"
18 
19 #include <ctype.h>
20 #include <string>
21 
22 #include "lang_id/common/lite_base/logging.h"
23 #include "lang_id/common/lite_strings/numbers.h"
24 
25 namespace libtextclassifier3 {
26 namespace mobile {
27 
28 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// a letter, an underscore or a forward slash.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  Plain char may be signed, so convert first; otherwise a
  // byte >= 0x80 is passed as a negative value, which is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalpha(uc) != 0) || (c == '_') || (c == '/');
}
32 
// Returns true iff character c can appear inside an identifier: a letter, a
// digit, an underscore, a dash or a forward slash.
inline bool IsValidCharInsideIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  Plain char may be signed, so convert first; otherwise a
  // byte >= 0x80 is passed as a negative value, which is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalnum(uc) != 0) || (c == '_') || (c == '-') || (c == '/');
}
37 
// Returns true iff character c can appear at the beginning of a number: a
// decimal digit or a leading '+' / '-' sign.
inline bool IsValidCharAtStartOfNumber(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  Plain char may be signed, so convert first; otherwise a
  // byte >= 0x80 is passed as a negative value, which is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '+') || (c == '-');
}
42 
// Returns true iff character c can appear inside a number (after its first
// character): a decimal digit or a '.'.
inline bool IsValidCharInsideNumber(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  Plain char may be signed, so convert first; otherwise a
  // byte >= 0x80 is passed as a negative value, which is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '.');
}
47 }  // namespace
48 
Initialize(const string & source)49 bool FELParser::Initialize(const string &source) {
50   // Initialize parser state.
51   source_ = source;
52   current_ = source_.begin();
53   item_start_ = line_start_ = current_;
54   line_number_ = item_line_number_ = 1;
55 
56   // Read first input item.
57   return NextItem();
58 }
59 
ReportError(const string & error_message)60 void FELParser::ReportError(const string &error_message) {
61   const int position = item_start_ - line_start_ + 1;
62   const string line(line_start_, current_);
63 
64   SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
65                    << ", position " << position << ": " << error_message
66                    << "\n    " << line << " <--HERE";
67 }
68 
Next()69 void FELParser::Next() {
70   // Move to the next input character. If we are at a line break update line
71   // number and line start position.
72   if (CurrentChar() == '\n') {
73     ++line_number_;
74     ++current_;
75     line_start_ = current_;
76   } else {
77     ++current_;
78   }
79 }
80 
NextItem()81 bool FELParser::NextItem() {
82   // Skip white space and comments.
83   while (!eos()) {
84     if (CurrentChar() == '#') {
85       // Skip comment.
86       while (!eos() && CurrentChar() != '\n') Next();
87     } else if (isspace(CurrentChar())) {
88       // Skip whitespace.
89       while (!eos() && isspace(CurrentChar())) Next();
90     } else {
91       break;
92     }
93   }
94 
95   // Record start position for next item.
96   item_start_ = current_;
97   item_line_number_ = line_number_;
98 
99   // Check for end of input.
100   if (eos()) {
101     item_type_ = END;
102     return true;
103   }
104 
105   // Parse number.
106   if (IsValidCharAtStartOfNumber(CurrentChar())) {
107     string::iterator start = current_;
108     Next();
109     while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
110     item_text_.assign(start, current_);
111     item_type_ = NUMBER;
112     return true;
113   }
114 
115   // Parse string.
116   if (CurrentChar() == '"') {
117     Next();
118     string::iterator start = current_;
119     while (CurrentChar() != '"') {
120       if (eos()) {
121         ReportError("Unterminated string");
122         return false;
123       }
124       Next();
125     }
126     item_text_.assign(start, current_);
127     item_type_ = STRING;
128     Next();
129     return true;
130   }
131 
132   // Parse identifier name.
133   if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
134     string::iterator start = current_;
135     while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
136       Next();
137     }
138     item_text_.assign(start, current_);
139     item_type_ = NAME;
140     return true;
141   }
142 
143   // Single character item.
144   item_type_ = CurrentChar();
145   Next();
146   return true;
147 }
148 
Parse(const string & source,FeatureExtractorDescriptor * result)149 bool FELParser::Parse(const string &source,
150                       FeatureExtractorDescriptor *result) {
151   // Initialize parser.
152   if (!Initialize(source)) {
153     return false;
154   }
155 
156   while (item_type_ != END) {
157     // Current item should be a feature name.
158     if (item_type_ != NAME) {
159       ReportError("Feature type name expected");
160       return false;
161     }
162     string name = item_text_;
163     if (!NextItem()) {
164       return false;
165     }
166 
167     if (item_type_ == '=') {
168       ReportError("Invalid syntax: feature expected");
169       return false;
170     } else {
171       // Parse feature.
172       FeatureFunctionDescriptor *descriptor = result->add_feature();
173       descriptor->set_type(name);
174       if (!ParseFeature(descriptor)) {
175         return false;
176       }
177     }
178   }
179 
180   return true;
181 }
182 
ParseFeature(FeatureFunctionDescriptor * result)183 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
184   // Parse argument and parameters.
185   if (item_type_ == '(') {
186     if (!NextItem()) return false;
187     if (!ParseParameter(result)) return false;
188     while (item_type_ == ',') {
189       if (!NextItem()) return false;
190       if (!ParseParameter(result)) return false;
191     }
192 
193     if (item_type_ != ')') {
194       ReportError(") expected");
195       return false;
196     }
197     if (!NextItem()) return false;
198   }
199 
200   // Parse feature name.
201   if (item_type_ == ':') {
202     if (!NextItem()) return false;
203     if (item_type_ != NAME && item_type_ != STRING) {
204       ReportError("Feature name expected");
205       return false;
206     }
207     string name = item_text_;
208     if (!NextItem()) return false;
209 
210     // Set feature name.
211     result->set_name(name);
212   }
213 
214   // Parse sub-features.
215   if (item_type_ == '.') {
216     // Parse dotted sub-feature.
217     if (!NextItem()) return false;
218     if (item_type_ != NAME) {
219       ReportError("Feature type name expected");
220       return false;
221     }
222     string type = item_text_;
223     if (!NextItem()) return false;
224 
225     // Parse sub-feature.
226     FeatureFunctionDescriptor *subfeature = result->add_feature();
227     subfeature->set_type(type);
228     if (!ParseFeature(subfeature)) return false;
229   } else if (item_type_ == '{') {
230     // Parse sub-feature block.
231     if (!NextItem()) return false;
232     while (item_type_ != '}') {
233       if (item_type_ != NAME) {
234         ReportError("Feature type name expected");
235         return false;
236       }
237       string type = item_text_;
238       if (!NextItem()) return false;
239 
240       // Parse sub-feature.
241       FeatureFunctionDescriptor *subfeature = result->add_feature();
242       subfeature->set_type(type);
243       if (!ParseFeature(subfeature)) return false;
244     }
245     if (!NextItem()) return false;
246   }
247   return true;
248 }
249 
ParseParameter(FeatureFunctionDescriptor * result)250 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
251   if (item_type_ == NUMBER) {
252     int argument;
253     if (!LiteAtoi(item_text_, &argument)) {
254       ReportError("Unable to parse number");
255       return false;
256     }
257     if (!NextItem()) return false;
258 
259     // Set default argument for feature.
260     result->set_argument(argument);
261   } else if (item_type_ == NAME) {
262     string name = item_text_;
263     if (!NextItem()) return false;
264     if (item_type_ != '=') {
265       ReportError("= expected");
266       return false;
267     }
268     if (!NextItem()) return false;
269     if (item_type_ >= END) {
270       ReportError("Parameter value expected");
271       return false;
272     }
273     string value = item_text_;
274     if (!NextItem()) return false;
275 
276     // Add parameter to feature.
277     Parameter *parameter;
278     parameter = result->add_parameter();
279     parameter->set_name(name);
280     parameter->set_value(value);
281   } else {
282     ReportError("Syntax error in parameter list");
283     return false;
284   }
285   return true;
286 }
287 
288 }  // namespace mobile
}  // namespace libtextclassifier3
290