1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Feature extraction language (FEL) parser.
18 //
19 // BNF grammar for FEL:
20 //
21 // <feature model> ::= { <feature extractor> }
22 //
23 // <feature extractor> ::= <extractor spec> |
24 //                         <extractor spec> '.' <feature extractor> |
25 //                         <extractor spec> '{' { <feature extractor> } '}'
26 //
27 // <extractor spec> ::= <extractor type>
28 //                      [ '(' <parameter list> ')' ]
29 //                      [ ':' <extractor name> ]
30 //
// <parameter list> ::= ( <parameter> | <argument> ) { ',' <parameter> }
32 //
33 // <parameter> ::= <parameter name> '=' <parameter value>
34 //
35 // <extractor type> ::= NAME
36 // <extractor name> ::= NAME | STRING
37 // <argument> ::= NUMBER
38 // <parameter name> ::= NAME
39 // <parameter value> ::= NUMBER | STRING | NAME
40 
41 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
42 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
43 
44 #include <string>
45 
46 #include "lang_id/common/fel/feature-descriptors.h"
47 #include "lang_id/common/lite_base/logging.h"
48 
49 namespace libtextclassifier3 {
50 namespace mobile {
51 
52 class FELParser {
53  public:
54   // Parses fml specification into feature extractor descriptor.
55   // Returns true on success, false on error (e.g., syntax errors).
56   bool Parse(const string &source, FeatureExtractorDescriptor *result);
57 
58  private:
59   // Initializes the parser with the source text.
60   // Returns true on success, false on syntax error.
61   bool Initialize(const string &source);
62 
63   // Outputs an error message, with context info.
64   void ReportError(const string &error_message);
65 
66   // Moves to the next input character.
67   void Next();
68 
69   // Moves to the next input item.  Sets item_text_ and item_type_ accordingly.
70   // Returns true on success, false on syntax error.
71   bool NextItem();
72 
73   // Parses a feature descriptor.
74   // Returns true on success, false on syntax error.
75   bool ParseFeature(FeatureFunctionDescriptor *result);
76 
77   // Parses a parameter specification.
78   // Returns true on success, false on syntax error.
79   bool ParseParameter(FeatureFunctionDescriptor *result);
80 
81   // Returns true if end of source input has been reached.
eos()82   bool eos() const { return current_ >= source_.end(); }
83 
84   // Returns current character.  Other methods should access the current
85   // character through this method (instead of using *current_ directly): this
86   // method performs extra safety checks.
87   //
88   // In case of an unsafe access, returns '\0'.
CurrentChar()89   char CurrentChar() const {
90     if ((current_ >= source_.begin()) && (current_ < source_.end())) {
91       return *current_;
92     } else {
93       SAFTM_LOG(ERROR) << "Unsafe char read";
94       return '\0';
95     }
96   }
97 
98   // Item types.
99   enum ItemTypes {
100     END = 0,
101     NAME = -1,
102     NUMBER = -2,
103     STRING = -3,
104   };
105 
106   // Source text.
107   string source_;
108 
109   // Current input position.
110   string::iterator current_;
111 
112   // Line number for current input position.
113   int line_number_;
114 
115   // Start position for current item.
116   string::iterator item_start_;
117 
118   // Start position for current line.
119   string::iterator line_start_;
120 
121   // Line number for current item.
122   int item_line_number_;
123 
124   // Item type for current item. If this is positive it is interpreted as a
125   // character. If it is negative it is interpreted as an item type.
126   int item_type_;
127 
128   // Text for current item.
129   string item_text_;
130 };
131 
132 }  // namespace mobile
}  // namespace libtextclassifier3
134 
135 #endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
136