1 /*
2  * Copyright 2017 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "RegexNode.h"
9 
10 #include "NFA.h"
11 
12 std::vector<int> RegexNode::createStates(NFA* nfa, const std::vector<int>& accept) const {
13     std::vector<int> result;
14     switch (fKind) {
15         case kChar_Kind:
16             result.push_back(nfa->addState(NFAState(fPayload.fChar, accept)));
17             break;
18         case kCharset_Kind: {
19             std::vector<bool> chars;
20             for (const RegexNode& child : fChildren) {
21                 if (child.fKind == kChar_Kind) {
22                     while (chars.size() <= (size_t) child.fPayload.fChar) {
23                         chars.push_back(false);
24                     }
25                     chars[child.fPayload.fChar] = true;
26                 } else {
27                     ASSERT(child.fKind == kRange_Kind);
28                     while (chars.size() <= (size_t) child.fChildren[1].fPayload.fChar) {
29                         chars.push_back(false);
30                     }
31                     for (char c = child.fChildren[0].fPayload.fChar;
32                          c <= child.fChildren[1].fPayload.fChar;
33                          ++c) {
34                         chars[c] = true;
35                     }
36                 }
37             }
38             result.push_back(nfa->addState(NFAState(fPayload.fBool, chars, accept)));
39             break;
40         }
41         case kConcat_Kind: {
42             std::vector<int> right = fChildren[1].createStates(nfa, accept);
43             result = fChildren[0].createStates(nfa, right);
44             break;
45         }
46         case kDot_Kind:
47             result.push_back(nfa->addState(NFAState(NFAState::kDot_Kind, accept)));
48             break;
49         case kOr_Kind: {
50             std::vector<int> states = fChildren[0].createStates(nfa, accept);
51             result.insert(result.end(), states.begin(), states.end());
52             states = fChildren[1].createStates(nfa, accept);
53             result.insert(result.end(), states.begin(), states.end());
54             break;
55         }
56         case kPlus_Kind: {
57             std::vector<int> next = accept;
58             std::vector<int> placeholder;
59             int id = nfa->addState(NFAState(placeholder));
60             next.push_back(id);
61             result = fChildren[0].createStates(nfa, next);
62             nfa->fStates[id] = NFAState(result);
63             break;
64         }
65         case kQuestion_Kind:
66             result = fChildren[0].createStates(nfa, accept);
67             result.insert(result.end(), accept.begin(), accept.end());
68             break;
69         case kRange_Kind:
70             ABORT("unreachable");
71         case kStar_Kind: {
72             std::vector<int> next = accept;
73             std::vector<int> placeholder;
74             int id = nfa->addState(NFAState(placeholder));
75             next.push_back(id);
76             result = fChildren[0].createStates(nfa, next);
77             result.insert(result.end(), accept.begin(), accept.end());
78             nfa->fStates[id] = NFAState(result);
79             break;
80         }
81     }
82     return result;
83 }
84 
85 std::string RegexNode::description() const {
86     switch (fKind) {
87         case kChar_Kind:
88             return std::string(1, fPayload.fChar);
89         case kCharset_Kind: {
90             std::string result("[");
91             if (fPayload.fBool) {
92                 result += "^";
93             }
94             for (const RegexNode& c : fChildren) {
95                 result += c.description();
96             }
97             result += "]";
98             return result;
99         }
100         case kConcat_Kind:
101             return fChildren[0].description() + fChildren[1].description();
102         case kDot_Kind:
103             return ".";
104         case kOr_Kind:
105             return "(" + fChildren[0].description() + "|" + fChildren[1].description() + ")";
106         case kPlus_Kind:
107             return fChildren[0].description() + "+";
108         case kQuestion_Kind:
109             return fChildren[0].description() + "?";
110         case kRange_Kind:
111             return fChildren[0].description() + "-" + fChildren[1].description();
112         case kStar_Kind:
113             return fChildren[0].description() + "*";
114         default:
115             return "<" + std::to_string(fKind) + ">";
116     }
117 }
118