#
#  Initial state.  The scan position is at the very beginning of the pattern.
#
start:
    '['                  n    set-open               ^set-finish
    '\'                  n    set-escape             ^set-finish
    default                   errorDeath                                    doRuleError

#
#  [set expression] parsing.
#      Every state that takes part in parsing a set expression has a name
#      that begins with "set-".
#

set-open:
    '^'                  n    set-open2                                     doSetNegate
    ':'                       set-posix                                     doSetPosixProp
    default                   set-open2

set-open2:
    ']'                  n    set-after-lit                                 doSetLiteral
    default                   set-start

#  set-posix:
#      A '[:' has been scanned.  When it really was a [:property:],
#      doSetPosixProp will have moved the scan to the closing ']'.
#      When it was not a property expression, the scan is still at the
#      opening ':', which should then be interpreted as part of a normal
#      set expression.
set-posix:
    ']'                  n    pop                                           doSetEnd
    ':'                       set-start
    default                   errorDeath                                    doRuleError      # should not be possible.

#
#  set-start    Reached after the '[' and any special-case leading characters
#               ('^' and/or ']'), but before everything else.
#               A '-' is a literal at this point.
#
set-start:
    ']'                  n    pop                                           doSetEnd
    '['                  n    set-open               ^set-after-set         doSetBeginUnion
    '\'                  n    set-escape
    '-'                  n    set-start-dash
    '&'                  n    set-start-amp
    default              n    set-after-lit                                 doSetLiteral

#  set-start-dash    Makes "[--" a syntax error.
#                    "[-x" is fine; both '-' and 'x' are literals.
#
set-start-dash:
    '-'                       errorDeath                                    doRuleError
    default                   set-after-lit                                 doSetAddDash

#  set-start-amp     Makes "[&&" a syntax error.
#                    "[&x" is fine; both '&' and 'x' are literals.
#
set-start-amp:
    '&'                       errorDeath                                    doRuleError
    default                   set-after-lit                                 doSetAddAmp

#
#  set-after-lit     The most recently scanned item was a literal character
#                    inside a set.  Anything may follow.  A single '-' or '&'
#                    is a literal in this context, not an operator.
set-after-lit:
    ']'                  n    pop                                           doSetEnd
    '['                  n    set-open               ^set-after-set         doSetBeginUnion
    '-'                  n    set-lit-dash
    '&'                  n    set-lit-amp
    '\'                  n    set-escape
    eof                       errorDeath                                    doSetNoCloseError
    default              n    set-after-lit                                 doSetLiteral

set-after-set:
    ']'                  n    pop                                           doSetEnd
    '['                  n    set-open               ^set-after-set         doSetBeginUnion
    '-'                  n    set-set-dash
    '&'                  n    set-set-amp
    '\'                  n    set-escape
    eof                       errorDeath                                    doSetNoCloseError
    default              n    set-after-lit                                 doSetLiteral

set-after-range:
    ']'                  n    pop                                           doSetEnd
    '['                  n    set-open               ^set-after-set         doSetBeginUnion
    '-'                  n    set-range-dash
    '&'                  n    set-range-amp
    '\'                  n    set-escape
    eof                       errorDeath                                    doSetNoCloseError
    default              n    set-after-lit                                 doSetLiteral


#  set-after-op
#      Reached after a "--" or "&&".
#      Closing the set at this point is an error.
#
set-after-op:
    '['                  n    set-open               ^set-after-set         doSetBeginUnion
    ']'                       errorDeath                                    doSetOpError
    '\'                  n    set-escape
    default              n    set-after-lit                                 doSetLiteral

#
#  set-set-amp
#      "[[set]&" has been scanned.
#      The '&' may be an intersection operator, if a set follows.
#      It may be the first half of a "&&" operator.
#      Otherwise it is a literal.
set-set-amp:
    '['                  n    set-open               ^set-after-set         doSetBeginIntersection1
    '&'                  n    set-after-op                                  doSetIntersection2
    default                   set-after-lit                                 doSetAddAmp


#  set-lit-amp       "[literals&" has been scanned.
#      The '&' may start a "&&" operator, or it may be a literal.
#      In [abc&[def]], the '&' is a literal.
#
set-lit-amp:
    '&'                  n    set-after-op                                  doSetIntersection2
    default                   set-after-lit                                 doSetAddAmp


#
#  set-set-dash
#      "[set]-" has been scanned.
#      The '-' may be a difference operator, if a [set] follows.
#      It may be the first half of a "--" operator.
#      Otherwise it is a literal.
set-set-dash:
    '['                  n    set-open               ^set-after-set         doSetBeginDifference1
    '-'                  n    set-after-op                                  doSetDifference2
    default                   set-after-lit                                 doSetAddDash


#
#  set-range-dash
#      "a-b-" or "\w-" has been scanned, i.e. any set-like or range-like item
#      after which a single trailing '-' should be a literal rather than a
#      set difference operation.
#      A trailing "--" is still a difference operator.
set-range-dash:
    '-'                  n    set-after-op                                  doSetDifference2
    default                   set-after-lit                                 doSetAddDash


set-range-amp:
    '&'                  n    set-after-op                                  doSetIntersection2
    default                   set-after-lit                                 doSetAddAmp


#  set-lit-dash
#      "[literals-" has been scanned.  The '-' may belong to a range, may
#      start a "--" operator, or may be a literal.
#          [abc-[def]]   the '-' is a literal (confirmed with a Java test)
#          [abc-\p{xx}   the '-' is an error
#          [abc-]        the '-' is a literal
#          [ab-xy]       the '-' is a range
#
set-lit-dash:
    '-'                  n    set-after-op                                  doSetDifference2
    '['                       set-after-lit                                 doSetAddDash
    ']'                       set-after-lit                                 doSetAddDash
    '\'                  n    set-lit-dash-escape
    default              n    set-after-range                               doSetRange

#  set-lit-dash-escape
#
#      "[literal-\" has been scanned.
#      This can be a range, provided the '\' introduces an escaped literal
#      character or a named character.
#      Anything else is an error.
#
set-lit-dash-escape:
    's'                       errorDeath                                    doSetOpError
    'S'                       errorDeath                                    doSetOpError
    'w'                       errorDeath                                    doSetOpError
    'W'                       errorDeath                                    doSetOpError
    'd'                       errorDeath                                    doSetOpError
    'D'                       errorDeath                                    doSetOpError
    'N'                       set-name-start         ^set-after-range       doStartNamedChar
    'x'                       set-hex-start          ^set-after-range       doStartHex
    default              n    set-after-range                               doSetRange
#  TODO fix 'N', 'x'
#  NOTE(review): the 'N' and 'x' rows above have no 'n' flag, whereas the
#  corresponding rows in set-escape below do -- confirm whether that is part
#  of what this TODO refers to.

#
#  set-escape
#      Back-slash escape handling shared by the set expression states.
#
set-escape:
    'p'                  n    set-prop-start         ^set-after-set         doStartSetProp
    'P'                  n    set-prop-start         ^set-after-set         doStartSetProp
    'N'                  n    set-name-start         ^set-after-lit         doStartNamedChar
    'x'                  n    set-hex-start          ^set-after-lit         doStartHex
    's'                  n    set-after-range                               doSetBackslash_s
    'S'                  n    set-after-range                               doSetBackslash_S
    'w'                  n    set-after-range                               doSetBackslash_w
    'W'                  n    set-after-range                               doSetBackslash_W
    'd'                  n    set-after-range                               doSetBackslash_d
    'D'                  n    set-after-range                               doSetBackslash_D
    default              n    set-after-lit                                 doSetLiteralEscaped
#  TODO add \r, \n, etc

#  Property escape: only a '{' is accepted at this point.
set-prop-start:
    '{'                  n    set-prop-cont
    default                   errorDeath

#  Inside the braces of a property escape, before any '=' or '≠'.
set-prop-cont:
    '}'                  n    pop                                           doPropName
    '='                  n    set-value                                     doPropRelation
    '≠'                  n    set-value                                     doPropRelation
    default              n    set-prop-cont

#  Inside the braces of a property escape, after '=' or '≠'.
set-value:
    '}'                  n    pop                                           doPropValue
    default              n    set-value

#  Named character escape: only a '{' is accepted at this point.
set-name-start:
    '{'                  n    set-name-cont
    default                   errorDeath

#  Inside the braces of a named character escape.
set-name-cont:
    '}'                  n    pop                                           doName
    [\ \-0-9A-Za-z]      n    set-name-cont
    default              n    errorDeath

#  Hex escape: only a '{' is accepted at this point.
set-hex-start:
    '{'                  n    set-hex-cont
    default                   errorDeath

#  Inside the braces of a hex escape; hex digits only.
set-hex-cont:
    '}'                  n    pop                                           doHex
    [0-9A-Fa-f]          n    set-hex-cont
    default              n    errorDeath

#
#  set-finish
#      The final ']' that completes a [set] has just been seen, and this state
#      was reached via a pop.  From here parsing leaves the set parsing world
#      and returns to generic regular expression parsing.
#
set-finish:
    default                   exit                                          doSetFinish