1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.analysis; 29 30 import org.antlr.misc.OrderedHashSet; 31 import org.antlr.misc.Utils; 32 import org.antlr.runtime.Token; 33 import org.antlr.tool.ErrorManager; 34 35 import java.util.*; 36 37 /** Code that embodies the NFA conversion to DFA. A new object is needed 38 * per DFA (also required for thread safety if multiple conversions 39 * launched). 40 */ 41 public class NFAToDFAConverter { 42 /** A list of DFA states we still need to process during NFA conversion */ 43 protected List<DFAState> work = new LinkedList<DFAState>(); 44 45 /** While converting NFA, we must track states that 46 * reference other rule's NFAs so we know what to do 47 * at the end of a rule. We need to know what context invoked 48 * this rule so we can know where to continue looking for NFA 49 * states. I'm tracking a context tree (record of rule invocation 50 * stack trace) for each alternative that could be predicted. 51 */ 52 protected NFAContext[] contextTrees; 53 54 /** We are converting which DFA? */ 55 protected DFA dfa; 56 57 public static boolean debug = false; 58 59 /** Should ANTLR launch multiple threads to convert NFAs to DFAs? 60 * With a 2-CPU box, I note that it's about the same single or 61 * multithreaded. Both CPU meters are going even when single-threaded 62 * so I assume the GC is killing us. Could be the compiler. When I 63 * run java -Xint mode, I get about 15% speed improvement with multiple 64 * threads. 65 */ 66 public static boolean SINGLE_THREADED_NFA_CONVERSION = true; 67 68 protected boolean computingStartState = false; 69 NFAToDFAConverter(DFA dfa)70 public NFAToDFAConverter(DFA dfa) { 71 this.dfa = dfa; 72 int nAlts = dfa.getNumberOfAlts(); 73 initContextTrees(nAlts); 74 } 75 convert()76 public void convert() { 77 //dfa.conversionStartTime = System.currentTimeMillis(); 78 79 // create the DFA start state 80 dfa.startState = computeStartState(); 81 82 // while more DFA states to check, process them 83 while ( work.size()>0 && 84 !dfa.nfa.grammar.NFAToDFAConversionExternallyAborted() ) 85 { 86 DFAState d = work.get(0); 87 if ( dfa.nfa.grammar.composite.watchNFAConversion ) { 88 System.out.println("convert DFA state "+d.stateNumber+ 89 " ("+d.nfaConfigurations.size()+" nfa states)"); 90 } 91 int k = dfa.getUserMaxLookahead(); 92 if ( k>0 && k==d.getLookaheadDepth() ) { 93 // we've hit max lookahead, make this a stop state 94 //System.out.println("stop state @k="+k+" (terminated early)"); 95 /* 96 List<Label> sampleInputLabels = d.dfa.probe.getSampleNonDeterministicInputSequence(d); 97 String input = d.dfa.probe.getInputSequenceDisplay(sampleInputLabels); 98 System.out.println("sample input: "+input); 99 */ 100 resolveNonDeterminisms(d); 101 // Check to see if we need to add any semantic predicate transitions 102 if ( d.isResolvedWithPredicates() ) { 103 addPredicateTransitions(d); 104 } 105 else { 106 d.setAcceptState(true); // must convert to accept state at k 107 } 108 } 109 else { 110 findNewDFAStatesAndAddDFATransitions(d); 111 } 112 work.remove(0); // done with it; remove from work list 113 } 114 115 // Find all manual syn preds (gated). These are not discovered 116 // in tryToResolveWithSemanticPredicates because they are implicitly 117 // added to every edge by code gen, DOT generation etc... 118 dfa.findAllGatedSynPredsUsedInDFAAcceptStates(); 119 } 120 121 /** From this first NFA state of a decision, create a DFA. 122 * Walk each alt in decision and compute closure from the start of that 123 * rule, making sure that the closure does not include other alts within 124 * that same decision. The idea is to associate a specific alt number 125 * with the starting closure so we can trace the alt number for all states 126 * derived from this. At a stop state in the DFA, we can return this alt 127 * number, indicating which alt is predicted. 128 * 129 * If this DFA is derived from an loop back NFA state, then the first 130 * transition is actually the exit branch of the loop. Rather than make 131 * this alternative one, let's make this alt n+1 where n is the number of 132 * alts in this block. This is nice to keep the alts of the block 1..n; 133 * helps with error messages. 134 * 135 * I handle nongreedy in findNewDFAStatesAndAddDFATransitions 136 * when nongreedy and EOT transition. Make state with EOT emanating 137 * from it the accept state. 138 */ computeStartState()139 protected DFAState computeStartState() { 140 NFAState alt = dfa.decisionNFAStartState; 141 DFAState startState = dfa.newState(); 142 computingStartState = true; 143 int i = 0; 144 int altNum = 1; 145 while ( alt!=null ) { 146 // find the set of NFA states reachable without consuming 147 // any input symbols for each alt. Keep adding to same 148 // overall closure that will represent the DFA start state, 149 // but track the alt number 150 NFAContext initialContext = contextTrees[i]; 151 // if first alt is derived from loopback/exit branch of loop, 152 // make alt=n+1 for n alts instead of 1 153 if ( i==0 && 154 dfa.getNFADecisionStartState().decisionStateType==NFAState.LOOPBACK ) 155 { 156 int numAltsIncludingExitBranch = dfa.nfa.grammar 157 .getNumberOfAltsForDecisionNFA(dfa.decisionNFAStartState); 158 altNum = numAltsIncludingExitBranch; 159 closure((NFAState)alt.transition[0].target, 160 altNum, 161 initialContext, 162 SemanticContext.EMPTY_SEMANTIC_CONTEXT, 163 startState, 164 true 165 ); 166 altNum = 1; // make next alt the first 167 } 168 else { 169 closure((NFAState)alt.transition[0].target, 170 altNum, 171 initialContext, 172 SemanticContext.EMPTY_SEMANTIC_CONTEXT, 173 startState, 174 true 175 ); 176 altNum++; 177 } 178 i++; 179 180 // move to next alternative 181 if ( alt.transition[1] ==null ) { 182 break; 183 } 184 alt = (NFAState)alt.transition[1].target; 185 } 186 187 // now DFA start state has the complete closure for the decision 188 // but we have tracked which alt is associated with which 189 // NFA states. 190 dfa.addState(startState); // make sure dfa knows about this state 191 work.add(startState); 192 computingStartState = false; 193 return startState; 194 } 195 196 /** From this node, add a d--a-->t transition for all 197 * labels 'a' where t is a DFA node created 198 * from the set of NFA states reachable from any NFA 199 * state in DFA state d. 200 */ findNewDFAStatesAndAddDFATransitions(DFAState d)201 protected void findNewDFAStatesAndAddDFATransitions(DFAState d) { 202 //System.out.println("work on DFA state "+d); 203 OrderedHashSet<Label> labels = d.getReachableLabels(); 204 //System.out.println("reachable labels="+labels); 205 206 /* 207 System.out.println("|reachable|/|nfaconfigs|="+ 208 labels.size()+"/"+d.getNFAConfigurations().size()+"="+ 209 labels.size()/(float)d.getNFAConfigurations().size()); 210 */ 211 212 // normally EOT is the "default" clause and decisions just 213 // choose that last clause when nothing else matches. DFA conversion 214 // continues searching for a unique sequence that predicts the 215 // various alts or until it finds EOT. So this rule 216 // 217 // DUH : ('x'|'y')* "xy!"; 218 // 219 // does not need a greedy indicator. The following rule works fine too 220 // 221 // A : ('x')+ ; 222 // 223 // When the follow branch could match what is in the loop, by default, 224 // the nondeterminism is resolved in favor of the loop. You don't 225 // get a warning because the only way to get this condition is if 226 // the DFA conversion hits the end of the token. In that case, 227 // we're not *sure* what will happen next, but it could be anything. 228 // Anyway, EOT is the default case which means it will never be matched 229 // as resolution goes to the lowest alt number. Exit branches are 230 // always alt n+1 for n alts in a block. 231 // 232 // When a loop is nongreedy and we find an EOT transition, the DFA 233 // state should become an accept state, predicting exit of loop. It's 234 // just reversing the resolution of ambiguity. 235 // TODO: should this be done in the resolveAmbig method? 236 Label EOTLabel = new Label(Label.EOT); 237 boolean containsEOT = labels!=null && labels.contains(EOTLabel); 238 if ( !dfa.isGreedy() && containsEOT ) { 239 convertToEOTAcceptState(d); 240 return; // no more work to do on this accept state 241 } 242 243 // if in filter mode for lexer, want to match shortest not longest 244 // string so if we see an EOT edge emanating from this state, then 245 // convert this state to an accept state. This only counts for 246 // The Tokens rule as all other decisions must continue to look for 247 // longest match. 248 // [Taking back out a few days later on Jan 17, 2006. This could 249 // be an option for the future, but this was wrong soluion for 250 // filtering.] 251 /* 252 if ( dfa.nfa.grammar.type==Grammar.LEXER && containsEOT ) { 253 String filterOption = (String)dfa.nfa.grammar.getOption("filter"); 254 boolean filterMode = filterOption!=null && filterOption.equals("true"); 255 if ( filterMode && d.dfa.isTokensRuleDecision() ) { 256 DFAState t = reach(d, EOTLabel); 257 if ( t.getNFAConfigurations().size()>0 ) { 258 convertToEOTAcceptState(d); 259 //System.out.println("state "+d+" has EOT target "+t.stateNumber); 260 return; 261 } 262 } 263 } 264 */ 265 266 int numberOfEdgesEmanating = 0; 267 Map<Integer, Transition> targetToLabelMap = new HashMap<Integer, Transition>(); 268 // for each label that could possibly emanate from NFAStates of d 269 int numLabels = 0; 270 if ( labels!=null ) { 271 numLabels = labels.size(); 272 } 273 for (int i=0; i<numLabels; i++) { 274 Label label = labels.get(i); 275 DFAState t = reach(d, label); 276 if ( debug ) { 277 System.out.println("DFA state after reach "+label+" "+d+"-" + 278 label.toString(dfa.nfa.grammar)+"->"+t); 279 } 280 if ( t==null ) { 281 // nothing was reached by label due to conflict resolution 282 // EOT also seems to be in here occasionally probably due 283 // to an end-of-rule state seeing it even though we'll pop 284 // an invoking state off the state; don't bother to conflict 285 // as this labels set is a covering approximation only. 286 continue; 287 } 288 //System.out.println("dfa.k="+dfa.getUserMaxLookahead()); 289 if ( t.getUniqueAlt()==NFA.INVALID_ALT_NUMBER ) { 290 // Only compute closure if a unique alt number is not known. 291 // If a unique alternative is mentioned among all NFA 292 // configurations then there is no possibility of needing to look 293 // beyond this state; also no possibility of a nondeterminism. 294 // This optimization May 22, 2006 just dropped -Xint time 295 // for analysis of Java grammar from 11.5s to 2s! Wow. 296 closure(t); // add any NFA states reachable via epsilon 297 } 298 299 /* 300 System.out.println("DFA state after closure "+d+"-"+ 301 label.toString(dfa.nfa.grammar)+ 302 "->"+t); 303 */ 304 305 // add if not in DFA yet and then make d-label->t 306 DFAState targetState = addDFAStateToWorkList(t); 307 308 numberOfEdgesEmanating += 309 addTransition(d, label, targetState, targetToLabelMap); 310 311 // lookahead of target must be one larger than d's k 312 // We are possibly setting the depth of a pre-existing state 313 // that is equal to one we just computed...not sure if that's 314 // ok. 315 targetState.setLookaheadDepth(d.getLookaheadDepth() + 1); 316 } 317 318 //System.out.println("DFA after reach / closures:\n"+dfa); 319 if ( !d.isResolvedWithPredicates() && numberOfEdgesEmanating==0 ) { 320 //System.out.println("dangling DFA state "+d+"\nAfter reach / closures:\n"+dfa); 321 // TODO: can fixed lookahead hit a dangling state case? 322 // TODO: yes, with left recursion 323 //System.err.println("dangling state alts: "+d.getAltSet()); 324 dfa.probe.reportDanglingState(d); 325 // turn off all configurations except for those associated with 326 // min alt number; somebody has to win else some input will not 327 // predict any alt. 328 int minAlt = resolveByPickingMinAlt(d, null); 329 // force it to be an accept state 330 // don't call convertToAcceptState() which merges stop states. 331 // other states point at us; don't want them pointing to dead states 332 d.setAcceptState(true); // might be adding new accept state for alt 333 dfa.setAcceptState(minAlt, d); 334 //convertToAcceptState(d, minAlt); // force it to be an accept state 335 } 336 337 // Check to see if we need to add any semantic predicate transitions 338 // might have both token and predicated edges from d 339 if ( d.isResolvedWithPredicates() ) { 340 addPredicateTransitions(d); 341 } 342 } 343 344 /** Add a transition from state d to targetState with label in normal case. 345 * if COLLAPSE_ALL_INCIDENT_EDGES, however, try to merge all edges from 346 * d to targetState; this means merging their labels. Another optimization 347 * is to reduce to a single EOT edge any set of edges from d to targetState 348 * where there exists an EOT state. EOT is like the wildcard so don't 349 * bother to test any other edges. Example: 350 * 351 * NUM_INT 352 * : '1'..'9' ('0'..'9')* ('l'|'L')? 353 * | '0' ('x'|'X') ('0'..'9'|'a'..'f'|'A'..'F')+ ('l'|'L')? 354 * | '0' ('0'..'7')* ('l'|'L')? 355 * ; 356 * 357 * The normal decision to predict alts 1, 2, 3 is: 358 * 359 * if ( (input.LA(1)>='1' && input.LA(1)<='9') ) { 360 * alt7=1; 361 * } 362 * else if ( input.LA(1)=='0' ) { 363 * if ( input.LA(2)=='X'||input.LA(2)=='x' ) { 364 * alt7=2; 365 * } 366 * else if ( (input.LA(2)>='0' && input.LA(2)<='7') ) { 367 * alt7=3; 368 * } 369 * else if ( input.LA(2)=='L'||input.LA(2)=='l' ) { 370 * alt7=3; 371 * } 372 * else { 373 * alt7=3; 374 * } 375 * } 376 * else error 377 * 378 * Clearly, alt 3 is predicted with extra work since it tests 0..7 379 * and [lL] before finally realizing that any character is actually 380 * ok at k=2. 381 * 382 * A better decision is as follows: 383 * 384 * if ( (input.LA(1)>='1' && input.LA(1)<='9') ) { 385 * alt7=1; 386 * } 387 * else if ( input.LA(1)=='0' ) { 388 * if ( input.LA(2)=='X'||input.LA(2)=='x' ) { 389 * alt7=2; 390 * } 391 * else { 392 * alt7=3; 393 * } 394 * } 395 * 396 * The DFA originally has 3 edges going to the state the predicts alt 3, 397 * but upon seeing the EOT edge (the "else"-clause), this method 398 * replaces the old merged label (which would have (0..7|l|L)) with EOT. 399 * The code generator then leaves alt 3 predicted with a simple else- 400 * clause. :) 401 * 402 * The only time the EOT optimization makes no sense is in the Tokens 403 * rule. We want EOT to truly mean you have matched an entire token 404 * so don't bother actually rewinding to execute that rule unless there 405 * are actions in that rule. For now, since I am not preventing 406 * backtracking from Tokens rule, I will simply allow the optimization. 407 */ addTransition(DFAState d, Label label, DFAState targetState, Map<Integer, Transition> targetToLabelMap)408 protected static int addTransition(DFAState d, 409 Label label, 410 DFAState targetState, 411 Map<Integer, Transition> targetToLabelMap) 412 { 413 //System.out.println(d.stateNumber+"-"+label.toString(dfa.nfa.grammar)+"->"+targetState.stateNumber); 414 int n = 0; 415 if ( DFAOptimizer.COLLAPSE_ALL_PARALLEL_EDGES ) { 416 // track which targets we've hit 417 Integer tI = Utils.integer(targetState.stateNumber); 418 Transition oldTransition = targetToLabelMap.get(tI); 419 if ( oldTransition!=null ) { 420 //System.out.println("extra transition to "+tI+" upon "+label.toString(dfa.nfa.grammar)); 421 // already seen state d to target transition, just add label 422 // to old label unless EOT 423 if ( label.getAtom()==Label.EOT ) { 424 // merge with EOT means old edge can go away 425 oldTransition.label = new Label(Label.EOT); 426 } 427 else { 428 // don't add anything to EOT, it's essentially the wildcard 429 if ( oldTransition.label.getAtom()!=Label.EOT ) { 430 // ok, not EOT, add in this label to old label 431 oldTransition.label.add(label); 432 } 433 //System.out.println("label updated to be "+oldTransition.label.toString(dfa.nfa.grammar)); 434 } 435 } 436 else { 437 // make a transition from d to t upon 'a' 438 n = 1; 439 label = (Label)label.clone(); // clone in case we alter later 440 int transitionIndex = d.addTransition(targetState, label); 441 Transition trans = d.getTransition(transitionIndex); 442 // track target/transition pairs 443 targetToLabelMap.put(tI, trans); 444 } 445 } 446 else { 447 n = 1; 448 d.addTransition(targetState, label); 449 } 450 return n; 451 } 452 453 /** For all NFA states (configurations) merged in d, 454 * compute the epsilon closure; that is, find all NFA states reachable 455 * from the NFA states in d via purely epsilon transitions. 456 */ closure(DFAState d)457 public void closure(DFAState d) { 458 if ( debug ) { 459 System.out.println("closure("+d+")"); 460 } 461 462 List<NFAConfiguration> configs = new ArrayList<NFAConfiguration>(); 463 // Because we are adding to the configurations in closure 464 // must clone initial list so we know when to stop doing closure 465 configs.addAll(d.nfaConfigurations); 466 // for each NFA configuration in d (abort if we detect non-LL(*) state) 467 int numConfigs = configs.size(); 468 for (int i = 0; i < numConfigs; i++) { 469 NFAConfiguration c = configs.get(i); 470 if ( c.singleAtomTransitionEmanating ) { 471 continue; // ignore NFA states w/o epsilon transitions 472 } 473 //System.out.println("go do reach for NFA state "+c.state); 474 // figure out reachable NFA states from each of d's nfa states 475 // via epsilon transitions. 476 // Fill configsInClosure rather than altering d configs inline 477 closure(dfa.nfa.getState(c.state), 478 c.alt, 479 c.context, 480 c.semanticContext, 481 d, 482 false); 483 } 484 //System.out.println("after closure d="+d); 485 d.closureBusy = null; // wack all that memory used during closure 486 } 487 488 /** Where can we get from NFA state p traversing only epsilon transitions? 489 * Add new NFA states + context to DFA state d. Also add semantic 490 * predicates to semantic context if collectPredicates is set. We only 491 * collect predicates at hoisting depth 0, meaning before any token/char 492 * have been recognized. This corresponds, during analysis, to the 493 * initial DFA start state construction closure() invocation. 494 * 495 * There are four cases of interest (the last being the usual transition): 496 * 497 * 1. Traverse an edge that takes us to the start state of another 498 * rule, r. We must push this state so that if the DFA 499 * conversion hits the end of rule r, then it knows to continue 500 * the conversion at state following state that "invoked" r. By 501 * construction, there is a single transition emanating from a rule 502 * ref node. 503 * 504 * 2. Reach an NFA state associated with the end of a rule, r, in the 505 * grammar from which it was built. We must add an implicit (i.e., 506 * don't actually add an epsilon transition) epsilon transition 507 * from r's end state to the NFA state following the NFA state 508 * that transitioned to rule r's start state. Because there are 509 * many states that could reach r, the context for a rule invocation 510 * is part of a call tree not a simple stack. When we fall off end 511 * of rule, "pop" a state off the call tree and add that state's 512 * "following" node to d's NFA configuration list. The context 513 * for this new addition will be the new "stack top" in the call tree. 514 * 515 * 3. Like case 2, we reach an NFA state associated with the end of a 516 * rule, r, in the grammar from which NFA was built. In this case, 517 * however, we realize that during this NFA→DFA conversion, no state 518 * invoked the current rule's NFA. There is no choice but to add 519 * all NFA states that follow references to r's start state. This is 520 * analogous to computing the FOLLOW(r) in the LL(k) world. By 521 * construction, even rule stop state has a chain of nodes emanating 522 * from it that points to every possible following node. This case 523 * is conveniently handled then by the 4th case. 524 * 525 * 4. Normal case. If p can reach another NFA state q, then add 526 * q to d's configuration list, copying p's context for q's context. 527 * If there is a semantic predicate on the transition, then AND it 528 * with any existing semantic context. 529 * 530 * Current state p is always added to d's configuration list as it's part 531 * of the closure as well. 532 * 533 * When is a closure operation in a cycle condition? While it is 534 * very possible to have the same NFA state mentioned twice 535 * within the same DFA state, there are two situations that 536 * would lead to nontermination of closure operation: 537 * 538 * o Whenever closure reaches a configuration where the same state 539 * with same or a suffix context already exists. This catches 540 * the IF-THEN-ELSE tail recursion cycle and things like 541 * 542 * a : A a | B ; 543 * 544 * the context will be $ (empty stack). 545 * 546 * We have to check 547 * larger context stacks because of (...)+ loops. For 548 * example, the context of a (...)+ can be nonempty if the 549 * surrounding rule is invoked by another rule: 550 * 551 * a : b A | X ; 552 * b : (B|)+ ; // nondeterministic by the way 553 * 554 * The context of the (B|)+ loop is "invoked from item 555 * a : . b A ;" and then the empty alt of the loop can reach back 556 * to itself. The context stack will have one "return 557 * address" element and so we must check for same state, same 558 * context for arbitrary context stacks. 559 * 560 * Idea: If we've seen this configuration before during closure, stop. 561 * We also need to avoid reaching same state with conflicting context. 562 * Ultimately analysis would stop and we'd find the conflict, but we 563 * should stop the computation. Previously I only checked for 564 * exact config. Need to check for same state, suffix context 565 * not just exact context. 566 * 567 * o Whenever closure reaches a configuration where state p 568 * is present in its own context stack. This means that 569 * p is a rule invocation state and the target rule has 570 * been called before. NFAContext.MAX_RECURSIVE_INVOCATIONS 571 * (See the comment there also) determines how many times 572 * it's possible to recurse; clearly we cannot recurse forever. 573 * Some grammars such as the following actually require at 574 * least one recursive call to correctly compute the lookahead: 575 * 576 * a : L ID R 577 * | b 578 * ; 579 * b : ID 580 * | L a R 581 * ; 582 * 583 * Input L ID R is ambiguous but to figure this out, ANTLR 584 * needs to go a->b->a->b to find the L ID sequence. 585 * 586 * Do not allow closure to add a configuration that would 587 * allow too much recursion. 588 * 589 * This case also catches infinite left recursion. 590 */ closure(NFAState p, int alt, NFAContext context, SemanticContext semanticContext, DFAState d, boolean collectPredicates)591 public void closure(NFAState p, 592 int alt, 593 NFAContext context, 594 SemanticContext semanticContext, 595 DFAState d, 596 boolean collectPredicates) 597 { 598 if ( debug ){ 599 System.out.println("closure at "+p.enclosingRule.name+" state "+p.stateNumber+"|"+ 600 alt+" filling DFA state "+d.stateNumber+" with context "+context 601 ); 602 } 603 604 // if ( DFA.MAX_TIME_PER_DFA_CREATION>0 && 605 // System.currentTimeMillis() - d.dfa.conversionStartTime >= 606 // DFA.MAX_TIME_PER_DFA_CREATION ) 607 // { 608 // // bail way out; we've blown up somehow 609 // throw new AnalysisTimeoutException(d.dfa); 610 // } 611 612 NFAConfiguration proposedNFAConfiguration = 613 new NFAConfiguration(p.stateNumber, 614 alt, 615 context, 616 semanticContext); 617 618 // Avoid infinite recursion 619 if ( closureIsBusy(d, proposedNFAConfiguration) ) { 620 if ( debug ) { 621 System.out.println("avoid visiting exact closure computation NFA config: "+ 622 proposedNFAConfiguration+" in "+p.enclosingRule.name); 623 System.out.println("state is "+d.dfa.decisionNumber+"."+d.stateNumber); 624 } 625 return; 626 } 627 628 // set closure to be busy for this NFA configuration 629 d.closureBusy.add(proposedNFAConfiguration); 630 631 // p itself is always in closure 632 d.addNFAConfiguration(p, proposedNFAConfiguration); 633 634 // Case 1: are we a reference to another rule? 635 Transition transition0 = p.transition[0]; 636 if ( transition0 instanceof RuleClosureTransition ) { 637 int depth = context.recursionDepthEmanatingFromState(p.stateNumber); 638 // Detect recursion by more than a single alt, which indicates 639 // that the decision's lookahead language is potentially non-regular; terminate 640 if ( depth == 1 && d.dfa.getUserMaxLookahead()==0 ) { // k=* only 641 d.dfa.recursiveAltSet.add(alt); // indicate that this alt is recursive 642 if ( d.dfa.recursiveAltSet.size()>1 ) { 643 //System.out.println("recursive alts: "+d.dfa.recursiveAltSet.toString()); 644 d.abortedDueToMultipleRecursiveAlts = true; 645 throw new NonLLStarDecisionException(d.dfa); 646 } 647 /* 648 System.out.println("alt "+alt+" in rule "+p.enclosingRule+" dec "+d.dfa.decisionNumber+ 649 " ctx: "+context); 650 System.out.println("d="+d); 651 */ 652 } 653 // Detect an attempt to recurse too high 654 // if this context has hit the max recursions for p.stateNumber, 655 // don't allow it to enter p.stateNumber again 656 if ( depth >= NFAContext.MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK ) { 657 /* 658 System.out.println("OVF state "+d); 659 System.out.println("proposed "+proposedNFAConfiguration); 660 */ 661 d.abortedDueToRecursionOverflow = true; 662 d.dfa.probe.reportRecursionOverflow(d, proposedNFAConfiguration); 663 if ( debug ) { 664 System.out.println("analysis overflow in closure("+d.stateNumber+")"); 665 } 666 return; 667 } 668 669 // otherwise, it's cool to (re)enter target of this rule ref 670 RuleClosureTransition ref = (RuleClosureTransition)transition0; 671 // first create a new context and push onto call tree, 672 // recording the fact that we are invoking a rule and 673 // from which state (case 2 below will get the following state 674 // via the RuleClosureTransition emanating from the invoking state 675 // pushed on the stack). 676 // Reset the context to reflect the fact we invoked rule 677 NFAContext newContext = new NFAContext(context, p); 678 //System.out.println("invoking rule "+ref.rule.name); 679 // System.out.println(" context="+context); 680 // traverse epsilon edge to new rule 681 NFAState ruleTarget = (NFAState)ref.target; 682 closure(ruleTarget, alt, newContext, semanticContext, d, collectPredicates); 683 } 684 // Case 2: end of rule state, context (i.e., an invoker) exists 685 else if ( p.isAcceptState() && context.parent!=null ) { 686 NFAState whichStateInvokedRule = context.invokingState; 687 RuleClosureTransition edgeToRule = 688 (RuleClosureTransition)whichStateInvokedRule.transition[0]; 689 NFAState continueState = edgeToRule.followState; 690 NFAContext newContext = context.parent; // "pop" invoking state 691 closure(continueState, alt, newContext, semanticContext, d, collectPredicates); 692 } 693 // Case 3: end of rule state, nobody invoked this rule (no context) 694 // Fall thru to be handled by case 4 automagically. 695 // Case 4: ordinary NFA->DFA conversion case: simple epsilon transition 696 else { 697 // recurse down any epsilon transitions 698 if ( transition0!=null && transition0.isEpsilon() ) { 699 boolean collectPredicatesAfterAction = collectPredicates; 700 if ( transition0.isAction() && collectPredicates ) { 701 collectPredicatesAfterAction = false; 702 /* 703 if ( computingStartState ) { 704 System.out.println("found action during prediction closure "+((ActionLabel)transition0.label).actionAST.token); 705 } 706 */ 707 } 708 closure((NFAState)transition0.target, 709 alt, 710 context, 711 semanticContext, 712 d, 713 collectPredicatesAfterAction 714 ); 715 } 716 else if ( transition0!=null && transition0.isSemanticPredicate() ) { 717 SemanticContext labelContext = transition0.label.getSemanticContext(); 718 if ( computingStartState ) { 719 if ( collectPredicates ) { 720 // only indicate we can see a predicate if we're collecting preds 721 // Could be computing start state & seen an action before this. 722 dfa.predicateVisible = true; 723 } 724 else { 725 // this state has a pred, but we can't see it. 726 dfa.hasPredicateBlockedByAction = true; 727 // System.out.println("found pred during prediction but blocked by action found previously"); 728 } 729 } 730 // continue closure here too, but add the sem pred to ctx 731 SemanticContext newSemanticContext = semanticContext; 732 if ( collectPredicates ) { 733 // AND the previous semantic context with new pred 734 // do not hoist syn preds from other rules; only get if in 735 // starting state's rule (i.e., context is empty) 736 int walkAlt = 737 dfa.decisionNFAStartState.translateDisplayAltToWalkAlt(alt); 738 NFAState altLeftEdge = 739 dfa.nfa.grammar.getNFAStateForAltOfDecision(dfa.decisionNFAStartState,walkAlt); 740 /* 741 System.out.println("state "+p.stateNumber+" alt "+alt+" walkAlt "+walkAlt+" trans to "+transition0.target); 742 System.out.println("DFA start state "+dfa.decisionNFAStartState.stateNumber); 743 System.out.println("alt left edge "+altLeftEdge.stateNumber+ 744 ", epsilon target "+ 745 altLeftEdge.transition(0).target.stateNumber); 746 */ 747 if ( !labelContext.isSyntacticPredicate() || 748 p==altLeftEdge.transition[0].target ) 749 { 750 //System.out.println("&"+labelContext+" enclosingRule="+p.enclosingRule); 751 newSemanticContext = 752 SemanticContext.and(semanticContext, labelContext); 753 } 754 } 755 closure((NFAState)transition0.target, 756 alt, 757 context, 758 newSemanticContext, 759 d, 760 collectPredicates); 761 } 762 Transition transition1 = p.transition[1]; 763 if ( transition1!=null && transition1.isEpsilon() ) { 764 closure((NFAState)transition1.target, 765 alt, 766 context, 767 semanticContext, 768 d, 769 collectPredicates); 770 } 771 } 772 773 // don't remove "busy" flag as we want to prevent all 774 // references to same config of state|alt|ctx|semCtx even 775 // if resulting from another NFA state 776 } 777 778 /** A closure operation should abort if that computation has already 779 * been done or a computation with a conflicting context has already 780 * been done. If proposed NFA config's state and alt are the same 781 * there is potentially a problem. If the stack context is identical 782 * then clearly the exact same computation is proposed. If a context 783 * is a suffix of the other, then again the computation is in an 784 * identical context. ?$ and ??$ are considered the same stack. 785 * We could walk configurations linearly doing the comparison instead 786 * of a set for exact matches but it's much slower because you can't 787 * do a Set lookup. I use exact match as ANTLR 788 * always detect the conflict later when checking for context suffixes... 789 * I check for left-recursive stuff and terminate before analysis to 790 * avoid need to do this more expensive computation. 791 * 792 * 12-31-2007: I had to use the loop again rather than simple 793 * closureBusy.contains(proposedNFAConfiguration) lookup. The 794 * semantic context should not be considered when determining if 795 * a closure operation is busy. I saw a FOLLOW closure operation 796 * spin until time out because the predicate context kept increasing 797 * in size even though it's same boolean value. This seems faster also 798 * because I'm not doing String.equals on the preds all the time. 799 * 800 * 05-05-2008: Hmm...well, i think it was a mistake to remove the sem 801 * ctx check below...adding back in. Coincides with report of ANTLR 802 * getting super slow: http://www.antlr.org:8888/browse/ANTLR-235 803 * This could be because it doesn't properly compute then resolve 804 * a predicate expression. Seems to fix unit test: 805 * TestSemanticPredicates.testSemanticContextPreventsEarlyTerminationOfClosure() 806 * Changing back to Set from List. Changed a large grammar from 8 minutes 807 * to 11 seconds. Cool. Closing ANTLR-235. 808 */ closureIsBusy(DFAState d, NFAConfiguration proposedNFAConfiguration)809 public static boolean closureIsBusy(DFAState d, 810 NFAConfiguration proposedNFAConfiguration) 811 { 812 return d.closureBusy.contains(proposedNFAConfiguration); 813 /* 814 int numConfigs = d.closureBusy.size(); 815 // Check epsilon cycle (same state, same alt, same context) 816 for (int i = 0; i < numConfigs; i++) { 817 NFAConfiguration c = (NFAConfiguration) d.closureBusy.get(i); 818 if ( proposedNFAConfiguration.state==c.state && 819 proposedNFAConfiguration.alt==c.alt && 820 proposedNFAConfiguration.semanticContext.equals(c.semanticContext) && 821 proposedNFAConfiguration.context.suffix(c.context) ) 822 { 823 return true; 824 } 825 } 826 return false; 827 */ 828 } 829 830 /** Given the set of NFA states in DFA state d, find all NFA states 831 * reachable traversing label arcs. By definition, there can be 832 * only one DFA state reachable by an atom from DFA state d so we must 833 * find and merge all NFA states reachable via label. Return a new 834 * DFAState that has all of those NFA states with their context (i.e., 835 * which alt do they predict and where to return to if they fall off 836 * end of a rule). 837 * 838 * Because we cannot jump to another rule nor fall off the end of a rule 839 * via a non-epsilon transition, NFA states reachable from d have the 840 * same configuration as the NFA state in d. So if NFA state 7 in d's 841 * configurations can reach NFA state 13 then 13 will be added to the 842 * new DFAState (labelDFATarget) with the same configuration as state 843 * 7 had. 844 * 845 * This method does not see EOT transitions off the end of token rule 846 * accept states if the rule was invoked by somebody. 847 */ reach(DFAState d, Label label)848 public DFAState reach(DFAState d, Label label) { 849 //System.out.println("reach "+label.toString(dfa.nfa.grammar)+" from "+d.stateNumber); 850 DFAState labelDFATarget = dfa.newState(); 851 852 // for each NFA state in d with a labeled edge, 853 // add in target states for label 854 //System.out.println("size(d.state="+d.stateNumber+")="+d.nfaConfigurations.size()); 855 //System.out.println("size(labeled edge states)="+d.configurationsWithLabeledEdges.size()); 856 List<NFAConfiguration> configs = d.configurationsWithLabeledEdges; 857 int numConfigs = configs.size(); 858 for (int i = 0; i < numConfigs; i++) { 859 NFAConfiguration c = configs.get(i); 860 if ( c.resolved || c.resolveWithPredicate ) { 861 continue; // the conflict resolver indicates we must leave alone 862 } 863 NFAState p = dfa.nfa.getState(c.state); 864 // by design of the grammar->NFA conversion, only transition 0 865 // may have a non-epsilon edge. 866 Transition edge = p.transition[0]; 867 if ( edge==null || !c.singleAtomTransitionEmanating ) { 868 continue; 869 } 870 Label edgeLabel = edge.label; 871 872 // SPECIAL CASE 873 // if it's an EOT transition on end of lexer rule, but context 874 // stack is not empty, then don't see the EOT; the closure 875 // will have added in the proper states following the reference 876 // to this rule in the invoking rule. In other words, if 877 // somebody called this rule, don't see the EOT emanating from 878 // this accept state. 879 if ( c.context.parent!=null && edgeLabel.label==Label.EOT ) { 880 continue; 881 } 882 883 // Labels not unique at this point (not until addReachableLabels) 884 // so try simple int label match before general set intersection 885 //System.out.println("comparing "+edgeLabel+" with "+label); 886 if ( Label.intersect(label, edgeLabel) ) { 887 // found a transition with label; 888 // add NFA target to (potentially) new DFA state 889 NFAConfiguration newC = labelDFATarget.addNFAConfiguration( 890 (NFAState)edge.target, 891 c.alt, 892 c.context, 893 c.semanticContext); 894 } 895 } 896 if ( labelDFATarget.nfaConfigurations.size()==0 ) { 897 // kill; it's empty 898 dfa.setState(labelDFATarget.stateNumber, null); 899 labelDFATarget = null; 900 } 901 return labelDFATarget; 902 } 903 904 /** Walk the configurations of this DFA state d looking for the 905 * configuration, c, that has a transition on EOT. State d should 906 * be converted to an accept state predicting the c.alt. Blast 907 * d's current configuration set and make it just have config c. 908 * 909 * TODO: can there be more than one config with EOT transition? 910 * That would mean that two NFA configurations could reach the 911 * end of the token with possibly different predicted alts. 912 * Seems like that would be rare or impossible. Perhaps convert 913 * this routine to find all such configs and give error if >1. 914 */ convertToEOTAcceptState(DFAState d)915 protected void convertToEOTAcceptState(DFAState d) { 916 Label eot = new Label(Label.EOT); 917 int numConfigs = d.nfaConfigurations.size(); 918 for (int i = 0; i < numConfigs; i++) { 919 NFAConfiguration c = d.nfaConfigurations.get(i); 920 if ( c.resolved || c.resolveWithPredicate ) { 921 continue; // the conflict resolver indicates we must leave alone 922 } 923 NFAState p = dfa.nfa.getState(c.state); 924 Transition edge = p.transition[0]; 925 Label edgeLabel = edge.label; 926 if ( edgeLabel.equals(eot) ) { 927 //System.out.println("config with EOT: "+c); 928 d.setAcceptState(true); 929 //System.out.println("d goes from "+d); 930 d.nfaConfigurations.clear(); 931 d.addNFAConfiguration(p,c.alt,c.context,c.semanticContext); 932 //System.out.println("to "+d); 933 return; // assume only one EOT transition 934 } 935 } 936 } 937 938 /** Add a new DFA state to the DFA if not already present. 939 * If the DFA state uniquely predicts a single alternative, it 940 * becomes a stop state; don't add to work list. Further, if 941 * there exists an NFA state predicted by > 1 different alternatives 942 * and with the same syn and sem context, the DFA is nondeterministic for 943 * at least one input sequence reaching that NFA state. 944 */ addDFAStateToWorkList(DFAState d)945 protected DFAState addDFAStateToWorkList(DFAState d) { 946 DFAState existingState = dfa.addState(d); 947 if ( d != existingState ) { 948 // already there...use/return the existing DFA state. 949 // But also set the states[d.stateNumber] to the existing 950 // DFA state because the closureIsBusy must report 951 // infinite recursion on a state before it knows 952 // whether or not the state will already be 953 // found after closure on it finishes. It could be 954 // referring to a state that will ultimately not make it 955 // into the reachable state space and the error 956 // reporting must be able to compute the path from 957 // start to the error state with infinite recursion 958 dfa.setState(d.stateNumber, existingState); 959 return existingState; 960 } 961 962 // if not there, then examine new state. 963 964 // resolve syntactic conflicts by choosing a single alt or 965 // by using semantic predicates if present. 966 resolveNonDeterminisms(d); 967 968 // If deterministic, don't add this state; it's an accept state 969 // Just return as a valid DFA state 970 int alt = d.getUniquelyPredictedAlt(); 971 if ( alt!=NFA.INVALID_ALT_NUMBER ) { // uniquely predicts an alt? 972 d = convertToAcceptState(d, alt); 973 /* 974 System.out.println("convert to accept; DFA "+d.dfa.decisionNumber+" state "+d.stateNumber+" uniquely predicts alt "+ 975 d.getUniquelyPredictedAlt()); 976 */ 977 } 978 else { 979 // unresolved, add to work list to continue NFA conversion 980 work.add(d); 981 } 982 return d; 983 } 984 convertToAcceptState(DFAState d, int alt)985 protected DFAState convertToAcceptState(DFAState d, int alt) { 986 // only merge stop states if they are deterministic and no 987 // recursion problems and only if they have the same gated pred 988 // context! 989 // Later, the error reporting may want to trace the path from 990 // the start state to the nondet state 991 if ( DFAOptimizer.MERGE_STOP_STATES && 992 d.getNonDeterministicAlts()==null && 993 !d.abortedDueToRecursionOverflow && 994 !d.abortedDueToMultipleRecursiveAlts ) 995 { 996 // check to see if we already have an accept state for this alt 997 // [must do this after we resolve nondeterminisms in general] 998 DFAState acceptStateForAlt = dfa.getAcceptState(alt); 999 if ( acceptStateForAlt!=null ) { 1000 // we already have an accept state for alt; 1001 // Are their gate sem pred contexts the same? 1002 // For now we assume a braindead version: both must not 1003 // have gated preds or share exactly same single gated pred. 1004 // The equals() method is only defined on Predicate contexts not 1005 // OR etc... 1006 SemanticContext gatedPreds = d.getGatedPredicatesInNFAConfigurations(); 1007 SemanticContext existingStateGatedPreds = 1008 acceptStateForAlt.getGatedPredicatesInNFAConfigurations(); 1009 if ( (gatedPreds==null && existingStateGatedPreds==null) || 1010 ((gatedPreds!=null && existingStateGatedPreds!=null) && 1011 gatedPreds.equals(existingStateGatedPreds)) ) 1012 { 1013 // make this d.statenumber point at old DFA state 1014 dfa.setState(d.stateNumber, acceptStateForAlt); 1015 dfa.removeState(d); // remove this state from unique DFA state set 1016 d = acceptStateForAlt; // use old accept state; throw this one out 1017 return d; 1018 } 1019 // else consider it a new accept state; fall through. 1020 } 1021 } 1022 d.setAcceptState(true); // new accept state for alt 1023 dfa.setAcceptState(alt, d); 1024 return d; 1025 } 1026 1027 /** If > 1 NFA configurations within this DFA state have identical 1028 * NFA state and context, but differ in their predicted 1029 * TODO update for new context suffix stuff 3-9-2005 1030 * alternative then a single input sequence predicts multiple alts. 1031 * The NFA decision is therefore syntactically indistinguishable 1032 * from the left edge upon at least one input sequence. We may 1033 * terminate the NFA to DFA conversion for these paths since no 1034 * paths emanating from those NFA states can possibly separate 1035 * these conjoined twins once interwined to make things 1036 * deterministic (unless there are semantic predicates; see below). 1037 * 1038 * Upon a nondeterministic set of NFA configurations, we should 1039 * report a problem to the grammar designer and resolve the issue 1040 * by aribitrarily picking the first alternative (this usually 1041 * ends up producing the most natural behavior). Pick the lowest 1042 * alt number and just turn off all NFA configurations 1043 * associated with the other alts. Rather than remove conflicting 1044 * NFA configurations, I set the "resolved" bit so that future 1045 * computations will ignore them. In this way, we maintain the 1046 * complete DFA state with all its configurations, but prevent 1047 * future DFA conversion operations from pursuing undesirable 1048 * paths. Remember that we want to terminate DFA conversion as 1049 * soon as we know the decision is deterministic *or* 1050 * nondeterministic. 1051 * 1052 * [BTW, I have convinced myself that there can be at most one 1053 * set of nondeterministic configurations in a DFA state. Only NFA 1054 * configurations arising from the same input sequence can appear 1055 * in a DFA state. There is no way to have another complete set 1056 * of nondeterministic NFA configurations without another input 1057 * sequence, which would reach a different DFA state. Therefore, 1058 * the two nondeterministic NFA configuration sets cannot collide 1059 * in the same DFA state.] 1060 * 1061 * Consider DFA state {(s|1),(s|2),(s|3),(t|3),(v|4)} where (s|a) 1062 * is state 's' and alternative 'a'. Here, configuration set 1063 * {(s|1),(s|2),(s|3)} predicts 3 different alts. Configurations 1064 * (s|2) and (s|3) are "resolved", leaving {(s|1),(t|3),(v|4)} as 1065 * items that must still be considered by the DFA conversion 1066 * algorithm in DFA.findNewDFAStatesAndAddDFATransitions(). 1067 * 1068 * Consider the following grammar where alts 1 and 2 are no 1069 * problem because of the 2nd lookahead symbol. Alts 3 and 4 are 1070 * identical and will therefore reach the rule end NFA state but 1071 * predicting 2 different alts (no amount of future lookahead 1072 * will render them deterministic/separable): 1073 * 1074 * a : A B 1075 * | A C 1076 * | A 1077 * | A 1078 * ; 1079 * 1080 * Here is a (slightly reduced) NFA of this grammar: 1081 * 1082 * (1)-A->(2)-B->(end)-EOF->(8) 1083 * | ^ 1084 * (2)-A->(3)-C----| 1085 * | ^ 1086 * (4)-A->(5)------| 1087 * | ^ 1088 * (6)-A->(7)------| 1089 * 1090 * where (n) is NFA state n. To begin DFA conversion, the start 1091 * state is created: 1092 * 1093 * {(1|1),(2|2),(4|3),(6|4)} 1094 * 1095 * Upon A, all NFA configurations lead to new NFA states yielding 1096 * new DFA state: 1097 * 1098 * {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)} 1099 * 1100 * where the configurations with state end in them are added 1101 * during the epsilon closure operation. State end predicts both 1102 * alts 3 and 4. An error is reported, the latter configuration is 1103 * flagged as resolved leaving the DFA state as: 1104 * 1105 * {(2|1),(3|2),(5|3),(7|4|resolved),(end|3),(end|4|resolved)} 1106 * 1107 * As NFA configurations are added to a DFA state during its 1108 * construction, the reachable set of labels is computed. Here 1109 * reachable is {B,C,EOF} because there is at least one NFA state 1110 * in the DFA state that can transition upon those symbols. 1111 * 1112 * The final DFA looks like: 1113 * 1114 * {(1|1),(2|2),(4|3),(6|4)} 1115 * | 1116 * v 1117 * {(2|1),(3|2),(5|3),(7|4),(end|3),(end|4)} -B-> (end|1) 1118 * | | 1119 * C ----EOF-> (8,3) 1120 * | 1121 * v 1122 * (end|2) 1123 * 1124 * Upon AB, alt 1 is predicted. Upon AC, alt 2 is predicted. 1125 * Upon A EOF, alt 3 is predicted. Alt 4 is not a viable 1126 * alternative. 1127 * 1128 * The algorithm is essentially to walk all the configurations 1129 * looking for a conflict of the form (s|i) and (s|j) for i!=j. 1130 * Use a hash table to track state+context pairs for collisions 1131 * so that we have O(n) to walk the n configurations looking for 1132 * a conflict. Upon every conflict, track the alt number so 1133 * we have a list of all nondeterministically predicted alts. Also 1134 * track the minimum alt. Next go back over the configurations, setting 1135 * the "resolved" bit for any that have an alt that is a member of 1136 * the nondeterministic set. This will effectively remove any alts 1137 * but the one we want from future consideration. 1138 * 1139 * See resolveWithSemanticPredicates() 1140 * 1141 * AMBIGUOUS TOKENS 1142 * 1143 * With keywords and ID tokens, there is an inherit ambiguity in that 1144 * "int" can be matched by ID also. Each lexer rule has an EOT 1145 * transition emanating from it which is used whenever the end of 1146 * a rule is reached and another token rule did not invoke it. EOT 1147 * is the only thing that can be seen next. If two rules are identical 1148 * like "int" and "int" then the 2nd def is unreachable and you'll get 1149 * a warning. We prevent a warning though for the keyword/ID issue as 1150 * ID is still reachable. This can be a bit weird. '+' rule then a 1151 * '+'|'+=' rule will fail to match '+' for the 2nd rule. 1152 * 1153 * If all NFA states in this DFA state are targets of EOT transitions, 1154 * (and there is more than one state plus no unique alt is predicted) 1155 * then DFA conversion will leave this state as a dead state as nothing 1156 * can be reached from this state. To resolve the ambiguity, just do 1157 * what flex and friends do: pick the first rule (alt in this case) to 1158 * win. This means you should put keywords before the ID rule. 1159 * If the DFA state has only one NFA state then there is no issue: 1160 * it uniquely predicts one alt. :) Problem 1161 * states will look like this during conversion: 1162 * 1163 * DFA 1:{9|1, 19|2, 14|3, 20|2, 23|2, 24|2, ...}-<EOT>->5:{41|3, 42|2} 1164 * 1165 * Worse, when you have two identical literal rules, you will see 3 alts 1166 * in the EOT state (one for ID and one each for the identical rules). 1167 */ resolveNonDeterminisms(DFAState d)1168 public void resolveNonDeterminisms(DFAState d) { 1169 if ( debug ) { 1170 System.out.println("resolveNonDeterminisms "+d.toString()); 1171 } 1172 boolean conflictingLexerRules = false; 1173 Set<Integer> nondeterministicAlts = d.getNonDeterministicAlts(); 1174 if ( debug && nondeterministicAlts!=null ) { 1175 System.out.println("nondet alts="+nondeterministicAlts); 1176 } 1177 1178 // CHECK FOR AMBIGUOUS EOT (if |allAlts|>1 and EOT state, resolve) 1179 // grab any config to see if EOT state; any other configs must 1180 // transition on EOT to get to this DFA state as well so all 1181 // states in d must be targets of EOT. These are the end states 1182 // created in NFAFactory.build_EOFState 1183 NFAConfiguration anyConfig = d.nfaConfigurations.get(0); 1184 NFAState anyState = dfa.nfa.getState(anyConfig.state); 1185 1186 // if d is target of EOT and more than one predicted alt 1187 // indicate that d is nondeterministic on all alts otherwise 1188 // it looks like state has no problem 1189 if ( anyState.isEOTTargetState() ) { 1190 Set<Integer> allAlts = d.getAltSet(); 1191 // is more than 1 alt predicted? 1192 if ( allAlts!=null && allAlts.size()>1 ) { 1193 nondeterministicAlts = allAlts; 1194 // track Tokens rule issues differently than other decisions 1195 if ( d.dfa.isTokensRuleDecision() ) { 1196 dfa.probe.reportLexerRuleNondeterminism(d,allAlts); 1197 //System.out.println("Tokens rule DFA state "+d+" nondeterministic"); 1198 conflictingLexerRules = true; 1199 } 1200 } 1201 } 1202 1203 // if no problems return unless we aborted work on d to avoid inf recursion 1204 if ( !d.abortedDueToRecursionOverflow && nondeterministicAlts==null ) { 1205 return; // no problems, return 1206 } 1207 1208 // if we're not a conflicting lexer rule and we didn't abort, report ambig 1209 // We should get a report for abort so don't give another 1210 if ( !d.abortedDueToRecursionOverflow && !conflictingLexerRules ) { 1211 // TODO: with k=x option set, this is called twice for same state 1212 dfa.probe.reportNondeterminism(d, nondeterministicAlts); 1213 // TODO: how to turn off when it's only the FOLLOW that is 1214 // conflicting. This used to shut off even alts i,j < n 1215 // conflict warnings. :( 1216 } 1217 1218 // ATTEMPT TO RESOLVE WITH SEMANTIC PREDICATES 1219 boolean resolved = 1220 tryToResolveWithSemanticPredicates(d, nondeterministicAlts); 1221 if ( resolved ) { 1222 if ( debug ) { 1223 System.out.println("resolved DFA state "+d.stateNumber+" with pred"); 1224 } 1225 d.resolvedWithPredicates = true; 1226 dfa.probe.reportNondeterminismResolvedWithSemanticPredicate(d); 1227 return; 1228 } 1229 1230 // RESOLVE SYNTACTIC CONFLICT BY REMOVING ALL BUT ONE ALT 1231 resolveByChoosingFirstAlt(d, nondeterministicAlts); 1232 1233 //System.out.println("state "+d.stateNumber+" resolved to alt "+winningAlt); 1234 } 1235 resolveByChoosingFirstAlt(DFAState d, Set<Integer> nondeterministicAlts)1236 protected int resolveByChoosingFirstAlt(DFAState d, Set<Integer> nondeterministicAlts) { 1237 int winningAlt; 1238 if ( dfa.isGreedy() ) { 1239 winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts); 1240 } 1241 else { 1242 // If nongreedy, the exit alt shout win, but only if it's 1243 // involved in the nondeterminism! 1244 /* 1245 System.out.println("resolving exit alt for decision="+ 1246 dfa.decisionNumber+" state="+d); 1247 System.out.println("nondet="+nondeterministicAlts); 1248 System.out.println("exit alt "+exitAlt); 1249 */ 1250 int exitAlt = dfa.getNumberOfAlts(); 1251 if ( nondeterministicAlts.contains(Utils.integer(exitAlt)) ) { 1252 // if nongreedy and exit alt is one of those nondeterministic alts 1253 // predicted, resolve in favor of what follows block 1254 winningAlt = resolveByPickingExitAlt(d,nondeterministicAlts); 1255 } 1256 else { 1257 winningAlt = resolveByPickingMinAlt(d,nondeterministicAlts); 1258 } 1259 } 1260 return winningAlt; 1261 } 1262 1263 /** Turn off all configurations associated with the 1264 * set of incoming nondeterministic alts except the min alt number. 1265 * There may be many alts among the configurations but only turn off 1266 * the ones with problems (other than the min alt of course). 1267 * 1268 * If nondeterministicAlts is null then turn off all configs 'cept those 1269 * associated with the minimum alt. 1270 * 1271 * Return the min alt found. 1272 */ resolveByPickingMinAlt(DFAState d, Set<Integer> nondeterministicAlts)1273 protected int resolveByPickingMinAlt(DFAState d, Set<Integer> nondeterministicAlts) { 1274 int min; 1275 if ( nondeterministicAlts!=null ) { 1276 min = getMinAlt(nondeterministicAlts); 1277 } 1278 else { 1279 min = d.minAltInConfigurations; 1280 } 1281 1282 turnOffOtherAlts(d, min, nondeterministicAlts); 1283 1284 return min; 1285 } 1286 1287 /** Resolve state d by choosing exit alt, which is same value as the 1288 * number of alternatives. Return that exit alt. 1289 */ resolveByPickingExitAlt(DFAState d, Set<Integer> nondeterministicAlts)1290 protected int resolveByPickingExitAlt(DFAState d, Set<Integer> nondeterministicAlts) { 1291 int exitAlt = dfa.getNumberOfAlts(); 1292 turnOffOtherAlts(d, exitAlt, nondeterministicAlts); 1293 return exitAlt; 1294 } 1295 1296 /** turn off all states associated with alts other than the good one 1297 * (as long as they are one of the nondeterministic ones) 1298 */ turnOffOtherAlts(DFAState d, int min, Set<Integer> nondeterministicAlts)1299 protected static void turnOffOtherAlts(DFAState d, int min, Set<Integer> nondeterministicAlts) { 1300 int numConfigs = d.nfaConfigurations.size(); 1301 for (int i = 0; i < numConfigs; i++) { 1302 NFAConfiguration configuration = d.nfaConfigurations.get(i); 1303 if ( configuration.alt!=min ) { 1304 if ( nondeterministicAlts==null || 1305 nondeterministicAlts.contains(Utils.integer(configuration.alt)) ) 1306 { 1307 configuration.resolved = true; 1308 } 1309 } 1310 } 1311 } 1312 getMinAlt(Set<Integer> nondeterministicAlts)1313 protected static int getMinAlt(Set<Integer> nondeterministicAlts) { 1314 int min = Integer.MAX_VALUE; 1315 for (Integer altI : nondeterministicAlts) { 1316 int alt = altI; 1317 if ( alt < min ) { 1318 min = alt; 1319 } 1320 } 1321 return min; 1322 } 1323 1324 /** See if a set of nondeterministic alternatives can be disambiguated 1325 * with the semantic predicate contexts of the alternatives. 1326 * 1327 * Without semantic predicates, syntactic conflicts are resolved 1328 * by simply choosing the first viable alternative. In the 1329 * presence of semantic predicates, you can resolve the issue by 1330 * evaluating boolean expressions at run time. During analysis, 1331 * this amounts to suppressing grammar error messages to the 1332 * developer. NFA configurations are always marked as "to be 1333 * resolved with predicates" so that 1334 * DFA.findNewDFAStatesAndAddDFATransitions() will know to ignore 1335 * these configurations and add predicate transitions to the DFA 1336 * after adding token/char labels. 1337 * 1338 * During analysis, we can simply make sure that for n 1339 * ambiguously predicted alternatives there are at least n-1 1340 * unique predicate sets. The nth alternative can be predicted 1341 * with "not" the "or" of all other predicates. NFA configurations without 1342 * predicates are assumed to have the default predicate of 1343 * "true" from a user point of view. When true is combined via || with 1344 * another predicate, the predicate is a tautology and must be removed 1345 * from consideration for disambiguation: 1346 * 1347 * a : b | B ; // hoisting p1||true out of rule b, yields no predicate 1348 * b : {p1}? B | B ; 1349 * 1350 * This is done down in getPredicatesPerNonDeterministicAlt(). 1351 */ tryToResolveWithSemanticPredicates(DFAState d, Set<Integer> nondeterministicAlts)1352 protected boolean tryToResolveWithSemanticPredicates(DFAState d, 1353 Set<Integer> nondeterministicAlts) 1354 { 1355 Map<Integer, SemanticContext> altToPredMap = 1356 getPredicatesPerNonDeterministicAlt(d, nondeterministicAlts); 1357 1358 if ( altToPredMap.isEmpty() ) { 1359 return false; 1360 } 1361 1362 //System.out.println("nondeterministic alts with predicates: "+altToPredMap); 1363 dfa.probe.reportAltPredicateContext(d, altToPredMap); 1364 1365 if ( nondeterministicAlts.size()-altToPredMap.size()>1 ) { 1366 // too few predicates to resolve; just return 1367 return false; 1368 } 1369 1370 // Handle case where 1 predicate is missing 1371 // Case 1. Semantic predicates 1372 // If the missing pred is on nth alt, !(union of other preds)==true 1373 // so we can avoid that computation. If naked alt is ith, then must 1374 // test it with !(union) since semantic predicated alts are order 1375 // independent 1376 // Case 2: Syntactic predicates 1377 // The naked alt is always assumed to be true as the order of 1378 // alts is the order of precedence. The naked alt will be a tautology 1379 // anyway as it's !(union of other preds). This implies 1380 // that there is no such thing as noviable alt for synpred edges 1381 // emanating from a DFA state. 1382 if ( altToPredMap.size()==nondeterministicAlts.size()-1 ) { 1383 // if there are n-1 predicates for n nondeterministic alts, can fix 1384 org.antlr.misc.BitSet ndSet = org.antlr.misc.BitSet.of(nondeterministicAlts); 1385 org.antlr.misc.BitSet predSet = org.antlr.misc.BitSet.of(altToPredMap); 1386 int nakedAlt = ndSet.subtract(predSet).getSingleElement(); 1387 SemanticContext nakedAltPred; 1388 if ( nakedAlt == max(nondeterministicAlts) ) { 1389 // the naked alt is the last nondet alt and will be the default clause 1390 nakedAltPred = new SemanticContext.TruePredicate(); 1391 } 1392 else { 1393 // pretend naked alternative is covered with !(union other preds) 1394 // unless one of preds from other alts is a manually specified synpred 1395 // since those have precedence same as alt order. Missing synpred 1396 // is true so that alt wins (or is at least attempted). 1397 // Note: can't miss any preds on alts (can't be here) if auto backtrack 1398 // since it prefixes all. 1399 // In LL(*) paper, i'll just have algorithm emit warning about uncovered 1400 // pred 1401 SemanticContext unionOfPredicatesFromAllAlts = 1402 getUnionOfPredicates(altToPredMap); 1403 //System.out.println("all predicates "+unionOfPredicatesFromAllAlts); 1404 if ( unionOfPredicatesFromAllAlts.isSyntacticPredicate() ) { 1405 nakedAltPred = new SemanticContext.TruePredicate(); 1406 } 1407 else { 1408 nakedAltPred = 1409 SemanticContext.not(unionOfPredicatesFromAllAlts); 1410 } 1411 } 1412 1413 //System.out.println("covering naked alt="+nakedAlt+" with "+nakedAltPred); 1414 1415 altToPredMap.put(Utils.integer(nakedAlt), nakedAltPred); 1416 // set all config with alt=nakedAlt to have the computed predicate 1417 int numConfigs = d.nfaConfigurations.size(); 1418 for (int i = 0; i < numConfigs; i++) { // TODO: I don't think we need to do this; altToPredMap has it 1419 //7/27/10 theok, I removed it and it still seems to work with everything; leave in anyway just in case 1420 NFAConfiguration configuration = d.nfaConfigurations.get(i); 1421 if ( configuration.alt == nakedAlt ) { 1422 configuration.semanticContext = nakedAltPred; 1423 } 1424 } 1425 } 1426 1427 if ( altToPredMap.size()==nondeterministicAlts.size() ) { 1428 // RESOLVE CONFLICT by picking one NFA configuration for each alt 1429 // and setting its resolvedWithPredicate flag 1430 // First, prevent a recursion warning on this state due to 1431 // pred resolution 1432 if ( d.abortedDueToRecursionOverflow ) { 1433 d.dfa.probe.removeRecursiveOverflowState(d); 1434 } 1435 int numConfigs = d.nfaConfigurations.size(); 1436 //System.out.println("pred map="+altToPredMap); 1437 for (int i = 0; i < numConfigs; i++) { 1438 NFAConfiguration configuration = d.nfaConfigurations.get(i); 1439 SemanticContext semCtx = altToPredMap.get(Utils.integer(configuration.alt)); 1440 if ( semCtx!=null ) { 1441 // resolve (first found) with pred 1442 // and remove alt from problem list 1443 //System.out.println("c="+configuration); 1444 configuration.resolveWithPredicate = true; 1445 // altToPredMap has preds from all alts; store into "annointed" config 1446 configuration.semanticContext = semCtx; // reset to combined 1447 altToPredMap.remove(Utils.integer(configuration.alt)); 1448 1449 // notify grammar that we've used the preds contained in semCtx 1450 if ( semCtx.isSyntacticPredicate() ) { 1451 dfa.nfa.grammar.synPredUsedInDFA(dfa, semCtx); 1452 } 1453 } 1454 else if ( nondeterministicAlts.contains(Utils.integer(configuration.alt)) ) { 1455 // resolve all configurations for nondeterministic alts 1456 // for which there is no predicate context by turning it off 1457 configuration.resolved = true; 1458 } 1459 } 1460 return true; 1461 } 1462 1463 return false; // couldn't fix the problem with predicates 1464 } 1465 1466 /** Return a mapping from nondeterministc alt to combined list of predicates. 1467 * If both (s|i|semCtx1) and (t|i|semCtx2) exist, then the proper predicate 1468 * for alt i is semCtx1||semCtx2 because you have arrived at this single 1469 * DFA state via two NFA paths, both of which have semantic predicates. 1470 * We ignore deterministic alts because syntax alone is sufficient 1471 * to predict those. Do not include their predicates. 1472 * 1473 * Alts with no predicate are assumed to have {true}? pred. 1474 * 1475 * When combining via || with "true", all predicates are removed from 1476 * consideration since the expression will always be true and hence 1477 * not tell us how to resolve anything. So, if any NFA configuration 1478 * in this DFA state does not have a semantic context, the alt cannot 1479 * be resolved with a predicate. 1480 * 1481 * If nonnull, incidentEdgeLabel tells us what NFA transition label 1482 * we did a reach on to compute state d. d may have insufficient 1483 * preds, so we really want this for the error message. 1484 */ getPredicatesPerNonDeterministicAlt(DFAState d, Set<Integer> nondeterministicAlts)1485 protected Map<Integer, SemanticContext> getPredicatesPerNonDeterministicAlt(DFAState d, 1486 Set<Integer> nondeterministicAlts) 1487 { 1488 // map alt to combined SemanticContext 1489 Map<Integer, SemanticContext> altToPredicateContextMap = 1490 new HashMap<Integer, SemanticContext>(); 1491 // init the alt to predicate set map 1492 Map<Integer, OrderedHashSet<SemanticContext>> altToSetOfContextsMap = 1493 new HashMap<Integer, OrderedHashSet<SemanticContext>>(); 1494 for (Integer altI : nondeterministicAlts) { 1495 altToSetOfContextsMap.put(altI, new OrderedHashSet<SemanticContext>()); 1496 } 1497 1498 /* 1499 List<Label> sampleInputLabels = d.dfa.probe.getSampleNonDeterministicInputSequence(d); 1500 String input = d.dfa.probe.getInputSequenceDisplay(sampleInputLabels); 1501 System.out.println("sample input: "+input); 1502 */ 1503 1504 // for each configuration, create a unique set of predicates 1505 // Also, track the alts with at least one uncovered configuration 1506 // (one w/o a predicate); tracks tautologies like p1||true 1507 Map<Integer, Set<Token>> altToLocationsReachableWithoutPredicate = new HashMap<Integer, Set<Token>>(); 1508 Set<Integer> nondetAltsWithUncoveredConfiguration = new HashSet<Integer>(); 1509 //System.out.println("configs="+d.nfaConfigurations); 1510 //System.out.println("configs with preds?"+d.atLeastOneConfigurationHasAPredicate); 1511 //System.out.println("configs with preds="+d.configurationsWithPredicateEdges); 1512 int numConfigs = d.nfaConfigurations.size(); 1513 for (int i = 0; i < numConfigs; i++) { 1514 NFAConfiguration configuration = d.nfaConfigurations.get(i); 1515 Integer altI = Utils.integer(configuration.alt); 1516 // if alt is nondeterministic, combine its predicates 1517 if ( nondeterministicAlts.contains(altI) ) { 1518 // if there is a predicate for this NFA configuration, OR in 1519 if ( configuration.semanticContext != 1520 SemanticContext.EMPTY_SEMANTIC_CONTEXT ) 1521 { 1522 Set<SemanticContext> predSet = altToSetOfContextsMap.get(altI); 1523 predSet.add(configuration.semanticContext); 1524 } 1525 else { 1526 // if no predicate, but it's part of nondeterministic alt 1527 // then at least one path exists not covered by a predicate. 1528 // must remove predicate for this alt; track incomplete alts 1529 nondetAltsWithUncoveredConfiguration.add(altI); 1530 /* 1531 NFAState s = dfa.nfa.getState(configuration.state); 1532 System.out.println("###\ndec "+dfa.decisionNumber+" alt "+configuration.alt+ 1533 " enclosing rule for nfa state not covered "+ 1534 s.enclosingRule); 1535 if ( s.associatedASTNode!=null ) { 1536 System.out.println("token="+s.associatedASTNode.token); 1537 } 1538 System.out.println("nfa state="+s); 1539 1540 if ( s.incidentEdgeLabel!=null && Label.intersect(incidentEdgeLabel, s.incidentEdgeLabel) ) { 1541 Set<Token> locations = altToLocationsReachableWithoutPredicate.get(altI); 1542 if ( locations==null ) { 1543 locations = new HashSet<Token>(); 1544 altToLocationsReachableWithoutPredicate.put(altI, locations); 1545 } 1546 locations.add(s.associatedASTNode.token); 1547 } 1548 */ 1549 } 1550 } 1551 } 1552 1553 // For each alt, OR together all unique predicates associated with 1554 // all configurations 1555 // Also, track the list of incompletely covered alts: those alts 1556 // with at least 1 predicate and at least one configuration w/o a 1557 // predicate. We want this in order to report to the decision probe. 1558 List<Integer> incompletelyCoveredAlts = new ArrayList<Integer>(); 1559 for (Integer altI : nondeterministicAlts) { 1560 Set<SemanticContext> contextsForThisAlt = altToSetOfContextsMap.get(altI); 1561 if ( nondetAltsWithUncoveredConfiguration.contains(altI) ) { // >= 1 config has no ctx 1562 if ( contextsForThisAlt.size()>0 ) { // && at least one pred 1563 incompletelyCoveredAlts.add(altI); // this alt incompleted covered 1564 } 1565 continue; // don't include at least 1 config has no ctx 1566 } 1567 SemanticContext combinedContext = null; 1568 for (SemanticContext ctx : contextsForThisAlt) { 1569 combinedContext = 1570 SemanticContext.or(combinedContext,ctx); 1571 } 1572 altToPredicateContextMap.put(altI, combinedContext); 1573 } 1574 1575 if ( incompletelyCoveredAlts.size()>0 ) { 1576 /* 1577 System.out.println("prob in dec "+dfa.decisionNumber+" state="+d); 1578 FASerializer serializer = new FASerializer(dfa.nfa.grammar); 1579 String result = serializer.serialize(dfa.startState); 1580 System.out.println("dfa: "+result); 1581 System.out.println("incomplete alts: "+incompletelyCoveredAlts); 1582 System.out.println("nondet="+nondeterministicAlts); 1583 System.out.println("nondetAltsWithUncoveredConfiguration="+ nondetAltsWithUncoveredConfiguration); 1584 System.out.println("altToCtxMap="+altToSetOfContextsMap); 1585 System.out.println("altToPredicateContextMap="+altToPredicateContextMap); 1586 */ 1587 for (int i = 0; i < numConfigs; i++) { 1588 NFAConfiguration configuration = d.nfaConfigurations.get(i); 1589 Integer altI = Utils.integer(configuration.alt); 1590 if ( incompletelyCoveredAlts.contains(altI) && 1591 configuration.semanticContext == SemanticContext.EMPTY_SEMANTIC_CONTEXT ) 1592 { 1593 NFAState s = dfa.nfa.getState(configuration.state); 1594 /* 1595 System.out.print("nondet config w/o context "+configuration+ 1596 " incident "+(s.incidentEdgeLabel!=null?s.incidentEdgeLabel.toString(dfa.nfa.grammar):null)); 1597 if ( s.associatedASTNode!=null ) { 1598 System.out.print(" token="+s.associatedASTNode.token); 1599 } 1600 else System.out.println(); 1601 */ 1602 // We want to report getting to an NFA state with an 1603 // incoming label, unless it's EOF, w/o a predicate. 1604 if ( s.incidentEdgeLabel!=null && s.incidentEdgeLabel.label != Label.EOF ) { 1605 if ( s.associatedASTNode==null || s.associatedASTNode.token==null ) { 1606 ErrorManager.internalError("no AST/token for nonepsilon target w/o predicate"); 1607 } 1608 else { 1609 Set<Token> locations = altToLocationsReachableWithoutPredicate.get(altI); 1610 if ( locations==null ) { 1611 locations = new HashSet<Token>(); 1612 altToLocationsReachableWithoutPredicate.put(altI, locations); 1613 } 1614 locations.add(s.associatedASTNode.token); 1615 } 1616 } 1617 } 1618 } 1619 dfa.probe.reportIncompletelyCoveredAlts(d, 1620 altToLocationsReachableWithoutPredicate); 1621 } 1622 1623 return altToPredicateContextMap; 1624 } 1625 1626 /** OR together all predicates from the alts. Note that the predicate 1627 * for an alt could itself be a combination of predicates. 1628 */ getUnionOfPredicates(Map<?, SemanticContext> altToPredMap)1629 protected static SemanticContext getUnionOfPredicates(Map<?, SemanticContext> altToPredMap) { 1630 Iterator<SemanticContext> iter; 1631 SemanticContext unionOfPredicatesFromAllAlts = null; 1632 iter = altToPredMap.values().iterator(); 1633 while ( iter.hasNext() ) { 1634 SemanticContext semCtx = iter.next(); 1635 if ( unionOfPredicatesFromAllAlts==null ) { 1636 unionOfPredicatesFromAllAlts = semCtx; 1637 } 1638 else { 1639 unionOfPredicatesFromAllAlts = 1640 SemanticContext.or(unionOfPredicatesFromAllAlts,semCtx); 1641 } 1642 } 1643 return unionOfPredicatesFromAllAlts; 1644 } 1645 1646 /** for each NFA config in d, look for "predicate required" sign set 1647 * during nondeterminism resolution. 1648 * 1649 * Add the predicate edges sorted by the alternative number; I'm fairly 1650 * sure that I could walk the configs backwards so they are added to 1651 * the predDFATarget in the right order, but it's best to make sure. 1652 * Predicates succeed in the order they are specifed. Alt i wins 1653 * over alt i+1 if both predicates are true. 1654 */ addPredicateTransitions(DFAState d)1655 protected void addPredicateTransitions(DFAState d) { 1656 List<NFAConfiguration> configsWithPreds = new ArrayList<NFAConfiguration>(); 1657 // get a list of all configs with predicates 1658 int numConfigs = d.nfaConfigurations.size(); 1659 for (int i = 0; i < numConfigs; i++) { 1660 NFAConfiguration c = d.nfaConfigurations.get(i); 1661 if ( c.resolveWithPredicate ) { 1662 configsWithPreds.add(c); 1663 } 1664 } 1665 // Sort ascending according to alt; alt i has higher precedence than i+1 1666 Collections.sort(configsWithPreds, 1667 new Comparator<NFAConfiguration>() { 1668 @Override 1669 public int compare(NFAConfiguration a, NFAConfiguration b) { 1670 if ( a.alt < b.alt ) return -1; 1671 else if ( a.alt > b.alt ) return 1; 1672 return 0; 1673 } 1674 }); 1675 List<NFAConfiguration> predConfigsSortedByAlt = configsWithPreds; 1676 // Now, we can add edges emanating from d for these preds in right order 1677 for (int i = 0; i < predConfigsSortedByAlt.size(); i++) { 1678 NFAConfiguration c = predConfigsSortedByAlt.get(i); 1679 DFAState predDFATarget = d.dfa.getAcceptState(c.alt); 1680 if ( predDFATarget==null ) { 1681 predDFATarget = dfa.newState(); // create if not there. 1682 // create a new DFA state that is a target of the predicate from d 1683 predDFATarget.addNFAConfiguration(dfa.nfa.getState(c.state), 1684 c.alt, 1685 c.context, 1686 c.semanticContext); 1687 predDFATarget.setAcceptState(true); 1688 dfa.setAcceptState(c.alt, predDFATarget); 1689 DFAState existingState = dfa.addState(predDFATarget); 1690 if ( predDFATarget != existingState ) { 1691 // already there...use/return the existing DFA state that 1692 // is a target of this predicate. Make this state number 1693 // point at the existing state 1694 dfa.setState(predDFATarget.stateNumber, existingState); 1695 predDFATarget = existingState; 1696 } 1697 } 1698 // add a transition to pred target from d 1699 d.addTransition(predDFATarget, new PredicateLabel(c.semanticContext)); 1700 } 1701 } 1702 initContextTrees(int numberOfAlts)1703 protected void initContextTrees(int numberOfAlts) { 1704 contextTrees = new NFAContext[numberOfAlts]; 1705 for (int i = 0; i < contextTrees.length; i++) { 1706 int alt = i+1; 1707 // add a dummy root node so that an NFA configuration can 1708 // always point at an NFAContext. If a context refers to this 1709 // node then it implies there is no call stack for 1710 // that configuration 1711 contextTrees[i] = new NFAContext(null, null); 1712 } 1713 } 1714 max(Set<Integer> s)1715 public static int max(Set<Integer> s) { 1716 if ( s==null ) { 1717 return Integer.MIN_VALUE; 1718 } 1719 int i = 0; 1720 int m = 0; 1721 for (Integer value : s) { 1722 i++; 1723 Integer I = value; 1724 if ( i==1 ) { // init m with first value 1725 m = I; 1726 continue; 1727 } 1728 if ( I>m ) { 1729 m = I; 1730 } 1731 } 1732 return m; 1733 } 1734 } 1735