1# begin[licence]
2#
3#  [The "BSD licence"]
4#  Copyright (c) 2005-2012 Terence Parr
5#  All rights reserved.
6
7#  Redistribution and use in source and binary forms, with or without
8#  modification, are permitted provided that the following conditions
9#  are met:
10#  1. Redistributions of source code must retain the above copyright
11#     notice, this list of conditions and the following disclaimer.
12#  2. Redistributions in binary form must reproduce the above copyright
13#     notice, this list of conditions and the following disclaimer in the
14#     documentation and/or other materials provided with the distribution.
15#  3. The name of the author may not be used to endorse or promote products
16#     derived from this software without specific prior written permission.
17
18#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19#  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20#  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21#  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22#  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23#  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27#  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28#
29# end[licence]
30
31import socket
32import sys
33from .constants import INVALID_TOKEN_TYPE
34from .exceptions import RecognitionException
35from .recognizers import Parser
36from .streams import TokenStream
37from .tokens import Token
38from .tree import CommonTreeAdaptor, TreeAdaptor, Tree
39
class DebugParser(Parser):
    """Parser variant that reports its activity to a debug event listener.

    The token stream given to the constructor is wrapped in a
    DebugTokenStream (unless it already is one), and parser-level events
    (resync, backtracking, recognition errors) are forwarded to the
    listener stored in ``_dbg``.
    """

    def __init__(self, stream, state=None, dbg=None, *args, **kwargs):
        """Build the parser on top of a debugging token stream.

        ``stream`` is used as-is when it already is a DebugTokenStream;
        otherwise it is wrapped in one bound to ``dbg``.  ``state`` and any
        extra arguments are passed through to ``Parser.__init__``.
        """
        # Only wrap streams the caller has not wrapped already.
        debugStream = (
            stream if isinstance(stream, DebugTokenStream)
            else DebugTokenStream(stream, dbg))

        super().__init__(debugStream, state, *args, **kwargs)

        # Listener to notify when parser events occur.  It is installed
        # through the setter so the input stream is updated as well.
        self._dbg = None
        self.setDebugListener(dbg)


    def setDebugListener(self, dbg):
        """Install ``dbg`` as the debug event listener of this parser.

        When the input stream exposes a ``dbg`` attribute, the stream is
        pointed at the same listener.
        """
        stream = self.input
        if hasattr(stream, 'dbg'):
            stream.dbg = dbg

        self._dbg = dbg

    def getDebugListener(self):
        """Return the listener currently receiving parser events."""
        return self._dbg

    dbg = property(getDebugListener, setDebugListener)


    def beginResync(self):
        """Forward the beginResync event to the debug listener."""
        self._dbg.beginResync()


    def endResync(self):
        """Forward the endResync event to the debug listener."""
        self._dbg.endResync()


    def beginBacktrack(self, level):
        """Forward the beginBacktrack event for ``level`` to the listener."""
        self._dbg.beginBacktrack(level)


    def endBacktrack(self, level, successful):
        """Forward the endBacktrack event and its outcome to the listener."""
        self._dbg.endBacktrack(level, successful)


    def reportError(self, exc):
        """Report ``exc`` through Parser.reportError, then notify the listener.

        The listener only receives a recognitionException event when
        ``exc`` is a RecognitionException.
        """
        Parser.reportError(self, exc)

        if not isinstance(exc, RecognitionException):
            return

        self._dbg.recognitionException(exc)
91
92
class DebugTokenStream(TokenStream):
    """Token stream proxy that reports stream activity to a debug listener.

    All real work is delegated to the wrapped stream kept in ``input``;
    consume, lookahead, mark and rewind operations additionally fire the
    matching event on the listener stored in ``_dbg``.
    """

    def __init__(self, input, dbg=None):
        """Wrap ``input`` and report its activity to ``dbg``."""
        super().__init__()
        self.input = input
        # True until the leading off-channel tokens have been reported.
        self.initialStreamState = True
        # Result of the most recent mark() call.
        self.lastMarker = None

        self._dbg = None
        self.setDebugListener(dbg)

        # Make the wrapped stream fetch at least its first valid token so
        # that we know whether hidden tokens come first in the stream.
        self.input.LT(1)


    def getDebugListener(self):
        """Return the listener receiving stream events."""
        return self._dbg

    def setDebugListener(self, dbg):
        """Replace the listener receiving stream events."""
        self._dbg = dbg

    dbg = property(getDebugListener, setDebugListener)


    def consume(self):
        """Consume one token and report it, plus any hidden tokens skipped.

        The stream index is sampled before and after the underlying
        consume(); every index strictly between the two samples is reported
        as a hidden token after the consumeToken event.
        """
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        stream = self.input
        before = stream.index()
        token = stream.LT(1)
        stream.consume()
        after = stream.index()
        self._dbg.consumeToken(token)

        # The range is empty unless more than one token was consumed; the
        # extra ones must be off-channel tokens.
        for hiddenIndex in range(before + 1, after):
            self._dbg.consumeHiddenToken(stream.get(hiddenIndex))


    def consumeInitialHiddenTokens(self):
        """Report every token located before the current stream index as a
        hidden token, then leave the initial stream state.
        """
        firstVisibleIndex = self.input.index()
        for hiddenIndex in range(firstVisibleIndex):
            self._dbg.consumeHiddenToken(self.input.get(hiddenIndex))

        self.initialStreamState = False


    def LT(self, i):
        """Return lookahead token ``i`` and fire an LT event for it."""
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        token = self.input.LT(i)
        self._dbg.LT(i, token)
        return token


    def LA(self, i):
        """Return the type of lookahead token ``i``.

        The listener is notified through an LT event, exactly as for LT().
        """
        if self.initialStreamState:
            self.consumeInitialHiddenTokens()

        token = self.input.LT(i)
        self._dbg.LT(i, token)
        return token.type


    def get(self, i):
        """Return the token at index ``i`` of the wrapped stream."""
        return self.input.get(i)


    def index(self):
        """Return the current index of the wrapped stream."""
        return self.input.index()


    def mark(self):
        """Mark the wrapped stream, remember and report the marker, and
        return it.
        """
        marker = self.input.mark()
        self.lastMarker = marker
        self._dbg.mark(marker)
        return marker


    def rewind(self, marker=None):
        """Report the rewind to the listener, then rewind the wrapped stream
        with the same ``marker`` argument.
        """
        self._dbg.rewind(marker)
        self.input.rewind(marker)


    def release(self, marker):
        """Accept ``marker`` and do nothing."""


    def seek(self, index):
        """Move the wrapped stream to ``index``; no event is fired."""
        # TODO: implement seek in dbg interface
        # self._dbg.seek(index);
        self.input.seek(index)


    def size(self):
        """Return the size reported by the wrapped stream."""
        return self.input.size()


    def getTokenSource(self):
        """Return the token source of the wrapped stream."""
        return self.input.getTokenSource()


    def getSourceName(self):
        """Return the source name reported by the token source."""
        tokenSource = self.getTokenSource()
        return tokenSource.getSourceName()


    def toString(self, start=None, stop=None):
        """Delegate toString(start, stop) to the wrapped stream."""
        return self.input.toString(start, stop)
205
206
class DebugTreeAdaptor(TreeAdaptor):
    """A TreeAdaptor proxy that fires debugging events to a DebugEventListener
    delegate and uses the TreeAdaptor delegate to do the actual work.  All
    AST events are triggered by this adaptor; no code gen changes are needed
    in generated rules.  Debugging events are triggered *after* invoking
    tree adaptor routines.

    Trees created with actions in rewrite actions like "-> ^(ADD {foo} {bar})"
    cannot be tracked as they might not use the adaptor to create foo, bar.
    The debug listener has to deal with tree node IDs for which it did
    not see a createNode event.  A single <unknown> node is sufficient even
    if it represents a whole tree.
    """

    def __init__(self, dbg, adaptor):
        """Remember the listener ``dbg`` and the real tree ``adaptor``."""
        super().__init__()
        self.dbg = dbg
        self.adaptor = adaptor


    def createWithPayload(self, payload):
        """Create a node for token ``payload`` and announce it.

        A payload with a negative index is created from its type and text
        instead, so the listener gets a createNode event without a token.
        """
        if payload.index < 0:
            # could be token conjured up during error recovery
            return self.createFromType(payload.type, payload.text)

        node = self.adaptor.createWithPayload(payload)
        self.dbg.createNode(node, payload)
        return node

    def createFromToken(self, tokenType, fromToken, text=None):
        """Create a node via the delegate's createFromToken and announce it."""
        node = self.adaptor.createFromToken(tokenType, fromToken, text)
        self.dbg.createNode(node)
        return node

    def createFromType(self, tokenType, text):
        """Create a node via the delegate's createFromType and announce it."""
        node = self.adaptor.createFromType(tokenType, text)
        self.dbg.createNode(node)
        return node


    def errorNode(self, input, start, stop, exc):
        """Create an error node; the listener is told only when the delegate
        actually returned a node.
        """
        node = self.adaptor.errorNode(input, start, stop, exc)
        if node is not None:
            self.dbg.errorNode(node)

        return node


    def dupTree(self, tree):
        """Duplicate ``tree`` and replay its construction to the listener."""
        t = self.adaptor.dupTree(tree)
        # walk the tree and emit create and add child events
        # to simulate what dupTree has done. dupTree does not call this debug
        # adapter so I must simulate.
        self.simulateTreeConstruction(t)
        return t


    def simulateTreeConstruction(self, t):
        """^(A B C): emit create A, create B, add child, ..."""
        self.dbg.createNode(t)
        for i in range(self.adaptor.getChildCount(t)):
            child = self.adaptor.getChild(t, i)
            self.simulateTreeConstruction(child)
            self.dbg.addChild(t, child)


    def dupNode(self, treeNode):
        """Duplicate a single node and announce the copy."""
        d = self.adaptor.dupNode(treeNode)
        self.dbg.createNode(d)
        return d


    def nil(self):
        """Create a nil node and announce it with a nilNode event."""
        node = self.adaptor.nil()
        self.dbg.nilNode(node)
        return node


    def isNil(self, tree):
        """Delegate isNil to the real adaptor."""
        return self.adaptor.isNil(tree)


    def addChild(self, t, child):
        """Add ``child`` to ``t`` and report the addition.

        A Token child is first turned into a node with createWithPayload.
        Nothing is added or reported when ``t`` or the child node is None.
        """
        if isinstance(child, Token):
            n = self.createWithPayload(child)
            self.addChild(t, n)

        else:
            if t is None or child is None:
                return

            self.adaptor.addChild(t, child)
            self.dbg.addChild(t, child)

    def becomeRoot(self, newRoot, oldRoot):
        """Make ``newRoot`` the root of ``oldRoot`` and report it.

        A Token ``newRoot`` is first turned into a node with
        createWithPayload, and that node is both returned and reported.
        Otherwise the delegate's becomeRoot result is returned.
        """
        if isinstance(newRoot, Token):
            n = self.createWithPayload(newRoot)
            self.adaptor.becomeRoot(n, oldRoot)
            # Report the node that was just created (and announced through
            # createNode), not the raw token: the listener identifies roots
            # by node ID and never saw a create event for the token itself.
            self.dbg.becomeRoot(n, oldRoot)
            return n

        n = self.adaptor.becomeRoot(newRoot, oldRoot)
        self.dbg.becomeRoot(newRoot, oldRoot)
        return n


    def rulePostProcessing(self, root):
        """Delegate rulePostProcessing to the real adaptor."""
        return self.adaptor.rulePostProcessing(root)


    def getType(self, t):
        """Delegate getType to the real adaptor."""
        return self.adaptor.getType(t)


    def setType(self, t, type):
        """Delegate setType to the real adaptor."""
        self.adaptor.setType(t, type)


    def getText(self, t):
        """Delegate getText to the real adaptor."""
        return self.adaptor.getText(t)


    def setText(self, t, text):
        """Delegate setText to the real adaptor."""
        self.adaptor.setText(t, text)


    def getToken(self, t):
        """Delegate getToken to the real adaptor."""
        return self.adaptor.getToken(t)


    def setTokenBoundaries(self, t, startToken, stopToken):
        """Set the token boundaries of ``t`` and report their indexes.

        The listener is only notified when the node and both tokens are
        present, because the event carries the two token indexes.
        """
        self.adaptor.setTokenBoundaries(t, startToken, stopToken)
        # Explicit None checks: a node or token type that defines
        # __len__/__bool__ must not suppress the event by being falsy.
        if (t is not None
                and startToken is not None
                and stopToken is not None):
            self.dbg.setTokenBoundaries(
                t, startToken.index, stopToken.index)


    def getTokenStartIndex(self, t):
        """Delegate getTokenStartIndex to the real adaptor."""
        return self.adaptor.getTokenStartIndex(t)


    def getTokenStopIndex(self, t):
        """Delegate getTokenStopIndex to the real adaptor."""
        return self.adaptor.getTokenStopIndex(t)


    def getChild(self, t, i):
        """Delegate getChild to the real adaptor."""
        return self.adaptor.getChild(t, i)


    def setChild(self, t, i, child):
        """Delegate setChild to the real adaptor; no event is fired."""
        self.adaptor.setChild(t, i, child)


    def deleteChild(self, t, i):
        """Delegate deleteChild to the real adaptor; no event is fired."""
        return self.adaptor.deleteChild(t, i)


    def getChildCount(self, t):
        """Delegate getChildCount to the real adaptor."""
        return self.adaptor.getChildCount(t)


    def getUniqueID(self, node):
        """Delegate getUniqueID to the real adaptor."""
        return self.adaptor.getUniqueID(node)


    def getParent(self, t):
        """Delegate getParent to the real adaptor."""
        return self.adaptor.getParent(t)


    def getChildIndex(self, t):
        """Delegate getChildIndex to the real adaptor."""
        return self.adaptor.getChildIndex(t)


    def setParent(self, t, parent):
        """Delegate setParent to the real adaptor."""
        self.adaptor.setParent(t, parent)


    def setChildIndex(self, t, index):
        """Delegate setChildIndex to the real adaptor."""
        self.adaptor.setChildIndex(t, index)


    def replaceChildren(self, parent, startChildIndex, stopChildIndex, t):
        """Delegate replaceChildren to the real adaptor; no event is fired."""
        self.adaptor.replaceChildren(parent, startChildIndex, stopChildIndex, t)


    ## support

    def getDebugListener(self):
        """Return the listener receiving AST events."""
        return self.dbg

    def setDebugListener(self, dbg):
        """Replace the listener receiving AST events."""
        self.dbg = dbg


    def getTreeAdaptor(self):
        """Return the real adaptor doing the tree work."""
        return self.adaptor
402
403
404
class DebugEventListener:
    """Interface listing every debugging event a recognizer can trigger.

    AST events are part of this interface rather than a separate AST
    debugging interface: splitting them off would have created lots of
    extra classes, and DebugParser defines a dbg variable that would be
    hard to change to an ASTDebugEventListener.  One monolithic event
    interface for all possible events turned out to be the easiest to
    understand.  Adding ST debugging support is left for the future
    (note dated 4/26/2006).

    Every method in this class does nothing and returns None.
    """

    # Version 2 as of v3.1: the grammar name was added to enter/exit rule.
    PROTOCOL_VERSION = "2"

    def enterRule(self, grammarFileName, ruleName):
        """The parser has just entered a rule.

        No decision has been made yet about which alternative is predicted.
        The event is fired AFTER the init actions have run, so attributes
        are defined and available.  The grammarFileName lets composite
        grammars jump around among multiple grammar files.
        """


    def enterAlt(self, alt):
        """The parser is entering alternative ``alt`` (1..n for n alts).

        Rules can have many alternatives, so it is very useful to know which
        one is being entered.
        """


    def exitRule(self, grammarFileName, ruleName):
        """Last thing executed before leaving a rule.

        It is executed even if an exception is thrown, and is triggered
        after error reporting and recovery have occurred (unless the
        exception is not caught in this rule).  This implies an "exitAlt"
        event.  The grammarFileName lets composite grammars jump around
        among multiple grammar files.
        """


    def enterSubRule(self, decisionNumber):
        """Track entry into any (...) subrule or other EBNF construct."""


    def exitSubRule(self, decisionNumber):
        """Counterpart of enterSubRule for the same decisionNumber."""


    def enterDecision(self, decisionNumber, couldBacktrack):
        """A decision is about to be made.

        Every decision, fixed k or arbitrary, has an enter/exit event so
        that a GUI can easily track which LT/consume events belong to
        prediction.  You will see a single enter/exit subrule but multiple
        enter/exit decision events, one for each loop iteration.
        """


    def exitDecision(self, decisionNumber):
        """Counterpart of enterDecision for the same decisionNumber."""


    def consumeToken(self, t):
        """An input token was consumed; matched by any kind of element.

        Triggered after the token was matched by things like match() and
        matchAny().
        """


    def consumeHiddenToken(self, t):
        """An off-channel input token was consumed.

        Triggered after the token was matched by things like match() and
        matchAny() (unless, of course, the hidden token is the first stuff
        in the input stream).
        """


    def LT(self, i, t):
        """Somebody (anybody) looked ahead.

        This is triggered by both LA and LT calls.  The debugger will want
        to know which Token object was examined; like consumeToken, the
        event indicates what token was seen at that depth.  A remote
        debugger cannot look ahead into a file it does not have, so LT
        events must pass the token even if the information is redundant.

        For tree parsers, if the type is UP or DOWN, the ID is not really
        meaningful as it is fixed--there is just one UP node and one DOWN
        navigation node.
        """


    def mark(self, marker):
        """The parser is going to look arbitrarily far ahead; mark this
        location.  The token stream's marker is sent in case it is needed.
        """


    def rewind(self, marker=None):
        """The stream should be rewound to the position tied to ``marker``.

        Sent after an arbitrarily long lookahead, as with a cyclic DFA, or
        after any backtrack.
        """


    def beginBacktrack(self, level):
        """Backtracking event paired with endBacktrack for ``level``."""


    def endBacktrack(self, level, successful):
        """Backtracking event paired with beginBacktrack; ``successful``
        carries the outcome.
        """


    def location(self, line, pos):
        """The parser is passing line/charPos ``line``/``pos`` of the grammar.

        To watch a parser move through the grammar, the parser needs to
        tell the debugger where in the grammar it is.  For now, this does
        not know how to switch from one grammar to the other and back for
        island grammars etc...

        This should also allow breakpoints, because the debugger can stop
        the parser whenever it hits this line/pos.
        """


    def recognitionException(self, e):
        """A recognition exception occurred, such as NoViableAltException.

        This is a generic event so that the exception hierarchy can be
        altered later without having to alter all the debug objects.

        Upon error, the stack of enter rule/subrule must be properly
        unwound.  If no viable alt occurs it is within an enter/exit
        decision, which also must be rewound.  Even the rewind for each mark
        must be unwound.  In the Java target this is pretty easy using
        try/finally, if a bit ugly in the generated code.  The rewind is
        generated in DFA.predict() actually, so no code needs to be
        generated for that.  For languages without this "finally" feature
        (C++?), the target implementor will have to build an event stack or
        something.

        Across a socket for remote debugging, only the RecognitionException
        data fields are transmitted.  The token object or whatever caused
        the problem was the last object referenced by LT.  The immediately
        preceding LT event should hold the unexpected Token or char.

        Here is a sample event trace for grammar:

        b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
          | D
          ;

        The sequence for this rule (with no viable alt in the subrule) for
        input 'c c' (there are 3 tokens) is:

                commence
                LT(1)
                enterRule b
                location 7 1
                enter decision 3
                LT(1)
                exit decision 3
                enterAlt1
                location 7 5
                LT(1)
                consumeToken [c/<4>,1:0]
                location 7 7
                enterSubRule 2
                enter decision 2
                LT(1)
                LT(1)
                recognitionException NoViableAltException 2 1 2
                exit decision 2
                exitSubRule 2
                beginResync
                LT(1)
                consumeToken [c/<4>,1:1]
                LT(1)
                endResync
                LT(-1)
                exitRule b
                terminate
        """


    def beginResync(self):
        """The recognizer is about to consume tokens to resynchronize the
        parser.

        Any consume events from here until the recovered event are not part
        of the parse--they are dead tokens.
        """


    def endResync(self):
        """The recognizer has finished consuming tokens in order to
        resynchronize.

        There may be multiple beginResync/endResync pairs before the
        recognizer comes out of errorRecovery mode (in which multiple errors
        are suppressed).  This is useful in a GUI that wants to grey out
        tokens that were consumed but not matched to anything in the
        grammar.  Anything between a beginResync/endResync pair was tossed
        out by the parser.
        """


    def semanticPredicate(self, result, predicate):
        """A semantic predicate was evaluated with this result and action
        text.
        """


    def commence(self):
        """Announce that parsing has begun.

        Not technically useful except for sending events over a socket.  A
        GUI, for example, will launch a thread to connect and communicate
        with a remote parser, and that thread will want to notify the GUI
        when a connection is made.  ANTLR parsers trigger this upon entry to
        the first rule (the ruleLevel is used to figure this out).
        """


    def terminate(self):
        """Parsing is over, successfully or not.

        Mostly useful for telling remote debugging listeners that it is
        time to quit.  When the rule invocation level goes to zero at the
        end of a rule, we are done parsing.
        """


    ## T r e e  P a r s i n g

    def consumeNode(self, t):
        """A tree parser consumed node ``t``; analog of consumeToken.

        Input for a tree parser is an AST, but nothing is known for sure
        about a node except its type and text (obtained from the adaptor).
        Again, the ID is usually the hashCode of the node, so it only works
        if hashCode is not implemented.  If the type is UP or DOWN, the ID
        is not really meaningful as it is fixed--there is just one UP node
        and one DOWN navigation node.
        """


    ## A S T  E v e n t s

    def nilNode(self, t):
        """A nil was created (even nil nodes have a unique ID... they are
        not "null" per se).

        As of 4/28/2006, this seems to be uniquely triggered when starting a
        new subtree, such as when entering a subrule in automatic mode and
        when building a tree in rewrite mode.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """


    def errorNode(self, t):
        """Upon syntax error, recognizers bracket the error with an error
        node if they are building ASTs.
        """


    def createNode(self, node, token=None):
        """Announce a new node built from token elements such as type etc...

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID, type, text are
        set.
        """


    def becomeRoot(self, newRoot, oldRoot):
        """Make a node the new root of an existing root.

        Note: the newRootID parameter is possibly different from the
        TreeAdaptor.becomeRoot() newRoot parameter.  In our case, it will
        always be the result of calling TreeAdaptor.becomeRoot() and not
        root_n or whatever.

        The listener should assume that this event occurs only when the
        current subrule (or rule) subtree is being reset to newRootID.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only IDs are set.

        @see antlr3.tree.TreeAdaptor.becomeRoot()
        """


    def addChild(self, root, child):
        """Make childID a child of rootID.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only IDs are set.

        @see antlr3.tree.TreeAdaptor.addChild()
        """


    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        """Set the token start/stop token index for a subtree root or node.

        If you are receiving this event over a socket via
        RemoteDebugEventSocketListener then only t.ID is set.
        """
725
726
class BlankDebugEventListener(DebugEventListener):
    """Do-nothing listener meant as a base for real listener classes.

    Deriving from it spares real classes lots of blank methods and makes
    them less sensitive to updates of the debug interface.

    Note: this class adds nothing to DebugEventListener; it exists purely
    for compatibility with Java.
    """
736
737
class TraceDebugEventListener(DebugEventListener):
    """Listener that records a text representation of the events it gets.

    Useful for debugging the debugging facility ;)

    Events go through record(), which writes to stdout by default;
    subclasses can override it to store the events some other way.
    """

    def __init__(self, adaptor=None):
        """Use ``adaptor`` to describe nodes; a CommonTreeAdaptor is created
        when none is given.
        """
        super().__init__()

        self.adaptor = CommonTreeAdaptor() if adaptor is None else adaptor

    def record(self, event):
        """Write ``event`` followed by a newline to stdout."""
        sys.stdout.write(event + '\n')

    def enterRule(self, grammarFileName, ruleName):
        """Record the rule being entered; the grammar file is not recorded."""
        event = "enterRule " + ruleName
        self.record(event)

    def exitRule(self, grammarFileName, ruleName):
        """Record the rule being left; the grammar file is not recorded."""
        event = "exitRule " + ruleName
        self.record(event)

    def enterSubRule(self, decisionNumber):
        """Record a subrule entry; the decision number is not recorded."""
        self.record("enterSubRule")

    def exitSubRule(self, decisionNumber):
        """Record a subrule exit; the decision number is not recorded."""
        self.record("exitSubRule")

    def location(self, line, pos):
        """Record the grammar location as line:pos."""
        event = "location {}:{}".format(line, pos)
        self.record(event)

    ## Tree parsing stuff

    def consumeNode(self, t):
        """Record the unique ID, text and type of the consumed node."""
        nodeId = self.adaptor.getUniqueID(t)
        nodeText = self.adaptor.getText(t)
        nodeType = self.adaptor.getType(t)
        self.record("consumeNode {} {} {}".format(nodeId, nodeText, nodeType))

    def LT(self, i, t):
        """Record the lookahead depth with the ID, text and type of ``t``."""
        nodeId = self.adaptor.getUniqueID(t)
        nodeText = self.adaptor.getText(t)
        nodeType = self.adaptor.getType(t)
        self.record("LT {} {} {} {}".format(i, nodeId, nodeText, nodeType))


    ## AST stuff
    def nilNode(self, t):
        """Record the unique ID of the new nil node."""
        nodeId = self.adaptor.getUniqueID(t)
        self.record("nilNode {}".format(nodeId))

    def createNode(self, t, token=None):
        """Record a node creation.

        With a token, the node ID and the token index are recorded;
        without one, the node ID, text and type.
        """
        nodeId = self.adaptor.getUniqueID(t)

        if token is not None:
            self.record("create {}: {}".format(nodeId, token.index))
            return

        nodeText = self.adaptor.getText(t)
        nodeType = self.adaptor.getType(t)
        self.record("create {}: {}, {}".format(nodeId, nodeText, nodeType))

    def becomeRoot(self, newRoot, oldRoot):
        """Record the unique IDs of the new and the old root."""
        newId = self.adaptor.getUniqueID(newRoot)
        oldId = self.adaptor.getUniqueID(oldRoot)
        self.record("becomeRoot {}, {}".format(newId, oldId))

    def addChild(self, root, child):
        """Record the unique IDs of the root and of the added child."""
        rootId = self.adaptor.getUniqueID(root)
        childId = self.adaptor.getUniqueID(child)
        self.record("addChild {}, {}".format(rootId, childId))

    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
        """Record the node ID together with its start/stop token indexes."""
        nodeId = self.adaptor.getUniqueID(t)
        self.record("setTokenBoundaries {}, {}, {}".format(
                nodeId, tokenStartIndex, tokenStopIndex))
818
819
class RecordDebugEventListener(TraceDebugEventListener):
    """Trace listener that keeps the event strings in the ``events`` list
    instead of printing them.
    """

    def __init__(self, adaptor=None):
        """Start with an empty event list; ``adaptor`` goes to the parent."""
        super().__init__(adaptor)

        # Recorded events, oldest first.
        self.events = []

    def record(self, event):
        """Append ``event`` to the recorded events."""
        recorded = self.events
        recorded.append(event)
830
831
832class DebugEventSocketProxy(DebugEventListener):
833    """A proxy debug event listener that forwards events over a socket to
834    a debugger (or any other listener) using a simple text-based protocol;
835    one event per line.  ANTLRWorks listens on server socket with a
836    RemoteDebugEventSocketListener instance.  These two objects must therefore
837    be kept in sync.  New events must be handled on both sides of socket.
838    """
839
840    DEFAULT_DEBUGGER_PORT = 49100
841
842    def __init__(self, recognizer, adaptor=None, port=None, debug=None):
843        super().__init__()
844
845        self.grammarFileName = recognizer.getGrammarFileName()
846
847        # Almost certainly the recognizer will have adaptor set, but
848        # we don't know how to cast it (Parser or TreeParser) to get
849        # the adaptor field.  Must be set with a constructor. :(
850        self.adaptor = adaptor
851
852        self.port = port or self.DEFAULT_DEBUGGER_PORT
853
854        self.debug = debug
855
856        self.socket = None
857        self.connection = None
858        self.input = None
859        self.output = None
860
861
862    def log(self, msg):
863        if self.debug:
864            self.debug.write(msg + '\n')
865
866
867    def handshake(self):
868        if self.socket is None:
869            # create listening socket
870            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
871            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
872            self.socket.bind(('', self.port))
873            self.socket.listen(1)
874            self.log("Waiting for incoming connection on port {}".format(self.port))
875
876            # wait for an incoming connection
877            self.connection, addr = self.socket.accept()
878            self.log("Accepted connection from {}:{}".format(addr[0], addr[1]))
879
880            self.connection.setblocking(1)
881            self.connection.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
882
883            self.output = self.connection.makefile('w', 1)
884            self.input = self.connection.makefile('r', 1)
885
886            self.write("ANTLR {}".format(self.PROTOCOL_VERSION))
887            self.write('grammar "{}"'.format(self.grammarFileName))
888            self.ack()
889
890
891    def write(self, msg):
892        self.log("> {}".format(msg))
893        self.output.write("{}\n".format(msg))
894        self.output.flush()
895
896
897    def ack(self):
898        t = self.input.readline()
899        self.log("< {}".format(t.rstrip()))
900
901
902    def transmit(self, event):
903        self.write(event)
904        self.ack()
905
906
907    def commence(self):
908        # don't bother sending event; listener will trigger upon connection
909        pass
910
911
912    def terminate(self):
913        self.transmit("terminate")
914        self.output.close()
915        self.input.close()
916        self.connection.close()
917        self.socket.close()
918
919
920    def enterRule(self, grammarFileName, ruleName):
921        self.transmit("enterRule\t{}\t{}".format(grammarFileName, ruleName))
922
923
924    def enterAlt(self, alt):
925        self.transmit("enterAlt\t{}".format(alt))
926
927
928    def exitRule(self, grammarFileName, ruleName):
929        self.transmit("exitRule\t{}\t{}".format(grammarFileName, ruleName))
930
931
932    def enterSubRule(self, decisionNumber):
933        self.transmit("enterSubRule\t{}".format(decisionNumber))
934
935
936    def exitSubRule(self, decisionNumber):
937        self.transmit("exitSubRule\t{}".format(decisionNumber))
938
939
940    def enterDecision(self, decisionNumber, couldBacktrack):
941        self.transmit(
942            "enterDecision\t{}\t{:d}".format(decisionNumber, couldBacktrack))
943
944
945    def exitDecision(self, decisionNumber):
946        self.transmit("exitDecision\t{}".format(decisionNumber))
947
948
949    def consumeToken(self, t):
950        self.transmit("consumeToken\t{}".format(self.serializeToken(t)))
951
952
953    def consumeHiddenToken(self, t):
954        self.transmit("consumeHiddenToken\t{}".format(self.serializeToken(t)))
955
956
957    def LT(self, i, o):
958        if isinstance(o, Tree):
959            return self.LT_tree(i, o)
960        return self.LT_token(i, o)
961
962
963    def LT_token(self, i, t):
964        if t is not None:
965            self.transmit("LT\t{}\t{}".format(i, self.serializeToken(t)))
966
967
968    def mark(self, i):
969        self.transmit("mark\t{}".format(i))
970
971
972    def rewind(self, i=None):
973        if i is not None:
974            self.transmit("rewind\t{}".format(i))
975        else:
976            self.transmit("rewind")
977
978
979    def beginBacktrack(self, level):
980        self.transmit("beginBacktrack\t{}".format(level))
981
982
983    def endBacktrack(self, level, successful):
984        self.transmit("endBacktrack\t{}\t{}".format(
985                level, '1' if successful else '0'))
986
987
988    def location(self, line, pos):
989        self.transmit("location\t{}\t{}".format(line, pos))
990
991
992    def recognitionException(self, exc):
993        self.transmit('\t'.join([
994                    "exception",
995                    exc.__class__.__name__,
996                    str(int(exc.index)),
997                    str(int(exc.line)),
998                    str(int(exc.charPositionInLine))]))
999
1000
1001    def beginResync(self):
1002        self.transmit("beginResync")
1003
1004
1005    def endResync(self):
1006        self.transmit("endResync")
1007
1008
1009    def semanticPredicate(self, result, predicate):
1010        self.transmit('\t'.join([
1011                    "semanticPredicate",
1012                    str(int(result)),
1013                    self.escapeNewlines(predicate)]))
1014
1015    ## A S T  P a r s i n g  E v e n t s
1016
1017    def consumeNode(self, t):
1018        FIXME(31)
1019#         StringBuffer buf = new StringBuffer(50);
1020#         buf.append("consumeNode");
1021#         serializeNode(buf, t);
1022#         transmit(buf.toString());
1023
1024
1025    def LT_tree(self, i, t):
1026        FIXME(34)
1027#         int ID = adaptor.getUniqueID(t);
1028#         String text = adaptor.getText(t);
1029#         int type = adaptor.getType(t);
1030#         StringBuffer buf = new StringBuffer(50);
1031#         buf.append("LN\t"); // lookahead node; distinguish from LT in protocol
1032#         buf.append(i);
1033#         serializeNode(buf, t);
1034#         transmit(buf.toString());
1035
1036
1037    def serializeNode(self, buf, t):
1038        FIXME(33)
1039#         int ID = adaptor.getUniqueID(t);
1040#         String text = adaptor.getText(t);
1041#         int type = adaptor.getType(t);
1042#         buf.append("\t");
1043#         buf.append(ID);
1044#         buf.append("\t");
1045#         buf.append(type);
1046#         Token token = adaptor.getToken(t);
1047#         int line = -1;
1048#         int pos = -1;
1049#         if ( token!=null ) {
1050#             line = token.getLine();
1051#             pos = token.getCharPositionInLine();
1052#             }
1053#         buf.append("\t");
1054#         buf.append(line);
1055#         buf.append("\t");
1056#         buf.append(pos);
1057#         int tokenIndex = adaptor.getTokenStartIndex(t);
1058#         buf.append("\t");
1059#         buf.append(tokenIndex);
1060#         serializeText(buf, text);
1061
1062
1063    ## A S T  E v e n t s
1064
1065    def nilNode(self, t):
1066        self.transmit("nilNode\t{}".format(self.adaptor.getUniqueID(t)))
1067
1068
1069    def errorNode(self, t):
1070        self.transmit('errorNode\t{}\t{}\t"{}'.format(
1071             self.adaptor.getUniqueID(t),
1072             INVALID_TOKEN_TYPE,
1073             self.escapeNewlines(t.toString())))
1074
1075
1076    def createNode(self, node, token=None):
1077        if token is not None:
1078            self.transmit("createNode\t{}\t{}".format(
1079                    self.adaptor.getUniqueID(node),
1080                    token.index))
1081
1082        else:
1083            self.transmit('createNodeFromTokenElements\t{}\t{}\t"{}'.format(
1084                    self.adaptor.getUniqueID(node),
1085                    self.adaptor.getType(node),
1086                    self.adaptor.getText(node)))
1087
1088
1089    def becomeRoot(self, newRoot, oldRoot):
1090        self.transmit("becomeRoot\t{}\t{}".format(
1091                self.adaptor.getUniqueID(newRoot),
1092                self.adaptor.getUniqueID(oldRoot)))
1093
1094
1095    def addChild(self, root, child):
1096        self.transmit("addChild\t{}\t{}".format(
1097                self.adaptor.getUniqueID(root),
1098                self.adaptor.getUniqueID(child)))
1099
1100
1101    def setTokenBoundaries(self, t, tokenStartIndex, tokenStopIndex):
1102        self.transmit("setTokenBoundaries\t{}\t{}\t{}".format(
1103                self.adaptor.getUniqueID(t),
1104                tokenStartIndex, tokenStopIndex))
1105
1106
1107
1108    ## support
1109
1110    def setTreeAdaptor(self, adaptor):
1111        self.adaptor = adaptor
1112
1113    def getTreeAdaptor(self):
1114        return self.adaptor
1115
1116
1117    def serializeToken(self, t):
1118        buf = [str(int(t.index)),
1119               str(int(t.type)),
1120               str(int(t.channel)),
1121               str(int(t.line or 0)),
1122               str(int(t.charPositionInLine or 0)),
1123               '"' + self.escapeNewlines(t.text)]
1124        return '\t'.join(buf)
1125
1126
1127    def escapeNewlines(self, txt):
1128        if txt is None:
1129            return ''
1130
1131        txt = txt.replace("%","%25")   # escape all escape char ;)
1132        txt = txt.replace("\n","%0A")  # escape \n
1133        txt = txt.replace("\r","%0D")  # escape \r
1134        return txt
1135