1#!/usr/bin/ruby
2# encoding: utf-8
3
4=begin LICENSE
5
6[The "BSD licence"]
7Copyright (c) 2009-2010 Kyle Yetter
8All rights reserved.
9
10Redistribution and use in source and binary forms, with or without
11modification, are permitted provided that the following conditions
12are met:
13
14 1. Redistributions of source code must retain the above copyright
15    notice, this list of conditions and the following disclaimer.
16 2. Redistributions in binary form must reproduce the above copyright
17    notice, this list of conditions and the following disclaimer in the
18    documentation and/or other materials provided with the distribution.
19 3. The name of the author may not be used to endorse or promote products
20    derived from this software without specific prior written permission.
21
22THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33=end
34
35require 'antlr3'
36
37module ANTLR3
38module AST
39
40=begin rdoc ANTLR3::AST::Wizard
41
42AST::Wizard is an extra utility class that allows quick creation of AST objects
43using definitions writing in ANTLR-style tree definition syntax. It can also
44define <i>tree patterns</i>, objects that are conceptually similar to regular
45expressions. Patterns allow a simple method for recursively searching through an
46AST for a particular node structure. These features make tree wizards useful
47while testing and debugging AST constructing parsers and tree parsers. This
48library has been ported to Ruby directly from the ANTLR Python runtime API.
49
50See
51http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard
52for more background on the concept of a tree wizard.
53
54== Usage
55
56  # setting up and creating a tree wizard
57  token_names = Array.new(4, '') + %w(VAR NUMBER EQ PLUS MINUS MULT DIV)
58  adaptor     = ANTLR3::AST::CommonTreeAdaptor.new
59  wizard      = ANTLR3::AST::Wizard.new(adaptor, token_names)
60
61  # building trees
62  lone_node = wizard.create "VAR[x]"   # => x
63  lone_node.type                       # => 4  # = VAR
64  lone_node.text                       # => "x"
65
66  expression_node = wizard.create "(MINUS VAR NUMBER)"
67    # => (MINUS VAR NUMBER)
68  statement_node = wizard.create "(EQ[=] VAR[x] (PLUS[+] NUMBER[1] NUMBER[2]))"
69    # => (= x (+ 1 2))
70  deep_node = wizard.create(<<-TREE)
71    (MULT[*] NUMBER[1]
72      (MINUS[-]
73        (MULT[*] NUMBER[3]    VAR[x])
74        (DIV[/]  VAR[y] NUMBER[3.14])
75        (MULT[*] NUMBER[4]    VAR[z])
76      )
77    )
78  TREE
79    # => (* 1 (- (* 3 x) (/ y 3.14) (* 4 z))
80
81  bad_tree_syntax = wizard.create "(+ 1 2)"
82    # => nil - invalid node names
83
84  # test whether a tree matches a pattern
85  wizard.match(expression_node, '(MINUS VAR .)') # => true
86  wizard.match(lone_node, 'NUMBER NUMBER')       # => false
87
88  # extract nodes matching a pattern
89  wizard.find(statement_node, '(PLUS . .)')
90  # => [(+ 1 2)]
91  wizard.find(deep_node, 4)  # where 4 is the value of type VAR
92  # => [x, y, z]
93
94  # iterate through the tree and extract nodes with pattern labels
95  wizard.visit(deep_node, '(MULT %n:NUMBER %v:.)') do |node, parent, local_index, labels|
96    printf "n = %p\n, v = %p\n", labels['n'], labels['v']
97  end
98    # => prints out:
99    # n = 3, v = x
100    # n = 4, v = z
101
102== Tree Construction Syntax
103
104  Simple Token Node:     TK
105  Token Node With Text:  TK[text]
106  Flat Node List:        (nil TK1 TK2)
107  General Node:          (RT TK1 TK2)
108  Complex Nested Node:   (RT (SUB1[sometext] TK1) TK2 (SUB2 TK3 TK4[moretext]))
109
110=== Additional Syntax for Tree Matching Patterns
111
112  Match Any Token Node:  .
113  Label a Node:          %name:TK
114
115=end
116
117class Wizard
118
119  include Constants
120  include Util
121
122=begin rdoc ANTLR3::AST::Wizard::PatternLexer
123
124A class that is used internally by AST::Wizard to tokenize tree patterns
125
126=end
127
128  class PatternLexer
129    include ANTLR3::Constants
130
131    autoload :StringScanner, 'strscan'
132
133    PATTERNS = [
134      [ :space, /\s+/ ],
135      [ :identifier, /[a-z_]\w*/i ],
136      [ :open, /\(/ ],
137      [ :close, /\)/ ],
138      [ :percent, /%/ ],
139      [ :colon, /:/ ],
140      [ :dot, /\./ ],
141      [ :argument, /\[((?:[^\[\]\\]|\\\[|\\\]|\\.)*?)\]/ ]
142    ]
143
144    attr_reader :text, :error, :pattern
145    def initialize( pattern )
146      @pattern = pattern.to_s
147      @scanner = StringScanner.new( pattern )
148      @text = ''
149      @error = false
150    end
151
152    def next_token
153      begin
154        @scanner.eos? and return EOF
155
156        type, = PATTERNS.find do |type, pattern|
157          @scanner.scan( pattern )
158        end
159
160        case type
161        when nil
162          type, @text, @error = EOF, '', true
163          break
164        when :identifier then @text = @scanner.matched
165        when :argument
166          # remove escapes from \] sequences in the text argument
167          ( @text = @scanner[ 1 ] ).gsub!( /\\(?=[\[\]])/, '' )
168        end
169      end while type == :space
170
171      return type
172    end
173
174    alias error? error
175  end
176
177
178=begin rdoc ANTLR3::AST::Wizard::Pattern
179
180A class that is used internally by AST::Wizard to construct AST tree objects
181from a tokenized tree pattern
182
183=end
184
185  class PatternParser
186    def self.parse( pattern, token_scheme, adaptor )
187      lexer = PatternLexer.new( pattern )
188      new( lexer, token_scheme, adaptor ).pattern
189    end
190
191    include ANTLR3::Constants
192
193    def initialize( tokenizer, token_scheme, adaptor )
194      @tokenizer = tokenizer
195      @token_scheme = token_scheme
196      @adaptor   = adaptor
197      @token_type = tokenizer.next_token
198    end
199
200    def pattern
201      case @token_type
202      when :open then return parse_tree
203      when :identifier
204        node = parse_node
205        @token_type == EOF and return node
206        return nil
207      end
208      return nil
209    end
210
211    CONTINUE_TYPES = [ :open, :identifier, :percent, :dot ]
212
213    def parse_tree
214      @token_type != :open and return nil
215      @token_type = @tokenizer.next_token
216      root = parse_node or return nil
217
218      loop do
219        case @token_type
220        when :open
221          subtree = parse_tree
222          @adaptor.add_child( root, subtree )
223        when :identifier, :percent, :dot
224          child = parse_node or return nil
225          @adaptor.add_child( root, child )
226        else break
227        end
228      end
229      @token_type == :close or return nil
230      @token_type = @tokenizer.next_token
231      return root
232    end
233
234    def parse_node
235      label = nil
236      if @token_type == :percent
237        ( @token_type = @tokenizer.next_token ) == :identifier or return nil
238        label = @tokenizer.text
239        ( @token_type = @tokenizer.next_token ) == :colon or return nil
240        @token_type = @tokenizer.next_token
241      end
242
243      if @token_type == :dot
244        @token_type = @tokenizer.next_token
245        wildcard_payload = CommonToken.create( :type => 0, :text => '.' )
246        node = WildcardPattern.new( wildcard_payload )
247        label and node.label = label
248        return node
249      end
250
251      @token_type == :identifier or return nil
252      token_name = @tokenizer.text
253      @token_type = @tokenizer.next_token
254      token_name == 'nil' and return @adaptor.create_flat_list
255
256      text = token_name
257      arg = nil
258      if @token_type == :argument
259        arg = @tokenizer.text
260        text = arg
261        @token_type = @tokenizer.next_token
262      end
263
264      node_type = @token_scheme[ token_name ] || INVALID_TOKEN_TYPE
265      node = @adaptor.create_from_type( node_type, text )
266
267      if Pattern === node
268        node.label, node.has_text_arg = label, arg
269      end
270      return node
271    end
272  end
273
274
275=begin rdoc ANTLR3::AST::Wizard::Pattern
276
277A simple tree class that represents the skeletal structure of tree. It is used
278to validate tree structures as well as to extract nodes that match the pattern.
279
280=end
281
282  class Pattern < CommonTree
283    def self.parse( pattern_str, scheme )
284      PatternParser.parse(
285        pattern_str, scheme, PatternAdaptor.new( scheme.token_class )
286      )
287    end
288
289    attr_accessor :label, :has_text_arg
290    alias :has_text_arg? :has_text_arg
291
292    def initialize( payload )
293      super( payload )
294      @label = nil
295      @has_text_arg = nil
296    end
297
298    def to_s
299      prefix = @label ? '%' << @label << ':' : ''
300      return( prefix << super )
301    end
302  end
303
304=begin rdoc ANTLR3::AST::Wizard::WildcardPattern
305
306A simple tree node used to represent the operation "match any tree node type" in
307a tree pattern. They are represented by '.' in tree pattern specifications.
308
309=end
310
311  class WildcardPattern < Pattern; end
312
313
314=begin rdoc ANTLR3::AST::Wizard::PatternAdaptor
315
316A customized TreeAdaptor used by AST::Wizards to build tree patterns.
317
318=end
319
320  class PatternAdaptor < CommonTreeAdaptor
321    def create_with_payload( payload )
322      return Pattern.new( payload )
323    end
324  end
325
326  attr_accessor :token_scheme, :adaptor
327
328  def initialize( options = {} )
329    @token_scheme = options.fetch( :token_scheme ) do
330      TokenScheme.build( options[ :token_class ], options[ :tokens ] )
331    end
332    @adaptor = options.fetch( :adaptor ) do
333      CommonTreeAdaptor.new( @token_scheme.token_class )
334    end
335  end
336
337  def create( pattern )
338    PatternParser.parse( pattern, @token_scheme, @adaptor )
339  end
340
341  def index( tree, map = {} )
342    tree or return( map )
343    type = @adaptor.type_of( tree )
344    elements = map[ type ] ||= []
345    elements << tree
346    @adaptor.each_child( tree ) { | child | index( child, map ) }
347    return( map )
348  end
349
350  def find( tree, what )
351    case what
352    when Integer then find_token_type( tree, what )
353    when String  then find_pattern( tree, what )
354    when Symbol  then find_token_type( tree, @token_scheme[ what ] )
355    else raise ArgumentError, "search subject must be a token type (integer) or a string"
356    end
357  end
358
359  def find_token_type( tree, type )
360    nodes = []
361    visit( tree, type ) { | t, | nodes << t }
362    return nodes
363  end
364
365  def find_pattern( tree, pattern )
366    subtrees = []
367    visit_pattern( tree, pattern ) { | t, | subtrees << t }
368    return( subtrees )
369  end
370
371  def visit( tree, what = nil, &block )
372    block_given? or return enum_for( :visit, tree, what )
373    Symbol === what and what = @token_scheme[ what ]
374    case what
375    when nil then visit_all( tree, &block )
376    when Integer then visit_type( tree, nil, what, &block )
377    when String  then visit_pattern( tree, what, &block )
378    else raise( ArgumentError, tidy( <<-'END', true ) )
379      | The 'what' filter argument must be a tree
380      | pattern (String) or a token type (Integer)
381      | -- got #{ what.inspect }
382      END
383    end
384  end
385
386  def visit_all( tree, parent = nil, &block )
387    index = @adaptor.child_index( tree )
388    yield( tree, parent, index, nil )
389    @adaptor.each_child( tree ) do | child |
390      visit_all( child, tree, &block )
391    end
392  end
393
394  def visit_type( tree, parent, type, &block )
395    tree.nil? and return( nil )
396    index = @adaptor.child_index( tree )
397    @adaptor.type_of( tree ) == type and yield( tree, parent, index, nil )
398    @adaptor.each_child( tree ) do | child |
399      visit_type( child, tree, type, &block )
400    end
401  end
402
403  def visit_pattern( tree, pattern, &block )
404    pattern = Pattern.parse( pattern, @token_scheme )
405
406    if pattern.nil? or pattern.flat_list? or pattern.is_a?( WildcardPattern )
407      return( nil )
408    end
409
410    visit( tree, pattern.type ) do | tree, parent, child_index, labels |
411      labels = match!( tree, pattern ) and
412        yield( tree, parent, child_index, labels )
413    end
414  end
415
416  def match( tree, pattern )
417    pattern = Pattern.parse( pattern, @token_scheme )
418
419    return( match!( tree, pattern ) )
420  end
421
422  def match!( tree, pattern, labels = {} )
423    tree.nil? || pattern.nil? and return false
424    unless pattern.is_a? WildcardPattern
425      @adaptor.type_of( tree ) == pattern.type or return false
426      pattern.has_text_arg && ( @adaptor.text_of( tree ) != pattern.text ) and
427        return false
428    end
429    labels[ pattern.label ] = tree if labels && pattern.label
430
431    number_of_children = @adaptor.child_count( tree )
432    return false unless number_of_children == pattern.child_count
433
434    number_of_children.times do |index|
435      actual_child = @adaptor.child_of( tree, index )
436      pattern_child = pattern.child( index )
437
438      return( false ) unless match!( actual_child, pattern_child, labels )
439    end
440
441    return labels
442  end
443
444  def equals( tree_a, tree_b, adaptor = @adaptor )
445    tree_a && tree_b or return( false )
446
447    adaptor.type_of( tree_a ) == adaptor.type_of( tree_b ) or return false
448    adaptor.text_of( tree_a ) == adaptor.text_of( tree_b ) or return false
449
450    child_count_a = adaptor.child_count( tree_a )
451    child_count_b = adaptor.child_count( tree_b )
452    child_count_a == child_count_b or return false
453
454    child_count_a.times do | i |
455      child_a = adaptor.child_of( tree_a, i )
456      child_b = adaptor.child_of( tree_b, i )
457      equals( child_a, child_b, adaptor ) or return false
458    end
459    return true
460  end
461
462
463  DOT_DOT_PATTERN = /.*[^\.]\\.{2}[^\.].*/
464  DOUBLE_ETC_PATTERN = /.*\.{3}\s+\.{3}.*/
465
466  def in_context?( tree, context )
467    case context
468    when DOT_DOT_PATTERN then raise ArgumentError, "invalid syntax: .."
469    when DOUBLE_ETC_PATTERN then raise ArgumentError, "invalid syntax: ... ..."
470    end
471
472    context = context.gsub( /([^\.\s])\.{3}([^\.])/, '\1 ... \2' )
473    context.strip!
474    nodes = context.split( /\s+/ )
475
476    while tree = @adaptor.parent( tree ) and node = nodes.pop
477      if node == '...'
478        node = nodes.pop or return( true )
479        tree = @adaptor.each_ancestor( tree ).find do | t |
480          @adaptor.type_name( t ) == node
481        end or return( false )
482      end
483      @adaptor.type_name( tree ) == node or return( false )
484    end
485
486    return( false ) if tree.nil? and not nodes.empty?
487    return true
488  end
489
490  private :match!
491end
492end
493end
494