1#!/usr/bin/env python3
2
3#
4# Copyright (C) 2018 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17#
18
19"""This module contains the unit tests to check the Lexer class."""
20
21import sys
22import unittest
23
24from blueprint import Lexer, LexerError, Token
25
26
27#------------------------------------------------------------------------------
28# Python 2 compatibility
29#------------------------------------------------------------------------------
30
if sys.version_info[0] < 3:
    def py3_str(string):
        """Convert a string into a utf-8 encoded string (Python 2 only)."""
        return unicode(string).encode('utf-8')
else:
    # On Python 3, str is already unicode; no conversion is needed.
    py3_str = str  # pylint: disable=invalid-name
37
38
39#------------------------------------------------------------------------------
40# LexerError
41#------------------------------------------------------------------------------
42
class LexerErrorTest(unittest.TestCase):
    """Unit tests for LexerError class."""

    def test_lexer_error(self):
        """Test LexerError __init__(), __str__(), line, column, and message."""

        # Each case: (buffer, offset, expected line, expected column,
        # expected str() output).
        cases = (
            ('a %', 2, 1, 3, 'LexerError: 1:3: unexpected character'),
            ('a\nb\ncde %', 8, 3, 5, 'LexerError: 3:5: unexpected character'),
        )

        for buf, offset, exp_line, exp_column, exp_str in cases:
            exc = LexerError(buf, offset, 'unexpected character')
            self.assertEqual(exc.line, exp_line)
            self.assertEqual(exc.column, exp_column)
            self.assertEqual(exc.message, 'unexpected character')
            self.assertEqual(str(exc), exp_str)


    def test_hierarchy(self):
        """Test the hierarchy of LexerError."""
        # LexerError must be catchable as a ValueError.
        with self.assertRaises(ValueError):
            raise LexerError('a', 0, 'error')
66
67
class LexComputeLineColumn(unittest.TestCase):
    """Unit tests for Lexer.compute_line_column() method."""

    def test_compute_line_column(self):
        """Test the line and column computation."""

        buf = 'ab\ncde\nfg\n'

        # Expected (line, column) for each offset 0..10 in buf.  The
        # trailing newline puts offset 10 on an empty fourth line.
        expected = [
            (1, 1), (1, 2), (1, 3),          # line 1: 'ab\n'
            (2, 1), (2, 2), (2, 3), (2, 4),  # line 2: 'cde\n'
            (3, 1), (3, 2), (3, 3),          # line 3: 'fg\n'
            (4, 1),                          # line 4: empty line
        ]

        for offset, (exp_line, exp_column) in enumerate(expected):
            line, column = Lexer.compute_line_column(buf, offset)
            self.assertEqual(line, exp_line)
            self.assertEqual(column, exp_column)
121
122
123#------------------------------------------------------------------------------
124# Lex.lex_string()
125#------------------------------------------------------------------------------
126
class LexStringTest(unittest.TestCase):
    """Unit tests for the Lexer.lex_string() method."""

    def test_raw_string_lit(self):
        """Test whether Lexer.lex_string() can tokenize raw string literal."""

        end, lit = Lexer.lex_string('`a`', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'a')

        # Raw strings may span multiple lines.
        end, lit = Lexer.lex_string('`a\nb`', 0)
        self.assertEqual(end, 5)
        self.assertEqual(lit, 'a\nb')

        # Lexing starts at the given offset, not at offset 0.
        end, lit = Lexer.lex_string('"a""b"', 3)
        self.assertEqual(end, 6)
        self.assertEqual(lit, 'b')

        # Unterminated raw string literal.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('`a', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 3)

        # Interpreted string literals must not contain a bare newline.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"a\nb"', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 3)


    def test_interpreted_string_literal(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal."""

        end, lit = Lexer.lex_string('"a"', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'a')

        # A plain 'n' (not escaped) stays a literal 'n'.
        end, lit = Lexer.lex_string('"n"', 0)
        self.assertEqual(end, 3)
        self.assertEqual(lit, 'n')

        # A backslash at end-of-input is an incomplete escape sequence.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"\\', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_char(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a escaped character."""

        end, lit = Lexer.lex_string('"\\a"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\a')

        end, lit = Lexer.lex_string('"\\b"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\b')

        end, lit = Lexer.lex_string('"\\f"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\f')

        end, lit = Lexer.lex_string('"\\n"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\n')

        end, lit = Lexer.lex_string('"\\r"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\r')

        end, lit = Lexer.lex_string('"\\t"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\t')

        end, lit = Lexer.lex_string('"\\v"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\v')

        end, lit = Lexer.lex_string('"\\\\"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\\')

        end, lit = Lexer.lex_string('"\\\'"', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\'')

        end, lit = Lexer.lex_string('"\\\""', 0)
        self.assertEqual(end, 4)
        self.assertEqual(lit, '\"')

        # '\?' is not a recognized escape character.
        with self.assertRaises(LexerError) as ctx:
            Lexer.lex_string('"\\?"', 0)
        self.assertEqual(ctx.exception.line, 1)
        self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_octal(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with an octal escape sequence."""

        end, lit = Lexer.lex_string('"\\000"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\377"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\377')

        # Octal escapes require exactly three octal digits.  Trailing
        # spaces pad each case to the same length for readability.
        tests = [
            '"\\0',
            '"\\0"  ',
            '"\\09" ',
            '"\\009"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_hex(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a hexadecimal escape sequence."""

        end, lit = Lexer.lex_string('"\\x00"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\xff"', 0)
        self.assertEqual(end, 6)
        self.assertEqual(lit, '\xff')

        # '\x' escapes require exactly two hexadecimal digits.
        tests = [
            '"\\x',
            '"\\x"  ',
            '"\\x0" ',
            '"\\xg" ',
            '"\\x0g"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_little_u(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a little u escape sequence."""

        end, lit = Lexer.lex_string('"\\u0000"', 0)
        self.assertEqual(end, 8)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\uffff"', 0)
        self.assertEqual(end, 8)
        self.assertEqual(lit, py3_str(u'\uffff'))

        # '\u' escapes require exactly four hexadecimal digits; a bad
        # digit at each position must be rejected.
        tests = [
            '"\\u',
            '"\\u"    ',
            '"\\u0"   ',
            '"\\ug"   ',
            '"\\u0g"  ',
            '"\\u00g" ',
            '"\\u000g"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)


    def test_literal_escape_big_u(self):
        """Test whether Lexer.lex_string() can tokenize interpreted string
        literal with a big u escape sequence."""

        end, lit = Lexer.lex_string('"\\U00000000"', 0)
        self.assertEqual(end, 12)
        self.assertEqual(lit, '\0')

        end, lit = Lexer.lex_string('"\\U0001ffff"', 0)
        self.assertEqual(end, 12)
        self.assertEqual(lit, py3_str(u'\U0001ffff'))

        # '\U' escapes require exactly eight hexadecimal digits; a bad
        # digit at each position must be rejected.  (A duplicated
        # '"\\U000g"' entry was removed; the list now covers each digit
        # position exactly once.)
        tests = [
            '"\\U',
            '"\\U"        ',
            '"\\U0"       ',
            '"\\Ug"       ',
            '"\\U0g"      ',
            '"\\U00g"     ',
            '"\\U000g"    ',
            '"\\U0000g"   ',
            '"\\U00000g"  ',
            '"\\U000000g" ',
            '"\\U0000000g"',
        ]

        for test in tests:
            with self.assertRaises(LexerError) as ctx:
                Lexer.lex_string(test, 0)
            self.assertEqual(ctx.exception.line, 1)
            self.assertEqual(ctx.exception.column, 2)
338
339
340#------------------------------------------------------------------------------
341# Lexer.lex()
342#------------------------------------------------------------------------------
343
class LexTest(unittest.TestCase):
    """Unit tests for the Lexer.lex() method."""

    def _check_lex(self, buf, offset, exp_token, exp_end, exp_lit):
        """Lex one token from buf at offset and check the full result."""
        token, end, lit = Lexer.lex(buf, offset)
        self.assertEqual(token, exp_token)
        self.assertEqual(end, exp_end)
        self.assertEqual(lit, exp_lit)


    def test_lex_char(self):
        """Test whether Lexer.lex() can lex a character."""

        punctuation = (
            ('(', Token.LPAREN),
            (')', Token.RPAREN),
            ('[', Token.LBRACKET),
            (']', Token.RBRACKET),
            ('{', Token.LBRACE),
            ('}', Token.RBRACE),
            (':', Token.COLON),
            ('=', Token.ASSIGN),
            ('+', Token.PLUS),
            (',', Token.COMMA),
        )

        for buf, exp_token in punctuation:
            self._check_lex(buf, 0, exp_token, 1, None)


    def test_lex_assign_plus(self):
        """Test whether Lexer.lex() can lex `+=` without problems."""

        self._check_lex('+=', 0, Token.ASSIGNPLUS, 2, None)


    def test_lex_space(self):
        """Test whether Lexer.lex() can lex whitespaces."""

        # Single whitespace characters, then a run of mixed whitespace
        # which must be consumed as one SPACE token.
        cases = (
            (' ', 1),
            ('\t', 1),
            ('\r', 1),
            ('\n', 1),
            ('\n \r\t\n', 5),
        )

        for buf, exp_end in cases:
            self._check_lex(buf, 0, Token.SPACE, exp_end, None)


    def test_lex_comment(self):
        """Test whether Lexer.lex() can lex comments."""

        # Line comments stop before the newline; block comments must
        # handle embedded and adjacent '*' characters correctly.
        cases = (
            ('// abcd', 7),
            ('// abcd\nnext', 7),
            ('/*a\nb*/', 7),
            ('/*a\n *b*/', 9),
            ('/*a**b*/', 8),
            ('/*a***b*/', 9),
            ('/**/', 4),
            ('/***/', 5),
            ('/**a*/', 6),
            ('/*a**/', 6),
            ('/***a*/', 7),
            ('/*a***/', 7),
        )

        for buf, exp_end in cases:
            self._check_lex(buf, 0, Token.COMMENT, exp_end, None)


    def test_lex_string(self):
        """Test whether Lexer.lex() can lex a string."""

        self._check_lex('"a"', 0, Token.STRING, 3, 'a')
        self._check_lex('`a\nb`', 0, Token.STRING, 5, 'a\nb')


    def test_lex_ident(self):
        """Test whether Lexer.lex() can lex an identifier."""

        self._check_lex('ident', 0, Token.IDENT, 5, 'ident')


    def test_lex_offset(self):
        """Test the offset argument of Lexer.lex()."""

        # Lex the same buffer token by token, feeding each end offset
        # back in as the next starting offset.
        self._check_lex('a "b"', 0, Token.IDENT, 1, 'a')
        self._check_lex('a "b"', 1, Token.SPACE, 2, None)
        self._check_lex('a "b"', 2, Token.STRING, 5, 'b')
543
544
545#------------------------------------------------------------------------------
546# Lexer class test
547#------------------------------------------------------------------------------
548
class LexerTest(unittest.TestCase):
    """Unit tests for the Lexer class."""

    def _assert_state(self, lexer, start, end, token, literal):
        """Check the lexer's current token position, kind, and literal."""
        self.assertEqual(lexer.start, start)
        self.assertEqual(lexer.end, end)
        self.assertEqual(lexer.token, token)
        self.assertEqual(lexer.literal, literal)


    def test_lexer(self):
        """Test token, start, end, literal, and consume()."""

        lexer = Lexer('a b //a\n "c"', 0)

        # The comment and surrounding whitespace are skipped between
        # the second identifier and the string.
        self._assert_state(lexer, 0, 1, Token.IDENT, 'a')
        lexer.consume(Token.IDENT)

        self._assert_state(lexer, 2, 3, Token.IDENT, 'b')
        lexer.consume(Token.IDENT)

        self._assert_state(lexer, 9, 12, Token.STRING, 'c')
        lexer.consume(Token.STRING)

        self._assert_state(lexer, 12, 12, Token.EOF, None)


    def test_lexer_offset(self):
        """Test the offset argument of Lexer.__init__()."""

        # Starting at offset 2 skips 'a ' entirely.
        lexer = Lexer('a b', 2)

        self._assert_state(lexer, 2, 3, Token.IDENT, 'b')
        lexer.consume(Token.IDENT)

        self._assert_state(lexer, 3, 3, Token.EOF, None)
        lexer.consume(Token.EOF)


    def test_lexer_path(self):
        """Test the path attribute of the Lexer object."""
        lexer = Lexer('content', path='test_path')
        self.assertEqual(lexer.path, 'test_path')
603
604
# Run all the unit tests in this module when executed as a script.
if __name__ == '__main__':
    unittest.main()
607