1# Copyright 2023 The Khronos Group Inc.
2#
3# SPDX-License-Identifier: Apache-2.0
4
5"""Utilities for automatic transformation of spec sources.  Most of the logic
6has to do with detecting asciidoc markup or block types that should not be
7transformed (tables, code) and ignoring them.  It is very likely there are many
asciidoc constructs not yet accounted for in the script, but our usage of
asciidoc markup is intentionally somewhat limited.
10"""
11
12import re
13import sys
14from reflib import logDiag, logWarn
15
16# Vulkan-specific - will consolidate into scripts/ like OpenXR soon
17sys.path.insert(0, 'xml')
18
19from apiconventions import APIConventions
20conventions = APIConventions()
21
# Start of an asciidoctor conditional
#   ifdef::
#   ifndef::
conditionalStart = re.compile(r'^(ifdef|ifndef)::')

# Markup that always ends a paragraph
#   empty line or whitespace
#   [block options]
#   [[anchor]]
#   //                  comment
#   <<<<                page break
#   :attribute-setting
#   macro-directive::terms
#   +                   standalone list item continuation
#   label::             labelled list - label must be standalone
endPara = re.compile(r'^( *|\[.*\]|//.*|<<<<|:.*|[a-z]+::.*|\+|.*::)$')

# Special case of markup ending a paragraph, used to track the current
# command/structure. This allows for either OpenXR or Vulkan API path
# conventions. Nominally it should use the file suffix defined by the API
# conventions (conventions.file_suffix), except that XR uses '.txt' for
# generated API include files, not '.adoc' like its other includes.
# As written, the pattern recognizes only the '.adoc' suffix.
#
# The path is matched piecewise:
#   directory_traverse  one to four '../' components or '{generated}/',
#                       optionally followed by 'generated/'
#   generated_type      first directory below that (e.g. 'api')
#   category            second directory (e.g. 'protos', 'structs')
#   entity_name         file name up to the '.adoc[]' suffix
#
# The dots in '../' and '.adoc' are escaped so that they only match a
# literal '.', not an arbitrary character.
includePat = re.compile(
        r'include::'
        r'(?P<directory_traverse>((\.\./){1,4}|\{generated\}/)(generated/)?)'
        r'(?P<generated_type>\w+)/'
        r'(?P<category>\w+)/'
        r'(?P<entity_name>[^./]+)\.adoc\[\]')

# Markup that is OK in a contiguous paragraph but otherwise passed through
#   .anything (except .., which indicates a literal block)
#   === Section Titles
#   image::path_to_image[attributes]  (apparently a single colon is OK but less idiomatic)
endParaContinue = re.compile(r'^(\.[^.].*|=+ .*|image:.*\[.*\])$')

# Markup for block delimiters whose contents *should* be reformatted
#   --   (exactly two)  (open block)
#   **** (4 or more)    (sidebar block)
#   ==== (4 or more)    (example block)
#   ____ (4 or more)    (quote block)
blockTransform = re.compile(r'^(--|[*=_]{4,})$')

# Fake block delimiters for "common" VU statements.
# Compared against whole input lines, so it includes the trailing newline.
blockCommonTransform = '// Common Valid Usage\n'

# Markup for block delimiters whose contents should *not* be transformed
#   |=== (3 or more)  (table)
#   ```  (3 or more)  (listing block)
#   //// (4 or more)  (comment block)
#   ---- (4 or more)  (listing block)
#   .... (4 or more)  (literal block)
#   ++++ (4 or more)  (passthrough block)
blockPassthrough = re.compile(r'^(\|={3,}|[`]{3}|[\-+./]{4,})$')

# Markup for introducing lists (hanging paragraphs)
#   * bullet
#     ** bullet
#     -- bullet
#   . bullet
#   :: bullet (no longer supported by asciidoctor 2)
#   {empty}:: bullet
#   1. list item
#   <1> source listing callout
beginBullet = re.compile(r'^ *([-*.]+|\{empty\}::|::|[0-9]+[.]|<([0-9]+)>) ')
82
class TransformState:
    """Bookkeeping for a single document transformation pass.

    Holds the stack of currently open asciidoc blocks, the paragraph being
    accumulated together with its indentation, the input line counter, and
    the API name used for VUID tag generation."""
    def __init__(self):
        self.blockStack = [ None ]
        """Delimiter lines of the blocks currently open, innermost last
        (such as '--', '----', '****', '====', or '++++'). The innermost
        entry affects whether or not block contents should be transformed."""
        self.transformStack = [ True ]
        """Kept in step with blockStack: the last element tells whether the
        contents of the innermost block should be transformed."""
        self.vuStack = [ False ]
        """Kept in step with blockStack: the last element tells whether the
        innermost block is an explicit Valid Usage block."""

        self.para = []
        """Lines of the paragraph being accumulated.
        A non-empty list means there is a current paragraph."""

        self.lastTitle = False
        """True if the previous line was a document title line
        (e.g. :leveloffset: 0 - no attempt to track changes to this is made)."""

        self.leadIndent = 0
        """Indent level (in spaces) of the first line of a paragraph."""

        self.hangIndent = 0
        """Indent level of the remaining lines of a paragraph."""

        self.lineNumber = 0
        """Number of the line being read from the input file."""

        self.defaultApiName = '{refpage}'
        self.apiName = self.defaultApiName
        """Name of an API structure or command for VUID tag generation,
        or {refpage} if one has not been included in this file yet."""

    def incrLineNumber(self):
        """Advance the input line counter by one."""
        self.lineNumber += 1

    def isOpenBlockDelimiter(self, line):
        """Tell whether line is an open block delimiter ('--').

           The listing block delimiter is deliberately not matched: inside
           refpage blocks it is used both as a listing block and, via an
           extension, as a nested open block."""
        trimmed = line.rstrip()
        return trimmed == '--'

    def resetPara(self):
        """Discard the accumulated paragraph and zero both indent levels."""
        self.para = []
        self.leadIndent = self.hangIndent = 0

    def endBlock(self, line, transform, vuBlock):
        """Open or close a block delimited by line.

        When line equals the delimiter of the innermost open block, that
        block is closed. Otherwise a new block is opened and tagged with
        transform (whether its contents should be transformed) and vuBlock
        (True if the previous line indicates this is a Valid Usage
        block)."""
        if self.blockStack[-1] != line:
            # Not the innermost delimiter, so this line opens a block.
            # The diagnostic reports the depth after the push.
            self.blockStack.append(line)
            self.transformStack.append(transform)
            self.vuStack.append(vuBlock)

            logDiag('endBlock transform =', transform, ' line', self.lineNumber,
                    ': pushing block start depth', len(self.blockStack),
                    ':', line, end='')
            return

        # Closing the innermost block; the depth is reported before the pop.
        logDiag('endBlock line', self.lineNumber,
                ': popping block end depth:', len(self.blockStack),
                ':', line, end='')

        # apiName goes back to its default at the end of an open block.
        # Open blocks cannot be nested (at present), so this is safe.
        if not self.isOpenBlockDelimiter(line):
            logDiag('NOT resetting apiName to default at line',
                    self.lineNumber)
        else:
            logDiag('reset apiName to empty at line', self.lineNumber)
            self.apiName = self.defaultApiName

        for stack in (self.blockStack, self.transformStack, self.vuStack):
            stack.pop()

    def addLine(self, line, indent):
        """Append line, whose leading indent is indent, to the paragraph."""
        if not self.para:
            # First line: it defines both indentation levels.
            self.para = [line]
            self.leadIndent = self.hangIndent = indent
            return

        # Continuation line. While the hanging indent still equals the lead
        # indent it is taken from this line; once they differ it is frozen.
        if self.hangIndent == self.leadIndent:
            self.hangIndent = indent
        self.para.append(line)
182
183
class TransformCallbackState:
    """Snapshot of selected TransformState fields, handed to the
    transformer callback object."""
    def __init__(self, state):
        vuFlags = state.vuStack
        # An empty stack is treated as "not a VU".
        self.isVU = vuFlags[-1] if vuFlags else False
        """True if this paragraph is a VU."""

        self.apiName = state.apiName
        """Name of the API structure or command this paragraph belongs
        to."""

        self.leadIndent = state.leadIndent
        """Indent level (in spaces) of the first line of a paragraph."""

        self.hangIndent = state.hangIndent
        """Indent level of the remaining lines of a paragraph."""

        self.lineNumber = state.lineNumber
        """Number of the line being read from the input file."""
203
204
class DocTransformer:
    """Line-oriented transformer for asciidoc spec source.

    The lines given to transformFile() are classified against the
    module-level markup patterns and grouped into paragraphs.  While doing
    so the transformer tracks which block the current text belongs to,
    whether it is inside a Valid Usage block, and which API include was
    seen, and processes the text in 'paragraph' granularity.
    The transformer takes a callback object with the following methods:

    - transformParagraph: Called when a paragraph inside a transformable
      block is complete.  The paragraph (a list of lines) is passed along
      with a TransformCallbackState (such as whether it is a VU).  The
      returned list of lines is what gets written.
    - onEmbeddedVUConditional: Called when an embedded VU conditional is
      encountered.  (The call site in transformFile() is currently
      commented out.)
    """
    def __init__(self,
                 filename,
                 outfile,
                 callback):
        self.filename = filename
        """base name of file being read from."""

        self.outfile = outfile
        """file handle to write to, or None to suppress output."""

        self.state = TransformState()
        """State of transformation"""

        self.callback = callback
        """The transformation callback object"""

    def printLines(self, lines):
        """Print an array of lines with newlines already present.

        Nothing is written when outfile is None."""
        if len(lines) > 0:
            logDiag(':: printLines:', len(lines), 'lines: ', lines[0], end='')

        if self.outfile is not None:
            for line in lines:
                print(line, file=self.outfile, end='')

    def emitPara(self):
        """Emit a paragraph, possibly transforming it depending on the block
        context.

        The callback's transformParagraph() is only consulted when the
        innermost block is marked as transformable; otherwise the lines are
        written unchanged.

        Resets the paragraph accumulator."""
        if self.state.para != []:
            transformedPara = self.state.para

            if self.state.transformStack[-1]:
                callbackState = TransformCallbackState(self.state)

                transformedPara = self.callback.transformParagraph(
                        self.state.para,
                        callbackState)

            self.printLines(transformedPara)

        self.state.resetPara()

    def endPara(self, line):
        """'line' ends a paragraph and should itself be emitted.
        line may be None to indicate EOF or other exception."""
        logDiag('endPara line', self.state.lineNumber, ': emitting paragraph')

        # Emit current paragraph, this line, and reset tracker
        self.emitPara()

        if line:
            self.printLines([line])

    def endParaContinue(self, line):
        """'line' ends a paragraph (unless there is already a paragraph being
        accumulated, e.g. len(para) > 0 - currently not implemented)"""
        self.endPara(line)

    def endBlock(self, line, transform = False, vuBlock = False):
        """'line' begins or ends a block.

        The pending paragraph and the delimiter line are emitted first, then
        the block stack is updated.

        If beginning a block, tag whether or not to transform the contents.

        vuBlock is True if the previous line indicates this is a Valid Usage
        block."""
        self.endPara(line)
        self.state.endBlock(line, transform, vuBlock)

    def endParaBlockTransform(self, line, vuBlock):
        """'line' begins or ends a block. The paragraphs in the block *should* be
        reformatted (e.g. a NOTE)."""
        self.endBlock(line, transform = True, vuBlock = vuBlock)

    def endParaBlockPassthrough(self, line):
        """'line' begins or ends a block. The paragraphs in the block should
        *not* be reformatted (e.g. a code listing)."""
        self.endBlock(line, transform = False)

    def addLine(self, line):
        """'line' starts or continues a paragraph.

        Paragraphs may have "hanging indent", e.g.

        ```
          * Bullet point...
            ... continued
        ```

        In this case, when the higher indentation level ends, so does the
        paragraph."""
        logDiag('addLine line', self.state.lineNumber, ':', line, end='')

        # See https://stackoverflow.com/questions/13648813/what-is-the-pythonic-way-to-count-the-leading-spaces-in-a-string
        indent = len(line) - len(line.lstrip())

        # A hanging paragraph ends due to a less-indented line.
        if self.state.para != [] and indent < self.state.hangIndent:
            logDiag('addLine: line reduces indentation, emit paragraph')
            self.emitPara()

        # A bullet point (or something that looks like one) always ends the
        # current paragraph.
        if beginBullet.match(line):
            logDiag('addLine: line matches beginBullet, emit paragraph')
            self.emitPara()

        self.state.addLine(line, indent)

    def apiMatch(self, oldname, newname):
        """Returns whether oldname and newname match, up to an API suffix.

           The suffix is taken to be any run of trailing ASCII uppercase
           letters, which is stripped from both names before comparing.
           This should use the API map instead of this heuristic, since aliases
           like VkPhysicalDeviceVariablePointerFeaturesKHR ->
           VkPhysicalDeviceVariablePointersFeatures are not recognized."""
        upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        return oldname.rstrip(upper) == newname.rstrip(upper)

    def _trackApiInclude(self, line):
        """Record the API named by an include:: line, for VUID generation.

        Only includes of generated 'api' files in the 'protos', 'structs' or
        'funcpointers' categories are considered.  The first such include
        seen while apiName still has its default value sets apiName; later
        ones only produce a diagnostic when they do not match it."""
        matches = includePat.search(line)
        if matches is None:
            return

        if matches.group('generated_type') != 'api':
            return
        if matches.group('category') not in ('protos', 'structs', 'funcpointers'):
            return

        apiName = matches.group('entity_name')
        if self.state.apiName == self.state.defaultApiName:
            self.state.apiName = apiName
        elif not self.apiMatch(self.state.apiName, apiName):
            # This happens when there are multiple API include
            # lines in a single block. The style guideline is to
            # always place the API which others are promoted to
            # first. In virtually all cases, the promoted API
            # will differ solely in the vendor suffix (or
            # absence of it), which is benign.
            logDiag(f'Promoted API name mismatch at line {self.state.lineNumber}: {apiName} does not match {self.state.apiName} (this is OK if it is just a spelling alias)')

    def transformFile(self, lines):
        """Transform lines, and possibly output to the given file.

        lines is an iterable of input lines, each with its trailing newline
        still present.  It is consumed in a single pass and does not need to
        support indexing."""

        # The line seen on the previous iteration (None before the first),
        # needed to recognize a '.Valid Usage' block title.
        prevLine = None

        for line in lines:
            self.state.incrLineNumber()

            # Is this a title line (leading '= ' followed by text)?
            thisTitle = False

            # The logic here is broken. If we are in a non-transformable block and
            # this line *does not* end the block, it should always be
            # accumulated.

            # Test for a blockCommonTransform delimiter comment first, to avoid
            # treating it solely as a end-Paragraph marker comment.
            if line == blockCommonTransform:
                # Starting or ending a pseudo-block for "common" VU statements.
                self.endParaBlockTransform(line, vuBlock = True)

            elif blockTransform.match(line):
                # Starting or ending a block whose contents may be transformed.
                # Blocks cannot be nested.

                # Is this is an explicit Valid Usage block?
                vuBlock = (prevLine == '.Valid Usage\n')

                self.endParaBlockTransform(line, vuBlock)

            elif endPara.match(line):
                # Ending a paragraph. Emit the current paragraph, if any, and
                # prepare to begin a new paragraph.

                self.endPara(line)

                # If this is an include:: line starting the definition of a
                # structure or command, track that for use in VUID generation.
                self._trackApiInclude(line)

            elif endParaContinue.match(line):
                # For now, always just end the paragraph.
                # Could check see if len(para) > 0 to accumulate.

                self.endParaContinue(line)

                # If it is a title line, track that
                if line[0:2] == '= ':
                    thisTitle = True

            elif blockPassthrough.match(line):
                # Starting or ending a block whose contents must not be
                # transformed.  These are tables, etc. Blocks cannot be nested.
                # Note that the use of a listing block masquerading as an
                # open block, via an extension, will not be formatted even
                # though it should be.
                # Fixing this would require looking at the previous line
                # state for the '[open]' tag, and there are so few cases of
                # this in the spec markup that it is not worth the trouble.

                self.endParaBlockPassthrough(line)
            elif self.state.lastTitle:
                # The previous line was a document title line. This line
                # is the author / credits line and must not be transformed.

                self.endPara(line)
            else:
                # Just accumulate a line to the current paragraph. Watch out for
                # hanging indents / bullet-points and track that indent level.

                self.addLine(line)

                # Commented out now that VU extractor supports this, but may
                # need to refactor through a conventions object enable if
                # OpenXR still needs this.

                # This test looks for disallowed conditionals inside Valid Usage
                # blocks, by checking if (a) this line does not start a new VU
                # (bullet point) and (b) the previous line starts an asciidoctor
                # conditional (ifdef:: or ifndef::).
                # if (self.state.vuStack[-1]
                #     and not beginBullet.match(line)
                #     and prevLine is not None
                #     and conditionalStart.match(prevLine)):
                #        self.callback.onEmbeddedVUConditional(self.state)

            self.state.lastTitle = thisTitle
            prevLine = line

        # Cleanup at end of file
        self.endPara(None)

        # Check for sensible block nesting
        if len(self.state.blockStack) > 1:
            logWarn('file', self.filename,
                    'mismatched asciidoc block delimiters at EOF:',
                    self.state.blockStack[-1])
449
450