1#!/usr/bin/python3
2#
3# Copyright 2016-2023 The Khronos Group Inc.
4#
5# SPDX-License-Identifier: Apache-2.0
6
7# Utility functions for automatic ref page generation and other script stuff
8
9import io
10import re
11import sys
12import subprocess
13
# Module-level logging state, shared by the log*() and setLog*() helpers.

# Streams receiving error, warning, and diagnostic messages. A stream set
# to None means messages of that category are not written anywhere.
errFile = sys.stderr
warnFile = sys.stdout
diagFile = None

# Optional context (source file, procedure name, line number) which
# logHeader() adds to the prefix of each message when set.
logSourcefile = logProcname = logLine = None
22
def unescapeQuotes(s):
    """Return s with each backslash-escaped single quote replaced by a
    plain single quote (used for refpage descriptions)."""
    escapedQuote = "\\'"
    return s.replace(escapedQuote, "'")
26
def write(*args, **kwargs):
    """Write the positional arguments to a stream.

    The arguments are converted to strings and separated by single
    spaces, then followed by a terminator.

    Keyword arguments:

    - file - stream to write to (default sys.stdout)
    - end - terminator written after the arguments (default newline)"""
    stream = kwargs.pop('file', sys.stdout)
    terminator = kwargs.pop('end', '\n')
    text = ' '.join(map(str, args))
    stream.write(text)
    stream.write(terminator)
32
def setLogSourcefile(filename):
    """Record the source file name that logHeader() includes in the prefix
    of diagnostic messages.

    - filename - name to report, or None to omit it"""
    global logSourcefile
    logSourcefile = filename
37
def setLogProcname(procname):
    """Record the procedure name that logHeader() includes in the prefix
    of diagnostic messages.

    - procname - name to report, or None to omit it"""
    global logProcname
    logProcname = procname
41
def setLogLine(line):
    """Record the line number that logHeader() includes in the prefix of
    diagnostic messages.

    - line - line number to report, or None to omit it"""
    global logLine
    logLine = line
45
def logHeader(severity):
    """Generate the prefix for a diagnostic line using metadata and severity.

    - severity - string naming the message category (e.g. 'WARN')

    Returns severity followed by ': ', then ' in <procname>',
    ' for <sourcefile>', and ' line <line>' for whichever of the logging
    metadata values are set, and finally a trailing space."""
    parts = [severity + ': ']
    if logProcname:
        parts.append(' in ' + logProcname)
    if logSourcefile:
        parts.append(' for ' + logSourcefile)
    # Compare against None rather than testing truthiness: 0 is a valid
    # line number and must still be reported.
    if logLine is not None:
        parts.append(' line ' + str(logLine))
    parts.append(' ')
    return ''.join(parts)
58
def setLogFile(setDiag, setWarn, filename):
    """Redirect diagnostics and/or warnings to a file.

    - setDiag - True if the diagnostic handle is to be set
    - setWarn - True if the warning handle is to be set
    - filename - None to leave both handles unchanged, '-' for stdout,
      or the pathname of a file to open for writing (UTF-8)"""
    global diagFile, warnFile

    # Nothing requested: keep whatever handles are currently in place
    if filename is None:
        return

    if filename != '-':
        stream = open(filename, 'w', encoding='utf-8')
    else:
        stream = sys.stdout

    if setWarn:
        warnFile = stream
    if setDiag:
        diagFile = stream
78
def logDiag(*args, **kwargs):
    """Write a diagnostic message prefixed by logHeader('DIAG').

    Positional arguments are converted to strings and joined by spaces.
    Keyword arguments:

    - file - stream to write to (default diagFile); nothing is written
      if it is None
    - end - terminator written after the message (default newline)"""
    stream = kwargs.pop('file', diagFile)
    terminator = kwargs.pop('end', '\n')
    if stream is None:
        return
    header = logHeader('DIAG')
    stream.write(header + ' '.join(map(str, args)))
    stream.write(terminator)
85
def logWarn(*args, **kwargs):
    """Write a warning message prefixed by logHeader('WARN').

    Positional arguments are converted to strings and joined by spaces.
    Keyword arguments:

    - file - stream to write to (default warnFile); nothing is written
      if it is None
    - end - terminator written after the message (default newline)"""
    stream = kwargs.pop('file', warnFile)
    terminator = kwargs.pop('end', '\n')
    if stream is None:
        return
    header = logHeader('WARN')
    stream.write(header + ' '.join(map(str, args)))
    stream.write(terminator)
92
def logErr(*args, **kwargs):
    """Write an error message prefixed by logHeader('ERROR'), then raise.

    Positional arguments are converted to strings and joined by spaces.
    Keyword arguments:

    - file - stream to write to (default errFile); nothing is written
      if it is None
    - end - terminator appended to the message (default newline)

    Always raises UserWarning carrying the complete message text,
    whether or not the message was written to a stream."""
    stream = kwargs.pop('file', errFile)
    terminator = kwargs.pop('end', '\n')

    header = logHeader('ERROR')
    message = header + ' '.join(map(str, args)) + terminator

    if stream is not None:
        stream.write(message)
    raise UserWarning(message)
104
def isempty(s):
    """Return True if s is empty or consists solely of white space,
    False otherwise."""
    return not s.strip()
108
class pageInfo:
    """Record describing one ref page and where its pieces are located in
    the file it is extracted from."""
    def __init__(self):
        # --- Status of the page ---

        self.extractPage = True
        """whether the page should be extracted"""

        self.Warning = None
        """explanatory string when the page is suboptimal or cannot be
        generated, otherwise None"""

        self.embed = False
        """name of the ref page this include is embedded within, or False"""

        # --- Identity of the page ---

        self.type = None
        """refpage type attribute, such as 'structs', 'protos', 'freeform'"""

        self.name = None
        """name of the struct / proto / enumerant / etc."""

        self.desc = None
        """short description of the ref page"""

        # --- Line indices into the source file ---

        self.begin = None
        """first line of the page (heuristic or // refBegin)"""

        self.include = None
        """include:: line defining the page"""

        self.param = None
        """first line of the parameter/member definitions"""

        self.body = None
        """first line of the body text"""

        self.validity = None
        """validity include line"""

        self.end = None
        """last line of the page (heuristic validity include, or // refEnd)"""

        # --- Optional open block attributes ---

        self.alias = ''
        """aliases of this name if supplied, otherwise ''"""

        self.refs = ''
        """cross-references on the // refEnd line, if supplied"""

        self.spec = None
        """'spec' attribute of the refpage open block if supplied; None
        selects the default ('api') type"""

        self.anchor = None
        """'anchor' attribute of the refpage open block if supplied;
        otherwise inferred to be the same as the 'name'"""
159
def printPageInfoField(desc, line, file):
    """Log one line-index field of a pageInfo struct as a diagnostic.

    - desc - string description of the field
    - line - field value (an index into file), or None
    - file - list of strings indexed by line"""
    label = desc + ':'
    if line is None:
        logDiag(label, line)
        return
    # Report the index one-based, followed by the text of that line. The
    # terminator is suppressed since the text is written as stored.
    logDiag(label, line + 1, '\t-> ', file[line], end='')
170
def printPageInfo(pi, file):
    """Log all the fields of a pageInfo struct as diagnostics.

    - pi - pageInfo to print
    - file - list of strings indexed by the line fields of pi"""
    # Plain values, logged as-is
    valueFields = (
        ('TYPE:   ', 'type'),
        ('NAME:   ', 'name'),
        ('WARNING:', 'Warning'),
        ('EXTRACT:', 'extractPage'),
        ('EMBED:  ', 'embed'),
        ('DESC:   ', 'desc'),
    )
    for label, attr in valueFields:
        logDiag(label, getattr(pi, attr))

    # Line indices, logged together with the line they refer to
    lineFields = (
        ('BEGIN   ', 'begin'),
        ('INCLUDE ', 'include'),
        ('PARAM   ', 'param'),
        ('BODY    ', 'body'),
        ('VALIDITY', 'validity'),
        ('END     ', 'end'),
    )
    for label, attr in lineFields:
        printPageInfoField(label, getattr(pi, attr), file)

    logDiag('REFS: "' + pi.refs + '"')
189
def prevPara(file, line):
    """Return the index of the first line of the paragraph preceding the
    one that starts at the specified line.

    Paragraphs are separated by blank lines, and the specified line is
    assumed to be the first line of a paragraph.

    - file - list of strings
    - line - zero-based starting index

    If the start of the file is reached before a preceding paragraph is
    found, the result is -1."""
    idx = line

    # Step 1: back out of the current paragraph
    while idx >= 0:
        if isempty(file[idx]):
            break
        idx -= 1

    # Step 2: back over the blank lines separating the paragraphs
    while idx >= 0:
        if not isempty(file[idx]):
            break
        idx -= 1

    # Step 3: back up to the first line of the preceding paragraph
    while idx >= 1:
        if isempty(file[idx - 1]):
            break
        idx -= 1

    return idx
209
def nextPara(file, line):
    """Return the index of the first line of the paragraph following the
    specified line.

    Paragraphs are separated by blank lines. The scan never advances
    beyond the last line of the file, so that index is returned when no
    later paragraph exists.

    - file - list of strings
    - line - zero-based starting index"""
    lastLine = len(file) - 1
    idx = line

    # Step 1: advance past the remainder of the current paragraph
    while idx != lastLine:
        if isempty(file[idx]):
            break
        idx += 1

    # Step 2: advance past the blank lines which follow it
    while idx != lastLine:
        if not isempty(file[idx]):
            break
        idx += 1

    return idx
227
def lookupPage(pageMap, name):
    """Return the pageInfo stored in pageMap under name.

    If there is no entry yet, a new pageInfo with its name set is
    created, stored in pageMap, and returned."""
    if name in pageMap:
        return pageMap[name]

    entry = pageInfo()
    entry.name = name
    pageMap[name] = entry
    return entry
237
def loadFile(filename):
    """Load a UTF-8 text file into a list of strings.

    - filename - path of the file to read

    Returns (lines, newline_string), or (None, None) after logging a
    warning if the file cannot be read. lines is the result of
    readlines() in text mode. newline_string is "\\r\\n" if the raw file
    contents contain more than one CR-LF pair, otherwise "\\n"."""
    newline_string = "\n"
    try:
        # First pass over the raw bytes, to detect the newline convention
        with open(filename, 'rb') as fp:
            if fp.read().count(b"\r\n") > 1:
                newline_string = "\r\n"

        # Second pass in text mode, to get the decoded lines
        with open(filename, 'r', encoding='utf-8') as fp:
            lines = fp.readlines()
    except Exception as err:
        # Catch Exception, not everything: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # unreadable file.
        logWarn('Cannot open file', filename, ':', type(err))
        return None, None

    return lines, newline_string
254
def clampToBlock(line, minline, maxline):
    """Clamp a line number to be in the range [minline,maxline].

    - line - line number to clamp; if None, it is returned unchanged
    - minline - lower bound, or None for no lower bound
    - maxline - upper bound

    Returns the clamped line number."""
    if line is None:
        return line
    # Test against None explicitly, so that a lower bound of 0 is honored
    if minline is not None and line < minline:
        return minline
    if line > maxline:
        return maxline

    return line
268
def fixupRefs(pageMap, specFile, file):
    """Complete the pageInfo structures in pageMap by inferring whichever
    fields were not determined from the text, where that is possible.

    - pageMap - dictionary of pageInfo structures, keyed by name
    - specFile - filename, used only in diagnostics
    - file - list of strings making up the file, indexed by the line
      fields of the pageInfo structures"""
    # Page types which have a parameter section ahead of the body
    paramTypes = ('funcpointers', 'protos', 'structs')

    # Pages are always visited in name order
    names = sorted(pageMap.keys())

    # Pass 1: every potential ref page is in pageMap by now. Determine
    # the actual start / end / description boundaries of each page
    # which did not get them from the text.
    #
    # (An older heuristic, which kept a page having only an include line
    # as a non-extracted page beginning at that line, is not applied.)
    for name in names:
        pi = pageMap[name]

        # With open block delimiters a ref page *always* has a defined
        # begin and end; a page lacking either cannot be extracted.
        if pi.begin is None:
            pi.extractPage = False
            pi.Warning = "Can't identify begin of ref page open block"
            continue

        if pi.end is None:
            pi.extractPage = False
            pi.Warning = "Can't identify end of ref page open block"
            continue

        # A page with no description needs at least a type to remain
        # extractable.
        if pi.desc is None:
            if pi.type is None:
                pi.extractPage = False
                pi.Warning = 'No short description available, cannot infer from the type'
                continue
            pi.Warning = 'No short description available; could infer from the type and name'

        # Locate the parameter and body sections relative to the
        # include. funcpointer, proto, and struct pages have both; all
        # other pages have only a body. A body already found in the
        # text is never overridden.
        #
        # Probably some other types infer this as well - refer to the
        # list of all page types in genRef.py:emitPage()
        if pi.include is None:
            pi.Warning = 'Page does not have an API definition include::'
        elif pi.type in paramTypes:
            pi.param = nextPara(file, pi.include)
            if pi.body is None:
                pi.body = nextPara(file, pi.param)
        elif pi.body is None:
            pi.body = nextPara(file, pi.include)

        # The inferred lines can run past the end of the block (for
        # example when there is no parameter section), so pull them back.
        pi.param = clampToBlock(pi.param, pi.include, pi.end)
        pi.body = clampToBlock(pi.body, pi.param, pi.end)

        # .include, .param, and .validity may all still be None here,
        # meaning those sections were not found.

        logDiag('fixupRefs: after processing,', pi.name, 'looks like:')
        printPageInfo(pi, file)

    # Pass 2: with the valid pages known, try to explain invalid ones.
    #
    # A reference with no .end whose include lies entirely inside a
    # valid reference is intentionally embedded there - may want to
    # create an indirect page that links into the embedding page. The
    # double loop is inefficient, but the number of pages is small.
    for name in names:
        pi = pageMap[name]

        if pi.end is not None:
            continue

        for embedName in names:
            logDiag('fixupRefs: comparing', pi.name, 'to', embedName)
            embed = pageMap[embedName]

            # Pages which are themselves invalid cannot embed anything
            if not embed.extractPage:
                logDiag('Skipping check for embedding in:', embed.name)
                continue

            if embed.begin is None or embed.end is None:
                logDiag('fixupRefs:', name + ':',
                        'can\'t compare to unanchored ref:', embed.name,
                        'in', specFile, 'at line', pi.include)
                printPageInfo(pi, file)
                printPageInfo(embed, file)
            elif (pi.include is not None and
                  embed.begin <= pi.include <= embed.end):
                # Embedding found: replace the error with a warning
                logDiag('fixupRefs: Found embed for:', name,
                        'inside:', embedName,
                        'in', specFile, 'at line', pi.include)
                pi.embed = embed.name
                pi.Warning = 'Embedded in definition for ' + embed.name
                break
            else:
                logDiag('fixupRefs: No embed match for:', name,
                        'inside:', embedName, 'in', specFile,
                        'at line', pi.include)
383
384
def compatiblePageTypes(refpage_type, pagemap_type):
    """Return whether two refpage 'types' (categories) are compatible.

    Two types are compatible if they are equal. The only distinct types
    that are compatible with each other are 'consts' and 'enums'."""
    interchangeable = ('consts', 'enums')

    if refpage_type == pagemap_type:
        return True
    return refpage_type in interchangeable and pagemap_type in interchangeable
396
# Patterns used to recognize interesting lines in an asciidoc source file.
# These patterns are only compiled once.

# endif::condition[] directive at the start of a line
endifPat   = re.compile(r'^endif::(?P<condition>[\w_+,]+)\[\]')
# [open,refpage=...] line at the start of a line; 'attribs' captures
# everything from 'refpage=' up to the closing bracket
beginPat   = re.compile(r'^\[open,(?P<attribs>refpage=.*)\]')
# attribute key/value pairs of an open block, written as key='value'.
# The value may contain backslash-escaped characters, such as \'
attribStr  = r"([a-z]+)='([^'\\]*(?:\\.[^'\\]*)*)'"
attribPat  = re.compile(attribStr)
# '// refBody' and '// refError' comment markers
bodyPat    = re.compile(r'^// *refBody')
errorPat   = re.compile(r'^// *refError')

# This regex transplanted from check_spec_links
# It looks for either OpenXR or Vulkan generated file conventions, and for
# the api/validity include (generated_type), protos/struct/etc path
# (category), and API name (entity_name). It could be put into the API
# conventions object.
# The dots in '../' and '.adoc' are escaped so that they match only a
# literal '.', not an arbitrary character.
INCLUDE = re.compile(
        r'include::(?P<directory_traverse>((\.\./){1,4}|\{generated\}/)(generated/)?)(?P<generated_type>[\w]+)/(?P<category>\w+)/(?P<entity_name>[^./]+)\.adoc[\[][\]]')
414
def findRefs(file, filename):
    """Identify reference pages in a list of strings.

    - file - list of strings making up the file
    - filename - name of the file, used only in log messages

    Returns a dictionary mapping the name of each ref page found to its
    pageInfo entry.

    Questionable markup is reported through logWarn. A refpage open block
    starting before the previous one was closed, or a block still open at
    the end of the file, is reported through logErr, which raises
    UserWarning."""
    setLogSourcefile(filename)
    setLogProcname('findRefs')

    # To reliably detect the open blocks around reference pages, we must
    # first detect the '[open,refpage=...]' markup delimiting the block;
    # skip past the '--' block delimiter on the next line; and identify the
    # '--' block delimiter closing the page.
    # This cannot be done solely with pattern matching, and requires state to
    # track 'inside/outside block'.
    # When looking for open blocks, possible states are:
    #   'outside' - outside a block
    #   'start' - have found the '[open...]' line
    #   'inside' - have found the following '--' line
    openBlockState = 'outside'

    # Dictionary of interesting line numbers and strings related to an API
    # name
    pageMap = {}

    # Track the pageInfo object corresponding to the current open block
    pi = None

    for line, text in enumerate(file):
        setLogLine(line)

        # The checks below are tried in a fixed order. A check which
        # handles the line ends processing of that line with 'continue'.

        # [open,refpage=...] starting a refpage block
        matches = beginPat.search(text)
        if matches is not None:
            logDiag('Matched open block pattern')
            attribs = matches.group('attribs')

            # If the previous open block was not closed, raise an error
            if openBlockState != 'outside':
                logErr('Nested open block starting at line', line, 'of',
                       filename)

            openBlockState = 'start'

            # Extract each attribute of the block
            name = None
            desc = None
            refpage_type = None
            spec_type = None
            anchor = None
            alias = None
            xrefs = None

            for (key, value) in attribPat.findall(attribs):
                logDiag('got attribute', key, '=', value)
                if key == 'refpage':
                    name = value
                elif key == 'desc':
                    desc = unescapeQuotes(value)
                elif key == 'type':
                    refpage_type = value
                elif key == 'spec':
                    spec_type = value
                elif key == 'anchor':
                    anchor = value
                elif key == 'alias':
                    alias = value
                elif key == 'xrefs':
                    xrefs = value
                else:
                    logWarn('unknown open block attribute:', key)

            if name is None or desc is None or refpage_type is None:
                logWarn('missing one or more required open block attributes: '
                        'refpage, desc, or type')
                # Leave pi as None so open block delimiters are ignored
            else:
                pi = lookupPage(pageMap, name)
                pi.desc = desc
                # Must match later type definitions in interface/validity includes
                pi.type = refpage_type
                pi.spec = spec_type
                pi.anchor = anchor
                if alias:
                    pi.alias = alias
                if xrefs:
                    pi.refs = xrefs
                logDiag('open block for', name, 'added DESC =', desc,
                        'TYPE =', refpage_type, 'ALIAS =', alias,
                        'XREFS =', xrefs, 'SPEC =', spec_type,
                        'ANCHOR =', anchor)

            continue

        # '--' starting or ending an open block
        if text.rstrip() == '--':
            if openBlockState == 'outside':
                # Only refpage open blocks should use -- delimiters
                logWarn('Unexpected double-dash block delimiters')
            elif openBlockState == 'start':
                # -- delimiter following [open,refpage=...]
                openBlockState = 'inside'

                if pi is None:
                    logWarn('no pageInfo available for opening -- delimiter')
                else:
                    # The page begins on the line after the delimiter
                    pi.begin = line + 1
                    logDiag('opening -- delimiter: added BEGIN =', pi.begin)
            elif openBlockState == 'inside':
                # -- delimiter ending an open block
                if pi is None:
                    logWarn('no pageInfo available for closing -- delimiter')
                else:
                    # The page ends on the line before the delimiter
                    pi.end = line - 1
                    logDiag('closing -- delimiter: added END =', pi.end)

                openBlockState = 'outside'
                pi = None
            else:
                logWarn('unknown openBlockState:', openBlockState)

            continue

        matches = INCLUDE.search(text)
        if matches is not None:
            # Something got included, not sure what yet.
            gen_type = matches.group('generated_type')
            refpage_type = matches.group('category')
            name = matches.group('entity_name')

            # This will never match in OpenCL
            if gen_type == 'validity':
                logDiag('Matched validity pattern')
                if pi is not None:
                    if pi.type and not compatiblePageTypes(refpage_type, pi.type):
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.validity = line
                    logDiag('added TYPE =', pi.type, 'VALIDITY =', pi.validity)
                else:
                    logWarn('validity include:: line NOT inside block')

                continue

            if gen_type == 'api':
                logDiag('Matched include pattern')
                if pi is not None:
                    if pi.include is not None:
                        logDiag('found multiple includes for this block')
                    if pi.type and not compatiblePageTypes(refpage_type, pi.type):
                        logWarn('ERROR: pageMap[' + name + '] type:',
                                pi.type, 'does not match type:', refpage_type)
                    pi.type = refpage_type
                    pi.include = line
                    logDiag('added TYPE =', pi.type, 'INCLUDE =', pi.include)
                else:
                    logWarn('interface include:: line NOT inside block')

                continue

            # Any other generated include is not consumed here; the line
            # is still tested against the remaining patterns.
            logDiag('ignoring unrecognized include line ', matches.group())

        # Vulkan 1.1 markup allows the last API include construct to be
        # followed by an asciidoctor endif:: construct (and also preceded,
        # at some distance).
        # This looks for endif:: immediately following an include:: line
        # and, if found, moves the include boundary to this line.
        matches = endifPat.search(text)
        if matches is not None and pi is not None:
            if pi.include == line - 1:
                logDiag('Matched endif pattern following include; moving include')
                pi.include = line
            else:
                logDiag('Matched endif pattern (not following include)')

            continue

        matches = bodyPat.search(text)
        if matches is not None:
            logDiag('Matched // refBody pattern')
            if pi is not None:
                pi.body = line
                logDiag('added BODY =', pi.body)
            else:
                logWarn('// refBody line NOT inside block')

            continue

        # OpenCL spec uses // refError to tag "validity" (Errors) language,
        # instead of /validity/ includes.
        matches = errorPat.search(text)
        if matches is not None:
            logDiag('Matched // refError pattern')
            if pi is not None:
                pi.validity = line
                logDiag('added VALIDITY (refError) =', pi.validity)
            else:
                logWarn('// refError line NOT inside block')

            continue

    if pi is not None:
        logErr('Unclosed open block at EOF!')

    # Clear the logging metadata set for this scan
    setLogSourcefile(None)
    setLogProcname(None)
    setLogLine(None)

    return pageMap
641
642
def getBranch():
    """Determine current git branch

    Runs 'git symbolic-ref --short HEAD' in the current directory.

    Returns (branch name, ''), or (None, error output) if the branch name
    cannot be determined. The error output is a bytes object: the stderr
    output of git, or the text of the error raised if git could not be
    run at all."""

    command = ['git', 'symbolic-ref', '--short', 'HEAD']
    try:
        results = subprocess.run(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
    except OSError as err:
        # git could not be executed (for example, it is not installed)
        return (None, str(err).encode())

    # Judge failure by the exit status, not by the presence of stderr
    # output, which may also hold warnings from a successful command.
    if results.returncode != 0:
        return (None, results.stderr)

    # Convert to a string and remove the trailing newline
    branch = results.stdout.decode().strip()
    if not branch:
        return (None, results.stderr)

    return (branch, '')
665