#
#  content.py
#
#    Parse comment blocks to build content blocks (library file).
#
#  Copyright 2002-2018 by
#  David Turner.
#
#  This file is part of the FreeType project, and may only be used,
#  modified, and distributed under the terms of the FreeType project
#  license, LICENSE.TXT.  By continuing to use, modify, or distribute
#  this file you indicate that you have read the license and
#  understand and accept it fully.

#
# This file contains routines to parse documentation comment blocks,
# building more structured objects out of them.
#


from sources import *
from utils import *

import string
import re


#
# Regular expressions to detect code sequences.  `Code sequences' are simply
# code fragments embedded in '{' and '}', as demonstrated in the following
# example.
#
#   {
#     x = y + z;
#     if ( zookoo == 2 )
#     {
#       foobar();
#     }
#   }
#
# Note that the indentation of the first opening brace and the last closing
# brace must be exactly the same.  The code sequence itself should have a
# larger indentation than the surrounding braces.
#
# In both expressions, group 1 captures the whitespace in front of the
# brace; the brace must be the only non-whitespace character on its line.
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )


#
# A regular expression to isolate identifiers from other text.  Two syntax
# forms are supported:
#
#   <name>
#   <name>[<id>]
#
# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
# and `-'.  Use `<id>' if there are multiple, valid `<name>' entries; in the
# index, `<id>' will be appended in parentheses.
#
# For example,
#
#   stem_darkening[autofit]
#
# becomes `stem_darkening (autofit)' in the index.
#
# Group 1 holds the complete identifier, including the optional `[<id>]'
# suffix.
#
re_identifier = re.compile( r"""
                              ((?:\w|-)+
                               (?:\[(?:\w|-)+\])?)
                            """, re.VERBOSE )


#
# We collect macro names ending in `_H' (group 1), as defined in
# `freetype/config/ftheader.h'.
# While outputting the object data, we use
# this info together with the object's file location (group 2) to emit the
# appropriate header file macro and its associated file name before the
# object itself.
#
# Example:
#
#   #define FT_FREETYPE_H <freetype.h>
#
re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )


################################################################
##
##  DOC CODE CLASS
##
##  The `DocCode' class is used to store source code lines.
##
##  `self.lines' contains a set of source code lines that will be dumped as
##  HTML in a <PRE> tag.
##
##  The object is filled line by line by the parser; it strips the leading
##  `margin' space from each input line before storing it in `self.lines'.
##
class DocCode:

    def __init__( self, margin, lines ):
        """Store `lines', removing up to `margin' leading blank columns.

        A line loses its first `margin' characters only if all of them
        are whitespace; any other line is stored unchanged.
        """
        self.lines = []
        self.words = None  # code items carry no word list

        for line in lines:
            # Use the `str' method instead of `string.strip', which no
            # longer exists in Python 3; the result is the same.
            if not line[:margin].strip():
                line = line[margin:]
            self.lines.append( line )

    def dump( self, prefix = "", width = 60 ):
        """Print every stored line, preceded by `prefix'."""
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the stored lines, each indented by `margin' spaces.

        `width' is accepted for interface symmetry with the other item
        classes; code lines are never re-wrapped, so it is not used.
        """
        indent = " " * margin
        return [indent + line for line in self.lines]



################################################################
##
##  DOC PARA CLASS
##
##  `Normal' text paragraphs are stored in the `DocPara' class.
##
##  `self.words' contains the list of words that make up the paragraph.
##
class DocPara:

    def __init__( self, lines ):
        """Collect the whitespace-separated words of all `lines'."""
        self.lines = None  # paragraphs carry no raw line list
        self.words = []

        for line in lines:
            # `str.split' without arguments ignores leading and trailing
            # whitespace, so no separate strip is needed.  The `str'
            # method replaces `string.split', which Python 3 lacks.
            self.words.extend( line.split() )

    def dump( self, prefix = "", width = 60 ):
        """Print the wrapped paragraph, each line preceded by `prefix'."""
        for line in self.dump_lines( 0, width ):
            print( prefix + line )

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the paragraph wrapped to `width' columns.

        Words are joined with single spaces; a new output line is started
        whenever adding the next word would exceed `width'.  Every output
        line is indented by `margin' spaces (not counted in `width').
        """
        indent = " " * margin
        cur    = ""  # current line
        col    = 0   # current width
        result = []

        for word in self.words:
            ln = len( word )
            if col > 0:
                ln = ln + 1  # account for the separating space

            if col + ln > width:
                result.append( indent + cur )
                cur = word
                col = len( word )
            else:
                if col > 0:
                    cur = cur + " "
                cur = cur + word
                col = col + ln

        if col > 0:
            result.append( indent + cur )

        return result


################################################################
##
##  DOC FIELD CLASS
##
##  The `DocField' class stores a list containing either `DocPara' or
##  `DocCode' objects.  Each DocField object also has an optional `name'
##  that is used when the object corresponds to a field or value definition.
##
class DocField:

    def __init__( self, name, lines ):
        """Split `lines' into paragraph and code items.

        Lines between a `re_code_start' match and the matching
        `re_code_end' line (one whose brace is indented at most as much
        as the opening brace) become a `DocCode' item.  All other lines
        are grouped into `DocPara' items, separated by empty lines.
        """
        self.name  = name  # can be `None' for normal paragraphs/sources
        self.items = []    # list of items

        mode_none = 0  # start parsing mode
        mode_code = 1  # parsing code sequences

        margin    = -1  # current code sequence indentation
        cur_lines = []

        # analyze the markup lines to check whether they contain paragraphs,
        # code sequences, or fields definitions
        #
        mode = mode_none

        for line in lines:
            # are we parsing a code sequence?
            if mode == mode_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    # that's it, we finished the code sequence
                    self.items.append( DocCode( 0, cur_lines ) )
                    margin    = -1
                    cur_lines = []
                    mode      = mode_none
                else:
                    # otherwise continue the code sequence; the margin
                    # is removed right here
                    cur_lines.append( line[margin:] )
            else:
                # start of code sequence?
                m = re_code_start.match( line )
                if m:
                    # save current lines
                    if cur_lines:
                        self.items.append( DocPara( cur_lines ) )
                        cur_lines = []

                    # switch to code extraction mode
                    margin = len( m.group( 1 ) )
                    mode   = mode_code
                elif not line.split() and cur_lines:
                    # if the line is empty, we end the current paragraph,
                    # if any
                    self.items.append( DocPara( cur_lines ) )
                    cur_lines = []
                else:
                    # otherwise, simply add the line to the current
                    # paragraph
                    cur_lines.append( line )

        if mode == mode_code:
            # unexpected end of code sequence; the collected lines have
            # already lost their margin above, so it must not be removed
            # a second time
            self.items.append( DocCode( 0, cur_lines ) )
        elif cur_lines:
            self.items.append( DocPara( cur_lines ) )

    def dump( self, prefix = "" ):
        """Print the field name (if any) followed by all items.

        Items of a named field are printed with `----' appended to
        `prefix'; items are separated by an empty line.
        """
        if self.name:
            print( prefix + self.name + " ::" )
            prefix = prefix + "----"

        first = 1
        for p in self.items:
            if not first:
                print( "" )
            p.dump( prefix )
            first = 0

    def dump_lines( self, margin = 0, width = 60 ):
        """Return the lines of all items, separated by empty strings."""
        result = []

        for p in self.items:
            if result or p is not self.items[0]:
                result.append( "" )

            result.extend( p.dump_lines( margin, width ) )

        return result


#
# A regular expression to detect field definitions.
#
# Examples:
#
#   foo ::
#   foo.bar ::
#
# Group 1 holds the field name (which may be empty).
#
re_field = re.compile( r"""
                         \s*
                           (
                             \w*
                           |
                             \w (\w | \.)* \w
                           )
                         \s* ::
                       """, re.VERBOSE )


################################################################
##
##  DOC MARKUP CLASS
##
class DocMarkup:

    def __init__( self, tag, lines ):
        """Split `lines' into `DocField' objects.

        `tag' is stored in lower case.  Every line matching `re_field'
        starts a new field; lines in front of the first such line are
        collected into a field without a name.
        """
        self.tag    = tag.lower()
        self.fields = []

        cur_lines = []
        field     = None

        for line in lines:
            m = re_field.match( line )
            if m:
                # We detected the start of a new field definition.

                # first, save the current one
                if cur_lines:
                    self.fields.append( DocField( field, cur_lines ) )

                field = m.group( 1 )  # record field name

                # blank out the field prefix so that the remaining text
                # keeps its column position
                ln        = len( m.group( 0 ) )
                cur_lines = [" " * ln + line[ln:]]
            else:
                cur_lines.append( line )

        if field or cur_lines:
            self.fields.append( DocField( field, cur_lines ) )

    def get_name( self ):
        """Return the first word of the first item, or `None'."""
        try:
            return self.fields[0].items[0].words[0]
        except ( AttributeError, IndexError, TypeError ):
            # no field, no item, or an item without a word list
            return None

    def dump( self, margin ):
        """Print the markup and its fields, enclosed in tag lines."""
        indent = " " * margin

        print( indent + "<" + self.tag + ">" )
        for f in self.fields:
            f.dump( " " )
        print( indent + "</" + self.tag + ">" )


################################################################
##
##  DOC CHAPTER CLASS
##
class DocChapter:

    def __init__( self, block ):
        """Build a chapter from `block'.

        If `block' is false (for example `None'), a catch-all chapter
        named `Other' with title `Miscellaneous' is created instead.
        """
        self.block    = block
        self.sections = []

        if block:
            self.name  = block.name
            self.title = block.get_markup_words( "title" )
            self.order = block.get_markup_words( "sections" )
        else:
            self.name  = "Other"
            self.title = "Miscellaneous".split()
            self.order = []


################################################################
##
##  DOC SECTION CLASS
##
class DocSection:

    def __init__( self, name = "Other" ):
        """Create an empty section called `name'."""
        self.name        = name
        self.blocks      = {}
        self.block_names = []  # ordered block names in section
        self.defs        = []
        self.abstract    = ""
        self.description = ""
        self.order       = []
        self.title       = "ERROR"
        self.chapter     = None

    def add_def( self, block ):
        """Register `block' as a candidate section description."""
        self.defs.append( block )

    def add_block( self, block ):
        """Add `block' to the section, recording its name in order."""
        self.block_names.append( block.name )
        self.blocks[block.name] = block

    def process( self ):
        """Fill title, abstract, description, and order.

        The values are taken from the first registered definition block
        that has a non-empty `title' markup; without such a block the
        attributes keep their initial values.
        """
        # look up one block that contains a valid section description
        for block in self.defs:
            title = block.get_markup_text( "title" )
            if not title:
                continue

            self.title       = title
            self.abstract    = block.get_markup_words( "abstract" )
            self.description = block.get_markup_items( "description" )
            self.order       = block.get_markup_words_all( "order" )
            return

    def reorder( self ):
        """Re-sort `self.block_names' according to `self.order'."""
        self.block_names = sort_order_list( self.block_names, self.order )


################################################################
##
##  CONTENT PROCESSOR CLASS
##
class ContentProcessor:

    def __init__( self ):
        """Initialize a block content processor."""
        self.reset()

        self.sections = {}    # dictionary of documentation sections
        self.section  = None  # current documentation section

        self.chapters = []    # list of chapters

        self.headers  = {}    # dictionary of header macros

    def set_section( self, section_name ):
        """Set current section during parsing."""
        if section_name not in self.sections:
            self.sections[section_name] = DocSection( section_name )

        self.section = self.sections[section_name]

    def add_chapter( self, block ):
        """Append a new chapter built from `block'."""
        self.chapters.append( DocChapter( block ) )

    def reset( self ):
        """Reset the content processor for a new block."""
        self.markups      = []
        self.markup       = None
        self.markup_lines = []

    def add_markup( self ):
        """Add a new markup section."""
        if self.markup and self.markup_lines:

            # get rid of last line of markup if it's empty
            marks = self.markup_lines
            if not marks[-1].strip():
                self.markup_lines = marks[:-1]

            self.markups.append( DocMarkup( self.markup,
                                            self.markup_lines ) )

            self.markup       = None
            self.markup_lines = []

    def process_content( self, content ):
        """Process a block content and return a list of DocMarkup objects
           corresponding to it."""
        first   = 1   # still in front of the first markup tag?
        margin  = -1  # indentation of the current code sequence
        in_code = 0

        for line in content:
            # track code sequences; markup tags are not searched for
            # while `in_code' is set
            if in_code:
                m = re_code_end.match( line )
                if m and len( m.group( 1 ) ) <= margin:
                    in_code = 0
                    margin  = -1
            else:
                m = re_code_start.match( line )
                if m:
                    in_code = 1
                    margin  = len( m.group( 1 ) )

            found = None

            if not in_code:
                for t in re_markup_tags:
                    m = t.match( line )
                    if m:
                        found  = m.group( 1 ).lower()
                        prefix = len( m.group( 0 ) )
                        # remove markup from line
                        line   = " " * prefix + line[prefix:]
                        break

            # is it the start of a new markup section ?
            if found:
                first = 0
                self.add_markup()  # add current markup content
                self.markup = found
                if line.strip():
                    self.markup_lines.append( line )
            elif first == 0:
                self.markup_lines.append( line )

        self.add_markup()

        return self.markups

    def parse_sources( self, source_processor ):
        """Create a `DocBlock' for every documentation comment block."""
        blocks = source_processor.blocks
        count  = len( blocks )

        for n, source in enumerate( blocks ):
            if not source.content:
                continue

            # this is a documentation comment, we need to catch
            # all following normal blocks in the "follow" list
            #
            follow = []
            m = n + 1
            while m < count and not blocks[m].content:
                follow.append( blocks[m] )
                m = m + 1

            # the constructor registers the new block with this
            # processor, so the object need not be kept here
            DocBlock( source, follow, self )

    def finish( self ):
        """Process all sections and assign them to chapters.

        Sections not listed by any chapter are collected in an extra
        chapter created with `DocChapter( None )'.  A chapter that lists
        an unknown section causes a warning on standard error.
        """
        # `sys' is not imported by this file itself; import it here so
        # that the warning below does not depend on a star import
        import sys

        # process all sections to extract their abstract, description
        # and ordered list of items
        #
        for sec in self.sections.values():
            sec.process()

        # process chapters to check that all sections are correctly
        # listed there
        for chap in self.chapters:
            for sec in chap.order:
                if sec in self.sections:
                    section = self.sections[sec]
                    section.chapter = chap
                    section.reorder()
                    chap.sections.append( section )
                else:
                    sys.stderr.write( "WARNING: chapter '" +
                          chap.name + "' in " + chap.block.location() +
                          " lists unknown section '" + sec + "'\n" )

        # check that all sections are in a chapter
        #
        others = []
        for sec in self.sections.values():
            if not sec.chapter:
                sec.reorder()
                others.append( sec )

        # create a new special chapter for all remaining sections
        # when necessary
        #
        if others:
            chap = DocChapter( None )
            chap.sections = others
            self.chapters.append( chap )


################################################################
##
##  DOC BLOCK CLASS
##
class DocBlock:

    def __init__( self, source, follow, processor ):
        """Build a documentation block and register it with `processor'.

        `source' provides the comment content, `follow' is the list of
        blocks following the comment; their lines become `self.code'.
        A block of type `section' selects the current section, a block
        of type `chapter' adds a chapter, and any other block is added
        to the current section.
        """
        processor.reset()

        self.source  = source
        self.code    = []
        self.type    = "ERRTYPE"
        self.name    = "ERRNAME"
        self.section = processor.section
        self.markups = processor.process_content( source.content )

        # compute block type from first markup tag
        try:
            self.type = self.markups[0].tag
        except ( AttributeError, IndexError ):
            pass  # no markup at all: keep the error marker

        # compute block name from first markup paragraph
        try:
            markup = self.markups[0]
            para   = markup.fields[0].items[0]
            name   = para.words[0]
            m = re_identifier.match( name )
            if m:
                name = m.group( 1 )
            self.name = name
        except ( AttributeError, IndexError, TypeError ):
            pass  # no usable first word: keep the error marker

        if self.type == "section":
            # detect new section starts
            processor.set_section( self.name )
            processor.section.add_def( self )
        elif self.type == "chapter":
            # detect new chapter
            processor.add_chapter( self )
        else:
            processor.section.add_block( self )

        # now, compute the source lines relevant to this documentation
        # block.  We keep normal comments in for obvious reasons (??)
        source = []
        for b in follow:
            if b.format:
                break
            for line in b.lines:
                # collect header macro definitions
                m = re_header_macro.match( line )
                if m:
                    processor.headers[m.group( 2 )] = m.group( 1 )

                # we use "/* */" as a separator
                if re_source_sep.match( line ):
                    break
                source.append( line )

        # now strip the leading and trailing empty lines from the sources
        start = 0
        end   = len( source ) - 1

        while start < end and not source[start].strip():
            start = start + 1

        while start < end and not source[end].strip():
            end = end - 1

        if start == end and not source[start].strip():
            self.code = []
        else:
            self.code = source[start:end + 1]

    def location( self ):
        """Return the location reported by the source block."""
        return self.source.location()

    def get_markup( self, tag_name ):
        """Return the DocMarkup corresponding to a given tag in a block."""
        wanted = tag_name.lower()
        for m in self.markups:
            if m.tag == wanted:
                return m
        return None

    def get_markup_words( self, tag_name ):
        """Return the words of the first item of markup `tag_name'.

        An empty list is returned if the markup, its first field, or
        its first item is missing.
        """
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items[0].words
        except ( AttributeError, IndexError ):
            return []

    def get_markup_words_all( self, tag_name ):
        """Return the words of all items in the first field of `tag_name'.

        An empty list is returned if the markup or its first field is
        missing, or if an item has no word list.
        """
        try:
            m = self.get_markup( tag_name )
            words = []
            for item in m.fields[0].items:
                # We honour empty lines in an `<Order>' section element by
                # adding the sentinel `/empty/'.  The formatter should then
                # convert it to an appropriate representation in the
                # `section_enter' function.
                words += item.words
                words.append( "/empty/" )
            return words
        except ( AttributeError, IndexError, TypeError ):
            return []

    def get_markup_text( self, tag_name ):
        """Return the words of markup `tag_name' joined by single spaces."""
        return " ".join( self.get_markup_words( tag_name ) )

    def get_markup_items( self, tag_name ):
        """Return the items of the first field of `tag_name', or `None'."""
        try:
            m = self.get_markup( tag_name )
            return m.fields[0].items
        except ( AttributeError, IndexError ):
            return None

# eof