#!/usr/bin/env python
#===- gen_std.py -  ------------------------------------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

"""gen_std.py is a tool to generate a lookup table (from qualified names to
include headers) for C/C++ Standard Library symbols by parsing archived HTML
files from cppreference.

Caveats and FIXMEs:
  - symbols directly in the "std" namespace and in the std::chrono,
    std::filesystem, std::pmr, std::regex_constants and std::this_thread
    sub-namespaces are added; std literal operators and std::placeholders
    symbols are not indexed yet.
  - symbols with multiple variants or defined in multiple headers aren't added,
    e.g. std::move, std::swap

Usage:
  1. Install BeautifulSoup dependency, see instruction:
       https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
  2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
       https://en.cppreference.com/w/Cppreference:Archives
  3. Unzip the zip file from step 2 to directory </cppreference>, you should
     get a "reference" directory in </cppreference>
  4. Run the command:
       // Generate C++ symbols
       gen_std.py -cppreference </cppreference/reference> -language=cpp > StdSymbolMap.inc
       // Generate C symbols
       gen_std.py -cppreference </cppreference/reference> -language=c > CSymbolMap.inc
"""


import argparse
import datetime
import os
import sys

import cppreference_parser

# Banner emitted at the top of the generated .inc file. The two %s slots are
# filled with the upper-cased language name and the cppreference snapshot date.
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// Automatically generated file, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""


def ParseArg():
    """Parse the command line.

    Returns:
      An argparse.Namespace with two attributes:
        cppreference: path to the cppreference offline HTML directory.
        language: 'c' or 'cpp', the symbol set to generate.

    Both options are mandatory; argparse exits with a usage error when one is
    missing or when -language is not one of the supported values.
    """
    parser = argparse.ArgumentParser(description='Generate StdGen file')
    parser.add_argument('-cppreference', metavar='PATH',
                        help='path to the cppreference offline HTML directory',
                        required=True)
    # Restrict the accepted values so that an unsupported language is reported
    # as a usage error instead of crashing later in main().
    parser.add_argument('-language',
                        choices=['c', 'cpp'],
                        help='Generate c or cpp symbols',
                        required=True)
    return parser.parse_args()


def main():
    """Generate the symbol table and write it to stdout.

    Prints the CODE_PREFIX banner followed by one SYMBOL(name, namespace,
    header) line for every symbol that maps to exactly one header. Symbols
    with no header or with several candidate headers are skipped and reported
    on stderr. Exits with an error message if the expected index directory
    does not exist.
    """
    args = ParseArg()
    if args.language == 'cpp':
        page_root = os.path.join(args.cppreference, "en", "cpp")
        symbol_index_root = os.path.join(page_root, "symbol_index")
        parse_pages = [
            (page_root, "symbol_index.html", "std::"),
            # std sub-namespace symbols have separated pages.
            # We don't index std literal operators (e.g.
            # std::literals::chrono_literals::operator""d), these symbols
            # can't be accessed by std::<symbol_name>.
            # FIXME: index std::placeholders symbols, placeholders.html page
            # is different (which contains one entry for _1, _2, ..., _N), we
            # need special handling.
            (symbol_index_root, "chrono.html", "std::chrono::"),
            (symbol_index_root, "filesystem.html", "std::filesystem::"),
            (symbol_index_root, "pmr.html", "std::pmr::"),
            (symbol_index_root, "regex_constants.html",
             "std::regex_constants::"),
            (symbol_index_root, "this_thread.html", "std::this_thread::"),
        ]
    else:
        # ParseArg() only accepts 'c' or 'cpp', so this is the C case.
        page_root = os.path.join(args.cppreference, "en", "c")
        symbol_index_root = page_root
        parse_pages = [(page_root, "index.html", None)]

    if not os.path.exists(symbol_index_root):
        sys.exit("Path %s doesn't exist!" % symbol_index_root)

    symbols = cppreference_parser.GetSymbols(parse_pages)

    # We don't have version information from the unzipped offline HTML files,
    # so we use the modified time of the language's index.html as the version.
    index_page_path = os.path.join(page_root, "index.html")
    cppreference_modified_date = datetime.datetime.fromtimestamp(
        os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(CODE_PREFIX % (args.language.upper(), cppreference_modified_date))
    for symbol in symbols:
        if len(symbol.headers) == 1:
            # SYMBOL(unqualified_name, namespace, header)
            print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                          symbol.headers[0]))
        elif len(symbol.headers) == 0:
            sys.stderr.write("No header found for symbol %s\n" % symbol.name)
        else:
            # FIXME: support symbols with multiple headers (e.g. std::move).
            sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
                symbol.name, ', '.join(symbol.headers)))


if __name__ == '__main__':
    main()