1 //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This tablegen backend emits an efficient function to translate HTML named
// character references to UTF-8 sequences.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/Support/ConvertUTF.h"
17 #include "llvm/TableGen/Error.h"
18 #include "llvm/TableGen/Record.h"
19 #include "llvm/TableGen/StringMatcher.h"
20 #include "llvm/TableGen/TableGenBackend.h"
21 #include <vector>
22 
23 using namespace llvm;
24 
25 /// \brief Convert a code point to the corresponding UTF-8 sequence represented
26 /// as a C string literal.
27 ///
28 /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29 static bool translateCodePointToUTF8(unsigned CodePoint,
30                                      SmallVectorImpl<char> &CLiteral) {
31   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32   char *TranslatedPtr = Translated;
33   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34     return false;
35 
36   StringRef UTF8(Translated, TranslatedPtr - Translated);
37 
38   raw_svector_ostream OS(CLiteral);
39   OS << "\"";
40   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41     OS << "\\x";
42     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43   }
44   OS << "\"";
45 
46   return true;
47 }
48 
49 namespace clang {
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)50 void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
51                                                   raw_ostream &OS) {
52   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
53   std::vector<StringMatcher::StringPair> NameToUTF8;
54   SmallString<32> CLiteral;
55   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
56        I != E; ++I) {
57     Record &Tag = **I;
58     std::string Spelling = Tag.getValueAsString("Spelling");
59     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
60     CLiteral.clear();
61     CLiteral.append("return ");
62     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
63       SrcMgr.PrintMessage(Tag.getLoc().front(),
64                           SourceMgr::DK_Error,
65                           Twine("invalid code point"));
66       continue;
67     }
68     CLiteral.append(";");
69 
70     StringMatcher::StringPair Match(Spelling, CLiteral.str());
71     NameToUTF8.push_back(Match);
72   }
73 
74   emitSourceFileHeader("HTML named character reference to UTF-8 "
75                        "translation", OS);
76 
77   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
78         "                                             StringRef Name) {\n";
79   StringMatcher("Name", NameToUTF8, OS).Emit();
80   OS << "  return StringRef();\n"
81      << "}\n\n";
82 }
83 
84 } // end namespace clang
85 
86