1 //===-- include/flang/Parser/provenance.h -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef FORTRAN_PARSER_PROVENANCE_H_
10 #define FORTRAN_PARSER_PROVENANCE_H_
11 
12 #include "char-block.h"
13 #include "char-buffer.h"
14 #include "characters.h"
15 #include "source.h"
16 #include "flang/Common/idioms.h"
17 #include "flang/Common/interval.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include <cstddef>
20 #include <list>
21 #include <map>
22 #include <memory>
23 #include <optional>
24 #include <string>
25 #include <utility>
26 #include <variant>
27 #include <vector>
28 
29 namespace Fortran::parser {
30 
31 // Each character in the contiguous source stream built by the
32 // prescanner corresponds to a particular character in a source file,
33 // include file, macro expansion, or compiler-inserted padding.
34 // The location of this original character to which a parsable character
35 // corresponds is its provenance.
36 //
37 // Provenances are offsets into an (unmaterialized) marshaling of the
38 // entire contents of all the original source files, include files, macro
39 // expansions, &c. for each visit to each source.  These origins of the
40 // original source characters constitute a forest whose roots are
41 // the original source files named on the compiler's command line.
42 // Given a Provenance, we can find the tree node that contains it in time
43 // O(log(# of origins)), and describe the position precisely by walking
44 // up the tree.  (It would be possible, via a time/space trade-off, to
45 // cap the time by the use of an intermediate table that would be indexed
46 // by the upper bits of an offset, but that does not appear to be
47 // necessary.)
48 
49 class AllSources;
50 
51 class Provenance {
52 public:
Provenance()53   Provenance() {}
Provenance(std::size_t offset)54   Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); }
55   Provenance(const Provenance &that) = default;
56   Provenance(Provenance &&that) = default;
57   Provenance &operator=(const Provenance &that) = default;
58   Provenance &operator=(Provenance &&that) = default;
59 
offset()60   std::size_t offset() const { return offset_; }
61 
62   Provenance operator+(ptrdiff_t n) const {
63     CHECK(n > -static_cast<ptrdiff_t>(offset_));
64     return {offset_ + static_cast<std::size_t>(n)};
65   }
66   Provenance operator+(std::size_t n) const { return {offset_ + n}; }
67   std::size_t operator-(Provenance that) const {
68     CHECK(that <= *this);
69     return offset_ - that.offset_;
70   }
71   bool operator<(Provenance that) const { return offset_ < that.offset_; }
72   bool operator<=(Provenance that) const { return !(that < *this); }
73   bool operator==(Provenance that) const { return offset_ == that.offset_; }
74   bool operator!=(Provenance that) const { return !(*this == that); }
75 
76 private:
77   std::size_t offset_{0};
78 };
79 
80 using ProvenanceRange = common::Interval<Provenance>;
81 
82 // Maps contiguous ranges of byte offsets in original source files to
83 // contiguous ranges in the cooked character stream; essentially a
84 // partial inversion of OffsetToProvenanceMappings (below).
85 // Used for implementing the first step of mapping an identifier
86 // selected in a code editor to one of its declarative statements.
87 class ProvenanceRangeToOffsetMappings {
88 public:
89   ProvenanceRangeToOffsetMappings();
90   ~ProvenanceRangeToOffsetMappings();
empty()91   bool empty() const { return map_.empty(); }
92   void Put(ProvenanceRange, std::size_t offset);
93   std::optional<std::size_t> Map(ProvenanceRange) const;
94   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
95 
96 private:
97   // A comparison function object for use in std::multimap<Compare=>.
98   // Intersecting intervals will effectively compare equal, not being
99   // either < nor >= each other.
100   struct WhollyPrecedes {
101     bool operator()(ProvenanceRange, ProvenanceRange) const;
102   };
103 
104   std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_;
105 };
106 
107 // Maps 0-based local offsets in some contiguous range (e.g., a token
108 // sequence) to their provenances.  Lookup time is on the order of
109 // O(log(#of intervals with contiguous provenances)).  As mentioned
110 // above, this time could be capped via a time/space trade-off.
111 class OffsetToProvenanceMappings {
112 public:
OffsetToProvenanceMappings()113   OffsetToProvenanceMappings() {}
114   void clear();
115   void swap(OffsetToProvenanceMappings &);
116   void shrink_to_fit();
117   std::size_t SizeInBytes() const;
118   void Put(ProvenanceRange);
119   void Put(const OffsetToProvenanceMappings &);
120   ProvenanceRange Map(std::size_t at) const;
121   void RemoveLastBytes(std::size_t);
122   ProvenanceRangeToOffsetMappings Invert(const AllSources &) const;
123   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
124 
125 private:
126   struct ContiguousProvenanceMapping {
127     std::size_t start;
128     ProvenanceRange range;
129   };
130 
131   // Elements appear in ascending order of distinct .start values;
132   // their .range values are disjoint and not necessarily adjacent.
133   std::vector<ContiguousProvenanceMapping> provenanceMap_;
134 };
135 
136 // A singleton AllSources instance for the whole compilation
137 // is shared by reference.
138 class AllSources {
139 public:
140   AllSources();
141   ~AllSources();
142 
size()143   std::size_t size() const { return range_.size(); }
144   const char &operator[](Provenance) const;
encoding()145   Encoding encoding() const { return encoding_; }
set_encoding(Encoding e)146   AllSources &set_encoding(Encoding e) {
147     encoding_ = e;
148     return *this;
149   }
150 
151   void PushSearchPathDirectory(std::string);
152   std::string PopSearchPathDirectory();
153   const SourceFile *Open(std::string path, llvm::raw_ostream &error);
154   const SourceFile *ReadStandardInput(llvm::raw_ostream &error);
155 
156   ProvenanceRange AddIncludedFile(
157       const SourceFile &, ProvenanceRange, bool isModule = false);
158   ProvenanceRange AddMacroCall(
159       ProvenanceRange def, ProvenanceRange use, const std::string &expansion);
160   ProvenanceRange AddCompilerInsertion(std::string);
161 
IsValid(Provenance at)162   bool IsValid(Provenance at) const { return range_.Contains(at); }
IsValid(ProvenanceRange range)163   bool IsValid(ProvenanceRange range) const {
164     return range.size() > 0 && range_.Contains(range);
165   }
166   void EmitMessage(llvm::raw_ostream &, const std::optional<ProvenanceRange> &,
167       const std::string &message, bool echoSourceLine = false) const;
168   const SourceFile *GetSourceFile(
169       Provenance, std::size_t *offset = nullptr) const;
170   const char *GetSource(ProvenanceRange) const;
171   std::optional<SourcePosition> GetSourcePosition(Provenance) const;
172   std::optional<ProvenanceRange> GetFirstFileProvenance() const;
173   std::string GetPath(Provenance) const; // __FILE__
174   int GetLineNumber(Provenance) const; // __LINE__
175   Provenance CompilerInsertionProvenance(char ch);
176   Provenance CompilerInsertionProvenance(const char *, std::size_t);
177   ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const;
178   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
179 
180 private:
181   struct Inclusion {
182     const SourceFile &source;
183     bool isModule{false};
184   };
185   struct Macro {
186     ProvenanceRange definition;
187     std::string expansion;
188   };
189   struct CompilerInsertion {
190     std::string text;
191   };
192 
193   struct Origin {
194     Origin(ProvenanceRange, const SourceFile &);
195     Origin(ProvenanceRange, const SourceFile &, ProvenanceRange,
196         bool isModule = false);
197     Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use,
198         const std::string &expansion);
199     Origin(ProvenanceRange, const std::string &);
200 
201     const char &operator[](std::size_t) const;
202 
203     std::variant<Inclusion, Macro, CompilerInsertion> u;
204     ProvenanceRange covers, replaces;
205   };
206 
207   const Origin &MapToOrigin(Provenance) const;
208 
209   // Elements are in ascending & contiguous order of .covers.
210   std::vector<Origin> origin_;
211   ProvenanceRange range_;
212   std::map<char, Provenance> compilerInsertionProvenance_;
213   std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_;
214   std::vector<std::string> searchPath_;
215   Encoding encoding_{Encoding::UTF_8};
216 };
217 
218 // Represents the result of preprocessing and prescanning a single source
219 // file (and all its inclusions) or module file.  Parsers operate within
220 // single instances of CookedSource.
221 class CookedSource {
222 public:
AsCharBlock()223   CharBlock AsCharBlock() const { return CharBlock{data_}; }
224   std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
225   std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;
226 
227   // The result of a Put() is the offset that the new data
228   // will have in the eventually marshaled contiguous buffer.
Put(const char * data,std::size_t bytes)229   std::size_t Put(const char *data, std::size_t bytes) {
230     return buffer_.Put(data, bytes);
231   }
Put(const std::string & s)232   std::size_t Put(const std::string &s) { return buffer_.Put(s); }
Put(char ch)233   std::size_t Put(char ch) { return buffer_.Put(&ch, 1); }
Put(char ch,Provenance p)234   std::size_t Put(char ch, Provenance p) {
235     provenanceMap_.Put(ProvenanceRange{p, 1});
236     return buffer_.Put(&ch, 1);
237   }
238 
PutProvenance(Provenance p)239   void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); }
PutProvenance(ProvenanceRange pr)240   void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); }
PutProvenanceMappings(const OffsetToProvenanceMappings & pm)241   void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) {
242     provenanceMap_.Put(pm);
243   }
244 
245   std::size_t BufferedBytes() const;
246   void Marshal(AllSources &); // marshals text into one contiguous block
247   void CompileProvenanceRangeToOffsetMappings(AllSources &);
248   llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
249 
250 private:
251   CharBuffer buffer_; // before Marshal()
252   std::string data_; // all of it, prescanned and preprocessed
253   OffsetToProvenanceMappings provenanceMap_;
254   ProvenanceRangeToOffsetMappings invertedMap_;
255 };
256 
257 class AllCookedSources {
258 public:
259   explicit AllCookedSources(AllSources &);
260   ~AllCookedSources();
261 
allSources()262   AllSources &allSources() { return allSources_; }
allSources()263   const AllSources &allSources() const { return allSources_; }
264 
265   CookedSource &NewCookedSource();
266 
267   template <typename A> // const char * or CharBlock
Find(A x)268   const CookedSource *Find(A x) const {
269     for (const auto &c : cooked_) {
270       if (c.AsCharBlock().Contains(x)) {
271         return &c;
272       }
273     }
274     return nullptr;
275   }
276 
IsValid(ProvenanceRange r)277   bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); }
278 
279   std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const;
280   std::optional<CharBlock> GetCharBlockFromLineAndColumns(
281       int line, int startColumn, int endColumn) const;
282   std::optional<std::pair<SourcePosition, SourcePosition>>
283       GetSourcePositionRange(CharBlock) const;
284   std::optional<CharBlock> GetCharBlock(ProvenanceRange) const;
285   void Dump(llvm::raw_ostream &) const;
286 
287 private:
288   AllSources &allSources_;
289   std::list<CookedSource> cooked_; // owns all CookedSource instances
290 };
291 } // namespace Fortran::parser
292 #endif // FORTRAN_PARSER_PROVENANCE_H_
293