1 //===-- lib/Parser/source.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include "llvm/Support/Errno.h"
13 #include "llvm/Support/FileSystem.h"
14 #include "llvm/Support/Path.h"
15 #include "llvm/Support/raw_ostream.h"
16 #include <algorithm>
17 #include <memory>
18 #include <vector>
19 
20 namespace Fortran::parser {
21 
~SourceFile()22 SourceFile::~SourceFile() { Close(); }
23 
FindLineStarts(llvm::StringRef source)24 static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) {
25   std::vector<std::size_t> result;
26   if (source.size() > 0) {
27     CHECK(source.back() == '\n' && "missing ultimate newline");
28     std::size_t at{0};
29     do {
30       result.push_back(at);
31       at = source.find('\n', at) + 1;
32     } while (at < source.size());
33     result.shrink_to_fit();
34   }
35   return result;
36 }
37 
RecordLineStarts()38 void SourceFile::RecordLineStarts() {
39   lineStart_ = FindLineStarts({content().data(), bytes()});
40 }
41 
42 // Check for a Unicode byte order mark (BOM).
43 // Module files all have one; so can source files.
IdentifyPayload()44 void SourceFile::IdentifyPayload() {
45   llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
46   constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
47   if (content.startswith(UTF8_BOM)) {
48     bom_end_ = UTF8_BOM.size();
49     encoding_ = Encoding::UTF_8;
50   }
51 }
52 
DirectoryName(std::string path)53 std::string DirectoryName(std::string path) {
54   llvm::SmallString<128> pathBuf{path};
55   llvm::sys::path::remove_filename(pathBuf);
56   return pathBuf.str().str();
57 }
58 
LocateSourceFile(std::string name,const std::vector<std::string> & searchPath)59 std::string LocateSourceFile(
60     std::string name, const std::vector<std::string> &searchPath) {
61   if (name.empty() || name == "-" || llvm::sys::path::is_absolute(name)) {
62     return name;
63   }
64   for (const std::string &dir : searchPath) {
65     llvm::SmallString<128> path{dir};
66     llvm::sys::path::append(path, name);
67     bool isDir{false};
68     auto er = llvm::sys::fs::is_directory(path, isDir);
69     if (!er && !isDir) {
70       return path.str().str();
71     }
72   }
73   return name;
74 }
75 
RemoveCarriageReturns(llvm::MutableArrayRef<char> buf)76 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
77   std::size_t wrote{0};
78   char *buffer{buf.data()};
79   char *p{buf.data()};
80   std::size_t bytes = buf.size();
81   while (bytes > 0) {
82     void *vp{static_cast<void *>(p)};
83     void *crvp{std::memchr(vp, '\r', bytes)};
84     char *crcp{static_cast<char *>(crvp)};
85     if (!crcp) {
86       std::memmove(buffer + wrote, p, bytes);
87       wrote += bytes;
88       break;
89     }
90     std::size_t chunk = crcp - p;
91     auto advance{chunk + 1};
92     if (chunk + 1 >= bytes || crcp[1] == '\n') {
93       // CR followed by LF or EOF: omit
94     } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
95       // CR preceded by LF or BOF: omit
96     } else {
97       // CR in line: retain
98       ++chunk;
99     }
100     std::memmove(buffer + wrote, p, chunk);
101     wrote += chunk;
102     p += advance;
103     bytes -= advance;
104   }
105   return wrote;
106 }
107 
Open(std::string path,llvm::raw_ostream & error)108 bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
109   Close();
110   path_ = path;
111   std::string errorPath{"'"s + path_ + "'"};
112   auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
113   if (!bufOr) {
114     auto err = bufOr.getError();
115     error << "Could not open " << errorPath << ": " << err.message();
116     return false;
117   }
118   buf_ = std::move(bufOr.get());
119   ReadFile();
120   return true;
121 }
122 
ReadStandardInput(llvm::raw_ostream & error)123 bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
124   Close();
125   path_ = "standard input";
126 
127   auto buf_or = llvm::MemoryBuffer::getSTDIN();
128   if (!buf_or) {
129     auto err = buf_or.getError();
130     error << err.message();
131     return false;
132   }
133   auto inbuf = std::move(buf_or.get());
134   buf_ =
135       llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
136   llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
137   ReadFile();
138   return true;
139 }
140 
ReadFile()141 void SourceFile::ReadFile() {
142   buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
143   if (content().size() == 0 || content().back() != '\n') {
144     // Don't bother to copy if we have spare memory
145     if (content().size() >= buf_->getBufferSize()) {
146       auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
147           content().size() + 1)};
148       llvm::copy(content(), tmp_buf->getBufferStart());
149       Close();
150       buf_ = std::move(tmp_buf);
151     }
152     buf_end_++;
153     buf_->getBuffer()[buf_end_ - 1] = '\n';
154   }
155   IdentifyPayload();
156   RecordLineStarts();
157 }
158 
Close()159 void SourceFile::Close() {
160   path_.clear();
161   buf_.reset();
162 }
163 
FindOffsetLineAndColumn(std::size_t at) const164 SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
165   CHECK(at < bytes());
166 
167   auto it = llvm::upper_bound(lineStart_, at);
168   auto low = std::distance(lineStart_.begin(), it - 1);
169   return {*this, static_cast<int>(low + 1),
170       static_cast<int>(at - lineStart_[low] + 1)};
171 }
172 } // namespace Fortran::parser
173