#include "ScriptLexer.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
using namespace llvm;
using namespace lld;
using namespace lld::elf;
// Returns the text of the line containing the most recently consumed token
// (tokens[pos - 1]), without its line terminator. Because it indexes
// tokens[pos - 1], it must only be called when pos > 0.
StringRef ScriptLexer::getLine() {
  StringRef buf = getCurrentMB().getBuffer();
  const size_t tokOffset = tokens[pos - 1].data() - buf.data();

  // Search backwards from the token's offset for the preceding newline; if
  // there is one, the line starts right after it, otherwise at the start of
  // the buffer.
  const size_t newline = buf.rfind('\n', tokOffset);
  StringRef line = newline == StringRef::npos ? buf : buf.substr(newline + 1);

  // Cut at the first CR or LF (or keep everything if there is none).
  return line.substr(0, line.find_first_of("\r\n"));
}
size_t ScriptLexer::getLineNumber() {
if (pos == 0)
return 1;
StringRef s = getCurrentMB().getBuffer();
StringRef tok = tokens[pos - 1];
const size_t tokOffset = tok.data() - s.data();
size_t line = 1;
size_t start = 0;
if (lastLineNumberOffset > 0 && tokOffset >= lastLineNumberOffset) {
start = lastLineNumberOffset;
line = lastLineNumber;
}
line += s.substr(start, tokOffset - start).count('\n');
lastLineNumberOffset = tokOffset;
lastLineNumber = line;
return line;
}
size_t ScriptLexer::getColumnNumber() {
StringRef tok = tokens[pos - 1];
return tok.data() - getLine().data();
}
std::string ScriptLexer::getCurrentLocation() {
std::string filename = std::string(getCurrentMB().getBufferIdentifier());
return (filename + ":" + Twine(getLineNumber())).str();
}
// Constructs a lexer and immediately tokenizes the given buffer.
ScriptLexer::ScriptLexer(MemoryBufferRef mb) { tokenize(mb); }
void ScriptLexer::setError(const Twine &msg) {
if (errorCount())
return;
std::string s = (getCurrentLocation() + ": " + msg).str();
if (pos)
s += "\n>>> " + getLine().str() + "\n>>> " +
std::string(getColumnNumber(), ' ') + "^";
error(s);
}
// Splits the contents of `mb` into tokens and inserts them into `tokens` at
// the current read position (`pos`), i.e. ahead of any tokens that have not
// been consumed yet. The buffer is also recorded in `mbs`.
//
// If a quoted string is not terminated, an error is reported and no token
// from this buffer is inserted.
void ScriptLexer::tokenize(MemoryBufferRef mb) {
  std::vector<StringRef> vec;
  mbs.push_back(mb);
  StringRef s = mb.getBuffer();
  StringRef begin = s;

  for (;;) {
    s = skipSpace(s);
    if (s.empty())
      break;

    // Quoted token. The surrounding quotes are kept as part of the token.
    if (s.starts_with("\"")) {
      size_t e = s.find("\"", 1);
      if (e == StringRef::npos) {
        StringRef filename = mb.getBufferIdentifier();
        size_t lineno = begin.substr(0, s.data() - begin.data()).count('\n');
        error(filename + ":" + Twine(lineno + 1) + ": unclosed quote");
        return;
      }
      vec.push_back(s.take_front(e + 1));
      s = s.substr(e + 1);
      continue;
    }

    // Three-character compound assignment operators.
    if (s.starts_with("<<=") || s.starts_with(">>=")) {
      vec.push_back(s.substr(0, 3));
      s = s.substr(3);
      continue;
    }

    // Two-character operators: "<op>=" and the doubled forms "<<", ">>",
    // "&&", "||". StringRef::contains is used rather than strchr because
    // strchr also matches the set's terminating NUL, which would make an
    // embedded '\0' in the input look like an operator character.
    if (s.size() > 1) {
      const char c = s[0];
      if ((s[1] == '=' && StringRef("*/+-<>&^|").contains(c)) ||
          (c == s[1] && StringRef("<>&|").contains(c))) {
        vec.push_back(s.substr(0, 2));
        s = s.substr(2);
        continue;
      }
    }

    // Unquoted token: the longest run of the characters listed below. Any
    // other character becomes a single-character token of its own.
    size_t len = s.find_first_not_of(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "0123456789_.$/\\~=+[]*?-!^:");
    if (len == 0)
      len = 1;
    vec.push_back(s.substr(0, len));
    s = s.substr(len);
  }

  tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
}
// Returns `s` with leading whitespace, "/* ... */" comments and "#" line
// comments removed. If a block comment is never closed, an error is
// reported and an empty string is returned.
StringRef ScriptLexer::skipSpace(StringRef s) {
  while (true) {
    // Block comment.
    if (s.starts_with("/*")) {
      const size_t close = s.find("*/", 2);
      if (close == StringRef::npos) {
        setError("unclosed comment in a linker script");
        return "";
      }
      s = s.substr(close + 2);
      continue;
    }

    // Line comment: skip through the newline, or to the end of the input
    // when there is no newline.
    if (s.starts_with("#")) {
      const size_t eol = s.find('\n', 1);
      s = s.substr(eol == StringRef::npos ? s.size() : eol + 1);
      continue;
    }

    // Strip whitespace; once nothing more can be removed we are done.
    StringRef trimmed = s.ltrim();
    if (trimmed.size() == s.size())
      return s;
    s = trimmed;
  }
}
// Returns true when errorCount() is nonzero or when `pos` has reached the end
// of `tokens`.
bool ScriptLexer::atEOF() { return errorCount() || tokens.size() == pos; }
// Splits `s` into operator and non-operator pieces. The operator characters
// are "!~*/+-<>?^:="; the two-character operators "!=", "==", ">=", "<=",
// "<<" and ">>" are kept together. A string starting with a double quote is
// returned unsplit.
static std::vector<StringRef> tokenizeExpr(StringRef s) {
  if (s.starts_with("\""))
    return {s};

  const StringRef ops = "!~*/+-<>?^:=";
  std::vector<StringRef> pieces;
  while (!s.empty()) {
    const size_t opPos = s.find_first_of(ops);

    // No operator left: the remainder is a single piece.
    if (opPos == StringRef::npos) {
      pieces.push_back(s);
      break;
    }

    // Text preceding the operator, if any.
    if (opPos > 0)
      pieces.push_back(s.substr(0, opPos));

    // The operator itself, one or two characters wide.
    StringRef rest = s.substr(opPos);
    size_t width = 1;
    for (const char *op : {"!=", "==", ">=", "<=", "<<", ">>"}) {
      if (rest.starts_with(op)) {
        width = 2;
        break;
      }
    }
    pieces.push_back(rest.substr(0, width));
    s = rest.substr(width);
  }
  return pieces;
}
void ScriptLexer::maybeSplitExpr() {
if (!inExpr || errorCount() || atEOF())
return;
std::vector<StringRef> v = tokenizeExpr(tokens[pos]);
if (v.size() == 1)
return;
tokens.erase(tokens.begin() + pos);
tokens.insert(tokens.begin() + pos, v.begin(), v.end());
}
// Returns the next token and advances `pos`. Returns an empty string without
// advancing if errorCount() is nonzero, or, after reporting "unexpected EOF",
// if there are no tokens left.
StringRef ScriptLexer::next() {
  maybeSplitExpr();

  if (errorCount())
    return "";

  if (!atEOF()) {
    StringRef tok = tokens[pos];
    ++pos;
    return tok;
  }

  setError("unexpected EOF");
  return "";
}
// Returns the next token without consuming it. Implemented as next()
// followed by stepping `pos` back; when errorCount() is nonzero afterwards,
// `pos` is left alone and an empty string is returned.
StringRef ScriptLexer::peek() {
  const StringRef tok = next();
  if (!errorCount()) {
    --pos;
    return tok;
  }
  return "";
}
// Consumes the next token if it equals `tok` and returns true; otherwise
// leaves the read position where it was and returns false.
//
// The position is saved and restored rather than decremented: next() does
// not advance `pos` when it fails (error already reported, or end of input),
// so an unconditional `--pos` would step back over an already-consumed token
// or wrap around when `pos` is 0.
bool ScriptLexer::consume(StringRef tok) {
  const size_t saved = pos;
  if (next() == tok)
    return true;
  pos = saved;
  return false;
}
// Consumes a label, which may appear either as the single token "<tok>:" or
// as the two consecutive tokens "<tok>" and ":". Returns true if either form
// was consumed.
bool ScriptLexer::consumeLabel(StringRef tok) {
  // Single-token form.
  const std::string fused = (tok + ":").str();
  if (consume(fused))
    return true;

  // Two-token form.
  const bool split = pos + 2 <= tokens.size() && tokens[pos] == tok &&
                     tokens[pos + 1] == ":";
  if (!split)
    return false;
  pos += 2;
  return true;
}
// Calls next() and discards the token it returns.
void ScriptLexer::skip() { (void)next(); }
// Consumes the next token and reports an error if it is not `expect`. Does
// nothing when errorCount() is already nonzero.
void ScriptLexer::expect(StringRef expect) {
  if (errorCount())
    return;

  const StringRef got = next();
  if (got == expect)
    return;
  setError(expect + " expected, but got " + got);
}
// Returns true if the byte range referenced by `t` lies entirely within the
// byte range referenced by `s`.
static bool encloses(StringRef s, StringRef t) {
  const bool startsInside = s.bytes_begin() <= t.bytes_begin();
  const bool endsInside = t.bytes_end() <= s.bytes_end();
  return startsInside && endsInside;
}
// Returns the memory buffer that contains the most recently consumed token.
// Before any token has been consumed, returns the most recently added buffer.
MemoryBufferRef ScriptLexer::getCurrentMB() {
  assert(!mbs.empty());
  if (pos == 0)
    return mbs.back();

  // Each token is a slice of one of the recorded buffers, so find the buffer
  // whose byte range covers it.
  const StringRef tok = tokens[pos - 1];
  for (const MemoryBufferRef &mb : mbs)
    if (encloses(mb.getBuffer(), tok))
      return mb;
  llvm_unreachable("getCurrentMB: failed to find a token");
}