#include "Lexer.h"
#include "Token.h"
#include "mlir/AsmParser/CodeComplete.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include <cassert>
#include <cctype>
using namespace mlir;
/// Returns true if `c` is one of the punctuation characters `$`, `.`, `_` or
/// `-`, and false for every other character.
static bool isPunct(char c) {
  switch (c) {
  case '$':
  case '.':
  case '_':
  case '-':
    return true;
  default:
    return false;
  }
}
/// Construct a lexer positioned at the beginning of the main file buffer of
/// `sourceMgr`.
///
/// \param sourceMgr source manager whose main file is lexed.
/// \param context context stored for later use when building locations.
/// \param codeCompleteContext optional; when non-null, its code-completion
///        location is recorded so that lexing can report it.
Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context,
             AsmParserCodeCompleteContext *codeCompleteContext)
    : sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
  // Lexing starts at the first byte of the main file's contents.
  const unsigned mainFileID = sourceMgr.getMainFileID();
  const auto *mainBuffer = sourceMgr.getMemoryBuffer(mainFileID);
  curBuffer = mainBuffer->getBuffer();
  curPtr = curBuffer.begin();

  // Without a completion context, codeCompleteLoc stays null.
  if (!codeCompleteContext)
    return;
  codeCompleteLoc = codeCompleteContext->getCodeCompleteLoc().getPointer();
}
/// Translate the raw source pointer held by `loc` into a file/line/column
/// location for the main file of the source manager.
///
/// The line comes from the main file's buffer info; the column is the offset
/// of `loc` from the start of that line, plus one.
Location Lexer::getEncodedSourceLocation(SMLoc loc) {
  auto &srcMgr = getSourceMgr();
  const unsigned fileID = srcMgr.getMainFileID();
  auto &fileInfo = srcMgr.getBufferInfo(fileID);

  const char *locPtr = loc.getPointer();
  const unsigned line = fileInfo.getLineNumber(locPtr);
  const char *lineStart = fileInfo.getPointerForLineNumber(line);
  const unsigned col = (locPtr - lineStart) + 1;

  auto *fileBuffer = srcMgr.getMemoryBuffer(fileID);
  return FileLineColLoc::get(context, fileBuffer->getBufferIdentifier(), line,
                             col);
}
/// Report `message` as an error at the source position `loc` and return a
/// `Token::error` token formed at `loc`.
Token Lexer::emitError(const char *loc, const Twine &message) {
  Location errorLoc = getEncodedSourceLocation(SMLoc::getFromPointer(loc));
  mlir::emitError(errorLoc, message);
  return formToken(Token::error, loc);
}
/// Lex and return the next token from the buffer.
///
/// Whitespace, `//` comments and nul bytes that are not at the end of the
/// buffer are skipped. If the start of the next token coincides with the
/// code-completion location, a `Token::code_complete` token is returned
/// instead. Characters that cannot start any token produce an
/// "unexpected character" error token.
Token Lexer::lexToken() {
  while (true) {
    const char *tokStart = curPtr;

    // The code-completion position is checked before looking at the character.
    if (tokStart == codeCompleteLoc)
      return formToken(Token::code_complete, tokStart);

    switch (*curPtr++) {
    default:
      // A letter starts a bare identifier or keyword. The cast to
      // `unsigned char` is required: handing a negative `char` (any byte
      // >= 0x80 where `char` is signed) to <cctype> is undefined behavior.
      if (isalpha(static_cast<unsigned char>(curPtr[-1])))
        return lexBareIdentifierOrKeyword(tokStart);

      // Anything else cannot start a token.
      return emitError(tokStart, "unexpected character");

    case ' ':
    case '\t':
    case '\n':
    case '\r':
      // Skip whitespace.
      continue;

    case '_':
      // An underscore also starts a bare identifier.
      return lexBareIdentifierOrKeyword(tokStart);

    case 0:
      // A nul located exactly at the end of the buffer is end-of-file; any
      // other nul byte is skipped.
      if (curPtr - 1 == curBuffer.end())
        return formToken(Token::eof, tokStart);
      continue;

    case ':':
      return formToken(Token::colon, tokStart);
    case ',':
      return formToken(Token::comma, tokStart);
    case '.':
      return lexEllipsis(tokStart);
    case '(':
      return formToken(Token::l_paren, tokStart);
    case ')':
      return formToken(Token::r_paren, tokStart);
    case '{':
      // `{-#` opens a file metadata section.
      if (*curPtr == '-' && *(curPtr + 1) == '#') {
        curPtr += 2;
        return formToken(Token::file_metadata_begin, tokStart);
      }
      return formToken(Token::l_brace, tokStart);
    case '}':
      return formToken(Token::r_brace, tokStart);
    case '[':
      return formToken(Token::l_square, tokStart);
    case ']':
      return formToken(Token::r_square, tokStart);
    case '<':
      return formToken(Token::less, tokStart);
    case '>':
      return formToken(Token::greater, tokStart);
    case '=':
      return formToken(Token::equal, tokStart);

    case '+':
      return formToken(Token::plus, tokStart);
    case '*':
      return formToken(Token::star, tokStart);
    case '-':
      // `->` is a single arrow token.
      if (*curPtr == '>') {
        ++curPtr;
        return formToken(Token::arrow, tokStart);
      }
      return formToken(Token::minus, tokStart);

    case '?':
      return formToken(Token::question, tokStart);

    case '|':
      return formToken(Token::vertical_bar, tokStart);

    case '/':
      // Only `//` comments are accepted; a lone '/' is an error.
      if (*curPtr == '/') {
        skipComment();
        continue;
      }
      return emitError(tokStart, "unexpected character");

    case '@':
      return lexAtIdentifier(tokStart);

    case '#':
      // `#-}` closes a file metadata section; otherwise '#' is an identifier
      // prefix like the characters below.
      if (*curPtr == '-' && *(curPtr + 1) == '}') {
        curPtr += 2;
        return formToken(Token::file_metadata_end, tokStart);
      }
      [[fallthrough]];
    case '!':
    case '^':
    case '%':
      return lexPrefixedIdentifier(tokStart);
    case '"':
      return lexString(tokStart);

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      return lexNumber(tokStart);
    }
  }
}
/// Lex an identifier introduced by '@'. On entry `curPtr` points just past the
/// '@' and `tokStart` points at it.
///
/// Two forms are accepted after the '@':
///   * a string literal, lexed via `lexString`; an error from the string lexer
///     is returned unchanged;
///   * a letter or '_' followed by any run of letters, digits, '_', '$' or '.'.
///
/// Anything else yields an error token.
Token Lexer::lexAtIdentifier(const char *tokStart) {
  const char cur = *curPtr++;

  // Quoted form: delegate to the string lexer, then re-form the whole span
  // starting at the '@' as an at-identifier.
  if (cur == '"') {
    Token stringIdentifier = lexString(curPtr);
    if (stringIdentifier.is(Token::error))
      return stringIdentifier;
    return formToken(Token::at_identifier, tokStart);
  }

  // <cctype> classifiers have undefined behavior for negative arguments, so
  // every `char` is converted to `unsigned char` before classification.
  if (!isalpha(static_cast<unsigned char>(cur)) && cur != '_')
    return emitError(curPtr - 1,
                     "@ identifier expected to start with letter or '_'");

  auto isIdentifierChar = [](char c) {
    const auto uc = static_cast<unsigned char>(c);
    return isalpha(uc) || isdigit(uc) || c == '_' || c == '$' || c == '.';
  };
  while (isIdentifierChar(*curPtr))
    ++curPtr;
  return formToken(Token::at_identifier, tokStart);
}
/// Lex the remainder of a bare identifier whose first character has already
/// been consumed, then classify the spelling.
///
/// The identifier continues over letters, digits, '_', '$' and '.'. The result
/// is:
///   * `Token::inttype` for `i<digits>`, `si<digits>` or `ui<digits>`;
///   * the matching keyword kind if the spelling is listed in TokenKinds.def;
///   * `Token::bare_identifier` otherwise.
Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
  // <cctype> classifiers have undefined behavior for negative arguments, so
  // every `char` is converted to `unsigned char` before classification.
  auto isIdentifierChar = [](char c) {
    const auto uc = static_cast<unsigned char>(c);
    return isalpha(uc) || isdigit(uc) || c == '_' || c == '$' || c == '.';
  };
  while (isIdentifierChar(*curPtr))
    ++curPtr;

  StringRef spelling(tokStart, curPtr - tokStart);

  auto isAllDigit = [](StringRef str) {
    return llvm::all_of(str, llvm::isDigit);
  };

  // Integer type spellings: 'i', 'si' or 'ui' followed only by digits (at
  // least one digit, enforced by the size checks).
  if ((spelling.size() > 1 && tokStart[0] == 'i' &&
       isAllDigit(spelling.drop_front())) ||
      ((spelling.size() > 2 && tokStart[1] == 'i' &&
        (tokStart[0] == 's' || tokStart[0] == 'u')) &&
       isAllDigit(spelling.drop_front(2))))
    return Token(Token::inttype, spelling);

  // Keyword lookup: one `.Case` is generated per TOK_KEYWORD entry.
  Token::Kind kind = StringSwitch<Token::Kind>(spelling)
#define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING)
#include "TokenKinds.def"
                         .Default(Token::bare_identifier);

  return Token(kind, spelling);
}
/// Skip the rest of a `//` comment. On entry `curPtr` points at the second
/// '/'.
///
/// Consumes characters up to and including the first '\n' or '\r'. If the end
/// of the buffer is reached first, `curPtr` is left pointing at the end of the
/// buffer. Nul bytes elsewhere are treated as ordinary comment text.
void Lexer::skipComment() {
  assert(*curPtr == '/');
  ++curPtr;

  for (;;) {
    const char c = *curPtr++;

    // A line terminator ends the comment.
    if (c == '\n' || c == '\r')
      return;

    // Step back onto the end-of-buffer nul so the caller still sees it.
    if (c == 0 && curPtr - 1 == curBuffer.end()) {
      --curPtr;
      return;
    }
  }
}
/// Lex an ellipsis (`...`). On entry the first '.' has been consumed and
/// `tokStart` points at it.
///
/// Returns a `Token::ellipsis` token when two more dots follow; otherwise
/// emits an error at the current position.
Token Lexer::lexEllipsis(const char *tokStart) {
  assert(curPtr[-1] == '.');

  // The second and third dots are only inspected when not at the end of the
  // buffer, and the third only when the second is a dot.
  const bool hasTwoMoreDots =
      curPtr != curBuffer.end() && curPtr[0] == '.' && curPtr[1] == '.';
  if (!hasTwoMoreDots)
    return emitError(curPtr, "expected three consecutive dots for an ellipsis");

  curPtr += 2;
  return formToken(Token::ellipsis, tokStart);
}
/// Lex a numeric literal whose first digit has already been consumed.
///
/// Recognized forms:
///   * hexadecimal integer: `0x` followed by one or more hex digits;
///   * decimal integer: one or more digits;
///   * float: digits, '.', optional digits, and an optional exponent
///     (`e`/`E`, optional sign, one or more digits).
///
/// Returns `Token::integer` or `Token::floatliteral` accordingly.
Token Lexer::lexNumber(const char *tokStart) {
  // <cctype> classifiers have undefined behavior for negative arguments, so
  // every `char` is converted to `unsigned char` before classification.
  auto isDec = [](char c) {
    return isdigit(static_cast<unsigned char>(c)) != 0;
  };
  auto isHex = [](char c) {
    return isxdigit(static_cast<unsigned char>(c)) != 0;
  };

  assert(isDec(curPtr[-1]));

  // Hexadecimal case.
  if (curPtr[-1] == '0' && *curPtr == 'x') {
    // "0x" not followed by a hex digit: the literal is just `0`, and the 'x'
    // is left unconsumed for the next token.
    if (!isHex(curPtr[1]))
      return formToken(Token::integer, tokStart);

    curPtr += 2;
    while (isHex(*curPtr))
      ++curPtr;

    return formToken(Token::integer, tokStart);
  }

  // Decimal integer part.
  while (isDec(*curPtr))
    ++curPtr;

  // No '.' means this is an integer literal.
  if (*curPtr != '.')
    return formToken(Token::integer, tokStart);
  ++curPtr;

  // Fractional digits (possibly none).
  while (isDec(*curPtr))
    ++curPtr;

  // Optional exponent. It is consumed only when at least one digit follows
  // the 'e'/'E' and optional sign; otherwise the 'e' is left for the next
  // token.
  if (*curPtr == 'e' || *curPtr == 'E') {
    if (isDec(curPtr[1]) ||
        ((curPtr[1] == '-' || curPtr[1] == '+') && isDec(curPtr[2]))) {
      // Skips 'e' plus either the sign or the first exponent digit.
      curPtr += 2;
      while (isDec(*curPtr))
        ++curPtr;
    }
  }
  return formToken(Token::floatliteral, tokStart);
}
/// Lex an identifier introduced by one of the prefix characters '#', '%', '^'
/// or '!'. On entry the prefix has been consumed and `tokStart` points at it.
///
/// The suffix after the prefix is either a run of digits, or a letter or
/// punctuation character (see `isPunct`) followed by any run of letters,
/// digits and punctuation characters. If the code-completion location falls
/// within the lexed identifier, a `Token::code_complete` token covering the
/// text up to that location is returned instead. An invalid suffix yields an
/// error token whose message depends on the prefix.
Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
  Token::Kind kind;
  StringRef errorKind;
  switch (*tokStart) {
  case '#':
    kind = Token::hash_identifier;
    errorKind = "invalid attribute name";
    break;
  case '%':
    kind = Token::percent_identifier;
    errorKind = "invalid SSA name";
    break;
  case '^':
    kind = Token::caret_identifier;
    errorKind = "invalid block name";
    break;
  case '!':
    kind = Token::exclamation_identifier;
    errorKind = "invalid type identifier";
    break;
  default:
    llvm_unreachable("invalid caller");
  }

  // <cctype> classifiers have undefined behavior for negative arguments, so
  // every `char` is converted to `unsigned char` before classification.
  auto isDec = [](char c) {
    return isdigit(static_cast<unsigned char>(c)) != 0;
  };
  auto isSuffixChar = [](char c) {
    const auto uc = static_cast<unsigned char>(c);
    return isalpha(uc) || isdigit(uc) || isPunct(c);
  };

  if (isDec(*curPtr)) {
    // A suffix that starts with a digit consists of digits only.
    while (isDec(*curPtr))
      ++curPtr;
  } else if (isalpha(static_cast<unsigned char>(*curPtr)) ||
             isPunct(*curPtr)) {
    do {
      ++curPtr;
    } while (isSuffixChar(*curPtr));
  } else if (curPtr == codeCompleteLoc) {
    // Completion requested immediately after the bare prefix.
    return formToken(Token::code_complete, tokStart);
  } else {
    return emitError(curPtr - 1, errorKind);
  }

  // Completion requested inside (or right at the end of) the identifier: the
  // token text is truncated at the completion location.
  if (codeCompleteLoc && codeCompleteLoc >= tokStart &&
      codeCompleteLoc <= curPtr) {
    return Token(Token::code_complete,
                 StringRef(tokStart, codeCompleteLoc - tokStart));
  }

  return formToken(kind, tokStart);
}
/// Lex a string literal. On entry the opening '"' has been consumed.
///
/// Scans to the closing '"' and returns a `Token::string` token. Accepted
/// escapes are `\"`, `\\`, `\n`, `\t` and a backslash followed by two hex
/// digits; any other escape is an error. A newline, vertical tab, form feed
/// or the end of the buffer before the closing quote is an error; nul bytes
/// elsewhere are kept as part of the string. If the code-completion location
/// is reached while scanning, a `Token::code_complete` token is returned.
Token Lexer::lexString(const char *tokStart) {
  assert(curPtr[-1] == '"');

  for (;;) {
    if (curPtr == codeCompleteLoc)
      return formToken(Token::code_complete, tokStart);

    const char c = *curPtr++;

    // Closing quote: the literal is complete.
    if (c == '"')
      return formToken(Token::string, tokStart);

    // The string may not run into the end of the buffer or across a line.
    const bool hitEndOfBuffer = c == 0 && curPtr - 1 == curBuffer.end();
    if (hitEndOfBuffer || c == '\n' || c == '\v' || c == '\f')
      return emitError(curPtr - 1, "expected '\"' in string literal");

    // Ordinary characters (including embedded nul bytes) need no handling.
    if (c != '\\')
      continue;

    // Escape sequence: validate and consume what follows the backslash.
    const char escaped = *curPtr;
    if (escaped == '"' || escaped == '\\' || escaped == 'n' || escaped == 't') {
      ++curPtr;
    } else if (llvm::isHexDigit(escaped) && llvm::isHexDigit(curPtr[1])) {
      curPtr += 2;
    } else {
      return emitError(curPtr - 1, "unknown escape in string literal");
    }
  }
}