0ed2bd93创建于 2022年8月1日历史提交
//===--- Main.cpp - Compile BNF grammar -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a tool to compile a BNF grammar, it is used by the build system to
// generate a necessary data bits to statically construct core pieces (Grammar,
// LRTable etc) of the LR parser.
//
//===----------------------------------------------------------------------===//

#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include <algorithm>

using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
using llvm::cl::Required;
using llvm::cl::value_desc;
using llvm::cl::values;

namespace {
enum EmitType {
  EmitSymbolList,
  EmitGrammarContent,
};

opt<std::string> Grammar("grammar", desc("Parse a BNF grammar file."),
                         Required);
opt<EmitType>
    Emit(desc("which information to emit:"),
         values(clEnumValN(EmitSymbolList, "emit-symbol-list",
                           "Print nonterminal symbols (default)"),
                clEnumValN(EmitGrammarContent, "emit-grammar-content",
                           "Print the BNF grammar content as a string")));

opt<std::string> OutputFilename("o", init("-"), desc("Output"),
                                value_desc("file"));

std::string readOrDie(llvm::StringRef Path) {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
      llvm::MemoryBuffer::getFile(Path);
  if (std::error_code EC = Text.getError()) {
    llvm::errs() << "Error: can't read grammar file '" << Path
                 << "': " << EC.message() << "\n";
    ::exit(1);
  }
  return Text.get()->getBuffer().str();
}
} // namespace

namespace clang {
namespace pseudo {
namespace {

// Mangles a symbol name into a valid identifier.
//
// These follow names in the grammar fairly closely:
//   nonterminal: `ptr-declarator` becomes `ptr_declarator`;
//   punctuator: `,` becomes `COMMA`;
//   keyword: `INT` becomes `INT`;
//   terminal: `IDENTIFIER` becomes `IDENTIFIER`;
std::string mangleSymbol(SymbolID SID, const Grammar &G) {
  static auto &TokNames = *new std::vector<std::string>{
#define TOK(X) llvm::StringRef(#X).upper(),
#define KEYWORD(Keyword, Condition) llvm::StringRef(#Keyword).upper(),
#include "clang/Basic/TokenKinds.def"
      };
  if (isToken(SID))
    return TokNames[symbolToToken(SID)];
  std::string Name = G.symbolName(SID).str();
  // translation-unit -> translation_unit
  std::replace(Name.begin(), Name.end(), '-', '_');
  return Name;
}

// Mangles the RHS of a rule definition into a valid identifier.
// 
// These are unique only for a fixed LHS.
// e.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
// it is `ptr_operator__ptr_declarator`.
std::string mangleRule(RuleID RID, const Grammar &G) {
  const auto &R = G.lookupRule(RID);
  std::string MangleName = mangleSymbol(R.seq().front(), G);
  for (SymbolID S : R.seq().drop_front()) {
    MangleName.append("__");
    MangleName.append(mangleSymbol(S, G));
  }
  return MangleName;
}

} // namespace
} // namespace pseudo
} // namespace clang

int main(int argc, char *argv[]) {
  llvm::cl::ParseCommandLineOptions(argc, argv, "");

  std::string GrammarText = readOrDie(Grammar);
  std::vector<std::string> Diags;
  auto G = clang::pseudo::Grammar::parseBNF(GrammarText, Diags);

  if (!Diags.empty()) {
    llvm::errs() << llvm::join(Diags, "\n");
    return 1;
  }

  std::error_code EC;
  llvm::ToolOutputFile Out{OutputFilename, EC, llvm::sys::fs::OF_None};
  if (EC) {
    llvm::errs() << EC.message() << '\n';
    return 1;
  }

  switch (Emit) {
  case EmitSymbolList:
    Out.os() << R"cpp(
#ifndef NONTERMINAL
#define NONTERMINAL(NAME, ID)
#endif
#ifndef RULE
#define RULE(LHS, RHS, ID)
#endif
#ifndef EXTENSION
#define EXTENSION(NAME, ID)
#endif
)cpp";
    for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
         ++ID) {
      Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n",
                                clang::pseudo::mangleSymbol(ID, G), ID);
      for (const clang::pseudo::Rule &R : G.rulesFor(ID)) {
        clang::pseudo::RuleID RID = &R - G.table().Rules.data();
        Out.os() << llvm::formatv("RULE({0}, {1}, {2})\n",
                                  clang::pseudo::mangleSymbol(R.Target, G),
                                  clang::pseudo::mangleRule(RID, G), RID);
      }
    }
    for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
         EID < G.table().AttributeValues.size(); ++EID) {
      llvm::StringRef Name = G.table().AttributeValues[EID];
      assert(!Name.empty());
      Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID);
    }
    Out.os() << R"cpp(
#undef NONTERMINAL
#undef RULE
#undef EXTENSION
)cpp";
    break;
  case EmitGrammarContent:
    for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
      Out.os() << '"';
      Out.os().write_escaped((Line + "\n").str());
      Out.os() << "\"\n";
    }
    break;
  }

  Out.keep();

  return 0;
}