//===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang-pseudo/DirectiveTree.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/Support/FormatVariadic.h"
#include <optional>
#include <variant>

namespace clang {
namespace pseudo {
namespace {

class DirectiveParser {
public:
  explicit DirectiveParser(const TokenStream &Code)
      : Code(Code), Tok(&Code.front()) {}
  void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); }

private:
  // Roles that a directive might take within a conditional block.
  enum class Cond { None, If, Else, End };
  static Cond classifyDirective(tok::PPKeywordKind K) {
    switch (K) {
    case clang::tok::pp_if:
    case clang::tok::pp_ifdef:
    case clang::tok::pp_ifndef:
      return Cond::If;
    case clang::tok::pp_elif:
    case clang::tok::pp_elifdef:
    case clang::tok::pp_elifndef:
    case clang::tok::pp_else:
      return Cond::Else;
    case clang::tok::pp_endif:
      return Cond::End;
    default:
      return Cond::None;
    }
  }

  // Parses tokens starting at Tok into Tree.
  // If we reach an End or Else directive that ends Tree, returns it.
  // If TopLevel is true, then we do not expect End and always return
  // std::nullopt.
  std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree,
                                                bool TopLevel) {
    auto StartsDirective =
        [&, AllowDirectiveAt((const Token *)nullptr)]() mutable {
          if (Tok->flag(LexFlags::StartsPPLine)) {
            // If we considered a comment at the start of a PP-line, it doesn't
            // start a directive but the directive can still start after it.
            if (Tok->Kind == tok::comment)
              AllowDirectiveAt = Tok + 1;
            return Tok->Kind == tok::hash;
          }
          return Tok->Kind == tok::hash && AllowDirectiveAt == Tok;
        };
    // Each iteration adds one chunk (or returns, if we see #endif).
    while (Tok->Kind != tok::eof) {
      // If there's no directive here, we have a code chunk.
      if (!StartsDirective()) {
        const Token *Start = Tok;
        do
          ++Tok;
        while (Tok->Kind != tok::eof && !StartsDirective());
        Tree->Chunks.push_back(DirectiveTree::Code{
            Token::Range{Code.index(*Start), Code.index(*Tok)}});
        continue;
      }

      // We have some kind of directive.
      DirectiveTree::Directive Directive;
      parseDirective(&Directive);
      Cond Kind = classifyDirective(Directive.Kind);
      if (Kind == Cond::If) {
        // #if or similar, starting a nested conditional block.
        DirectiveTree::Conditional Conditional;
        Conditional.Branches.emplace_back();
        Conditional.Branches.back().first = std::move(Directive);
        parseConditional(&Conditional);
        Tree->Chunks.push_back(std::move(Conditional));
      } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) {
        // #endif or similar, ending this PStructure scope.
        // (#endif is unexpected at the top level, treat as simple directive).
        return std::move(Directive);
      } else {
        // #define or similar, a simple directive at the current scope.
        Tree->Chunks.push_back(std::move(Directive));
      }
    }
    return std::nullopt;
  }

  // Parse the rest of a conditional section, after seeing the If directive.
  // Returns after consuming the End directive.
  void parseConditional(DirectiveTree::Conditional *C) {
    assert(C->Branches.size() == 1 &&
           C->Branches.front().second.Chunks.empty() &&
           "Should be ready to parse first branch body");
    while (Tok->Kind != tok::eof) {
      auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false);
      if (!Terminator) {
        assert(Tok->Kind == tok::eof && "gave up parsing before eof?");
        C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok));
        return;
      }
      if (classifyDirective(Terminator->Kind) == Cond::End) {
        C->End = std::move(*Terminator);
        return;
      }
      assert(classifyDirective(Terminator->Kind) == Cond::Else &&
             "ended branch unexpectedly");
      C->Branches.emplace_back();
      C->Branches.back().first = std::move(*Terminator);
    }
  }

  // Parse a directive. Tok is the hash.
  void parseDirective(DirectiveTree::Directive *D) {
    assert(Tok->Kind == tok::hash);

    // Directive spans from the hash until the end of line or file.
    const Token *Begin = Tok++;
    while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine))
      ++Tok;
    ArrayRef<Token> Tokens{Begin, Tok};
    D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())};

    // Directive name is the first non-comment token after the hash.
    Tokens = Tokens.drop_front().drop_while(
        [](const Token &T) { return T.Kind == tok::comment; });
    if (!Tokens.empty())
      D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID();
  }

  const TokenStream &Code;
  const Token *Tok;
  clang::IdentifierTable PPKeywords;
};

struct Dumper {
  llvm::raw_ostream &OS;
  unsigned Indent = 0;

  Dumper(llvm::raw_ostream& OS) : OS(OS) {}
  void operator()(const DirectiveTree& Tree) {
    for (const auto& Chunk : Tree.Chunks)
      std::visit(*this, Chunk);
  }
  void operator()(const DirectiveTree::Conditional &Conditional) {
    for (unsigned I = 0; I < Conditional.Branches.size(); ++I) {
      const auto &Branch = Conditional.Branches[I];
      (*this)(Branch.first, Conditional.Taken == I);
      Indent += 2;
      (*this)(Branch.second);
      Indent -= 2;
    }
    (*this)(Conditional.End);
  }
  void operator()(const DirectiveTree::Directive &Directive,
                  bool Taken = false) {
    OS.indent(Indent) << llvm::formatv(
        "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind),
        Directive.Tokens.size(), Taken ? " TAKEN" : "");
  }
  void operator()(const DirectiveTree::Code &Code) {
    OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n",
                                       Code.Tokens.size());
  }
};
} // namespace

DirectiveTree DirectiveTree::parse(const TokenStream &Code) {
  DirectiveTree Result;
  DirectiveParser(Code).parse(&Result);
  return Result;
}

// Define operator<< in terms of dump() functions above.
#define OSTREAM_DUMP(Type)                                                     \
  llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) {        \
    Dumper{OS}(T);                                                         \
    return OS;                                                                 \
  }
OSTREAM_DUMP(DirectiveTree)
OSTREAM_DUMP(DirectiveTree::Directive)
OSTREAM_DUMP(DirectiveTree::Conditional)
OSTREAM_DUMP(DirectiveTree::Code)
#undef OSTREAM_DUMP

namespace {
// Makes choices about conditional branches.
//
// Generally it tries to maximize the amount of useful code we see.
//
// Caveat: each conditional is evaluated independently. Consider this code:
//   #ifdef WINDOWS
//     bool isWindows = true;
//   #endif
//   #ifndef WINDOWS
//     bool isWindows = false;
//   #endif
// We take both branches and define isWindows twice. We could track more state
// in order to produce a consistent view, but this is complex.
class BranchChooser {
public:
  BranchChooser(const TokenStream &Code) : Code(Code) {}

  // Describes code seen by making particular branch choices. Higher is better.
  struct Score {
    int Tokens = 0; // excluding comments and directives
    int Directives = 0;
    int Errors = 0; // #error directives

    bool operator>(const Score &Other) const {
      // Seeing errors is bad, other things are good.
      return std::make_tuple(-Errors, Tokens, Directives) >
             std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives);
    }

    Score &operator+=(const Score &Other) {
      Tokens += Other.Tokens;
      Directives += Other.Directives;
      Errors += Other.Errors;
      return *this;
    }
  };

  Score operator()(DirectiveTree::Code &C) {
    Score S;
    for (const Token &T : Code.tokens(C.Tokens))
      if (T.Kind != tok::comment)
        ++S.Tokens;
    return S;
  }

  Score operator()(DirectiveTree::Directive &D) {
    Score S;
    S.Directives = 1;
    S.Errors = D.Kind == tok::pp_error;
    return S;
  }

  Score operator()(DirectiveTree::Conditional &C) {
    Score Best;
    bool MayTakeTrivial = true;
    bool TookTrivial = false;

    for (unsigned I = 0; I < C.Branches.size(); ++I) {
      // Walk the branch to make its nested choices in any case.
      Score BranchScore = walk(C.Branches[I].second);
      // If we already took an #if 1, don't consider any other branches.
      if (TookTrivial)
        continue;
      // Is this a trivial #if 0 or #if 1?
      if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) {
        if (!*TriviallyTaken)
          continue; // Don't consider #if 0 even if it scores well.
        if (MayTakeTrivial)
          TookTrivial = true;
      } else {
        // After a nontrivial condition, #elif 1 isn't guaranteed taken.
        MayTakeTrivial = false;
      }
      // Is this the best branch so far? (Including if it's #if 1).
      if (TookTrivial || !C.Taken || BranchScore > Best) {
        Best = BranchScore;
        C.Taken = I;
      }
    }
    return Best;
  }
  Score walk(DirectiveTree &M) {
    Score S;
    for (auto &C : M.Chunks)
      S += std::visit(*this, C);
    return S;
  }

private:
  // Return true if the directive starts an always-taken conditional branch,
  // false if the branch is never taken, and std::nullopt otherwise.
  std::optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) {
    switch (Dir.Kind) {
    case clang::tok::pp_if:
    case clang::tok::pp_elif:
      break; // handled below
    case clang::tok::pp_else:
      return true;
    default: // #ifdef etc
      return std::nullopt;
    }

    const auto &Tokens = Code.tokens(Dir.Tokens);
    assert(!Tokens.empty() && Tokens.front().Kind == tok::hash);
    const Token &Name = Tokens.front().nextNC();
    const Token &Value = Name.nextNC();
    // Does the condition consist of exactly one token?
    if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end())
      return std::nullopt;
    return llvm::StringSwitch<std::optional<bool>>(Value.text())
        .Cases("true", "1", true)
        .Cases("false", "0", false)
        .Default(std::nullopt);
  }

  const TokenStream &Code;
};

} // namespace

void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) {
  BranchChooser{Code}.walk(Tree);
}

namespace {
class Preprocessor {
  const TokenStream &In;
  TokenStream &Out;

public:
  Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
  ~Preprocessor() { Out.finalize(); }

  void walk(const DirectiveTree &T) {
    for (const auto &C : T.Chunks)
      std::visit(*this, C);
  }

  void operator()(const DirectiveTree::Code &C) {
    for (const auto &Tok : In.tokens(C.Tokens))
      Out.push(Tok);
  }

  void operator()(const DirectiveTree::Directive &) {}

  void operator()(const DirectiveTree::Conditional &C) {
    if (C.Taken)
      walk(C.Branches[*C.Taken].second);
  }
};
} // namespace

TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
  TokenStream Out;
  Preprocessor(In, Out).walk(*this);
  return Out;
}

} // namespace pseudo
} // namespace clang