//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SymbolCollector.h"
#include "AST.h"
#include "CodeComplete.h"
#include "CodeCompletionStrings.h"
#include "ExpectedTypes.h"
#include "SourceCode.h"
#include "URI.h"
#include "clang-include-cleaner/Analysis.h"
#include "clang-include-cleaner/IncludeSpeller.h"
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
#include "index/CanonicalIncludes.h"
#include "index/Relation.h"
#include "index/Symbol.h"
#include "index/SymbolID.h"
#include "index/SymbolLocation.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include <cassert>
#include <memory>
#include <optional>
#include <string>
#include <utility>

namespace clang {
namespace clangd {
namespace {

/// Maps \p ND to the template it describes, when there is one.
/// If ND.getDescribedTemplate() yields nothing, \p ND itself is returned.
const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
  const NamedDecl *Result = &ND;
  if (const auto *Described = ND.getDescribedTemplate())
    Result = Described;
  return *Result;
}

// Decides whether \p ND is a private symbol living in a header generated by
// the protobuf compiler.
// FIXME: make filtering extensible when there are more use cases for symbol
// filters.
bool isPrivateProtoDecl(const NamedDecl &ND) {
  const auto &SM = ND.getASTContext().getSourceManager();
  // Only declarations located in proto-generated files are candidates.
  if (!isProtoFile(nameLocation(ND, SM), SM))
    return false;

  // Decls without an identifier (e.g. operators) are never private.
  const auto *Ident = ND.getIdentifier();
  if (Ident == nullptr)
    return false;

  // Names without an underscore are always considered public.
  const auto Name = Ident->getName();
  if (!Name.contains('_'))
    return false;

  // Nested proto entities (e.g. Message::Nested) have top-level decls
  // (Message_Nested) that shouldn't be used, so anything with an underscore
  // that is not an enum constant is ignored completely. The nested entities
  // are dangling type aliases, we may want to reconsider including them in the
  // future.
  if (ND.getKind() != Decl::EnumConstant)
    return true;

  // For enum constants, SOME_ENUM_CONSTANT is not private and should be
  // indexed, while Outer_INNER is private: a constant is private iff its name
  // contains a lowercase character. This will include OUTER_INNER and exclude
  // some_enum_constant.
  // FIXME: the heuristic relies on naming style (i.e. no underscore in
  // user-defined names) and can be improved.
  return llvm::any_of(Name, islower);
}

// Tells which include directives (if any) should be recorded for a symbol of
// the given kind. #include paths are only collected for symbols suitable for
// global code completion; namespaces are excluded because an #include path
// for a namespace is hard to define.
Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) {
  using SK = index::SymbolKind;
  // Protocols are the only kind that is import-only.
  if (Kind == SK::Protocol)
    return Symbol::Import;

  switch (Kind) {
  case SK::Macro:
  case SK::Enum:
  case SK::Struct:
  case SK::Class:
  case SK::Union:
  case SK::TypeAlias:
  case SK::Using:
  case SK::Function:
  case SK::Variable:
  case SK::EnumConstant:
  case SK::Concept:
    return Symbol::Include | Symbol::Import;
  default:
    break;
  }
  // Every other kind gets no include path at all.
  return Symbol::Invalid;
}

// Computes the [start, end) index positions of the token that begins at
// \p TokLoc. The end is obtained by advancing \p TokLoc by the measured token
// length.
std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
              const LangOptions &LangOpts) {
  // Converts a source location into the index's line/column representation.
  const auto ToIndexPosition = [&SM](SourceLocation Where) {
    const auto LSPPos = sourceLocToPosition(SM, Where);
    SymbolLocation::Position Converted;
    Converted.setLine(LSPPos.line);
    Converted.setColumn(LSPPos.character);
    return Converted;
  };

  const auto Length = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
  const SourceLocation TokEnd = TokLoc.getLocWithOffset(Length);
  const auto Begin = ToIndexPosition(TokLoc);
  const auto End = ToIndexPosition(TokEnd);
  return {Begin, End};
}

// Decides whether \p ND should become the *canonical* declaration of its
// symbol (e.g. the go-to-declaration target), overriding the default choice of
// Clang's canonical declaration, which is the first one in the TU.
//
// Example: a class definition is preferred over its forward declaration.
bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
  if (isa<TagDecl>(ND)) {
    // Tags: only definitions that live outside the main file are preferred.
    const bool IsDefinition =
        (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) != 0;
    if (!IsDefinition)
      return false;
    const auto &SM = ND.getASTContext().getSourceManager();
    return !isInsideMainFile(ND.getLocation(), SM);
  }
  // ObjC interfaces and protocols: prefer the declaration that is itself the
  // definition.
  if (const auto *Interface = dyn_cast<ObjCInterfaceDecl>(&ND))
    return Interface->isThisDeclarationADefinition();
  if (const auto *Protocol = dyn_cast<ObjCProtocolDecl>(&ND))
    return Protocol->isThisDeclarationADefinition();
  return false;
}

// Translates the Declaration/Definition/Reference bits of \p Roles into the
// corresponding RefKind flags, and adds RefKind::Spelled when \p Spelled is
// set. Other roles are ignored.
RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
  const auto HasRole = [Roles](index::SymbolRole Role) {
    return (Roles & static_cast<unsigned>(Role)) != 0;
  };

  RefKind Kinds = RefKind::Unknown;
  if (HasRole(index::SymbolRole::Declaration))
    Kinds |= RefKind::Declaration;
  if (HasRole(index::SymbolRole::Definition))
    Kinds |= RefKind::Definition;
  if (HasRole(index::SymbolRole::Reference))
    Kinds |= RefKind::Reference;
  if (Spelled)
    Kinds |= RefKind::Spelled;
  return Kinds;
}

// Maps an index relation onto the RelationKind stored in the index.
// RelationBaseOf takes precedence over RelationOverrideOf; any other relation
// yields std::nullopt and is not indexed.
std::optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
  const auto HasRole = [&R](index::SymbolRole Role) {
    return (R.Roles & static_cast<unsigned>(Role)) != 0;
  };
  if (HasRole(index::SymbolRole::RelationBaseOf))
    return RelationKind::BaseOf;
  if (HasRole(index::SymbolRole::RelationOverrideOf))
    return RelationKind::OverriddenBy;
  return std::nullopt;
}

// Reports whether the token at \p Loc spells the name of \p ND exactly.
// Only identifiers, constructor names and ObjC selectors are considered; for
// ObjC methods the token is compared against the first selector slot.
bool isSpelled(SourceLocation Loc, const NamedDecl &ND) {
  const auto DeclName = ND.getDeclName();
  switch (DeclName.getNameKind()) {
  case DeclarationName::Identifier:
  case DeclarationName::CXXConstructorName:
  case DeclarationName::ObjCZeroArgSelector:
  case DeclarationName::ObjCOneArgSelector:
  case DeclarationName::ObjCMultiArgSelector:
    break;
  default:
    // Other name kinds (operators, destructors, ...) are never "spelled".
    return false;
  }

  const auto &Ctx = ND.getASTContext();
  const auto &SrcMgr = Ctx.getSourceManager();
  const auto &LangOpts = Ctx.getLangOpts();

  clang::Token RawTok;
  // A true result is treated as "could not lex a token here".
  if (clang::Lexer::getRawToken(Loc, RawTok, SrcMgr, LangOpts))
    return false;

  const auto Spelling = clang::Lexer::getSpelling(RawTok, SrcMgr, LangOpts);
  if (const auto *Method = dyn_cast<ObjCMethodDecl>(&ND))
    return Spelling == Method->getSelector().getNameForSlot(0);
  return Spelling == DeclName.getAsString();
}
} // namespace

// Encapsulates decisions about how to record header paths in the index,
// including filename normalization, URI conversion etc.
// Expensive checks are cached internally.
class SymbolCollector::HeaderFileURICache {
  // Cached umbrella-header spellings of one framework. A field stays unset
  // when the corresponding umbrella header could not be stat'ed.
  struct FrameworkUmbrellaSpelling {
    // Spelling for the public umbrella header, e.g. <Foundation/Foundation.h>
    std::optional<std::string> PublicHeader;
    // Spelling for the private umbrella header, e.g.
    // <Foundation/Foundation_Private.h>
    std::optional<std::string> PrivateHeader;
  };
  // Weird double-indirect access to PP, which might not be ready yet when
  // HeaderFiles is created but will be by the time it's used.
  // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
  Preprocessor *&PP;
  const SourceManager &SM;
  // Copied from Opts.PragmaIncludes; a raw pointer that may be null.
  const include_cleaner::PragmaIncludes *PI;
  // Directory used to absolutize relative paths; may be empty.
  llvm::StringRef FallbackDir;
  // The mapped values point at entries owned by CachePathToURI.
  llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
  llvm::StringMap<std::string> CachePathToURI;
  llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
  llvm::StringMap<std::string> CachePathToFrameworkSpelling;
  llvm::StringMap<FrameworkUmbrellaSpelling>
      CacheFrameworkToUmbrellaHeaderSpelling;

public:
  HeaderFileURICache(Preprocessor *&PP, const SourceManager &SM,
                     const SymbolCollector::Options &Opts)
      : PP(PP), SM(SM), PI(Opts.PragmaIncludes), FallbackDir(Opts.FallbackDir) {
  }

  // Returns a canonical URI for the file \p FE.
  // We attempt to make the path absolute first.
  const std::string &toURI(const FileEntryRef FE) {
    auto R = CacheFEToURI.try_emplace(FE);
    if (R.second) {
      // First time we see this file: canonicalize, falling back to the name
      // the file was opened with when no canonical path is available.
      auto CanonPath = getCanonicalPath(FE, SM.getFileManager());
      R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE.getName());
    }
    return *R.first->second;
  }

  // Returns a canonical URI for \p Path.
  // If the file is in the FileManager, use that to canonicalize the path.
  // We attempt to make the path absolute in any case.
  const std::string &toURI(llvm::StringRef Path) {
    if (auto File = SM.getFileManager().getFileRef(Path))
      return toURI(*File);
    return toURIInternal(Path);
  }

  // Gets a canonical include (URI of the header or <header> or "header") for
  // header of \p FID (which should usually be the *expansion* file).
  // This does not account for any per-symbol overrides!
  // Returns "" if includes should not be inserted for this file.
  llvm::StringRef getIncludeHeader(FileID FID) {
    auto R = CacheFIDToInclude.try_emplace(FID);
    if (R.second)
      R.first->second = getIncludeHeaderUncached(FID);
    return R.first->second;
  }

  // If a file is mapped by canonical headers, use that mapping, regardless
  // of whether it's an otherwise-good header (header guards etc).
  // Returns "" when there is no mapping or the preprocessor is not set yet.
  llvm::StringRef mapCanonical(llvm::StringRef HeaderPath) {
    if (!PP)
      return "";
    // Populate the system header mapping as late as possible to
    // ensure the preprocessor has been set already.
    CanonicalIncludes SysHeaderMapping;
    SysHeaderMapping.addSystemHeadersMapping(PP->getLangOpts());
    auto Canonical = SysHeaderMapping.mapHeader(HeaderPath);
    if (Canonical.empty())
      return "";
    // If we had a mapping, always use it.
    assert(Canonical.starts_with("<") || Canonical.starts_with("\""));
    return Canonical;
  }

private:
  // This takes care of making paths absolute and path->URI caching, but no
  // FileManager-based canonicalization.
  const std::string &toURIInternal(llvm::StringRef Path) {
    auto R = CachePathToURI.try_emplace(Path);
    if (R.second) {
      llvm::SmallString<256> AbsPath = Path;
      if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
        llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
      assert(llvm::sys::path::is_absolute(AbsPath) &&
             "If the VFS can't make paths absolute, a FallbackDir must be "
             "provided");
      llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
      R.first->second = URI::create(AbsPath).toString();
    }
    return R.first->second;
  }

  struct FrameworkHeaderPath {
    // Path to the framework directory containing the Headers/PrivateHeaders
    // directories  e.g. /Frameworks/Foundation.framework/
    llvm::StringRef HeadersParentDir;
    // Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h
    // Note: This is NOT relative to the `HeadersParentDir`.
    llvm::StringRef HeaderSubpath;
    // Whether this header is under the PrivateHeaders dir
    bool IsPrivateHeader;
  };

  // Splits \p Path at the innermost "Headers" or "PrivateHeaders" component,
  // scanning from the end of the path. Returns std::nullopt if the path has
  // no such component.
  std::optional<FrameworkHeaderPath>
  splitFrameworkHeaderPath(llvm::StringRef Path) {
    using namespace llvm::sys;
    path::reverse_iterator I = path::rbegin(Path);
    // Prev trails I by one component, i.e. it is the component that follows
    // *I in the path.
    path::reverse_iterator Prev = I;
    path::reverse_iterator E = path::rend(Path);
    while (I != E) {
      const bool IsPublicDir = *I == "Headers";
      if (IsPublicDir || *I == "PrivateHeaders") {
        FrameworkHeaderPath HeaderPath;
        // Iterator differences against rend() are offsets into Path.
        HeaderPath.HeadersParentDir = Path.substr(0, I - E);
        HeaderPath.HeaderSubpath = Path.substr(Prev - E);
        HeaderPath.IsPrivateHeader = !IsPublicDir;
        return HeaderPath;
      }
      Prev = I;
      ++I;
    }
    // Unexpected, must not be a framework header.
    return std::nullopt;
  }

  // Frameworks typically have an umbrella header of the same name, e.g.
  // <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or
  // <Foundation/Foundation_Private.h> instead of
  // <Foundation/NSObject_Private.h> which should be used instead of directly
  // importing the header.
  // Returns the umbrella spelling matching the privacy of \p HeaderPath, or
  // std::nullopt if that umbrella header does not exist. Results are cached
  // per framework name.
  std::optional<std::string>
  getFrameworkUmbrellaSpelling(llvm::StringRef Framework,
                               const HeaderSearch &HS,
                               FrameworkHeaderPath &HeaderPath) {
    auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Framework);
    auto *CachedSpelling = &Res.first->second;
    if (!Res.second) {
      return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
                                        : CachedSpelling->PublicHeader;
    }
    // Cache miss: probe for both umbrella headers so later lookups of either
    // flavor are answered from the cache.
    SmallString<256> UmbrellaPath(HeaderPath.HeadersParentDir);
    llvm::sys::path::append(UmbrellaPath, "Headers", Framework + ".h");

    llvm::vfs::Status Status;
    auto StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
    if (!StatErr)
      CachedSpelling->PublicHeader = llvm::formatv("<{0}/{0}.h>", Framework);

    UmbrellaPath = HeaderPath.HeadersParentDir;
    llvm::sys::path::append(UmbrellaPath, "PrivateHeaders",
                            Framework + "_Private.h");

    StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
    if (!StatErr)
      CachedSpelling->PrivateHeader =
          llvm::formatv("<{0}/{0}_Private.h>", Framework);

    return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
                                      : CachedSpelling->PublicHeader;
  }

  // Compute the framework include spelling for `FE` which is in a framework
  // named `Framework`, e.g. `NSObject.h` in framework `Foundation` would
  // give <Foundation/Foundation.h> if the umbrella header exists, otherwise
  // <Foundation/NSObject.h>.
  // Returns std::nullopt if the path of `FE` cannot be split into framework
  // components.
  std::optional<llvm::StringRef>
  getFrameworkHeaderIncludeSpelling(FileEntryRef FE, llvm::StringRef Framework,
                                    HeaderSearch &HS) {
    auto Res = CachePathToFrameworkSpelling.try_emplace(FE.getName());
    auto *CachedHeaderSpelling = &Res.first->second;
    if (!Res.second)
      return llvm::StringRef(*CachedHeaderSpelling);

    auto HeaderPath = splitFrameworkHeaderPath(FE.getName());
    if (!HeaderPath) {
      // Unexpected: must not be a proper framework header, don't cache the
      // failure.
      CachePathToFrameworkSpelling.erase(Res.first);
      return std::nullopt;
    }
    if (auto UmbrellaSpelling =
            getFrameworkUmbrellaSpelling(Framework, HS, *HeaderPath)) {
      *CachedHeaderSpelling = *UmbrellaSpelling;
      return llvm::StringRef(*CachedHeaderSpelling);
    }

    *CachedHeaderSpelling =
        llvm::formatv("<{0}/{1}>", Framework, HeaderPath->HeaderSubpath).str();
    return llvm::StringRef(*CachedHeaderSpelling);
  }

  // Computes the include spelling for \p FID without consulting or filling
  // CacheFIDToInclude. Sources are tried in this order: the PragmaIncludes
  // mapping, the canonical system-header mapping, framework spelling, and
  // finally the file's own URI. Returns "" if no include should be inserted.
  llvm::StringRef getIncludeHeaderUncached(FileID FID) {
    const auto FE = SM.getFileEntryRefForID(FID);
    if (!FE || FE->getName().empty())
      return "";

    // PI comes straight from Opts.PragmaIncludes and may be null; only
    // consult it when it was provided.
    if (PI)
      if (auto Verbatim = PI->getPublic(*FE); !Verbatim.empty())
        return Verbatim;

    llvm::StringRef Filename = FE->getName();
    if (auto Canonical = mapCanonical(Filename); !Canonical.empty())
      return Canonical;

    // Framework headers are spelled as <FrameworkName/Foo.h>, not
    // "path/FrameworkName.framework/Headers/Foo.h".
    auto &HS = PP->getHeaderSearchInfo();
    if (const auto *HFI = HS.getExistingFileInfo(*FE))
      if (!HFI->Framework.empty())
        if (auto Spelling =
                getFrameworkHeaderIncludeSpelling(*FE, HFI->Framework, HS))
          return *Spelling;

    if (!tooling::isSelfContainedHeader(*FE, PP->getSourceManager(),
                                        PP->getHeaderSearchInfo())) {
      // A .inc or .def file is often included into a real header to define
      // symbols (e.g. LLVM tablegen files).
      if (Filename.ends_with(".inc") || Filename.ends_with(".def"))
        // Don't use cache reentrantly due to iterator invalidation.
        return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
      // Conservatively refuse to insert #includes to files without guards.
      return "";
    }
    // Standard case: just insert the file itself.
    return toURI(*FE);
  }
};

// Builds the index location (file URI plus token range) of the token at
// \p TokLoc. Yields std::nullopt when the location's file has no file entry.
std::optional<SymbolLocation>
SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
  const auto &SM = ASTCtx->getSourceManager();
  const auto File = SM.getFileEntryRefForID(SM.getFileID(TokLoc));
  if (!File)
    return std::nullopt;

  SymbolLocation Where;
  // The URI string is owned by the HeaderFileURIs cache.
  Where.FileURI = HeaderFileURIs->toURI(*File).c_str();
  const auto TokRange = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
  Where.Start = TokRange.first;
  Where.End = TokRange.second;
  return Where;
}

// Takes ownership of the collector options; all other state is set up later
// in initialize().
SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
// Defined out of line; presumably because HeaderFileURICache is only a
// complete type in this file — TODO confirm against the header.
SymbolCollector::~SymbolCollector() = default;

// Binds the collector to \p Ctx and (re)creates the per-AST helpers: the
// header URI cache and the code-completion allocator / TU info.
void SymbolCollector::initialize(ASTContext &Ctx) {
  ASTCtx = &Ctx;
  // The cache keeps a reference to the PP member, so it observes a
  // preprocessor that is set after this call.
  HeaderFileURIs = std::make_unique<HeaderFileURICache>(
      PP, Ctx.getSourceManager(), Opts);
  auto Allocator = std::make_shared<GlobalCodeCompletionAllocator>();
  CompletionAllocator = std::move(Allocator);
  CompletionTUInfo =
      std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
}

// Decides whether \p ND is worth indexing under \p Opts.
// \p IsMainFileOnly states that the symbol is only visible in the main file.
bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
                                          const ASTContext &ASTCtx,
                                          const Options &Opts,
                                          bool IsMainFileOnly) {
  // Anonymous declarations, e.g (anonymous enum/class/struct), are skipped.
  if (ND.getDeclName().isEmpty())
    return false;

  if (IsMainFileOnly) {
    // Main-file symbols are skipped unless we were asked to collect them.
    if (!Opts.CollectMainFileSymbols)
      return false;
  } else if (ND.isInAnonymousNamespace()) {
    // Symbols in anonymous namespaces in header files are skipped.
    return false;
  }

  // Among function-local symbols, only classes and their member functions
  // are indexed.
  if (index::isFunctionLocalSymbol(&ND)) {
    if (isa<RecordDecl>(ND))
      return true;
    return ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate();
  }

  // We want most things but not "local" symbols such as symbols inside
  // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
  // FIXME: Need a matcher for ExportDecl in order to include symbols declared
  // within an export.
  const auto *Parent = ND.getDeclContext();
  bool ParentIsIndexable = false;
  switch (Parent->getDeclKind()) {
  case Decl::TranslationUnit:
  case Decl::Namespace:
  case Decl::LinkageSpec:
  case Decl::Enum:
  case Decl::ObjCProtocol:
  case Decl::ObjCInterface:
  case Decl::ObjCCategory:
  case Decl::ObjCCategoryImpl:
  case Decl::ObjCImplementation:
    ParentIsIndexable = true;
    break;
  default:
    // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
    // easier to cast.
    ParentIsIndexable = isa<RecordDecl>(Parent);
    break;
  }
  if (!ParentIsIndexable)
    return false;

  // Internal symbols in protobuf generated headers are not indexed.
  if (isPrivateProtoDecl(ND))
    return false;

  // Unless requested, drop reserved names that come from system headers.
  if (Opts.CollectReserved)
    return true;
  const bool IsReserved =
      hasReservedName(ND) || hasReservedScope(*ND.getDeclContext());
  if (!IsReserved)
    return true;
  return !ASTCtx.getSourceManager().isInSystemHeader(ND.getLocation());
}

// Walks outwards from \p Enclosing through its decl contexts and returns the
// first declaration that shouldCollectSymbol() accepts (evaluated as a
// main-file-only symbol). Returns nullptr if no enclosing decl qualifies.
const Decl *
SymbolCollector::getRefContainer(const Decl *Enclosing,
                                 const SymbolCollector::Options &Opts) {
  for (const Decl *Current = Enclosing; Current != nullptr;
       Current = dyn_cast_or_null<Decl>(Current->getDeclContext())) {
    const auto *Named = dyn_cast<NamedDecl>(Current);
    if (!Named)
      continue;
    if (shouldCollectSymbol(*Named, Named->getASTContext(), Opts, true))
      return Current;
  }
  return nullptr;
}

// Index callback invoked for every occurrence of a declaration.
// Depending on \p Roles and the collector options it records a reference
// (addRef), the relations attached to the occurrence (processRelations) and,
// for declarations/definitions, the symbol itself (addDeclaration /
// addDefinition).
// \p D is the decl handed over by the indexing API, ASTNode.OrigD the
// occurrence actually being visited, and \p Loc the occurrence location.
// Always return true to continue indexing.
bool SymbolCollector::handleDeclOccurrence(
    const Decl *D, index::SymbolRoleSet Roles,
    llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
    index::IndexDataConsumer::ASTNodeInfo ASTNode) {
  assert(ASTCtx && PP && HeaderFileURIs);
  assert(CompletionAllocator && CompletionTUInfo);
  assert(ASTNode.OrigD);
  // Indexing API puts canonical decl into D, which might not have a valid
  // source location for implicit/built-in decls. Fallback to original decl in
  // such cases.
  if (D->getLocation().isInvalid())
    D = ASTNode.OrigD;
  // If OrigD is a declaration associated with a friend declaration and it's
  // not a definition, skip it. Note that OrigD is the occurrence that the
  // collector is currently visiting.
  if ((ASTNode.OrigD->getFriendObjectKind() !=
       Decl::FriendObjectKind::FOK_None) &&
      !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
    return true;
  // A declaration created for a friend declaration should not be used as the
  // canonical declaration in the index. Use OrigD instead, unless we've already
  // picked a replacement for D
  if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
    D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
  // Flag to mark that D should be considered canonical meaning its declaration
  // will override any previous declaration for the Symbol.
  bool DeclIsCanonical = false;
  // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
  // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
  if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
    DeclIsCanonical = true;
    if (const auto *CID = IID->getClassInterface())
      if (const auto *DD = CID->getDefinition())
        if (!DD->isImplicitInterfaceDecl())
          D = DD;
  }
  // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
  // its ObjCCategoryDecl if it has one.
  if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
    DeclIsCanonical = true;
    if (const auto *CD = CID->getCategoryDecl())
      D = CD;
  }
  // Only named declarations can be turned into symbols.
  const NamedDecl *ND = dyn_cast<NamedDecl>(D);
  if (!ND)
    return true;

  // Without a symbol ID there is nothing to key refs or symbols on.
  auto ID = getSymbolIDCached(ND);
  if (!ID)
    return true;

  // Mark D as referenced if this is a reference coming from the main file.
  // D may not be an interesting symbol, but it's cheaper to check at the end.
  auto &SM = ASTCtx->getSourceManager();
  if (Opts.CountReferences &&
      (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
      SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
    ReferencedSymbols.insert(ID);

  // ND is the canonical (i.e. first) declaration. If it's in the main file
  // (which is not a header), then no public declaration was visible, so assume
  // it's main-file only.
  bool IsMainFileOnly =
      SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
      !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
                    ASTCtx->getLangOpts());
  // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
  if (ASTNode.OrigD->isImplicit() ||
      !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
    return true;

  // Note: we need to process relations for all decl occurrences, including
  // refs, because the indexing code only populates relations for specific
  // occurrences. For example, RelationBaseOf is only populated for the
  // occurrence inside the base-specifier.
  processRelations(*ND, ID, Relations);

  bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
  // Unlike other fields, e.g. Symbols (which use spelling locations), we use
  // file locations for references (as it aligns the behavior of clangd's
  // AST-based xref).
  // FIXME: we should try to use the file locations for other fields.
  // Refs to namespaces are never recorded. Refs to main-file-only symbols are
  // recorded only if requested or if the symbol is externally visible.
  if (CollectRef &&
      (!IsMainFileOnly || Opts.CollectMainFileRefs ||
       ND->isExternallyVisible()) &&
      !isa<NamespaceDecl>(ND)) {
    auto FileLoc = SM.getFileLoc(Loc);
    auto FID = SM.getFileID(FileLoc);
    if (Opts.RefsInHeaders || FID == SM.getMainFileID()) {
      addRef(ID, SymbolRef{FileLoc, FID, Roles,
                           getRefContainer(ASTNode.Parent, Opts),
                           isSpelled(FileLoc, *ND)});
    }
  }
  // Don't continue indexing if this is a mere reference.
  if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
                 static_cast<unsigned>(index::SymbolRole::Definition))))
    return true;

  // FIXME: ObjCPropertyDecl are not properly indexed here:
  // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
  // not a NamedDecl.
  auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
  if (!OriginalDecl)
    return true;

  // Look up any symbol already recorded for this ID before deciding whether
  // a (new) declaration has to be added.
  const Symbol *BasicSymbol = Symbols.find(ID);
  if (isPreferredDeclaration(*OriginalDecl, Roles))
    // If OriginalDecl is preferred, replace/create the existing canonical
    // declaration (e.g. a class forward declaration). There should be at most
    // one duplicate as we expect to see only one preferred declaration per
    // TU, because in practice they are definitions.
    BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
  else if (!BasicSymbol || DeclIsCanonical)
    BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);

  // Definitions additionally record the definition location on the symbol.
  if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
    addDefinition(*OriginalDecl, *BasicSymbol);

  return true;
}

void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
  assert(HeaderFileURIs && PP);
  const auto &SM = PP->getSourceManager();
  const auto MainFileEntryRef = SM.getFileEntryRefForID(SM.getMainFileID());
  assert(MainFileEntryRef);

  const std::string &MainFileURI = HeaderFileURIs->toURI(*MainFileEntryRef);
  // Add macro references.
  for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
    for (const auto &MacroRef : IDToRefs.second) {
      const auto &Range = MacroRef.toRange(SM);
      bool IsDefinition = MacroRef.IsDefinition;
      Ref R;
      R.Location.Start.setLine(Range.start.line);
      R.Location.Start.setColumn(Range.start.character);
      R.Location.End.setLine(Range.end.line);
      R.Location.End.setColumn(Range.end.character);
      R.Location.FileURI = MainFileURI.c_str();
      R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
      Refs.insert(IDToRefs.first, R);
      if (IsDefinition) {
        Symbol S;
        S.ID = IDToRefs.first;
        auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
        auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
        S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
        S.SymInfo.Kind = index::SymbolKind::Macro;
        S.SymInfo.SubKind = index::SymbolSubKind::None;
        S.SymInfo.Properties = index::SymbolPropertySet();
        S.SymInfo.Lang = index::SymbolLanguage::C;
        S.Origin = Opts.Origin;
        S.CanonicalDeclaration = R.Location;
        // Make the macro visible for code completion if main file is an
        // include-able header.
        if (!HeaderFileURIs->getIncludeHeader(SM.getMainFileID()).empty()) {
          S.Flags |= Symbol::IndexedForCodeCompletion;
          S.Flags |= Symbol::VisibleOutsideFile;
        }
        Symbols.insert(S);
      }
    }
  }
}

// Index callback invoked for every macro occurrence.
// Records a ref when \p Roles matches Opts.RefFilter and, if Opts.CollectMacro
// is set, inserts a Symbol for the first declaration/definition seen for the
// macro.
// \p Name is the macro name, \p MI its definition info and \p Loc the
// location of the occurrence.
// Always returns true to continue indexing (like handleDeclOccurrence);
// skipping a single macro must not report false, since false asks the indexer
// to stop.
bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
                                            const MacroInfo *MI,
                                            index::SymbolRoleSet Roles,
                                            SourceLocation Loc) {
  assert(PP);
  // Builtin macros don't have useful locations and aren't needed in completion.
  if (MI->isBuiltinMacro())
    return true;

  const auto &SM = PP->getSourceManager();
  auto DefLoc = MI->getDefinitionLoc();
  // Also avoid storing macros that aren't defined in any file, i.e. predefined
  // macros like __DBL_MIN__ and those defined on the command line.
  if (SM.isWrittenInBuiltinFile(DefLoc) ||
      SM.isWrittenInCommandLineFile(DefLoc) ||
      Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
    return true;

  auto ID = getSymbolIDCached(Name->getName(), MI, SM);
  if (!ID)
    return true;

  auto SpellingLoc = SM.getSpellingLoc(Loc);
  // A macro defined in a main file that is not a header is only visible there.
  bool IsMainFileOnly =
      SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
      !isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
                    ASTCtx->getLangOpts());
  // Do not store references to main-file macros.
  if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
      (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) {
    // FIXME: Populate container information for macro references.
    // FIXME: All MacroRefs are marked as Spelled now, but this should be
    // checked.
    addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr,
                         /*Spelled=*/true});
  }

  // Collect symbols.
  if (!Opts.CollectMacro)
    return true;

  // Skip main-file macros if we are not collecting them. This only skips the
  // current macro, so indexing must continue.
  if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
    return true;

  // Mark the macro as referenced if this is a reference coming from the main
  // file. The macro may not be an interesting symbol, but it's cheaper to check
  // at the end.
  if (Opts.CountReferences &&
      (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
      SM.getFileID(SpellingLoc) == SM.getMainFileID())
    ReferencedSymbols.insert(ID);

  // Don't continue indexing if this is a mere reference.
  // FIXME: remove macro with ID if it is undefined.
  if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
        Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
    return true;

  // Only collect one instance in case there are multiple.
  if (Symbols.find(ID) != nullptr)
    return true;

  Symbol S;
  S.ID = std::move(ID);
  S.Name = Name->getName();
  if (!IsMainFileOnly) {
    S.Flags |= Symbol::IndexedForCodeCompletion;
    S.Flags |= Symbol::VisibleOutsideFile;
  }
  S.SymInfo = index::getSymbolInfoForMacro(*MI);
  S.Origin = Opts.Origin;
  // FIXME: use the result to filter out symbols.
  shouldIndexFile(SM.getFileID(Loc));
  if (auto DeclLoc = getTokenLocation(DefLoc))
    S.CanonicalDeclaration = *DeclLoc;

  // Derive signature and snippet suffix from the macro's completion string.
  CodeCompletionResult SymbolCompletion(Name);
  const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
      *PP, *CompletionAllocator, *CompletionTUInfo);
  std::string Signature;
  std::string SnippetSuffix;
  getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,
               SymbolCompletion.CursorKind);
  S.Signature = Signature;
  S.CompletionSnippetSuffix = SnippetSuffix;

  IndexedMacros.insert(Name);

  setIncludeLocation(S, DefLoc, include_cleaner::Macro{Name, DefLoc});
  Symbols.insert(S);
  return true;
}

void SymbolCollector::processRelations(
    const NamedDecl &ND, const SymbolID &ID,
    ArrayRef<index::SymbolRelation> Relations) {
  for (const auto &R : Relations) {
    auto RKind = indexableRelation(R);
    if (!RKind)
      continue;
    const Decl *Object = R.RelatedSymbol;

    auto ObjectID = getSymbolIDCached(Object);
    if (!ObjectID)
      continue;

    // Record the relation.
    // TODO: There may be cases where the object decl is not indexed for some
    // reason. Those cases should probably be removed in due course, but for
    // now there are two possible ways to handle it:
    //   (A) Avoid storing the relation in such cases.
    //   (B) Store it anyways. Clients will likely lookup() the SymbolID
    //       in the index and find nothing, but that's a situation they
    //       probably need to handle for other reasons anyways.
    // We currently do (B) because it's simpler.
    if (*RKind == RelationKind::BaseOf)
      this->Relations.insert({ID, *RKind, ObjectID});
    else if (*RKind == RelationKind::OverriddenBy)
      this->Relations.insert({ObjectID, *RKind, ID});
  }
}

// Remembers, for symbol \p S, the file its definition location expands into
// and the headers that include-cleaner reports as providers of \p Sym.
// Does nothing when include paths are not being collected or when this kind
// of symbol is not eligible for include-path collection.
void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation DefLoc,
                                         const include_cleaner::Symbol &Sym) {
  const auto &SM = PP->getSourceManager();
  if (!Opts.CollectIncludePath)
    return;
  if (shouldCollectIncludePath(S.SymInfo.Kind) == Symbol::Invalid)
    return;

  // The symbol is exposed at its expansion location, so that is the file we
  // want the #include header for.
  const FileID ExpansionFID = SM.getDecomposedExpansionLoc(DefLoc).first;
  if (ExpansionFID.isValid())
    IncludeFiles[S.ID] = ExpansionFID;

  // SymbolCollector may run while parsing rather than at the end of the
  // translation unit, so more redeclarations become visible over time.
  // Refresh the providers on every occurrence of the symbol.
  SymbolProviders[S.ID] =
      include_cleaner::headersForSymbol(Sym, SM, Opts.PragmaIncludes);
}

// Returns the standard library header spelling (e.g. "<utility>") for \p S,
// or an empty string if the language is neither C11 nor C++ or if \p S is not
// a recognized standard library symbol.
llvm::StringRef getStdHeader(const Symbol *S, const LangOptions &LangOpts) {
  // Only C11 and C++ are looked up in the standard library tables.
  if (!LangOpts.C11 && !LangOpts.CPlusPlus)
    return "";
  const tooling::stdlib::Lang Lang =
      LangOpts.C11 ? tooling::stdlib::Lang::C : tooling::stdlib::Lang::CXX;

  // std::move is special-cased: a signature containing a comma maps to
  // <algorithm>, anything else to <utility>.
  if (S->Scope == "std::" && S->Name == "move")
    return S->Signature.contains(',') ? "<algorithm>" : "<utility>";

  const auto StdSym = tooling::stdlib::Symbol::named(S->Scope, S->Name, Lang);
  if (!StdSym)
    return "";
  const auto StdHeader = StdSym->header();
  if (!StdHeader)
    return "";
  return StdHeader->name();
}

// Finalizes the state collected so far. In order, this:
//   1. increments the reference count of every symbol in ReferencedSymbols,
//   2. erases macros that turned out to be header guards (only when
//      Opts.CollectMacro is set),
//   3. attaches an include header to each symbol that has recorded providers,
//   4. clears the bookkeeping containers (ReferencedSymbols, IncludeFiles,
//      SymbolProviders, FilesWithObjCConstructs).
void SymbolCollector::finish() {
  // At the end of the TU, add 1 to the refcount of all referenced symbols.
  for (const auto &ID : ReferencedSymbols) {
    if (const auto *S = Symbols.find(ID)) {
      // SymbolSlab::Builder returns const symbols because strings are interned
      // and modifying returned symbols without inserting again wouldn't go
      // well. const_cast is safe here as we're modifying a data owned by the
      // Symbol. This reduces time spent in SymbolCollector by ~1%.
      ++const_cast<Symbol *>(S)->References;
    }
  }
  if (Opts.CollectMacro) {
    assert(PP);
    // First, drop header guards. We can't identify these until EOF.
    for (const IdentifierInfo *II : IndexedMacros) {
      if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
        if (auto ID =
                getSymbolIDCached(II->getName(), MI, PP->getSourceManager()))
          if (MI->isUsedForHeaderGuard())
            Symbols.erase(ID);
    }
  }
  // Per-call caches, so each file is scanned for imports and each provider
  // header is spelled at most once.
  llvm::DenseMap<FileID, bool> FileToContainsImportsOrObjC;
  llvm::DenseMap<include_cleaner::Header, std::string> HeaderSpelling;
  // Fill in IncludeHeaders.
  // We delay this until end of TU so header guards are all resolved.
  for (const auto &[SID, Providers] : SymbolProviders) {
    // The symbol may no longer be present (e.g. erased as a header guard
    // above); skip it in that case.
    const Symbol *S = Symbols.find(SID);
    if (!S)
      continue;

    // FID may be invalid here; it is checked with isValid() before being used
    // for the #import decision.
    FileID FID = IncludeFiles.lookup(SID);
    // Determine if the FID is #include'd or #import'ed.
    Symbol::IncludeDirective Directives = Symbol::Invalid;
    auto CollectDirectives = shouldCollectIncludePath(S->SymInfo.Kind);
    if ((CollectDirectives & Symbol::Include) != 0)
      Directives |= Symbol::Include;
    // Only allow #import for symbols from ObjC-like files.
    if ((CollectDirectives & Symbol::Import) != 0 && FID.isValid()) {
      auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(FID);
      // Compute the answer only the first time this file is seen.
      if (Inserted)
        It->second = FilesWithObjCConstructs.contains(FID) ||
                     tooling::codeContainsImports(
                         ASTCtx->getSourceManager().getBufferData(FID));
      if (It->second)
        Directives |= Symbol::Import;
    }

    if (Directives == Symbol::Invalid)
      continue;

    // Use the include location-based logic for Objective-C symbols.
    if (Directives & Symbol::Import) {
      // Prefer a standard library header; otherwise fall back to the header
      // derived from the file the symbol was found in.
      llvm::StringRef IncludeHeader = getStdHeader(S, ASTCtx->getLangOpts());
      if (IncludeHeader.empty())
        IncludeHeader = HeaderFileURIs->getIncludeHeader(FID);

      if (!IncludeHeader.empty()) {
        // Symbols handed out by the builder are const, so update a copy and
        // insert it again.
        auto NewSym = *S;
        NewSym.IncludeHeaders.push_back({IncludeHeader, 1, Directives});
        Symbols.insert(NewSym);
      }
      // FIXME: use providers from include-cleaner library once it's polished
      // for Objective-C.
      continue;
    }

    // For #include's, use the providers computed by the include-cleaner
    // library.
    assert(Directives == Symbol::Include);
    // Ignore providers that are not self-contained, this is especially
    // important for symbols defined in the main-file. We want to prefer the
    // header, if possible.
    // TODO: Limit this to specifically ignore main file, when we're indexing a
    // non-header file?
    // Returns the first provider that is either non-physical or a
    // self-contained physical header; std::nullopt if there is none.
    auto SelfContainedProvider =
        [this](llvm::ArrayRef<include_cleaner::Header> Providers)
        -> std::optional<include_cleaner::Header> {
      for (const auto &H : Providers) {
        if (H.kind() != include_cleaner::Header::Physical)
          return H;
        if (tooling::isSelfContainedHeader(H.physical(), PP->getSourceManager(),
                                           PP->getHeaderSearchInfo()))
          return H;
      }
      return std::nullopt;
    };
    const auto OptionalProvider = SelfContainedProvider(Providers);
    if (!OptionalProvider)
      continue;
    const auto &H = *OptionalProvider;
    // Compute the spelling once per provider header. An entry left empty
    // means no spelling was produced, and no include header is added below.
    const auto [SpellingIt, Inserted] = HeaderSpelling.try_emplace(H);
    if (Inserted) {
      auto &SM = ASTCtx->getSourceManager();
      if (H.kind() == include_cleaner::Header::Kind::Physical) {
        // FIXME: Get rid of this once include-cleaner has support for system
        // headers.
        if (auto Canonical =
                HeaderFileURIs->mapCanonical(H.physical().getName());
            !Canonical.empty())
          SpellingIt->second = Canonical;
        // For physical files, prefer URIs as spellings might change
        // depending on the translation unit.
        else if (tooling::isSelfContainedHeader(H.physical(), SM,
                                                PP->getHeaderSearchInfo()))
          SpellingIt->second =
              HeaderFileURIs->toURI(H.physical());
      } else {
        // Non-physical providers are spelled relative to the main file.
        SpellingIt->second = include_cleaner::spellHeader(
            {H, PP->getHeaderSearchInfo(),
             SM.getFileEntryForID(SM.getMainFileID())});
      }
    }

    if (!SpellingIt->second.empty()) {
      // As above: update a copy of the symbol and insert it again.
      auto NewSym = *S;
      NewSym.IncludeHeaders.push_back({SpellingIt->second, 1, Directives});
      Symbols.insert(NewSym);
    }
  }

  // Reset the bookkeeping consumed above.
  ReferencedSymbols.clear();
  IncludeFiles.clear();
  SymbolProviders.clear();
  FilesWithObjCConstructs.clear();
}

// Builds a Symbol for the declaration \p ND with identifier \p ID, inserts it
// into Symbols and returns a pointer to the stored copy.
//
// \p IsMainFileOnly symbols are never flagged IndexedForCodeCompletion or
// VisibleOutsideFile. Symbols that are not indexed for code completion get
// only the basic fields (plus documentation when Opts.StoreAllDocumentation
// is set); all others additionally get signature, snippet suffix, return
// type, type and include-location information.
const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
                                              bool IsMainFileOnly) {
  auto &Ctx = ND.getASTContext();
  auto &SM = Ctx.getSourceManager();

  Symbol S;
  S.ID = std::move(ID);
  // QName is kept in a local so the scope/name pieces assigned below have
  // backing storage until the symbol is inserted.
  std::string QName = printQualifiedName(ND);
  // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
  // for consistency with CodeCompletionString and a clean name/signature split.
  std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
  std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
  S.TemplateSpecializationArgs = TemplateSpecializationArgs;

  // We collect main-file symbols, but do not use them for code completion.
  if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
    S.Flags |= Symbol::IndexedForCodeCompletion;
  if (isImplementationDetail(&ND))
    S.Flags |= Symbol::ImplementationDetail;
  if (!IsMainFileOnly)
    S.Flags |= Symbol::VisibleOutsideFile;
  S.SymInfo = index::getSymbolInfo(&ND);
  auto Loc = nameLocation(ND, SM);
  assert(Loc.isValid() && "Invalid source location for NamedDecl");
  // FIXME: use the result to filter out symbols.
  auto FID = SM.getFileID(Loc);
  shouldIndexFile(FID);
  if (auto DeclLoc = getTokenLocation(Loc))
    S.CanonicalDeclaration = *DeclLoc;

  S.Origin = Opts.Origin;
  if (ND.getAvailability() == AR_Deprecated)
    S.Flags |= Symbol::Deprecated;

  // Add completion info.
  // FIXME: we may want to choose a different redecl, or combine from several.
  assert(ASTCtx && PP && "ASTContext and Preprocessor must be set.");
  // We use the primary template, as clang does during code completion.
  CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
  const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
      *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
      *CompletionTUInfo,
      /*IncludeBriefComments*/ false);
  std::string Documentation =
      formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
                                              /*CommentsFromHeaders=*/true));
  // Symbols that are not indexed for code completion are stored right away,
  // without signature/type/include information; their documentation is kept
  // only when explicitly requested.
  if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
    if (Opts.StoreAllDocumentation)
      S.Documentation = Documentation;
    Symbols.insert(S);
    return Symbols.find(S.ID);
  }
  S.Documentation = Documentation;
  std::string Signature;
  std::string SnippetSuffix;
  getSignature(*CCS, &Signature, &SnippetSuffix, SymbolCompletion.Kind,
               SymbolCompletion.CursorKind);
  S.Signature = Signature;
  S.CompletionSnippetSuffix = SnippetSuffix;
  std::string ReturnType = getReturnType(*CCS);
  S.ReturnType = ReturnType;

  // TypeStorage is declared outside the `if` so that the value S.Type is set
  // from stays alive until the insert below.
  std::optional<OpaqueType> TypeStorage;
  // NOTE: symbols without IndexedForCodeCompletion returned early above, so
  // this condition always holds at this point.
  if (S.Flags & Symbol::IndexedForCodeCompletion) {
    TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
    if (TypeStorage)
      S.Type = TypeStorage->raw();
  }

  Symbols.insert(S);
  setIncludeLocation(S, ND.getLocation(), include_cleaner::Symbol{ND});
  // Remember files containing Objective-C symbols; finish() consults this set
  // when deciding whether #import is allowed.
  if (S.SymInfo.Lang == index::SymbolLanguage::ObjC)
    FilesWithObjCConstructs.insert(FID);
  return Symbols.find(S.ID);
}

void SymbolCollector::addDefinition(const NamedDecl &ND,
                                    const Symbol &DeclSym) {
  if (DeclSym.Definition)
    return;
  const auto &SM = ND.getASTContext().getSourceManager();
  auto Loc = nameLocation(ND, SM);
  shouldIndexFile(SM.getFileID(Loc));
  auto DefLoc = getTokenLocation(Loc);
  // If we saw some forward declaration, we end up copying the symbol.
  // This is not ideal, but avoids duplicating the "is this a definition" check
  // in clang::index. We should only see one definition.
  if (!DefLoc)
    return;
  Symbol S = DeclSym;
  // FIXME: use the result to filter out symbols.
  S.Definition = *DefLoc;
  Symbols.insert(S);
}

// Returns whether the file \p FID passes Opts.FileFilter. With no filter set,
// every file passes. The filter is run at most once per file; its answer is
// stored in FilesToIndexCache.
bool SymbolCollector::shouldIndexFile(FileID FID) {
  if (!Opts.FileFilter)
    return true;
  auto [Entry, IsNew] = FilesToIndexCache.try_emplace(FID);
  if (IsNew)
    Entry->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
  return Entry->second;
}

// Stores one reference to the symbol \p ID, built from \p SR. Nothing is
// recorded when no file entry is available for SR.FID.
void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) {
  const auto &SM = ASTCtx->getSourceManager();
  // FIXME: use the result to filter out references.
  shouldIndexFile(SR.FID);

  const auto File = SM.getFileEntryRefForID(SR.FID);
  if (!File)
    return;

  const auto TokenRange = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts());
  Ref NewRef;
  NewRef.Location.Start = TokenRange.first;
  NewRef.Location.End = TokenRange.second;
  NewRef.Location.FileURI = HeaderFileURIs->toURI(*File).c_str();
  NewRef.Kind = toRefKind(SR.Roles, SR.Spelled);
  NewRef.Container = getSymbolIDCached(SR.Container);
  Refs.insert(ID, NewRef);
}

// Returns the SymbolID of declaration \p D, computing it with getSymbolID()
// on first request and serving it from DeclToIDCache afterwards.
SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) {
  auto [Entry, IsNew] = DeclToIDCache.try_emplace(D, SymbolID{});
  if (IsNew)
    Entry->second = getSymbolID(D);
  return Entry->second;
}

// Returns the SymbolID of the macro \p MacroName defined by \p MI, computing
// it with getSymbolID() on first request. MacroToIDCache is keyed on \p MI
// alone.
SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName,
                                            const MacroInfo *MI,
                                            const SourceManager &SM) {
  auto [Entry, IsNew] = MacroToIDCache.try_emplace(MI, SymbolID{});
  if (IsNew)
    Entry->second = getSymbolID(MacroName, MI, SM);
  return Entry->second;
}
} // namespace clangd
} // namespace clang