#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Timer.h"
#include <atomic>
#include <iterator>
#include <map>
#include <set>
#include <unordered_map>
#define DEBUG_TYPE "bolt-icf"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::OptionCategory BoltOptCategory;
static cl::opt<bool>
ICFUseDFS("icf-dfs", cl::desc("use DFS ordering when using -icf option"),
cl::ReallyHidden, cl::cat(BoltOptCategory));
static cl::opt<bool>
TimeICF("time-icf",
cl::desc("time icf steps"),
cl::ReallyHidden,
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
}
static bool equalJumpTables(const JumpTable &JumpTableA,
const JumpTable &JumpTableB,
const BinaryFunction &FunctionA,
const BinaryFunction &FunctionB) {
if (JumpTableA.EntrySize != JumpTableB.EntrySize)
return false;
if (JumpTableA.Type != JumpTableB.Type)
return false;
if (JumpTableA.getSize() != JumpTableB.getSize())
return false;
for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
const MCSymbol *LabelA = JumpTableA.Entries[Index];
const MCSymbol *LabelB = JumpTableB.Entries[Index];
const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
if (!TargetA || !TargetB) {
assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
"no target basic block found");
assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
"no target basic block found");
if (TargetA != TargetB)
return false;
continue;
}
assert(TargetA && TargetB && "cannot locate target block(s)");
if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
return false;
}
return true;
}
template <class Compare>
static bool isInstrEquivalentWith(const MCInst &InstA,
const BinaryBasicBlock &BBA,
const MCInst &InstB,
const BinaryBasicBlock &BBB, Compare Comp) {
if (InstA.getOpcode() != InstB.getOpcode())
return false;
const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
const std::optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
const std::optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
if (EHInfoA || EHInfoB) {
if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
return false;
if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
return false;
if (EHInfoA && EHInfoB) {
if (EHInfoA->second != EHInfoB->second)
return false;
if (!EHInfoA->first != !EHInfoB->first)
return false;
if (EHInfoA->first && EHInfoB->first) {
const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
assert(LPA && LPB && "cannot locate landing pad(s)");
if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
return false;
}
}
}
return BC.MIB->equals(InstA, InstB, Comp);
}
static bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
bool CongruentSymbols) {
assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
if (A.getLayout().block_size() != B.getLayout().block_size())
return false;
if (A.isMultiEntry() || B.isMultiEntry())
return false;
if (A.hasIslandsInfo() || B.hasIslandsInfo())
return false;
SmallVector<const BinaryBasicBlock *, 0> OrderA;
SmallVector<const BinaryBasicBlock *, 0> OrderB;
if (opts::ICFUseDFS) {
copy(A.dfs(), std::back_inserter(OrderA));
copy(B.dfs(), std::back_inserter(OrderB));
} else {
copy(A.getLayout().blocks(), std::back_inserter(OrderA));
copy(B.getLayout().blocks(), std::back_inserter(OrderB));
}
const BinaryContext &BC = A.getBinaryContext();
auto BBI = OrderB.begin();
for (const BinaryBasicBlock *BB : OrderA) {
const BinaryBasicBlock *OtherBB = *BBI;
if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
return false;
if (BB->succ_size() != OtherBB->succ_size())
return false;
auto SuccBBI = OtherBB->succ_begin();
for (const BinaryBasicBlock *SuccBB : BB->successors()) {
const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
return false;
++SuccBBI;
}
auto I = BB->begin(), E = BB->end();
auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
while (I != E && OtherI != OtherE) {
auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
const MCSymbol *SymbolB) {
if (SymbolA == SymbolB)
return true;
if (SymbolA->isTemporary() && SymbolB->isTemporary())
return true;
uint64_t EntryIDA = 0;
uint64_t EntryIDB = 0;
const BinaryFunction *FunctionA =
BC.getFunctionForSymbol(SymbolA, &EntryIDA);
const BinaryFunction *FunctionB =
BC.getFunctionForSymbol(SymbolB, &EntryIDB);
if (FunctionA && EntryIDA)
FunctionA = nullptr;
if (FunctionB && EntryIDB)
FunctionB = nullptr;
if (FunctionA && FunctionB) {
if (FunctionA == &A && FunctionB == &B)
return true;
if (CongruentSymbols)
return FunctionA->getHash() == FunctionB->getHash();
return FunctionA == FunctionB;
}
if (FunctionA != FunctionB)
return false;
const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
if (!SIA)
return false;
const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
if (!SIB)
return false;
assert((SIA->getAddress() != SIB->getAddress()) &&
"different symbols should not have the same value");
const JumpTable *JumpTableA =
A.getJumpTableContainingAddress(SIA->getAddress());
if (!JumpTableA)
return false;
const JumpTable *JumpTableB =
B.getJumpTableContainingAddress(SIB->getAddress());
if (!JumpTableB)
return false;
if ((SIA->getAddress() - JumpTableA->getAddress()) !=
(SIB->getAddress() - JumpTableB->getAddress()))
return false;
return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
};
if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
AreSymbolsIdentical))
return false;
++I;
++OtherI;
}
const MCInst *TrailingInstr =
(I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr));
if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr))
return false;
++BBI;
}
if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
A.getLSDATypeTable() != B.getLSDATypeTable() ||
A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable())
return false;
return true;
}
struct KeyHash {
size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
};
struct KeyCongruent {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
if (A == B)
return true;
return isIdenticalWith(*A, *B, true);
}
};
struct KeyEqual {
bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
if (A == B)
return true;
return isIdenticalWith(*A, *B, false);
}
};
typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
KeyHash, KeyCongruent>
CongruentBucketsMap;
typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
KeyHash, KeyEqual>
IdenticalBucketsMap;
namespace llvm {
namespace bolt {
Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
uint64_t NumFunctionsFolded = 0;
std::atomic<uint64_t> NumJTFunctionsFolded{0};
std::atomic<uint64_t> BytesSavedEstimate{0};
std::atomic<uint64_t> NumCalled{0};
std::atomic<uint64_t> NumFoldedLastIteration{0};
CongruentBucketsMap CongruentBuckets;
auto hashFunctions = [&]() {
NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
"ICF breakdown", opts::TimeICF);
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
if (opts::ICFUseDFS)
BF.getLayout().updateLayoutIndices(BF.dfs());
else
BF.getLayout().updateLayoutIndices();
BF.computeHash(
opts::ICFUseDFS, HashFunction::Default,
[&BC](const MCOperand &Op) { return hashInstOperand(BC, Op); });
};
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
return !shouldOptimize(BF);
};
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
"hashFunctions", false, 2);
};
auto createCongruentBuckets = [&]() {
NamedRegionTimer CongruentBucketsTimer("congruent buckets",
"congruent buckets", "ICF breakdown",
"ICF breakdown", opts::TimeICF);
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (!this->shouldOptimize(BF))
continue;
CongruentBuckets[&BF].emplace(&BF);
}
};
auto performFoldingPass = [&]() {
NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
"ICF breakdown", "ICF breakdown",
opts::TimeICF);
Timer SinglePass("single fold pass", "single fold pass");
LLVM_DEBUG(SinglePass.startTimer());
ThreadPoolInterface *ThPool;
if (!opts::NoThreads)
ThPool = &ParallelUtilities::getThreadPool();
auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
Timer T("folding single congruent list", "folding single congruent list");
LLVM_DEBUG(T.startTimer());
IdenticalBucketsMap IdenticalBuckets;
for (BinaryFunction *BF : Candidates) {
IdenticalBuckets[BF].emplace_back(BF);
}
for (auto &IBI : IdenticalBuckets) {
std::vector<BinaryFunction *> &Twins = IBI.second;
if (Twins.size() < 2)
continue;
llvm::stable_sort(
Twins, [](const BinaryFunction *A, const BinaryFunction *B) {
return A->getFunctionNumber() < B->getFunctionNumber();
});
BinaryFunction *ParentBF = Twins[0];
if (!ParentBF->hasFunctionsFoldedInto())
NumCalled += ParentBF->getKnownExecutionCount();
for (unsigned I = 1; I < Twins.size(); ++I) {
BinaryFunction *ChildBF = Twins[I];
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
<< *ParentBF << '\n');
auto FI = Candidates.find(ChildBF);
assert(FI != Candidates.end() &&
"function expected to be in the set");
Candidates.erase(FI);
BytesSavedEstimate += ChildBF->getSize();
if (!ChildBF->hasFunctionsFoldedInto())
NumCalled += ChildBF->getKnownExecutionCount();
BC.foldFunction(*ChildBF, *ParentBF);
++NumFoldedLastIteration;
if (ParentBF->hasJumpTables())
++NumJTFunctionsFolded;
}
}
LLVM_DEBUG(T.stopTimer());
};
for (auto &Entry : CongruentBuckets) {
std::set<BinaryFunction *> &Bucket = Entry.second;
if (Bucket.size() < 2)
continue;
if (opts::NoThreads)
processSingleBucket(Bucket);
else
ThPool->async(processSingleBucket, std::ref(Bucket));
}
if (!opts::NoThreads)
ThPool->wait();
LLVM_DEBUG(SinglePass.stopTimer());
};
hashFunctions();
createCongruentBuckets();
unsigned Iteration = 1;
do {
NumFoldedLastIteration = 0;
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
performFoldingPass();
NumFunctionsFolded += NumFoldedLastIteration;
++Iteration;
} while (NumFoldedLastIteration > 0);
LLVM_DEBUG({
for (auto &CBI : CongruentBuckets) {
std::set<BinaryFunction *> &Candidates = CBI.second;
if (Candidates.size() < 2)
continue;
dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
<< " functions (each of size " << (*Candidates.begin())->getSize()
<< " bytes) are congruent but not identical:\n";
for (BinaryFunction *BF : Candidates) {
dbgs() << " " << *BF;
if (BF->getKnownExecutionCount())
dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
dbgs() << '\n';
}
}
});
if (NumFunctionsFolded)
BC.outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
<< OriginalFunctionCount << " functions in " << Iteration
<< " passes. " << NumJTFunctionsFolded
<< " functions had jump tables.\n"
<< "BOLT-INFO: Removing all identical functions will save "
<< format("%.2lf", (double)BytesSavedEstimate / 1024)
<< " KB of code space. Folded functions were called " << NumCalled
<< " times based on profile.\n";
return Error::success();
}
}
}