#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace llvm;
using namespace polly;
#include "polly/Support/PollyDebug.h"
#define DEBUG_TYPE "polly-scops"
STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
"Number of SCoPs with statically infeasible context.");
bool polly::ModelReadOnlyScalars;
static unsigned const MaxDimensionsInAccessRange = 9;
static cl::opt<bool, true> XModelReadOnlyScalars(
"polly-analyze-read-only-scalars",
cl::desc("Model read-only scalar values in the scop description"),
cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
cl::cat(PollyCategory));
static cl::opt<int>
OptComputeOut("polly-analysis-computeout",
cl::desc("Bound the scop analysis by a maximal amount of "
"computational steps (0 means no bound)"),
cl::Hidden, cl::init(800000), cl::cat(PollyCategory));
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
"polly-allow-dereference-of-all-function-parameters",
cl::desc(
"Treat all parameters to functions that are pointers as dereferencible."
" This is useful for invariant load hoisting, since we can generate"
" less runtime checks. This is only valid if all pointers to functions"
" are always initialized, so that Polly can choose to hoist"
" their loads. "),
cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<bool>
PollyIgnoreInbounds("polly-ignore-inbounds",
cl::desc("Do not take inbounds assumptions at all"),
cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
"polly-rtc-max-arrays-per-group",
cl::desc("The maximal number of arrays to compare in each alias group."),
cl::Hidden, cl::init(20), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
"polly-rtc-max-array-disjuncts",
cl::desc("The maximal number of disjunts allowed in memory accesses to "
"to build RTCs."),
cl::Hidden, cl::init(8), cl::cat(PollyCategory));
static cl::opt<unsigned> RunTimeChecksMaxParameters(
"polly-rtc-max-parameters",
cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
cl::init(8), cl::cat(PollyCategory));
static cl::opt<bool> UnprofitableScalarAccs(
"polly-unprofitable-scalar-accs",
cl::desc("Count statements with scalar accesses as not optimizable"),
cl::Hidden, cl::init(false), cl::cat(PollyCategory));
static cl::opt<std::string> UserContextStr(
"polly-context", cl::value_desc("isl parameter set"),
cl::desc("Provide additional constraints on the context parameters"),
cl::init(""), cl::cat(PollyCategory));
static cl::opt<bool> DetectReductions("polly-detect-reductions",
cl::desc("Detect and exploit reductions"),
cl::Hidden, cl::init(true),
cl::cat(PollyCategory));
static cl::opt<bool> DisableMultiplicativeReductions(
"polly-disable-multiplicative-reductions",
cl::desc("Disable multiplicative reductions"), cl::Hidden,
cl::cat(PollyCategory));
enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
static cl::opt<GranularityChoice> StmtGranularity(
"polly-stmt-granularity",
cl::desc(
"Algorithm to use for splitting basic blocks into multiple statements"),
cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
"One statement per basic block"),
clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
"Scalar independence heuristic"),
clEnumValN(GranularityChoice::Stores, "store",
"Store-level granularity")),
cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
: RN->getNodeAs<BasicBlock>();
}
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
if (RN->isSubRegion()) {
assert(idx == 0);
return RN->getNodeAs<Region>()->getExit();
}
return TI->getSuccessor(idx);
}
static bool containsErrorBlock(RegionNode *RN, const Region &R,
ScopDetection *SD) {
if (!RN->isSubRegion())
return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
if (SD->isErrorBlock(*BB, R))
return true;
return false;
}
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
isl::space MapSpace = SetSpace.map_from_set();
isl::map NextIterationMap = isl::map::universe(MapSpace);
for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
if (u != Dim)
NextIterationMap =
NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
isl::constraint C =
isl::constraint::alloc_equality(isl::local_space(MapSpace));
C = C.set_constant_si(1);
C = C.set_coefficient_si(isl::dim::in, Dim, 1);
C = C.set_coefficient_si(isl::dim::out, Dim, -1);
NextIterationMap = NextIterationMap.add_constraint(C);
return NextIterationMap;
}
static isl::set collectBoundedParts(isl::set S) {
isl::set BoundedParts = isl::set::empty(S.get_space());
for (isl::basic_set BSet : S.get_basic_set_list())
if (BSet.is_bounded())
BoundedParts = BoundedParts.unite(isl::set(BSet));
return BoundedParts;
}
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
unsigned Dim) {
for (unsigned u : rangeIslSize(0, S.tuple_dim()))
S = S.lower_bound_si(isl::dim::set, u, 0);
unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
isl::set OnlyDimS = S;
assert(NumDimsS >= Dim + 1);
OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);
for (unsigned u = 0; u < Dim; u++) {
isl::constraint C = isl::constraint::alloc_inequality(
isl::local_space(OnlyDimS.get_space()));
C = C.set_coefficient_si(isl::dim::param, u, 1);
C = C.set_coefficient_si(isl::dim::set, u, -1);
OnlyDimS = OnlyDimS.add_constraint(C);
}
isl::set BoundedParts = collectBoundedParts(OnlyDimS);
BoundedParts =
BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);
isl::set UnboundedParts = S.subtract(BoundedParts);
return std::make_pair(UnboundedParts, BoundedParts);
}
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
isl::pw_aff R) {
switch (Pred) {
case ICmpInst::ICMP_EQ:
return L.eq_set(R);
case ICmpInst::ICMP_NE:
return L.ne_set(R);
case ICmpInst::ICMP_SLT:
return L.lt_set(R);
case ICmpInst::ICMP_SLE:
return L.le_set(R);
case ICmpInst::ICMP_SGT:
return L.gt_set(R);
case ICmpInst::ICMP_SGE:
return L.ge_set(R);
case ICmpInst::ICMP_ULT:
return L.lt_set(R);
case ICmpInst::ICMP_UGT:
return L.gt_set(R);
case ICmpInst::ICMP_ULE:
return L.le_set(R);
case ICmpInst::ICMP_UGE:
return L.ge_set(R);
default:
llvm_unreachable("Non integer predicate not supported");
}
}
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
Loop *NewL) {
if (NewL == OldL)
return Dom;
int OldDepth = scop->getRelativeLoopDepth(OldL);
int NewDepth = scop->getRelativeLoopDepth(NewL);
if (OldDepth == -1 && NewDepth == -1)
return Dom;
if (OldDepth == NewDepth) {
assert(OldL->getParentLoop() == NewL->getParentLoop());
Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
Dom = Dom.add_dims(isl::dim::set, 1);
} else if (OldDepth < NewDepth) {
assert(OldDepth + 1 == NewDepth);
auto &R = scop->getRegion();
(void)R;
assert(NewL->getParentLoop() == OldL ||
((!OldL || !R.contains(OldL)) && R.contains(NewL)));
Dom = Dom.add_dims(isl::dim::set, 1);
} else {
assert(OldDepth > NewDepth);
unsigned Diff = OldDepth - NewDepth;
unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
assert(NumDim >= Diff);
Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
}
return Dom;
}
__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
const SCEV *E, bool NonNegative) {
PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
return PWAC.first.release();
}
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
bool IsStrictUpperBound) {
isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
isl_pw_aff *UpperBound =
getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);
isl_set *First =
isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
isl_pw_aff_get_domain_space(TestVal))),
isl_pw_aff_copy(TestVal));
isl_set *Second;
if (IsStrictUpperBound)
Second = isl_pw_aff_lt_set(TestVal, UpperBound);
else
Second = isl_pw_aff_le_set(TestVal, UpperBound);
isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
return ConsequenceCondSet;
}
bool ScopBuilder::buildConditionSets(
BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
Value *Condition = getConditionFromTerminator(SI);
assert(Condition && "No condition for switch");
isl_pw_aff *LHS, *RHS;
LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
unsigned NumSuccessors = SI->getNumSuccessors();
ConditionSets.resize(NumSuccessors);
for (auto &Case : SI->cases()) {
unsigned Idx = Case.getSuccessorIndex();
ConstantInt *CaseValue = Case.getCaseValue();
RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
isl_set *CaseConditionSet =
buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
isl::manage(RHS))
.release();
ConditionSets[Idx] = isl_set_coalesce(
isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
}
assert(ConditionSets[0] == nullptr && "Default condition set was set");
isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
for (unsigned u = 2; u < NumSuccessors; u++)
ConditionSetUnion =
isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);
isl_pw_aff_free(LHS);
return true;
}
bool ScopBuilder::buildConditionSets(
BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
__isl_keep isl_set *Domain,
DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
isl_set *ConsequenceCondSet = nullptr;
if (auto Load = dyn_cast<LoadInst>(Condition)) {
const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
bool NonNeg = false;
isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
isl::manage(RHS))
.release();
} else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
auto *Unique = dyn_cast<ConstantInt>(
getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
assert(Unique &&
"A PHINode condition should only be accepted by ScopDetection if "
"getUniqueNonErrorValue returns non-NULL");
if (Unique->isZero())
ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
else
ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
} else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
if (CCond->isZero())
ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
else
ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
} else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
auto Opcode = BinOp->getOpcode();
assert(Opcode == Instruction::And || Opcode == Instruction::Or);
bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
InvalidDomainMap, ConditionSets) &&
buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
InvalidDomainMap, ConditionSets);
if (!Valid) {
while (!ConditionSets.empty())
isl_set_free(ConditionSets.pop_back_val());
return false;
}
isl_set_free(ConditionSets.pop_back_val());
isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
isl_set_free(ConditionSets.pop_back_val());
isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
if (Opcode == Instruction::And)
ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
else
ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
} else {
auto *ICond = dyn_cast<ICmpInst>(Condition);
assert(ICond &&
"Condition of exiting branch was neither constant nor ICmp!");
Region &R = scop->getRegion();
isl_pw_aff *LHS, *RHS;
bool NonNeg = ICond->isUnsigned();
const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
*RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);
switch (ICond->getPredicate()) {
case ICmpInst::ICMP_ULT:
ConsequenceCondSet =
buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
RightOperand, InvalidDomainMap, true);
break;
case ICmpInst::ICMP_ULE:
ConsequenceCondSet =
buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
RightOperand, InvalidDomainMap, false);
break;
case ICmpInst::ICMP_UGT:
ConsequenceCondSet =
buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
LeftOperand, InvalidDomainMap, true);
break;
case ICmpInst::ICMP_UGE:
ConsequenceCondSet =
buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
LeftOperand, InvalidDomainMap, false);
break;
default:
LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
isl::manage(LHS), isl::manage(RHS))
.release();
break;
}
}
if (!TI)
ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
assert(ConsequenceCondSet);
ConsequenceCondSet = isl_set_coalesce(
isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
isl_set *AlternativeCondSet = nullptr;
bool TooComplex =
isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
if (!TooComplex) {
AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
isl_set_copy(ConsequenceCondSet));
TooComplex =
isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
}
if (TooComplex) {
scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
TI ? TI->getParent() : nullptr );
isl_set_free(AlternativeCondSet);
isl_set_free(ConsequenceCondSet);
return false;
}
ConditionSets.push_back(ConsequenceCondSet);
ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
return true;
}
bool ScopBuilder::buildConditionSets(
BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
ConditionSets);
assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
if (TI->getNumSuccessors() == 1) {
ConditionSets.push_back(isl_set_copy(Domain));
return true;
}
Value *Condition = getConditionFromTerminator(TI);
assert(Condition && "No condition for Terminator");
return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
ConditionSets);
}
bool ScopBuilder::propagateDomainConstraints(
Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
ReversePostOrderTraversal<Region *> RTraversal(R);
for (auto *RN : RTraversal) {
if (RN->isSubRegion()) {
Region *SubRegion = RN->getNodeAs<Region>();
if (!scop->isNonAffineSubRegion(SubRegion)) {
if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
return false;
continue;
}
}
BasicBlock *BB = getRegionNodeBasicBlock(RN);
isl::set &Domain = scop->getOrInitEmptyDomain(BB);
assert(!Domain.is_null());
isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
Domain = Domain.intersect(PredDom).coalesce();
Domain = Domain.align_params(scop->getParamSpace());
Loop *BBLoop = getRegionNodeLoop(RN, LI);
if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
return false;
}
return true;
}
void ScopBuilder::propagateDomainConstraintsToRegionExit(
BasicBlock *BB, Loop *BBLoop,
SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
auto *RI = scop->getRegion().getRegionInfo();
auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
return;
auto *L = BBLoop;
while (L && scop->contains(L)) {
SmallVector<BasicBlock *, 4> LatchBBs;
BBLoop->getLoopLatches(LatchBBs);
for (auto *LatchBB : LatchBBs)
if (BB != LatchBB && BBReg->contains(LatchBB))
return;
L = L->getParentLoop();
}
isl::set Domain = scop->getOrInitEmptyDomain(BB);
assert(!Domain.is_null() && "Cannot propagate a nullptr");
Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());
isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);
ExitDomain =
!ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
FinishedExitBlocks.insert(ExitBB);
}
isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
isl::set Domain) {
if (scop->getRegion().getEntry() == BB)
return isl::set::universe(Domain.get_space());
auto &RI = *scop->getRegion().getRegionInfo();
Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());
isl::set PredDom = isl::set::empty(Domain.get_space());
SmallSet<Region *, 8> PropagatedRegions;
for (auto *PredBB : predecessors(BB)) {
if (DT.dominates(BB, PredBB))
continue;
auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
continue;
}
auto *PredR = RI.getRegionFor(PredBB);
while (PredR->getExit() != BB && !PredR->contains(BB))
PredR = PredR->getParent();
if (PredR->getExit() == BB) {
PredBB = PredR->getEntry();
PropagatedRegions.insert(PredR);
}
isl::set PredBBDom = scop->getDomainConditions(PredBB);
Loop *PredBBLoop =
getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
PredDom = PredDom.unite(PredBBDom);
}
return PredDom;
}
bool ScopBuilder::addLoopBoundsToHeaderDomain(
Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
int LoopDepth = scop->getRelativeLoopDepth(L);
assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
BasicBlock *HeaderBB = L->getHeader();
assert(scop->isDomainDefined(HeaderBB));
isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);
isl::map NextIterationMap =
createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
SmallVector<BasicBlock *, 4> LatchBlocks;
L->getLoopLatches(LatchBlocks);
for (BasicBlock *LatchBB : LatchBlocks) {
if (!scop->isDomainDefined(LatchBB))
continue;
isl::set LatchBBDom = scop->getDomainConditions(LatchBB);
isl::set BackedgeCondition;
Instruction *TI = LatchBB->getTerminator();
BranchInst *BI = dyn_cast<BranchInst>(TI);
assert(BI && "Only branch instructions allowed in loop latches");
if (BI->isUnconditional())
BackedgeCondition = LatchBBDom;
else {
SmallVector<isl_set *, 8> ConditionSets;
int idx = BI->getSuccessor(0) != HeaderBB;
if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
InvalidDomainMap, ConditionSets))
return false;
isl_set_free(ConditionSets[1 - idx]);
BackedgeCondition = isl::manage(ConditionSets[idx]);
}
int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
assert(LatchLoopDepth >= LoopDepth);
BackedgeCondition = BackedgeCondition.project_out(
isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
}
isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
for (int i = 0; i < LoopDepth; i++)
ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
isl::set UnionBackedgeConditionComplement =
UnionBackedgeCondition.complement();
UnionBackedgeConditionComplement =
UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
0);
UnionBackedgeConditionComplement =
UnionBackedgeConditionComplement.apply(ForwardMap);
HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
HeaderBBDom = Parts.second;
bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);
isl::set UnboundedCtx = Parts.first.params();
recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
nullptr, RequiresRTC);
return true;
}
void ScopBuilder::buildInvariantEquivalenceClasses() {
DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
for (LoadInst *LInst : RIL) {
const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
Type *Ty = LInst->getType();
LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
if (ClassRep) {
scop->addInvariantLoadMapping(LInst, ClassRep);
continue;
}
ClassRep = LInst;
scop->addInvariantEquivClass(
InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
}
}
bool ScopBuilder::buildDomains(
Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
auto *EntryBB = R->getEntry();
auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
int LD = scop->getRelativeLoopDepth(L);
auto *S =
isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));
InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
isl::set Domain = isl::manage(S);
scop->setDomain(EntryBB, Domain);
if (IsOnlyNonAffineRegion)
return !containsErrorBlock(R->getNode(), *R, &SD);
if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
return false;
if (!propagateDomainConstraints(R, InvalidDomainMap))
return false;
if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
return false;
return true;
}
bool ScopBuilder::buildDomainsWithBranchConstraints(
Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
ReversePostOrderTraversal<Region *> RTraversal(R);
for (auto *RN : RTraversal) {
if (RN->isSubRegion()) {
Region *SubRegion = RN->getNodeAs<Region>();
if (!scop->isNonAffineSubRegion(SubRegion)) {
if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
return false;
continue;
}
}
if (containsErrorBlock(RN, scop->getRegion(), &SD))
scop->notifyErrorBlock();
;
BasicBlock *BB = getRegionNodeBasicBlock(RN);
Instruction *TI = BB->getTerminator();
if (isa<UnreachableInst>(TI))
continue;
if (!scop->isDomainDefined(BB))
continue;
isl::set Domain = scop->getDomainConditions(BB);
scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));
auto *BBLoop = getRegionNodeLoop(RN, LI);
propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
InvalidDomainMap);
auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
return FinishedExitBlocks.count(SuccBB);
};
if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
continue;
SmallVector<isl_set *, 8> ConditionSets;
if (RN->isSubRegion())
ConditionSets.push_back(Domain.copy());
else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
ConditionSets))
return false;
assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
isl::set CondSet = isl::manage(ConditionSets[u]);
BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
if (!scop->contains(SuccBB))
continue;
if (FinishedExitBlocks.count(SuccBB))
continue;
if (DT.dominates(SuccBB, BB))
continue;
Loop *SuccBBLoop =
getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);
isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);
if (!SuccDomain.is_null()) {
SuccDomain = SuccDomain.unite(CondSet).coalesce();
} else {
InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
SuccDomain = CondSet;
}
SuccDomain = SuccDomain.detect_equalities();
if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
continue;
scop->invalidate(COMPLEXITY, DebugLoc());
while (++u < ConditionSets.size())
isl_set_free(ConditionSets[u]);
return false;
}
}
return true;
}
bool ScopBuilder::propagateInvalidStmtDomains(
Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
ReversePostOrderTraversal<Region *> RTraversal(R);
for (auto *RN : RTraversal) {
if (RN->isSubRegion()) {
Region *SubRegion = RN->getNodeAs<Region>();
if (!scop->isNonAffineSubRegion(SubRegion)) {
propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
continue;
}
}
bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
BasicBlock *BB = getRegionNodeBasicBlock(RN);
isl::set &Domain = scop->getOrInitEmptyDomain(BB);
assert(!Domain.is_null() && "Cannot propagate a nullptr");
isl::set InvalidDomain = InvalidDomainMap[BB];
bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
if (!IsInvalidBlock) {
InvalidDomain = InvalidDomain.intersect(Domain);
} else {
InvalidDomain = Domain;
isl::set DomPar = Domain.params();
recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
Domain = isl::set::empty(Domain.get_space());
}
if (InvalidDomain.is_empty()) {
InvalidDomainMap[BB] = InvalidDomain;
continue;
}
auto *BBLoop = getRegionNodeLoop(RN, LI);
auto *TI = BB->getTerminator();
unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
for (unsigned u = 0; u < NumSuccs; u++) {
auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
if (!scop->contains(SuccBB))
continue;
if (DT.dominates(SuccBB, BB))
continue;
Loop *SuccBBLoop =
getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
auto AdjustedInvalidDomain =
adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);
isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
SuccInvalidDomain = SuccInvalidDomain.coalesce();
InvalidDomainMap[SuccBB] = SuccInvalidDomain;
if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
MaxDisjunctsInDomain)
continue;
InvalidDomainMap.erase(BB);
scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
return false;
}
InvalidDomainMap[BB] = InvalidDomain;
}
return true;
}
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
Region *NonAffineSubRegion,
bool IsExitBlock) {
auto *Scope = LI.getLoopFor(PHI->getParent());
if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
return;
bool OnlyNonAffineSubRegionOperands = true;
for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
Value *Op = PHI->getIncomingValue(u);
BasicBlock *OpBB = PHI->getIncomingBlock(u);
ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
auto *OpInst = dyn_cast<Instruction>(Op);
if (!OpInst || !NonAffineSubRegion->contains(OpInst))
ensureValueRead(Op, OpStmt);
continue;
}
OnlyNonAffineSubRegionOperands = false;
ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
}
if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
addPHIReadAccess(PHIStmt, PHI);
}
}
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
Instruction *Inst) {
assert(!isa<PHINode>(Inst));
for (Use &Op : Inst->operands())
ensureValueRead(Op.get(), UserStmt);
}
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
if (Prev.is_null())
return Succ;
if (Succ.is_null())
return Prev;
return Prev.sequence(Succ);
}
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
assert(!USet.is_null());
assert(!USet.is_empty());
auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
for (isl::set S : USet.get_set_list()) {
unsigned Dim = unsignedFromIslSize(S.tuple_dim());
assert(Dim >= N);
auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
N, Dim - N);
if (N > 1)
PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
Result = Result.add_pw_multi_aff(PMA);
}
return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}
void ScopBuilder::buildSchedule() {
Loop *L = getLoopSurroundingScop(*scop, LI);
LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
buildSchedule(scop->getRegion().getNode(), LoopStack);
assert(LoopStack.size() == 1 && LoopStack.back().L == L);
scop->setScheduleTree(LoopStack[0].Schedule);
}
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
ReversePostOrderTraversal<Region *> RTraversal(R);
std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
std::deque<RegionNode *> DelayList;
bool LastRNWaiting = false;
while (!WorkList.empty() || !DelayList.empty()) {
RegionNode *RN;
if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
RN = WorkList.front();
WorkList.pop_front();
LastRNWaiting = false;
} else {
RN = DelayList.front();
DelayList.pop_front();
}
Loop *L = getRegionNodeLoop(RN, LI);
if (!scop->contains(L))
L = OuterScopLoop;
Loop *LastLoop = LoopStack.back().L;
if (LastLoop != L) {
if (LastLoop && !LastLoop->contains(L)) {
LastRNWaiting = true;
DelayList.push_back(RN);
continue;
}
LoopStack.push_back({L, {}, 0});
}
buildSchedule(RN, LoopStack);
}
}
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
if (RN->isSubRegion()) {
auto *LocalRegion = RN->getNodeAs<Region>();
if (!scop->isNonAffineSubRegion(LocalRegion)) {
buildSchedule(LocalRegion, LoopStack);
return;
}
}
assert(LoopStack.rbegin() != LoopStack.rend());
auto LoopData = LoopStack.rbegin();
LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
for (auto *Stmt : scop->getStmtListFor(RN)) {
isl::union_set UDomain{Stmt->getDomain()};
auto StmtSchedule = isl::schedule::from_domain(UDomain);
LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
}
size_t Dimension = LoopStack.size();
while (LoopData->L &&
LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
isl::schedule Schedule = LoopData->Schedule;
auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
assert(std::next(LoopData) != LoopStack.rend());
Loop *L = LoopData->L;
++LoopData;
--Dimension;
if (!Schedule.is_null()) {
isl::union_set Domain = Schedule.get_domain();
isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
Schedule = Schedule.insert_partial_schedule(MUPA);
if (hasDisableAllTransformsHint(L)) {
scop->markDisableHeuristics();
}
isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
if (!IslLoopId.is_null())
Schedule =
Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();
LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
}
LoopData->NumBlocksProcessed += NumBlocksProcessed;
}
LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
if (scop->isEscaping(Inst))
ensureValueWrite(Inst);
}
void ScopBuilder::addRecordedAssumptions() {
for (auto &AS : llvm::reverse(RecordedAssumptions)) {
if (!AS.BB) {
scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
nullptr , AS.RequiresRTC);
continue;
}
isl_set *Dom = scop->getDomainConditions(AS.BB).release();
if (!Dom)
continue;
isl_set *S = AS.Set.copy();
if (AS.Sign == AS_RESTRICTION)
S = isl_set_params(isl_set_intersect(S, Dom));
else
S = isl_set_params(isl_set_subtract(Dom, S));
scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
AS.RequiresRTC);
}
}
void ScopBuilder::addUserAssumptions(
AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
for (auto &Assumption : AC.assumptions()) {
auto *CI = dyn_cast_or_null<CallInst>(Assumption);
if (!CI || CI->arg_size() != 1)
continue;
bool InScop = scop->contains(CI);
if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
continue;
auto *L = LI.getLoopFor(CI->getParent());
auto *Val = CI->getArgOperand(0);
ParameterSetTy DetectedParams;
auto &R = scop->getRegion();
if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
ORE.emit(
OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
<< "Non-affine user assumption ignored.");
continue;
}
ParameterSetTy NewParams;
for (auto *Param : DetectedParams) {
Param = extractConstantFactor(Param, SE).second;
Param = scop->getRepresentingInvariantLoadSCEV(Param);
if (scop->isParam(Param))
continue;
NewParams.insert(Param);
}
SmallVector<isl_set *, 2> ConditionSets;
auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
: isl_set_copy(scop->getContext().get());
assert(Dom && "Cannot propagate a nullptr.");
bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
ConditionSets);
isl_set_free(Dom);
if (!Valid)
continue;
isl_set *AssumptionCtx = nullptr;
if (InScop) {
AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
isl_set_free(ConditionSets[0]);
} else {
AssumptionCtx = isl_set_complement(ConditionSets[1]);
AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
}
if (!NewParams.empty()) {
for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
isl_id_free(Id);
if (!NewParams.count(Param))
continue;
AssumptionCtx =
isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
}
}
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
<< "Use user assumption: "
<< stringFromIslObj(AssumptionCtx, "null"));
isl::set newContext =
scop->getContext().intersect(isl::manage(AssumptionCtx));
scop->setContext(newContext);
}
}
bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
if (!Inst.isLoad() && !Inst.isStore())
return false;
Value *Val = Inst.getValueOperand();
Type *ElementType = Val->getType();
Value *Address = Inst.getPointerOperand();
const SCEV *AccessFunction =
SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
const SCEVUnknown *BasePointer =
dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
enum MemoryAccess::AccessType AccType =
isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
if (auto *BitCast = dyn_cast<BitCastInst>(Address))
Address = BitCast->getOperand(0);
auto *GEP = dyn_cast<GetElementPtrInst>(Address);
if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
DL.getTypeAllocSize(ElementType))
return false;
SmallVector<const SCEV *, 4> Subscripts;
SmallVector<int, 4> Sizes;
getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
auto *BasePtr = GEP->getOperand(0);
if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
BasePtr = BasePtrCast->getOperand(0);
if (BasePtr != BasePointer->getValue())
return false;
std::vector<const SCEV *> SizesSCEV;
const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
Loop *SurroundingLoop = Stmt->getSurroundingLoop();
for (auto *Subscript : Subscripts) {
InvariantLoadsSetTy AccessILS;
if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
&AccessILS))
return false;
for (LoadInst *LInst : AccessILS)
if (!ScopRIL.count(LInst))
return false;
}
if (Sizes.empty())
return false;
SizesSCEV.push_back(nullptr);
for (auto V : Sizes)
SizesSCEV.push_back(SE.getSCEV(
ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
true, Subscripts, SizesSCEV, Val);
return true;
}
bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
if (!Inst.isLoad() && !Inst.isStore())
return false;
if (!PollyDelinearize)
return false;
Value *Address = Inst.getPointerOperand();
Value *Val = Inst.getValueOperand();
Type *ElementType = Val->getType();
unsigned ElementSize = DL.getTypeAllocSize(ElementType);
enum MemoryAccess::AccessType AccType =
isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
const SCEV *AccessFunction =
SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
const SCEVUnknown *BasePointer =
dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
assert(BasePointer && "Could not find base pointer");
auto &InsnToMemAcc = scop->getInsnToMemAccMap();
auto AccItr = InsnToMemAcc.find(Inst);
if (AccItr == InsnToMemAcc.end())
return false;
std::vector<const SCEV *> Sizes = {nullptr};
Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
AccItr->second.Shape->DelinearizedSizes.end());
if (Sizes.size() == 1)
return false;
auto DelinearizedSize =
cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
Sizes.pop_back();
if (ElementSize != DelinearizedSize)
scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
return true;
}
bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
if (MemIntr == nullptr)
return false;
auto *L = LI.getLoopFor(Inst->getParent());
auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
assert(LengthVal);
InvariantLoadsSetTy AccessILS;
const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
Loop *SurroundingLoop = Stmt->getSurroundingLoop();
bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
LengthVal, SE, &AccessILS);
for (LoadInst *LInst : AccessILS)
if (!ScopRIL.count(LInst))
LengthIsAffine = false;
if (!LengthIsAffine)
LengthVal = nullptr;
auto *DestPtrVal = MemIntr->getDest();
assert(DestPtrVal);
auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
assert(DestAccFunc);
if (DestAccFunc->isZero())
return true;
if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
if (isa<ConstantPointerNull>(U->getValue()))
return true;
}
auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
assert(DestPtrSCEV);
DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
IntegerType::getInt8Ty(DestPtrVal->getContext()),
LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
Inst.getValueOperand());
auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
if (!MemTrans)
return true;
auto *SrcPtrVal = MemTrans->getSource();
assert(SrcPtrVal);
auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
assert(SrcAccFunc);
if (SrcAccFunc->isZero())
return true;
auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
assert(SrcPtrSCEV);
SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
IntegerType::getInt8Ty(SrcPtrVal->getContext()),
LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
Inst.getValueOperand());
return true;
}
bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
auto *CI = dyn_cast_or_null<CallInst>(Inst);
if (CI == nullptr)
return false;
if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
return true;
auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
auto *CalledFunction = CI->getCalledFunction();
MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
if (ME.doesNotAccessMemory())
return true;
if (ME.onlyAccessesArgPointees()) {
ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
auto AccType =
!isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
Loop *L = LI.getLoopFor(Inst->getParent());
for (const auto &Arg : CI->args()) {
if (!Arg->getType()->isPointerTy())
continue;
auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
if (ArgSCEV->isZero())
continue;
if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
if (isa<ConstantPointerNull>(U->getValue()))
return true;
}
auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
}
return true;
}
if (ME.onlyReadsMemory()) {
GlobalReads.emplace_back(Stmt, CI);
return true;
}
return false;
}
bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
if (!Inst.isLoad() && !Inst.isStore())
return false;
Value *Address = Inst.getPointerOperand();
Value *Val = Inst.getValueOperand();
Type *ElementType = Val->getType();
enum MemoryAccess::AccessType AccType =
isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
const SCEV *AccessFunction =
SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
const SCEVUnknown *BasePointer =
dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
assert(BasePointer && "Could not find base pointer");
AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
bool isVariantInNonAffineLoop = false;
SetVector<const Loop *> Loops;
findLoops(AccessFunction, Loops);
for (const Loop *L : Loops)
if (Stmt->contains(L)) {
isVariantInNonAffineLoop = true;
break;
}
InvariantLoadsSetTy AccessILS;
Loop *SurroundingLoop = Stmt->getSurroundingLoop();
bool IsAffine = !isVariantInNonAffineLoop &&
isAffineExpr(&scop->getRegion(), SurroundingLoop,
AccessFunction, SE, &AccessILS);
const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
for (LoadInst *LInst : AccessILS)
if (!ScopRIL.count(LInst))
IsAffine = false;
if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
AccType = MemoryAccess::MAY_WRITE;
addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
IsAffine, {AccessFunction}, {nullptr}, Val);
return true;
}
void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
if (buildAccessMemIntrinsic(Inst, Stmt))
return;
if (buildAccessCallInst(Inst, Stmt))
return;
if (buildAccessMultiDimFixed(Inst, Stmt))
return;
if (buildAccessMultiDimParam(Inst, Stmt))
return;
if (buildAccessSingleDim(Inst, Stmt))
return;
llvm_unreachable(
"At least one of the buildAccess functions must handled this access, or "
"ScopDetection should have rejected this SCoP");
}
void ScopBuilder::buildAccessFunctions() {
for (auto &Stmt : *scop) {
if (Stmt.isBlockStmt()) {
buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
continue;
}
Region *R = Stmt.getRegion();
for (BasicBlock *BB : R->blocks())
buildAccessFunctions(&Stmt, *BB, R);
}
for (BasicBlock *BB : scop->getRegion().blocks()) {
for (Instruction &Inst : *BB)
buildEscapingDependences(&Inst);
}
}
bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
!canSynthesize(Inst, *scop, &SE, L);
}
static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
bool IsMain, bool IsLast = false) {
std::string Suffix;
if (!IsMain) {
if (UseInstructionNames)
Suffix = '_';
if (IsLast)
Suffix += "last";
else if (Count < 26)
Suffix += 'a' + Count;
else
Suffix += std::to_string(Count);
}
return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
}
static std::string makeStmtName(Region *R, long RIdx) {
return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
UseInstructionNames);
}
void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
Loop *SurroundingLoop = LI.getLoopFor(BB);
int Count = 0;
long BBIdx = scop->getNextStmtIdx();
std::vector<Instruction *> Instructions;
for (Instruction &Inst : *BB) {
if (shouldModelInst(&Inst, SurroundingLoop))
Instructions.push_back(&Inst);
if (Inst.getMetadata("polly_split_after") ||
(SplitOnStore && isa<StoreInst>(Inst))) {
std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
Count++;
Instructions.clear();
}
}
std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
}
static bool isOrderedInstruction(Instruction *Inst) {
return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
}
static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
ArrayRef<Instruction *> ModeledInsts) {
for (Instruction *Inst : ModeledInsts) {
if (isa<PHINode>(Inst))
continue;
for (Use &Op : Inst->operands()) {
Instruction *OpInst = dyn_cast<Instruction>(Op.get());
if (!OpInst)
continue;
auto OpVal = UnionFind.findValue(OpInst);
if (OpVal == UnionFind.end())
continue;
UnionFind.unionSets(Inst, OpInst);
}
}
}
static void
joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
ArrayRef<Instruction *> ModeledInsts) {
SetVector<Instruction *> SeenLeaders;
for (Instruction *Inst : ModeledInsts) {
if (!isOrderedInstruction(Inst))
continue;
Instruction *Leader = UnionFind.getLeaderValue(Inst);
bool Inserted = SeenLeaders.insert(Leader);
if (Inserted)
continue;
for (Instruction *Prev : reverse(SeenLeaders)) {
if (Prev == Leader)
break;
UnionFind.unionSets(Prev, Leader);
}
}
}
static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
ArrayRef<Instruction *> ModeledInsts) {
for (Instruction *Inst : ModeledInsts) {
PHINode *PHI = dyn_cast<PHINode>(Inst);
if (!PHI)
continue;
int Idx = PHI->getBasicBlockIndex(PHI->getParent());
if (Idx < 0)
continue;
Instruction *IncomingVal =
dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
if (!IncomingVal)
continue;
UnionFind.unionSets(PHI, IncomingVal);
}
}
void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
Loop *L = LI.getLoopFor(BB);
SmallVector<Instruction *, 32> ModeledInsts;
EquivalenceClasses<Instruction *> UnionFind;
Instruction *MainInst = nullptr, *MainLeader = nullptr;
for (Instruction &Inst : *BB) {
if (!shouldModelInst(&Inst, L))
continue;
ModeledInsts.push_back(&Inst);
UnionFind.insert(&Inst);
if (!MainInst && (isa<StoreInst>(Inst) ||
(isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
MainInst = &Inst;
}
joinOperandTree(UnionFind, ModeledInsts);
joinOrderedInstructions(UnionFind, ModeledInsts);
joinOrderedPHIs(UnionFind, ModeledInsts);
MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
for (Instruction *Inst : ModeledInsts) {
if (!isOrderedInstruction(Inst))
continue;
auto LeaderIt = UnionFind.findLeader(Inst);
if (LeaderIt == UnionFind.member_end())
continue;
(void)LeaderToInstList[*LeaderIt];
}
for (Instruction *Inst : ModeledInsts) {
auto LeaderIt = UnionFind.findLeader(Inst);
if (LeaderIt == UnionFind.member_end())
continue;
if (Inst == MainInst)
MainLeader = *LeaderIt;
std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
InstList.push_back(Inst);
}
int Count = 0;
long BBIdx = scop->getNextStmtIdx();
for (auto &Instructions : LeaderToInstList) {
std::vector<Instruction *> &InstList = Instructions.second;
bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
scop->addScopStmt(BB, Name, L, std::move(InstList));
Count += 1;
}
std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
scop->addScopStmt(BB, EpilogueName, L, {});
}
void ScopBuilder::buildStmts(Region &SR) {
if (scop->isNonAffineSubRegion(&SR)) {
std::vector<Instruction *> Instructions;
Loop *SurroundingLoop =
getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
for (Instruction &Inst : *SR.getEntry())
if (shouldModelInst(&Inst, SurroundingLoop))
Instructions.push_back(&Inst);
long RIdx = scop->getNextStmtIdx();
std::string Name = makeStmtName(&SR, RIdx);
scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
return;
}
for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
if (I->isSubRegion())
buildStmts(*I->getNodeAs<Region>());
else {
BasicBlock *BB = I->getNodeAs<BasicBlock>();
switch (StmtGranularity) {
case GranularityChoice::BasicBlocks:
buildSequentialBlockStmts(BB);
break;
case GranularityChoice::ScalarIndependence:
buildEqivClassBlockStmts(BB);
break;
case GranularityChoice::Stores:
buildSequentialBlockStmts(BB, true);
break;
}
}
}
void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
Region *NonAffineSubRegion) {
assert(
Stmt &&
"The exit BB is the only one that cannot be represented by a statement");
assert(Stmt->represents(&BB));
if (SD.isErrorBlock(BB, scop->getRegion()))
return;
auto BuildAccessesForInst = [this, Stmt,
NonAffineSubRegion](Instruction *Inst) {
PHINode *PHI = dyn_cast<PHINode>(Inst);
if (PHI)
buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
assert(Stmt && "Cannot build access function in non-existing statement");
buildMemoryAccess(MemInst, Stmt);
}
if (!PHI)
buildScalarDependences(Stmt, Inst);
};
const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
if (IsEntryBlock) {
for (Instruction *Inst : Stmt->getInstructions())
BuildAccessesForInst(Inst);
if (Stmt->isRegionStmt())
BuildAccessesForInst(BB.getTerminator());
} else {
for (Instruction &Inst : BB) {
if (isIgnoredIntrinsic(&Inst))
continue;
if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
continue;
BuildAccessesForInst(&Inst);
}
}
}
MemoryAccess *ScopBuilder::addMemoryAccess(
ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
MemoryKind Kind) {
bool isKnownMustAccess = false;
if (Stmt->isBlockStmt())
isKnownMustAccess = true;
if (Stmt->isRegionStmt()) {
if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
isKnownMustAccess = true;
}
if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
isKnownMustAccess = true;
if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
AccType = MemoryAccess::MAY_WRITE;
auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
Affine, Subscripts, Sizes, AccessValue, Kind);
scop->addAccessFunction(Access);
Stmt->addAccess(Access);
return Access;
}
void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
MemoryAccess::AccessType AccType,
Value *BaseAddress, Type *ElementType,
bool IsAffine,
ArrayRef<const SCEV *> Subscripts,
ArrayRef<const SCEV *> Sizes,
Value *AccessValue) {
ArrayBasePointers.insert(BaseAddress);
addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
AccessValue, Subscripts, Sizes, MemoryKind::Array);
}
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
assert(Size != 0);
if (Size == 1)
return true;
if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
for (auto *FactorExpr : MulExpr->operands())
if (isDivisible(FactorExpr, Size, SE))
return true;
return false;
}
if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
for (auto *OpExpr : NAryExpr->operands())
if (!isDivisible(OpExpr, Size, SE))
return false;
return true;
}
auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
return MulSCEV == Expr;
}
void ScopBuilder::foldSizeConstantsToRight() {
isl::union_set Accessed = scop->getAccesses().range();
for (auto Array : scop->arrays()) {
if (Array->getNumberOfDimensions() <= 1)
continue;
isl::space Space = Array->getSpace();
Space = Space.align_params(Accessed.get_space());
if (!Accessed.contains(Space))
continue;
isl::set Elements = Accessed.extract_set(Space);
isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
std::vector<int> Int;
unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
for (unsigned i = 0; i < Dims; i++) {
isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
isl::basic_set DimHull = DimOnly.affine_hull();
if (i == Dims - 1) {
Int.push_back(1);
Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
continue;
}
if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
isl::aff Diff = DimHull.get_div(0);
isl::val Val = Diff.get_denominator_val();
int ValInt = 1;
if (Val.is_int()) {
auto ValAPInt = APIntFromVal(Val);
if (ValAPInt.isSignedIntN(32))
ValInt = ValAPInt.getSExtValue();
} else {
}
Int.push_back(ValInt);
isl::constraint C = isl::constraint::alloc_equality(
isl::local_space(Transform.get_space()));
C = C.set_coefficient_si(isl::dim::out, i, ValInt);
C = C.set_coefficient_si(isl::dim::in, i, -1);
Transform = Transform.add_constraint(C);
continue;
}
isl::basic_set ZeroSet = isl::basic_set(DimHull);
ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
int ValInt = 1;
if (ZeroSet.is_equal(DimHull)) {
ValInt = 0;
}
Int.push_back(ValInt);
Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
}
isl::set MappedElements = isl::map(Transform).domain();
if (!Elements.is_subset(MappedElements))
continue;
bool CanFold = true;
if (Int[0] <= 1)
CanFold = false;
unsigned NumDims = Array->getNumberOfDimensions();
for (unsigned i = 1; i < NumDims - 1; i++)
if (Int[0] != Int[i] && Int[i])
CanFold = false;
if (!CanFold)
continue;
for (auto &Access : scop->access_functions())
if (Access->getScopArrayInfo() == Array)
Access->setAccessRelation(
Access->getAccessRelation().apply_range(Transform));
std::vector<const SCEV *> Sizes;
for (unsigned i = 0; i < NumDims; i++) {
auto Size = Array->getDimensionSize(i);
if (i == NumDims - 1)
Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
Sizes.push_back(Size);
}
Array->updateSizes(Sizes, false );
}
}
void ScopBuilder::finalizeAccesses() {
updateAccessDimensionality();
foldSizeConstantsToRight();
foldAccessRelations();
assumeNoOutOfBounds();
}
void ScopBuilder::updateAccessDimensionality() {
for (ScopStmt &Stmt : *scop)
for (MemoryAccess *Access : Stmt) {
if (!Access->isArrayKind())
continue;
ScopArrayInfo *Array =
const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
if (Array->getNumberOfDimensions() != 1)
continue;
unsigned DivisibleSize = Array->getElemSizeInBytes();
const SCEV *Subscript = Access->getSubscript(0);
while (!isDivisible(Subscript, DivisibleSize, SE))
DivisibleSize /= 2;
auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
Array->updateElementType(Ty);
}
for (auto &Stmt : *scop)
for (auto &Access : Stmt)
Access->updateDimensionality();
}
void ScopBuilder::foldAccessRelations() {
for (auto &Stmt : *scop)
for (auto &Access : Stmt)
Access->foldAccessRelation();
}
void ScopBuilder::assumeNoOutOfBounds() {
if (PollyIgnoreInbounds)
return;
for (auto &Stmt : *scop)
for (auto &Access : Stmt) {
isl::set Outside = Access->assumeNoOutOfBound();
const auto &Loc = Access->getAccessInstruction()
? Access->getAccessInstruction()->getDebugLoc()
: DebugLoc();
recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
AS_ASSUMPTION);
}
}
void ScopBuilder::ensureValueWrite(Instruction *Inst) {
ScopStmt *Stmt = scop->getStmtFor(Inst);
if (!Stmt)
Stmt = scop->getLastStmtFor(Inst->getParent());
if (!Stmt)
return;
if (Stmt->lookupValueWriteOf(Inst))
return;
addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
true, Inst, ArrayRef<const SCEV *>(),
ArrayRef<const SCEV *>(), MemoryKind::Value);
}
void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
auto *Scope = UserStmt->getSurroundingLoop();
auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
switch (VUse.getKind()) {
case VirtualUse::Constant:
case VirtualUse::Block:
case VirtualUse::Synthesizable:
case VirtualUse::Hoisted:
case VirtualUse::Intra:
break;
case VirtualUse::ReadOnly:
if (!ModelReadOnlyScalars)
break;
[[fallthrough]];
case VirtualUse::Inter:
if (UserStmt->lookupValueReadOf(V))
break;
addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
MemoryKind::Value);
if (VUse.isInter())
ensureValueWrite(cast<Instruction>(V));
break;
}
}
void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
BasicBlock *IncomingBlock,
Value *IncomingValue, bool IsExitBlock) {
if (IsExitBlock)
scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
MemoryKind::ExitPHI);
if (!IncomingStmt)
return;
ensureValueRead(IncomingValue, IncomingStmt);
if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
assert(Acc->getAccessInstruction() == PHI);
Acc->addIncoming(IncomingBlock, IncomingValue);
return;
}
MemoryAccess *Acc = addMemoryAccess(
IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
assert(Acc);
Acc->addIncoming(IncomingBlock, IncomingValue);
}
void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
MemoryKind::PHI);
}
void ScopBuilder::buildDomain(ScopStmt &Stmt) {
isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
Stmt.Domain = scop->getDomainConditions(&Stmt);
Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
}
void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
isl::set Domain = Stmt.getDomain();
BasicBlock *BB = Stmt.getEntryBlock();
Loop *L = LI.getLoopFor(BB);
while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
L = L->getParentLoop();
SmallVector<llvm::Loop *, 8> Loops;
while (L && Stmt.getParent()->getRegion().contains(L)) {
Loops.push_back(L);
L = L->getParentLoop();
}
Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
}
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
const Instruction *Load) {
if (!BinOp)
return MemoryAccess::RT_NONE;
switch (BinOp->getOpcode()) {
case Instruction::FAdd:
if (!BinOp->isFast())
return MemoryAccess::RT_NONE;
[[fallthrough]];
case Instruction::Add:
return MemoryAccess::RT_ADD;
case Instruction::Or:
return MemoryAccess::RT_BOR;
case Instruction::Xor:
return MemoryAccess::RT_BXOR;
case Instruction::And:
return MemoryAccess::RT_BAND;
case Instruction::FMul:
if (!BinOp->isFast())
return MemoryAccess::RT_NONE;
[[fallthrough]];
case Instruction::Mul:
if (DisableMultiplicativeReductions)
return MemoryAccess::RT_NONE;
return MemoryAccess::RT_MUL;
default:
return MemoryAccess::RT_NONE;
}
}
bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
MemoryAccess *StoreMA, isl::set Domain,
SmallVector<MemoryAccess *, 8> &MemAccs) {
bool HasIntersectingAccs = false;
auto AllAccsNoParams = AllAccs.project_out_all_params();
for (MemoryAccess *MA : MemAccs) {
if (MA == LoadMA || MA == StoreMA)
continue;
auto AccRel = MA->getAccessRelation().intersect_domain(Domain);
auto Accs = AccRel.range();
auto AccsNoParams = Accs.project_out_all_params();
bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams);
if (CompatibleSpace) {
auto OverlapAccs = Accs.intersect(AllAccs);
bool DoesIntersect = !OverlapAccs.is_empty();
HasIntersectingAccs |= DoesIntersect;
}
}
return HasIntersectingAccs;
}
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA,
isl::set Domain,
SmallVector<MemoryAccess *, 8> &MemAccs) {
isl::map LoadAccs = LoadMA->getAccessRelation();
isl::map StoreAccs = StoreMA->getAccessRelation();
bool Valid = LoadAccs.has_equal_space(StoreAccs);
POLLY_DEBUG(dbgs() << " == The accessed space below is "
<< (Valid ? "" : "not ") << "equal!\n");
POLLY_DEBUG(LoadMA->dump(); StoreMA->dump());
if (Valid) {
isl::map R = isl::manage(LoadAccs.copy())
.intersect_domain(isl::manage(Domain.copy()));
isl::map W = isl::manage(StoreAccs.copy())
.intersect_domain(isl::manage(Domain.copy()));
isl::set RS = R.range();
isl::set WS = W.range();
isl::set InterAccs =
isl::manage(RS.copy()).intersect(isl::manage(WS.copy()));
Valid = !InterAccs.is_empty();
POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
<< "overlapping!\n");
}
if (Valid) {
isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
AllAccsRel = AllAccsRel.intersect_domain(Domain);
isl::set AllAccs = AllAccsRel.range();
Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
<< "accessed by other instructions!\n");
}
return Valid;
}
void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
SmallVector<MemoryAccess *, 2> Loads;
SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
for (MemoryAccess *StoreMA : Stmt) {
if (StoreMA->isRead())
continue;
Loads.clear();
collectCandidateReductionLoads(StoreMA, Loads);
for (MemoryAccess *LoadMA : Loads)
Candidates.push_back(std::make_pair(LoadMA, StoreMA));
}
for (const auto &CandidatePair : Candidates) {
MemoryAccess *LoadMA = CandidatePair.first;
MemoryAccess *StoreMA = CandidatePair.second;
bool Valid = checkCandidatePairAccesses(LoadMA, StoreMA, Stmt.getDomain(),
Stmt.MemAccs);
if (!Valid)
continue;
const LoadInst *Load =
dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
MemoryAccess::ReductionType RT =
getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
LoadMA->markAsReductionLike(RT);
StoreMA->markAsReductionLike(RT);
}
}
void ScopBuilder::verifyInvariantLoads() {
auto &RIL = scop->getRequiredInvariantLoads();
for (LoadInst *LI : RIL) {
assert(LI && scop->contains(LI));
for (ScopStmt &Stmt : *scop)
if (Stmt.getArrayAccessOrNULLFor(LI)) {
scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
return;
}
}
}
void ScopBuilder::hoistInvariantLoads() {
if (!PollyInvariantLoadHoisting)
return;
isl::union_map Writes = scop->getWrites();
for (ScopStmt &Stmt : *scop) {
InvariantAccessesTy InvariantAccesses;
for (MemoryAccess *Access : Stmt) {
isl::set NHCtx = getNonHoistableCtx(Access, Writes);
if (!NHCtx.is_null())
InvariantAccesses.push_back({Access, NHCtx});
}
for (auto InvMA : InvariantAccesses)
Stmt.removeMemoryAccess(InvMA.MA);
addInvariantLoads(Stmt, InvariantAccesses);
}
}
static bool isAccessRangeTooComplex(isl::set AccessRange) {
unsigned NumTotalDims = 0;
for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
}
if (NumTotalDims > MaxDimensionsInAccessRange)
return true;
return false;
}
bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
isl::union_map Writes) {
if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
return getNonHoistableCtx(BasePtrMA, Writes).is_null();
}
Value *BaseAddr = MA->getOriginalBaseAddr();
if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
if (!isa<LoadInst>(BasePtrInst))
return scop->contains(BasePtrInst);
return false;
}
void ScopBuilder::addUserContext() {
if (UserContextStr.empty())
return;
isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
isl::space Space = scop->getParamSpace();
isl::size SpaceParams = Space.dim(isl::dim::param);
if (unsignedFromIslSize(SpaceParams) !=
unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
std::string SpaceStr = stringFromIslObj(Space, "null");
errs() << "Error: the context provided in -polly-context has not the same "
<< "number of dimensions than the computed context. Due to this "
<< "mismatch, the -polly-context option is ignored. Please provide "
<< "the context in the parameter space: " << SpaceStr << ".\n";
return;
}
for (auto i : rangeIslSize(0, SpaceParams)) {
std::string NameContext =
scop->getContext().get_dim_name(isl::dim::param, i);
std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
if (NameContext != NameUserContext) {
std::string SpaceStr = stringFromIslObj(Space, "null");
errs() << "Error: the name of dimension " << i
<< " provided in -polly-context "
<< "is '" << NameUserContext << "', but the name in the computed "
<< "context is '" << NameContext
<< "'. Due to this name mismatch, "
<< "the -polly-context option is ignored. Please provide "
<< "the context in the parameter space: " << SpaceStr << ".\n";
return;
}
UserContext = UserContext.set_dim_id(isl::dim::param, i,
Space.get_dim_id(isl::dim::param, i));
}
isl::set newContext = scop->getContext().intersect(UserContext);
scop->setContext(newContext);
}
isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
isl::union_map Writes) {
auto &Stmt = *Access->getStatement();
BasicBlock *BB = Stmt.getEntryBlock();
if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
Access->isMemoryIntrinsic())
return {};
auto *LI = cast<LoadInst>(Access->getAccessInstruction());
if (hasNonHoistableBasePtrInScop(Access, Writes))
return {};
isl::map AccessRelation = Access->getAccessRelation();
assert(!AccessRelation.is_empty());
if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
return {};
AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
isl::set SafeToLoad;
auto &DL = scop->getFunction().getDataLayout();
if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
LI->getAlign(), DL)) {
SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
} else if (BB != LI->getParent()) {
return {};
} else {
SafeToLoad = AccessRelation.range();
}
if (isAccessRangeTooComplex(AccessRelation.range()))
return {};
isl::union_map Written = Writes.intersect_range(SafeToLoad);
isl::set WrittenCtx = Written.params();
bool IsWritten = !WrittenCtx.is_empty();
if (!IsWritten)
return WrittenCtx;
WrittenCtx = WrittenCtx.remove_divs();
bool TooComplex =
unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
if (TooComplex || !isRequiredInvariantLoad(LI))
return {};
scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
AS_RESTRICTION, LI->getParent());
return WrittenCtx;
}
static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
for (const llvm::Argument &Arg : F.args())
if (&Arg == maybeParam)
return true;
return false;
}
bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
bool StmtInvalidCtxIsEmpty,
bool MAInvalidCtxIsEmpty,
bool NonHoistableCtxIsEmpty) {
LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
const DataLayout &DL = LInst->getDataLayout();
if (PollyAllowDereferenceOfAllFunctionParams &&
isAParameter(LInst->getPointerOperand(), scop->getFunction()))
return true;
if (!isDereferenceableAndAlignedPointer(
LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
return false;
if (!NonHoistableCtxIsEmpty)
return false;
if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
return true;
for (const SCEV *Subscript : MA->subscripts())
if (!isa<SCEVConstant>(Subscript))
return false;
return true;
}
void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
InvariantAccessesTy &InvMAs) {
if (InvMAs.empty())
return;
isl::set StmtInvalidCtx = Stmt.getInvalidContext();
bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
isl::set DomainCtx = Stmt.getDomain().params();
DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
auto *AccInst = InvMAs.front().MA->getAccessInstruction();
scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
return;
}
for (auto &InvMA : InvMAs) {
auto *MA = InvMA.MA;
Instruction *AccInst = MA->getAccessInstruction();
if (SE.isSCEVable(AccInst->getType())) {
SetVector<Value *> Values;
for (const SCEV *Parameter : scop->parameters()) {
Values.clear();
findValues(Parameter, SE, Values);
if (!Values.count(AccInst))
continue;
isl::id ParamId = scop->getIdForParam(Parameter);
if (!ParamId.is_null()) {
int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
if (Dim >= 0)
DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
}
}
}
}
for (auto &InvMA : InvMAs) {
auto *MA = InvMA.MA;
isl::set NHCtx = InvMA.NonHoistableCtx;
LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
Type *Ty = LInst->getType();
const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
isl::set MAInvalidCtx = MA->getInvalidContext();
bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
isl::set MACtx;
if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
NonHoistableCtxIsEmpty)) {
MACtx = isl::set::universe(DomainCtx.get_space());
} else {
MACtx = DomainCtx;
MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
MACtx = MACtx.gist_params(scop->getContext());
}
bool Consolidated = false;
for (auto &IAClass : scop->invariantEquivClasses()) {
if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
continue;
auto &MAs = IAClass.InvariantAccesses;
if (!MAs.empty()) {
auto *LastMA = MAs.front();
isl::set AR = MA->getAccessRelation().range();
isl::set LastAR = LastMA->getAccessRelation().range();
bool SameAR = AR.is_equal(LastAR);
if (!SameAR)
continue;
}
MAs.push_front(MA);
Consolidated = true;
isl::set IAClassDomainCtx = IAClass.ExecutionContext;
if (!IAClassDomainCtx.is_null())
IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
else
IAClassDomainCtx = MACtx;
IAClass.ExecutionContext = IAClassDomainCtx;
break;
}
if (Consolidated)
continue;
MACtx = MACtx.coalesce();
scop->addInvariantEquivClass(
InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
}
}
void ScopBuilder::collectCandidateReductionLoads(
MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
ScopStmt *Stmt = StoreMA->getStatement();
auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
if (!Store)
return;
auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
if (!BinOp)
return;
if (BinOp->getNumUses() != 1)
return;
if (!BinOp->isCommutative() || !BinOp->isAssociative())
return;
if (BinOp->getParent() != Store->getParent())
return;
if (DisableMultiplicativeReductions &&
(BinOp->getOpcode() == Instruction::Mul ||
BinOp->getOpcode() == Instruction::FMul))
return;
auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
if (!PossibleLoad0 && !PossibleLoad1)
return;
if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
if (PossibleLoad0->getParent() == Store->getParent())
Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
if (PossibleLoad1->getParent() == Store->getParent())
Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}
static const ScopArrayInfo *findCanonicalArray(Scop &S,
MemoryAccessList &Accesses) {
for (MemoryAccess *Access : Accesses) {
const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
Access->getAccessInstruction(), MemoryKind::Array);
if (CanonicalArray)
return CanonicalArray;
}
return nullptr;
}
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
if (Access2->getScopArrayInfo() == Array)
return true;
return false;
}
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
const ScopArrayInfo *New) {
for (ScopStmt &Stmt : S)
for (MemoryAccess *Access : Stmt) {
if (Access->getLatestScopArrayInfo() != Old)
continue;
isl::id Id = New->getBasePtrId();
isl::map Map = Access->getAccessRelation();
Map = Map.set_tuple_id(isl::dim::out, Id);
Access->setAccessRelation(Map);
}
}
void ScopBuilder::canonicalizeDynamicBasePtrs() {
for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
const ScopArrayInfo *CanonicalBasePtrSAI =
findCanonicalArray(*scop, BasePtrAccesses);
if (!CanonicalBasePtrSAI)
continue;
for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
!BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
continue;
if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
continue;
replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
}
}
}
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
for (MemoryAccess *Access : Stmt.MemAccs) {
Type *ElementType = Access->getElementType();
MemoryKind Ty;
if (Access->isPHIKind())
Ty = MemoryKind::PHI;
else if (Access->isExitPHIKind())
Ty = MemoryKind::ExitPHI;
else if (Access->isValueKind())
Ty = MemoryKind::Value;
else
Ty = MemoryKind::Array;
for (const SCEV *Size : Access->Sizes) {
if (!Size)
continue;
scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
}
auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
ElementType, Access->Sizes, Ty);
for (const SCEV *Subscript : Access->subscripts()) {
if (!Access->isAffine() || !Subscript)
continue;
scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
&RecordedAssumptions);
}
Access->buildAccessRelation(SAI);
scop->addAccessData(Access);
}
}
static bool buildMinMaxAccess(isl::set Set,
Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
isl::pw_multi_aff MinPMA, MaxPMA;
isl::pw_aff LastDimAff;
isl::aff OneAff;
unsigned Pos;
Set = Set.remove_divs();
polly::simplify(Set);
if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
Set = Set.simple_hull();
if (isl_set_n_param(Set.get()) >
static_cast<isl_size>(RunTimeChecksMaxParameters)) {
unsigned InvolvedParams = 0;
for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
if (Set.involves_dims(isl::dim::param, u, 1))
InvolvedParams++;
if (InvolvedParams > RunTimeChecksMaxParameters)
return false;
}
MinPMA = Set.lexmin_pw_multi_aff();
MaxPMA = Set.lexmax_pw_multi_aff();
MinPMA = MinPMA.coalesce();
MaxPMA = MaxPMA.coalesce();
if (MaxPMA.is_null())
return false;
unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
Pos = MaxOutputSize - 1;
LastDimAff = MaxPMA.at(Pos);
OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
OneAff = OneAff.add_constant_si(1);
LastDimAff = LastDimAff.add(OneAff);
MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
if (MinPMA.is_null() || MaxPMA.is_null())
return false;
MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
return true;
}
bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
Scop::MinMaxVectorTy &MinMaxAccesses) {
MinMaxAccesses.reserve(AliasGroup.size());
isl::union_set Domains = scop->getDomains();
isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());
for (MemoryAccess *MA : AliasGroup)
Accesses = Accesses.unite(MA->getAccessRelation());
Accesses = Accesses.intersect_domain(Domains);
isl::union_set Locations = Accesses.range();
bool LimitReached = false;
for (isl::set Set : Locations.get_set_list()) {
LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
if (LimitReached)
break;
}
return !LimitReached;
}
static isl::set getAccessDomain(MemoryAccess *MA) {
isl::set Domain = MA->getStatement()->getDomain();
Domain = Domain.project_out(isl::dim::set, 0,
unsignedFromIslSize(Domain.tuple_dim()));
return Domain.reset_tuple_id();
}
bool ScopBuilder::buildAliasChecks() {
if (!PollyUseRuntimeAliasChecks)
return true;
if (buildAliasGroups()) {
if (scop->getAliasGroups().size())
Scop::incrementNumberOfAliasingAssumptions(1);
return true;
}
scop->invalidate(ALIASING, DebugLoc());
POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
<< " could not be created. This SCoP has been dismissed.");
return false;
}
std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
ScopBuilder::buildAliasGroupsForAccesses() {
BatchAAResults BAA(AA);
AliasSetTracker AST(BAA);
DenseMap<Value *, MemoryAccess *> PtrToAcc;
DenseSet<const ScopArrayInfo *> HasWriteAccess;
for (ScopStmt &Stmt : *scop) {
isl::set StmtDomain = Stmt.getDomain();
bool StmtDomainEmpty = StmtDomain.is_empty();
if (StmtDomainEmpty)
continue;
for (MemoryAccess *MA : Stmt) {
if (MA->isScalarKind())
continue;
if (!MA->isRead())
HasWriteAccess.insert(MA->getScopArrayInfo());
MemAccInst Acc(MA->getAccessInstruction());
if (MA->isRead() && isa<MemTransferInst>(Acc))
PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
else
PtrToAcc[Acc.getPointerOperand()] = MA;
AST.add(Acc);
}
}
AliasGroupVectorTy AliasGroups;
for (AliasSet &AS : AST) {
if (AS.isMustAlias() || AS.isForwardingAliasSet())
continue;
AliasGroupTy AG;
for (const Value *Ptr : AS.getPointers())
AG.push_back(PtrToAcc[const_cast<Value *>(Ptr)]);
if (AG.size() < 2)
continue;
AliasGroups.push_back(std::move(AG));
}
return std::make_tuple(AliasGroups, HasWriteAccess);
}
bool ScopBuilder::buildAliasGroups() {
AliasGroupVectorTy AliasGroups;
DenseSet<const ScopArrayInfo *> HasWriteAccess;
std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
splitAliasGroupsByDomain(AliasGroups);
for (AliasGroupTy &AG : AliasGroups) {
if (!scop->hasFeasibleRuntimeContext())
return false;
{
IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
bool Valid = buildAliasGroup(AG, HasWriteAccess);
if (!Valid)
return false;
}
if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
scop->invalidate(COMPLEXITY, DebugLoc());
return false;
}
}
return true;
}
bool ScopBuilder::buildAliasGroup(
AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
AliasGroupTy ReadOnlyAccesses;
AliasGroupTy ReadWriteAccesses;
SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
if (AliasGroup.size() < 2)
return true;
for (MemoryAccess *Access : AliasGroup) {
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
Access->getAccessInstruction())
<< "Possibly aliasing pointer, use restrict keyword.");
const ScopArrayInfo *Array = Access->getScopArrayInfo();
if (HasWriteAccess.count(Array)) {
ReadWriteArrays.insert(Array);
ReadWriteAccesses.push_back(Access);
} else {
ReadOnlyArrays.insert(Array);
ReadOnlyAccesses.push_back(Access);
}
}
if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
return true;
if (ReadWriteArrays.empty())
return true;
for (MemoryAccess *MA : AliasGroup) {
if (!MA->isAffine()) {
scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
MA->getAccessInstruction()->getParent());
return false;
}
}
for (MemoryAccess *MA : AliasGroup) {
if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
scop->addRequiredInvariantLoad(
cast<LoadInst>(BasePtrMA->getAccessInstruction()));
}
Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
bool Valid;
Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
if (!Valid)
return false;
if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
RunTimeChecksMaxArraysPerGroup)
return false;
Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
if (!Valid)
return false;
return true;
}
void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
for (unsigned u = 0; u < AliasGroups.size(); u++) {
AliasGroupTy NewAG;
AliasGroupTy &AG = AliasGroups[u];
AliasGroupTy::iterator AGI = AG.begin();
isl::set AGDomain = getAccessDomain(*AGI);
while (AGI != AG.end()) {
MemoryAccess *MA = *AGI;
isl::set MADomain = getAccessDomain(MA);
if (AGDomain.is_disjoint(MADomain)) {
NewAG.push_back(MA);
AGI = AG.erase(AGI);
} else {
AGDomain = AGDomain.unite(MADomain);
AGI++;
}
}
if (NewAG.size() > 1)
AliasGroups.push_back(std::move(NewAG));
}
}
#ifndef NDEBUG
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
auto PhysUse = VirtualUse::create(S, Op, &LI, false);
auto VirtUse = VirtualUse::create(S, Op, &LI, true);
assert(PhysUse.getKind() == VirtUse.getKind());
}
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
for (auto *BB : S->getRegion().blocks()) {
for (auto &Inst : *BB) {
auto *Stmt = S->getStmtFor(&Inst);
if (!Stmt)
continue;
if (isIgnoredIntrinsic(&Inst))
continue;
if (Inst.isTerminator() && Stmt->isBlockStmt())
continue;
for (auto &Op : Inst.operands())
verifyUse(S, Op, LI);
if (isa<StoreInst>(Inst))
continue;
auto VirtDef =
VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
VirtDef.getKind() == VirtualUse::Intra ||
VirtDef.getKind() == VirtualUse::Hoisted);
}
}
if (S->hasSingleExitEdge())
return;
if (!S->getRegion().isTopLevelRegion()) {
for (auto &Inst : *S->getRegion().getExit()) {
if (!isa<PHINode>(Inst))
break;
for (auto &Op : Inst.operands())
verifyUse(S, Op, LI);
}
}
}
#endif
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
SD.getNextID()));
buildStmts(R);
const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
for (BasicBlock *BB : scop->getRegion().blocks()) {
if (SD.isErrorBlock(*BB, scop->getRegion()))
continue;
for (Instruction &Inst : *BB) {
LoadInst *Load = dyn_cast<LoadInst>(&Inst);
if (!Load)
continue;
if (!RIL.count(Load))
continue;
ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
assert(!List.empty());
ScopStmt *RILStmt = List.front();
buildMemoryAccess(Load, RILStmt);
}
}
buildAccessFunctions();
if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
for (Instruction &Inst : *R.getExit()) {
PHINode *PHI = dyn_cast<PHINode>(&Inst);
if (!PHI)
break;
buildPHIAccesses(nullptr, PHI, nullptr, true);
}
}
auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
for (auto GlobalReadPair : GlobalReads) {
ScopStmt *GlobalReadStmt = GlobalReadPair.first;
Instruction *GlobalRead = GlobalReadPair.second;
for (auto *BP : ArrayBasePointers)
addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
}
buildInvariantEquivalenceClasses();
DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
if (!buildDomains(&R, InvalidDomainMap)) {
POLLY_DEBUG(
dbgs() << "Bailing-out because buildDomains encountered problems\n");
return;
}
addUserAssumptions(AC, InvalidDomainMap);
for (ScopStmt &Stmt : scop->Stmts)
if (Stmt.isBlockStmt())
Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
else
Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
Stmt.getRegion()->getNode())]);
scop->removeStmtNotInDomainMap();
scop->simplifySCoP(false);
if (scop->isEmpty()) {
POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
return;
}
for (ScopStmt &Stmt : *scop) {
collectSurroundingLoops(Stmt);
buildDomain(Stmt);
buildAccessRelations(Stmt);
if (DetectReductions)
checkForReductions(Stmt);
}
if (!scop->hasFeasibleRuntimeContext()) {
POLLY_DEBUG(
dbgs() << "Bailing-out because of unfeasible context (early)\n");
return;
}
if (!scop->isProfitable(UnprofitableScalarAccs)) {
scop->invalidate(PROFITABLE, DebugLoc());
POLLY_DEBUG(
dbgs() << "Bailing-out because SCoP is not considered profitable\n");
return;
}
buildSchedule();
finalizeAccesses();
scop->realignParams();
addUserContext();
addRecordedAssumptions();
scop->simplifyContexts();
if (!buildAliasChecks()) {
POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
return;
}
hoistInvariantLoads();
canonicalizeDynamicBasePtrs();
verifyInvariantLoads();
scop->simplifySCoP(true);
if (!scop->hasFeasibleRuntimeContext()) {
POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
return;
}
#ifndef NDEBUG
verifyUses(scop.get(), LI, DT);
#endif
}
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
ScopDetection &SD, ScalarEvolution &SE,
OptimizationRemarkEmitter &ORE)
: AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
DebugLoc Beg, End;
auto P = getBBPairForRegion(R);
getDebugLocations(P, Beg, End);
std::string Msg = "SCoP begins here.";
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
<< Msg);
buildScop(*R, AC);
POLLY_DEBUG(dbgs() << *scop);
if (!scop->hasFeasibleRuntimeContext()) {
InfeasibleScops++;
Msg = "SCoP ends here but was dismissed.";
POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n");
RecordedAssumptions.clear();
scop.reset();
} else {
Msg = "SCoP ends here.";
++ScopFound;
if (scop->getMaxLoopDepth() > 0)
++RichScopFound;
}
if (R->isTopLevelRegion())
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
<< Msg);
else
ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
<< Msg);
}