#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#define DEBUG_TYPE "affine-analysis"
using namespace mlir;
using namespace affine;
using namespace presburger;
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
arith::AtomicRMWKind &kind) {
SmallVector<Operation *> combinerOps;
Value reducedVal =
matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
if (!reducedVal)
return nullptr;
if (combinerOps.size() > 1)
return nullptr;
Operation *combinerOp = combinerOps.back();
std::optional<arith::AtomicRMWKind> maybeKind =
TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(combinerOp)
.Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
.Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
.Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
.Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
.Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
.Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
.Case(
[](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
.Case(
[](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
.Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
.Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
.Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
.Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
.Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
return std::nullopt;
});
if (!maybeKind)
return nullptr;
kind = *maybeKind;
return reducedVal;
}
void mlir::affine::getSupportedReductions(
AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
unsigned numIterArgs = forOp.getNumIterOperands();
if (numIterArgs == 0)
return;
supportedReductions.reserve(numIterArgs);
for (unsigned i = 0; i < numIterArgs; ++i) {
arith::AtomicRMWKind kind;
if (Value value = getSupportedReduction(forOp, i, kind))
supportedReductions.emplace_back(LoopReduction{kind, i, value});
}
}
bool mlir::affine::isLoopParallel(
AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
unsigned numIterArgs = forOp.getNumIterOperands();
if (numIterArgs > 0 && !parallelReductions)
return false;
if (parallelReductions) {
getSupportedReductions(forOp, *parallelReductions);
if (parallelReductions->size() != numIterArgs)
return false;
}
return isLoopMemoryParallel(forOp);
}
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
Operation *defOp = v.getDefiningOp();
if (!defOp)
return false;
if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
enclosingOp->isProperAncestor(defOp))
return true;
auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}
bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
return false;
SmallVector<Operation *, 8> loadAndStoreOps;
auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
if (!isLocallyDefined(readOp.getMemRef(), forOp))
loadAndStoreOps.push_back(op);
} else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
if (!isLocallyDefined(writeOp.getMemRef(), forOp))
loadAndStoreOps.push_back(op);
} else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
!hasSingleEffect<MemoryEffects::Allocate>(op) &&
!isMemoryEffectFree(op)) {
return WalkResult::interrupt();
}
return WalkResult::advance();
});
if (walkResult.wasInterrupted())
return false;
unsigned depth = getNestingDepth(forOp) + 1;
for (auto *srcOp : loadAndStoreOps) {
MemRefAccess srcAccess(srcOp);
for (auto *dstOp : loadAndStoreOps) {
MemRefAccess dstAccess(dstOp);
DependenceResult result =
checkMemrefAccessDependence(srcAccess, dstAccess, depth);
if (result.value != DependenceResult::NoDependence)
return false;
}
}
return true;
}
void mlir::affine::getReachableAffineApplyOps(
ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
struct State {
Value value;
unsigned operandIndex;
};
SmallVector<State, 4> worklist;
for (auto operand : operands) {
worklist.push_back({operand, 0});
}
while (!worklist.empty()) {
State &state = worklist.back();
auto *opInst = state.value.getDefiningOp();
if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
worklist.pop_back();
continue;
}
if (state.operandIndex == 0) {
affineApplyOps.push_back(opInst);
}
if (state.operandIndex < opInst->getNumOperands()) {
auto nextOperand = opInst->getOperand(state.operandIndex);
++state.operandIndex;
worklist.push_back({nextOperand, 0});
} else {
worklist.pop_back();
}
}
}
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
FlatAffineValueConstraints *domain) {
SmallVector<Value, 4> indices;
SmallVector<Operation *, 8> loopOps;
size_t numDims = 0;
for (Operation *op : ops) {
if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
"parallel ops");
return failure();
}
if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
loopOps.push_back(forOp);
numDims += 1;
} else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
loopOps.push_back(parallelOp);
numDims += parallelOp.getNumDims();
}
}
extractInductionVars(loopOps, indices);
*domain = FlatAffineValueConstraints(numDims, 0,
0, indices);
for (Operation *op : ops) {
if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
if (failed(domain->addAffineForOpDomain(forOp)))
return failure();
} else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
domain->addAffineIfOpDomain(ifOp);
} else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
if (failed(domain->addAffineParallelOpDomain(parallelOp)))
return failure();
}
return success();
}
static LogicalResult getOpIndexSet(Operation *op,
FlatAffineValueConstraints *indexSet) {
SmallVector<Operation *, 4> ops;
getEnclosingAffineOps(*op, &ops);
return getIndexSet(ops, indexSet);
}
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
const FlatAffineValueConstraints &dstDomain,
SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
unsigned minNumLoops =
std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
unsigned numCommonLoops = 0;
for (unsigned i = 0; i < minNumLoops; ++i) {
if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
!isAffineParallelInductionVar(srcDomain.getValue(i))) ||
(!isAffineForInductionVar(dstDomain.getValue(i)) &&
!isAffineParallelInductionVar(dstDomain.getValue(i))) ||
srcDomain.getValue(i) != dstDomain.getValue(i))
break;
if (commonLoops != nullptr)
commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
++numCommonLoops;
}
if (commonLoops != nullptr)
assert(commonLoops->size() == numCommonLoops);
return numCommonLoops;
}
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
auto getChainOfAncestorBlocks =
[&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
Block *currBlock = op->getBlock();
while (currBlock &&
!currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
ancestorBlocks.push_back(currBlock);
currBlock = currBlock->getParentOp()->getBlock();
}
assert(currBlock &&
"parent op starting an affine scope is always expected");
ancestorBlocks.push_back(currBlock);
};
SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
getChainOfAncestorBlocks(opA, srcAncestorBlocks);
getChainOfAncestorBlocks(opB, dstAncestorBlocks);
Block *commonBlock = nullptr;
for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
i--, j--)
commonBlock = srcAncestorBlocks[i];
return commonBlock;
}
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
const MemRefAccess &dstAccess) {
Block *commonBlock =
getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
assert(commonBlock &&
"ops expected to have a common surrounding block in affine scope");
Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
assert(srcOp && "src access op must lie in common block");
Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
assert(dstOp && "dest access op must lie in common block");
return srcOp->isBeforeInBlock(dstOp);
}
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
const FlatAffineValueConstraints &dstDomain,
unsigned loopDepth,
IntegerRelation *dependenceDomain) {
unsigned numCols = dependenceDomain->getNumCols();
SmallVector<int64_t, 4> eq(numCols);
unsigned numSrcDims = srcDomain.getNumDimVars();
unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
std::fill(eq.begin(), eq.end(), 0);
eq[i] = -1;
eq[i + numSrcDims] = 1;
if (i == loopDepth - 1) {
eq[numCols - 1] = -1;
dependenceDomain->addInequality(eq);
} else {
dependenceDomain->addEquality(eq);
}
}
}
static void computeDirectionVector(
const FlatAffineValueConstraints &srcDomain,
const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
IntegerPolyhedron *dependenceDomain,
SmallVector<DependenceComponent, 2> *dependenceComponents) {
SmallVector<AffineForOp, 4> commonLoops;
unsigned numCommonLoops =
getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
if (numCommonLoops == 0)
return;
unsigned numIdsToEliminate = dependenceDomain->getNumVars();
dependenceDomain->insertVar(VarKind::SetDim, 0,
numCommonLoops);
SmallVector<int64_t, 4> eq;
eq.resize(dependenceDomain->getNumCols());
unsigned numSrcDims = srcDomain.getNumDimVars();
for (unsigned j = 0; j < numCommonLoops; ++j) {
std::fill(eq.begin(), eq.end(), 0);
eq[j] = 1;
eq[j + numCommonLoops] = 1;
eq[j + numCommonLoops + numSrcDims] = -1;
dependenceDomain->addEquality(eq);
}
dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);
dependenceComponents->resize(numCommonLoops);
for (unsigned j = 0; j < numCommonLoops; ++j) {
(*dependenceComponents)[j].op = commonLoops[j].getOperation();
auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
(*dependenceComponents)[j].lb =
lbConst.value_or(std::numeric_limits<int64_t>::min());
auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
(*dependenceComponents)[j].ub =
ubConst.value_or(std::numeric_limits<int64_t>::max());
}
}
LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
FlatAffineValueConstraints domain;
if (failed(getOpIndexSet(opInst, &domain)))
return failure();
AffineValueMap accessValueMap;
getAccessMap(&accessValueMap);
if (failed(getRelationFromMap(accessValueMap, rel)))
return failure();
unsigned inserts = 0;
for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
const Identifier domainIdi = Identifier(domain.getValue(i));
const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
if (itr != findEnd) {
rel.swapVar(i, i + std::distance(findBegin, itr));
} else {
++inserts;
rel.insertVar(VarKind::SetDim, i);
rel.setId(VarKind::SetDim, i, domainIdi);
}
}
IntegerRelation domainRel = domain;
if (rel.getSpace().isUsingIds() && !domainRel.getSpace().isUsingIds())
domainRel.resetIds();
domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
domainRel.mergeAndAlignSymbols(rel);
domainRel.mergeLocalVars(rel);
rel.append(domainRel);
rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
VarKind::Domain);
return success();
}
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
AffineMap map;
if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
map = loadOp.getAffineMap();
else
map = cast<AffineWriteOpInterface>(opInst).getAffineMap();
SmallVector<Value, 8> operands(indices.begin(), indices.end());
fullyComposeAffineMapAndOperands(&map, &operands);
map = simplifyAffineMap(map);
canonicalizeMapAndOperands(&map, &operands);
accessMap->reset(map, operands);
}
DependenceResult mlir::affine::checkMemrefAccessDependence(
const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
<< Twine(loopDepth) << " between:\n";);
LLVM_DEBUG(srcAccess.opInst->dump());
LLVM_DEBUG(dstAccess.opInst->dump());
if (srcAccess.memref != dstAccess.memref)
return DependenceResult::NoDependence;
if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
!isa<AffineWriteOpInterface>(dstAccess.opInst))
return DependenceResult::NoDependence;
if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
return DependenceResult::Failure;
if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
return DependenceResult::Failure;
PresburgerSpace space = PresburgerSpace::getRelationSpace();
IntegerRelation srcRel(space), dstRel(space);
if (failed(srcAccess.getAccessRelation(srcRel)))
return DependenceResult::Failure;
if (failed(dstAccess.getAccessRelation(dstRel)))
return DependenceResult::Failure;
FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());
unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
assert(loopDepth <= numCommonLoops + 1);
if (!allowRAR && loopDepth > numCommonLoops &&
!srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
return DependenceResult::NoDependence;
}
dstRel.inverse();
dstRel.mergeAndCompose(srcRel);
dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
VarKind::Range, 0);
IntegerPolyhedron dependenceDomain(dstRel);
addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);
if (dependenceDomain.isEmpty())
return DependenceResult::NoDependence;
if (dependenceComponents != nullptr)
computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
dependenceComponents);
LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
LLVM_DEBUG(dependenceDomain.dump());
FlatAffineValueConstraints result(dependenceDomain);
if (dependenceConstraints)
*dependenceConstraints = result;
return DependenceResult::HasDependence;
}
void mlir::affine::getDependenceComponents(
AffineForOp forOp, unsigned maxLoopDepth,
std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
SmallVector<Operation *, 8> loadAndStoreOps;
forOp->walk([&](Operation *op) {
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
loadAndStoreOps.push_back(op);
});
unsigned numOps = loadAndStoreOps.size();
for (unsigned d = 1; d <= maxLoopDepth; ++d) {
for (unsigned i = 0; i < numOps; ++i) {
auto *srcOp = loadAndStoreOps[i];
MemRefAccess srcAccess(srcOp);
for (unsigned j = 0; j < numOps; ++j) {
auto *dstOp = loadAndStoreOps[j];
MemRefAccess dstAccess(dstOp);
SmallVector<DependenceComponent, 2> depComps;
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, d, nullptr,
&depComps);
if (hasDependence(result))
depCompsVec->push_back(depComps);
}
}
}
}