#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Analysis/Presburger/PresburgerRelation.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#define DEBUG_TYPE "analysis-utils"
using namespace mlir;
using namespace affine;
using namespace presburger;
using llvm::SmallDenseMap;
using Node = MemRefDependenceGraph::Node;
void LoopNestStateCollector::collect(Operation *opToWalk) {
opToWalk->walk([&](Operation *op) {
if (isa<AffineForOp>(op))
forOps.push_back(cast<AffineForOp>(op));
else if (op->getNumRegions() != 0 && !isa<AffineIfOp>(op))
hasNonAffineRegionOp = true;
else if (isa<AffineReadOpInterface>(op))
loadOpInsts.push_back(op);
else if (isa<AffineWriteOpInterface>(op))
storeOpInsts.push_back(op);
});
}
unsigned Node::getLoadOpCount(Value memref) const {
unsigned loadOpCount = 0;
for (Operation *loadOp : loads) {
if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
++loadOpCount;
}
return loadOpCount;
}
unsigned Node::getStoreOpCount(Value memref) const {
unsigned storeOpCount = 0;
for (Operation *storeOp : stores) {
if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
++storeOpCount;
}
return storeOpCount;
}
void Node::getStoreOpsForMemref(Value memref,
SmallVectorImpl<Operation *> *storeOps) const {
for (Operation *storeOp : stores) {
if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
storeOps->push_back(storeOp);
}
}
void Node::getLoadOpsForMemref(Value memref,
SmallVectorImpl<Operation *> *loadOps) const {
for (Operation *loadOp : loads) {
if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
loadOps->push_back(loadOp);
}
}
void Node::getLoadAndStoreMemrefSet(
DenseSet<Value> *loadAndStoreMemrefSet) const {
llvm::SmallDenseSet<Value, 2> loadMemrefs;
for (Operation *loadOp : loads) {
loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
}
for (Operation *storeOp : stores) {
auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
if (loadMemrefs.count(memref) > 0)
loadAndStoreMemrefSet->insert(memref);
}
}
bool MemRefDependenceGraph::init() {
LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
DenseMap<Value, SetVector<unsigned>> memrefAccesses;
DenseMap<Operation *, unsigned> forToNodeMap;
for (Operation &op : block) {
if (dyn_cast<AffineForOp>(op)) {
LoopNestStateCollector collector;
collector.collect(&op);
if (collector.hasNonAffineRegionOp)
return false;
Node node(nextNodeId++, &op);
for (auto *opInst : collector.loadOpInsts) {
node.loads.push_back(opInst);
auto memref = cast<AffineReadOpInterface>(opInst).getMemRef();
memrefAccesses[memref].insert(node.id);
}
for (auto *opInst : collector.storeOpInsts) {
node.stores.push_back(opInst);
auto memref = cast<AffineWriteOpInterface>(opInst).getMemRef();
memrefAccesses[memref].insert(node.id);
}
forToNodeMap[&op] = node.id;
nodes.insert({node.id, node});
} else if (dyn_cast<AffineReadOpInterface>(op)) {
Node node(nextNodeId++, &op);
node.loads.push_back(&op);
auto memref = cast<AffineReadOpInterface>(op).getMemRef();
memrefAccesses[memref].insert(node.id);
nodes.insert({node.id, node});
} else if (dyn_cast<AffineWriteOpInterface>(op)) {
Node node(nextNodeId++, &op);
node.stores.push_back(&op);
auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
memrefAccesses[memref].insert(node.id);
nodes.insert({node.id, node});
} else if (op.getNumResults() > 0 && !op.use_empty()) {
Node node(nextNodeId++, &op);
nodes.insert({node.id, node});
} else if (!isMemoryEffectFree(&op) &&
(op.getNumRegions() == 0 || isa<RegionBranchOpInterface>(op))) {
Node node(nextNodeId++, &op);
nodes.insert({node.id, node});
} else if (op.getNumRegions() != 0) {
LLVM_DEBUG(llvm::dbgs()
<< "MDG init failed; unknown region-holding op found!\n");
return false;
}
}
for (auto &idAndNode : nodes) {
LLVM_DEBUG(llvm::dbgs() << "Create node " << idAndNode.first << " for:\n"
<< *(idAndNode.second.op) << "\n");
(void)idAndNode;
}
for (auto &idAndNode : nodes) {
const Node &node = idAndNode.second;
if (!node.stores.empty())
continue;
Operation *opInst = node.op;
for (Value value : opInst->getResults()) {
for (Operation *user : value.getUsers()) {
if (block.getParent()->findAncestorOpInRegion(*user)->getBlock() !=
&block)
continue;
SmallVector<AffineForOp, 4> loops;
getAffineForIVs(*user, &loops);
auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
return loop->getBlock() == █
});
if (it == loops.end())
continue;
assert(forToNodeMap.count(*it) > 0 && "missing mapping");
unsigned userLoopNestId = forToNodeMap[*it];
addEdge(node.id, userLoopNestId, value);
}
}
}
for (auto &memrefAndList : memrefAccesses) {
unsigned n = memrefAndList.second.size();
for (unsigned i = 0; i < n; ++i) {
unsigned srcId = memrefAndList.second[i];
bool srcHasStore =
getNode(srcId)->getStoreOpCount(memrefAndList.first) > 0;
for (unsigned j = i + 1; j < n; ++j) {
unsigned dstId = memrefAndList.second[j];
bool dstHasStore =
getNode(dstId)->getStoreOpCount(memrefAndList.first) > 0;
if (srcHasStore || dstHasStore)
addEdge(srcId, dstId, memrefAndList.first);
}
}
}
return true;
}
Node *MemRefDependenceGraph::getNode(unsigned id) {
auto it = nodes.find(id);
assert(it != nodes.end());
return &it->second;
}
Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) {
for (auto &idAndNode : nodes)
if (idAndNode.second.op == forOp)
return &idAndNode.second;
return nullptr;
}
unsigned MemRefDependenceGraph::addNode(Operation *op) {
Node node(nextNodeId++, op);
nodes.insert({node.id, node});
return node.id;
}
void MemRefDependenceGraph::removeNode(unsigned id) {
if (inEdges.count(id) > 0) {
SmallVector<Edge, 2> oldInEdges = inEdges[id];
for (auto &inEdge : oldInEdges) {
removeEdge(inEdge.id, id, inEdge.value);
}
}
if (outEdges.count(id) > 0) {
SmallVector<Edge, 2> oldOutEdges = outEdges[id];
for (auto &outEdge : oldOutEdges) {
removeEdge(id, outEdge.id, outEdge.value);
}
}
inEdges.erase(id);
outEdges.erase(id);
nodes.erase(id);
}
bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) {
Node *node = getNode(id);
for (auto *storeOpInst : node->stores) {
auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
auto *op = memref.getDefiningOp();
if (!op)
return true;
for (auto *user : memref.getUsers())
if (!isa<AffineMapAccessInterface>(*user))
return true;
}
return false;
}
bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
Value value) {
if (outEdges.count(srcId) == 0 || inEdges.count(dstId) == 0) {
return false;
}
bool hasOutEdge = llvm::any_of(outEdges[srcId], [=](Edge &edge) {
return edge.id == dstId && (!value || edge.value == value);
});
bool hasInEdge = llvm::any_of(inEdges[dstId], [=](Edge &edge) {
return edge.id == srcId && (!value || edge.value == value);
});
return hasOutEdge && hasInEdge;
}
void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
Value value) {
if (!hasEdge(srcId, dstId, value)) {
outEdges[srcId].push_back({dstId, value});
inEdges[dstId].push_back({srcId, value});
if (isa<MemRefType>(value.getType()))
memrefEdgeCount[value]++;
}
}
void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
Value value) {
assert(inEdges.count(dstId) > 0);
assert(outEdges.count(srcId) > 0);
if (isa<MemRefType>(value.getType())) {
assert(memrefEdgeCount.count(value) > 0);
memrefEdgeCount[value]--;
}
for (auto *it = inEdges[dstId].begin(); it != inEdges[dstId].end(); ++it) {
if ((*it).id == srcId && (*it).value == value) {
inEdges[dstId].erase(it);
break;
}
}
for (auto *it = outEdges[srcId].begin(); it != outEdges[srcId].end(); ++it) {
if ((*it).id == dstId && (*it).value == value) {
outEdges[srcId].erase(it);
break;
}
}
}
bool MemRefDependenceGraph::hasDependencePath(unsigned srcId, unsigned dstId) {
SmallVector<std::pair<unsigned, unsigned>, 4> worklist;
worklist.push_back({srcId, 0});
Operation *dstOp = getNode(dstId)->op;
while (!worklist.empty()) {
auto &idAndIndex = worklist.back();
if (idAndIndex.first == dstId)
return true;
if (outEdges.count(idAndIndex.first) == 0 ||
idAndIndex.second == outEdges[idAndIndex.first].size()) {
worklist.pop_back();
continue;
}
Edge edge = outEdges[idAndIndex.first][idAndIndex.second];
++idAndIndex.second;
bool afterDst = dstOp->isBeforeInBlock(getNode(edge.id)->op);
if (!afterDst && edge.id != idAndIndex.first)
worklist.push_back({edge.id, 0});
}
return false;
}
unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
Value memref) {
unsigned inEdgeCount = 0;
if (inEdges.count(id) > 0)
for (auto &inEdge : inEdges[id])
if (inEdge.value == memref) {
Node *srcNode = getNode(inEdge.id);
if (srcNode->getStoreOpCount(memref) > 0)
++inEdgeCount;
}
return inEdgeCount;
}
unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id, Value memref) {
unsigned outEdgeCount = 0;
if (outEdges.count(id) > 0)
for (auto &outEdge : outEdges[id])
if (!memref || outEdge.value == memref)
++outEdgeCount;
return outEdgeCount;
}
void MemRefDependenceGraph::gatherDefiningNodes(
unsigned id, DenseSet<unsigned> &definingNodes) {
for (MemRefDependenceGraph::Edge edge : inEdges[id])
if (!isa<MemRefType>(edge.value.getType()))
definingNodes.insert(edge.id);
}
Operation *
MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
unsigned dstId) {
if (outEdges.count(srcId) == 0)
return getNode(dstId)->op;
DenseSet<unsigned> definingNodes;
gatherDefiningNodes(dstId, definingNodes);
if (llvm::any_of(definingNodes,
[&](unsigned id) { return hasDependencePath(srcId, id); })) {
LLVM_DEBUG(llvm::dbgs()
<< "Can't fuse: a defining op with a user in the dst "
"loop has dependence from the src loop\n");
return nullptr;
}
SmallPtrSet<Operation *, 2> srcDepInsts;
for (auto &outEdge : outEdges[srcId])
if (outEdge.id != dstId)
srcDepInsts.insert(getNode(outEdge.id)->op);
SmallPtrSet<Operation *, 2> dstDepInsts;
for (auto &inEdge : inEdges[dstId])
if (inEdge.id != srcId)
dstDepInsts.insert(getNode(inEdge.id)->op);
Operation *srcNodeInst = getNode(srcId)->op;
Operation *dstNodeInst = getNode(dstId)->op;
SmallVector<Operation *, 2> depInsts;
std::optional<unsigned> firstSrcDepPos;
std::optional<unsigned> lastDstDepPos;
unsigned pos = 0;
for (Block::iterator it = std::next(Block::iterator(srcNodeInst));
it != Block::iterator(dstNodeInst); ++it) {
Operation *op = &(*it);
if (srcDepInsts.count(op) > 0 && firstSrcDepPos == std::nullopt)
firstSrcDepPos = pos;
if (dstDepInsts.count(op) > 0)
lastDstDepPos = pos;
depInsts.push_back(op);
++pos;
}
if (firstSrcDepPos.has_value()) {
if (lastDstDepPos.has_value()) {
if (*firstSrcDepPos <= *lastDstDepPos) {
return nullptr;
}
}
return depInsts[*firstSrcDepPos];
}
return dstNodeInst;
}
void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
const DenseSet<Value> &privateMemRefs,
bool removeSrcId) {
if (inEdges.count(srcId) > 0) {
SmallVector<Edge, 2> oldInEdges = inEdges[srcId];
for (auto &inEdge : oldInEdges) {
if (privateMemRefs.count(inEdge.value) == 0)
addEdge(inEdge.id, dstId, inEdge.value);
}
}
if (outEdges.count(srcId) > 0) {
SmallVector<Edge, 2> oldOutEdges = outEdges[srcId];
for (auto &outEdge : oldOutEdges) {
if (outEdge.id == dstId)
removeEdge(srcId, outEdge.id, outEdge.value);
else if (removeSrcId) {
addEdge(dstId, outEdge.id, outEdge.value);
removeEdge(srcId, outEdge.id, outEdge.value);
}
}
}
if (inEdges.count(dstId) > 0 && !privateMemRefs.empty()) {
SmallVector<Edge, 2> oldInEdges = inEdges[dstId];
for (auto &inEdge : oldInEdges)
if (privateMemRefs.count(inEdge.value) > 0)
removeEdge(inEdge.id, dstId, inEdge.value);
}
}
void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
if (inEdges.count(sibId) > 0) {
SmallVector<Edge, 2> oldInEdges = inEdges[sibId];
for (auto &inEdge : oldInEdges) {
addEdge(inEdge.id, dstId, inEdge.value);
removeEdge(inEdge.id, sibId, inEdge.value);
}
}
if (outEdges.count(sibId) > 0) {
SmallVector<Edge, 2> oldOutEdges = outEdges[sibId];
for (auto &outEdge : oldOutEdges) {
addEdge(dstId, outEdge.id, outEdge.value);
removeEdge(sibId, outEdge.id, outEdge.value);
}
}
}
void MemRefDependenceGraph::addToNode(
unsigned id, const SmallVectorImpl<Operation *> &loads,
const SmallVectorImpl<Operation *> &stores) {
Node *node = getNode(id);
llvm::append_range(node->loads, loads);
llvm::append_range(node->stores, stores);
}
void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
Node *node = getNode(id);
node->loads.clear();
node->stores.clear();
}
void MemRefDependenceGraph::forEachMemRefInputEdge(
unsigned id, const std::function<void(Edge)> &callback) {
if (inEdges.count(id) > 0)
forEachMemRefEdge(inEdges[id], callback);
}
void MemRefDependenceGraph::forEachMemRefOutputEdge(
unsigned id, const std::function<void(Edge)> &callback) {
if (outEdges.count(id) > 0)
forEachMemRefEdge(outEdges[id], callback);
}
void MemRefDependenceGraph::forEachMemRefEdge(
ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
for (const auto &edge : edges) {
if (!isa<MemRefType>(edge.value.getType()))
continue;
assert(nodes.count(edge.id) > 0);
if (!isa<AffineForOp>(getNode(edge.id)->op))
continue;
callback(edge);
}
}
void MemRefDependenceGraph::print(raw_ostream &os) const {
os << "\nMemRefDependenceGraph\n";
os << "\nNodes:\n";
for (const auto &idAndNode : nodes) {
os << "Node: " << idAndNode.first << "\n";
auto it = inEdges.find(idAndNode.first);
if (it != inEdges.end()) {
for (const auto &e : it->second)
os << " InEdge: " << e.id << " " << e.value << "\n";
}
it = outEdges.find(idAndNode.first);
if (it != outEdges.end()) {
for (const auto &e : it->second)
os << " OutEdge: " << e.id << " " << e.value << "\n";
}
}
}
void mlir::affine::getAffineForIVs(Operation &op,
SmallVectorImpl<AffineForOp> *loops) {
auto *currOp = op.getParentOp();
AffineForOp currAffineForOp;
while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
loops->push_back(currAffineForOp);
currOp = currOp->getParentOp();
}
std::reverse(loops->begin(), loops->end());
}
void mlir::affine::getEnclosingAffineOps(Operation &op,
SmallVectorImpl<Operation *> *ops) {
ops->clear();
Operation *currOp = op.getParentOp();
while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(currOp))
ops->push_back(currOp);
currOp = currOp->getParentOp();
}
std::reverse(ops->begin(), ops->end());
}
LogicalResult ComputationSliceState::getSourceAsConstraints(
FlatAffineValueConstraints &cst) const {
assert(!ivs.empty() && "Cannot have a slice without its IVs");
cst = FlatAffineValueConstraints(ivs.size(), 0,
0, ivs);
for (Value iv : ivs) {
AffineForOp loop = getForInductionVarOwner(iv);
assert(loop && "Expected affine for");
if (failed(cst.addAffineForOpDomain(loop)))
return failure();
}
return success();
}
LogicalResult
ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) const {
assert(!lbOperands.empty());
unsigned numDims = ivs.size();
unsigned numSymbols = lbOperands[0].size();
SmallVector<Value, 4> values(ivs);
values.append(lbOperands[0].begin(), lbOperands[0].end());
*cst = FlatAffineValueConstraints(numDims, numSymbols, 0, values);
for (unsigned i = numDims, end = values.size(); i < end; ++i) {
Value value = values[i];
assert(cst->containsVar(value) && "value expected to be present");
if (isValidSymbol(value)) {
if (std::optional<int64_t> cOp = getConstantIntValue(value))
cst->addBound(BoundType::EQ, value, cOp.value());
} else if (auto loop = getForInductionVarOwner(value)) {
if (failed(cst->addAffineForOpDomain(loop)))
return failure();
}
}
LogicalResult ret = cst->addSliceBounds(ivs, lbs, ubs, lbOperands[0]);
assert(succeeded(ret) &&
"should not fail as we never have semi-affine slice maps");
(void)ret;
return success();
}
void ComputationSliceState::clearBounds() {
lbs.clear();
ubs.clear();
lbOperands.clear();
ubOperands.clear();
}
void ComputationSliceState::dump() const {
llvm::errs() << "\tIVs:\n";
for (Value iv : ivs)
llvm::errs() << "\t\t" << iv << "\n";
llvm::errs() << "\tLBs:\n";
for (auto en : llvm::enumerate(lbs)) {
llvm::errs() << "\t\t" << en.value() << "\n";
llvm::errs() << "\t\tOperands:\n";
for (Value lbOp : lbOperands[en.index()])
llvm::errs() << "\t\t\t" << lbOp << "\n";
}
llvm::errs() << "\tUBs:\n";
for (auto en : llvm::enumerate(ubs)) {
llvm::errs() << "\t\t" << en.value() << "\n";
llvm::errs() << "\t\tOperands:\n";
for (Value ubOp : ubOperands[en.index()])
llvm::errs() << "\t\t\t" << ubOp << "\n";
}
}
std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
"Unexpected number of lbs, ubs and ivs in slice");
for (unsigned i = 0, end = lbs.size(); i < end; ++i) {
AffineMap lbMap = lbs[i];
AffineMap ubMap = ubs[i];
if (!lbMap || !ubMap || lbMap.getNumResults() != 1 ||
ubMap.getNumResults() != 1 ||
lbMap.getResult(0) + 1 != ubMap.getResult(0) ||
isa<AffineConstantExpr>(lbMap.getResult(0)))
return std::nullopt;
AffineDimExpr result = dyn_cast<AffineDimExpr>(lbMap.getResult(0));
if (!result)
return std::nullopt;
AffineForOp dstLoop =
getForInductionVarOwner(lbOperands[i][result.getPosition()]);
if (!dstLoop)
return std::nullopt;
AffineMap dstLbMap = dstLoop.getLowerBoundMap();
AffineMap dstUbMap = dstLoop.getUpperBoundMap();
AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
assert(srcLoop && "Expected affine for");
AffineMap srcLbMap = srcLoop.getLowerBoundMap();
AffineMap srcUbMap = srcLoop.getUpperBoundMap();
if (srcLbMap.getNumResults() != 1 || srcUbMap.getNumResults() != 1 ||
dstLbMap.getNumResults() != 1 || dstUbMap.getNumResults() != 1)
return std::nullopt;
AffineExpr srcLbResult = srcLbMap.getResult(0);
AffineExpr dstLbResult = dstLbMap.getResult(0);
AffineExpr srcUbResult = srcUbMap.getResult(0);
AffineExpr dstUbResult = dstUbMap.getResult(0);
if (!isa<AffineConstantExpr>(srcLbResult) ||
!isa<AffineConstantExpr>(srcUbResult) ||
!isa<AffineConstantExpr>(dstLbResult) ||
!isa<AffineConstantExpr>(dstUbResult))
return std::nullopt;
if (srcLbResult != dstLbResult || srcUbResult != dstUbResult ||
srcLoop.getStep() != dstLoop.getStep())
return false;
}
return true;
}
std::optional<bool> ComputationSliceState::isSliceValid() const {
std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
if (isValidFastCheck && *isValidFastCheck)
return true;
FlatAffineValueConstraints srcConstraints;
if (failed(getSourceAsConstraints(srcConstraints))) {
LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
return std::nullopt;
}
if (srcConstraints.getNumSymbolVars() > 0) {
LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
return std::nullopt;
}
if (srcConstraints.getNumLocalVars() != 0) {
LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
return std::nullopt;
}
FlatAffineValueConstraints sliceConstraints;
if (failed(getAsConstraints(&sliceConstraints))) {
LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
return std::nullopt;
}
sliceConstraints.projectOut(ivs.size(),
sliceConstraints.getNumVars() - ivs.size());
LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
LLVM_DEBUG(srcConstraints.dump());
LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
"(expressed in terms of its source's IVs):\n");
LLVM_DEBUG(sliceConstraints.dump());
PresburgerSet srcSet(srcConstraints);
PresburgerSet sliceSet(sliceConstraints);
PresburgerSet diffSet = sliceSet.subtract(srcSet);
if (!diffSet.isIntegerEmpty()) {
LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
return false;
}
return true;
}
std::optional<bool> ComputationSliceState::isMaximal() const {
std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
if (isMaximalFastCheck)
return isMaximalFastCheck;
FlatAffineValueConstraints srcConstraints(ivs.size(),
0,
0, ivs);
for (Value iv : ivs) {
AffineForOp loop = getForInductionVarOwner(iv);
assert(loop && "Expected affine for");
if (failed(srcConstraints.addAffineForOpDomain(loop)))
return std::nullopt;
}
SmallVector<Value> consumerIVs;
for (Value lbOp : lbOperands[0])
if (getForInductionVarOwner(lbOp))
consumerIVs.push_back(lbOp);
for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
consumerIVs.push_back(Value());
FlatAffineValueConstraints sliceConstraints(consumerIVs.size(),
0,
0, consumerIVs);
if (failed(sliceConstraints.addDomainFromSliceMaps(lbs, ubs, lbOperands[0])))
return std::nullopt;
if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
return std::nullopt;
PresburgerSet srcSet(srcConstraints);
PresburgerSet sliceSet(sliceConstraints);
PresburgerSet diffSet = srcSet.subtract(sliceSet);
return diffSet.isIntegerEmpty();
}
unsigned MemRefRegion::getRank() const {
return cast<MemRefType>(memref.getType()).getRank();
}
std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
SmallVectorImpl<int64_t> *shape, std::vector<SmallVector<int64_t, 4>> *lbs,
SmallVectorImpl<int64_t> *lbDivisors) const {
auto memRefType = cast<MemRefType>(memref.getType());
unsigned rank = memRefType.getRank();
if (shape)
shape->reserve(rank);
assert(rank == cst.getNumDimVars() && "inconsistent memref region");
FlatAffineValueConstraints cstWithShapeBounds(cst);
for (unsigned r = 0; r < rank; r++) {
cstWithShapeBounds.addBound(BoundType::LB, r, 0);
int64_t dimSize = memRefType.getDimSize(r);
if (ShapedType::isDynamic(dimSize))
continue;
cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1);
}
int64_t numElements = 1;
int64_t diffConstant;
int64_t lbDivisor;
for (unsigned d = 0; d < rank; d++) {
SmallVector<int64_t, 4> lb;
std::optional<int64_t> diff =
cstWithShapeBounds.getConstantBoundOnDimSize64(d, &lb, &lbDivisor);
if (diff.has_value()) {
diffConstant = *diff;
assert(diffConstant >= 0 && "Dim size bound can't be negative");
assert(lbDivisor > 0);
} else {
auto dimSize = memRefType.getDimSize(d);
if (dimSize == ShapedType::kDynamic)
return std::nullopt;
diffConstant = dimSize;
lb.resize(cstWithShapeBounds.getNumSymbolVars() + 1, 0);
lbDivisor = 1;
}
numElements *= diffConstant;
if (lbs) {
lbs->push_back(lb);
assert(lbDivisors && "both lbs and lbDivisor or none");
lbDivisors->push_back(lbDivisor);
}
if (shape) {
shape->push_back(diffConstant);
}
}
return numElements;
}
void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
AffineMap &ubMap) const {
assert(pos < cst.getNumDimVars() && "invalid position");
auto memRefType = cast<MemRefType>(memref.getType());
unsigned rank = memRefType.getRank();
assert(rank == cst.getNumDimVars() && "inconsistent memref region");
auto boundPairs = cst.getLowerAndUpperBound(
pos, 0, rank, cst.getNumDimAndSymbolVars(),
{}, memRefType.getContext());
lbMap = boundPairs.first;
ubMap = boundPairs.second;
assert(lbMap && "lower bound for a region must exist");
assert(ubMap && "upper bound for a region must exist");
assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
}
LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
assert(memref == other.memref);
return cst.unionBoundingBox(*other.getConstraints());
}
LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
const ComputationSliceState *sliceState,
bool addMemRefDimBounds) {
assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
"affine read/write op expected");
MemRefAccess access(op);
memref = access.memref;
write = access.isStore();
unsigned rank = access.getRank();
LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
<< "\ndepth: " << loopDepth << "\n";);
if (rank == 0) {
SmallVector<Value, 4> ivs;
getAffineIVs(*op, ivs);
assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
ivs.resize(loopDepth);
cst = FlatAffineValueConstraints(rank, loopDepth, 0, ivs);
return success();
}
AffineValueMap accessValueMap;
access.getAccessMap(&accessValueMap);
AffineMap accessMap = accessValueMap.getAffineMap();
unsigned numDims = accessMap.getNumDims();
unsigned numSymbols = accessMap.getNumSymbols();
unsigned numOperands = accessValueMap.getNumOperands();
SmallVector<Value, 4> operands;
operands.resize(numOperands);
for (unsigned i = 0; i < numOperands; ++i)
operands[i] = accessValueMap.getOperand(i);
if (sliceState != nullptr) {
operands.reserve(operands.size() + sliceState->lbOperands[0].size());
for (auto extraOperand : sliceState->lbOperands[0]) {
if (!llvm::is_contained(operands, extraOperand)) {
operands.push_back(extraOperand);
numSymbols++;
}
}
}
cst = FlatAffineValueConstraints(numDims, numSymbols, 0, operands);
for (unsigned i = 0; i < numDims + numSymbols; ++i) {
auto operand = operands[i];
if (auto affineFor = getForInductionVarOwner(operand)) {
if (failed(cst.addAffineForOpDomain(affineFor)))
return failure();
} else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
if (failed(cst.addAffineParallelOpDomain(parallelOp)))
return failure();
} else if (isValidSymbol(operand)) {
Value symbol = operand;
if (auto constVal = getConstantIntValue(symbol))
cst.addBound(BoundType::EQ, symbol, constVal.value());
} else {
LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
return failure();
}
}
if (sliceState != nullptr) {
for (auto operand : sliceState->lbOperands[0]) {
cst.addInductionVarOrTerminalSymbol(operand);
}
LogicalResult ret =
cst.addSliceBounds(sliceState->ivs, sliceState->lbs, sliceState->ubs,
sliceState->lbOperands[0]);
assert(succeeded(ret) &&
"should not fail as we never have semi-affine slice maps");
(void)ret;
}
if (failed(cst.composeMap(&accessValueMap))) {
op->emitError("getMemRefRegion: compose affine map failed");
LLVM_DEBUG(accessValueMap.getAffineMap().dump());
return failure();
}
cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);
SmallVector<Value, 4> enclosingIVs;
getAffineIVs(*op, enclosingIVs);
assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
enclosingIVs.resize(loopDepth);
SmallVector<Value, 4> vars;
cst.getValues(cst.getNumDimVars(), cst.getNumDimAndSymbolVars(), &vars);
for (Value var : vars) {
if ((isAffineInductionVar(var)) && !llvm::is_contained(enclosingIVs, var)) {
cst.projectOut(var);
}
}
cst.projectOut(cst.getNumDimAndSymbolVars(), cst.getNumLocalVars());
cst.constantFoldVarRange(cst.getNumDimVars(),
cst.getNumSymbolVars());
assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");
if (addMemRefDimBounds) {
auto memRefType = cast<MemRefType>(memref.getType());
for (unsigned r = 0; r < rank; r++) {
cst.addBound(BoundType::LB, r, 0);
if (memRefType.isDynamicDim(r))
continue;
cst.addBound(BoundType::UB, r, memRefType.getDimSize(r) - 1);
}
}
cst.removeTrivialRedundancy();
LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
LLVM_DEBUG(cst.dump());
return success();
}
std::optional<int64_t>
mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
auto elementType = memRefType.getElementType();
unsigned sizeInBits;
if (elementType.isIntOrFloat()) {
sizeInBits = elementType.getIntOrFloatBitWidth();
} else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
if (vectorType.getElementType().isIntOrFloat())
sizeInBits =
vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
else
return std::nullopt;
} else {
return std::nullopt;
}
return llvm::divideCeil(sizeInBits, 8);
}
std::optional<int64_t> MemRefRegion::getRegionSize() {
auto memRefType = cast<MemRefType>(memref.getType());
if (!memRefType.getLayout().isIdentity()) {
LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
return false;
}
SmallVector<Value, 4> memIndices;
SmallVector<Value, 4> bufIndices;
std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
if (!numElements) {
LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
return std::nullopt;
}
auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
if (!eltSize)
return std::nullopt;
return *eltSize * *numElements;
}
std::optional<uint64_t>
mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
if (!memRefType.hasStaticShape())
return std::nullopt;
auto elementType = memRefType.getElementType();
if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
return std::nullopt;
auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
if (!sizeInBytes)
return std::nullopt;
for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) {
sizeInBytes = *sizeInBytes * memRefType.getDimSize(i);
}
return sizeInBytes;
}
template <typename LoadOrStoreOp>
LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
bool emitError) {
static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
AffineWriteOpInterface>::value,
"argument should be either a AffineReadOpInterface or a "
"AffineWriteOpInterface");
Operation *op = loadOrStoreOp.getOperation();
MemRefRegion region(op->getLoc());
if (failed(region.compute(op, 0, nullptr,
false)))
return success();
LLVM_DEBUG(llvm::dbgs() << "Memory region");
LLVM_DEBUG(region.getConstraints()->dump());
bool outOfBounds = false;
unsigned rank = loadOrStoreOp.getMemRefType().getRank();
for (unsigned r = 0; r < rank; r++) {
FlatAffineValueConstraints ucst(*region.getConstraints());
SmallVector<int64_t, 4> ineq(rank + 1, 0);
int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
if (dimSize == -1)
continue;
ucst.addBound(BoundType::LB, r, dimSize);
outOfBounds = !ucst.isEmpty();
if (outOfBounds && emitError) {
loadOrStoreOp.emitOpError()
<< "memref out of upper bound access along dimension #" << (r + 1);
}
FlatAffineValueConstraints lcst(*region.getConstraints());
std::fill(ineq.begin(), ineq.end(), 0);
lcst.addBound(BoundType::UB, r, -1);
outOfBounds = !lcst.isEmpty();
if (outOfBounds && emitError) {
loadOrStoreOp.emitOpError()
<< "memref out of lower bound access along dimension #" << (r + 1);
}
}
return failure(outOfBounds);
}
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
bool emitError);
template LogicalResult
mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
bool emitError);
static void findInstPosition(Operation *op, Block *limitBlock,
SmallVectorImpl<unsigned> *positions) {
Block *block = op->getBlock();
while (block != limitBlock) {
int instPosInBlock = std::distance(block->begin(), op->getIterator());
positions->push_back(instPosInBlock);
op = block->getParentOp();
block = op->getBlock();
}
std::reverse(positions->begin(), positions->end());
}
static Operation *getInstAtPosition(ArrayRef<unsigned> positions,
unsigned level, Block *block) {
unsigned i = 0;
for (auto &op : *block) {
if (i != positions[level]) {
++i;
continue;
}
if (level == positions.size() - 1)
return &op;
if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
return getInstAtPosition(positions, level + 1,
childAffineForOp.getBody());
for (auto ®ion : op.getRegions()) {
for (auto &b : region)
if (auto *ret = getInstAtPosition(positions, level + 1, &b))
return ret;
}
return nullptr;
}
return nullptr;
}
static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, 8> &ivs,
FlatAffineValueConstraints *cst) {
for (unsigned i = 0, e = cst->getNumDimVars(); i < e; ++i) {
auto value = cst->getValue(i);
if (ivs.count(value) == 0) {
assert(isAffineForInductionVar(value));
auto loop = getForInductionVarOwner(value);
if (failed(cst->addAffineForOpDomain(loop)))
return failure();
}
}
return success();
}
unsigned mlir::affine::getInnermostCommonLoopDepth(
ArrayRef<Operation *> ops, SmallVectorImpl<AffineForOp> *surroundingLoops) {
unsigned numOps = ops.size();
assert(numOps > 0 && "Expected at least one operation");
std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
for (unsigned i = 0; i < numOps; ++i) {
getAffineForIVs(*ops[i], &loops[i]);
loopDepthLimit =
std::min(loopDepthLimit, static_cast<unsigned>(loops[i].size()));
}
unsigned loopDepth = 0;
for (unsigned d = 0; d < loopDepthLimit; ++d) {
unsigned i;
for (i = 1; i < numOps; ++i) {
if (loops[i - 1][d] != loops[i][d])
return loopDepth;
}
if (surroundingLoops)
surroundingLoops->push_back(loops[i - 1][d]);
++loopDepth;
}
return loopDepth;
}
SliceComputationResult
mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA,
ArrayRef<Operation *> opsB, unsigned loopDepth,
unsigned numCommonLoops, bool isBackwardSlice,
ComputationSliceState *sliceUnion) {
FlatAffineValueConstraints sliceUnionCst;
assert(sliceUnionCst.getNumDimAndSymbolVars() == 0);
std::vector<std::pair<Operation *, Operation *>> dependentOpPairs;
for (auto *i : opsA) {
MemRefAccess srcAccess(i);
for (auto *j : opsB) {
MemRefAccess dstAccess(j);
if (srcAccess.memref != dstAccess.memref)
continue;
if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) ||
(isBackwardSlice && loopDepth > getNestingDepth(j))) {
LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
return SliceComputationResult::GenericFailure;
}
bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
isa<AffineReadOpInterface>(dstAccess.opInst);
FlatAffineValueConstraints dependenceConstraints;
DependenceResult result = checkMemrefAccessDependence(
srcAccess, dstAccess, numCommonLoops + 1,
&dependenceConstraints, nullptr,
readReadAccesses);
if (result.value == DependenceResult::Failure) {
LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
return SliceComputationResult::GenericFailure;
}
if (result.value == DependenceResult::NoDependence)
continue;
dependentOpPairs.emplace_back(i, j);
ComputationSliceState tmpSliceState;
mlir::affine::getComputationSliceState(i, j, &dependenceConstraints,
loopDepth, isBackwardSlice,
&tmpSliceState);
if (sliceUnionCst.getNumDimAndSymbolVars() == 0) {
if (failed(tmpSliceState.getAsConstraints(&sliceUnionCst))) {
LLVM_DEBUG(llvm::dbgs()
<< "Unable to compute slice bound constraints\n");
return SliceComputationResult::GenericFailure;
}
assert(sliceUnionCst.getNumDimAndSymbolVars() > 0);
continue;
}
FlatAffineValueConstraints tmpSliceCst;
if (failed(tmpSliceState.getAsConstraints(&tmpSliceCst))) {
LLVM_DEBUG(llvm::dbgs()
<< "Unable to compute slice bound constraints\n");
return SliceComputationResult::GenericFailure;
}
if (!sliceUnionCst.areVarsAlignedWithOther(tmpSliceCst)) {
SmallPtrSet<Value, 8> sliceUnionIVs;
for (unsigned k = 0, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
sliceUnionIVs.insert(sliceUnionCst.getValue(k));
SmallPtrSet<Value, 8> tmpSliceIVs;
for (unsigned k = 0, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
tmpSliceIVs.insert(tmpSliceCst.getValue(k));
sliceUnionCst.mergeAndAlignVarsWithOther(0, &tmpSliceCst);
if (failed(addMissingLoopIVBounds(sliceUnionIVs, &sliceUnionCst)))
return SliceComputationResult::GenericFailure;
if (failed(addMissingLoopIVBounds(tmpSliceIVs, &tmpSliceCst)))
return SliceComputationResult::GenericFailure;
}
if (sliceUnionCst.getNumLocalVars() > 0 ||
tmpSliceCst.getNumLocalVars() > 0 ||
failed(sliceUnionCst.unionBoundingBox(tmpSliceCst))) {
LLVM_DEBUG(llvm::dbgs()
<< "Unable to compute union bounding box of slice bounds\n");
return SliceComputationResult::GenericFailure;
}
}
}
if (sliceUnionCst.getNumDimAndSymbolVars() == 0)
return SliceComputationResult::GenericFailure;
SmallVector<Operation *, 4> ops;
for (auto &dep : dependentOpPairs) {
ops.push_back(isBackwardSlice ? dep.second : dep.first);
}
SmallVector<AffineForOp, 4> surroundingLoops;
unsigned innermostCommonLoopDepth =
getInnermostCommonLoopDepth(ops, &surroundingLoops);
if (loopDepth > innermostCommonLoopDepth) {
LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
return SliceComputationResult::GenericFailure;
}
unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
sliceUnionCst.convertLoopIVSymbolsToDims();
sliceUnion->clearBounds();
sliceUnion->lbs.resize(numSliceLoopIVs, AffineMap());
sliceUnion->ubs.resize(numSliceLoopIVs, AffineMap());
sliceUnionCst.getSliceBounds(0, numSliceLoopIVs,
opsA[0]->getContext(), &sliceUnion->lbs,
&sliceUnion->ubs);
SmallVector<Value, 4> sliceBoundOperands;
sliceUnionCst.getValues(numSliceLoopIVs,
sliceUnionCst.getNumDimAndSymbolVars(),
&sliceBoundOperands);
sliceUnion->ivs.clear();
sliceUnionCst.getValues(0, numSliceLoopIVs, &sliceUnion->ivs);
sliceUnion->insertPoint =
isBackwardSlice
? surroundingLoops[loopDepth - 1].getBody()->begin()
: std::prev(surroundingLoops[loopDepth - 1].getBody()->end());
sliceUnion->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceUnion->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
if (!isSliceValid) {
LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
return SliceComputationResult::GenericFailure;
}
if (!*isSliceValid)
return SliceComputationResult::IncorrectSliceFailure;
return SliceComputationResult::Success;
}
static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
AffineMap ubMap) {
assert(lbMap.getNumResults() == 1 && "expected single result bound map");
assert(ubMap.getNumResults() == 1 && "expected single result bound map");
assert(lbMap.getNumDims() == ubMap.getNumDims());
assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
AffineExpr lbExpr(lbMap.getResult(0));
AffineExpr ubExpr(ubMap.getResult(0));
auto loopSpanExpr = simplifyAffineExpr(ubExpr - lbExpr, lbMap.getNumDims(),
lbMap.getNumSymbols());
auto cExpr = dyn_cast<AffineConstantExpr>(loopSpanExpr);
if (!cExpr)
return std::nullopt;
return cExpr.getValue();
}
bool mlir::affine::buildSliceTripCountMap(
const ComputationSliceState &slice,
llvm::SmallDenseMap<Operation *, uint64_t, 8> *tripCountMap) {
unsigned numSrcLoopIVs = slice.ivs.size();
for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
auto *op = forOp.getOperation();
AffineMap lbMap = slice.lbs[i];
AffineMap ubMap = slice.ubs[i];
if (!lbMap || lbMap.getNumResults() == 0 || !ubMap ||
ubMap.getNumResults() == 0) {
if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
(*tripCountMap)[op] =
forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
continue;
}
std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
if (maybeConstTripCount.has_value()) {
(*tripCountMap)[op] = *maybeConstTripCount;
continue;
}
return false;
}
std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
if (!tripCount.has_value())
return false;
(*tripCountMap)[op] = *tripCount;
}
return true;
}
uint64_t mlir::affine::getSliceIterationCount(
const llvm::SmallDenseMap<Operation *, uint64_t, 8> &sliceTripCountMap) {
uint64_t iterCount = 1;
for (const auto &count : sliceTripCountMap) {
iterCount *= count.second;
}
return iterCount;
}
const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
void mlir::affine::getComputationSliceState(
Operation *depSourceOp, Operation *depSinkOp,
FlatAffineValueConstraints *dependenceConstraints, unsigned loopDepth,
bool isBackwardSlice, ComputationSliceState *sliceState) {
SmallVector<AffineForOp, 4> srcLoopIVs;
getAffineForIVs(*depSourceOp, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();
SmallVector<AffineForOp, 4> dstLoopIVs;
getAffineForIVs(*depSinkOp, &dstLoopIVs);
unsigned numDstLoopIVs = dstLoopIVs.size();
assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) ||
(isBackwardSlice && loopDepth <= numDstLoopIVs));
unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
unsigned num =
isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
dependenceConstraints->projectOut(pos, num);
unsigned offset = isBackwardSlice ? 0 : loopDepth;
unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
dependenceConstraints->getValues(offset, offset + numSliceLoopIVs,
&sliceState->ivs);
sliceState->lbs.resize(numSliceLoopIVs, AffineMap());
sliceState->ubs.resize(numSliceLoopIVs, AffineMap());
dependenceConstraints->getSliceBounds(offset, numSliceLoopIVs,
depSourceOp->getContext(),
&sliceState->lbs, &sliceState->ubs);
SmallVector<Value, 4> sliceBoundOperands;
unsigned numDimsAndSymbols = dependenceConstraints->getNumDimAndSymbolVars();
for (unsigned i = 0; i < numDimsAndSymbols; ++i) {
if (i < offset || i >= offset + numSliceLoopIVs) {
sliceBoundOperands.push_back(dependenceConstraints->getValue(i));
}
}
sliceState->lbOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceState->ubOperands.resize(numSliceLoopIVs, sliceBoundOperands);
sliceState->insertPoint =
isBackwardSlice ? dstLoopIVs[loopDepth - 1].getBody()->begin()
: std::prev(srcLoopIVs[loopDepth - 1].getBody()->end());
llvm::SmallDenseSet<Value, 8> sequentialLoops;
if (isa<AffineReadOpInterface>(depSourceOp) &&
isa<AffineReadOpInterface>(depSinkOp)) {
getSequentialLoops(isBackwardSlice ? srcLoopIVs[0] : dstLoopIVs[0],
&sequentialLoops);
}
auto getSliceLoop = [&](unsigned i) {
return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
};
auto isInnermostInsertion = [&]() {
return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
: loopDepth >= dstLoopIVs.size());
};
llvm::SmallDenseMap<Operation *, uint64_t, 8> sliceTripCountMap;
auto srcIsUnitSlice = [&]() {
return (buildSliceTripCountMap(*sliceState, &sliceTripCountMap) &&
(getSliceIterationCount(sliceTripCountMap) == 1));
};
for (unsigned i = 0; i < numSliceLoopIVs; ++i) {
Value iv = getSliceLoop(i).getInductionVar();
if (sequentialLoops.count(iv) == 0 &&
getSliceLoop(i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
continue;
std::optional<bool> isMaximal = sliceState->isMaximal();
if (isLoopParallelAndContainsReduction(getSliceLoop(i)) &&
isInnermostInsertion() && srcIsUnitSlice() && isMaximal && *isMaximal)
continue;
for (unsigned j = i; j < numSliceLoopIVs; ++j) {
sliceState->lbs[j] = AffineMap();
sliceState->ubs[j] = AffineMap();
}
break;
}
}
AffineForOp mlir::affine::insertBackwardComputationSlice(
Operation *srcOpInst, Operation *dstOpInst, unsigned dstLoopDepth,
ComputationSliceState *sliceState) {
SmallVector<AffineForOp, 4> srcLoopIVs;
getAffineForIVs(*srcOpInst, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();
SmallVector<AffineForOp, 4> dstLoopIVs;
getAffineForIVs(*dstOpInst, &dstLoopIVs);
unsigned dstLoopIVsSize = dstLoopIVs.size();
if (dstLoopDepth > dstLoopIVsSize) {
dstOpInst->emitError("invalid destination loop depth");
return AffineForOp();
}
SmallVector<unsigned, 4> positions;
findInstPosition(srcOpInst, srcLoopIVs[0]->getBlock(), &positions);
auto dstAffineForOp = dstLoopIVs[dstLoopDepth - 1];
OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
auto sliceLoopNest =
cast<AffineForOp>(b.clone(*srcLoopIVs[0].getOperation()));
Operation *sliceInst =
getInstAtPosition(positions, 0, sliceLoopNest.getBody());
SmallVector<AffineForOp, 4> sliceSurroundingLoops;
getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
(void)sliceSurroundingLoopsSize;
assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
(void)sliceLoopLimit;
assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
if (AffineMap lbMap = sliceState->lbs[i])
forOp.setLowerBound(sliceState->lbOperands[i], lbMap);
if (AffineMap ubMap = sliceState->ubs[i])
forOp.setUpperBound(sliceState->ubOperands[i], ubMap);
}
return sliceLoopNest;
}
MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) {
if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
memref = loadOp.getMemRef();
opInst = loadOrStoreOpInst;
llvm::append_range(indices, loadOp.getMapOperands());
} else {
assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
"Affine read/write op expected");
auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
opInst = loadOrStoreOpInst;
memref = storeOp.getMemRef();
llvm::append_range(indices, storeOp.getMapOperands());
}
}
unsigned MemRefAccess::getRank() const {
return cast<MemRefType>(memref.getType()).getRank();
}
bool MemRefAccess::isStore() const {
return isa<AffineWriteOpInterface>(opInst);
}
unsigned mlir::affine::getNestingDepth(Operation *op) {
Operation *currOp = op;
unsigned depth = 0;
while ((currOp = currOp->getParentOp())) {
if (isa<AffineForOp>(currOp))
depth++;
}
return depth;
}
bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
if (memref != rhs.memref)
return false;
AffineValueMap diff, thisMap, rhsMap;
getAccessMap(&thisMap);
rhs.getAccessMap(&rhsMap);
AffineValueMap::difference(thisMap, rhsMap, &diff);
return llvm::all_of(diff.getAffineMap().getResults(),
[](AffineExpr e) { return e == 0; });
}
void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
auto *currOp = op.getParentOp();
AffineForOp currAffineForOp;
while (currOp) {
if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
ivs.push_back(currAffineForOp.getInductionVar());
else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
llvm::append_range(ivs, parOp.getIVs());
currOp = currOp->getParentOp();
}
std::reverse(ivs.begin(), ivs.end());
}
unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
Operation &b) {
SmallVector<Value, 4> loopsA, loopsB;
getAffineIVs(a, loopsA);
getAffineIVs(b, loopsB);
unsigned minNumLoops = std::min(loopsA.size(), loopsB.size());
unsigned numCommonLoops = 0;
for (unsigned i = 0; i < minNumLoops; ++i) {
if (loopsA[i] != loopsB[i])
break;
++numCommonLoops;
}
return numCommonLoops;
}
static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
Block::iterator start,
Block::iterator end,
int memorySpace) {
SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, 4> regions;
auto result = block.walk(start, end, [&](Operation *opInst) -> WalkResult {
if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
return WalkResult::advance();
}
auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
if (failed(
region->compute(opInst,
getNestingDepth(&*block.begin())))) {
return opInst->emitError("error obtaining memory region\n");
}
auto it = regions.find(region->memref);
if (it == regions.end()) {
regions[region->memref] = std::move(region);
} else if (failed(it->second->unionBoundingBox(*region))) {
return opInst->emitWarning(
"getMemoryFootprintBytes: unable to perform a union on a memory "
"region");
}
return WalkResult::advance();
});
if (result.wasInterrupted())
return std::nullopt;
int64_t totalSizeInBytes = 0;
for (const auto ®ion : regions) {
std::optional<int64_t> size = region.second->getRegionSize();
if (!size.has_value())
return std::nullopt;
totalSizeInBytes += *size;
}
return totalSizeInBytes;
}
std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
int memorySpace) {
auto *forInst = forOp.getOperation();
return ::getMemoryFootprintBytes(
*forInst->getBlock(), Block::iterator(forInst),
std::next(Block::iterator(forInst)), memorySpace);
}
bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
SmallVector<LoopReduction> reductions;
if (!isLoopParallel(forOp, &reductions))
return false;
return !reductions.empty();
}
void mlir::affine::getSequentialLoops(
AffineForOp forOp, llvm::SmallDenseSet<Value, 8> *sequentialLoops) {
forOp->walk([&](Operation *op) {
if (auto innerFor = dyn_cast<AffineForOp>(op))
if (!isLoopParallel(innerFor))
sequentialLoops->insert(innerFor.getInductionVar());
});
}
IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
FlatAffineValueConstraints fac(set);
if (fac.isEmpty())
return IntegerSet::getEmptySet(set.getNumDims(), set.getNumSymbols(),
set.getContext());
fac.removeTrivialRedundancy();
auto simplifiedSet = fac.getAsIntegerSet(set.getContext());
assert(simplifiedSet && "guaranteed to succeed while roundtripping");
return simplifiedSet;
}
static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
SmallVector<Value> &target) {
target =
llvm::to_vector<4>(llvm::map_range(source, [](std::optional<Value> val) {
return val.has_value() ? *val : Value();
}));
}
static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
BoundType type, unsigned pos,
AffineMap map, ValueRange operands) {
SmallVector<Value> dims, syms, newSyms;
unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims);
unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms);
AffineMap alignedMap =
alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
for (unsigned i = syms.size(); i < newSyms.size(); ++i)
constraints.appendSymbolVar(newSyms[i]);
return constraints.addBound(type, pos, alignedMap);
}
static AffineMap addConstToResults(AffineMap map, int64_t val) {
SmallVector<AffineExpr> newResults;
for (AffineExpr r : map.getResults())
newResults.push_back(r + val);
return AffineMap::get(map.getNumDims(), map.getNumSymbols(), newResults,
map.getContext());
}
FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
Operation *op, FlatAffineValueConstraints constraints) {
bool isMin = isa<AffineMinOp>(op);
assert((isMin || isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
MLIRContext *ctx = op->getContext();
Builder builder(ctx);
AffineMap map =
isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
ValueRange operands = op->getOperands();
unsigned numResults = map.getNumResults();
unsigned dimOp = constraints.appendDimVar();
unsigned dimOpBound = constraints.appendDimVar();
unsigned resultDimStart = constraints.appendDimVar(numResults);
auto boundType = isMin ? BoundType::UB : BoundType::LB;
AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map;
if (failed(
alignAndAddBound(constraints, boundType, dimOp, mapLbUb, operands)))
return failure();
SmallVector<AffineMap> opLb(1), opUb(1);
constraints.getSliceBounds(dimOp, 1, ctx, &opLb, &opUb);
AffineMap sliceBound = isMin ? opUb[0] : opLb[0];
if (!sliceBound || sliceBound.getNumResults() != 1)
return failure();
AffineMap boundMap = isMin ? addConstToResults(sliceBound, -1) : sliceBound;
AffineMap alignedBoundMap = boundMap.shiftDims(1, dimOp);
if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap)))
return failure();
if (constraints.isEmpty())
return failure();
for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
FlatAffineValueConstraints newConstr(constraints);
if (failed(alignAndAddBound(newConstr, BoundType::EQ, i,
map.getSubMap({i - resultDimStart}), operands)))
return failure();
SmallVector<int64_t> ineq(newConstr.getNumCols(), 0);
ineq[dimOpBound] = isMin ? 1 : -1;
ineq[i] = isMin ? -1 : 1;
ineq[newConstr.getNumCols() - 1] = -1;
newConstr.addInequality(ineq);
if (!newConstr.isEmpty())
return failure();
}
AffineMap newMap = alignedBoundMap;
SmallVector<Value> newOperands;
unpackOptionalValues(constraints.getMaybeValues(), newOperands);
for (int64_t i = 0, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
if (!newOperands[i] || getConstantIntValue(newOperands[i]))
continue;
if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) {
AffineExpr expr =
i < newMap.getNumDims()
? builder.getAffineDimExpr(i)
: builder.getAffineSymbolExpr(i - newMap.getNumDims());
newMap = newMap.replace(expr, builder.getAffineConstantExpr(*bound),
newMap.getNumDims(), newMap.getNumSymbols());
}
}
affine::canonicalizeMapAndOperands(&newMap, &newOperands);
return AffineValueMap(newMap, newOperands);
}