#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <optional>
#include <set>
#define DEBUG_TYPE "linalg-fusion"
using namespace mlir;
using namespace mlir::linalg;
/// Pairs a shaped value with the index of one of its dimensions.
///
/// `getShapeDefiningLoopRange` returns this to name the operand (and the
/// position inside that operand's indexing-map results) that matched a given
/// loop of a LinalgOp.
struct ShapeDimension {
  /// The operand value whose shape is referred to.
  Value shape;
  /// Index of the dimension within `shape`. Defaults to 0 so that a
  /// default-constructed instance never carries an indeterminate value.
  unsigned dimension = 0;
};
/// Finds an operand dimension of `op` that is indexed directly by the loop at
/// position `loopDepth`.
///
/// Operands are visited in operand order. For each one, the results of its
/// matching indexing map are scanned for a plain `AffineDimExpr` whose position
/// equals `loopDepth`; results that are not dim expressions are skipped. The
/// first match is returned as {operand value, index of the map result}.
///
/// \param op                 The Linalg operation to inspect.
/// \param loopDepth          Position of the loop dimension to look for.
/// \param fromSubViewOpOnly  When true, operands that are not defined by a
///                           `memref::SubViewOp` or `tensor::ExtractSliceOp`
///                           (including operands with no defining op) are
///                           ignored.
///
/// If no operand matches, control reaches `llvm_unreachable`; callers are
/// expected to only ask for loops that some operand indexes directly.
static ShapeDimension
getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth,
                          bool fromSubViewOpOnly = false) {
  for (OpOperand &opOperand : op->getOpOperands()) {
    if (fromSubViewOpOnly &&
        !isa_and_nonnull<memref::SubViewOp, tensor::ExtractSliceOp>(
            opOperand.get().getDefiningOp()))
      continue;

    AffineMap map = op.getMatchingIndexingMap(&opOperand);
    LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange I/O idx: "
                            << opOperand.getOperandNumber() << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "getShapeDefiningLoopRange map: " << map << "\n");

    for (const auto &en : llvm::enumerate(map.getResults())) {
      // Only results that are a bare loop dimension can define the range.
      auto dimExpr = dyn_cast<AffineDimExpr>(en.value());
      if (!dimExpr || dimExpr.getPosition() != loopDepth)
        continue;

      LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange loopDepth: "
                              << loopDepth << "\n");
      LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange shape: "
                              << opOperand.get() << "\n");
      return ShapeDimension{opOperand.get(),
                            static_cast<unsigned>(en.index())};
    }
  }
  llvm_unreachable("Expect to be able to extract a shape defining loop range");
}
/// Returns a copy of all operands of `producer`, in operand order. The result
/// is what `fuse` hands to `makeTiledShapes` as the values to tile.
static SmallVector<Value> getTiledOperands(LinalgOp producer) {
  OperandRange allOperands = producer->getOperands();
  return SmallVector<Value>(allOperands.begin(), allOperands.end());
}
/// Clones `producer` at the builder's current insertion point, restricted to
/// the loop ranges given in `fusedLoopsAndRanges`.
///
/// \param b                    Builder used to create all new IR.
/// \param producer             The Linalg operation to clone.
/// \param fusedLoopsAndRanges  Maps a producer loop position to the range that
///                             loop is restricted to. Loops absent from the map
///                             keep their full extent.
/// \returns The cloned operation, built on the operands produced by
///          `makeTiledShapes` and with its index computations shifted by the
///          per-loop offsets through `offsetIndices`.
static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
                     const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
  Location loc = producer.getLoc();
  SmallVector<OpFoldResult> ivs;
  SmallVector<OpFoldResult> tileSizes;
  SmallVector<OpFoldResult> sizeBounds;
  SmallVector<Range> loopRanges;

  const unsigned numLoops = producer.getNumLoops();
  for (unsigned loopIdx = 0; loopIdx != numLoops; ++loopIdx) {
    // The full extent of every loop is always recorded as its size bound.
    ShapeDimension shapeDim = getShapeDefiningLoopRange(producer, loopIdx);
    OpFoldResult fullExtent =
        createFoldedDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
    sizeBounds.push_back(fullExtent);

    auto fusedIt = fusedLoopsAndRanges.find(loopIdx);
    if (fusedIt == fusedLoopsAndRanges.end()) {
      // Loop is not fused: tile size 0 and the range [0, fullExtent) step 1.
      tileSizes.push_back(b.getIndexAttr(0));
      loopRanges.push_back(
          Range{b.getIndexAttr(0), fullExtent, b.getIndexAttr(1)});
      LLVM_DEBUG(llvm::dbgs() << "full loop#" << loopIdx << " with LoopRange "
                              << loopRanges.back() << "\n");
      continue;
    }

    // Loop is fused: take offset and size from the caller-provided range.
    const Range &fusedRange = fusedIt->second;
    ivs.push_back(fusedRange.offset);
    tileSizes.push_back(fusedRange.size);
    loopRanges.push_back(fusedRange);
    LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << loopIdx << " with LoopRange "
                            << loopRanges.back() << "\n");
  }

  // Operands of the clone, one per producer operand.
  SmallVector<Value> tiledOperands =
      makeTiledShapes(b, loc, producer, getTiledOperands(producer), ivs,
                      tileSizes, sizeBounds, false);

  // The i-th result type is the type of the i-th tiled init operand.
  MutableOperandRange initOperands = producer.getDpsInitsMutable();
  const unsigned firstInitIdx =
      initOperands.getAsOperandRange().getBeginOperandIndex();
  const unsigned numResults = producer->getNumResults();
  SmallVector<Type, 4> resultTypes;
  resultTypes.reserve(numResults);
  for (unsigned resultIdx = 0; resultIdx != numResults; ++resultIdx)
    resultTypes.push_back(tiledOperands[firstInitIdx + resultIdx].getType());

  LinalgOp clonedOp = clone(b, producer, resultTypes, tiledOperands);

  // Shift index computations in the clone by the offset of every loop range.
  SmallVector<OpFoldResult> allIvs;
  allIvs.reserve(loopRanges.size());
  for (const Range &range : loopRanges)
    allIvs.push_back(range.offset);
  offsetIndices(b, clonedOp, allIvs);

  return clonedOp;
}
/// Returns the range describing dimension `dim` of `shapedOperand`.
///
/// \param b              Builder passed on to `getOrCreateRanges`.
/// \param loc            Location passed on to `getOrCreateRanges`.
/// \param shapedOperand  Value that must be the result of a
///                       `memref::SubViewOp` or a `tensor::ExtractSliceOp`.
/// \param dim            Index into the ranges reported by the defining op.
///
/// Any other kind of value, including one with no defining operation such as a
/// block argument, reaches `llvm_unreachable`. `Value::getDefiningOp<OpTy>()`
/// is used rather than `dyn_cast` on the raw defining op so that the
/// no-defining-op case ends up at that diagnostic instead of casting a null
/// pointer.
static Range getRangeFromOperandShape(OpBuilder &b, Location loc,
                                      Value shapedOperand, unsigned dim) {
  if (auto subViewOp = shapedOperand.getDefiningOp<memref::SubViewOp>())
    return subViewOp.getOrCreateRanges(b, loc)[dim];
  if (auto sliceOp = shapedOperand.getDefiningOp<tensor::ExtractSliceOp>())
    return sliceOp.getOrCreateRanges(b, loc)[dim];
  llvm_unreachable("SubviewOp or ExtractSliceOp expected");
}
/// Clones `producerOp` so that it covers the ranges of the value consumed by
/// `consumerOpOperand`.
///
/// Each result of `producerMap` must be an `AffineDimExpr` (enforced by
/// `cast`). The i-th result's loop position is associated with the range of
/// dimension i of the consumed value, as reported by
/// `getRangeFromOperandShape`. The resulting loop-to-range map is forwarded to
/// the `fuse` overload taking a `DenseMap`.
///
/// \param b                  Builder used to create new IR.
/// \param producerOp         Operation to clone.
/// \param producerMap        Indexing map of the producer operand being fused.
/// \param consumerOpOperand  Consumer operand whose value supplies the ranges.
static LinalgOp fuse(OpBuilder &b, LinalgOp producerOp, AffineMap producerMap,
                     OpOperand &consumerOpOperand) {
  LLVM_DEBUG(llvm::dbgs() << "Producer map: " << producerMap << "\n");

  Value consumedValue = consumerOpOperand.get();
  Location consumerLoc = consumerOpOperand.getOwner()->getLoc();

  DenseMap<unsigned, Range> fusedLoopsAndRanges;
  for (const auto &indexedResult : llvm::enumerate(producerMap.getResults())) {
    Range operandRange = getRangeFromOperandShape(b, consumerLoc, consumedValue,
                                                  indexedResult.index());
    unsigned producerLoopPos =
        cast<AffineDimExpr>(indexedResult.value()).getPosition();
    fusedLoopsAndRanges[producerLoopPos] = operandRange;
  }
  return fuse(b, producerOp, fusedLoopsAndRanges);
}
/// Walks the use-def chain of `tensor` looking for a LinalgOp that produces it.
///
/// \param tensor    Starting value. Nothing is done unless its type is a
///                  `RankedTensorType`.
/// \param opResult  Out-parameter. Set to the LinalgOp result that was found;
///                  left untouched when the walk ends without finding one.
///
/// The walk steps from a `tensor::ExtractSliceOp` result to the slice source,
/// and stops at the first value that is neither a LinalgOp result nor such a
/// slice.
static void getProducerOfTensor(Value tensor, OpResult &opResult) {
  if (!isa<RankedTensorType>(tensor.getType()))
    return;

  for (;;) {
    LLVM_DEBUG(llvm::dbgs() << "\ngetProducerOfTensor: " << tensor);

    // Found the producer: report the result and stop.
    if (tensor.getDefiningOp<LinalgOp>()) {
      opResult = cast<OpResult>(tensor);
      return;
    }

    // Look through slices to the tensor they were taken from.
    if (auto sliceOp = tensor.getDefiningOp<tensor::ExtractSliceOp>()) {
      tensor = sliceOp.getSource();
      continue;
    }

    // Anything that is not a block argument ends the walk.
    auto blockArg = dyn_cast<BlockArgument>(tensor);
    if (!blockArg)
      return;

    // NOTE(review): a BlockArgument is not the result of an operation, so
    // `getDefiningOp` is expected to yield null here and the step through
    // `scf.for` init args below looks unreachable. The index also does not
    // account for the loop's induction variable. Confirm the intent before
    // changing it: making this path live would start fusing through loop-carried
    // values.
    auto forOp = blockArg.getDefiningOp<scf::ForOp>();
    if (!forOp)
      return;
    tensor = forOp.getInitArgs()[blockArg.getArgNumber()];
  }
}
/// Locates the producer of the value consumed by `consumerOpOperand` with
/// `getProducerOfTensor` and fuses it through the overload taking an explicit
/// producer result.
///
/// \param b                  Builder used to create new IR.
/// \param consumerOpOperand  Consumer operand whose producer should be fused.
/// \returns failure() when no producer is found; otherwise whatever the
///          explicit-producer overload returns.
FailureOr<FusionInfo>
mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand) {
  OpResult producerOpResult;
  getProducerOfTensor(consumerOpOperand.get(), producerOpResult);
  if (producerOpResult)
    return fuseProducerOfTensor(b, producerOpResult, consumerOpOperand);

  LLVM_DEBUG(llvm::dbgs() << "\nUnable to find producer");
  return failure();
}
/// Fuses the producer of `producerOpResult` into the consumer that owns
/// `consumerOpOperand`.
///
/// Returns failure() when:
///   - the owner of `producerOpResult` or of `consumerOpOperand` is not a
///     LinalgOp;
///   - the consumed value is not defined by a `tensor::ExtractSliceOp`;
///   - the consumed value and the producer result have the same parent block.
///
/// Otherwise a clone of the producer is created right before the consumer, and
/// the consumer operand is rewired to the corresponding result of that clone.
/// A `tensor::CastOp` is inserted when the clone's result type differs from the
/// type the consumer operand had. The builder's insertion point is restored on
/// exit.
///
/// \param b                  Builder used to create new IR.
/// \param producerOpResult   Result of the producer to fuse.
/// \param consumerOpOperand  Consumer operand to be replaced.
/// \returns On success, a FusionInfo holding the original producer and the
///          fused clone.
FailureOr<FusionInfo>
mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult,
                                   OpOperand &consumerOpOperand) {
  auto producerOp = dyn_cast<LinalgOp>(producerOpResult.getOwner());
  auto consumerOp = dyn_cast<LinalgOp>(consumerOpOperand.getOwner());
  if (!producerOp || !consumerOp)
    return failure();

  Value inputTensor = consumerOpOperand.get();
  if (!inputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
    LLVM_DEBUG(llvm::dbgs()
               << "\nNot fusable, not an extract_slice op: " << inputTensor);
    return failure();
  }

  // Producer and consumed value in the same block: nothing to fuse.
  if (inputTensor.getParentBlock() == producerOpResult.getParentBlock())
    return failure();

  // All new IR goes immediately before the consumer.
  OpBuilder::InsertionGuard guard(b);
  b.setInsertionPoint(consumerOp);
  LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumerOp << "\n");

  const unsigned resultNumber = producerOpResult.getResultNumber();
  OpOperand *producerInit = producerOp.getDpsInitOperand(resultNumber);
  AffineMap producerMap = producerOp.getMatchingIndexingMap(producerInit);
  LinalgOp fusedProducer = fuse(b, producerOp, producerMap, consumerOpOperand);

  // Keep the consumer operand's type stable by casting when the types differ.
  Value replacement = fusedProducer->getResult(resultNumber);
  Type consumerType = inputTensor.getType();
  if (replacement.getType() != consumerType)
    replacement = b.create<tensor::CastOp>(fusedProducer.getLoc(), consumerType,
                                           replacement);
  consumerOpOperand.set(replacement);

  return FusionInfo{producerOp, fusedProducer};
}