#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
namespace mlir {
#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
}
using namespace mlir;
using namespace mlir::gpu;
// Tag used by LLVM_DEBUG output in this file.
#define DEBUG_TYPE "gpu-erase-barriers"
// Separate tag used by the aliasing debug output (DEBUG_WITH_TYPE).
// NOTE(review): "barries" looks like a misspelling of "barriers"; the string
// is used verbatim at runtime, so it is intentionally left untouched here.
#define DEBUG_TYPE_ALIAS "gpu-erase-barries-alias"

// Debug streams that first print the corresponding tag in square brackets.
#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
/// Returns `true` if `op` is one of the operations this file treats as
/// effect-free without consulting the memory effect interface. The only such
/// operation is `memref::AssumeAlignmentOp`.
static bool isKnownNoEffectsOpWithoutInterface(Operation *op) {
  const bool isAssumeAlignment = isa<memref::AssumeAlignmentOp>(op);
  return isAssumeAlignment;
}
/// Returns `true` if `op` delimits a parallel region: either it carries the
/// `__parallel_region_boundary_for_test` attribute, or it is a `GPUFuncOp` or
/// a `LaunchOp`.
static bool isParallelRegionBoundary(Operation *op) {
  const bool markedForTest =
      op->hasAttr("__parallel_region_boundary_for_test");
  return markedForTest || isa<GPUFuncOp, LaunchOp>(op);
}
/// Returns `true` if `op` is treated as a sequential loop by this file, i.e.
/// it is an `scf::ForOp`.
static bool isSequentialLoopLike(Operation *op) {
  const bool isScfFor = isa<scf::ForOp>(op);
  return isScfFor;
}
/// Returns `true` if `op` is an `scf::IfOp` or a `memref::AllocaScopeOp`.
/// Callers in this file skip the conservative walk over the whole parent
/// operation when this holds.
static bool hasSingleExecutionBody(Operation *op) {
  const bool isIfOrAllocaScope = isa<scf::IfOp, memref::AllocaScopeOp>(op);
  return isIfOrAllocaScope;
}
/// Returns `true` if `op` is non-null and is a `memref::AllocOp` or a
/// `memref::AllocaOp`. A null `op` (e.g. the defining op of a block argument)
/// yields `false`.
static bool producesDistinctBase(Operation *op) {
  if (op == nullptr)
    return false;
  return isa<memref::AllocOp, memref::AllocaOp>(op);
}
/// Appends one effect instance of each kind (Read, Write, Allocate, Free, in
/// that order) to `effects`. The instances are not attached to any value; this
/// is what callers use as the "anything may happen" placeholder.
static void addAllValuelessEffects(
    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  MemoryEffects::Effect *const everyKind[] = {
      MemoryEffects::Effect::get<MemoryEffects::Read>(),
      MemoryEffects::Effect::get<MemoryEffects::Write>(),
      MemoryEffects::Effect::get<MemoryEffects::Allocate>(),
      MemoryEffects::Effect::get<MemoryEffects::Free>()};
  for (MemoryEffects::Effect *kind : everyKind)
    effects.emplace_back(kind);
}
/// Appends the memory effects of `op` to `effects`.
///
/// \param op             operation whose effects are collected.
/// \param effects        output list; entries are only ever appended.
/// \param ignoreBarriers when `true`, a `BarrierOp` contributes no effects.
/// \returns `true` if the collected effects are exact, `false` if the
///          operation's effects are unknown, in which case the valueless
///          Read/Write/Allocate/Free placeholder effects have been appended.
static bool
collectEffects(Operation *op,
               SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
               bool ignoreBarriers = true) {
  // Barriers are skipped on request and contribute nothing.
  if (ignoreBarriers && isa<BarrierOp>(op))
    return true;

  // Operations explicitly known to be effect-free contribute nothing either.
  if (isKnownNoEffectsOpWithoutInterface(op))
    return true;

  // Operations that describe their own effects: query them into a scratch
  // buffer and append, so that entries already in `effects` are left alone.
  if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
    SmallVector<MemoryEffects::EffectInstance> localEffects;
    iface.getEffects(localEffects);
    llvm::append_range(effects, localEffects);
    return true;
  }

  // Operations whose effects are those of their nested operations: recurse
  // into every operation of every block of every region, and give up as soon
  // as one nested operation cannot be described exactly.
  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
    for (Region &nestedRegion : op->getRegions()) {
      for (Block &nestedBlock : nestedRegion) {
        for (Operation &innerOp : nestedBlock)
          if (!collectEffects(&innerOp, effects, ignoreBarriers))
            return false;
      }
    }
    return true;
  }

  // Nothing is known about this operation: conservatively assume it may have
  // every kind of effect.
  addAllValuelessEffects(effects);
  return false;
}
/// Collects into `effects` the memory effects of operations that may execute
/// before `op`, looking backwards inside the enclosing block and then
/// recursively around the parent operation until a parallel region boundary
/// is reached.
///
/// \param op            operation to look backwards from.
/// \param effects       output list; entries are only appended.
/// \param stopAtBarrier when `true`, the search ends successfully at the first
///                      `BarrierOp` found while scanning backwards; otherwise
///                      barriers are skipped.
/// \returns `true` if the collected effects are exact, `false` if a
///          conservative approximation had to be used.
static bool
getEffectsBefore(Operation *op,
                 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                 bool stopAtBarrier) {
  Block *block = op->getBlock();
  if (!block)
    return true;

  // A region with more than one block is not analyzed: assume anything.
  Region *region = block->getParent();
  if (region && !llvm::hasSingleElement(region->getBlocks())) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Scan the operations preceding `op` in its block, nearest first.
  if (op != &block->front()) {
    for (Operation *prev = op->getPrevNode(); prev != nullptr;
         prev = prev->getPrevNode()) {
      if (isa<BarrierOp>(prev)) {
        if (stopAtBarrier)
          return true;
        continue;
      }
      if (!collectEffects(prev, effects))
        return false;
    }
  }

  Operation *parent = op->getParentOp();

  // Nothing outside of the parallel region is of interest.
  if (isParallelRegionBoundary(parent))
    return true;

  // Continue with whatever precedes the parent operation.
  if (!getEffectsBefore(parent, effects, stopAtBarrier))
    return false;

  // Inside a sequential loop, the operations at the end of the body are also
  // collected, scanning backwards from the block terminator and stopping at
  // the nearest barrier. The terminator itself is the starting point and is
  // not collected.
  if (isSequentialLoopLike(parent))
    return getEffectsBefore(block->getTerminator(), effects,
                            /*stopAtBarrier=*/true);

  // Parents known to run their body a single time need no further work.
  if (hasSingleExecutionBody(parent))
    return true;

  // Otherwise collect the effects of everything the walk over the parent
  // visits, stopping at the first operation that is not described exactly.
  WalkResult walked = parent->walk([&](Operation *nested) {
    return collectEffects(nested, effects) ? WalkResult::advance()
                                           : WalkResult::interrupt();
  });
  return !walked.wasInterrupted();
}
/// Collects into `effects` the memory effects of operations that may execute
/// after `op`, looking forwards inside the enclosing block and then
/// recursively around the parent operation until a parallel region boundary
/// is reached.
///
/// \param op            operation to look forwards from.
/// \param effects       output list; entries are only appended.
/// \param stopAtBarrier when `true`, the search ends successfully at the first
///                      `BarrierOp` found while scanning forwards; otherwise
///                      barriers are skipped.
/// \returns `true` if the collected effects are exact, `false` if a
///          conservative approximation had to be used.
static bool
getEffectsAfter(Operation *op,
                SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                bool stopAtBarrier) {
  Block *block = op->getBlock();
  if (!block)
    return true;

  // A region with more than one block is not analyzed: assume anything.
  Region *region = block->getParent();
  if (region && !llvm::hasSingleElement(region->getBlocks())) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Scan the operations following `op` in its block, nearest first.
  if (op != &block->back()) {
    for (Operation *next = op->getNextNode(); next != nullptr;
         next = next->getNextNode()) {
      if (isa<BarrierOp>(next)) {
        if (stopAtBarrier)
          return true;
        continue;
      }
      if (!collectEffects(next, effects))
        return false;
    }
  }

  Operation *parent = op->getParentOp();

  // Nothing outside of the parallel region is of interest.
  if (isParallelRegionBoundary(parent))
    return true;

  // Continue with whatever follows the parent operation.
  if (!getEffectsAfter(parent, effects, stopAtBarrier))
    return false;

  // Inside a sequential loop, the operations at the start of the body are
  // also collected, from the first operation up to the nearest barrier.
  if (isSequentialLoopLike(parent)) {
    Operation *firstInBody = &block->front();
    if (isa<BarrierOp>(firstInBody))
      return true;

    // Both calls always run; the result is exact only if both were exact.
    const bool firstExact = collectEffects(firstInBody, effects);
    const bool restExact =
        getEffectsAfter(firstInBody, effects, /*stopAtBarrier=*/true);
    return restExact && firstExact;
  }

  // Parents known to run their body a single time need no further work.
  if (hasSingleExecutionBody(parent))
    return true;

  // Otherwise collect the effects of everything the walk over the parent
  // visits, stopping at the first operation that is not described exactly.
  WalkResult walked = parent->walk([&](Operation *nested) {
    return collectEffects(nested, effects) ? WalkResult::advance()
                                           : WalkResult::interrupt();
  });
  return !walked.wasInterrupted();
}
/// Follows the chain of defining operations of `v` through `memref` cast,
/// subview, view, transpose, collapse_shape and expand_shape operations and
/// returns the first value that is not produced by one of them (possibly `v`
/// itself, or a block argument).
static Value getBase(Value v) {
  for (Operation *producer = v.getDefiningOp(); producer != nullptr;
       producer = v.getDefiningOp()) {
    // Each case replaces `v` with the operand the operation is derived from
    // and reports that the traversal should go on.
    const bool steppedThrough =
        TypeSwitch<Operation *, bool>(producer)
            .Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>(
                [&v](auto sourceBased) {
                  v = sourceBased.getSource();
                  return true;
                })
            .Case<memref::TransposeOp>([&v](auto transpose) {
              v = transpose.getIn();
              return true;
            })
            .Case<memref::CollapseShapeOp, memref::ExpandShapeOp>(
                [&v](auto reshape) {
                  v = reshape.getSrc();
                  return true;
                })
            .Default([](Operation *) { return false; });
    if (!steppedThrough)
      break;
  }
  return v;
}
/// Returns `true` if `v` is a block argument whose owning block is directly
/// nested in an operation implementing `FunctionOpInterface`.
static bool isFunctionArgument(Value v) {
  auto blockArg = dyn_cast<BlockArgument>(v);
  if (!blockArg)
    return false;
  Operation *blockOwner = blockArg.getOwner()->getParentOp();
  return isa<FunctionOpInterface>(blockOwner);
}
/// Returns the operand that `op` is derived from when `op` is view-like,
/// cast-like, a `memref::TransposeOp`, a `memref::ExpandShapeOp` or a
/// `memref::CollapseShapeOp`; returns a null `Value` for any other operation.
/// The checks are made in that order and the first match wins.
static Value propagatesCapture(Operation *op) {
  if (auto viewLike = dyn_cast<ViewLikeOpInterface>(op))
    return viewLike.getViewSource();
  if (auto castLike = dyn_cast<CastOpInterface>(op))
    return castLike->getOperand(0);
  if (auto transpose = dyn_cast<memref::TransposeOp>(op))
    return transpose.getIn();
  if (auto expand = dyn_cast<memref::ExpandShapeOp>(op))
    return expand.getSrc();
  if (auto collapse = dyn_cast<memref::CollapseShapeOp>(op))
    return collapse.getSrc();
  return Value();
}
/// Reports whether `op` is known to capture `v`.
///
/// \returns `true`/`false` when the answer is known, `std::nullopt` when
///          nothing is known about `op`. For the store-like operations
///          handled here, `v` counts as captured exactly when it is the
///          stored value; `memref::DeallocOp` never captures.
static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
  if (auto memrefStore = dyn_cast<memref::StoreOp>(op))
    return memrefStore.getValue() == v;
  if (auto transferWrite = dyn_cast<vector::TransferWriteOp>(op))
    return transferWrite.getValue() == v;
  if (auto vectorStore = dyn_cast<vector::StoreOp>(op))
    return vectorStore.getValueToStore() == v;
  if (auto maskedStore = dyn_cast<vector::MaskedStoreOp>(op))
    return maskedStore.getValueToStore() == v;
  if (isa<memref::DeallocOp>(op))
    return false;
  return std::nullopt;
}
/// Returns `true` if `v` may be captured by one of its users, `false` if every
/// user examined is known not to capture it.
static bool maybeCaptured(Value v) {
  auto isReadEffect = [](const MemoryEffects::EffectInstance &effect) {
    return isa<MemoryEffects::Read>(effect.getEffect());
  };

  SmallVector<Value> worklist = {v};
  while (!worklist.empty()) {
    Value current = worklist.pop_back_val();
    for (Operation *user : current.getUsers()) {
      // A user whose declared effects are all reads is skipped. A user that
      // declares no effects at all satisfies this check as well.
      if (auto iface = dyn_cast<MemoryEffectOpInterface>(user)) {
        SmallVector<MemoryEffects::EffectInstance> userEffects;
        iface.getEffects(userEffects);
        if (llvm::all_of(userEffects, isReadEffect))
          continue;
      }

      // Users that derive a new value from an operand enqueue that operand.
      // NOTE(review): what is enqueued is the operand reported by
      // propagatesCapture, not the result of `user`; if that operand is
      // `current` itself it gets visited again. Confirm this is intended.
      if (Value propagated = propagatesCapture(user)) {
        worklist.push_back(propagated);
        continue;
      }

      // Unknown users and users known to capture both count as a capture.
      std::optional<bool> status = getKnownCapturingStatus(user, current);
      if (status.value_or(true))
        return true;
    }
  }
  return false;
}
/// Returns `true` if `first` and `second` may refer to overlapping memory,
/// `false` if they are known not to. Both values are first reduced to their
/// base with `getBase`; the answer defaults to `true` when no rule applies.
static bool mayAlias(Value first, Value second) {
  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
    DBGS_ALIAS() << "checking aliasing between ";
    DBGS_ALIAS() << first << "\n";
    DBGS_ALIAS() << " and ";
    DBGS_ALIAS() << second << "\n";
  });

  first = getBase(first);
  second = getBase(second);

  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
    DBGS_ALIAS() << "base ";
    DBGS_ALIAS() << first << "\n";
    DBGS_ALIAS() << " and ";
    DBGS_ALIAS() << second << "\n";
  });

  // Identical bases are treated as aliasing.
  if (first == second) {
    DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
    return true;
  }

  // Two globals alias exactly when they name the same symbol.
  auto firstGlobalOp = first.getDefiningOp<memref::GetGlobalOp>();
  auto secondGlobalOp = second.getDefiningOp<memref::GetGlobalOp>();
  if (firstGlobalOp && secondGlobalOp)
    return firstGlobalOp.getNameAttr() == secondGlobalOp.getNameAttr();

  // Two function arguments both carrying `llvm.noalias` do not alias.
  auto hasNoaliasAttr = [](Value value) {
    auto blockArg = dyn_cast<BlockArgument>(value);
    if (!blockArg)
      return false;
    auto funcLike =
        dyn_cast<FunctionOpInterface>(blockArg.getOwner()->getParentOp());
    if (!funcLike)
      return false;
    return funcLike.getArgAttr(blockArg.getArgNumber(), "llvm.noalias") !=
           nullptr;
  };
  if (hasNoaliasAttr(first) && hasNoaliasAttr(second))
    return false;

  const bool firstDistinct = producesDistinctBase(first.getDefiningOp());
  const bool secondDistinct = producesDistinctBase(second.getDefiningOp());
  const bool firstIsGlobal = firstGlobalOp != nullptr;
  const bool secondIsGlobal = secondGlobalOp != nullptr;

  // Allocations and globals that were not found equal above do not alias
  // one another.
  if ((firstDistinct || firstIsGlobal) && (secondDistinct || secondIsGlobal))
    return false;

  // An allocation does not alias a function argument.
  const bool firstIsArg = isFunctionArgument(first);
  const bool secondIsArg = isFunctionArgument(second);
  if ((firstDistinct && secondIsArg) || (secondDistinct && firstIsArg))
    return false;

  // An allocation that is not captured does not alias any other base.
  if (firstDistinct && !maybeCaptured(first))
    return false;
  if (secondDistinct && !maybeCaptured(second))
    return false;

  // No rule applied: be conservative.
  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
  return true;
}
/// Returns `true` if the effect `a` may touch memory aliasing `v2`. An effect
/// that is not attached to a value is assumed to alias everything.
static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) {
  Value affected = a.getValue();
  if (!affected)
    return true;
  return mayAlias(affected, v2);
}
/// Returns `true` if the effects `a` and `b` may apply to aliasing memory.
/// Effects on resources with different IDs never alias; if neither effect is
/// attached to a value, aliasing is assumed.
static bool mayAlias(MemoryEffects::EffectInstance a,
                     MemoryEffects::EffectInstance b) {
  const bool sameResource =
      a.getResource()->getResourceID() == b.getResource()->getResourceID();
  if (!sameResource)
    return false;

  // Prefer the value of `b`; fall back to the value of `a`.
  if (Value valueOfB = b.getValue())
    return mayAlias(a, valueOfB);
  if (Value valueOfA = a.getValue())
    return mayAlias(b, valueOfA);
  return true;
}
/// Returns `true` if some effect in `beforeEffects` conflicts with some effect
/// in `afterEffects`.
///
/// A pair of possibly-aliasing effects is NOT a conflict when both are reads,
/// when either one is an allocation, or when the "before" effect is a free.
/// Any other possibly-aliasing pair is a conflict.
static bool
haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
                       ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
  for (const MemoryEffects::EffectInstance &before : beforeEffects) {
    auto *beforeKind = before.getEffect();
    for (const MemoryEffects::EffectInstance &after : afterEffects) {
      // Effects on memory that cannot alias never conflict.
      if (!mayAlias(before, after))
        continue;

      auto *afterKind = after.getEffect();
      const bool bothRead = isa<MemoryEffects::Read>(beforeKind) &&
                            isa<MemoryEffects::Read>(afterKind);
      const bool involvesAllocate = isa<MemoryEffects::Allocate>(beforeKind) ||
                                    isa<MemoryEffects::Allocate>(afterKind);
      const bool freedBefore = isa<MemoryEffects::Free>(beforeKind);
      if (bothRead || involvesAllocate || freedBefore)
        continue;

      LLVM_DEBUG(
          DBGS() << "found a conflict between (before): " << before.getValue()
                 << " read:" << isa<MemoryEffects::Read>(beforeKind)
                 << " write:" << isa<MemoryEffects::Write>(beforeKind)
                 << " alloc:" << isa<MemoryEffects::Allocate>(beforeKind)
                 << " free:" << isa<MemoryEffects::Free>(beforeKind) << "\n");
      LLVM_DEBUG(
          DBGS() << "and (after): " << after.getValue()
                 << " read:" << isa<MemoryEffects::Read>(afterKind)
                 << " write:" << isa<MemoryEffects::Write>(afterKind)
                 << " alloc:" << isa<MemoryEffects::Allocate>(afterKind)
                 << " free:" << isa<MemoryEffects::Free>(afterKind) << "\n");
      return true;
    }
  }
  return false;
}
namespace {
/// Rewrite pattern that erases a `BarrierOp` when the effects collected
/// before it and the effects collected after it (each up to the nearest other
/// barrier) do not conflict.
class BarrierElimination final : public OpRewritePattern<BarrierOp> {
public:
  using OpRewritePattern<BarrierOp>::OpRewritePattern;

  /// Erases `barrier` and returns success if it is found unnecessary;
  /// returns failure and leaves the IR untouched otherwise.
  LogicalResult matchAndRewrite(BarrierOp barrier,
                                PatternRewriter &rewriter) const override {
    LLVM_DEBUG(DBGS() << "checking the necessity of: " << barrier << " "
                      << barrier.getLoc() << "\n");

    // The boolean results of the two collectors are not inspected: every
    // `false` they return comes from a path that has also appended the
    // valueless placeholder effects to the list.
    SmallVector<MemoryEffects::EffectInstance> beforeEffects;
    getEffectsBefore(barrier, beforeEffects, /*stopAtBarrier=*/true);

    SmallVector<MemoryEffects::EffectInstance> afterEffects;
    getEffectsAfter(barrier, afterEffects, /*stopAtBarrier=*/true);

    if (haveConflictingEffects(beforeEffects, afterEffects)) {
      LLVM_DEBUG(DBGS() << "barrier is necessary: " << barrier << " "
                        << barrier.getLoc() << "\n");
      return failure();
    }

    LLVM_DEBUG(DBGS() << "the surrounding barriers are sufficient, removing "
                      << barrier << "\n");
    rewriter.eraseOp(barrier);
    return success();
  }
};
/// Pass that applies the barrier elimination patterns greedily to the
/// operation it runs on and signals failure if the greedy driver fails.
class GpuEliminateBarriersPass
    : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> {
  void runOnOperation() override {
    auto target = getOperation();

    RewritePatternSet patterns(&getContext());
    mlir::populateGpuEliminateBarriersPatterns(patterns);

    LogicalResult status =
        applyPatternsAndFoldGreedily(target, std::move(patterns));
    if (failed(status))
      signalPassFailure();
  }
};
}
/// Adds the `BarrierElimination` pattern to `patterns`, constructed with the
/// context of the pattern set.
void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) {
  MLIRContext *context = patterns.getContext();
  patterns.insert<BarrierElimination>(context);
}