#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/ParallelLoopMapper.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/AffineMap.h"
namespace mlir {
#define GEN_PASS_DEF_GPUMAPPARALLELLOOPSPASS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
}
namespace mlir {
using scf::ParallelOp;
StringRef gpu::getMappingAttrName() { return "mapping"; }
LogicalResult
gpu::setMappingAttr(ParallelOp ploopOp,
ArrayRef<ParallelLoopDimMappingAttr> mapping) {
llvm::DenseSet<gpu::Processor> specifiedMappings;
for (auto dimAttr : mapping) {
gpu::Processor processor = dimAttr.getProcessor();
if (processor != gpu::Processor::Sequential &&
specifiedMappings.count(processor))
return ploopOp.emitError(
"invalid mapping multiple loops to same processor");
specifiedMappings.insert(processor);
}
ArrayRef<Attribute> mappingAsAttrs(mapping.data(), mapping.size());
ploopOp->setAttr(getMappingAttrName(),
ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
return success();
}
namespace gpu {
namespace {
enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
}
static constexpr int kNumHardwareIds = 3;
static MappingLevel &operator++(MappingLevel &mappingLevel) {
if (mappingLevel < Sequential) {
mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
}
return mappingLevel;
}
static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {
if (dimension >= kNumHardwareIds || level == Sequential)
return Processor::Sequential;
switch (level) {
case MapGrid:
switch (dimension) {
case 0:
return Processor::BlockX;
case 1:
return Processor::BlockY;
case 2:
return Processor::BlockZ;
default:
return Processor::Sequential;
}
break;
case MapBlock:
switch (dimension) {
case 0:
return Processor::ThreadX;
case 1:
return Processor::ThreadY;
case 2:
return Processor::ThreadZ;
default:
return Processor::Sequential;
}
default:;
}
return Processor::Sequential;
}
static void mapParallelOp(ParallelOp parallelOp,
MappingLevel mappingLevel = MapGrid) {
if (parallelOp->getAttr(getMappingAttrName()) ||
((mappingLevel == MapGrid) && parallelOp->getParentOfType<ParallelOp>()))
return;
MLIRContext *ctx = parallelOp.getContext();
Builder b(ctx);
SmallVector<ParallelLoopDimMappingAttr, 4> attrs;
attrs.reserve(parallelOp.getNumLoops());
for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) {
attrs.push_back(b.getAttr<ParallelLoopDimMappingAttr>(
getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(),
b.getDimIdentityMap()));
}
(void)setMappingAttr(parallelOp, attrs);
++mappingLevel;
for (Operation &op : *parallelOp.getBody()) {
if (ParallelOp nested = dyn_cast<ParallelOp>(op))
mapParallelOp(nested, mappingLevel);
}
}
namespace {
struct GpuMapParallelLoopsPass
: public impl::GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
void runOnOperation() override {
for (Region ®ion : getOperation()->getRegions()) {
region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
}
}
};
}
}
}
std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
mlir::createGpuMapParallelLoopsPass() {
return std::make_unique<gpu::GpuMapParallelLoopsPass>();
}