#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/IR/Operation.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/raw_ostream.h"
using namespace mlir;
using namespace mlir::LLVM;
using mlir::LLVM::detail::createIntrinsicCall;
/// Emits a call to the device function named `fnName`, passing `parameter` as
/// its single 32-bit integer constant argument, and returns the call's result.
///
/// The callee is looked up (or declared, if absent) in the LLVM module that
/// owns the builder's current insertion block, with the signature `i64 (i32)`.
static llvm::Value *createDeviceFunctionCall(llvm::IRBuilderBase &builder,
                                             StringRef fnName, int parameter) {
  llvm::Module *module = builder.GetInsertBlock()->getModule();
  llvm::FunctionType *functionType = llvm::FunctionType::get(
      llvm::Type::getInt64Ty(module->getContext()), // return type.
      llvm::Type::getInt32Ty(module->getContext()), // parameter type.
      /*isVarArg=*/false);
  // Keep the llvm::FunctionCallee returned by getOrInsertFunction instead of
  // dyn_cast-ing its callee to llvm::Function: the cast yields null when a
  // symbol with this name already exists but is not a function, and the null
  // pointer would then be dereferenced while building the call. The callee
  // object also carries the function type requested above.
  auto callee = module->getOrInsertFunction(fnName, functionType);
  llvm::Value *fnOp0 = llvm::ConstantInt::get(
      llvm::Type::getInt32Ty(module->getContext()), parameter);
  return builder.CreateCall(callee, {fnOp0});
}
namespace {
/// Dialect interface that translates ROCDL dialect operations and attributes
/// to LLVM IR.
class ROCDLDialectLLVMIRTranslationInterface
    : public LLVMTranslationDialectInterface {
public:
  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;

  /// Translates `op` to LLVM IR using `builder`, keeping translation state in
  /// `moduleTranslation`. Returns failure if control reaches the end of the
  /// included conversion code without an earlier return.
  LogicalResult
  convertOperation(Operation *op, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) const final {
    // NOTE(review): `opInst` is presumably the name the included conversion
    // code refers to; confirm against ROCDLConversions.inc.
    Operation &opInst = *op;
#include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"

    return failure();
  }

  /// Applies the effect of a ROCDL dialect attribute attached to `op` onto the
  /// already-translated LLVM IR function.
  ///
  /// Two attributes are handled:
  ///  * the ROCDL kernel attribute: sets the AMDGPU_KERNEL calling convention,
  ///    adds a default "amdgpu-flat-work-group-size" of "1, 256" unless the
  ///    function already carries that attribute, and sets
  ///    "amdgpu-implicitarg-num-bytes" to "56";
  ///  * "rocdl.max_flat_work_group_size": sets "amdgpu-flat-work-group-size"
  ///    to "1, <value>".
  ///
  /// Returns failure if a handled attribute is attached to something other
  /// than an `llvm.func`, if the work-group-size value is not an integer
  /// attribute, or if no LLVM IR function has been created for the op. Any
  /// other attribute is accepted without effect.
  LogicalResult
  amendOperation(Operation *op, NamedAttribute attribute,
                 LLVM::ModuleTranslation &moduleTranslation) const final {
    if (attribute.getName() == ROCDL::ROCDLDialect::getKernelFuncAttrName()) {
      auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
      if (!func)
        return failure();

      llvm::Function *llvmFunc =
          moduleTranslation.lookupFunction(func.getName());
      // Guard against a function that has not been translated; dereferencing
      // a null result here would crash the translation.
      if (!llvmFunc)
        return failure();

      llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      // Only install the default range when no explicit value is present, so
      // an already-applied override is not clobbered.
      if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
        llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1, 256");
      }
      llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
    }

    // Explicit override of the flat work-group size.
    if ("rocdl.max_flat_work_group_size" == attribute.getName()) {
      auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
      if (!func)
        return failure();
      auto value = attribute.getValue().dyn_cast<IntegerAttr>();
      if (!value)
        return failure();

      llvm::Function *llvmFunc =
          moduleTranslation.lookupFunction(func.getName());
      // Same guard as above: bail out instead of dereferencing null.
      if (!llvmFunc)
        return failure();

      // Build the "1, <max>" range string expected by the LLVM attribute.
      llvm::SmallString<8> llvmAttrValue;
      llvm::raw_svector_ostream attrValueStream(llvmAttrValue);
      attrValueStream << "1, " << value.getInt();
      llvmFunc->addFnAttr("amdgpu-flat-work-group-size", llvmAttrValue);
    }
    return success();
  }
};
} // namespace
void mlir::registerROCDLDialectTranslation(DialectRegistry ®istry) {
registry.insert<ROCDL::ROCDLDialect>();
registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
dialect->addInterfaces<ROCDLDialectLLVMIRTranslationInterface>();
});
}
/// Convenience overload: builds a temporary registry holding the ROCDL
/// dialect translation registration and appends it to `context`.
void mlir::registerROCDLDialectTranslation(MLIRContext &context) {
  DialectRegistry rocdlRegistry;
  // Populate the scratch registry via the registry-based overload.
  mlir::registerROCDLDialectTranslation(rocdlRegistry);
  context.appendDialectRegistry(rocdlRegistry);
}