#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Analysis/DataLayoutAnalysis.h"
#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/MathExtras.h"
#include <optional>
namespace mlir {
#define GEN_PASS_DEF_FINALIZEMEMREFTOLLVMCONVERSIONPASS
#include "mlir/Conversion/Passes.h.inc"
}
using namespace mlir;
namespace {
bool isStaticStrideOrOffset(int64_t strideOrOffset) {
return !ShapedType::isDynamic(strideOrOffset);
}
LLVM::LLVMFuncOp getFreeFn(const LLVMTypeConverter *typeConverter,
ModuleOp module) {
bool useGenericFn = typeConverter->getOptions().useGenericFunctions;
if (useGenericFn)
return LLVM::lookupOrCreateGenericFreeFn(module);
return LLVM::lookupOrCreateFreeFn(module);
}
struct AllocOpLowering : public AllocLikeOpLLVMLowering {
AllocOpLowering(const LLVMTypeConverter &converter)
: AllocLikeOpLLVMLowering(memref::AllocOp::getOperationName(),
converter) {}
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value sizeBytes,
Operation *op) const override {
return allocateBufferManuallyAlign(
rewriter, loc, sizeBytes, op,
getAlignment(rewriter, loc, cast<memref::AllocOp>(op)));
}
};
struct AlignedAllocOpLowering : public AllocLikeOpLLVMLowering {
AlignedAllocOpLowering(const LLVMTypeConverter &converter)
: AllocLikeOpLLVMLowering(memref::AllocOp::getOperationName(),
converter) {}
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value sizeBytes,
Operation *op) const override {
Value ptr = allocateBufferAutoAlign(
rewriter, loc, sizeBytes, op, &defaultLayout,
alignedAllocationGetAlignment(rewriter, loc, cast<memref::AllocOp>(op),
&defaultLayout));
if (!ptr)
return std::make_tuple(Value(), Value());
return std::make_tuple(ptr, ptr);
}
private:
DataLayout defaultLayout;
};
struct AllocaOpLowering : public AllocLikeOpLLVMLowering {
AllocaOpLowering(const LLVMTypeConverter &converter)
: AllocLikeOpLLVMLowering(memref::AllocaOp::getOperationName(),
converter) {
setRequiresNumElements();
}
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value size,
Operation *op) const override {
auto allocaOp = cast<memref::AllocaOp>(op);
auto elementType =
typeConverter->convertType(allocaOp.getType().getElementType());
unsigned addrSpace =
*getTypeConverter()->getMemRefAddressSpace(allocaOp.getType());
auto elementPtrType =
LLVM::LLVMPointerType::get(rewriter.getContext(), addrSpace);
auto allocatedElementPtr =
rewriter.create<LLVM::AllocaOp>(loc, elementPtrType, elementType, size,
allocaOp.getAlignment().value_or(0));
return std::make_tuple(allocatedElementPtr, allocatedElementPtr);
}
};
struct AllocaScopeOpLowering
: public ConvertOpToLLVMPattern<memref::AllocaScopeOp> {
using ConvertOpToLLVMPattern<memref::AllocaScopeOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::AllocaScopeOp allocaScopeOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
OpBuilder::InsertionGuard guard(rewriter);
Location loc = allocaScopeOp.getLoc();
auto *currentBlock = rewriter.getInsertionBlock();
auto *remainingOpsBlock =
rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint());
Block *continueBlock;
if (allocaScopeOp.getNumResults() == 0) {
continueBlock = remainingOpsBlock;
} else {
continueBlock = rewriter.createBlock(
remainingOpsBlock, allocaScopeOp.getResultTypes(),
SmallVector<Location>(allocaScopeOp->getNumResults(),
allocaScopeOp.getLoc()));
rewriter.create<LLVM::BrOp>(loc, ValueRange(), remainingOpsBlock);
}
Block *beforeBody = &allocaScopeOp.getBodyRegion().front();
Block *afterBody = &allocaScopeOp.getBodyRegion().back();
rewriter.inlineRegionBefore(allocaScopeOp.getBodyRegion(), continueBlock);
rewriter.setInsertionPointToEnd(currentBlock);
auto stackSaveOp =
rewriter.create<LLVM::StackSaveOp>(loc, getVoidPtrType());
rewriter.create<LLVM::BrOp>(loc, ValueRange(), beforeBody);
rewriter.setInsertionPointToEnd(afterBody);
auto returnOp =
cast<memref::AllocaScopeReturnOp>(afterBody->getTerminator());
auto branchOp = rewriter.replaceOpWithNewOp<LLVM::BrOp>(
returnOp, returnOp.getResults(), continueBlock);
rewriter.setInsertionPoint(branchOp);
rewriter.create<LLVM::StackRestoreOp>(loc, stackSaveOp);
rewriter.replaceOp(allocaScopeOp, continueBlock->getArguments());
return success();
}
};
struct AssumeAlignmentOpLowering
: public ConvertOpToLLVMPattern<memref::AssumeAlignmentOp> {
using ConvertOpToLLVMPattern<
memref::AssumeAlignmentOp>::ConvertOpToLLVMPattern;
explicit AssumeAlignmentOpLowering(const LLVMTypeConverter &converter)
: ConvertOpToLLVMPattern<memref::AssumeAlignmentOp>(converter) {}
LogicalResult
matchAndRewrite(memref::AssumeAlignmentOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Value memref = adaptor.getMemref();
unsigned alignment = op.getAlignment();
auto loc = op.getLoc();
auto srcMemRefType = cast<MemRefType>(op.getMemref().getType());
Value ptr = getStridedElementPtr(loc, srcMemRefType, memref, {},
rewriter);
MemRefDescriptor memRefDescriptor(memref);
auto intPtrType =
getIntPtrType(memRefDescriptor.getElementPtrType().getAddressSpace());
Value zero = createIndexAttrConstant(rewriter, loc, intPtrType, 0);
Value mask =
createIndexAttrConstant(rewriter, loc, intPtrType, alignment - 1);
Value ptrValue = rewriter.create<LLVM::PtrToIntOp>(loc, intPtrType, ptr);
rewriter.create<LLVM::AssumeOp>(
loc, rewriter.create<LLVM::ICmpOp>(
loc, LLVM::ICmpPredicate::eq,
rewriter.create<LLVM::AndOp>(loc, ptrValue, mask), zero));
rewriter.eraseOp(op);
return success();
}
};
struct DeallocOpLowering : public ConvertOpToLLVMPattern<memref::DeallocOp> {
using ConvertOpToLLVMPattern<memref::DeallocOp>::ConvertOpToLLVMPattern;
explicit DeallocOpLowering(const LLVMTypeConverter &converter)
: ConvertOpToLLVMPattern<memref::DeallocOp>(converter) {}
LogicalResult
matchAndRewrite(memref::DeallocOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
LLVM::LLVMFuncOp freeFunc =
getFreeFn(getTypeConverter(), op->getParentOfType<ModuleOp>());
Value allocatedPtr;
if (auto unrankedTy =
llvm::dyn_cast<UnrankedMemRefType>(op.getMemref().getType())) {
auto elementPtrTy = LLVM::LLVMPointerType::get(
rewriter.getContext(), unrankedTy.getMemorySpaceAsInt());
allocatedPtr = UnrankedMemRefDescriptor::allocatedPtr(
rewriter, op.getLoc(),
UnrankedMemRefDescriptor(adaptor.getMemref())
.memRefDescPtr(rewriter, op.getLoc()),
elementPtrTy);
} else {
allocatedPtr = MemRefDescriptor(adaptor.getMemref())
.allocatedPtr(rewriter, op.getLoc());
}
rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, freeFunc, allocatedPtr);
return success();
}
};
struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
using ConvertOpToLLVMPattern<memref::DimOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::DimOp dimOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Type operandType = dimOp.getSource().getType();
if (isa<UnrankedMemRefType>(operandType)) {
FailureOr<Value> extractedSize = extractSizeOfUnrankedMemRef(
operandType, dimOp, adaptor.getOperands(), rewriter);
if (failed(extractedSize))
return failure();
rewriter.replaceOp(dimOp, {*extractedSize});
return success();
}
if (isa<MemRefType>(operandType)) {
rewriter.replaceOp(
dimOp, {extractSizeOfRankedMemRef(operandType, dimOp,
adaptor.getOperands(), rewriter)});
return success();
}
llvm_unreachable("expected MemRefType or UnrankedMemRefType");
}
private:
FailureOr<Value>
extractSizeOfUnrankedMemRef(Type operandType, memref::DimOp dimOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
Location loc = dimOp.getLoc();
auto unrankedMemRefType = cast<UnrankedMemRefType>(operandType);
auto scalarMemRefType =
MemRefType::get({}, unrankedMemRefType.getElementType());
FailureOr<unsigned> maybeAddressSpace =
getTypeConverter()->getMemRefAddressSpace(unrankedMemRefType);
if (failed(maybeAddressSpace)) {
dimOp.emitOpError("memref memory space must be convertible to an integer "
"address space");
return failure();
}
unsigned addressSpace = *maybeAddressSpace;
UnrankedMemRefDescriptor unrankedDesc(adaptor.getSource());
Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc);
Type elementType = typeConverter->convertType(scalarMemRefType);
auto indexPtrTy =
LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
Value offsetPtr = rewriter.create<LLVM::GEPOp>(
loc, indexPtrTy, elementType, underlyingRankedDesc,
ArrayRef<LLVM::GEPArg>{0, 2});
Value idxPlusOne = rewriter.create<LLVM::AddOp>(
loc, createIndexAttrConstant(rewriter, loc, getIndexType(), 1),
adaptor.getIndex());
Value sizePtr = rewriter.create<LLVM::GEPOp>(
loc, indexPtrTy, getTypeConverter()->getIndexType(), offsetPtr,
idxPlusOne);
return rewriter
.create<LLVM::LoadOp>(loc, getTypeConverter()->getIndexType(), sizePtr)
.getResult();
}
std::optional<int64_t> getConstantDimIndex(memref::DimOp dimOp) const {
if (auto idx = dimOp.getConstantIndex())
return idx;
if (auto constantOp = dimOp.getIndex().getDefiningOp<LLVM::ConstantOp>())
return cast<IntegerAttr>(constantOp.getValue()).getValue().getSExtValue();
return std::nullopt;
}
Value extractSizeOfRankedMemRef(Type operandType, memref::DimOp dimOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
Location loc = dimOp.getLoc();
MemRefType memRefType = cast<MemRefType>(operandType);
Type indexType = getIndexType();
if (std::optional<int64_t> index = getConstantDimIndex(dimOp)) {
int64_t i = *index;
if (i >= 0 && i < memRefType.getRank()) {
if (memRefType.isDynamicDim(i)) {
MemRefDescriptor descriptor(adaptor.getSource());
return descriptor.size(rewriter, loc, i);
}
int64_t dimSize = memRefType.getDimSize(i);
return createIndexAttrConstant(rewriter, loc, indexType, dimSize);
}
}
Value index = adaptor.getIndex();
int64_t rank = memRefType.getRank();
MemRefDescriptor memrefDescriptor(adaptor.getSource());
return memrefDescriptor.size(rewriter, loc, index, rank);
}
};
template <typename Derived>
struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
using ConvertOpToLLVMPattern<Derived>::ConvertOpToLLVMPattern;
using ConvertOpToLLVMPattern<Derived>::isConvertibleAndHasIdentityMaps;
using Base = LoadStoreOpLowering<Derived>;
LogicalResult match(Derived op) const override {
MemRefType type = op.getMemRefType();
return isConvertibleAndHasIdentityMaps(type) ? success() : failure();
}
};
struct GenericAtomicRMWOpLowering
: public LoadStoreOpLowering<memref::GenericAtomicRMWOp> {
using Base::Base;
LogicalResult
matchAndRewrite(memref::GenericAtomicRMWOp atomicOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = atomicOp.getLoc();
Type valueType = typeConverter->convertType(atomicOp.getResult().getType());
auto *initBlock = rewriter.getInsertionBlock();
auto *loopBlock = rewriter.splitBlock(initBlock, Block::iterator(atomicOp));
loopBlock->addArgument(valueType, loc);
auto *endBlock =
rewriter.splitBlock(loopBlock, Block::iterator(atomicOp)++);
rewriter.setInsertionPointToEnd(initBlock);
auto memRefType = cast<MemRefType>(atomicOp.getMemref().getType());
auto dataPtr = getStridedElementPtr(loc, memRefType, adaptor.getMemref(),
adaptor.getIndices(), rewriter);
Value init = rewriter.create<LLVM::LoadOp>(
loc, typeConverter->convertType(memRefType.getElementType()), dataPtr);
rewriter.create<LLVM::BrOp>(loc, init, loopBlock);
rewriter.setInsertionPointToStart(loopBlock);
auto loopArgument = loopBlock->getArgument(0);
IRMapping mapping;
mapping.map(atomicOp.getCurrentValue(), loopArgument);
Block &entryBlock = atomicOp.body().front();
for (auto &nestedOp : entryBlock.without_terminator()) {
Operation *clone = rewriter.clone(nestedOp, mapping);
mapping.map(nestedOp.getResults(), clone->getResults());
}
Value result = mapping.lookup(entryBlock.getTerminator()->getOperand(0));
auto successOrdering = LLVM::AtomicOrdering::acq_rel;
auto failureOrdering = LLVM::AtomicOrdering::monotonic;
auto cmpxchg = rewriter.create<LLVM::AtomicCmpXchgOp>(
loc, dataPtr, loopArgument, result, successOrdering, failureOrdering);
Value newLoaded = rewriter.create<LLVM::ExtractValueOp>(loc, cmpxchg, 0);
Value ok = rewriter.create<LLVM::ExtractValueOp>(loc, cmpxchg, 1);
rewriter.create<LLVM::CondBrOp>(loc, ok, endBlock, ArrayRef<Value>(),
loopBlock, newLoaded);
rewriter.setInsertionPointToEnd(endBlock);
rewriter.replaceOp(atomicOp, {newLoaded});
return success();
}
};
static Type
convertGlobalMemrefTypeToLLVM(MemRefType type,
const LLVMTypeConverter &typeConverter) {
Type elementType = typeConverter.convertType(type.getElementType());
Type arrayTy = elementType;
for (int64_t dim : llvm::reverse(type.getShape()))
arrayTy = LLVM::LLVMArrayType::get(arrayTy, dim);
return arrayTy;
}
struct GlobalMemrefOpLowering
: public ConvertOpToLLVMPattern<memref::GlobalOp> {
using ConvertOpToLLVMPattern<memref::GlobalOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::GlobalOp global, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
MemRefType type = global.getType();
if (!isConvertibleAndHasIdentityMaps(type))
return failure();
Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
LLVM::Linkage linkage =
global.isPublic() ? LLVM::Linkage::External : LLVM::Linkage::Private;
Attribute initialValue = nullptr;
if (!global.isExternal() && !global.isUninitialized()) {
auto elementsAttr = llvm::cast<ElementsAttr>(*global.getInitialValue());
initialValue = elementsAttr;
if (type.getRank() == 0)
initialValue = elementsAttr.getSplatValue<Attribute>();
}
uint64_t alignment = global.getAlignment().value_or(0);
FailureOr<unsigned> addressSpace =
getTypeConverter()->getMemRefAddressSpace(type);
if (failed(addressSpace))
return global.emitOpError(
"memory space cannot be converted to an integer address space");
auto newGlobal = rewriter.replaceOpWithNewOp<LLVM::GlobalOp>(
global, arrayTy, global.getConstant(), linkage, global.getSymName(),
initialValue, alignment, *addressSpace);
if (!global.isExternal() && global.isUninitialized()) {
rewriter.createBlock(&newGlobal.getInitializerRegion());
Value undef[] = {
rewriter.create<LLVM::UndefOp>(global.getLoc(), arrayTy)};
rewriter.create<LLVM::ReturnOp>(global.getLoc(), undef);
}
return success();
}
};
struct GetGlobalMemrefOpLowering : public AllocLikeOpLLVMLowering {
GetGlobalMemrefOpLowering(const LLVMTypeConverter &converter)
: AllocLikeOpLLVMLowering(memref::GetGlobalOp::getOperationName(),
converter) {}
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value sizeBytes,
Operation *op) const override {
auto getGlobalOp = cast<memref::GetGlobalOp>(op);
MemRefType type = cast<MemRefType>(getGlobalOp.getResult().getType());
FailureOr<unsigned> maybeAddressSpace =
getTypeConverter()->getMemRefAddressSpace(type);
if (failed(maybeAddressSpace))
return std::make_tuple(Value(), Value());
unsigned memSpace = *maybeAddressSpace;
Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
auto ptrTy = LLVM::LLVMPointerType::get(rewriter.getContext(), memSpace);
auto addressOf =
rewriter.create<LLVM::AddressOfOp>(loc, ptrTy, getGlobalOp.getName());
auto gep = rewriter.create<LLVM::GEPOp>(
loc, ptrTy, arrayTy, addressOf,
SmallVector<LLVM::GEPArg>(type.getRank() + 1, 0));
auto intPtrType = getIntPtrType(memSpace);
Value deadBeefConst =
createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, 0xdeadbeef);
auto deadBeefPtr =
rewriter.create<LLVM::IntToPtrOp>(loc, ptrTy, deadBeefConst);
return std::make_tuple(deadBeefPtr, gep);
}
};
struct LoadOpLowering : public LoadStoreOpLowering<memref::LoadOp> {
using Base::Base;
LogicalResult
matchAndRewrite(memref::LoadOp loadOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto type = loadOp.getMemRefType();
Value dataPtr =
getStridedElementPtr(loadOp.getLoc(), type, adaptor.getMemref(),
adaptor.getIndices(), rewriter);
rewriter.replaceOpWithNewOp<LLVM::LoadOp>(
loadOp, typeConverter->convertType(type.getElementType()), dataPtr, 0,
false, loadOp.getNontemporal());
return success();
}
};
struct StoreOpLowering : public LoadStoreOpLowering<memref::StoreOp> {
using Base::Base;
LogicalResult
matchAndRewrite(memref::StoreOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto type = op.getMemRefType();
Value dataPtr = getStridedElementPtr(op.getLoc(), type, adaptor.getMemref(),
adaptor.getIndices(), rewriter);
rewriter.replaceOpWithNewOp<LLVM::StoreOp>(op, adaptor.getValue(), dataPtr,
0, false, op.getNontemporal());
return success();
}
};
struct PrefetchOpLowering : public LoadStoreOpLowering<memref::PrefetchOp> {
using Base::Base;
LogicalResult
matchAndRewrite(memref::PrefetchOp prefetchOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto type = prefetchOp.getMemRefType();
auto loc = prefetchOp.getLoc();
Value dataPtr = getStridedElementPtr(loc, type, adaptor.getMemref(),
adaptor.getIndices(), rewriter);
IntegerAttr isWrite = rewriter.getI32IntegerAttr(prefetchOp.getIsWrite());
IntegerAttr localityHint = prefetchOp.getLocalityHintAttr();
IntegerAttr isData =
rewriter.getI32IntegerAttr(prefetchOp.getIsDataCache());
rewriter.replaceOpWithNewOp<LLVM::Prefetch>(prefetchOp, dataPtr, isWrite,
localityHint, isData);
return success();
}
};
struct RankOpLowering : public ConvertOpToLLVMPattern<memref::RankOp> {
using ConvertOpToLLVMPattern<memref::RankOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::RankOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = op.getLoc();
Type operandType = op.getMemref().getType();
if (dyn_cast<UnrankedMemRefType>(operandType)) {
UnrankedMemRefDescriptor desc(adaptor.getMemref());
rewriter.replaceOp(op, {desc.rank(rewriter, loc)});
return success();
}
if (auto rankedMemRefType = dyn_cast<MemRefType>(operandType)) {
Type indexType = getIndexType();
rewriter.replaceOp(op,
{createIndexAttrConstant(rewriter, loc, indexType,
rankedMemRefType.getRank())});
return success();
}
return failure();
}
};
struct MemRefCastOpLowering : public ConvertOpToLLVMPattern<memref::CastOp> {
using ConvertOpToLLVMPattern<memref::CastOp>::ConvertOpToLLVMPattern;
LogicalResult match(memref::CastOp memRefCastOp) const override {
Type srcType = memRefCastOp.getOperand().getType();
Type dstType = memRefCastOp.getType();
if (isa<MemRefType>(srcType) && isa<MemRefType>(dstType))
return success(typeConverter->convertType(srcType) ==
typeConverter->convertType(dstType));
assert(isa<UnrankedMemRefType>(srcType) ||
isa<UnrankedMemRefType>(dstType));
return !(isa<UnrankedMemRefType>(srcType) &&
isa<UnrankedMemRefType>(dstType))
? success()
: failure();
}
void rewrite(memref::CastOp memRefCastOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto srcType = memRefCastOp.getOperand().getType();
auto dstType = memRefCastOp.getType();
auto targetStructType = typeConverter->convertType(memRefCastOp.getType());
auto loc = memRefCastOp.getLoc();
if (isa<MemRefType>(srcType) && isa<MemRefType>(dstType))
return rewriter.replaceOp(memRefCastOp, {adaptor.getSource()});
if (isa<MemRefType>(srcType) && isa<UnrankedMemRefType>(dstType)) {
auto srcMemRefType = cast<MemRefType>(srcType);
int64_t rank = srcMemRefType.getRank();
auto ptr = getTypeConverter()->promoteOneMemRefDescriptor(
loc, adaptor.getSource(), rewriter);
auto rankVal = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(rank));
UnrankedMemRefDescriptor memRefDesc =
UnrankedMemRefDescriptor::undef(rewriter, loc, targetStructType);
memRefDesc.setRank(rewriter, loc, rankVal);
memRefDesc.setMemRefDescPtr(rewriter, loc, ptr);
rewriter.replaceOp(memRefCastOp, (Value)memRefDesc);
} else if (isa<UnrankedMemRefType>(srcType) && isa<MemRefType>(dstType)) {
UnrankedMemRefDescriptor memRefDesc(adaptor.getSource());
auto ptr = memRefDesc.memRefDescPtr(rewriter, loc);
auto loadOp = rewriter.create<LLVM::LoadOp>(loc, targetStructType, ptr);
rewriter.replaceOp(memRefCastOp, loadOp.getResult());
} else {
llvm_unreachable("Unsupported unranked memref to unranked memref cast");
}
}
};
struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern<memref::CopyOp> {
using ConvertOpToLLVMPattern<memref::CopyOp>::ConvertOpToLLVMPattern;
LogicalResult
lowerToMemCopyIntrinsic(memref::CopyOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
auto loc = op.getLoc();
auto srcType = dyn_cast<MemRefType>(op.getSource().getType());
MemRefDescriptor srcDesc(adaptor.getSource());
Value numElements = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(1));
for (int pos = 0; pos < srcType.getRank(); ++pos) {
auto size = srcDesc.size(rewriter, loc, pos);
numElements = rewriter.create<LLVM::MulOp>(loc, numElements, size);
}
auto sizeInBytes = getSizeInBytes(loc, srcType.getElementType(), rewriter);
Value totalSize =
rewriter.create<LLVM::MulOp>(loc, numElements, sizeInBytes);
Type elementType = typeConverter->convertType(srcType.getElementType());
Value srcBasePtr = srcDesc.alignedPtr(rewriter, loc);
Value srcOffset = srcDesc.offset(rewriter, loc);
Value srcPtr = rewriter.create<LLVM::GEPOp>(
loc, srcBasePtr.getType(), elementType, srcBasePtr, srcOffset);
MemRefDescriptor targetDesc(adaptor.getTarget());
Value targetBasePtr = targetDesc.alignedPtr(rewriter, loc);
Value targetOffset = targetDesc.offset(rewriter, loc);
Value targetPtr = rewriter.create<LLVM::GEPOp>(
loc, targetBasePtr.getType(), elementType, targetBasePtr, targetOffset);
rewriter.create<LLVM::MemcpyOp>(loc, targetPtr, srcPtr, totalSize,
false);
rewriter.eraseOp(op);
return success();
}
LogicalResult
lowerToMemCopyFunctionCall(memref::CopyOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
auto loc = op.getLoc();
auto srcType = cast<BaseMemRefType>(op.getSource().getType());
auto targetType = cast<BaseMemRefType>(op.getTarget().getType());
auto makeUnranked = [&, this](Value ranked, MemRefType type) {
auto rank = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
type.getRank());
auto *typeConverter = getTypeConverter();
auto ptr =
typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter);
auto unrankedType =
UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace());
return UnrankedMemRefDescriptor::pack(
rewriter, loc, *typeConverter, unrankedType, ValueRange{rank, ptr});
};
auto stackSaveOp =
rewriter.create<LLVM::StackSaveOp>(loc, getVoidPtrType());
auto srcMemRefType = dyn_cast<MemRefType>(srcType);
Value unrankedSource =
srcMemRefType ? makeUnranked(adaptor.getSource(), srcMemRefType)
: adaptor.getSource();
auto targetMemRefType = dyn_cast<MemRefType>(targetType);
Value unrankedTarget =
targetMemRefType ? makeUnranked(adaptor.getTarget(), targetMemRefType)
: adaptor.getTarget();
auto one = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(1));
auto promote = [&](Value desc) {
auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext());
auto allocated =
rewriter.create<LLVM::AllocaOp>(loc, ptrType, desc.getType(), one);
rewriter.create<LLVM::StoreOp>(loc, desc, allocated);
return allocated;
};
auto sourcePtr = promote(unrankedSource);
auto targetPtr = promote(unrankedTarget);
auto elemSize = getSizeInBytes(loc, srcType.getElementType(), rewriter);
auto copyFn = LLVM::lookupOrCreateMemRefCopyFn(
op->getParentOfType<ModuleOp>(), getIndexType(), sourcePtr.getType());
rewriter.create<LLVM::CallOp>(loc, copyFn,
ValueRange{elemSize, sourcePtr, targetPtr});
rewriter.create<LLVM::StackRestoreOp>(loc, stackSaveOp);
rewriter.eraseOp(op);
return success();
}
LogicalResult
matchAndRewrite(memref::CopyOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto srcType = cast<BaseMemRefType>(op.getSource().getType());
auto targetType = cast<BaseMemRefType>(op.getTarget().getType());
auto isContiguousMemrefType = [&](BaseMemRefType type) {
auto memrefType = dyn_cast<mlir::MemRefType>(type);
return memrefType &&
(memrefType.getLayout().isIdentity() ||
(memrefType.hasStaticShape() && memrefType.getNumElements() > 0 &&
memref::isStaticShapeAndContiguousRowMajor(memrefType)));
};
if (isContiguousMemrefType(srcType) && isContiguousMemrefType(targetType))
return lowerToMemCopyIntrinsic(op, adaptor, rewriter);
return lowerToMemCopyFunctionCall(op, adaptor, rewriter);
}
};
struct MemorySpaceCastOpLowering
: public ConvertOpToLLVMPattern<memref::MemorySpaceCastOp> {
using ConvertOpToLLVMPattern<
memref::MemorySpaceCastOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::MemorySpaceCastOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = op.getLoc();
Type resultType = op.getDest().getType();
if (auto resultTypeR = dyn_cast<MemRefType>(resultType)) {
auto resultDescType =
cast<LLVM::LLVMStructType>(typeConverter->convertType(resultTypeR));
Type newPtrType = resultDescType.getBody()[0];
SmallVector<Value> descVals;
MemRefDescriptor::unpack(rewriter, loc, adaptor.getSource(), resultTypeR,
descVals);
descVals[0] =
rewriter.create<LLVM::AddrSpaceCastOp>(loc, newPtrType, descVals[0]);
descVals[1] =
rewriter.create<LLVM::AddrSpaceCastOp>(loc, newPtrType, descVals[1]);
Value result = MemRefDescriptor::pack(rewriter, loc, *getTypeConverter(),
resultTypeR, descVals);
rewriter.replaceOp(op, result);
return success();
}
if (auto resultTypeU = dyn_cast<UnrankedMemRefType>(resultType)) {
auto sourceType = cast<UnrankedMemRefType>(op.getSource().getType());
FailureOr<unsigned> maybeSourceAddrSpace =
getTypeConverter()->getMemRefAddressSpace(sourceType);
if (failed(maybeSourceAddrSpace))
return rewriter.notifyMatchFailure(loc,
"non-integer source address space");
unsigned sourceAddrSpace = *maybeSourceAddrSpace;
FailureOr<unsigned> maybeResultAddrSpace =
getTypeConverter()->getMemRefAddressSpace(resultTypeU);
if (failed(maybeResultAddrSpace))
return rewriter.notifyMatchFailure(loc,
"non-integer result address space");
unsigned resultAddrSpace = *maybeResultAddrSpace;
UnrankedMemRefDescriptor sourceDesc(adaptor.getSource());
Value rank = sourceDesc.rank(rewriter, loc);
Value sourceUnderlyingDesc = sourceDesc.memRefDescPtr(rewriter, loc);
auto result = UnrankedMemRefDescriptor::undef(
rewriter, loc, typeConverter->convertType(resultTypeU));
result.setRank(rewriter, loc, rank);
SmallVector<Value, 1> sizes;
UnrankedMemRefDescriptor::computeSizes(rewriter, loc, *getTypeConverter(),
result, resultAddrSpace, sizes);
Value resultUnderlyingSize = sizes.front();
Value resultUnderlyingDesc = rewriter.create<LLVM::AllocaOp>(
loc, getVoidPtrType(), rewriter.getI8Type(), resultUnderlyingSize);
result.setMemRefDescPtr(rewriter, loc, resultUnderlyingDesc);
auto sourceElemPtrType =
LLVM::LLVMPointerType::get(rewriter.getContext(), sourceAddrSpace);
auto resultElemPtrType =
LLVM::LLVMPointerType::get(rewriter.getContext(), resultAddrSpace);
Value allocatedPtr = sourceDesc.allocatedPtr(
rewriter, loc, sourceUnderlyingDesc, sourceElemPtrType);
Value alignedPtr =
sourceDesc.alignedPtr(rewriter, loc, *getTypeConverter(),
sourceUnderlyingDesc, sourceElemPtrType);
allocatedPtr = rewriter.create<LLVM::AddrSpaceCastOp>(
loc, resultElemPtrType, allocatedPtr);
alignedPtr = rewriter.create<LLVM::AddrSpaceCastOp>(
loc, resultElemPtrType, alignedPtr);
result.setAllocatedPtr(rewriter, loc, resultUnderlyingDesc,
resultElemPtrType, allocatedPtr);
result.setAlignedPtr(rewriter, loc, *getTypeConverter(),
resultUnderlyingDesc, resultElemPtrType, alignedPtr);
Value sourceIndexVals =
sourceDesc.offsetBasePtr(rewriter, loc, *getTypeConverter(),
sourceUnderlyingDesc, sourceElemPtrType);
Value resultIndexVals =
result.offsetBasePtr(rewriter, loc, *getTypeConverter(),
resultUnderlyingDesc, resultElemPtrType);
int64_t bytesToSkip =
2 * llvm::divideCeil(
getTypeConverter()->getPointerBitwidth(resultAddrSpace), 8);
Value bytesToSkipConst = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(bytesToSkip));
Value copySize = rewriter.create<LLVM::SubOp>(
loc, getIndexType(), resultUnderlyingSize, bytesToSkipConst);
rewriter.create<LLVM::MemcpyOp>(loc, resultIndexVals, sourceIndexVals,
copySize, false);
rewriter.replaceOp(op, ValueRange{result});
return success();
}
return rewriter.notifyMatchFailure(loc, "unexpected memref type");
}
};
static void extractPointersAndOffset(Location loc,
ConversionPatternRewriter &rewriter,
const LLVMTypeConverter &typeConverter,
Value originalOperand,
Value convertedOperand,
Value *allocatedPtr, Value *alignedPtr,
Value *offset = nullptr) {
Type operandType = originalOperand.getType();
if (isa<MemRefType>(operandType)) {
MemRefDescriptor desc(convertedOperand);
*allocatedPtr = desc.allocatedPtr(rewriter, loc);
*alignedPtr = desc.alignedPtr(rewriter, loc);
if (offset != nullptr)
*offset = desc.offset(rewriter, loc);
return;
}
unsigned memorySpace = *typeConverter.getMemRefAddressSpace(
cast<UnrankedMemRefType>(operandType));
auto elementPtrType =
LLVM::LLVMPointerType::get(rewriter.getContext(), memorySpace);
UnrankedMemRefDescriptor unrankedDesc(convertedOperand);
Value underlyingDescPtr = unrankedDesc.memRefDescPtr(rewriter, loc);
*allocatedPtr = UnrankedMemRefDescriptor::allocatedPtr(
rewriter, loc, underlyingDescPtr, elementPtrType);
*alignedPtr = UnrankedMemRefDescriptor::alignedPtr(
rewriter, loc, typeConverter, underlyingDescPtr, elementPtrType);
if (offset != nullptr) {
*offset = UnrankedMemRefDescriptor::offset(
rewriter, loc, typeConverter, underlyingDescPtr, elementPtrType);
}
}
struct MemRefReinterpretCastOpLowering
: public ConvertOpToLLVMPattern<memref::ReinterpretCastOp> {
using ConvertOpToLLVMPattern<
memref::ReinterpretCastOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::ReinterpretCastOp castOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Type srcType = castOp.getSource().getType();
Value descriptor;
if (failed(convertSourceMemRefToDescriptor(rewriter, srcType, castOp,
adaptor, &descriptor)))
return failure();
rewriter.replaceOp(castOp, {descriptor});
return success();
}
private:
LogicalResult convertSourceMemRefToDescriptor(
ConversionPatternRewriter &rewriter, Type srcType,
memref::ReinterpretCastOp castOp,
memref::ReinterpretCastOp::Adaptor adaptor, Value *descriptor) const {
MemRefType targetMemRefType =
cast<MemRefType>(castOp.getResult().getType());
auto llvmTargetDescriptorTy = dyn_cast_or_null<LLVM::LLVMStructType>(
typeConverter->convertType(targetMemRefType));
if (!llvmTargetDescriptorTy)
return failure();
Location loc = castOp.getLoc();
auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy);
Value allocatedPtr, alignedPtr;
extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
castOp.getSource(), adaptor.getSource(),
&allocatedPtr, &alignedPtr);
desc.setAllocatedPtr(rewriter, loc, allocatedPtr);
desc.setAlignedPtr(rewriter, loc, alignedPtr);
if (castOp.isDynamicOffset(0))
desc.setOffset(rewriter, loc, adaptor.getOffsets()[0]);
else
desc.setConstantOffset(rewriter, loc, castOp.getStaticOffset(0));
unsigned dynSizeId = 0;
unsigned dynStrideId = 0;
for (unsigned i = 0, e = targetMemRefType.getRank(); i < e; ++i) {
if (castOp.isDynamicSize(i))
desc.setSize(rewriter, loc, i, adaptor.getSizes()[dynSizeId++]);
else
desc.setConstantSize(rewriter, loc, i, castOp.getStaticSize(i));
if (castOp.isDynamicStride(i))
desc.setStride(rewriter, loc, i, adaptor.getStrides()[dynStrideId++]);
else
desc.setConstantStride(rewriter, loc, i, castOp.getStaticStride(i));
}
*descriptor = desc;
return success();
}
};
struct MemRefReshapeOpLowering
: public ConvertOpToLLVMPattern<memref::ReshapeOp> {
using ConvertOpToLLVMPattern<memref::ReshapeOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::ReshapeOp reshapeOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Type srcType = reshapeOp.getSource().getType();
Value descriptor;
if (failed(convertSourceMemRefToDescriptor(rewriter, srcType, reshapeOp,
adaptor, &descriptor)))
return failure();
rewriter.replaceOp(reshapeOp, {descriptor});
return success();
}
private:
LogicalResult
convertSourceMemRefToDescriptor(ConversionPatternRewriter &rewriter,
Type srcType, memref::ReshapeOp reshapeOp,
memref::ReshapeOp::Adaptor adaptor,
Value *descriptor) const {
auto shapeMemRefType = cast<MemRefType>(reshapeOp.getShape().getType());
if (shapeMemRefType.hasStaticShape()) {
MemRefType targetMemRefType =
cast<MemRefType>(reshapeOp.getResult().getType());
auto llvmTargetDescriptorTy = dyn_cast_or_null<LLVM::LLVMStructType>(
typeConverter->convertType(targetMemRefType));
if (!llvmTargetDescriptorTy)
return failure();
Location loc = reshapeOp.getLoc();
auto desc =
MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy);
Value allocatedPtr, alignedPtr;
extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
reshapeOp.getSource(), adaptor.getSource(),
&allocatedPtr, &alignedPtr);
desc.setAllocatedPtr(rewriter, loc, allocatedPtr);
desc.setAlignedPtr(rewriter, loc, alignedPtr);
int64_t offset;
SmallVector<int64_t> strides;
if (failed(getStridesAndOffset(targetMemRefType, strides, offset)))
return rewriter.notifyMatchFailure(
reshapeOp, "failed to get stride and offset exprs");
if (!isStaticStrideOrOffset(offset))
return rewriter.notifyMatchFailure(reshapeOp,
"dynamic offset is unsupported");
desc.setConstantOffset(rewriter, loc, offset);
assert(targetMemRefType.getLayout().isIdentity() &&
"Identity layout map is a precondition of a valid reshape op");
Type indexType = getIndexType();
Value stride = nullptr;
int64_t targetRank = targetMemRefType.getRank();
for (auto i : llvm::reverse(llvm::seq<int64_t>(0, targetRank))) {
if (!ShapedType::isDynamic(strides[i])) {
stride =
createIndexAttrConstant(rewriter, loc, indexType, strides[i]);
} else if (!stride) {
stride = createIndexAttrConstant(rewriter, loc, indexType, 1);
}
Value dimSize;
if (!targetMemRefType.isDynamicDim(i)) {
dimSize = createIndexAttrConstant(rewriter, loc, indexType,
targetMemRefType.getDimSize(i));
} else {
Value shapeOp = reshapeOp.getShape();
Value index = createIndexAttrConstant(rewriter, loc, indexType, i);
dimSize = rewriter.create<memref::LoadOp>(loc, shapeOp, index);
Type indexType = getIndexType();
if (dimSize.getType() != indexType)
dimSize = typeConverter->materializeTargetConversion(
rewriter, loc, indexType, dimSize);
assert(dimSize && "Invalid memref element type");
}
desc.setSize(rewriter, loc, i, dimSize);
desc.setStride(rewriter, loc, i, stride);
stride = rewriter.create<LLVM::MulOp>(loc, stride, dimSize);
}
*descriptor = desc;
return success();
}
Location loc = reshapeOp.getLoc();
MemRefDescriptor shapeDesc(adaptor.getShape());
Value resultRank = shapeDesc.size(rewriter, loc, 0);
auto targetType = cast<UnrankedMemRefType>(reshapeOp.getResult().getType());
unsigned addressSpace =
*getTypeConverter()->getMemRefAddressSpace(targetType);
auto targetDesc = UnrankedMemRefDescriptor::undef(
rewriter, loc, typeConverter->convertType(targetType));
targetDesc.setRank(rewriter, loc, resultRank);
SmallVector<Value, 4> sizes;
UnrankedMemRefDescriptor::computeSizes(rewriter, loc, *getTypeConverter(),
targetDesc, addressSpace, sizes);
Value underlyingDescPtr = rewriter.create<LLVM::AllocaOp>(
loc, getVoidPtrType(), IntegerType::get(getContext(), 8),
sizes.front());
targetDesc.setMemRefDescPtr(rewriter, loc, underlyingDescPtr);
Value allocatedPtr, alignedPtr, offset;
extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
reshapeOp.getSource(), adaptor.getSource(),
&allocatedPtr, &alignedPtr, &offset);
auto elementPtrType =
LLVM::LLVMPointerType::get(rewriter.getContext(), addressSpace);
UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr,
elementPtrType, allocatedPtr);
UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(),
underlyingDescPtr, elementPtrType,
alignedPtr);
UnrankedMemRefDescriptor::setOffset(rewriter, loc, *getTypeConverter(),
underlyingDescPtr, elementPtrType,
offset);
Value targetSizesBase = UnrankedMemRefDescriptor::sizeBasePtr(
rewriter, loc, *getTypeConverter(), underlyingDescPtr, elementPtrType);
Value targetStridesBase = UnrankedMemRefDescriptor::strideBasePtr(
rewriter, loc, *getTypeConverter(), targetSizesBase, resultRank);
Value shapeOperandPtr = shapeDesc.alignedPtr(rewriter, loc);
Value oneIndex = createIndexAttrConstant(rewriter, loc, getIndexType(), 1);
Value resultRankMinusOne =
rewriter.create<LLVM::SubOp>(loc, resultRank, oneIndex);
Block *initBlock = rewriter.getInsertionBlock();
Type indexType = getTypeConverter()->getIndexType();
Block::iterator remainingOpsIt = std::next(rewriter.getInsertionPoint());
Block *condBlock = rewriter.createBlock(initBlock->getParent(), {},
{indexType, indexType}, {loc, loc});
Block *remainingBlock = rewriter.splitBlock(initBlock, remainingOpsIt);
rewriter.mergeBlocks(remainingBlock, condBlock, ValueRange());
rewriter.setInsertionPointToEnd(initBlock);
rewriter.create<LLVM::BrOp>(loc, ValueRange({resultRankMinusOne, oneIndex}),
condBlock);
rewriter.setInsertionPointToStart(condBlock);
Value indexArg = condBlock->getArgument(0);
Value strideArg = condBlock->getArgument(1);
Value zeroIndex = createIndexAttrConstant(rewriter, loc, indexType, 0);
Value pred = rewriter.create<LLVM::ICmpOp>(
loc, IntegerType::get(rewriter.getContext(), 1),
LLVM::ICmpPredicate::sge, indexArg, zeroIndex);
Block *bodyBlock =
rewriter.splitBlock(condBlock, rewriter.getInsertionPoint());
rewriter.setInsertionPointToStart(bodyBlock);
auto llvmIndexPtrType = LLVM::LLVMPointerType::get(rewriter.getContext());
Value sizeLoadGep = rewriter.create<LLVM::GEPOp>(
loc, llvmIndexPtrType,
typeConverter->convertType(shapeMemRefType.getElementType()),
shapeOperandPtr, indexArg);
Value size = rewriter.create<LLVM::LoadOp>(loc, indexType, sizeLoadGep);
UnrankedMemRefDescriptor::setSize(rewriter, loc, *getTypeConverter(),
targetSizesBase, indexArg, size);
UnrankedMemRefDescriptor::setStride(rewriter, loc, *getTypeConverter(),
targetStridesBase, indexArg, strideArg);
Value nextStride = rewriter.create<LLVM::MulOp>(loc, strideArg, size);
Value decrement = rewriter.create<LLVM::SubOp>(loc, indexArg, oneIndex);
rewriter.create<LLVM::BrOp>(loc, ValueRange({decrement, nextStride}),
condBlock);
Block *remainder =
rewriter.splitBlock(bodyBlock, rewriter.getInsertionPoint());
rewriter.setInsertionPointToEnd(condBlock);
rewriter.create<LLVM::CondBrOp>(loc, pred, bodyBlock, std::nullopt,
remainder, std::nullopt);
rewriter.setInsertionPointToStart(remainder);
*descriptor = targetDesc;
return success();
}
};
template <typename ReshapeOp>
class ReassociatingReshapeOpConversion
: public ConvertOpToLLVMPattern<ReshapeOp> {
public:
using ConvertOpToLLVMPattern<ReshapeOp>::ConvertOpToLLVMPattern;
using ReshapeOpAdaptor = typename ReshapeOp::Adaptor;
LogicalResult
matchAndRewrite(ReshapeOp reshapeOp, typename ReshapeOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
return rewriter.notifyMatchFailure(
reshapeOp,
"reassociation operations should have been expanded beforehand");
}
};
struct SubViewOpLowering : public ConvertOpToLLVMPattern<memref::SubViewOp> {
using ConvertOpToLLVMPattern<memref::SubViewOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::SubViewOp subViewOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
return rewriter.notifyMatchFailure(
subViewOp, "subview operations should have been expanded beforehand");
}
};
class TransposeOpLowering : public ConvertOpToLLVMPattern<memref::TransposeOp> {
public:
using ConvertOpToLLVMPattern<memref::TransposeOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::TransposeOp transposeOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = transposeOp.getLoc();
MemRefDescriptor viewMemRef(adaptor.getIn());
if (transposeOp.getPermutation().isIdentity())
return rewriter.replaceOp(transposeOp, {viewMemRef}), success();
auto targetMemRef = MemRefDescriptor::undef(
rewriter, loc,
typeConverter->convertType(transposeOp.getIn().getType()));
targetMemRef.setAllocatedPtr(rewriter, loc,
viewMemRef.allocatedPtr(rewriter, loc));
targetMemRef.setAlignedPtr(rewriter, loc,
viewMemRef.alignedPtr(rewriter, loc));
targetMemRef.setOffset(rewriter, loc, viewMemRef.offset(rewriter, loc));
for (const auto &en :
llvm::enumerate(transposeOp.getPermutation().getResults())) {
int targetPos = en.index();
int sourcePos = cast<AffineDimExpr>(en.value()).getPosition();
targetMemRef.setSize(rewriter, loc, targetPos,
viewMemRef.size(rewriter, loc, sourcePos));
targetMemRef.setStride(rewriter, loc, targetPos,
viewMemRef.stride(rewriter, loc, sourcePos));
}
rewriter.replaceOp(transposeOp, {targetMemRef});
return success();
}
};
struct ViewOpLowering : public ConvertOpToLLVMPattern<memref::ViewOp> {
using ConvertOpToLLVMPattern<memref::ViewOp>::ConvertOpToLLVMPattern;
Value getSize(ConversionPatternRewriter &rewriter, Location loc,
ArrayRef<int64_t> shape, ValueRange dynamicSizes, unsigned idx,
Type indexType) const {
assert(idx < shape.size());
if (!ShapedType::isDynamic(shape[idx]))
return createIndexAttrConstant(rewriter, loc, indexType, shape[idx]);
unsigned nDynamic =
llvm::count_if(shape.take_front(idx), ShapedType::isDynamic);
return dynamicSizes[nDynamic];
}
Value getStride(ConversionPatternRewriter &rewriter, Location loc,
ArrayRef<int64_t> strides, Value nextSize,
Value runningStride, unsigned idx, Type indexType) const {
assert(idx < strides.size());
if (!ShapedType::isDynamic(strides[idx]))
return createIndexAttrConstant(rewriter, loc, indexType, strides[idx]);
if (nextSize)
return runningStride
? rewriter.create<LLVM::MulOp>(loc, runningStride, nextSize)
: nextSize;
assert(!runningStride);
return createIndexAttrConstant(rewriter, loc, indexType, 1);
}
LogicalResult
matchAndRewrite(memref::ViewOp viewOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = viewOp.getLoc();
auto viewMemRefType = viewOp.getType();
auto targetElementTy =
typeConverter->convertType(viewMemRefType.getElementType());
auto targetDescTy = typeConverter->convertType(viewMemRefType);
if (!targetDescTy || !targetElementTy ||
!LLVM::isCompatibleType(targetElementTy) ||
!LLVM::isCompatibleType(targetDescTy))
return viewOp.emitWarning("Target descriptor type not converted to LLVM"),
failure();
int64_t offset;
SmallVector<int64_t, 4> strides;
auto successStrides = getStridesAndOffset(viewMemRefType, strides, offset);
if (failed(successStrides))
return viewOp.emitWarning("cannot cast to non-strided shape"), failure();
assert(offset == 0 && "expected offset to be 0");
if (!strides.empty() && (strides.back() != 1 && strides.back() != 0))
return viewOp.emitWarning("cannot cast to non-contiguous shape"),
failure();
MemRefDescriptor sourceMemRef(adaptor.getSource());
auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);
Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc);
auto srcMemRefType = cast<MemRefType>(viewOp.getSource().getType());
targetMemRef.setAllocatedPtr(rewriter, loc, allocatedPtr);
Value alignedPtr = sourceMemRef.alignedPtr(rewriter, loc);
alignedPtr = rewriter.create<LLVM::GEPOp>(
loc, alignedPtr.getType(),
typeConverter->convertType(srcMemRefType.getElementType()), alignedPtr,
adaptor.getByteShift());
targetMemRef.setAlignedPtr(rewriter, loc, alignedPtr);
Type indexType = getIndexType();
targetMemRef.setOffset(
rewriter, loc,
createIndexAttrConstant(rewriter, loc, indexType, offset));
if (viewMemRefType.getRank() == 0)
return rewriter.replaceOp(viewOp, {targetMemRef}), success();
Value stride = nullptr, nextSize = nullptr;
for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) {
Value size = getSize(rewriter, loc, viewMemRefType.getShape(),
adaptor.getSizes(), i, indexType);
targetMemRef.setSize(rewriter, loc, i, size);
stride =
getStride(rewriter, loc, strides, nextSize, stride, i, indexType);
targetMemRef.setStride(rewriter, loc, i, stride);
nextSize = size;
}
rewriter.replaceOp(viewOp, {targetMemRef});
return success();
}
};
static std::optional<LLVM::AtomicBinOp>
matchSimpleAtomicOp(memref::AtomicRMWOp atomicOp) {
switch (atomicOp.getKind()) {
case arith::AtomicRMWKind::addf:
return LLVM::AtomicBinOp::fadd;
case arith::AtomicRMWKind::addi:
return LLVM::AtomicBinOp::add;
case arith::AtomicRMWKind::assign:
return LLVM::AtomicBinOp::xchg;
case arith::AtomicRMWKind::maximumf:
return LLVM::AtomicBinOp::fmax;
case arith::AtomicRMWKind::maxs:
return LLVM::AtomicBinOp::max;
case arith::AtomicRMWKind::maxu:
return LLVM::AtomicBinOp::umax;
case arith::AtomicRMWKind::minimumf:
return LLVM::AtomicBinOp::fmin;
case arith::AtomicRMWKind::mins:
return LLVM::AtomicBinOp::min;
case arith::AtomicRMWKind::minu:
return LLVM::AtomicBinOp::umin;
case arith::AtomicRMWKind::ori:
return LLVM::AtomicBinOp::_or;
case arith::AtomicRMWKind::andi:
return LLVM::AtomicBinOp::_and;
default:
return std::nullopt;
}
llvm_unreachable("Invalid AtomicRMWKind");
}
struct AtomicRMWOpLowering : public LoadStoreOpLowering<memref::AtomicRMWOp> {
using Base::Base;
LogicalResult
matchAndRewrite(memref::AtomicRMWOp atomicOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto maybeKind = matchSimpleAtomicOp(atomicOp);
if (!maybeKind)
return failure();
auto memRefType = atomicOp.getMemRefType();
SmallVector<int64_t> strides;
int64_t offset;
if (failed(getStridesAndOffset(memRefType, strides, offset)))
return failure();
auto dataPtr =
getStridedElementPtr(atomicOp.getLoc(), memRefType, adaptor.getMemref(),
adaptor.getIndices(), rewriter);
rewriter.replaceOpWithNewOp<LLVM::AtomicRMWOp>(
atomicOp, *maybeKind, dataPtr, adaptor.getValue(),
LLVM::AtomicOrdering::acq_rel);
return success();
}
};
class ConvertExtractAlignedPointerAsIndex
: public ConvertOpToLLVMPattern<memref::ExtractAlignedPointerAsIndexOp> {
public:
using ConvertOpToLLVMPattern<
memref::ExtractAlignedPointerAsIndexOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::ExtractAlignedPointerAsIndexOp extractOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
BaseMemRefType sourceTy = extractOp.getSource().getType();
Value alignedPtr;
if (sourceTy.hasRank()) {
MemRefDescriptor desc(adaptor.getSource());
alignedPtr = desc.alignedPtr(rewriter, extractOp->getLoc());
} else {
auto elementPtrTy = LLVM::LLVMPointerType::get(
rewriter.getContext(), sourceTy.getMemorySpaceAsInt());
UnrankedMemRefDescriptor desc(adaptor.getSource());
Value descPtr = desc.memRefDescPtr(rewriter, extractOp->getLoc());
alignedPtr = UnrankedMemRefDescriptor::alignedPtr(
rewriter, extractOp->getLoc(), *getTypeConverter(), descPtr,
elementPtrTy);
}
rewriter.replaceOpWithNewOp<LLVM::PtrToIntOp>(
extractOp, getTypeConverter()->getIndexType(), alignedPtr);
return success();
}
};
class ExtractStridedMetadataOpLowering
: public ConvertOpToLLVMPattern<memref::ExtractStridedMetadataOp> {
public:
using ConvertOpToLLVMPattern<
memref::ExtractStridedMetadataOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(memref::ExtractStridedMetadataOp extractStridedMetadataOp,
OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
if (!LLVM::isCompatibleType(adaptor.getOperands().front().getType()))
return failure();
MemRefDescriptor sourceMemRef(adaptor.getSource());
Location loc = extractStridedMetadataOp.getLoc();
Value source = extractStridedMetadataOp.getSource();
auto sourceMemRefType = cast<MemRefType>(source.getType());
int64_t rank = sourceMemRefType.getRank();
SmallVector<Value> results;
results.reserve(2 + rank * 2);
Value baseBuffer = sourceMemRef.allocatedPtr(rewriter, loc);
Value alignedBuffer = sourceMemRef.alignedPtr(rewriter, loc);
MemRefDescriptor dstMemRef = MemRefDescriptor::fromStaticShape(
rewriter, loc, *getTypeConverter(),
cast<MemRefType>(extractStridedMetadataOp.getBaseBuffer().getType()),
baseBuffer, alignedBuffer);
results.push_back((Value)dstMemRef);
results.push_back(sourceMemRef.offset(rewriter, loc));
for (unsigned i = 0; i < rank; ++i)
results.push_back(sourceMemRef.size(rewriter, loc, i));
for (unsigned i = 0; i < rank; ++i)
results.push_back(sourceMemRef.stride(rewriter, loc, i));
rewriter.replaceOp(extractStridedMetadataOp, results);
return success();
}
};
}
void mlir::populateFinalizeMemRefToLLVMConversionPatterns(
LLVMTypeConverter &converter, RewritePatternSet &patterns) {
patterns.add<
AllocaOpLowering,
AllocaScopeOpLowering,
AtomicRMWOpLowering,
AssumeAlignmentOpLowering,
ConvertExtractAlignedPointerAsIndex,
DimOpLowering,
ExtractStridedMetadataOpLowering,
GenericAtomicRMWOpLowering,
GlobalMemrefOpLowering,
GetGlobalMemrefOpLowering,
LoadOpLowering,
MemRefCastOpLowering,
MemRefCopyOpLowering,
MemorySpaceCastOpLowering,
MemRefReinterpretCastOpLowering,
MemRefReshapeOpLowering,
PrefetchOpLowering,
RankOpLowering,
ReassociatingReshapeOpConversion<memref::ExpandShapeOp>,
ReassociatingReshapeOpConversion<memref::CollapseShapeOp>,
StoreOpLowering,
SubViewOpLowering,
TransposeOpLowering,
ViewOpLowering>(converter);
auto allocLowering = converter.getOptions().allocLowering;
if (allocLowering == LowerToLLVMOptions::AllocLowering::AlignedAlloc)
patterns.add<AlignedAllocOpLowering, DeallocOpLowering>(converter);
else if (allocLowering == LowerToLLVMOptions::AllocLowering::Malloc)
patterns.add<AllocOpLowering, DeallocOpLowering>(converter);
}
namespace {
struct FinalizeMemRefToLLVMConversionPass
: public impl::FinalizeMemRefToLLVMConversionPassBase<
FinalizeMemRefToLLVMConversionPass> {
using FinalizeMemRefToLLVMConversionPassBase::
FinalizeMemRefToLLVMConversionPassBase;
void runOnOperation() override {
Operation *op = getOperation();
const auto &dataLayoutAnalysis = getAnalysis<DataLayoutAnalysis>();
LowerToLLVMOptions options(&getContext(),
dataLayoutAnalysis.getAtOrAbove(op));
options.allocLowering =
(useAlignedAlloc ? LowerToLLVMOptions::AllocLowering::AlignedAlloc
: LowerToLLVMOptions::AllocLowering::Malloc);
options.useGenericFunctions = useGenericFunctions;
if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
options.overrideIndexBitwidth(indexBitwidth);
LLVMTypeConverter typeConverter(&getContext(), options,
&dataLayoutAnalysis);
RewritePatternSet patterns(&getContext());
populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns);
LLVMConversionTarget target(getContext());
target.addLegalOp<func::FuncOp>();
if (failed(applyPartialConversion(op, target, std::move(patterns))))
signalPassFailure();
}
};
struct MemRefToLLVMDialectInterface : public ConvertToLLVMPatternInterface {
using ConvertToLLVMPatternInterface::ConvertToLLVMPatternInterface;
void loadDependentDialects(MLIRContext *context) const final {
context->loadDialect<LLVM::LLVMDialect>();
}
void populateConvertToLLVMConversionPatterns(
ConversionTarget &target, LLVMTypeConverter &typeConverter,
RewritePatternSet &patterns) const final {
populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns);
}
};
}
void mlir::registerConvertMemRefToLLVMInterface(DialectRegistry ®istry) {
registry.addExtension(+[](MLIRContext *ctx, memref::MemRefDialect *dialect) {
dialect->addInterfaces<MemRefToLLVMDialectInterface>();
});
}