#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Arithmetic/Utils/Utils.h"
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Target/LLVMIR/TypeToLLVM.h"
#include "mlir/Transforms/DialectConversion.h"
using namespace mlir;
using namespace mlir::vector;
static VectorType reducedVectorTypeFront(VectorType tp) {
assert((tp.getRank() > 1) && "unlowerable vector type");
unsigned numScalableDims = tp.getNumScalableDims();
if (tp.getShape().size() == numScalableDims)
--numScalableDims;
return VectorType::get(tp.getShape().drop_front(), tp.getElementType(),
numScalableDims);
}
static VectorType reducedVectorTypeBack(VectorType tp) {
assert((tp.getRank() > 1) && "unlowerable vector type");
unsigned numScalableDims = tp.getNumScalableDims();
if (numScalableDims > 0)
--numScalableDims;
return VectorType::get(tp.getShape().take_back(), tp.getElementType(),
numScalableDims);
}
static Value insertOne(ConversionPatternRewriter &rewriter,
LLVMTypeConverter &typeConverter, Location loc,
Value val1, Value val2, Type llvmType, int64_t rank,
int64_t pos) {
assert(rank > 0 && "0-D vector corner case should have been handled already");
if (rank == 1) {
auto idxType = rewriter.getIndexType();
auto constant = rewriter.create<LLVM::ConstantOp>(
loc, typeConverter.convertType(idxType),
rewriter.getIntegerAttr(idxType, pos));
return rewriter.create<LLVM::InsertElementOp>(loc, llvmType, val1, val2,
constant);
}
return rewriter.create<LLVM::InsertValueOp>(loc, llvmType, val1, val2,
rewriter.getI64ArrayAttr(pos));
}
static Value extractOne(ConversionPatternRewriter &rewriter,
LLVMTypeConverter &typeConverter, Location loc,
Value val, Type llvmType, int64_t rank, int64_t pos) {
if (rank <= 1) {
auto idxType = rewriter.getIndexType();
auto constant = rewriter.create<LLVM::ConstantOp>(
loc, typeConverter.convertType(idxType),
rewriter.getIntegerAttr(idxType, pos));
return rewriter.create<LLVM::ExtractElementOp>(loc, llvmType, val,
constant);
}
return rewriter.create<LLVM::ExtractValueOp>(loc, llvmType, val,
rewriter.getI64ArrayAttr(pos));
}
LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter,
MemRefType memrefType, unsigned &align) {
Type elementTy = typeConverter.convertType(memrefType.getElementType());
if (!elementTy)
return failure();
llvm::LLVMContext llvmContext;
align = LLVM::TypeToLLVMIRTranslator(llvmContext)
.getPreferredAlignment(elementTy, typeConverter.getDataLayout());
return success();
}
static LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter,
Location loc, Value memref, Value base,
Value index, MemRefType memRefType,
VectorType vType, Value &ptrs) {
int64_t offset;
SmallVector<int64_t, 4> strides;
auto successStrides = getStridesAndOffset(memRefType, strides, offset);
if (failed(successStrides) || strides.back() != 1 ||
memRefType.getMemorySpaceAsInt() != 0)
return failure();
auto pType = MemRefDescriptor(memref).getElementPtrType();
auto ptrsType = LLVM::getFixedVectorType(pType, vType.getDimSize(0));
ptrs = rewriter.create<LLVM::GEPOp>(loc, ptrsType, base, index);
return success();
}
static Value castDataPtr(ConversionPatternRewriter &rewriter, Location loc,
Value ptr, MemRefType memRefType, Type vt) {
auto pType = LLVM::LLVMPointerType::get(vt, memRefType.getMemorySpaceAsInt());
return rewriter.create<LLVM::BitcastOp>(loc, pType, ptr);
}
namespace {
using VectorScaleOpConversion =
OneToOneConvertToLLVMPattern<vector::VectorScaleOp, LLVM::vscale>;
class VectorBitCastOpConversion
: public ConvertOpToLLVMPattern<vector::BitCastOp> {
public:
using ConvertOpToLLVMPattern<vector::BitCastOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::BitCastOp bitCastOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
VectorType resultTy = bitCastOp.getResultVectorType();
if (resultTy.getRank() > 1)
return failure();
Type newResultTy = typeConverter->convertType(resultTy);
rewriter.replaceOpWithNewOp<LLVM::BitcastOp>(bitCastOp, newResultTy,
adaptor.getOperands()[0]);
return success();
}
};
class VectorMatmulOpConversion
: public ConvertOpToLLVMPattern<vector::MatmulOp> {
public:
using ConvertOpToLLVMPattern<vector::MatmulOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::MatmulOp matmulOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
rewriter.replaceOpWithNewOp<LLVM::MatrixMultiplyOp>(
matmulOp, typeConverter->convertType(matmulOp.getRes().getType()),
adaptor.getLhs(), adaptor.getRhs(), matmulOp.getLhsRows(),
matmulOp.getLhsColumns(), matmulOp.getRhsColumns());
return success();
}
};
class VectorFlatTransposeOpConversion
: public ConvertOpToLLVMPattern<vector::FlatTransposeOp> {
public:
using ConvertOpToLLVMPattern<vector::FlatTransposeOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::FlatTransposeOp transOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
rewriter.replaceOpWithNewOp<LLVM::MatrixTransposeOp>(
transOp, typeConverter->convertType(transOp.getRes().getType()),
adaptor.getMatrix(), transOp.getRows(), transOp.getColumns());
return success();
}
};
static void replaceLoadOrStoreOp(vector::LoadOp loadOp,
vector::LoadOpAdaptor adaptor,
VectorType vectorTy, Value ptr, unsigned align,
ConversionPatternRewriter &rewriter) {
rewriter.replaceOpWithNewOp<LLVM::LoadOp>(loadOp, ptr, align);
}
static void replaceLoadOrStoreOp(vector::MaskedLoadOp loadOp,
vector::MaskedLoadOpAdaptor adaptor,
VectorType vectorTy, Value ptr, unsigned align,
ConversionPatternRewriter &rewriter) {
rewriter.replaceOpWithNewOp<LLVM::MaskedLoadOp>(
loadOp, vectorTy, ptr, adaptor.getMask(), adaptor.getPassThru(), align);
}
static void replaceLoadOrStoreOp(vector::StoreOp storeOp,
vector::StoreOpAdaptor adaptor,
VectorType vectorTy, Value ptr, unsigned align,
ConversionPatternRewriter &rewriter) {
rewriter.replaceOpWithNewOp<LLVM::StoreOp>(storeOp, adaptor.getValueToStore(),
ptr, align);
}
static void replaceLoadOrStoreOp(vector::MaskedStoreOp storeOp,
vector::MaskedStoreOpAdaptor adaptor,
VectorType vectorTy, Value ptr, unsigned align,
ConversionPatternRewriter &rewriter) {
rewriter.replaceOpWithNewOp<LLVM::MaskedStoreOp>(
storeOp, adaptor.getValueToStore(), ptr, adaptor.getMask(), align);
}
template <class LoadOrStoreOp, class LoadOrStoreOpAdaptor>
class VectorLoadStoreConversion : public ConvertOpToLLVMPattern<LoadOrStoreOp> {
public:
using ConvertOpToLLVMPattern<LoadOrStoreOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(LoadOrStoreOp loadOrStoreOp,
typename LoadOrStoreOp::Adaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
VectorType vectorTy = loadOrStoreOp.getVectorType();
if (vectorTy.getRank() > 1)
return failure();
auto loc = loadOrStoreOp->getLoc();
MemRefType memRefTy = loadOrStoreOp.getMemRefType();
unsigned align;
if (failed(getMemRefAlignment(*this->getTypeConverter(), memRefTy, align)))
return failure();
auto vtype = this->typeConverter->convertType(loadOrStoreOp.getVectorType())
.template cast<VectorType>();
Value dataPtr = this->getStridedElementPtr(loc, memRefTy, adaptor.getBase(),
adaptor.getIndices(), rewriter);
Value ptr = castDataPtr(rewriter, loc, dataPtr, memRefTy, vtype);
replaceLoadOrStoreOp(loadOrStoreOp, adaptor, vtype, ptr, align, rewriter);
return success();
}
};
class VectorGatherOpConversion
: public ConvertOpToLLVMPattern<vector::GatherOp> {
public:
using ConvertOpToLLVMPattern<vector::GatherOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::GatherOp gather, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = gather->getLoc();
MemRefType memRefType = gather.getMemRefType();
unsigned align;
if (failed(getMemRefAlignment(*getTypeConverter(), memRefType, align)))
return failure();
Value ptrs;
VectorType vType = gather.getVectorType();
Value ptr = getStridedElementPtr(loc, memRefType, adaptor.getBase(),
adaptor.getIndices(), rewriter);
if (failed(getIndexedPtrs(rewriter, loc, adaptor.getBase(), ptr,
adaptor.getIndexVec(), memRefType, vType, ptrs)))
return failure();
rewriter.replaceOpWithNewOp<LLVM::masked_gather>(
gather, typeConverter->convertType(vType), ptrs, adaptor.getMask(),
adaptor.getPassThru(), rewriter.getI32IntegerAttr(align));
return success();
}
};
class VectorScatterOpConversion
: public ConvertOpToLLVMPattern<vector::ScatterOp> {
public:
using ConvertOpToLLVMPattern<vector::ScatterOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::ScatterOp scatter, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = scatter->getLoc();
MemRefType memRefType = scatter.getMemRefType();
unsigned align;
if (failed(getMemRefAlignment(*getTypeConverter(), memRefType, align)))
return failure();
Value ptrs;
VectorType vType = scatter.getVectorType();
Value ptr = getStridedElementPtr(loc, memRefType, adaptor.getBase(),
adaptor.getIndices(), rewriter);
if (failed(getIndexedPtrs(rewriter, loc, adaptor.getBase(), ptr,
adaptor.getIndexVec(), memRefType, vType, ptrs)))
return failure();
rewriter.replaceOpWithNewOp<LLVM::masked_scatter>(
scatter, adaptor.getValueToStore(), ptrs, adaptor.getMask(),
rewriter.getI32IntegerAttr(align));
return success();
}
};
class VectorExpandLoadOpConversion
: public ConvertOpToLLVMPattern<vector::ExpandLoadOp> {
public:
using ConvertOpToLLVMPattern<vector::ExpandLoadOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::ExpandLoadOp expand, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = expand->getLoc();
MemRefType memRefType = expand.getMemRefType();
auto vtype = typeConverter->convertType(expand.getVectorType());
Value ptr = getStridedElementPtr(loc, memRefType, adaptor.getBase(),
adaptor.getIndices(), rewriter);
rewriter.replaceOpWithNewOp<LLVM::masked_expandload>(
expand, vtype, ptr, adaptor.getMask(), adaptor.getPassThru());
return success();
}
};
class VectorCompressStoreOpConversion
: public ConvertOpToLLVMPattern<vector::CompressStoreOp> {
public:
using ConvertOpToLLVMPattern<vector::CompressStoreOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::CompressStoreOp compress, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = compress->getLoc();
MemRefType memRefType = compress.getMemRefType();
Value ptr = getStridedElementPtr(loc, memRefType, adaptor.getBase(),
adaptor.getIndices(), rewriter);
rewriter.replaceOpWithNewOp<LLVM::masked_compressstore>(
compress, adaptor.getValueToStore(), ptr, adaptor.getMask());
return success();
}
};
template <class VectorOp, class ScalarOp>
static Value createIntegerReductionArithmeticOpLowering(
ConversionPatternRewriter &rewriter, Location loc, Type llvmType,
Value vectorOperand, Value accumulator) {
Value result = rewriter.create<VectorOp>(loc, llvmType, vectorOperand);
if (accumulator)
result = rewriter.create<ScalarOp>(loc, accumulator, result);
return result;
}
template <class VectorOp>
static Value createIntegerReductionComparisonOpLowering(
ConversionPatternRewriter &rewriter, Location loc, Type llvmType,
Value vectorOperand, Value accumulator, LLVM::ICmpPredicate predicate) {
Value result = rewriter.create<VectorOp>(loc, llvmType, vectorOperand);
if (accumulator) {
Value cmp =
rewriter.create<LLVM::ICmpOp>(loc, predicate, accumulator, result);
result = rewriter.create<LLVM::SelectOp>(loc, cmp, accumulator, result);
}
return result;
}
static Value createMinMaxF(OpBuilder &builder, Location loc, Value lhs,
Value rhs, bool isMin) {
auto floatType = getElementTypeOrSelf(lhs.getType()).cast<FloatType>();
Type i1Type = builder.getI1Type();
if (auto vecType = lhs.getType().dyn_cast<VectorType>())
i1Type = VectorType::get(vecType.getShape(), i1Type);
Value cmp = builder.create<LLVM::FCmpOp>(
loc, i1Type, isMin ? LLVM::FCmpPredicate::olt : LLVM::FCmpPredicate::ogt,
lhs, rhs);
Value sel = builder.create<LLVM::SelectOp>(loc, cmp, lhs, rhs);
Value isNan = builder.create<LLVM::FCmpOp>(
loc, i1Type, LLVM::FCmpPredicate::uno, lhs, rhs);
Value nan = builder.create<LLVM::ConstantOp>(
loc, lhs.getType(),
builder.getFloatAttr(floatType,
APFloat::getQNaN(floatType.getFloatSemantics())));
return builder.create<LLVM::SelectOp>(loc, isNan, nan, sel);
}
class VectorReductionOpConversion
: public ConvertOpToLLVMPattern<vector::ReductionOp> {
public:
explicit VectorReductionOpConversion(LLVMTypeConverter &typeConv,
bool reassociateFPRed)
: ConvertOpToLLVMPattern<vector::ReductionOp>(typeConv),
reassociateFPReductions(reassociateFPRed) {}
LogicalResult
matchAndRewrite(vector::ReductionOp reductionOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto kind = reductionOp.getKind();
Type eltType = reductionOp.getDest().getType();
Type llvmType = typeConverter->convertType(eltType);
Value operand = adaptor.getVector();
Value acc = adaptor.getAcc();
Location loc = reductionOp.getLoc();
if (eltType.isIntOrIndex()) {
Value result;
switch (kind) {
case vector::CombiningKind::ADD:
result =
createIntegerReductionArithmeticOpLowering<LLVM::vector_reduce_add,
LLVM::AddOp>(
rewriter, loc, llvmType, operand, acc);
break;
case vector::CombiningKind::MUL:
result =
createIntegerReductionArithmeticOpLowering<LLVM::vector_reduce_mul,
LLVM::MulOp>(
rewriter, loc, llvmType, operand, acc);
break;
case vector::CombiningKind::MINUI:
result = createIntegerReductionComparisonOpLowering<
LLVM::vector_reduce_umin>(rewriter, loc, llvmType, operand, acc,
LLVM::ICmpPredicate::ule);
break;
case vector::CombiningKind::MINSI:
result = createIntegerReductionComparisonOpLowering<
LLVM::vector_reduce_smin>(rewriter, loc, llvmType, operand, acc,
LLVM::ICmpPredicate::sle);
break;
case vector::CombiningKind::MAXUI:
result = createIntegerReductionComparisonOpLowering<
LLVM::vector_reduce_umax>(rewriter, loc, llvmType, operand, acc,
LLVM::ICmpPredicate::uge);
break;
case vector::CombiningKind::MAXSI:
result = createIntegerReductionComparisonOpLowering<
LLVM::vector_reduce_smax>(rewriter, loc, llvmType, operand, acc,
LLVM::ICmpPredicate::sge);
break;
case vector::CombiningKind::AND:
result =
createIntegerReductionArithmeticOpLowering<LLVM::vector_reduce_and,
LLVM::AndOp>(
rewriter, loc, llvmType, operand, acc);
break;
case vector::CombiningKind::OR:
result =
createIntegerReductionArithmeticOpLowering<LLVM::vector_reduce_or,
LLVM::OrOp>(
rewriter, loc, llvmType, operand, acc);
break;
case vector::CombiningKind::XOR:
result =
createIntegerReductionArithmeticOpLowering<LLVM::vector_reduce_xor,
LLVM::XOrOp>(
rewriter, loc, llvmType, operand, acc);
break;
default:
return failure();
}
rewriter.replaceOp(reductionOp, result);
return success();
}
if (!eltType.isa<FloatType>())
return failure();
if (kind == vector::CombiningKind::ADD) {
Value acc = adaptor.getOperands().size() > 1
? adaptor.getOperands()[1]
: rewriter.create<LLVM::ConstantOp>(
reductionOp->getLoc(), llvmType,
rewriter.getZeroAttr(eltType));
rewriter.replaceOpWithNewOp<LLVM::vector_reduce_fadd>(
reductionOp, llvmType, acc, operand,
rewriter.getBoolAttr(reassociateFPReductions));
} else if (kind == vector::CombiningKind::MUL) {
Value acc = adaptor.getOperands().size() > 1
? adaptor.getOperands()[1]
: rewriter.create<LLVM::ConstantOp>(
reductionOp->getLoc(), llvmType,
rewriter.getFloatAttr(eltType, 1.0));
rewriter.replaceOpWithNewOp<LLVM::vector_reduce_fmul>(
reductionOp, llvmType, acc, operand,
rewriter.getBoolAttr(reassociateFPReductions));
} else if (kind == vector::CombiningKind::MINF) {
Value result =
rewriter.create<LLVM::vector_reduce_fmin>(loc, llvmType, operand);
if (acc)
result = createMinMaxF(rewriter, loc, result, acc, true);
rewriter.replaceOp(reductionOp, result);
} else if (kind == vector::CombiningKind::MAXF) {
Value result =
rewriter.create<LLVM::vector_reduce_fmax>(loc, llvmType, operand);
if (acc)
result = createMinMaxF(rewriter, loc, result, acc, false);
rewriter.replaceOp(reductionOp, result);
} else
return failure();
return success();
}
private:
const bool reassociateFPReductions;
};
class VectorShuffleOpConversion
: public ConvertOpToLLVMPattern<vector::ShuffleOp> {
public:
using ConvertOpToLLVMPattern<vector::ShuffleOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::ShuffleOp shuffleOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = shuffleOp->getLoc();
auto v1Type = shuffleOp.getV1VectorType();
auto v2Type = shuffleOp.getV2VectorType();
auto vectorType = shuffleOp.getVectorType();
Type llvmType = typeConverter->convertType(vectorType);
auto maskArrayAttr = shuffleOp.getMask();
if (!llvmType)
return failure();
int64_t rank = vectorType.getRank();
assert(v1Type.getRank() == rank);
assert(v2Type.getRank() == rank);
int64_t v1Dim = v1Type.getDimSize(0);
if (rank == 1 && v1Type == v2Type) {
Value llvmShuffleOp = rewriter.create<LLVM::ShuffleVectorOp>(
loc, adaptor.getV1(), adaptor.getV2(), maskArrayAttr);
rewriter.replaceOp(shuffleOp, llvmShuffleOp);
return success();
}
Type eltType;
if (auto arrayType = llvmType.dyn_cast<LLVM::LLVMArrayType>())
eltType = arrayType.getElementType();
else
eltType = llvmType.cast<VectorType>().getElementType();
Value insert = rewriter.create<LLVM::UndefOp>(loc, llvmType);
int64_t insPos = 0;
for (const auto &en : llvm::enumerate(maskArrayAttr)) {
int64_t extPos = en.value().cast<IntegerAttr>().getInt();
Value value = adaptor.getV1();
if (extPos >= v1Dim) {
extPos -= v1Dim;
value = adaptor.getV2();
}
Value extract = extractOne(rewriter, *getTypeConverter(), loc, value,
eltType, rank, extPos);
insert = insertOne(rewriter, *getTypeConverter(), loc, insert, extract,
llvmType, rank, insPos++);
}
rewriter.replaceOp(shuffleOp, insert);
return success();
}
};
class VectorExtractElementOpConversion
: public ConvertOpToLLVMPattern<vector::ExtractElementOp> {
public:
using ConvertOpToLLVMPattern<
vector::ExtractElementOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::ExtractElementOp extractEltOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto vectorType = extractEltOp.getVectorType();
auto llvmType = typeConverter->convertType(vectorType.getElementType());
if (!llvmType)
return failure();
if (vectorType.getRank() == 0) {
Location loc = extractEltOp.getLoc();
auto idxType = rewriter.getIndexType();
auto zero = rewriter.create<LLVM::ConstantOp>(
loc, typeConverter->convertType(idxType),
rewriter.getIntegerAttr(idxType, 0));
rewriter.replaceOpWithNewOp<LLVM::ExtractElementOp>(
extractEltOp, llvmType, adaptor.getVector(), zero);
return success();
}
rewriter.replaceOpWithNewOp<LLVM::ExtractElementOp>(
extractEltOp, llvmType, adaptor.getVector(), adaptor.getPosition());
return success();
}
};
class VectorExtractOpConversion
: public ConvertOpToLLVMPattern<vector::ExtractOp> {
public:
using ConvertOpToLLVMPattern<vector::ExtractOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::ExtractOp extractOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = extractOp->getLoc();
auto vectorType = extractOp.getVectorType();
auto resultType = extractOp.getResult().getType();
auto llvmResultType = typeConverter->convertType(resultType);
auto positionArrayAttr = extractOp.getPosition();
if (!llvmResultType)
return failure();
if (positionArrayAttr.empty()) {
rewriter.replaceOp(extractOp, adaptor.getVector());
return success();
}
if (resultType.isa<VectorType>()) {
Value extracted = rewriter.create<LLVM::ExtractValueOp>(
loc, llvmResultType, adaptor.getVector(), positionArrayAttr);
rewriter.replaceOp(extractOp, extracted);
return success();
}
auto *context = extractOp->getContext();
Value extracted = adaptor.getVector();
auto positionAttrs = positionArrayAttr.getValue();
if (positionAttrs.size() > 1) {
auto oneDVectorType = reducedVectorTypeBack(vectorType);
auto nMinusOnePositionAttrs =
ArrayAttr::get(context, positionAttrs.drop_back());
extracted = rewriter.create<LLVM::ExtractValueOp>(
loc, typeConverter->convertType(oneDVectorType), extracted,
nMinusOnePositionAttrs);
}
auto position = positionAttrs.back().cast<IntegerAttr>();
auto i64Type = IntegerType::get(rewriter.getContext(), 64);
auto constant = rewriter.create<LLVM::ConstantOp>(loc, i64Type, position);
extracted =
rewriter.create<LLVM::ExtractElementOp>(loc, extracted, constant);
rewriter.replaceOp(extractOp, extracted);
return success();
}
};
class VectorFMAOp1DConversion : public ConvertOpToLLVMPattern<vector::FMAOp> {
public:
using ConvertOpToLLVMPattern<vector::FMAOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::FMAOp fmaOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
VectorType vType = fmaOp.getVectorType();
if (vType.getRank() != 1)
return failure();
rewriter.replaceOpWithNewOp<LLVM::FMulAddOp>(
fmaOp, adaptor.getLhs(), adaptor.getRhs(), adaptor.getAcc());
return success();
}
};
class VectorInsertElementOpConversion
: public ConvertOpToLLVMPattern<vector::InsertElementOp> {
public:
using ConvertOpToLLVMPattern<vector::InsertElementOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::InsertElementOp insertEltOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto vectorType = insertEltOp.getDestVectorType();
auto llvmType = typeConverter->convertType(vectorType);
if (!llvmType)
return failure();
if (vectorType.getRank() == 0) {
Location loc = insertEltOp.getLoc();
auto idxType = rewriter.getIndexType();
auto zero = rewriter.create<LLVM::ConstantOp>(
loc, typeConverter->convertType(idxType),
rewriter.getIntegerAttr(idxType, 0));
rewriter.replaceOpWithNewOp<LLVM::InsertElementOp>(
insertEltOp, llvmType, adaptor.getDest(), adaptor.getSource(), zero);
return success();
}
rewriter.replaceOpWithNewOp<LLVM::InsertElementOp>(
insertEltOp, llvmType, adaptor.getDest(), adaptor.getSource(),
adaptor.getPosition());
return success();
}
};
class VectorInsertOpConversion
: public ConvertOpToLLVMPattern<vector::InsertOp> {
public:
using ConvertOpToLLVMPattern<vector::InsertOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::InsertOp insertOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = insertOp->getLoc();
auto sourceType = insertOp.getSourceType();
auto destVectorType = insertOp.getDestVectorType();
auto llvmResultType = typeConverter->convertType(destVectorType);
auto positionArrayAttr = insertOp.getPosition();
if (!llvmResultType)
return failure();
if (positionArrayAttr.empty()) {
rewriter.replaceOp(insertOp, adaptor.getSource());
return success();
}
if (sourceType.isa<VectorType>()) {
Value inserted = rewriter.create<LLVM::InsertValueOp>(
loc, llvmResultType, adaptor.getDest(), adaptor.getSource(),
positionArrayAttr);
rewriter.replaceOp(insertOp, inserted);
return success();
}
auto *context = insertOp->getContext();
Value extracted = adaptor.getDest();
auto positionAttrs = positionArrayAttr.getValue();
auto position = positionAttrs.back().cast<IntegerAttr>();
auto oneDVectorType = destVectorType;
if (positionAttrs.size() > 1) {
oneDVectorType = reducedVectorTypeBack(destVectorType);
auto nMinusOnePositionAttrs =
ArrayAttr::get(context, positionAttrs.drop_back());
extracted = rewriter.create<LLVM::ExtractValueOp>(
loc, typeConverter->convertType(oneDVectorType), extracted,
nMinusOnePositionAttrs);
}
auto i64Type = IntegerType::get(rewriter.getContext(), 64);
auto constant = rewriter.create<LLVM::ConstantOp>(loc, i64Type, position);
Value inserted = rewriter.create<LLVM::InsertElementOp>(
loc, typeConverter->convertType(oneDVectorType), extracted,
adaptor.getSource(), constant);
if (positionAttrs.size() > 1) {
auto nMinusOnePositionAttrs =
ArrayAttr::get(context, positionAttrs.drop_back());
inserted = rewriter.create<LLVM::InsertValueOp>(
loc, llvmResultType, adaptor.getDest(), inserted,
nMinusOnePositionAttrs);
}
rewriter.replaceOp(insertOp, inserted);
return success();
}
};
class VectorFMAOpNDRewritePattern : public OpRewritePattern<FMAOp> {
public:
using OpRewritePattern<FMAOp>::OpRewritePattern;
void initialize() {
setHasBoundedRewriteRecursion();
}
LogicalResult matchAndRewrite(FMAOp op,
PatternRewriter &rewriter) const override {
auto vType = op.getVectorType();
if (vType.getRank() < 2)
return failure();
auto loc = op.getLoc();
auto elemType = vType.getElementType();
Value zero = rewriter.create<arith::ConstantOp>(
loc, elemType, rewriter.getZeroAttr(elemType));
Value desc = rewriter.create<vector::SplatOp>(loc, vType, zero);
for (int64_t i = 0, e = vType.getShape().front(); i != e; ++i) {
Value extrLHS = rewriter.create<ExtractOp>(loc, op.getLhs(), i);
Value extrRHS = rewriter.create<ExtractOp>(loc, op.getRhs(), i);
Value extrACC = rewriter.create<ExtractOp>(loc, op.getAcc(), i);
Value fma = rewriter.create<FMAOp>(loc, extrLHS, extrRHS, extrACC);
desc = rewriter.create<InsertOp>(loc, fma, desc, i);
}
rewriter.replaceOp(op, desc);
return success();
}
};
static llvm::Optional<SmallVector<int64_t, 4>>
computeContiguousStrides(MemRefType memRefType) {
int64_t offset;
SmallVector<int64_t, 4> strides;
if (failed(getStridesAndOffset(memRefType, strides, offset)))
return None;
if (!strides.empty() && strides.back() != 1)
return None;
if (memRefType.getLayout().isIdentity())
return strides;
auto sizes = memRefType.getShape();
for (int index = 0, e = strides.size() - 1; index < e; ++index) {
if (ShapedType::isDynamic(sizes[index + 1]) ||
ShapedType::isDynamicStrideOrOffset(strides[index]) ||
ShapedType::isDynamicStrideOrOffset(strides[index + 1]))
return None;
if (strides[index] != strides[index + 1] * sizes[index + 1])
return None;
}
return strides;
}
class VectorTypeCastOpConversion
: public ConvertOpToLLVMPattern<vector::TypeCastOp> {
public:
using ConvertOpToLLVMPattern<vector::TypeCastOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::TypeCastOp castOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
auto loc = castOp->getLoc();
MemRefType sourceMemRefType =
castOp.getOperand().getType().cast<MemRefType>();
MemRefType targetMemRefType = castOp.getType();
if (!sourceMemRefType.hasStaticShape() ||
!targetMemRefType.hasStaticShape())
return failure();
auto llvmSourceDescriptorTy =
adaptor.getOperands()[0].getType().dyn_cast<LLVM::LLVMStructType>();
if (!llvmSourceDescriptorTy)
return failure();
MemRefDescriptor sourceMemRef(adaptor.getOperands()[0]);
auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType)
.dyn_cast_or_null<LLVM::LLVMStructType>();
if (!llvmTargetDescriptorTy)
return failure();
auto sourceStrides = computeContiguousStrides(sourceMemRefType);
if (!sourceStrides)
return failure();
auto targetStrides = computeContiguousStrides(targetMemRefType);
if (!targetStrides)
return failure();
if (llvm::any_of(*targetStrides, ShapedType::isDynamicStrideOrOffset))
return failure();
auto int64Ty = IntegerType::get(rewriter.getContext(), 64);
auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy);
Type llvmTargetElementTy = desc.getElementPtrType();
Value allocated = sourceMemRef.allocatedPtr(rewriter, loc);
allocated =
rewriter.create<LLVM::BitcastOp>(loc, llvmTargetElementTy, allocated);
desc.setAllocatedPtr(rewriter, loc, allocated);
Value ptr = sourceMemRef.alignedPtr(rewriter, loc);
ptr = rewriter.create<LLVM::BitcastOp>(loc, llvmTargetElementTy, ptr);
desc.setAlignedPtr(rewriter, loc, ptr);
auto attr = rewriter.getIntegerAttr(rewriter.getIndexType(), 0);
auto zero = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, attr);
desc.setOffset(rewriter, loc, zero);
for (const auto &indexedSize :
llvm::enumerate(targetMemRefType.getShape())) {
int64_t index = indexedSize.index();
auto sizeAttr =
rewriter.getIntegerAttr(rewriter.getIndexType(), indexedSize.value());
auto size = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, sizeAttr);
desc.setSize(rewriter, loc, index, size);
auto strideAttr = rewriter.getIntegerAttr(rewriter.getIndexType(),
(*targetStrides)[index]);
auto stride = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, strideAttr);
desc.setStride(rewriter, loc, index, stride);
}
rewriter.replaceOp(castOp, {desc});
return success();
}
};
class VectorCreateMaskOpRewritePattern
: public OpRewritePattern<vector::CreateMaskOp> {
public:
explicit VectorCreateMaskOpRewritePattern(MLIRContext *context,
bool enableIndexOpt)
: OpRewritePattern<vector::CreateMaskOp>(context),
force32BitVectorIndices(enableIndexOpt) {}
LogicalResult matchAndRewrite(vector::CreateMaskOp op,
PatternRewriter &rewriter) const override {
auto dstType = op.getType();
if (dstType.getRank() != 1 || !dstType.cast<VectorType>().isScalable())
return failure();
IntegerType idxType =
force32BitVectorIndices ? rewriter.getI32Type() : rewriter.getI64Type();
auto loc = op->getLoc();
Value indices = rewriter.create<LLVM::StepVectorOp>(
loc, LLVM::getVectorType(idxType, dstType.getShape()[0],
true));
auto bound = getValueOrCreateCastToIndexLike(rewriter, loc, idxType,
op.getOperand(0));
Value bounds = rewriter.create<SplatOp>(loc, indices.getType(), bound);
Value comp = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
indices, bounds);
rewriter.replaceOp(op, comp);
return success();
}
private:
const bool force32BitVectorIndices;
};
class VectorPrintOpConversion : public ConvertOpToLLVMPattern<vector::PrintOp> {
public:
using ConvertOpToLLVMPattern<vector::PrintOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::PrintOp printOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Type printType = printOp.getPrintType();
if (typeConverter->convertType(printType) == nullptr)
return failure();
PrintConversion conversion = PrintConversion::None;
VectorType vectorType = printType.dyn_cast<VectorType>();
Type eltType = vectorType ? vectorType.getElementType() : printType;
Operation *printer;
if (eltType.isF32()) {
printer =
LLVM::lookupOrCreatePrintF32Fn(printOp->getParentOfType<ModuleOp>());
} else if (eltType.isF64()) {
printer =
LLVM::lookupOrCreatePrintF64Fn(printOp->getParentOfType<ModuleOp>());
} else if (eltType.isIndex()) {
printer =
LLVM::lookupOrCreatePrintU64Fn(printOp->getParentOfType<ModuleOp>());
} else if (auto intTy = eltType.dyn_cast<IntegerType>()) {
unsigned width = intTy.getWidth();
if (intTy.isUnsigned()) {
if (width <= 64) {
if (width < 64)
conversion = PrintConversion::ZeroExt64;
printer = LLVM::lookupOrCreatePrintU64Fn(
printOp->getParentOfType<ModuleOp>());
} else {
return failure();
}
} else {
assert(intTy.isSignless() || intTy.isSigned());
if (width <= 64) {
if (width == 1)
conversion = PrintConversion::ZeroExt64;
else if (width < 64)
conversion = PrintConversion::SignExt64;
printer = LLVM::lookupOrCreatePrintI64Fn(
printOp->getParentOfType<ModuleOp>());
} else {
return failure();
}
}
} else {
return failure();
}
int64_t rank = vectorType ? vectorType.getRank() : 0;
Type type = vectorType ? vectorType : eltType;
emitRanks(rewriter, printOp, adaptor.getSource(), type, printer, rank,
conversion);
emitCall(rewriter, printOp->getLoc(),
LLVM::lookupOrCreatePrintNewlineFn(
printOp->getParentOfType<ModuleOp>()));
rewriter.eraseOp(printOp);
return success();
}
private:
enum class PrintConversion {
None,
ZeroExt64,
SignExt64
};
void emitRanks(ConversionPatternRewriter &rewriter, Operation *op,
Value value, Type type, Operation *printer, int64_t rank,
PrintConversion conversion) const {
VectorType vectorType = type.dyn_cast<VectorType>();
Location loc = op->getLoc();
if (!vectorType) {
assert(rank == 0 && "The scalar case expects rank == 0");
switch (conversion) {
case PrintConversion::ZeroExt64:
value = rewriter.create<arith::ExtUIOp>(
loc, IntegerType::get(rewriter.getContext(), 64), value);
break;
case PrintConversion::SignExt64:
value = rewriter.create<arith::ExtSIOp>(
loc, IntegerType::get(rewriter.getContext(), 64), value);
break;
case PrintConversion::None:
break;
}
emitCall(rewriter, loc, printer, value);
return;
}
emitCall(rewriter, loc,
LLVM::lookupOrCreatePrintOpenFn(op->getParentOfType<ModuleOp>()));
Operation *printComma =
LLVM::lookupOrCreatePrintCommaFn(op->getParentOfType<ModuleOp>());
if (rank <= 1) {
auto reducedType = vectorType.getElementType();
auto llvmType = typeConverter->convertType(reducedType);
int64_t dim = rank == 0 ? 1 : vectorType.getDimSize(0);
for (int64_t d = 0; d < dim; ++d) {
Value nestedVal = extractOne(rewriter, *getTypeConverter(), loc, value,
llvmType, 0, d);
emitRanks(rewriter, op, nestedVal, reducedType, printer, 0,
conversion);
if (d != dim - 1)
emitCall(rewriter, loc, printComma);
}
emitCall(
rewriter, loc,
LLVM::lookupOrCreatePrintCloseFn(op->getParentOfType<ModuleOp>()));
return;
}
int64_t dim = vectorType.getDimSize(0);
for (int64_t d = 0; d < dim; ++d) {
auto reducedType = reducedVectorTypeFront(vectorType);
auto llvmType = typeConverter->convertType(reducedType);
Value nestedVal = extractOne(rewriter, *getTypeConverter(), loc, value,
llvmType, rank, d);
emitRanks(rewriter, op, nestedVal, reducedType, printer, rank - 1,
conversion);
if (d != dim - 1)
emitCall(rewriter, loc, printComma);
}
emitCall(rewriter, loc,
LLVM::lookupOrCreatePrintCloseFn(op->getParentOfType<ModuleOp>()));
}
static void emitCall(ConversionPatternRewriter &rewriter, Location loc,
Operation *ref, ValueRange params = ValueRange()) {
rewriter.create<LLVM::CallOp>(loc, TypeRange(), SymbolRefAttr::get(ref),
params);
}
};
struct VectorSplatOpLowering : public ConvertOpToLLVMPattern<vector::SplatOp> {
using ConvertOpToLLVMPattern<vector::SplatOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(vector::SplatOp splatOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
VectorType resultType = splatOp.getType().cast<VectorType>();
if (resultType.getRank() > 1)
return failure();
auto vectorType = typeConverter->convertType(splatOp.getType());
Value undef = rewriter.create<LLVM::UndefOp>(splatOp.getLoc(), vectorType);
auto zero = rewriter.create<LLVM::ConstantOp>(
splatOp.getLoc(),
typeConverter->convertType(rewriter.getIntegerType(32)),
rewriter.getZeroAttr(rewriter.getIntegerType(32)));
if (resultType.getRank() == 0) {
rewriter.replaceOpWithNewOp<LLVM::InsertElementOp>(
splatOp, vectorType, undef, adaptor.getInput(), zero);
return success();
}
auto v = rewriter.create<LLVM::InsertElementOp>(
splatOp.getLoc(), vectorType, undef, adaptor.getInput(), zero);
int64_t width = splatOp.getType().cast<VectorType>().getDimSize(0);
SmallVector<int32_t, 4> zeroValues(width, 0);
ArrayAttr zeroAttrs = rewriter.getI32ArrayAttr(zeroValues);
rewriter.replaceOpWithNewOp<LLVM::ShuffleVectorOp>(splatOp, v, undef,
zeroAttrs);
return success();
}
};
struct VectorSplatNdOpLowering : public ConvertOpToLLVMPattern<SplatOp> {
using ConvertOpToLLVMPattern<SplatOp>::ConvertOpToLLVMPattern;
LogicalResult
matchAndRewrite(SplatOp splatOp, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
VectorType resultType = splatOp.getType();
if (resultType.getRank() <= 1)
return failure();
auto loc = splatOp.getLoc();
auto vectorTypeInfo =
LLVM::detail::extractNDVectorTypeInfo(resultType, *getTypeConverter());
auto llvmNDVectorTy = vectorTypeInfo.llvmNDVectorTy;
auto llvm1DVectorTy = vectorTypeInfo.llvm1DVectorTy;
if (!llvmNDVectorTy || !llvm1DVectorTy)
return failure();
Value desc = rewriter.create<LLVM::UndefOp>(loc, llvmNDVectorTy);
Value vdesc = rewriter.create<LLVM::UndefOp>(loc, llvm1DVectorTy);
auto zero = rewriter.create<LLVM::ConstantOp>(
loc, typeConverter->convertType(rewriter.getIntegerType(32)),
rewriter.getZeroAttr(rewriter.getIntegerType(32)));
Value v = rewriter.create<LLVM::InsertElementOp>(loc, llvm1DVectorTy, vdesc,
adaptor.getInput(), zero);
int64_t width = resultType.getDimSize(resultType.getRank() - 1);
SmallVector<int32_t, 4> zeroValues(width, 0);
ArrayAttr zeroAttrs = rewriter.getI32ArrayAttr(zeroValues);
v = rewriter.create<LLVM::ShuffleVectorOp>(loc, v, v, zeroAttrs);
nDVectorIterate(vectorTypeInfo, rewriter, [&](ArrayAttr position) {
desc = rewriter.create<LLVM::InsertValueOp>(loc, llvmNDVectorTy, desc, v,
position);
});
rewriter.replaceOp(splatOp, desc);
return success();
}
};
}
void mlir::populateVectorToLLVMConversionPatterns(
LLVMTypeConverter &converter, RewritePatternSet &patterns,
bool reassociateFPReductions, bool force32BitVectorIndices) {
MLIRContext *ctx = converter.getDialect()->getContext();
patterns.add<VectorFMAOpNDRewritePattern>(ctx);
populateVectorInsertExtractStridedSliceTransforms(patterns);
patterns.add<VectorReductionOpConversion>(converter, reassociateFPReductions);
patterns.add<VectorCreateMaskOpRewritePattern>(ctx, force32BitVectorIndices);
patterns
.add<VectorBitCastOpConversion, VectorShuffleOpConversion,
VectorExtractElementOpConversion, VectorExtractOpConversion,
VectorFMAOp1DConversion, VectorInsertElementOpConversion,
VectorInsertOpConversion, VectorPrintOpConversion,
VectorTypeCastOpConversion, VectorScaleOpConversion,
VectorLoadStoreConversion<vector::LoadOp, vector::LoadOpAdaptor>,
VectorLoadStoreConversion<vector::MaskedLoadOp,
vector::MaskedLoadOpAdaptor>,
VectorLoadStoreConversion<vector::StoreOp, vector::StoreOpAdaptor>,
VectorLoadStoreConversion<vector::MaskedStoreOp,
vector::MaskedStoreOpAdaptor>,
VectorGatherOpConversion, VectorScatterOpConversion,
VectorExpandLoadOpConversion, VectorCompressStoreOpConversion,
VectorSplatOpLowering, VectorSplatNdOpLowering>(converter);
populateVectorTransferLoweringPatterns(patterns, 1);
}
void mlir::populateVectorToLLVMMatrixConversionPatterns(
LLVMTypeConverter &converter, RewritePatternSet &patterns) {
patterns.add<VectorMatmulOpConversion>(converter);
patterns.add<VectorFlatTransposeOpConversion>(converter);
}