#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Transforms/DialectConversion.h"
namespace mlir {
namespace arith {
#define GEN_PASS_DEF_ARITHEXPANDOPSPASS
#include "mlir/Dialect/Arith/Transforms/Passes.h.inc"
}
}
using namespace mlir;
static Value createConst(Location loc, Type type, int value,
PatternRewriter &rewriter) {
auto attr = rewriter.getIntegerAttr(getElementTypeOrSelf(type), value);
if (auto shapedTy = dyn_cast<ShapedType>(type)) {
return rewriter.create<arith::ConstantOp>(
loc, DenseElementsAttr::get(shapedTy, attr));
}
return rewriter.create<arith::ConstantOp>(loc, attr);
}
namespace {
struct CeilDivUIOpConverter : public OpRewritePattern<arith::CeilDivUIOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(arith::CeilDivUIOp op,
PatternRewriter &rewriter) const final {
Location loc = op.getLoc();
Value a = op.getLhs();
Value b = op.getRhs();
Value zero = createConst(loc, a.getType(), 0, rewriter);
Value compare =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, a, zero);
Value one = createConst(loc, a.getType(), 1, rewriter);
Value minusOne = rewriter.create<arith::SubIOp>(loc, a, one);
Value quotient = rewriter.create<arith::DivUIOp>(loc, minusOne, b);
Value plusOne = rewriter.create<arith::AddIOp>(loc, quotient, one);
rewriter.replaceOpWithNewOp<arith::SelectOp>(op, compare, zero, plusOne);
return success();
}
};
struct CeilDivSIOpConverter : public OpRewritePattern<arith::CeilDivSIOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(arith::CeilDivSIOp op,
PatternRewriter &rewriter) const final {
Location loc = op.getLoc();
Type type = op.getType();
Value a = op.getLhs();
Value b = op.getRhs();
Value plusOne = createConst(loc, type, 1, rewriter);
Value zero = createConst(loc, type, 0, rewriter);
Value minusOne = createConst(loc, type, -1, rewriter);
Value compare =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt, b, zero);
Value x = rewriter.create<arith::SelectOp>(loc, compare, minusOne, plusOne);
Value xPlusA = rewriter.create<arith::AddIOp>(loc, x, a);
Value xPlusADivB = rewriter.create<arith::DivSIOp>(loc, xPlusA, b);
Value posRes = rewriter.create<arith::AddIOp>(loc, plusOne, xPlusADivB);
Value minusA = rewriter.create<arith::SubIOp>(loc, zero, a);
Value minusADivB = rewriter.create<arith::DivSIOp>(loc, minusA, b);
Value negRes = rewriter.create<arith::SubIOp>(loc, zero, minusADivB);
Value aNeg =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, zero);
Value aPos =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt, a, zero);
Value bNeg =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, b, zero);
Value bPos =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::sgt, b, zero);
Value firstTerm = rewriter.create<arith::AndIOp>(loc, aNeg, bNeg);
Value secondTerm = rewriter.create<arith::AndIOp>(loc, aPos, bPos);
Value compareRes =
rewriter.create<arith::OrIOp>(loc, firstTerm, secondTerm);
rewriter.replaceOpWithNewOp<arith::SelectOp>(op, compareRes, posRes,
negRes);
return success();
}
};
struct FloorDivSIOpConverter : public OpRewritePattern<arith::FloorDivSIOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(arith::FloorDivSIOp op,
PatternRewriter &rewriter) const final {
Location loc = op.getLoc();
Type type = op.getType();
Value a = op.getLhs();
Value b = op.getRhs();
Value quotient = rewriter.create<arith::DivSIOp>(loc, a, b);
Value product = rewriter.create<arith::MulIOp>(loc, quotient, b);
Value notEqualDivisor = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::ne, a, product);
Value zero = createConst(loc, type, 0, rewriter);
Value aNeg =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, zero);
Value bNeg =
rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, b, zero);
Value signOpposite = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::ne, aNeg, bNeg);
Value cond =
rewriter.create<arith::AndIOp>(loc, notEqualDivisor, signOpposite);
Value minusOne = createConst(loc, type, -1, rewriter);
Value quotientMinusOne =
rewriter.create<arith::AddIOp>(loc, quotient, minusOne);
rewriter.replaceOpWithNewOp<arith::SelectOp>(op, cond, quotientMinusOne,
quotient);
return success();
}
};
template <typename OpTy, arith::CmpIPredicate pred>
struct MaxMinIOpConverter : public OpRewritePattern<OpTy> {
public:
using OpRewritePattern<OpTy>::OpRewritePattern;
LogicalResult matchAndRewrite(OpTy op,
PatternRewriter &rewriter) const final {
Value lhs = op.getLhs();
Value rhs = op.getRhs();
Value cmp = rewriter.create<arith::CmpIOp>(op.getLoc(), pred, lhs, rhs);
rewriter.replaceOpWithNewOp<arith::SelectOp>(op, cmp, lhs, rhs);
return success();
}
};
template <typename OpTy, arith::CmpFPredicate pred>
struct MaximumMinimumFOpConverter : public OpRewritePattern<OpTy> {
public:
using OpRewritePattern<OpTy>::OpRewritePattern;
LogicalResult matchAndRewrite(OpTy op,
PatternRewriter &rewriter) const final {
Value lhs = op.getLhs();
Value rhs = op.getRhs();
Location loc = op.getLoc();
static_assert(pred == arith::CmpFPredicate::UGT ||
pred == arith::CmpFPredicate::ULT,
"pred must be either UGT or ULT");
Value cmp = rewriter.create<arith::CmpFOp>(loc, pred, lhs, rhs);
Value select = rewriter.create<arith::SelectOp>(loc, cmp, lhs, rhs);
Value isNaN = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::UNO,
rhs, rhs);
rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNaN, rhs, select);
return success();
}
};
template <typename OpTy, arith::CmpFPredicate pred>
struct MaxNumMinNumFOpConverter : public OpRewritePattern<OpTy> {
public:
using OpRewritePattern<OpTy>::OpRewritePattern;
LogicalResult matchAndRewrite(OpTy op,
PatternRewriter &rewriter) const final {
Value lhs = op.getLhs();
Value rhs = op.getRhs();
Location loc = op.getLoc();
static_assert(pred == arith::CmpFPredicate::UGT ||
pred == arith::CmpFPredicate::ULT,
"pred must be either UGT or ULT");
Value cmp = rewriter.create<arith::CmpFOp>(loc, pred, lhs, rhs);
Value select = rewriter.create<arith::SelectOp>(loc, cmp, lhs, rhs);
Value isNaN = rewriter.create<arith::CmpFOp>(loc, arith::CmpFPredicate::UNO,
lhs, lhs);
rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNaN, rhs, select);
return success();
}
};
struct BFloat16ExtFOpConverter : public OpRewritePattern<arith::ExtFOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(arith::ExtFOp op,
PatternRewriter &rewriter) const final {
ImplicitLocOpBuilder b(op.getLoc(), rewriter);
auto operand = op.getOperand();
Type operandTy = operand.getType();
Type resultTy = op.getType();
Type operandETy = getElementTypeOrSelf(operandTy);
Type resultETy = getElementTypeOrSelf(resultTy);
if (!operandETy.isBF16() || !resultETy.isF32()) {
return rewriter.notifyMatchFailure(op, "not a ext of bf16 to f32.");
}
Type i16Ty = b.getI16Type();
Type i32Ty = b.getI32Type();
if (auto shapedTy = dyn_cast<ShapedType>(operandTy)) {
i16Ty = shapedTy.clone(i16Ty);
i32Ty = shapedTy.clone(i32Ty);
}
Value bitcast = b.create<arith::BitcastOp>(i16Ty, operand);
Value exti = b.create<arith::ExtUIOp>(i32Ty, bitcast);
Value c16 = createConst(op.getLoc(), i32Ty, 16, rewriter);
Value shl = b.create<arith::ShLIOp>(exti, c16);
Value result = b.create<arith::BitcastOp>(resultTy, shl);
rewriter.replaceOp(op, result);
return success();
}
};
struct BFloat16TruncFOpConverter : public OpRewritePattern<arith::TruncFOp> {
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(arith::TruncFOp op,
PatternRewriter &rewriter) const final {
ImplicitLocOpBuilder b(op.getLoc(), rewriter);
auto operand = op.getOperand();
Type operandTy = operand.getType();
Type resultTy = op.getType();
Type operandETy = getElementTypeOrSelf(operandTy);
Type resultETy = getElementTypeOrSelf(resultTy);
if (!operandETy.isF32() || !resultETy.isBF16()) {
return rewriter.notifyMatchFailure(op, "not a trunc of f32 to bf16.");
}
if (op.getRoundingmodeAttr()) {
return rewriter.notifyMatchFailure(
op, "only applicable to default rounding mode.");
}
Type i16Ty = b.getI16Type();
Type i32Ty = b.getI32Type();
Type f32Ty = b.getF32Type();
if (auto shapedTy = dyn_cast<ShapedType>(operandTy)) {
i16Ty = shapedTy.clone(i16Ty);
i32Ty = shapedTy.clone(i32Ty);
f32Ty = shapedTy.clone(f32Ty);
}
Value isNan =
b.create<arith::CmpFOp>(arith::CmpFPredicate::UNE, operand, operand);
Value c7FFF = createConst(op.getLoc(), i32Ty, 0x7fff, rewriter);
Value c7FC0_i16 = createConst(op.getLoc(), i16Ty, 0x7fc0, rewriter);
Value c16 = createConst(op.getLoc(), i32Ty, 16, rewriter);
Value c1 = createConst(op.getLoc(), i32Ty, 1, rewriter);
Value bitcast = b.create<arith::BitcastOp>(i32Ty, operand);
Value bit16 =
b.create<arith::AndIOp>(b.create<arith::ShRUIOp>(bitcast, c16), c1);
Value roundingBias = b.create<arith::AddIOp>(bit16, c7FFF);
Value biased = b.create<arith::AddIOp>(bitcast, roundingBias);
Value biasedAndShifted = b.create<arith::ShRUIOp>(biased, c16);
Value normalCaseResult_i16 =
b.create<arith::TruncIOp>(i16Ty, biasedAndShifted);
Value select =
b.create<arith::SelectOp>(isNan, c7FC0_i16, normalCaseResult_i16);
Value result = b.create<arith::BitcastOp>(resultTy, select);
rewriter.replaceOp(op, result);
return success();
}
};
struct ArithExpandOpsPass
: public arith::impl::ArithExpandOpsPassBase<ArithExpandOpsPass> {
using ArithExpandOpsPassBase::ArithExpandOpsPassBase;
void runOnOperation() override {
RewritePatternSet patterns(&getContext());
ConversionTarget target(getContext());
arith::populateArithExpandOpsPatterns(patterns);
target.addLegalDialect<arith::ArithDialect>();
target.addIllegalOp<
arith::CeilDivSIOp,
arith::CeilDivUIOp,
arith::FloorDivSIOp,
arith::MaxSIOp,
arith::MaxUIOp,
arith::MinSIOp,
arith::MinUIOp,
arith::MaximumFOp,
arith::MinimumFOp,
arith::MaxNumFOp,
arith::MinNumFOp
>();
if (includeBf16) {
arith::populateExpandBFloat16Patterns(patterns);
target.addDynamicallyLegalOp<arith::ExtFOp>(
[](arith::ExtFOp op) {
Type inETy = getElementTypeOrSelf(op.getOperand().getType());
Type outETy = getElementTypeOrSelf(op.getType());
return !(inETy.isBF16() && outETy.isF32());
});
target.addDynamicallyLegalOp<arith::TruncFOp>(
[](arith::TruncFOp op) {
Type inETy = getElementTypeOrSelf(op.getOperand().getType());
Type outETy = getElementTypeOrSelf(op.getType());
return !(inETy.isF32() && outETy.isBF16());
});
}
if (failed(applyPartialConversion(getOperation(), target,
std::move(patterns))))
signalPassFailure();
}
};
}
void mlir::arith::populateCeilFloorDivExpandOpsPatterns(
RewritePatternSet &patterns) {
patterns
.add<CeilDivSIOpConverter, CeilDivUIOpConverter, FloorDivSIOpConverter>(
patterns.getContext());
}
void mlir::arith::populateExpandBFloat16Patterns(RewritePatternSet &patterns) {
patterns.add<BFloat16ExtFOpConverter, BFloat16TruncFOpConverter>(
patterns.getContext());
}
void mlir::arith::populateArithExpandOpsPatterns(RewritePatternSet &patterns) {
populateCeilFloorDivExpandOpsPatterns(patterns);
patterns.add<
MaxMinIOpConverter<MaxSIOp, arith::CmpIPredicate::sgt>,
MaxMinIOpConverter<MaxUIOp, arith::CmpIPredicate::ugt>,
MaxMinIOpConverter<MinSIOp, arith::CmpIPredicate::slt>,
MaxMinIOpConverter<MinUIOp, arith::CmpIPredicate::ult>,
MaximumMinimumFOpConverter<MaximumFOp, arith::CmpFPredicate::UGT>,
MaximumMinimumFOpConverter<MinimumFOp, arith::CmpFPredicate::ULT>,
MaxNumMinNumFOpConverter<MaxNumFOp, arith::CmpFPredicate::UGT>,
MaxNumMinNumFOpConverter<MinNumFOp, arith::CmpFPredicate::ULT>
>(patterns.getContext());
}