#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cassert>
using namespace llvm;
#define DEBUG_TYPE "scalarize-masked-mem-intrin"
namespace {

// Function pass that looks at every call instruction in a function and, for
// the masked load / store / gather / scatter intrinsics that
// TargetTransformInfo reports as not legal, replaces the call with scalar
// memory operations (see optimizeCallInst).
class ScalarizeMaskedMemIntrin : public FunctionPass {
public:
  // Static ID member; handed to the FunctionPass base constructor.
  static char ID;

  explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
    initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
  }

  // Human-readable pass name.
  StringRef getPassName() const override {
    return "Scalarize Masked Memory Intrinsics";
  }

  // The pass needs the TargetTransformInfo wrapper to query legality.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  // Entry point; returns true when the function was changed.
  bool runOnFunction(Function &F) override;

private:
  // Walks the instructions of BB, forwarding each CallInst to
  // optimizeCallInst. ModifiedDT is an in/out flag set by optimizeCallInst.
  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);

  // Scalarizes CI when it is one of the handled masked intrinsics and the
  // target does not report it legal; sets ModifiedDT in that case.
  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);

  // Assigned at the start of runOnFunction for the function being processed.
  const TargetTransformInfo *TTI = nullptr;
};

} // end anonymous namespace
// Definition of the pass's static ID member (the object passed to the
// FunctionPass constructor above).
char ScalarizeMaskedMemIntrin::ID = 0;
// Emits the initializeScalarizeMaskedMemIntrinPass registration routine that
// the constructor calls. The pass argument string is DEBUG_TYPE
// ("scalarize-masked-mem-intrin").
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS - confirm against the
// macro definition.
INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
"Scalarize unsupported masked memory intrinsics", false, false)
// Factory function: allocates a new ScalarizeMaskedMemIntrin pass with `new`
// and returns it through the FunctionPass base pointer.
FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
  FunctionPass *NewPass = new ScalarizeMaskedMemIntrin();
  return NewPass;
}
// Replace a masked-load intrinsic call with ordinary IR and erase the call.
//
// Call operands, in order: the pointer to load from, the alignment (must be a
// ConstantInt), the mask, and the value selected for lanes whose mask bit is
// off.
//
// Three shapes of output are produced:
//   * constant all-ones mask: a single aligned load of the whole vector;
//   * ConstantVector mask:    straight-line per-lane loads for the non-zero
//                             lanes, followed by a select against operand 3;
//   * any other mask:         for every lane, a conditional block guarded by
//                             that lane's mask bit, with PHI nodes carrying
//                             the partially built vector, then a final select.
static void scalarizeMaskedLoad(CallInst *CI) {
  Value *Ptr = CI->getArgOperand(0);
  Value *AlignArg = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *PassThru = CI->getArgOperand(3);

  unsigned AlignBytes = cast<ConstantInt>(AlignArg)->getZExtValue();
  auto *ResultTy = dyn_cast<VectorType>(CI->getType());
  assert(ResultTy && "Unexpected return type of masked load intrinsic");
  Type *ScalarTy = CI->getType()->getVectorElementType();

  // All new instructions are emitted immediately before the call and carry
  // the call's debug location.
  IRBuilder<> Builder(CI->getContext());
  Builder.SetInsertPoint(CI);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Every lane enabled: one plain vector load does the job.
  if (auto *ConstMask = dyn_cast<Constant>(Mask)) {
    if (ConstMask->isAllOnesValue()) {
      Value *WholeLoad = Builder.CreateAlignedLoad(Ptr, AlignBytes);
      CI->replaceAllUsesWith(WholeLoad);
      CI->eraseFromParent();
      return;
    }
  }

  // Per-lane loads: cap the alignment at the element size in bytes.
  AlignBytes = std::min(AlignBytes, ResultTy->getScalarSizeInBits() / 8);

  // View the vector pointer as a pointer to its first element, keeping the
  // address space.
  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
  Value *EltBase =
      Builder.CreateBitCast(Ptr, ScalarTy->getPointerTo(AddrSpace));
  const unsigned NumLanes = ResultTy->getNumElements();
  Value *Undef = UndefValue::get(ResultTy);

  // Vector being assembled lane by lane.
  Value *Accum = Undef;

  // Mask known at compile time: no control flow required.
  if (auto *MaskCV = dyn_cast<ConstantVector>(Mask)) {
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      if (MaskCV->getOperand(Lane)->isNullValue())
        continue;
      Value *LaneIdx = Builder.getInt32(Lane);
      Value *LaneAddr = Builder.CreateInBoundsGEP(ScalarTy, EltBase, LaneIdx);
      LoadInst *LaneLoad = Builder.CreateAlignedLoad(LaneAddr, AlignBytes);
      Accum = Builder.CreateInsertElement(Accum, LaneLoad, LaneIdx);
    }
    Value *Merged = Builder.CreateSelect(Mask, Accum, PassThru);
    CI->replaceAllUsesWith(Merged);
    CI->eraseFromParent();
    return;
  }

  // Run-time mask: chain of "test bit -> cond.load -> else" blocks.
  BasicBlock *TestBB = CI->getParent();     // block that tests the next bit
  BasicBlock *LoadBB = nullptr;             // most recent "cond.load" block
  BasicBlock *PrevTestBB = CI->getParent(); // block that tested the prior bit
  PHINode *Join = nullptr;
  Value *SkippedVal = Undef; // value reaching "else" when the load is skipped

  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    // At the head of the "else" block left by the previous lane, merge the
    // vector from the load block with the one that bypassed it.
    if (Lane > 0) {
      Join = Builder.CreatePHI(ResultTy, 2, "res.phi.else");
      Join->addIncoming(Accum, LoadBB);
      Join->addIncoming(SkippedVal, PrevTestBB);
      SkippedVal = Join;
      Accum = Join;
    }

    Value *MaskBit = Builder.CreateExtractElement(Mask, Builder.getInt32(Lane));
    Value *IsSet = Builder.CreateICmp(ICmpInst::ICMP_EQ, MaskBit,
                                      ConstantInt::get(MaskBit->getType(), 1));

    // Split off the conditional block and fill it with this lane's load.
    LoadBB = TestBB->splitBasicBlock(CI->getIterator(), "cond.load");
    Builder.SetInsertPoint(CI);

    Value *LaneAddr =
        Builder.CreateInBoundsGEP(ScalarTy, EltBase, Builder.getInt32(Lane));
    LoadInst *LaneLoad = Builder.CreateAlignedLoad(LaneAddr, AlignBytes);
    Accum = Builder.CreateInsertElement(Accum, LaneLoad, Builder.getInt32(Lane));

    // Split again; the tail becomes the "else" block populated by the next
    // iteration (or by the final PHI/select below).
    BasicBlock *NextTestBB =
        LoadBB->splitBasicBlock(CI->getIterator(), "else");
    Builder.SetInsertPoint(CI);

    // Swap the unconditional branch produced by the first split for a
    // conditional branch on the mask bit.
    Instruction *FallThrough = TestBB->getTerminator();
    BranchInst::Create(LoadBB, NextTestBB, IsSet, FallThrough);
    FallThrough->eraseFromParent();

    PrevTestBB = TestBB;
    TestBB = NextTestBB;
  }

  // Merge the last lane, then pick operand 3 for the disabled lanes.
  Join = Builder.CreatePHI(ResultTy, 2, "res.phi.select");
  Join->addIncoming(Accum, LoadBB);
  Join->addIncoming(SkippedVal, PrevTestBB);
  Value *Merged = Builder.CreateSelect(Mask, Join, PassThru);
  CI->replaceAllUsesWith(Merged);
  CI->eraseFromParent();
}
// Replace a masked-store intrinsic call with ordinary IR and erase the call.
//
// Call operands, in order: the vector value to store, the destination
// pointer, the alignment (must be a ConstantInt), and the mask.
//
// Three shapes of output are produced:
//   * constant all-ones mask: a single aligned store of the whole vector;
//   * ConstantVector mask:    straight-line per-lane stores for the non-zero
//                             lanes;
//   * any other mask:         for every lane, a "cond.store" block guarded by
//                             that lane's mask bit, followed by an "else"
//                             block where the next lane is tested.
static void scalarizeMaskedStore(CallInst *CI) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptr = CI->getArgOperand(1);
  Value *Alignment = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
  VectorType *VecType = dyn_cast<VectorType>(Src->getType());
  assert(VecType && "Unexpected data type in masked store intrinsic");

  Type *EltTy = VecType->getElementType();

  // All new instructions are emitted immediately before the call and carry
  // the call's debug location.
  IRBuilder<> Builder(CI->getContext());
  Instruction *InsertPt = CI;
  BasicBlock *IfBlock = CI->getParent();
  Builder.SetInsertPoint(InsertPt);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  // Short-cut if the mask is all-true: one plain vector store suffices.
  bool IsAllOnesMask =
      isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();

  if (IsAllOnesMask) {
    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
    CI->eraseFromParent();
    return;
  }

  // Adjust alignment for the scalar instructions. A per-element store may
  // only assume the smaller of the vector's alignment and the element size;
  // taking the larger would promise alignment the intrinsic never guaranteed.
  // This matches the adjustment done in scalarizeMaskedLoad.
  AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);

  // Bitcast the vector pointer to a pointer to its first element, keeping the
  // address space.
  Type *NewPtrType =
      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
  unsigned VectorWidth = VecType->getNumElements();

  // Mask known at compile time: store the enabled lanes without branching.
  if (isa<ConstantVector>(Mask)) {
    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
      if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
        continue;
      Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
      Value *Gep =
          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
      Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
    }
    CI->eraseFromParent();
    return;
  }

  // Run-time mask: test each lane's bit and branch around its store.
  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
    Value *Predicate =
        Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
                                    ConstantInt::get(Predicate->getType(), 1));

    // Create the "cond.store" block and emit this lane's store into it.
    BasicBlock *CondBlock =
        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
    Builder.SetInsertPoint(InsertPt);

    Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
    Value *Gep =
        Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
    Builder.CreateAlignedStore(OneElt, Gep, AlignVal);

    // Create the "else" block; the next iteration fills it in.
    BasicBlock *NewIfBlock =
        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
    Builder.SetInsertPoint(InsertPt);

    // Replace the unconditional branch left by the first split with a
    // conditional branch on the mask bit.
    Instruction *OldBr = IfBlock->getTerminator();
    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
    OldBr->eraseFromParent();
    IfBlock = NewIfBlock;
  }
  CI->eraseFromParent();
}
// Replace a masked-gather intrinsic call with ordinary IR and erase the call.
//
// Call operands, in order: the vector of pointers, the alignment (must be a
// ConstantInt), the mask, and the value selected for lanes whose mask bit is
// off.
//
// With a ConstantVector mask the enabled lanes are loaded in straight-line
// code. Otherwise each lane gets a "cond.load" block guarded by its mask bit,
// with PHI nodes carrying the partially built vector. Both paths end in a
// select between the assembled vector and operand 3.
static void scalarizeMaskedGather(CallInst *CI) {
  Value *Ptrs = CI->getArgOperand(0);
  Value *AlignArg = CI->getArgOperand(1);
  Value *Mask = CI->getArgOperand(2);
  Value *PassThru = CI->getArgOperand(3);

  auto *ResultTy = dyn_cast<VectorType>(CI->getType());
  assert(ResultTy && "Unexpected return type of masked load intrinsic");

  // All new instructions are emitted immediately before the call and carry
  // the call's debug location.
  IRBuilder<> Builder(CI->getContext());
  Builder.SetInsertPoint(CI);
  unsigned AlignBytes = cast<ConstantInt>(AlignArg)->getZExtValue();
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  Value *Undef = UndefValue::get(ResultTy);
  // Vector being assembled lane by lane.
  Value *Accum = Undef;
  const unsigned NumLanes = ResultTy->getNumElements();

  // Mask known at compile time: no control flow required.
  if (auto *MaskCV = dyn_cast<ConstantVector>(Mask)) {
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      if (MaskCV->getOperand(Lane)->isNullValue())
        continue;
      Value *LanePtr = Builder.CreateExtractElement(
          Ptrs, Builder.getInt32(Lane), "Ptr" + Twine(Lane));
      LoadInst *LaneLoad =
          Builder.CreateAlignedLoad(LanePtr, AlignBytes, "Load" + Twine(Lane));
      Accum = Builder.CreateInsertElement(
          Accum, LaneLoad, Builder.getInt32(Lane), "Res" + Twine(Lane));
    }
    Value *Merged = Builder.CreateSelect(Mask, Accum, PassThru);
    CI->replaceAllUsesWith(Merged);
    CI->eraseFromParent();
    return;
  }

  // Run-time mask: chain of "test bit -> cond.load -> else" blocks.
  BasicBlock *TestBB = CI->getParent();     // block that tests the next bit
  BasicBlock *LoadBB = nullptr;             // most recent "cond.load" block
  BasicBlock *PrevTestBB = CI->getParent(); // block that tested the prior bit
  PHINode *Join = nullptr;
  Value *SkippedVal = Undef; // value reaching "else" when the load is skipped

  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    // At the head of the "else" block left by the previous lane, merge the
    // vector from the load block with the one that bypassed it.
    if (Lane > 0) {
      Join = Builder.CreatePHI(ResultTy, 2, "res.phi.else");
      Join->addIncoming(Accum, LoadBB);
      Join->addIncoming(SkippedVal, PrevTestBB);
      SkippedVal = Join;
      Accum = Join;
    }

    Value *MaskBit = Builder.CreateExtractElement(
        Mask, Builder.getInt32(Lane), "Mask" + Twine(Lane));
    Value *IsSet = Builder.CreateICmp(ICmpInst::ICMP_EQ, MaskBit,
                                      ConstantInt::get(MaskBit->getType(), 1),
                                      "ToLoad" + Twine(Lane));

    // Split off the conditional block and fill it with this lane's load.
    LoadBB = TestBB->splitBasicBlock(CI, "cond.load");
    Builder.SetInsertPoint(CI);

    Value *LanePtr = Builder.CreateExtractElement(
        Ptrs, Builder.getInt32(Lane), "Ptr" + Twine(Lane));
    LoadInst *LaneLoad =
        Builder.CreateAlignedLoad(LanePtr, AlignBytes, "Load" + Twine(Lane));
    Accum = Builder.CreateInsertElement(Accum, LaneLoad, Builder.getInt32(Lane),
                                        "Res" + Twine(Lane));

    // Split again; the tail becomes the "else" block populated by the next
    // iteration (or by the final PHI/select below).
    BasicBlock *NextTestBB = LoadBB->splitBasicBlock(CI, "else");
    Builder.SetInsertPoint(CI);

    // Swap the unconditional branch produced by the first split for a
    // conditional branch on the mask bit.
    Instruction *FallThrough = TestBB->getTerminator();
    BranchInst::Create(LoadBB, NextTestBB, IsSet, FallThrough);
    FallThrough->eraseFromParent();

    PrevTestBB = TestBB;
    TestBB = NextTestBB;
  }

  // Merge the last lane, then pick operand 3 for the disabled lanes.
  Join = Builder.CreatePHI(ResultTy, 2, "res.phi.select");
  Join->addIncoming(Accum, LoadBB);
  Join->addIncoming(SkippedVal, PrevTestBB);
  Value *Merged = Builder.CreateSelect(Mask, Join, PassThru);
  CI->replaceAllUsesWith(Merged);
  CI->eraseFromParent();
}
// Replace a masked-scatter intrinsic call with ordinary IR and erase the call.
//
// Call operands, in order: the vector value to store, the vector of
// destination pointers, the alignment (must be a ConstantInt), and the mask.
//
// With a ConstantVector mask the enabled lanes are stored in straight-line
// code. Otherwise each lane gets a "cond.store" block guarded by its mask
// bit, followed by an "else" block where the next lane is tested.
static void scalarizeMaskedScatter(CallInst *CI) {
  Value *Src = CI->getArgOperand(0);
  Value *Ptrs = CI->getArgOperand(1);
  Value *AlignArg = CI->getArgOperand(2);
  Value *Mask = CI->getArgOperand(3);

  assert(isa<VectorType>(Src->getType()) &&
         "Unexpected data type in masked scatter intrinsic");
  assert(isa<VectorType>(Ptrs->getType()) &&
         isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
         "Vector of pointers is expected in masked scatter intrinsic");

  // All new instructions are emitted immediately before the call and carry
  // the call's debug location.
  IRBuilder<> Builder(CI->getContext());
  Builder.SetInsertPoint(CI);
  Builder.SetCurrentDebugLocation(CI->getDebugLoc());

  unsigned AlignBytes = cast<ConstantInt>(AlignArg)->getZExtValue();
  const unsigned NumLanes = Src->getType()->getVectorNumElements();

  // Mask known at compile time: store the enabled lanes without branching.
  if (auto *MaskCV = dyn_cast<ConstantVector>(Mask)) {
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      if (MaskCV->getOperand(Lane)->isNullValue())
        continue;
      Value *LaneVal = Builder.CreateExtractElement(
          Src, Builder.getInt32(Lane), "Elt" + Twine(Lane));
      Value *LanePtr = Builder.CreateExtractElement(
          Ptrs, Builder.getInt32(Lane), "Ptr" + Twine(Lane));
      Builder.CreateAlignedStore(LaneVal, LanePtr, AlignBytes);
    }
    CI->eraseFromParent();
    return;
  }

  // Run-time mask: test each lane's bit and branch around its store.
  BasicBlock *TestBB = CI->getParent();
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    Value *MaskBit = Builder.CreateExtractElement(
        Mask, Builder.getInt32(Lane), "Mask" + Twine(Lane));
    Value *IsSet = Builder.CreateICmp(ICmpInst::ICMP_EQ, MaskBit,
                                      ConstantInt::get(MaskBit->getType(), 1),
                                      "ToStore" + Twine(Lane));

    // Split off the conditional block and emit this lane's store into it.
    BasicBlock *StoreBB = TestBB->splitBasicBlock(CI, "cond.store");
    Builder.SetInsertPoint(CI);

    Value *LaneVal = Builder.CreateExtractElement(
        Src, Builder.getInt32(Lane), "Elt" + Twine(Lane));
    Value *LanePtr = Builder.CreateExtractElement(
        Ptrs, Builder.getInt32(Lane), "Ptr" + Twine(Lane));
    Builder.CreateAlignedStore(LaneVal, LanePtr, AlignBytes);

    // Split again; the tail is the "else" block the next iteration fills.
    BasicBlock *NextTestBB = StoreBB->splitBasicBlock(CI, "else");
    Builder.SetInsertPoint(CI);

    // Swap the unconditional branch produced by the first split for a
    // conditional branch on the mask bit.
    Instruction *FallThrough = TestBB->getTerminator();
    BranchInst::Create(StoreBB, NextTestBB, IsSet, FallThrough);
    FallThrough->eraseFromParent();

    TestBB = NextTestBB;
  }
  CI->eraseFromParent();
}
// Pass entry point. Sweeps over the function's basic blocks repeatedly until
// a full sweep makes no change. A sweep is abandoned (and a new one started
// from the first block) as soon as optimizeBlock reports through its flag
// that a call was scalarized.
//
// Returns true when any sweep changed the function.
bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  bool ChangedAnything = false;
  bool ChangedThisSweep;
  do {
    ChangedThisSweep = false;
    for (Function::iterator BlockIt = F.begin(); BlockIt != F.end();) {
      // Step the iterator before processing the current block.
      BasicBlock &Block = *BlockIt++;
      bool Restart = false;
      ChangedThisSweep |= optimizeBlock(Block, Restart);
      if (Restart)
        break;
    }
    ChangedAnything |= ChangedThisSweep;
  } while (ChangedThisSweep);

  return ChangedAnything;
}
// Visits the instructions of BB in order and hands every CallInst to
// optimizeCallInst. Returns true immediately once ModifiedDT is set;
// otherwise returns whether any call reported a change.
bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
  bool Changed = false;

  for (BasicBlock::iterator InstIt = BB.begin(); InstIt != BB.end();) {
    // Advance first: the visited call may be erased by optimizeCallInst.
    Instruction *Inst = &*InstIt++;
    if (auto *Call = dyn_cast<CallInst>(Inst))
      Changed |= optimizeCallInst(Call, ModifiedDT);
    if (ModifiedDT)
      return true;
  }

  return Changed;
}
// Scalarizes CI when it is a masked load / store / gather / scatter intrinsic
// that TargetTransformInfo does not report as legal for the accessed vector
// type (the call's result type for load and gather, the type of operand 0 for
// store and scatter).
//
// Returns true and sets ModifiedDT when the call was replaced; returns false
// and leaves ModifiedDT untouched otherwise. After a true return CI has been
// erased and must not be used.
bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
                                                bool &ModifiedDT) {
  auto *Intrin = dyn_cast<IntrinsicInst>(CI);
  if (!Intrin)
    return false;

  switch (Intrin->getIntrinsicID()) {
  case Intrinsic::masked_load:
    if (TTI->isLegalMaskedLoad(CI->getType()))
      return false;
    scalarizeMaskedLoad(CI);
    break;
  case Intrinsic::masked_store:
    if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
      return false;
    scalarizeMaskedStore(CI);
    break;
  case Intrinsic::masked_gather:
    if (TTI->isLegalMaskedGather(CI->getType()))
      return false;
    scalarizeMaskedGather(CI);
    break;
  case Intrinsic::masked_scatter:
    if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
      return false;
    scalarizeMaskedScatter(CI);
    break;
  default:
    return false;
  }

  // Reached only when one of the scalarize* helpers ran.
  ModifiedDT = true;
  return true;
}