#include "clang/AST/CharUnits.h"
#include "clang/AST/ParentMapContext.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
using namespace clang;
using namespace ento;
using namespace taint;
using llvm::formatv;
namespace {
static std::optional<QualType> determineElementType(const Expr *E,
const CheckerContext &C) {
const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
if (!ASE)
return std::nullopt;
const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
if (!SubscriptBaseReg)
return std::nullopt;
if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
return std::nullopt;
return ASE->getType();
}
static std::optional<int64_t>
determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
if (!T)
return std::nullopt;
return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
}
/// Helper that records which assumptions were made about the offset of a
/// memory access and composes a single note tag describing them.
///
/// The element type/size are derived from the accessed expression at
/// construction time (see determineElementType / determineElementSize); when
/// they are unavailable the generated message falls back to byte offsets.
class StateUpdateReporter {
  /// Region that is being accessed; only used to name it in the message.
  const SubRegion *Reg;
  /// Offset of the access within Reg, in bytes.
  const NonLoc ByteOffsetVal;
  /// Element type used to phrase the message in terms of indices, if known.
  const std::optional<QualType> ElementType;
  /// Size of ElementType, if ElementType is known.
  const std::optional<int64_t> ElementSize;
  /// Set when the analysis assumed that the offset is non-negative.
  bool AssumedNonNegative = false;
  /// Set to the upper bound when the analysis assumed "offset < bound".
  std::optional<NonLoc> AssumedUpperBound = std::nullopt;

public:
  StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
                      CheckerContext &C)
      : Reg(R), ByteOffsetVal(ByteOffsVal),
        ElementType(determineElementType(E, C)),
        ElementSize(determineElementSize(ElementType, C)) {}

  /// Remembers that the offset was assumed to be non-negative.
  void recordNonNegativeAssumption() { AssumedNonNegative = true; }

  /// Remembers that the offset was assumed to be less than \p UpperBoundVal.
  void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
    AssumedUpperBound = UpperBoundVal;
  }

  /// \returns true if recordNonNegativeAssumption() was called.
  /// This is a pure accessor, so it is callable on const objects as well.
  bool assumedNonNegative() const { return AssumedNonNegative; }

  /// Creates a note tag that describes the recorded assumptions, or returns
  /// nullptr when no assumption was recorded.
  const NoteTag *createNoteTag(CheckerContext &C) const;

private:
  /// Builds the text of the note for the bug report \p BR; an empty string
  /// means that the note is not relevant for that report.
  std::string getMessage(PathSensitiveBugReport &BR) const;

  /// \returns true if one of the symbols that compose \p Sym is marked as
  /// interesting in \p BR (see the definition for the exact traversal rule).
  static bool providesInformationAboutInteresting(SymbolRef Sym,
                                                  PathSensitiveBugReport &BR);

  /// Convenience overload: applies the SymbolRef overload to the symbol
  /// behind \p SV (which may be null).
  static bool providesInformationAboutInteresting(SVal SV,
                                                  PathSensitiveBugReport &BR) {
    return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
  }
};
/// The pair of texts attached to a bug report: a short summary and the full
/// description.
struct Messages {
  /// Brief, one-line summary of the problem.
  std::string Short;
  /// Detailed description of the problem.
  std::string Full;
};
/// Checker that reports out-of-bound memory accesses performed through array
/// subscripts, pointer dereferences and `->` member accesses.
class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
                                           check::PostStmt<UnaryOperator>,
                                           check::PostStmt<MemberExpr>> {
  /// Bug type of the ordinary out-of-bound reports.
  BugType BT{this, "Out-of-bound access"};
  /// Bug type of the reports that are triggered by tainted offsets.
  BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};

  /// Performs the bounds check for the memory location denoted by \p E.
  void performCheck(const Expr *E, CheckerContext &C) const;

  /// Emits a bug report on a new error node created from \p ErrorState.
  void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
                 NonLoc Offset, std::optional<NonLoc> Extent,
                 bool IsTaintBug = false) const;

  /// Marks the symbols that compose \p Val (and, if \p MarkTaint is set, its
  /// tainted symbols) as interesting in \p BR.
  static void markPartsInteresting(PathSensitiveBugReport &BR,
                                   ProgramStateRef ErrorState, NonLoc Val,
                                   bool MarkTaint);

  /// \returns true if \p S starts in the expansion of one of the recognized
  /// character classification macros.
  static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);

  /// \returns true if \p E is a subscript expression directly under an
  /// address-of operator and \p Offset is known to be equal to \p Limit.
  static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
                                       NonLoc Offset, NonLoc Limit,
                                       CheckerContext &C);

  /// \returns true if the closest ancestor of \p S that is neither a
  /// parenthesis nor an implicit cast is a unary `&` operator.
  static bool isInAddressOf(const Stmt *S, ASTContext &AC);

public:
  /// Every array subscript expression is checked.
  void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {
    performCheck(E, C);
  }

  /// Among the unary operators only the dereference (`*ptr`) is checked.
  void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {
    if (E->getOpcode() != UO_Deref)
      return;
    performCheck(E, C);
  }

  /// For `ptr->member` accesses the base pointer expression is checked; the
  /// `obj.member` form is ignored.
  void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {
    if (!E->isArrow())
      return;
    performCheck(E->getBase(), C);
  }
};
}
/// Decomposes \p Location into an owner region and a byte offset within it.
///
/// Starting from the region of \p Location, every ElementRegion layer is
/// stripped and `index * sizeof(element)` is added to the accumulated offset
/// (all arithmetic is evaluated in the array index type).
///
/// \returns the first non-ElementRegion ancestor together with the summed
/// offset; std::nullopt if \p Location is not an ElementRegion, if an index is
/// not a NonLoc, if an element type is incomplete, or if the arithmetic cannot
/// be evaluated to a NonLoc.
static std::optional<std::pair<const SubRegion *, NonLoc>>
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
  const QualType IndexTy = SVB.getArrayIndexType();

  // Evaluates `L Op R` in the array index type; the result is empty when the
  // evaluation does not produce a NonLoc.
  auto Arith = [&SVB, State, IndexTy](BinaryOperatorKind Op, NonLoc L,
                                      NonLoc R) {
    return SVB.evalBinOpNN(State, Op, L, R, IndexTy).getAs<NonLoc>();
  };

  const SubRegion *Owner = nullptr;
  std::optional<NonLoc> Total = SVB.makeZeroArrayIndex();

  for (const auto *Layer =
           dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
       Layer != nullptr; Layer = dyn_cast_or_null<ElementRegion>(Owner)) {
    const auto Idx = Layer->getIndex().getAs<NonLoc>();
    if (!Idx)
      return std::nullopt;

    const QualType ElemTy = Layer->getElementType();
    // The size of an incomplete type cannot be queried.
    if (ElemTy->isIncompleteType())
      return std::nullopt;

    const NonLoc ElemSize = SVB.makeArrayIndex(
        SVB.getContext().getTypeSizeInChars(ElemTy).getQuantity());

    const auto Scaled = Arith(BO_Mul, *Idx, ElemSize);
    if (!Scaled)
      return std::nullopt;

    Total = Arith(BO_Add, *Total, *Scaled);
    if (!Total)
      return std::nullopt;

    // Continue with the enclosing region (the loop stops as soon as it is
    // not an ElementRegion).
    Owner = Layer->getSuperRegion()->getAs<SubRegion>();
  }

  if (!Owner)
    return std::nullopt;
  return std::make_pair(Owner, *Total);
}
/// Simplifies the operands of a comparison between a symbolic \p offset and a
/// concrete \p extent.
///
/// While \p offset has the form `Sym * C` (with `C` dividing the extent) or
/// `Sym + C`, the constant is moved to the other side: the extent is divided
/// by / decreased by `C` and the procedure is repeated on `Sym`.
/// In every other case the arguments are returned unchanged.
static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
                     SValBuilder &svalBuilder) {
  using ResultTy = std::pair<NonLoc, nonloc::ConcreteInt>;

  std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
  if (!SymVal || !SymVal->isExpression())
    return ResultTy(offset, extent);

  const auto *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol());
  if (!SIE)
    return ResultTy(offset, extent);

  // Bring the constant operand to the integer type of the extent.
  llvm::APSInt constant = APSIntType(extent.getValue()).convert(SIE->getRHS());
  const BinaryOperatorKind Opcode = SIE->getOpcode();

  if (Opcode == BO_Mul) {
    // NOTE(review): this presumes that the constant is never zero here
    // (a zero would make the remainder/division below invalid) -- confirm
    // that multiplication by zero is always simplified earlier.
    if ((extent.getValue() % constant) != 0)
      return ResultTy(offset, extent);
    return getSimplifiedOffsets(
        nonloc::SymbolVal(SIE->getLHS()),
        svalBuilder.makeIntVal(extent.getValue() / constant), svalBuilder);
  }

  if (Opcode == BO_Add)
    return getSimplifiedOffsets(
        nonloc::SymbolVal(SIE->getLHS()),
        svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);

  return ResultTy(offset, extent);
}
/// \returns true if the largest value that \p Value may take in \p State is
/// known and is negative.
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {
  if (const llvm::APSInt *Max = SVB.getMaxValue(State, Value))
    return Max->isNegative();
  return false;
}
/// \returns true if the type of \p Value is an unsigned integer type.
static bool isUnsigned(SValBuilder &SVB, NonLoc Value) {
  return Value.getType(SVB.getContext())->isUnsignedIntegerType();
}
/// Compares \p Value with \p Threshold and splits \p State accordingly.
///
/// By default the comparison is `Value < Threshold`; with \p CheckEquality it
/// is `Value == Threshold`.
///
/// \returns a pair of states: the first is the state where the comparison
/// holds, the second is the state where it does not (either may be null when
/// that outcome is infeasible). Both are null if the comparison cannot be
/// evaluated to a NonLoc.
static std::pair<ProgramStateRef, ProgramStateRef>
compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
                        SValBuilder &SVB, bool CheckEquality = false) {
  // With a concrete threshold, try to strip constant operands from the value.
  if (const auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
    const auto Simplified =
        getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
    Value = Simplified.first;
    Threshold = Simplified.second;
  }

  // The mixed-signedness cases are decided here, without evaluating the
  // operator. NOTE(review): presumably this avoids the implicit conversions
  // that the evaluated C operator would apply to the operands -- confirm.
  if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) {
    // A negative value is always below (and never equal to) an unsigned one.
    if (!CheckEquality)
      return {State, nullptr};
    return {nullptr, State};
  }
  if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) {
    // An unsigned value is neither below nor equal to a negative threshold.
    return {nullptr, State};
  }

  const BinaryOperatorKind Comparison = CheckEquality ? BO_EQ : BO_LT;
  const auto Outcome =
      SVB.evalBinOpNN(State, Comparison, Value, Threshold,
                      SVB.getConditionType())
          .getAs<NonLoc>();
  if (!Outcome)
    return {nullptr, nullptr};
  return State->assume(*Outcome);
}
static std::string getRegionName(const SubRegion *Region) {
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
return RegName;
if (const auto *FR = Region->getAs<FieldRegion>()) {
if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
return formatv("the field '{0}'", Name);
return "the unnamed field";
}
if (isa<AllocaRegion>(Region))
return "the memory returned by 'alloca'";
if (isa<SymbolicRegion>(Region) &&
isa<HeapSpaceRegion>(Region->getMemorySpace()))
return "the heap area";
if (isa<StringRegion>(Region))
return "the string literal";
return "the region";
}
/// \returns the value of \p SV as a 64-bit signed integer if \p SV is a
/// concrete integer for which tryExtValue() yields a value; std::nullopt
/// otherwise.
static std::optional<int64_t> getConcreteValue(NonLoc SV) {
  const auto Concrete = SV.getAs<nonloc::ConcreteInt>();
  if (!Concrete)
    return std::nullopt;
  return Concrete->getValue().tryExtValue();
}
/// Overload for optional values: an empty \p SV yields std::nullopt, anything
/// else is forwarded to the NonLoc overload.
static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
  if (!SV)
    return std::nullopt;
  return getConcreteValue(*SV);
}
/// Composes the messages of a report about an access that precedes the
/// beginning of \p Region.
///
/// The full message also mentions the value of \p Offset when it is a
/// concrete integer.
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
  std::string RegName = getRegionName(Region);

  // Stays empty unless the offset is a known concrete value.
  std::string OffsetStr = "";
  if (auto ConcreteOffset = getConcreteValue(Offset))
    OffsetStr = formatv(" {0}", ConcreteOffset);

  Messages Msgs;
  Msgs.Short = formatv("Out of bound access to memory preceding {0}", RegName);
  Msgs.Full =
      formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr);
  return Msgs;
}
/// Divides both \p Val1 and \p Val2 by \p Divisor, but only if this can be
/// done without a remainder for every value that is present.
///
/// Empty optionals are ignored (they never prevent the division).
///
/// \returns true if the present values were divided in place; false if
/// \p Divisor is zero or one of the values is not a multiple of it (then the
/// arguments are left unchanged).
static bool tryDividePair(std::optional<int64_t> &Val1,
                          std::optional<int64_t> &Val2, int64_t Divisor) {
  if (Divisor == 0)
    return false;

  const auto LeavesRemainder = [Divisor](const std::optional<int64_t> &V) {
    return V.has_value() && (*V % Divisor) != 0;
  };
  if (LeavesRemainder(Val1) || LeavesRemainder(Val2))
    return false;

  if (Val1.has_value())
    *Val1 /= Divisor;
  if (Val2.has_value())
    *Val2 /= Divisor;
  return true;
}
/// Composes the messages of a report about an access that is beyond the end
/// of \p Region.
///
/// \p Offset and \p Extent are byte quantities; when every concrete one among
/// them is a multiple of the size of the accessed element type, the message
/// speaks about indices and element counts instead of byte offsets.
/// \p Location must be an ElementRegion (this is asserted).
/// With \p AlsoMentionUnderflow the message says that the offset may be
/// negative as well.
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
                               NonLoc Offset, NonLoc Extent, SVal Location,
                               bool AlsoMentionUnderflow) {
  std::string RegName = getRegionName(Region);

  const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
  assert(EReg && "this checker only handles element access");
  QualType ElemType = EReg->getElementType();

  std::optional<int64_t> OffsetN = getConcreteValue(Offset);
  std::optional<int64_t> ExtentN = getConcreteValue(Extent);

  // Switch from bytes to elements if this is possible without remainders.
  const int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
  const bool UseIndices = tryDividePair(OffsetN, ExtentN, ElemSize);
  const char *OffsetOrIndex = UseIndices ? "index" : "byte offset";

  SmallString<256> Buf;
  llvm::raw_svector_ostream Out(Buf);

  Out << "Access of ";
  // The element type is named here only if the extent part (which would
  // name it) is not going to be printed.
  if (UseIndices && !ExtentN)
    Out << "'" << ElemType.getAsString() << "' element in ";
  Out << RegName << " at ";

  if (AlsoMentionUnderflow)
    Out << "a negative or overflowing " << OffsetOrIndex;
  else if (OffsetN)
    Out << OffsetOrIndex << " " << *OffsetN;
  else
    Out << "an overflowing " << OffsetOrIndex;

  if (ExtentN) {
    Out << ", while it holds only ";
    if (*ExtentN == 1)
      Out << "a single";
    else
      Out << *ExtentN;

    if (UseIndices)
      Out << " '" << ElemType.getAsString() << "' element";
    else
      Out << " byte";

    // Plural suffix.
    if (*ExtentN > 1)
      Out << "s";
  }

  return {formatv("Out of bound access to memory {0} {1}",
                  AlsoMentionUnderflow ? "around" : "after the end of",
                  RegName),
          std::string(Buf)};
}
/// Composes the messages of a report about an access to \p Region with a
/// tainted offset.
///
/// \p OffsetName is the word that denotes the tainted value in the message;
/// with \p AlsoMentionUnderflow the full message also says that the value may
/// be negative.
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
                             bool AlsoMentionUnderflow) {
  std::string RegName = getRegionName(Region);

  Messages Msgs;
  Msgs.Short = formatv("Potential out of bound access to {0} with tainted {1}",
                       RegName, OffsetName);
  Msgs.Full =
      formatv("Access of {0} with a tainted {1} that may be {2}too large",
              RegName, OffsetName,
              AlsoMentionUnderflow ? "negative or " : "");
  return Msgs;
}
/// Creates the note tag that describes the recorded assumptions.
///
/// \returns nullptr when neither the non-negativity nor an upper bound was
/// assumed. Otherwise the tag holds a copy of this object and produces its
/// text by calling getMessage() on that copy.
const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
  if (AssumedNonNegative || AssumedUpperBound.has_value()) {
    // Capture by value: the callback is stored in the tag that is returned.
    return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {
      return getMessage(BR);
    });
  }
  return nullptr;
}
/// Builds the text of the note that describes the recorded assumptions.
///
/// \returns an empty string when neither the offset nor the assumed upper
/// bound provides information about something interesting in \p BR. When only
/// the upper bound does, the non-negativity assumption is left out of the
/// text.
std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
  bool ShouldReportNonNegative = AssumedNonNegative;
  if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
    const bool BoundIsRelevant =
        AssumedUpperBound &&
        providesInformationAboutInteresting(*AssumedUpperBound, BR);
    if (!BoundIsRelevant)
      return "";
    // Only the bound is relevant, so do not talk about the sign of the
    // offset.
    ShouldReportNonNegative = false;
  }

  std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
  std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);

  // Speak about indices if the element size is known and it divides the
  // concrete values.
  const bool UseIndex =
      ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
  const bool HasUpperBound = AssumedUpperBound.has_value();

  SmallString<256> Buf;
  llvm::raw_svector_ostream Out(Buf);
  Out << "Assuming ";

  if (UseIndex)
    Out << "index ";
  else if (HasUpperBound)
    Out << "byte offset ";
  else
    Out << "offset ";

  // The concrete value is not printed after the plain "offset" wording.
  if ((UseIndex || HasUpperBound) && OffsetN)
    Out << "'" << *OffsetN << "' ";

  Out << "is";
  if (ShouldReportNonNegative)
    Out << " non-negative";

  if (HasUpperBound) {
    if (ShouldReportNonNegative)
      Out << " and";
    Out << " less than ";
    if (ExtentN)
      Out << *ExtentN << ", ";

    if (UseIndex && ElementType)
      Out << "the number of '" << ElementType->getAsString()
          << "' elements in ";
    else
      Out << "the extent of ";
    Out << getRegionName(Reg);
  }

  return std::string(Out.str());
}
/// Decides whether \p Sym is related to something that is marked as
/// interesting in \p BR.
///
/// The symbols yielded by Sym->symbols() are visited in order: the result is
/// true as soon as an interesting one is found, and false as soon as a
/// SymSymExpr (that is not interesting itself) is reached. A null \p Sym
/// yields false.
bool StateUpdateReporter::providesInformationAboutInteresting(
    SymbolRef Sym, PathSensitiveBugReport &BR) {
  if (Sym == nullptr)
    return false;

  for (SymbolRef Part : Sym->symbols()) {
    if (BR.isInteresting(Part))
      return true;
    // NOTE(review): presumably the traversal stops here because a constraint
    // on a symbol-symbol expression cannot be attributed to either of its
    // operands -- confirm.
    if (isa<SymSymExpr>(Part))
      break;
  }
  return false;
}
// Checks whether the memory access denoted by E stays within the bounds of
// the accessed region.
//
// The location is decomposed into (region, byte offset); then the offset is
// compared with zero (lower bound) and with the dynamic extent of the region
// (upper bound). Definite violations and possible violations with a tainted
// offset are reported; other ambiguous comparisons are resolved by assuming
// that the access is valid, and these assumptions are attached to the
// transition as a note tag.
void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
const SVal Location = C.getSVal(E);
// Accesses that come from the recognized character classification macros
// are deliberately ignored.
if (isFromCtypeMacro(E, C.getASTContext()))
return;
ProgramStateRef State = C.getState();
SValBuilder &SVB = C.getSValBuilder();
const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
computeOffset(State, SVB, Location);
// Nothing to check if the location is not an element of some region.
if (!RawOffset)
return;
auto [Reg, ByteOffset] = *RawOffset;
// Collects the assumptions made below; they are emitted as one note tag.
StateUpdateReporter SUR(Reg, ByteOffset, E, C);
// --- Lower bound ---
// Skipped for symbolic regions in the unknown memory space.
// NOTE(review): presumably because such a region may point into the middle
// of a larger object, where a negative offset is legitimate -- confirm.
const MemSpaceRegion *Space = Reg->getMemorySpace();
if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
if (PrecedesLowerBound) {
// The offset may be negative...
if (!WithinLowerBound) {
// ...and it cannot be non-negative: this is a definite underflow.
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
return;
}
// ...but it may be valid as well: assume the valid case and remember it.
SUR.recordNonNegativeAssumption();
}
// WithinLowerBound is null only if the comparison could not be evaluated.
if (WithinLowerBound)
State = WithinLowerBound;
}
// --- Upper bound ---
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
if (auto KnownSize = Size.getAs<NonLoc>()) {
// If non-negativity had to be assumed above, the overflow messages mention
// the possibility of a negative offset as well.
bool AlsoMentionUnderflow = SUR.assumedNonNegative();
auto [WithinUpperBound, ExceedsUpperBound] =
compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
if (ExceedsUpperBound) {
// The offset may be too large...
if (!WithinUpperBound) {
// ...and it cannot be in bounds. This is reported unless the expression
// is recognized as the `&array[size]` past-the-end pointer idiom.
if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
*KnownSize, C)) {
C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
return;
}
Messages Msgs =
getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
Location, AlsoMentionUnderflow);
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
return;
}
// ...but it may be in bounds as well. A tainted offset is reported even in
// this ambiguous situation.
if (isTainted(State, ByteOffset)) {
// "offset" is the generic wording; "index" is used when the index operand
// of a subscript expression is itself tainted.
const char *OffsetName = "offset";
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
OffsetName = "index";
Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
true);
return;
}
// Not tainted: assume that the offset is in bounds and remember it.
SUR.recordUpperBoundAssumption(*KnownSize);
}
// WithinUpperBound is null only if the comparison could not be evaluated.
if (WithinUpperBound)
State = WithinUpperBound;
}
// Continue the analysis on the (possibly constrained) state, together with
// the note that describes the assumptions (if any).
C.addTransition(State, SUR.createNoteTag(C));
}
/// Marks every symbol that takes part in \p Val as interesting in \p BR.
///
/// If \p MarkTaint is set, the tainted symbols of \p Val (as determined in
/// \p ErrorState) are marked as well.
void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR,
                                               ProgramStateRef ErrorState,
                                               NonLoc Val, bool MarkTaint) {
  SymbolRef Root = Val.getAsSymbol();
  if (Root != nullptr) {
    for (SymbolRef Part : Root->symbols())
      BR.markInteresting(Part);
  }

  if (!MarkTaint)
    return;

  for (SymbolRef Tainted : getTaintedSymbols(ErrorState, Val))
    BR.markInteresting(Tainted);
}
/// Emits an out-of-bound report on an error node generated from
/// \p ErrorState.
///
/// \p Msgs supplies the short and the full description; the symbols of
/// \p Offset and (if present) \p Extent are marked as interesting.
/// \p IsTaintBug selects the taint-specific bug type and also requests the
/// marking of the tainted symbols. Nothing happens if the error node cannot
/// be generated.
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
                                    ProgramStateRef ErrorState, Messages Msgs,
                                    NonLoc Offset, std::optional<NonLoc> Extent,
                                    bool IsTaintBug) const {
  if (ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState)) {
    const BugType &Kind = IsTaintBug ? TaintBT : BT;
    auto BR = std::make_unique<PathSensitiveBugReport>(Kind, Msgs.Short,
                                                       Msgs.Full, ErrorNode);

    markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
    if (Extent.has_value())
      markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);

    C.emitReport(std::move(BR));
  }
}
/// \returns true if \p S begins inside the expansion of a macro whose name is
/// one of the <ctype.h> character classification functions (isalnum, isalpha,
/// isblank, iscntrl, isdigit, isgraph, islower, isprint, ispunct, isspace,
/// isupper, isxdigit).
///
/// Only the immediate macro at the begin location of \p S is inspected.
bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
  SourceLocation Loc = S->getBeginLoc();
  if (!Loc.isMacroID())
    return false;

  StringRef MacroName = Lexer::getImmediateMacroName(
      Loc, ACtx.getSourceManager(), ACtx.getLangOpts());

  // Cheap pre-filter: every recognized name starts with "is" and is at least
  // seven characters long.
  if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
    return false;

  // The control-character classifier is spelled "iscntrl"; the previously
  // listed "isnctrl" is not a <ctype.h> name, so it never matched.
  return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
          (MacroName == "isblank") || (MacroName == "iscntrl") ||
          (MacroName == "isdigit") || (MacroName == "isgraph") ||
          (MacroName == "islower") || (MacroName == "isprint") ||
          (MacroName == "ispunct") || (MacroName == "isspace") ||
          (MacroName == "isupper") || (MacroName == "isxdigit"));
}
/// \returns true if the operand position of a unary `&` operator is occupied
/// by \p S, possibly wrapped in parentheses and implicit casts.
///
/// The parent chain is followed (always through the first parent) until a
/// node is found that is neither a ParenExpr nor an ImplicitCastExpr; the
/// result is false if the chain ends before that.
bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
  ParentMapContext &ParentCtx = ACtx.getParentMapContext();

  while (true) {
    const DynTypedNodeList Parents = ParentCtx.getParents(*S);
    if (Parents.empty())
      return false;
    // This is null when the parent is not a statement.
    S = Parents[0].get<Stmt>();
    if (!isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S))
      break;
  }

  if (const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S))
    return UnaryOp->getOpcode() == UO_AddrOf;
  return false;
}
/// Recognizes the `&array[size]` idiom that forms a past-the-end pointer.
///
/// \returns true if \p E is an array subscript expression that is the operand
/// of an address-of operator and, in \p State, \p Offset is certainly equal to
/// \p Limit.
bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,
                                                   ProgramStateRef State,
                                                   NonLoc Offset, NonLoc Limit,
                                                   CheckerContext &C) {
  if (!isa<ArraySubscriptExpr>(E) || !isInAddressOf(E, C.getASTContext()))
    return false;

  auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(
      State, Offset, Limit, C.getSValBuilder(), true);
  // Equality must be the only feasible outcome.
  return EqualsToThreshold && !NotEqualToThreshold;
}
// Registers the ArrayBoundCheckerV2 checker in the checker manager `mgr`.
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundCheckerV2>();
}
// Tells whether the checker should be registered: always true, the argument
// `mgr` is not consulted.
bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
return true;
}