diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/StaticAnalyzer/Core')
23 files changed, 1674 insertions, 640 deletions
diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index d6f69ae03afe..771ed2578f6d 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -188,6 +188,9 @@ public: PathPieces &getMutablePieces() { return PD->getMutablePieces(); } bool shouldAddPathEdges() const { return Consumer->shouldAddPathEdges(); } + bool shouldAddControlNotes() const { + return Consumer->shouldAddControlNotes(); + } bool shouldGenerateDiagnostics() const { return Consumer->shouldGenerateDiagnostics(); } @@ -534,10 +537,10 @@ static void removeEdgesToDefaultInitializers(PathPieces &Pieces) { if (auto *CF = dyn_cast<PathDiagnosticControlFlowPiece>(I->get())) { const Stmt *Start = CF->getStartLocation().asStmt(); const Stmt *End = CF->getEndLocation().asStmt(); - if (Start && isa<CXXDefaultInitExpr>(Start)) { + if (isa_and_nonnull<CXXDefaultInitExpr>(Start)) { I = Pieces.erase(I); continue; - } else if (End && isa<CXXDefaultInitExpr>(End)) { + } else if (isa_and_nonnull<CXXDefaultInitExpr>(End)) { PathPieces::iterator Next = std::next(I); if (Next != E) { if (auto *NextCF = @@ -1232,8 +1235,11 @@ void PathDiagnosticBuilder::generatePathDiagnosticsForNode( } else if (auto BE = P.getAs<BlockEdge>()) { - if (!C.shouldAddPathEdges()) { + if (C.shouldAddControlNotes()) { generateMinimalDiagForBlockEdge(C, *BE); + } + + if (!C.shouldAddPathEdges()) { return; } @@ -1254,12 +1260,14 @@ void PathDiagnosticBuilder::generatePathDiagnosticsForNode( // do-while statements are explicitly excluded here auto p = std::make_shared<PathDiagnosticEventPiece>( - L, "Looping back to the head " - "of the loop"); + L, "Looping back to the head of the loop"); p->setPrunable(true); addEdgeToPath(C.getActivePath(), PrevLoc, p->getLocation()); - C.getActivePath().push_front(std::move(p)); + // We might've added a very similar control node already + if (!C.shouldAddControlNotes()) { + C.getActivePath().push_front(std::move(p)); + } if (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) { addEdgeToPath(C.getActivePath(), PrevLoc, @@ -1300,10 +1308,13 @@ void PathDiagnosticBuilder::generatePathDiagnosticsForNode( auto PE = std::make_shared<PathDiagnosticEventPiece>(L, str); PE->setPrunable(true); addEdgeToPath(C.getActivePath(), PrevLoc, PE->getLocation()); - C.getActivePath().push_front(std::move(PE)); + + // We might've added a very similar control node already + if (!C.shouldAddControlNotes()) { + C.getActivePath().push_front(std::move(PE)); + } } - } else if (isa<BreakStmt>(Term) || isa<ContinueStmt>(Term) || - isa<GotoStmt>(Term)) { + } else if (isa<BreakStmt, ContinueStmt, GotoStmt>(Term)) { PathDiagnosticLocation L(Term, SM, C.getCurrLocationContext()); addEdgeToPath(C.getActivePath(), PrevLoc, L); } @@ -1342,9 +1353,7 @@ static const Stmt *getStmtParent(const Stmt *S, const ParentMap &PM) { if (!S) break; - if (isa<FullExpr>(S) || - isa<CXXBindTemporaryExpr>(S) || - isa<SubstNonTypeTemplateParmExpr>(S)) + if (isa<FullExpr, CXXBindTemporaryExpr, SubstNonTypeTemplateParmExpr>(S)) continue; break; @@ -1446,7 +1455,7 @@ static void addContextEdges(PathPieces &pieces, const LocationContext *LC) { break; // If the source is in the same context, we're already good. - if (llvm::find(SrcContexts, DstContext) != SrcContexts.end()) + if (llvm::is_contained(SrcContexts, DstContext)) break; // Update the subexpression node to point to the context edge. @@ -1540,9 +1549,8 @@ static void simplifySimpleBranches(PathPieces &pieces) { // We only perform this transformation for specific branch kinds. // We don't want to do this for do..while, for example. - if (!(isa<ForStmt>(s1Start) || isa<WhileStmt>(s1Start) || - isa<IfStmt>(s1Start) || isa<ObjCForCollectionStmt>(s1Start) || - isa<CXXForRangeStmt>(s1Start))) + if (!isa<ForStmt, WhileStmt, IfStmt, ObjCForCollectionStmt, + CXXForRangeStmt>(s1Start)) continue; // Is s1End the branch condition? @@ -3181,7 +3189,7 @@ findExecutedLines(const SourceManager &SM, const ExplodedNode *N) { P = N->getParentMap().getParent(RS); } - if (P && (isa<SwitchCase>(P) || isa<LabelStmt>(P))) + if (isa_and_nonnull<SwitchCase, LabelStmt>(P)) populateExecutedLinesWithStmt(P, SM, *ExecutedLines); } diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index d06a2d493303..8774dc3323ab 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -344,45 +344,178 @@ BugReporterVisitor::getDefaultEndPath(const BugReporterContext &BRC, } //===----------------------------------------------------------------------===// +// Implementation of NoStateChangeFuncVisitor. +//===----------------------------------------------------------------------===// + +bool NoStateChangeFuncVisitor::isModifiedInFrame(const ExplodedNode *N) { + const LocationContext *Ctx = N->getLocationContext(); + const StackFrameContext *SCtx = Ctx->getStackFrame(); + if (!FramesModifyingCalculated.count(SCtx)) + findModifyingFrames(N); + return FramesModifying.count(SCtx); +} + +void NoStateChangeFuncVisitor::markFrameAsModifying( + const StackFrameContext *SCtx) { + while (!SCtx->inTopFrame()) { + auto p = FramesModifying.insert(SCtx); + if (!p.second) + break; // Frame and all its parents already inserted. + + SCtx = SCtx->getParent()->getStackFrame(); + } +} + +static const ExplodedNode *getMatchingCallExitEnd(const ExplodedNode *N) { + assert(N->getLocationAs<CallEnter>()); + // The stackframe of the callee is only found in the nodes succeeding + // the CallEnter node. CallEnter's stack frame refers to the caller. + const StackFrameContext *OrigSCtx = N->getFirstSucc()->getStackFrame(); + + // Similarly, the nodes preceding CallExitEnd refer to the callee's stack + // frame. + auto IsMatchingCallExitEnd = [OrigSCtx](const ExplodedNode *N) { + return N->getLocationAs<CallExitEnd>() && + OrigSCtx == N->getFirstPred()->getStackFrame(); + }; + while (N && !IsMatchingCallExitEnd(N)) { + assert(N->succ_size() <= 1 && + "This function is to be used on the trimmed ExplodedGraph!"); + N = N->getFirstSucc(); + } + return N; +} + +void NoStateChangeFuncVisitor::findModifyingFrames( + const ExplodedNode *const CallExitBeginN) { + + assert(CallExitBeginN->getLocationAs<CallExitBegin>()); + + const StackFrameContext *const OriginalSCtx = + CallExitBeginN->getLocationContext()->getStackFrame(); + + const ExplodedNode *CurrCallExitBeginN = CallExitBeginN; + const StackFrameContext *CurrentSCtx = OriginalSCtx; + + for (const ExplodedNode *CurrN = CallExitBeginN; CurrN; + CurrN = CurrN->getFirstPred()) { + // Found a new inlined call. + if (CurrN->getLocationAs<CallExitBegin>()) { + CurrCallExitBeginN = CurrN; + CurrentSCtx = CurrN->getStackFrame(); + FramesModifyingCalculated.insert(CurrentSCtx); + // We won't see a change in between two identical exploded nodes: skip. + continue; + } + + if (auto CE = CurrN->getLocationAs<CallEnter>()) { + if (const ExplodedNode *CallExitEndN = getMatchingCallExitEnd(CurrN)) + if (wasModifiedInFunction(CurrN, CallExitEndN)) + markFrameAsModifying(CurrentSCtx); + + // We exited this inlined call, lets actualize the stack frame. + CurrentSCtx = CurrN->getStackFrame(); + + // Stop calculating at the current function, but always regard it as + // modifying, so we can avoid notes like this: + // void f(Foo &F) { + // F.field = 0; // note: 0 assigned to 'F.field' + // // note: returning without writing to 'F.field' + // } + if (CE->getCalleeContext() == OriginalSCtx) { + markFrameAsModifying(CurrentSCtx); + break; + } + } + + if (wasModifiedBeforeCallExit(CurrN, CurrCallExitBeginN)) + markFrameAsModifying(CurrentSCtx); + } +} + +PathDiagnosticPieceRef NoStateChangeFuncVisitor::VisitNode( + const ExplodedNode *N, BugReporterContext &BR, PathSensitiveBugReport &R) { + + const LocationContext *Ctx = N->getLocationContext(); + const StackFrameContext *SCtx = Ctx->getStackFrame(); + ProgramStateRef State = N->getState(); + auto CallExitLoc = N->getLocationAs<CallExitBegin>(); + + // No diagnostic if region was modified inside the frame. + if (!CallExitLoc || isModifiedInFrame(N)) + return nullptr; + + CallEventRef<> Call = + BR.getStateManager().getCallEventManager().getCaller(SCtx, State); + + // Optimistically suppress uninitialized value bugs that result + // from system headers having a chance to initialize the value + // but failing to do so. It's too unlikely a system header's fault. + // It's much more likely a situation in which the function has a failure + // mode that the user decided not to check. If we want to hunt such + // omitted checks, we should provide an explicit function-specific note + // describing the precondition under which the function isn't supposed to + // initialize its out-parameter, and additionally check that such + // precondition can actually be fulfilled on the current path. + if (Call->isInSystemHeader()) { + // We make an exception for system header functions that have no branches. + // Such functions unconditionally fail to initialize the variable. + // If they call other functions that have more paths within them, + // this suppression would still apply when we visit these inner functions. + // One common example of a standard function that doesn't ever initialize + // its out parameter is operator placement new; it's up to the follow-up + // constructor (if any) to initialize the memory. + if (!N->getStackFrame()->getCFG()->isLinear()) { + static int i = 0; + R.markInvalid(&i, nullptr); + } + return nullptr; + } + + if (const auto *MC = dyn_cast<ObjCMethodCall>(Call)) { + // If we failed to construct a piece for self, we still want to check + // whether the entity of interest is in a parameter. + if (PathDiagnosticPieceRef Piece = maybeEmitNoteForObjCSelf(R, *MC, N)) + return Piece; + } + + if (const auto *CCall = dyn_cast<CXXConstructorCall>(Call)) { + // Do not generate diagnostics for not modified parameters in + // constructors. + return maybeEmitNoteForCXXThis(R, *CCall, N); + } + + return maybeEmitNoteForParameters(R, *Call, N); +} + +//===----------------------------------------------------------------------===// // Implementation of NoStoreFuncVisitor. //===----------------------------------------------------------------------===// namespace { - /// Put a diagnostic on return statement of all inlined functions /// for which the region of interest \p RegionOfInterest was passed into, /// but not written inside, and it has caused an undefined read or a null /// pointer dereference outside. -class NoStoreFuncVisitor final : public BugReporterVisitor { +class NoStoreFuncVisitor final : public NoStateChangeFuncVisitor { const SubRegion *RegionOfInterest; MemRegionManager &MmrMgr; const SourceManager &SM; const PrintingPolicy &PP; - bugreporter::TrackingKind TKind; /// Recursion limit for dereferencing fields when looking for the /// region of interest. /// The limit of two indicates that we will dereference fields only once. static const unsigned DEREFERENCE_LIMIT = 2; - /// Frames writing into \c RegionOfInterest. - /// This visitor generates a note only if a function does not write into - /// a region of interest. This information is not immediately available - /// by looking at the node associated with the exit from the function - /// (usually the return statement). To avoid recomputing the same information - /// many times (going up the path for each node and checking whether the - /// region was written into) we instead lazily compute the - /// stack frames along the path which write into the region of interest. - llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingRegion; - llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingCalculated; - using RegionVector = SmallVector<const MemRegion *, 5>; public: NoStoreFuncVisitor(const SubRegion *R, bugreporter::TrackingKind TKind) - : RegionOfInterest(R), MmrMgr(R->getMemRegionManager()), + : NoStateChangeFuncVisitor(TKind), RegionOfInterest(R), + MmrMgr(R->getMemRegionManager()), SM(MmrMgr.getContext().getSourceManager()), - PP(MmrMgr.getContext().getPrintingPolicy()), TKind(TKind) {} + PP(MmrMgr.getContext().getPrintingPolicy()) {} void Profile(llvm::FoldingSetNodeID &ID) const override { static int Tag = 0; @@ -395,11 +528,13 @@ public: return static_cast<void *>(&Tag); } - PathDiagnosticPieceRef VisitNode(const ExplodedNode *N, - BugReporterContext &BR, - PathSensitiveBugReport &R) override; - private: + /// \return Whether \c RegionOfInterest was modified at \p CurrN compared to + /// the value it holds in \p CallExitBeginN. + virtual bool + wasModifiedBeforeCallExit(const ExplodedNode *CurrN, + const ExplodedNode *CallExitBeginN) override; + /// Attempts to find the region of interest in a given record decl, /// by either following the base classes or fields. /// Dereferences fields up to a given recursion limit. @@ -411,20 +546,21 @@ private: const MemRegion *R, const RegionVector &Vec = {}, int depth = 0); - /// Check and lazily calculate whether the region of interest is - /// modified in the stack frame to which \p N belongs. - /// The calculation is cached in FramesModifyingRegion. - bool isRegionOfInterestModifiedInFrame(const ExplodedNode *N) { - const LocationContext *Ctx = N->getLocationContext(); - const StackFrameContext *SCtx = Ctx->getStackFrame(); - if (!FramesModifyingCalculated.count(SCtx)) - findModifyingFrames(N); - return FramesModifyingRegion.count(SCtx); - } + // Region of interest corresponds to an IVar, exiting a method + // which could have written into that IVar, but did not. + virtual PathDiagnosticPieceRef + maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R, + const ObjCMethodCall &Call, + const ExplodedNode *N) override final; + + virtual PathDiagnosticPieceRef + maybeEmitNoteForCXXThis(PathSensitiveBugReport &R, + const CXXConstructorCall &Call, + const ExplodedNode *N) override final; - /// Write to \c FramesModifyingRegion all stack frames along - /// the path in the current stack frame which modify \c RegionOfInterest. - void findModifyingFrames(const ExplodedNode *N); + virtual PathDiagnosticPieceRef + maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call, + const ExplodedNode *N) override final; /// Consume the information on the no-store stack frame in order to /// either emit a note or suppress the report enirely. @@ -436,22 +572,18 @@ private: const MemRegion *MatchedRegion, StringRef FirstElement, bool FirstIsReferenceType, unsigned IndirectionLevel); - /// Pretty-print region \p MatchedRegion to \p os. - /// \return Whether printing succeeded. - bool prettyPrintRegionName(StringRef FirstElement, bool FirstIsReferenceType, + bool prettyPrintRegionName(const RegionVector &FieldChain, const MemRegion *MatchedRegion, - const RegionVector &FieldChain, - int IndirectionLevel, + StringRef FirstElement, bool FirstIsReferenceType, + unsigned IndirectionLevel, llvm::raw_svector_ostream &os); - /// Print first item in the chain, return new separator. - static StringRef prettyPrintFirstElement(StringRef FirstElement, - bool MoreItemsExpected, - int IndirectionLevel, - llvm::raw_svector_ostream &os); + StringRef prettyPrintFirstElement(StringRef FirstElement, + bool MoreItemsExpected, + int IndirectionLevel, + llvm::raw_svector_ostream &os); }; - -} // end of anonymous namespace +} // namespace /// \return Whether the method declaration \p Parent /// syntactically has a binary operation writing into the ivar \p Ivar. @@ -486,25 +618,6 @@ static bool potentiallyWritesIntoIvar(const Decl *Parent, return false; } -/// Get parameters associated with runtime definition in order -/// to get the correct parameter name. -static ArrayRef<ParmVarDecl *> getCallParameters(CallEventRef<> Call) { - // Use runtime definition, if available. - RuntimeDefinition RD = Call->getRuntimeDefinition(); - if (const auto *FD = dyn_cast_or_null<FunctionDecl>(RD.getDecl())) - return FD->parameters(); - if (const auto *MD = dyn_cast_or_null<ObjCMethodDecl>(RD.getDecl())) - return MD->parameters(); - - return Call->parameters(); -} - -/// \return whether \p Ty points to a const type, or is a const reference. -static bool isPointerToConst(QualType Ty) { - return !Ty->getPointeeType().isNull() && - Ty->getPointeeType().getCanonicalType().isConstQualified(); -} - /// Attempts to find the region of interest in a given CXX decl, /// by either following the base classes or fields. /// Dereferences fields up to a given recursion limit. @@ -564,68 +677,66 @@ NoStoreFuncVisitor::findRegionOfInterestInRecord( } PathDiagnosticPieceRef -NoStoreFuncVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BR, - PathSensitiveBugReport &R) { - - const LocationContext *Ctx = N->getLocationContext(); - const StackFrameContext *SCtx = Ctx->getStackFrame(); - ProgramStateRef State = N->getState(); - auto CallExitLoc = N->getLocationAs<CallExitBegin>(); - - // No diagnostic if region was modified inside the frame. - if (!CallExitLoc || isRegionOfInterestModifiedInFrame(N)) - return nullptr; - - CallEventRef<> Call = - BR.getStateManager().getCallEventManager().getCaller(SCtx, State); - - // Region of interest corresponds to an IVar, exiting a method - // which could have written into that IVar, but did not. - if (const auto *MC = dyn_cast<ObjCMethodCall>(Call)) { - if (const auto *IvarR = dyn_cast<ObjCIvarRegion>(RegionOfInterest)) { - const MemRegion *SelfRegion = MC->getReceiverSVal().getAsRegion(); - if (RegionOfInterest->isSubRegionOf(SelfRegion) && - potentiallyWritesIntoIvar(Call->getRuntimeDefinition().getDecl(), - IvarR->getDecl())) - return maybeEmitNote(R, *Call, N, {}, SelfRegion, "self", - /*FirstIsReferenceType=*/false, 1); - } +NoStoreFuncVisitor::maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R, + const ObjCMethodCall &Call, + const ExplodedNode *N) { + if (const auto *IvarR = dyn_cast<ObjCIvarRegion>(RegionOfInterest)) { + const MemRegion *SelfRegion = Call.getReceiverSVal().getAsRegion(); + if (RegionOfInterest->isSubRegionOf(SelfRegion) && + potentiallyWritesIntoIvar(Call.getRuntimeDefinition().getDecl(), + IvarR->getDecl())) + return maybeEmitNote(R, Call, N, {}, SelfRegion, "self", + /*FirstIsReferenceType=*/false, 1); } + return nullptr; +} - if (const auto *CCall = dyn_cast<CXXConstructorCall>(Call)) { - const MemRegion *ThisR = CCall->getCXXThisVal().getAsRegion(); - if (RegionOfInterest->isSubRegionOf(ThisR) && - !CCall->getDecl()->isImplicit()) - return maybeEmitNote(R, *Call, N, {}, ThisR, "this", - /*FirstIsReferenceType=*/false, 1); +PathDiagnosticPieceRef +NoStoreFuncVisitor::maybeEmitNoteForCXXThis(PathSensitiveBugReport &R, + const CXXConstructorCall &Call, + const ExplodedNode *N) { + const MemRegion *ThisR = Call.getCXXThisVal().getAsRegion(); + if (RegionOfInterest->isSubRegionOf(ThisR) && !Call.getDecl()->isImplicit()) + return maybeEmitNote(R, Call, N, {}, ThisR, "this", + /*FirstIsReferenceType=*/false, 1); + + // Do not generate diagnostics for not modified parameters in + // constructors. + return nullptr; +} - // Do not generate diagnostics for not modified parameters in - // constructors. - return nullptr; - } +/// \return whether \p Ty points to a const type, or is a const reference. +static bool isPointerToConst(QualType Ty) { + return !Ty->getPointeeType().isNull() && + Ty->getPointeeType().getCanonicalType().isConstQualified(); +} - ArrayRef<ParmVarDecl *> parameters = getCallParameters(Call); - for (unsigned I = 0; I < Call->getNumArgs() && I < parameters.size(); ++I) { - const ParmVarDecl *PVD = parameters[I]; - SVal V = Call->getArgSVal(I); +PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNoteForParameters( + PathSensitiveBugReport &R, const CallEvent &Call, const ExplodedNode *N) { + ArrayRef<ParmVarDecl *> Parameters = Call.parameters(); + for (unsigned I = 0; I < Call.getNumArgs() && I < Parameters.size(); ++I) { + const ParmVarDecl *PVD = Parameters[I]; + SVal V = Call.getArgSVal(I); bool ParamIsReferenceType = PVD->getType()->isReferenceType(); std::string ParamName = PVD->getNameAsString(); - int IndirectionLevel = 1; + unsigned IndirectionLevel = 1; QualType T = PVD->getType(); while (const MemRegion *MR = V.getAsRegion()) { if (RegionOfInterest->isSubRegionOf(MR) && !isPointerToConst(T)) - return maybeEmitNote(R, *Call, N, {}, MR, ParamName, + return maybeEmitNote(R, Call, N, {}, MR, ParamName, ParamIsReferenceType, IndirectionLevel); QualType PT = T->getPointeeType(); if (PT.isNull() || PT->isVoidType()) break; + ProgramStateRef State = N->getState(); + if (const RecordDecl *RD = PT->getAsRecordDecl()) if (Optional<RegionVector> P = findRegionOfInterestInRecord(RD, State, MR)) - return maybeEmitNote(R, *Call, N, *P, RegionOfInterest, ParamName, + return maybeEmitNote(R, Call, N, *P, RegionOfInterest, ParamName, ParamIsReferenceType, IndirectionLevel); V = State->getSVal(MR, PT); @@ -637,40 +748,11 @@ NoStoreFuncVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BR, return nullptr; } -void NoStoreFuncVisitor::findModifyingFrames(const ExplodedNode *N) { - assert(N->getLocationAs<CallExitBegin>()); - ProgramStateRef LastReturnState = N->getState(); - SVal ValueAtReturn = LastReturnState->getSVal(RegionOfInterest); - const LocationContext *Ctx = N->getLocationContext(); - const StackFrameContext *OriginalSCtx = Ctx->getStackFrame(); - - do { - ProgramStateRef State = N->getState(); - auto CallExitLoc = N->getLocationAs<CallExitBegin>(); - if (CallExitLoc) { - LastReturnState = State; - ValueAtReturn = LastReturnState->getSVal(RegionOfInterest); - } - - FramesModifyingCalculated.insert(N->getLocationContext()->getStackFrame()); - - if (wasRegionOfInterestModifiedAt(RegionOfInterest, N, ValueAtReturn)) { - const StackFrameContext *SCtx = N->getStackFrame(); - while (!SCtx->inTopFrame()) { - auto p = FramesModifyingRegion.insert(SCtx); - if (!p.second) - break; // Frame and all its parents already inserted. - SCtx = SCtx->getParent()->getStackFrame(); - } - } - - // Stop calculation at the call to the current function. - if (auto CE = N->getLocationAs<CallEnter>()) - if (CE->getCalleeContext() == OriginalSCtx) - break; - - N = N->getFirstPred(); - } while (N); +bool NoStoreFuncVisitor::wasModifiedBeforeCallExit( + const ExplodedNode *CurrN, const ExplodedNode *CallExitBeginN) { + return ::wasRegionOfInterestModifiedAt( + RegionOfInterest, CurrN, + CallExitBeginN->getState()->getSVal(RegionOfInterest)); } static llvm::StringLiteral WillBeUsedForACondition = @@ -681,27 +763,6 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote( const RegionVector &FieldChain, const MemRegion *MatchedRegion, StringRef FirstElement, bool FirstIsReferenceType, unsigned IndirectionLevel) { - // Optimistically suppress uninitialized value bugs that result - // from system headers having a chance to initialize the value - // but failing to do so. It's too unlikely a system header's fault. - // It's much more likely a situation in which the function has a failure - // mode that the user decided not to check. If we want to hunt such - // omitted checks, we should provide an explicit function-specific note - // describing the precondition under which the function isn't supposed to - // initialize its out-parameter, and additionally check that such - // precondition can actually be fulfilled on the current path. - if (Call.isInSystemHeader()) { - // We make an exception for system header functions that have no branches. - // Such functions unconditionally fail to initialize the variable. - // If they call other functions that have more paths within them, - // this suppression would still apply when we visit these inner functions. - // One common example of a standard function that doesn't ever initialize - // its out parameter is operator placement new; it's up to the follow-up - // constructor (if any) to initialize the memory. - if (!N->getStackFrame()->getCFG()->isLinear()) - R.markInvalid(getTag(), nullptr); - return nullptr; - } PathDiagnosticLocation L = PathDiagnosticLocation::create(N->getLocation(), SM); @@ -717,8 +778,8 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote( os << "Returning without writing to '"; // Do not generate the note if failed to pretty-print. - if (!prettyPrintRegionName(FirstElement, FirstIsReferenceType, MatchedRegion, - FieldChain, IndirectionLevel, os)) + if (!prettyPrintRegionName(FieldChain, MatchedRegion, FirstElement, + FirstIsReferenceType, IndirectionLevel, os)) return nullptr; os << "'"; @@ -727,11 +788,11 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote( return std::make_shared<PathDiagnosticEventPiece>(L, os.str()); } -bool NoStoreFuncVisitor::prettyPrintRegionName(StringRef FirstElement, - bool FirstIsReferenceType, +bool NoStoreFuncVisitor::prettyPrintRegionName(const RegionVector &FieldChain, const MemRegion *MatchedRegion, - const RegionVector &FieldChain, - int IndirectionLevel, + StringRef FirstElement, + bool FirstIsReferenceType, + unsigned IndirectionLevel, llvm::raw_svector_ostream &os) { if (FirstIsReferenceType) @@ -754,7 +815,7 @@ bool NoStoreFuncVisitor::prettyPrintRegionName(StringRef FirstElement, // Just keep going up to the base region. // Element regions may appear due to casts. - if (isa<CXXBaseObjectRegion>(R) || isa<CXXTempObjectRegion>(R)) + if (isa<CXXBaseObjectRegion, CXXTempObjectRegion>(R)) continue; if (Sep.empty()) @@ -1153,7 +1214,7 @@ class StoreSiteFinder final : public TrackingBugReporterVisitor { public: /// \param V We're searching for the store where \c R received this value. /// \param R The region we're tracking. - /// \param TKind May limit the amount of notes added to the bug report. + /// \param Options Tracking behavior options. /// \param OriginSFC Only adds notes when the last store happened in a /// different stackframe to this one. Disregarded if the tracking kind /// is thorough. @@ -2674,9 +2735,8 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex, const Expr *OriginalExpr = Ex; Ex = Ex->IgnoreParenCasts(); - if (isa<GNUNullExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) || - isa<CXXBoolLiteralExpr>(Ex) || isa<IntegerLiteral>(Ex) || - isa<FloatingLiteral>(Ex)) { + if (isa<GNUNullExpr, ObjCBoolLiteralExpr, CXXBoolLiteralExpr, IntegerLiteral, + FloatingLiteral>(Ex)) { // Use heuristics to determine if the expression is a macro // expanding to a literal and if so, use the macro's name. SourceLocation BeginLoc = OriginalExpr->getBeginLoc(); diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallDescription.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallDescription.cpp new file mode 100644 index 000000000000..810fe365d021 --- /dev/null +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallDescription.cpp @@ -0,0 +1,146 @@ +//===- CallDescription.cpp - function/method call matching --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file This file defines a generic mechanism for matching for function and +/// method calls of C, C++, and Objective-C languages. Instances of these +/// classes are frequently used together with the CallEvent classes. +// +//===----------------------------------------------------------------------===// + +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include <iterator> + +using namespace llvm; +using namespace clang; + +using MaybeCount = Optional<unsigned>; + +// A constructor helper. +static MaybeCount readRequiredParams(MaybeCount RequiredArgs, + MaybeCount RequiredParams) { + if (RequiredParams) + return RequiredParams; + if (RequiredArgs) + return RequiredArgs; + return None; +} + +ento::CallDescription::CallDescription(CallDescriptionFlags Flags, + ArrayRef<const char *> QualifiedName, + MaybeCount RequiredArgs /*= None*/, + MaybeCount RequiredParams /*= None*/) + : RequiredArgs(RequiredArgs), + RequiredParams(readRequiredParams(RequiredArgs, RequiredParams)), + Flags(Flags) { + assert(!QualifiedName.empty()); + this->QualifiedName.reserve(QualifiedName.size()); + llvm::copy(QualifiedName, std::back_inserter(this->QualifiedName)); +} + +/// Construct a CallDescription with default flags. +ento::CallDescription::CallDescription(ArrayRef<const char *> QualifiedName, + MaybeCount RequiredArgs /*= None*/, + MaybeCount RequiredParams /*= None*/) + : CallDescription(CDF_None, QualifiedName, RequiredArgs, RequiredParams) {} + +bool ento::CallDescription::matches(const CallEvent &Call) const { + // FIXME: Add ObjC Message support. + if (Call.getKind() == CE_ObjCMessage) + return false; + + const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); + if (!FD) + return false; + + if (Flags & CDF_MaybeBuiltin) { + return CheckerContext::isCLibraryFunction(FD, getFunctionName()) && + (!RequiredArgs || *RequiredArgs <= Call.getNumArgs()) && + (!RequiredParams || *RequiredParams <= Call.parameters().size()); + } + + if (!II.hasValue()) { + II = &Call.getState()->getStateManager().getContext().Idents.get( + getFunctionName()); + } + + const auto MatchNameOnly = [](const CallDescription &CD, + const NamedDecl *ND) -> bool { + DeclarationName Name = ND->getDeclName(); + if (const auto *II = Name.getAsIdentifierInfo()) + return II == CD.II.getValue(); // Fast case. + + // Fallback to the slow stringification and comparison for: + // C++ overloaded operators, constructors, destructors, etc. + // FIXME This comparison is way SLOWER than comparing pointers. + // At some point in the future, we should compare FunctionDecl pointers. + return Name.getAsString() == CD.getFunctionName(); + }; + + const auto ExactMatchArgAndParamCounts = + [](const CallEvent &Call, const CallDescription &CD) -> bool { + const bool ArgsMatch = + !CD.RequiredArgs || *CD.RequiredArgs == Call.getNumArgs(); + const bool ParamsMatch = + !CD.RequiredParams || *CD.RequiredParams == Call.parameters().size(); + return ArgsMatch && ParamsMatch; + }; + + const auto MatchQualifiedNameParts = [](const CallDescription &CD, + const Decl *D) -> bool { + const auto FindNextNamespaceOrRecord = + [](const DeclContext *Ctx) -> const DeclContext * { + while (Ctx && !isa<NamespaceDecl, RecordDecl>(Ctx)) + Ctx = Ctx->getParent(); + return Ctx; + }; + + auto QualifierPartsIt = CD.begin_qualified_name_parts(); + const auto QualifierPartsEndIt = CD.end_qualified_name_parts(); + + // Match namespace and record names. Skip unrelated names if they don't + // match. + const DeclContext *Ctx = FindNextNamespaceOrRecord(D->getDeclContext()); + for (; Ctx && QualifierPartsIt != QualifierPartsEndIt; + Ctx = FindNextNamespaceOrRecord(Ctx->getParent())) { + // If not matched just continue and try matching for the next one. + if (cast<NamedDecl>(Ctx)->getName() != *QualifierPartsIt) + continue; + ++QualifierPartsIt; + } + + // We matched if we consumed all expected qualifier segments. + return QualifierPartsIt == QualifierPartsEndIt; + }; + + // Let's start matching... + if (!ExactMatchArgAndParamCounts(Call, *this)) + return false; + + if (!MatchNameOnly(*this, FD)) + return false; + + if (!hasQualifiedNameParts()) + return true; + + return MatchQualifiedNameParts(*this, FD); +} + +ento::CallDescriptionSet::CallDescriptionSet( + std::initializer_list<CallDescription> &&List) { + Impl.LinearMap.reserve(List.size()); + for (const CallDescription &CD : List) + Impl.LinearMap.push_back({CD, /*unused*/ true}); +} + +bool ento::CallDescriptionSet::contains(const CallEvent &Call) const { + return static_cast<bool>(Impl.lookup(Call)); +} diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 3785f498414f..764dad3e7ab4 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -36,6 +36,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/CrossTU/CrossTranslationUnit.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h" @@ -73,26 +74,7 @@ QualType CallEvent::getResultType() const { const Expr *E = getOriginExpr(); if (!E) return Ctx.VoidTy; - assert(E); - - QualType ResultTy = E->getType(); - - // A function that returns a reference to 'int' will have a result type - // of simply 'int'. Check the origin expr's value kind to recover the - // proper type. - switch (E->getValueKind()) { - case VK_LValue: - ResultTy = Ctx.getLValueReferenceType(ResultTy); - break; - case VK_XValue: - ResultTy = Ctx.getRValueReferenceType(ResultTy); - break; - case VK_PRValue: - // No adjustment is necessary. - break; - } - - return ResultTy; + return Ctx.getReferenceQualifiedType(E); } static bool isCallback(QualType T) { @@ -321,64 +303,6 @@ ProgramPoint CallEvent::getProgramPoint(bool IsPreVisit, return PostImplicitCall(D, Loc, getLocationContext(), Tag); } -bool CallEvent::isCalled(const CallDescription &CD) const { - // FIXME: Add ObjC Message support. - if (getKind() == CE_ObjCMessage) - return false; - - const IdentifierInfo *II = getCalleeIdentifier(); - if (!II) - return false; - const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(getDecl()); - if (!FD) - return false; - - if (CD.Flags & CDF_MaybeBuiltin) { - return CheckerContext::isCLibraryFunction(FD, CD.getFunctionName()) && - (!CD.RequiredArgs || CD.RequiredArgs <= getNumArgs()) && - (!CD.RequiredParams || CD.RequiredParams <= parameters().size()); - } - - if (!CD.IsLookupDone) { - CD.IsLookupDone = true; - CD.II = &getState()->getStateManager().getContext().Idents.get( - CD.getFunctionName()); - } - - if (II != CD.II) - return false; - - // If CallDescription provides prefix names, use them to improve matching - // accuracy. - if (CD.QualifiedName.size() > 1 && FD) { - const DeclContext *Ctx = FD->getDeclContext(); - // See if we'll be able to match them all. - size_t NumUnmatched = CD.QualifiedName.size() - 1; - for (; Ctx && isa<NamedDecl>(Ctx); Ctx = Ctx->getParent()) { - if (NumUnmatched == 0) - break; - - if (const auto *ND = dyn_cast<NamespaceDecl>(Ctx)) { - if (ND->getName() == CD.QualifiedName[NumUnmatched - 1]) - --NumUnmatched; - continue; - } - - if (const auto *RD = dyn_cast<RecordDecl>(Ctx)) { - if (RD->getName() == CD.QualifiedName[NumUnmatched - 1]) - --NumUnmatched; - continue; - } - } - - if (NumUnmatched > 0) - return false; - } - - return (!CD.RequiredArgs || CD.RequiredArgs == getNumArgs()) && - (!CD.RequiredParams || CD.RequiredParams == parameters().size()); -} - SVal CallEvent::getArgSVal(unsigned Index) const { const Expr *ArgE = getArgExpr(Index); if (!ArgE) @@ -406,7 +330,6 @@ void CallEvent::dump(raw_ostream &Out) const { ASTContext &Ctx = getState()->getStateManager().getContext(); if (const Expr *E = getOriginExpr()) { E->printPretty(Out, nullptr, Ctx.getPrintingPolicy()); - Out << "\n"; return; } @@ -420,9 +343,7 @@ void CallEvent::dump(raw_ostream &Out) const { } bool CallEvent::isCallStmt(const Stmt *S) { - return isa<CallExpr>(S) || isa<ObjCMessageExpr>(S) - || isa<CXXConstructExpr>(S) - || isa<CXXNewExpr>(S); + return isa<CallExpr, ObjCMessageExpr, CXXConstructExpr, CXXNewExpr>(S); } QualType CallEvent::getDeclaredResultType(const Decl *D) { @@ -676,7 +597,7 @@ bool AnyFunctionCall::argumentsMayEscape() const { // - NSXXInsertXX, for example NSMapInsertIfAbsent, since they can // be deallocated by NSMapRemove. - if (FName.startswith("NS") && (FName.find("Insert") != StringRef::npos)) + if (FName.startswith("NS") && FName.contains("Insert")) return true; // - Many CF containers allow objects to escape through custom @@ -1058,12 +979,12 @@ const PseudoObjectExpr *ObjCMethodCall::getContainingPseudoObjectExpr() const { static const Expr * getSyntacticFromForPseudoObjectExpr(const PseudoObjectExpr *POE) { - const Expr *Syntactic = POE->getSyntacticForm(); + const Expr *Syntactic = POE->getSyntacticForm()->IgnoreParens(); // This handles the funny case of assigning to the result of a getter. // This can happen if the getter returns a non-const reference. if (const auto *BO = dyn_cast<BinaryOperator>(Syntactic)) - Syntactic = BO->getLHS(); + Syntactic = BO->getLHS()->IgnoreParens(); return Syntactic; } diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp index 3d64ce453479..4c684c3ffd9b 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp @@ -38,7 +38,7 @@ StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const { } StringRef CheckerContext::getDeclDescription(const Decl *D) { - if (isa<ObjCMethodDecl>(D) || isa<CXXMethodDecl>(D)) + if (isa<ObjCMethodDecl, CXXMethodDecl>(D)) return "method"; if (isa<BlockDecl>(D)) return "anonymous block"; @@ -55,7 +55,7 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD, if (Name.empty()) return true; StringRef BName = FD->getASTContext().BuiltinInfo.getName(BId); - if (BName.find(Name) != StringRef::npos) + if (BName.contains(Name)) return true; } @@ -83,11 +83,10 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD, if (FName.equals(Name)) return true; - if (FName.startswith("__inline") && (FName.find(Name) != StringRef::npos)) + if (FName.startswith("__inline") && FName.contains(Name)) return true; - if (FName.startswith("__") && FName.endswith("_chk") && - FName.find(Name) != StringRef::npos) + if (FName.startswith("__") && FName.endswith("_chk") && FName.contains(Name)) return true; return false; diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp index e09399a83589..94287b7992dd 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include <cassert> #include <vector> @@ -655,7 +656,7 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, ExprEngine &Eng, const EvalCallOptions &CallOpts) { for (auto *const Pred : Src) { - bool anyEvaluated = false; + Optional<CheckerNameRef> evaluatorChecker; ExplodedNodeSet checkDst; NodeBuilder B(Pred, checkDst, Eng.getBuilderContext()); @@ -674,10 +675,26 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, CheckerContext C(B, Eng, Pred, L); evaluated = EvalCallChecker(Call, C); } - assert(!(evaluated && anyEvaluated) - && "There are more than one checkers evaluating the call"); +#ifndef NDEBUG + if (evaluated && evaluatorChecker) { + const auto toString = [](const CallEvent &Call) -> std::string { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + Call.dump(OS); + OS.flush(); + return Buf; + }; + std::string AssertionMessage = llvm::formatv( + "The '{0}' call has been already evaluated by the {1} checker, " + "while the {2} checker also tried to evaluate the same call. At " + "most one checker supposed to evaluate a call.", + toString(Call), evaluatorChecker->getName(), + EvalCallChecker.Checker->getCheckerName()); + llvm_unreachable(AssertionMessage.c_str()); + } +#endif if (evaluated) { - anyEvaluated = true; + evaluatorChecker = EvalCallChecker.Checker->getCheckerName(); Dst.insert(checkDst); #ifdef NDEBUG break; // on release don't check that no other checker also evals. @@ -686,7 +703,7 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, } // If none of the checkers evaluated the call, ask ExprEngine to handle it. - if (!anyEvaluated) { + if (!evaluatorChecker) { NodeBuilder B(Pred, Dst, Eng.getBuilderContext()); Eng.defaultEvalCall(B, Pred, Call, CallOpts); } diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp index ee7474592528..64e915a09ceb 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Environment.cpp @@ -88,7 +88,7 @@ SVal Environment::getSVal(const EnvironmentEntry &Entry, const Stmt *S = Entry.getStmt(); assert(!isa<ObjCForCollectionStmt>(S) && "Use ExprEngine::hasMoreIteration()!"); - assert((isa<Expr>(S) || isa<ReturnStmt>(S)) && + assert((isa<Expr, ReturnStmt>(S)) && "Environment can only argue about Exprs, since only they express " "a value! Any non-expression statement stored in Environment is a " "result of a hack!"); diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp index 635495e9bf60..294572b7dbe4 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp @@ -50,8 +50,7 @@ ExplodedGraph::~ExplodedGraph() = default; bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) { if (!Ex->isLValue()) return false; - return isa<DeclRefExpr>(Ex) || isa<MemberExpr>(Ex) || - isa<ObjCIvarRefExpr>(Ex) || isa<ArraySubscriptExpr>(Ex); + return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex); } bool ExplodedGraph::shouldCollect(const ExplodedNode *node) { diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 66332d3b848c..12b005d43c55 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -393,8 +393,7 @@ ProgramStateRef ExprEngine::createTemporaryRegionIfNeeded( SVal BaseReg = Reg; // Make the necessary adjustments to obtain the sub-object. - for (auto I = Adjustments.rbegin(), E = Adjustments.rend(); I != E; ++I) { - const SubobjectAdjustment &Adj = *I; + for (const SubobjectAdjustment &Adj : llvm::reverse(Adjustments)) { switch (Adj.Kind) { case SubobjectAdjustment::DerivedToBaseAdjustment: Reg = StoreMgr.evalDerivedToBase(Reg, Adj.DerivedToBase.BasePath); @@ -1297,8 +1296,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: + case Stmt::OMPGenericLoopDirectiveClass: case Stmt::CapturedStmtClass: - case Stmt::OMPUnrollDirectiveClass: { + case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPMetaDirectiveClass: { const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState()); Engine.addAbortedBlock(node, currBldrCtx->getBlock()); break; @@ -1988,8 +1989,7 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 && AMgr.options.ShouldWidenLoops) { const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt(); - if (!(Term && - (isa<ForStmt>(Term) || isa<WhileStmt>(Term) || isa<DoStmt>(Term)))) + if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(Term)) return; // Widen. const LocationContext *LCtx = Pred->getLocationContext(); @@ -2265,7 +2265,7 @@ void ExprEngine::processBranch(const Stmt *Condition, continue; } if (StTrue && StFalse) - assert(!isa<ObjCForCollectionStmt>(Condition));; + assert(!isa<ObjCForCollectionStmt>(Condition)); // Process the true branch. if (builder.isFeasible(true)) { @@ -2593,7 +2593,7 @@ void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D, ProgramPoint::PostLValueKind); return; } - if (isa<FieldDecl>(D) || isa<IndirectFieldDecl>(D)) { + if (isa<FieldDecl, IndirectFieldDecl>(D)) { // Delegate all work related to pointer to members to the surrounding // operator&. return; @@ -2670,7 +2670,7 @@ void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred, // Handle static member variables and enum constants accessed via // member syntax. - if (isa<VarDecl>(Member) || isa<EnumConstantDecl>(Member)) { + if (isa<VarDecl, EnumConstantDecl>(Member)) { for (const auto I : CheckedSet) VisitCommonDeclRefExpr(M, Member, I, EvalSet); } else { diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 7ad3dca831ac..69d67cf9b465 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -550,7 +550,7 @@ void ExprEngine::VisitCompoundLiteralExpr(const CompoundLiteralExpr *CL, const Expr *Init = CL->getInitializer(); SVal V = State->getSVal(CL->getInitializer(), LCtx); - if (isa<CXXConstructExpr>(Init) || isa<CXXStdInitializerListExpr>(Init)) { + if (isa<CXXConstructExpr, CXXStdInitializerListExpr>(Init)) { // No work needed. Just pass the value up to this expression. } else { assert(isa<InitListExpr>(Init)); @@ -757,9 +757,8 @@ void ExprEngine::VisitInitListExpr(const InitListExpr *IE, return; } - for (InitListExpr::const_reverse_iterator it = IE->rbegin(), - ei = IE->rend(); it != ei; ++it) { - SVal V = state->getSVal(cast<Expr>(*it), LCtx); + for (const Stmt *S : llvm::reverse(*IE)) { + SVal V = state->getSVal(cast<Expr>(S), LCtx); vals = getBasicVals().prependSVal(V, vals); } @@ -984,8 +983,7 @@ void ExprEngine::VisitUnaryOperator(const UnaryOperator* U, ExplodedNode *Pred, if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Ex)) { const ValueDecl *VD = DRE->getDecl(); - if (isa<CXXMethodDecl>(VD) || isa<FieldDecl>(VD) || - isa<IndirectFieldDecl>(VD)) { + if (isa<CXXMethodDecl, FieldDecl, IndirectFieldDecl>(VD)) { ProgramStateRef State = (*I)->getState(); const LocationContext *LCtx = (*I)->getLocationContext(); SVal SV = svalBuilder.getMemberPointer(cast<NamedDecl>(VD)); diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index cab65687444b..ba105f34a915 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -531,7 +531,7 @@ void ExprEngine::handleConstructor(const Expr *E, // FIXME: Instead of relying on the ParentMap, we should have the // trigger-statement (InitListExpr in this case) passed down from CFG or // otherwise always available during construction. - if (dyn_cast_or_null<InitListExpr>(LCtx->getParentMap().getParent(E))) { + if (isa_and_nonnull<InitListExpr>(LCtx->getParentMap().getParent(E))) { MemRegionManager &MRMgr = getSValBuilder().getRegionManager(); Target = loc::MemRegionVal(MRMgr.getCXXTempObjectRegion(E, LCtx)); CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true; diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index 64fc32ea7554..3b847d6f0d87 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -27,6 +27,8 @@ #include "clang/Rewrite/Core/Rewriter.h" #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" @@ -57,6 +59,8 @@ using namespace ento; namespace { +class ArrowMap; + class HTMLDiagnostics : public PathDiagnosticConsumer { PathDiagnosticConsumerOptions DiagOpts; std::string Directory; @@ -77,60 +81,93 @@ public: void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, FilesMade *filesMade) override; - StringRef getName() const override { - return "HTMLDiagnostics"; - } + StringRef getName() const override { return "HTMLDiagnostics"; } bool supportsCrossFileDiagnostics() const override { return SupportsCrossFileDiagnostics; } - unsigned ProcessMacroPiece(raw_ostream &os, - const PathDiagnosticMacroPiece& P, + unsigned ProcessMacroPiece(raw_ostream &os, const PathDiagnosticMacroPiece &P, unsigned num); + unsigned ProcessControlFlowPiece(Rewriter &R, FileID BugFileID, + const PathDiagnosticControlFlowPiece &P, + unsigned Number); + void HandlePiece(Rewriter &R, FileID BugFileID, const PathDiagnosticPiece &P, const std::vector<SourceRange> &PopUpRanges, unsigned num, unsigned max); - void HighlightRange(Rewriter& R, FileID BugFileID, SourceRange Range, + void HighlightRange(Rewriter &R, FileID BugFileID, SourceRange Range, const char *HighlightStart = "<span class=\"mrange\">", const char *HighlightEnd = "</span>"); - void ReportDiag(const PathDiagnostic& D, - FilesMade *filesMade); + void ReportDiag(const PathDiagnostic &D, FilesMade *filesMade); // Generate the full HTML report - std::string GenerateHTML(const PathDiagnostic& D, Rewriter &R, - const SourceManager& SMgr, const PathPieces& path, + std::string GenerateHTML(const PathDiagnostic &D, Rewriter &R, + const SourceManager &SMgr, const PathPieces &path, const char *declName); // Add HTML header/footers to file specified by FID - void FinalizeHTML(const PathDiagnostic& D, Rewriter &R, - const SourceManager& SMgr, const PathPieces& path, + void FinalizeHTML(const PathDiagnostic &D, Rewriter &R, + const SourceManager &SMgr, const PathPieces &path, FileID FID, const FileEntry *Entry, const char *declName); // Rewrite the file specified by FID with HTML formatting. - void RewriteFile(Rewriter &R, const PathPieces& path, FileID FID); + void RewriteFile(Rewriter &R, const PathPieces &path, FileID FID); + PathGenerationScheme getGenerationScheme() const override { + return Everything; + } private: + void addArrowSVGs(Rewriter &R, FileID BugFileID, + const ArrowMap &ArrowIndices); + /// \return Javascript for displaying shortcuts help; StringRef showHelpJavascript(); /// \return Javascript for navigating the HTML report using j/k keys. StringRef generateKeyboardNavigationJavascript(); + /// \return Javascript for drawing control-flow arrows. + StringRef generateArrowDrawingJavascript(); + /// \return JavaScript for an option to only show relevant lines. - std::string showRelevantLinesJavascript( - const PathDiagnostic &D, const PathPieces &path); + std::string showRelevantLinesJavascript(const PathDiagnostic &D, + const PathPieces &path); /// Write executed lines from \p D in JSON format into \p os. - void dumpCoverageData(const PathDiagnostic &D, - const PathPieces &path, + void dumpCoverageData(const PathDiagnostic &D, const PathPieces &path, llvm::raw_string_ostream &os); }; +bool isArrowPiece(const PathDiagnosticPiece &P) { + return isa<PathDiagnosticControlFlowPiece>(P) && P.getString().empty(); +} + +unsigned getPathSizeWithoutArrows(const PathPieces &Path) { + unsigned TotalPieces = Path.size(); + unsigned TotalArrowPieces = llvm::count_if( + Path, [](const PathDiagnosticPieceRef &P) { return isArrowPiece(*P); }); + return TotalPieces - TotalArrowPieces; +} + +class ArrowMap : public std::vector<unsigned> { + using Base = std::vector<unsigned>; + +public: + ArrowMap(unsigned Size) : Base(Size, 0) {} + unsigned getTotalNumberOfArrows() const { return at(0); } +}; + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const ArrowMap &Indices) { + OS << "[ "; + llvm::interleave(Indices, OS, ","); + return OS << " ]"; +} + } // namespace void ento::createHTMLDiagnosticConsumer( @@ -208,6 +245,18 @@ void HTMLDiagnostics::FlushDiagnosticsImpl( ReportDiag(*Diag, filesMade); } +static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D, + const Preprocessor &PP) { + SourceManager &SMgr = PP.getSourceManager(); + PathDiagnosticLocation UPDLoc = D.getUniqueingLoc(); + FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid() + ? UPDLoc.asLocation() + : D.getLocation().asLocation()), + SMgr); + return getIssueHash(L, D.getCheckerName(), D.getBugType(), + D.getDeclWithIssue(), PP.getLangOpts()); +} + void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, FilesMade *filesMade) { // Create the HTML directory if it is missing. @@ -234,11 +283,6 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, // Create a new rewriter to generate HTML. Rewriter R(const_cast<SourceManager&>(SMgr), PP.getLangOpts()); - // The file for the first path element is considered the main report file, it - // will usually be equivalent to SMgr.getMainFileID(); however, it might be a - // header when -analyzer-opt-analyze-headers is used. - FileID ReportFile = path.front()->getLocation().asLocation().getExpansionLoc().getFileID(); - // Get the function/method name SmallString<128> declName("unknown"); int offsetDecl = 0; @@ -265,46 +309,52 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D, // Create a path for the target HTML file. int FD; - SmallString<128> Model, ResultPath; - - if (!DiagOpts.ShouldWriteStableReportFilename) { - llvm::sys::path::append(Model, Directory, "report-%%%%%%.html"); - if (std::error_code EC = - llvm::sys::fs::make_absolute(Model)) { - llvm::errs() << "warning: could not make '" << Model - << "' absolute: " << EC.message() << '\n'; - return; - } - if (std::error_code EC = llvm::sys::fs::createUniqueFile( - Model, FD, ResultPath, llvm::sys::fs::OF_Text)) { - llvm::errs() << "warning: could not create file in '" << Directory - << "': " << EC.message() << '\n'; - return; - } - } else { - int i = 1; - std::error_code EC; - do { - // Find a filename which is not already used - const FileEntry* Entry = SMgr.getFileEntryForID(ReportFile); - std::stringstream filename; - Model = ""; - filename << "report-" - << llvm::sys::path::filename(Entry->getName()).str() - << "-" << declName.c_str() - << "-" << offsetDecl - << "-" << i << ".html"; - llvm::sys::path::append(Model, Directory, - filename.str()); - EC = llvm::sys::fs::openFileForReadWrite( - Model, FD, llvm::sys::fs::CD_CreateNew, llvm::sys::fs::OF_None); - if (EC && EC != llvm::errc::file_exists) { - llvm::errs() << "warning: could not create file '" << Model - << "': " << EC.message() << '\n'; - return; - } - i++; - } while (EC); + + SmallString<128> FileNameStr; + llvm::raw_svector_ostream FileName(FileNameStr); + FileName << "report-"; + + // Historically, neither the stable report filename nor the unstable report + // filename were actually stable. That said, the stable report filename + // was more stable because it was mostly composed of information + // about the bug report instead of being completely random. + // Now both stable and unstable report filenames are in fact stable + // but the stable report filename is still more verbose. + if (DiagOpts.ShouldWriteVerboseReportFilename) { + // FIXME: This code relies on knowing what constitutes the issue hash. + // Otherwise deduplication won't work correctly. + FileID ReportFile = + path.back()->getLocation().asLocation().getExpansionLoc().getFileID(); + + const FileEntry *Entry = SMgr.getFileEntryForID(ReportFile); + + FileName << llvm::sys::path::filename(Entry->getName()).str() << "-" + << declName.c_str() << "-" << offsetDecl << "-"; + } + + FileName << StringRef(getIssueHash(D, PP)).substr(0, 6).str() << ".html"; + + SmallString<128> ResultPath; + llvm::sys::path::append(ResultPath, Directory, FileName.str()); + if (std::error_code EC = llvm::sys::fs::make_absolute(ResultPath)) { + llvm::errs() << "warning: could not make '" << ResultPath + << "' absolute: " << EC.message() << '\n'; + return; + } + + if (std::error_code EC = llvm::sys::fs::openFileForReadWrite( + ResultPath, FD, llvm::sys::fs::CD_CreateNew, + llvm::sys::fs::OF_Text)) { + // Existence of the file corresponds to the situation where a different + // Clang instance has emitted a bug report with the same issue hash. + // This is an entirely normal situation that does not deserve a warning, + // as apart from hash collisions this can happen because the reports + // are in fact similar enough to be considered duplicates of each other. + if (EC != llvm::errc::file_exists) { + llvm::errs() << "warning: could not create file in '" << Directory + << "': " << EC.message() << '\n'; + } + return; } llvm::raw_fd_ostream os(FD, true); @@ -452,10 +502,11 @@ window.addEventListener("keydown", function (event) { if (event.defaultPrevented) { return; } - if (event.key == "S") { + // SHIFT + S + if (event.shiftKey && event.keyCode == 83) { var checked = document.getElementsByName("showCounterexample")[0].checked; filterCounterexample(!checked); - document.getElementsByName("showCounterexample")[0].checked = !checked; + document.getElementsByName("showCounterexample")[0].click(); } else { return; } @@ -475,6 +526,11 @@ document.addEventListener("DOMContentLoaded", function() { <label for="showCounterexample"> Show only relevant lines </label> + <input type="checkbox" name="showArrows" + id="showArrows" style="margin-left: 10px" /> + <label for="showArrows"> + Show control flow arrows + </label> </form> )<<<"; @@ -503,6 +559,9 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), generateKeyboardNavigationJavascript()); + R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), + generateArrowDrawingJavascript()); + // Checkbox and javascript for filtering the output to the counterexample. R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), showRelevantLinesJavascript(D, path)); @@ -570,6 +629,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, <a href="#" onclick="toggleHelp(); return false;">Close</a> </div> )<<<"; + R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), os.str()); } @@ -591,7 +651,6 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, ? UPDLoc.asLocation() : D.getLocation().asLocation()), SMgr); - const Decl *DeclWithIssue = D.getDeclWithIssue(); StringRef BugCategory = D.getCategory(); if (!BugCategory.empty()) @@ -603,9 +662,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, os << "\n<!-- FUNCTIONNAME " << declName << " -->\n"; - os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " - << getIssueHash(L, D.getCheckerName(), D.getBugType(), DeclWithIssue, - PP.getLangOpts()) + os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " << getIssueHash(D, PP) << " -->\n"; os << "\n<!-- BUGLINE " @@ -616,7 +673,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R, << ColumnNumber << " -->\n"; - os << "\n<!-- BUGPATHLENGTH " << path.size() << " -->\n"; + os << "\n<!-- BUGPATHLENGTH " << getPathSizeWithoutArrows(path) << " -->\n"; // Mark the end of the tags. os << "\n<!-- BUGMETAEND -->\n"; @@ -695,8 +752,7 @@ static void HandlePopUpPieceEndTag(Rewriter &R, Out << "</div></td><td>" << Piece.getString() << "</td></tr>"; // If no report made at this range mark the variable and add the end tags. - if (std::find(PopUpRanges.begin(), PopUpRanges.end(), Range) == - PopUpRanges.end()) { + if (!llvm::is_contained(PopUpRanges, Range)) { // Store that we create a report at this range. PopUpRanges.push_back(Range); @@ -711,30 +767,33 @@ static void HandlePopUpPieceEndTag(Rewriter &R, } } -void HTMLDiagnostics::RewriteFile(Rewriter &R, - const PathPieces& path, FileID FID) { +void HTMLDiagnostics::RewriteFile(Rewriter &R, const PathPieces &path, + FileID FID) { + // Process the path. // Maintain the counts of extra note pieces separately. - unsigned TotalPieces = path.size(); - unsigned TotalNotePieces = std::count_if( - path.begin(), path.end(), [](const PathDiagnosticPieceRef &p) { + unsigned TotalPieces = getPathSizeWithoutArrows(path); + unsigned TotalNotePieces = + llvm::count_if(path, [](const PathDiagnosticPieceRef &p) { return isa<PathDiagnosticNotePiece>(*p); }); - unsigned PopUpPieceCount = std::count_if( - path.begin(), path.end(), [](const PathDiagnosticPieceRef &p) { + unsigned PopUpPieceCount = + llvm::count_if(path, [](const PathDiagnosticPieceRef &p) { return isa<PathDiagnosticPopUpPiece>(*p); }); unsigned TotalRegularPieces = TotalPieces - TotalNotePieces - PopUpPieceCount; unsigned NumRegularPieces = TotalRegularPieces; unsigned NumNotePieces = TotalNotePieces; + unsigned NumberOfArrows = 0; // Stores the count of the regular piece indices. std::map<int, int> IndexMap; + ArrowMap ArrowIndices(TotalRegularPieces + 1); // Stores the different ranges where we have reported something. std::vector<SourceRange> PopUpRanges; - for (auto I = path.rbegin(), E = path.rend(); I != E; ++I) { - const auto &Piece = *I->get(); + for (const PathDiagnosticPieceRef &I : llvm::reverse(path)) { + const auto &Piece = *I.get(); if (isa<PathDiagnosticPopUpPiece>(Piece)) { ++IndexMap[NumRegularPieces]; @@ -744,18 +803,40 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R, // as a separate pass through the piece list. HandlePiece(R, FID, Piece, PopUpRanges, NumNotePieces, TotalNotePieces); --NumNotePieces; + + } else if (isArrowPiece(Piece)) { + NumberOfArrows = ProcessControlFlowPiece( + R, FID, cast<PathDiagnosticControlFlowPiece>(Piece), NumberOfArrows); + ArrowIndices[NumRegularPieces] = NumberOfArrows; + } else { HandlePiece(R, FID, Piece, PopUpRanges, NumRegularPieces, TotalRegularPieces); --NumRegularPieces; + ArrowIndices[NumRegularPieces] = ArrowIndices[NumRegularPieces + 1]; } } + ArrowIndices[0] = NumberOfArrows; + + // At this point ArrowIndices represent the following data structure: + // [a_0, a_1, ..., a_N] + // where N is the number of events in the path. + // + // Then for every event with index i \in [0, N - 1], we can say that + // arrows with indices \in [a_(i+1), a_i) correspond to that event. + // We can say that because arrows with these indices appeared in the + // path in between the i-th and the (i+1)-th events. + assert(ArrowIndices.back() == 0 && + "No arrows should be after the last event"); + // This assertion also guarantees that all indices in are <= NumberOfArrows. + assert(llvm::is_sorted(ArrowIndices, std::greater<unsigned>()) && + "Incorrect arrow indices map"); // Secondary indexing if we are having multiple pop-ups between two notes. // (e.g. [(13) 'a' is 'true']; [(13.1) 'b' is 'false']; [(13.2) 'c' is...) NumRegularPieces = TotalRegularPieces; - for (auto I = path.rbegin(), E = path.rend(); I != E; ++I) { - const auto &Piece = *I->get(); + for (const PathDiagnosticPieceRef &I : llvm::reverse(path)) { + const auto &Piece = *I.get(); if (const auto *PopUpP = dyn_cast<PathDiagnosticPopUpPiece>(&Piece)) { int PopUpPieceIndex = IndexMap[NumRegularPieces]; @@ -771,7 +852,7 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R, if (PopUpPieceIndex > 0) --IndexMap[NumRegularPieces]; - } else if (!isa<PathDiagnosticNotePiece>(Piece)) { + } else if (!isa<PathDiagnosticNotePiece>(Piece) && !isArrowPiece(Piece)) { --NumRegularPieces; } } @@ -783,6 +864,8 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R, html::EscapeText(R, FID); html::AddLineNumbers(R, FID); + addArrowSVGs(R, FID, ArrowIndices); + // If we have a preprocessor, relex the file and syntax highlight. // We might not have a preprocessor if we come from a deserialized AST file, // for example. @@ -1007,8 +1090,7 @@ void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID, ArrayRef<SourceRange> Ranges = P.getRanges(); for (const auto &Range : Ranges) { // If we have already highlighted the range as a pop-up there is no work. - if (std::find(PopUpRanges.begin(), PopUpRanges.end(), Range) != - PopUpRanges.end()) + if (llvm::is_contained(PopUpRanges, Range)) continue; HighlightRange(R, LPosInfo.first, Range); @@ -1049,6 +1131,104 @@ unsigned HTMLDiagnostics::ProcessMacroPiece(raw_ostream &os, return num; } +void HTMLDiagnostics::addArrowSVGs(Rewriter &R, FileID BugFileID, + const ArrowMap &ArrowIndices) { + std::string S; + llvm::raw_string_ostream OS(S); + + OS << R"<<<( +<style type="text/css"> + svg { + position:absolute; + top:0; + left:0; + height:100%; + width:100%; + pointer-events: none; + overflow: visible + } + .arrow { + stroke-opacity: 0.2; + stroke-width: 1; + marker-end: url(#arrowhead); + } + + .arrow.selected { + stroke-opacity: 0.6; + stroke-width: 2; + marker-end: url(#arrowheadSelected); + } + + .arrowhead { + orient: auto; + stroke: none; + opacity: 0.6; + fill: blue; + } +</style> +<svg xmlns="http://www.w3.org/2000/svg"> + <defs> + <marker id="arrowheadSelected" class="arrowhead" opacity="0.6" + viewBox="0 0 10 10" refX="3" refY="5" + markerWidth="4" markerHeight="4"> + <path d="M 0 0 L 10 5 L 0 10 z" /> + </marker> + <marker id="arrowhead" class="arrowhead" opacity="0.2" + viewBox="0 0 10 10" refX="3" refY="5" + markerWidth="4" markerHeight="4"> + <path d="M 0 0 L 10 5 L 0 10 z" /> + </marker> + </defs> + <g id="arrows" fill="none" stroke="blue" visibility="hidden"> +)<<<"; + + for (unsigned Index : llvm::seq(0u, ArrowIndices.getTotalNumberOfArrows())) { + OS << " <path class=\"arrow\" id=\"arrow" << Index << "\"/>\n"; + } + + OS << R"<<<( + </g> +</svg> +<script type='text/javascript'> +const arrowIndices = )<<<"; + + OS << ArrowIndices << "\n</script>\n"; + + R.InsertTextBefore(R.getSourceMgr().getLocForStartOfFile(BugFileID), + OS.str()); +} + +std::string getSpanBeginForControl(const char *ClassName, unsigned Index) { + std::string Result; + llvm::raw_string_ostream OS(Result); + OS << "<span id=\"" << ClassName << Index << "\">"; + return OS.str(); +} + +std::string getSpanBeginForControlStart(unsigned Index) { + return getSpanBeginForControl("start", Index); +} + +std::string getSpanBeginForControlEnd(unsigned Index) { + return getSpanBeginForControl("end", Index); +} + +unsigned HTMLDiagnostics::ProcessControlFlowPiece( + Rewriter &R, FileID BugFileID, const PathDiagnosticControlFlowPiece &P, + unsigned Number) { + for (const PathDiagnosticLocationPair &LPair : P) { + std::string Start = getSpanBeginForControlStart(Number), + End = getSpanBeginForControlEnd(Number++); + + HighlightRange(R, BugFileID, LPair.getStart().asRange().getBegin(), + Start.c_str()); + HighlightRange(R, BugFileID, LPair.getEnd().asRange().getBegin(), + End.c_str()); + } + + return Number; +} + void HTMLDiagnostics::HighlightRange(Rewriter& R, FileID BugFileID, SourceRange Range, const char *HighlightStart, @@ -1109,7 +1289,7 @@ document.addEventListener("DOMContentLoaded", function() { }); var findNum = function() { - var s = document.querySelector(".selected"); + var s = document.querySelector(".msg.selected"); if (!s || s.id == "EndPath") { return 0; } @@ -1117,14 +1297,32 @@ var findNum = function() { return out; }; +var classListAdd = function(el, theClass) { + if(!el.className.baseVal) + el.className += " " + theClass; + else + el.className.baseVal += " " + theClass; +}; + +var classListRemove = function(el, theClass) { + var className = (!el.className.baseVal) ? + el.className : el.className.baseVal; + className = className.replace(" " + theClass, ""); + if(!el.className.baseVal) + el.className = className; + else + el.className.baseVal = className; +}; + var scrollTo = function(el) { querySelectorAllArray(".selected").forEach(function(s) { - s.classList.remove("selected"); + classListRemove(s, "selected"); }); - el.classList.add("selected"); + classListAdd(el, "selected"); window.scrollBy(0, el.getBoundingClientRect().top - (window.innerHeight / 2)); -} + highlightArrowsForSelectedEvent(); +}; var move = function(num, up, numItems) { if (num == 1 && up || num == numItems - 1 && !up) { @@ -1159,9 +1357,11 @@ window.addEventListener("keydown", function (event) { if (event.defaultPrevented) { return; } - if (event.key == "j") { + // key 'j' + if (event.keyCode == 74) { navigateTo(/*up=*/false); - } else if (event.key == "k") { + // key 'k' + } else if (event.keyCode == 75) { navigateTo(/*up=*/true); } else { return; @@ -1171,3 +1371,258 @@ window.addEventListener("keydown", function (event) { </script> )<<<"; } + +StringRef HTMLDiagnostics::generateArrowDrawingJavascript() { + return R"<<<( +<script type='text/javascript'> +// Return range of numbers from a range [lower, upper). +function range(lower, upper) { + var array = []; + for (var i = lower; i <= upper; ++i) { + array.push(i); + } + return array; +} + +var getRelatedArrowIndices = function(pathId) { + // HTML numeration of events is a bit different than it is in the path. + // Everything is rotated one step to the right, so the last element + // (error diagnostic) has index 0. + if (pathId == 0) { + // arrowIndices has at least 2 elements + pathId = arrowIndices.length - 1; + } + + return range(arrowIndices[pathId], arrowIndices[pathId - 1]); +} + +var highlightArrowsForSelectedEvent = function() { + const selectedNum = findNum(); + const arrowIndicesToHighlight = getRelatedArrowIndices(selectedNum); + arrowIndicesToHighlight.forEach((index) => { + var arrow = document.querySelector("#arrow" + index); + if(arrow) { + classListAdd(arrow, "selected") + } + }); +} + +var getAbsoluteBoundingRect = function(element) { + const relative = element.getBoundingClientRect(); + return { + left: relative.left + window.pageXOffset, + right: relative.right + window.pageXOffset, + top: relative.top + window.pageYOffset, + bottom: relative.bottom + window.pageYOffset, + height: relative.height, + width: relative.width + }; +} + +var drawArrow = function(index) { + // This function is based on the great answer from SO: + // https://stackoverflow.com/a/39575674/11582326 + var start = document.querySelector("#start" + index); + var end = document.querySelector("#end" + index); + var arrow = document.querySelector("#arrow" + index); + + var startRect = getAbsoluteBoundingRect(start); + var endRect = getAbsoluteBoundingRect(end); + + // It is an arrow from a token to itself, no need to visualize it. + if (startRect.top == endRect.top && + startRect.left == endRect.left) + return; + + // Each arrow is a very simple Bézier curve, with two nodes and + // two handles. So, we need to calculate four points in the window: + // * start node + var posStart = { x: 0, y: 0 }; + // * end node + var posEnd = { x: 0, y: 0 }; + // * handle for the start node + var startHandle = { x: 0, y: 0 }; + // * handle for the end node + var endHandle = { x: 0, y: 0 }; + // One can visualize it as follows: + // + // start handle + // / + // X"""_.-""""X + // .' \ + // / start node + // | + // | + // | end node + // \ / + // `->X + // X-' + // \ + // end handle + // + // NOTE: (0, 0) is the top left corner of the window. + + // We have 3 similar, but still different scenarios to cover: + // + // 1. Two tokens on different lines. + // -xxx + // / + // \ + // -> xxx + // In this situation, we draw arrow on the left curving to the left. + // 2. Two tokens on the same line, and the destination is on the right. + // ____ + // / \ + // / V + // xxx xxx + // In this situation, we draw arrow above curving upwards. + // 3. Two tokens on the same line, and the destination is on the left. + // xxx xxx + // ^ / + // \____/ + // In this situation, we draw arrow below curving downwards. + const onDifferentLines = startRect.top <= endRect.top - 5 || + startRect.top >= endRect.top + 5; + const leftToRight = startRect.left < endRect.left; + + // NOTE: various magic constants are chosen empirically for + // better positioning and look + if (onDifferentLines) { + // Case #1 + const topToBottom = startRect.top < endRect.top; + posStart.x = startRect.left - 1; + // We don't want to start it at the top left corner of the token, + // it doesn't feel like this is where the arrow comes from. + // For this reason, we start it in the middle of the left side + // of the token. + posStart.y = startRect.top + startRect.height / 2; + + // End node has arrow head and we give it a bit more space. + posEnd.x = endRect.left - 4; + posEnd.y = endRect.top; + + // Utility object with x and y offsets for handles. + var curvature = { + // We want bottom-to-top arrow to curve a bit more, so it doesn't + // overlap much with top-to-bottom curves (much more frequent). + x: topToBottom ? 15 : 25, + y: Math.min((posEnd.y - posStart.y) / 3, 10) + } + + // When destination is on the different line, we can make a + // curvier arrow because we have space for it. + // So, instead of using + // + // startHandle.x = posStart.x - curvature.x + // endHandle.x = posEnd.x - curvature.x + // + // We use the leftmost of these two values for both handles. + startHandle.x = Math.min(posStart.x, posEnd.x) - curvature.x; + endHandle.x = startHandle.x; + + // Curving downwards from the start node... + startHandle.y = posStart.y + curvature.y; + // ... and upwards from the end node. + endHandle.y = posEnd.y - curvature.y; + + } else if (leftToRight) { + // Case #2 + // Starting from the top right corner... + posStart.x = startRect.right - 1; + posStart.y = startRect.top; + + // ...and ending at the top left corner of the end token. + posEnd.x = endRect.left + 1; + posEnd.y = endRect.top - 1; + + // Utility object with x and y offsets for handles. + var curvature = { + x: Math.min((posEnd.x - posStart.x) / 3, 15), + y: 5 + } + + // Curving to the right... + startHandle.x = posStart.x + curvature.x; + // ... and upwards from the start node. + startHandle.y = posStart.y - curvature.y; + + // And to the left... + endHandle.x = posEnd.x - curvature.x; + // ... and upwards from the end node. + endHandle.y = posEnd.y - curvature.y; + + } else { + // Case #3 + // Starting from the bottom right corner... + posStart.x = startRect.right; + posStart.y = startRect.bottom; + + // ...and ending also at the bottom right corner, but of the end token. + posEnd.x = endRect.right - 1; + posEnd.y = endRect.bottom + 1; + + // Utility object with x and y offsets for handles. + var curvature = { + x: Math.min((posStart.x - posEnd.x) / 3, 15), + y: 5 + } + + // Curving to the left... + startHandle.x = posStart.x - curvature.x; + // ... and downwards from the start node. + startHandle.y = posStart.y + curvature.y; + + // And to the right... + endHandle.x = posEnd.x + curvature.x; + // ... and downwards from the end node. + endHandle.y = posEnd.y + curvature.y; + } + + // Put it all together into a path. + // More information on the format: + // https://developer.mozilla.org/en-US/docs/Web/SVG/Tutorial/Paths + var pathStr = "M" + posStart.x + "," + posStart.y + " " + + "C" + startHandle.x + "," + startHandle.y + " " + + endHandle.x + "," + endHandle.y + " " + + posEnd.x + "," + posEnd.y; + + arrow.setAttribute("d", pathStr); +}; + +var drawArrows = function() { + const numOfArrows = document.querySelectorAll("path[id^=arrow]").length; + for (var i = 0; i < numOfArrows; ++i) { + drawArrow(i); + } +} + +var toggleArrows = function(event) { + const arrows = document.querySelector("#arrows"); + if (event.target.checked) { + arrows.setAttribute("visibility", "visible"); + } else { + arrows.setAttribute("visibility", "hidden"); + } +} + +window.addEventListener("resize", drawArrows); +document.addEventListener("DOMContentLoaded", function() { + // Whenever we show invocation, locations change, i.e. we + // need to redraw arrows. + document + .querySelector('input[id="showinvocation"]') + .addEventListener("click", drawArrows); + // Hiding irrelevant lines also should cause arrow rerender. + document + .querySelector('input[name="showCounterexample"]') + .addEventListener("change", drawArrows); + document + .querySelector('input[name="showArrows"]') + .addEventListener("change", toggleArrows); + drawArrows(); + // Default highlighting for the last event. + highlightArrowsForSelectedEvent(); +}); +</script> + )<<<"; +} diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp index e5f4e9ea30c9..8bf6fc085c6a 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp @@ -69,7 +69,7 @@ namespace clang { namespace ento { static bool isLoopStmt(const Stmt *S) { - return S && (isa<ForStmt>(S) || isa<WhileStmt>(S) || isa<DoStmt>(S)); + return isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(S); } ProgramStateRef processLoopEnd(const Stmt *LoopStmt, ProgramStateRef State) { diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp index 47e34dd84b9a..748c65f578a8 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp @@ -45,8 +45,7 @@ ProgramStateRef getWidenedLoopState(ProgramStateRef PrevState, const LocationContext *LCtx, unsigned BlockCount, const Stmt *LoopStmt) { - assert(isa<ForStmt>(LoopStmt) || isa<WhileStmt>(LoopStmt) || - isa<DoStmt>(LoopStmt)); + assert((isa<ForStmt, WhileStmt, DoStmt>(LoopStmt))); // Invalidate values in the current state. // TODO Make this more conservative by only invalidating values that might diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index bd725ee9eaa3..f77fcb030a15 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -28,6 +28,7 @@ #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceManager.h" +#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h" @@ -768,14 +769,39 @@ DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR, return UnknownVal(); QualType Ty = cast<TypedValueRegion>(SR)->getDesugaredValueType(Ctx); - DefinedOrUnknownSVal Size = getElementExtent(Ty, SVB); + const DefinedOrUnknownSVal Size = getElementExtent(Ty, SVB); + + // We currently don't model flexible array members (FAMs), which are: + // - int array[]; of IncompleteArrayType + // - int array[0]; of ConstantArrayType with size 0 + // - int array[1]; of ConstantArrayType with size 1 (*) + // (*): Consider single element array object members as FAM candidates only + // if the consider-single-element-arrays-as-flexible-array-members + // analyzer option is true. + // https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html + const auto isFlexibleArrayMemberCandidate = [this, + &SVB](QualType Ty) -> bool { + const ArrayType *AT = Ctx.getAsArrayType(Ty); + if (!AT) + return false; + if (isa<IncompleteArrayType>(AT)) + return true; + + if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) { + const llvm::APInt &Size = CAT->getSize(); + if (Size.isZero()) + return true; + + const AnalyzerOptions &Opts = SVB.getAnalyzerOptions(); + if (Opts.ShouldConsiderSingleElementArraysAsFlexibleArrayMembers && + Size.isOne()) + return true; + } + return false; + }; - // A zero-length array at the end of a struct often stands for dynamically - // allocated extra memory. - if (Size.isZeroConstant()) { - if (isa<ConstantArrayType>(Ty)) - return UnknownVal(); - } + if (isFlexibleArrayMemberCandidate(Ty)) + return UnknownVal(); return Size; } @@ -948,9 +974,9 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D, // First handle the globals defined in system headers. if (Ctx.getSourceManager().isInSystemHeader(D->getLocation())) { - // Whitelist the system globals which often DO GET modified, assume the + // Allow the system globals which often DO GET modified, assume the // rest are immutable. - if (D->getName().find("errno") != StringRef::npos) + if (D->getName().contains("errno")) sReg = getGlobalsRegion(MemRegion::GlobalSystemSpaceRegionKind); else sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind); @@ -986,14 +1012,15 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D, sReg = getUnknownRegion(); } else { if (D->hasLocalStorage()) { - sReg = isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D) - ? static_cast<const MemRegion*>(getStackArgumentsRegion(STC)) - : static_cast<const MemRegion*>(getStackLocalsRegion(STC)); + sReg = + isa<ParmVarDecl, ImplicitParamDecl>(D) + ? static_cast<const MemRegion *>(getStackArgumentsRegion(STC)) + : static_cast<const MemRegion *>(getStackLocalsRegion(STC)); } else { assert(D->isStaticLocal()); const Decl *STCD = STC->getDecl(); - if (isa<FunctionDecl>(STCD) || isa<ObjCMethodDecl>(STCD)) + if (isa<FunctionDecl, ObjCMethodDecl>(STCD)) sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind, getFunctionCodeRegion(cast<NamedDecl>(STCD))); else if (const auto *BD = dyn_cast<BlockDecl>(STCD)) { @@ -1257,9 +1284,7 @@ bool MemRegion::hasStackParametersStorage() const { } bool MemRegion::hasGlobalsOrParametersStorage() const { - const MemSpaceRegion *MS = getMemorySpace(); - return isa<StackArgumentsSpaceRegion>(MS) || - isa<GlobalsSpaceRegion>(MS); + return isa<StackArgumentsSpaceRegion, GlobalsSpaceRegion>(getMemorySpace()); } // getBaseRegion strips away all elements and fields, and get the base region diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 69554576bdb2..74403a160b8e 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -494,15 +494,17 @@ RangeSet RangeSet::Factory::deletePoint(RangeSet From, return intersect(From, Upper, Lower); } -void Range::dump(raw_ostream &OS) const { +LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const { OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']'; } +LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); } -void RangeSet::dump(raw_ostream &OS) const { +LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const { OS << "{ "; llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); }); OS << " }"; } +LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); } REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef) @@ -599,6 +601,10 @@ public: LLVM_NODISCARD static inline Optional<bool> areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second); + /// Remove one member from the class. + LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State, + const SymbolRef Old); + /// Iterate over all symbols and try to simplify them. LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB, RangeSet::Factory &F, @@ -655,6 +661,7 @@ private: inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State, SymbolSet Members, EquivalenceClass Other, SymbolSet OtherMembers); + static inline bool addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints, RangeSet::Factory &F, ProgramStateRef State, @@ -1112,7 +1119,7 @@ private: if (!SSE) return llvm::None; - BinaryOperatorKind CurrentOP = SSE->getOpcode(); + const BinaryOperatorKind CurrentOP = SSE->getOpcode(); // We currently do not support <=> (C++20). if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp)) @@ -1126,7 +1133,12 @@ private: SymbolManager &SymMgr = State->getSymbolManager(); - int UnknownStates = 0; + // We use this variable to store the last queried operator (`QueriedOP`) + // for which the `getCmpOpState` returned with `Unknown`. If there are two + // different OPs that returned `Unknown` then we have to query the special + // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)` + // never returns `Unknown`, so `CurrentOP` is a good initial value. + BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP; // Loop goes through all of the columns exept the last one ('UnknownX2'). // We treat `UnknownX2` column separately at the end of the loop body. @@ -1163,15 +1175,18 @@ private: CmpOpTable.getCmpOpState(CurrentOP, QueriedOP); if (BranchState == OperatorRelationsTable::Unknown) { - if (++UnknownStates == 2) - // If we met both Unknown states. + if (LastQueriedOpToUnknown != CurrentOP && + LastQueriedOpToUnknown != QueriedOP) { + // If we got the Unknown state for both different operators. // if (x <= y) // assume true // if (x != y) // assume true // if (x < y) // would be also true // Get a state from `UnknownX2` column. BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP); - else + } else { + LastQueriedOpToUnknown = QueriedOP; continue; + } } return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T) @@ -1382,6 +1397,113 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS, } //===----------------------------------------------------------------------===// +// Constraint manager implementation details +//===----------------------------------------------------------------------===// + +class RangeConstraintManager : public RangedConstraintManager { +public: + RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) + : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} + + //===------------------------------------------------------------------===// + // Implementation for interface from ConstraintManager. + //===------------------------------------------------------------------===// + + bool haveEqualConstraints(ProgramStateRef S1, + ProgramStateRef S2) const override { + // NOTE: ClassMembers are as simple as back pointers for ClassMap, + // so comparing constraint ranges and class maps should be + // sufficient. + return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() && + S1->get<ClassMap>() == S2->get<ClassMap>(); + } + + bool canReasonAbout(SVal X) const override; + + ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override; + + const llvm::APSInt *getSymVal(ProgramStateRef State, + SymbolRef Sym) const override; + + ProgramStateRef removeDeadBindings(ProgramStateRef State, + SymbolReaper &SymReaper) override; + + void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", + unsigned int Space = 0, bool IsDot = false) const override; + void printConstraints(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + void printDisequalities(raw_ostream &Out, ProgramStateRef State, + const char *NL = "\n", unsigned int Space = 0, + bool IsDot = false) const; + + //===------------------------------------------------------------------===// + // Implementation for interface from RangedConstraintManager. + //===------------------------------------------------------------------===// + + ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymWithinInclusiveRange( + ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, + const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; + + ProgramStateRef assumeSymOutsideInclusiveRange( + ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, + const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; + +private: + RangeSet::Factory F; + + RangeSet getRange(ProgramStateRef State, SymbolRef Sym); + RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); + ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, + RangeSet Range); + ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, + RangeSet Range); + + RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, + const llvm::APSInt &Int, + const llvm::APSInt &Adjustment); + RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym, + const llvm::APSInt &Int, + const llvm::APSInt &Adjustment); + RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym, + const llvm::APSInt &Int, + const llvm::APSInt &Adjustment); + RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS, + const llvm::APSInt &Int, + const llvm::APSInt &Adjustment); + RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym, + const llvm::APSInt &Int, + const llvm::APSInt &Adjustment); +}; + +//===----------------------------------------------------------------------===// // Constraint assignment logic //===----------------------------------------------------------------------===// @@ -1492,7 +1614,28 @@ public: return Assignor.assign(CoS, NewConstraint); } + /// Handle expressions like: a % b != 0. + template <typename SymT> + bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) { + if (Sym->getOpcode() != BO_Rem) + return true; + // a % b != 0 implies that a != 0. + if (!Constraint.containsZero()) { + SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS()); + if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) { + State = State->assume(*NonLocSymSVal, true); + if (!State) + return false; + } + } + return true; + } + inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint); + inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym, + RangeSet Constraint) { + return handleRemainderOp(Sym, Constraint); + } inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym, RangeSet Constraint); @@ -1568,11 +1711,9 @@ private: assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here"); if (Constraint.getConcreteValue()) - return !Constraint.getConcreteValue()->isNullValue(); + return !Constraint.getConcreteValue()->isZero(); - APSIntType T{Constraint.getMinValue()}; - Const Zero = T.getZeroValue(); - if (!Constraint.contains(Zero)) + if (!Constraint.containsZero()) return true; return llvm::None; @@ -1583,112 +1724,6 @@ private: RangeSet::Factory &RangeFactory; }; -//===----------------------------------------------------------------------===// -// Constraint manager implementation details -//===----------------------------------------------------------------------===// - -class RangeConstraintManager : public RangedConstraintManager { -public: - RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) - : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} - - //===------------------------------------------------------------------===// - // Implementation for interface from ConstraintManager. - //===------------------------------------------------------------------===// - - bool haveEqualConstraints(ProgramStateRef S1, - ProgramStateRef S2) const override { - // NOTE: ClassMembers are as simple as back pointers for ClassMap, - // so comparing constraint ranges and class maps should be - // sufficient. - return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() && - S1->get<ClassMap>() == S2->get<ClassMap>(); - } - - bool canReasonAbout(SVal X) const override; - - ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override; - - const llvm::APSInt *getSymVal(ProgramStateRef State, - SymbolRef Sym) const override; - - ProgramStateRef removeDeadBindings(ProgramStateRef State, - SymbolReaper &SymReaper) override; - - void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n", - unsigned int Space = 0, bool IsDot = false) const override; - void printConstraints(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false) const; - void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false) const; - void printDisequalities(raw_ostream &Out, ProgramStateRef State, - const char *NL = "\n", unsigned int Space = 0, - bool IsDot = false) const; - - //===------------------------------------------------------------------===// - // Implementation for interface from RangedConstraintManager. - //===------------------------------------------------------------------===// - - ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym, - const llvm::APSInt &V, - const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym, - const llvm::APSInt &V, - const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym, - const llvm::APSInt &V, - const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym, - const llvm::APSInt &V, - const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym, - const llvm::APSInt &V, - const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym, - const llvm::APSInt &V, - const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymWithinInclusiveRange( - ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, - const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; - - ProgramStateRef assumeSymOutsideInclusiveRange( - ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, - const llvm::APSInt &To, const llvm::APSInt &Adjustment) override; - -private: - RangeSet::Factory F; - - RangeSet getRange(ProgramStateRef State, SymbolRef Sym); - RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); - ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, - RangeSet Range); - ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class, - RangeSet Range); - - RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym, - const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); - RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym, - const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); - RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym, - const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); - RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS, - const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); - RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym, - const llvm::APSInt &Int, - const llvm::APSInt &Adjustment); -}; bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, const llvm::APSInt &Constraint) { @@ -1716,11 +1751,26 @@ bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, return false; } + // We may have trivial equivalence classes in the disequality info as + // well, and we need to simplify them. + DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>(); + for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry : + DisequalityInfo) { + EquivalenceClass Class = DisequalityEntry.first; + ClassSet DisequalClasses = DisequalityEntry.second; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + } + return true; } bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym, RangeSet Constraint) { + if (!handleRemainderOp(Sym, Constraint)) + return false; + Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint); if (!ConstraintAsBool) @@ -2086,6 +2136,61 @@ inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State, return llvm::None; } +LLVM_NODISCARD ProgramStateRef +EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { + + SymbolSet ClsMembers = getClassMembers(State); + assert(ClsMembers.contains(Old)); + + // We don't remove `Old`'s Sym->Class relation for two reasons: + // 1) This way constraints for the old symbol can still be found via it's + // equivalence class that it used to be the member of. + // 2) Performance and resource reasons. We can spare one removal and thus one + // additional tree in the forest of `ClassMap`. + + // Remove `Old`'s Class->Sym relation. + SymbolSet::Factory &F = getMembersFactory(State); + ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>(); + ClsMembers = F.remove(ClsMembers, Old); + // Ensure another precondition of the removeMember function (we can check + // this only with isEmpty, thus we have to do the remove first). + assert(!ClsMembers.isEmpty() && + "Class should have had at least two members before member removal"); + // Overwrite the existing members assigned to this class. + ClassMembersTy ClassMembersMap = State->get<ClassMembers>(); + ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers); + State = State->set<ClassMembers>(ClassMembersMap); + + return State; +} + +// Re-evaluate an SVal with top-level `State->assume` logic. +LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, + const RangeSet *Constraint, + SVal TheValue) { + if (!Constraint) + return State; + + const auto DefinedVal = TheValue.castAs<DefinedSVal>(); + + // If the SVal is 0, we can simply interpret that as `false`. + if (Constraint->encodesFalseRange()) + return State->assume(DefinedVal, false); + + // If the constraint does not encode 0 then we can interpret that as `true` + // AND as a Range(Set). + if (Constraint->encodesTrueRange()) { + State = State->assume(DefinedVal, true); + if (!State) + return nullptr; + // Fall through, re-assume based on the range values as well. + } + // Overestimate the individual Ranges with the RangeSet' lowest and + // highest values. + return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(), + Constraint->getMaxValue(), true); +} + // Iterate over all symbols and try to simplify them. Once a symbol is // simplified then we check if we can merge the simplified symbol's equivalence // class to this class. This way, we simplify not just the symbols but the @@ -2096,14 +2201,62 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Class) { SymbolSet ClassMembers = Class.getClassMembers(State); for (const SymbolRef &MemberSym : ClassMembers) { - SymbolRef SimplifiedMemberSym = ento::simplify(State, MemberSym); + + const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym); + const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol(); + + // The symbol is collapsed to a constant, check if the current State is + // still feasible. + if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) { + const llvm::APSInt &SV = CI->getValue(); + const RangeSet *ClassConstraint = getConstraint(State, Class); + // We have found a contradiction. + if (ClassConstraint && !ClassConstraint->contains(SV)) + return nullptr; + } + if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) { // The simplified symbol should be the member of the original Class, // however, it might be in another existing class at the moment. We // have to merge these classes. + ProgramStateRef OldState = State; State = merge(F, State, MemberSym, SimplifiedMemberSym); if (!State) return nullptr; + // No state change, no merge happened actually. + if (OldState == State) + continue; + + assert(find(State, MemberSym) == find(State, SimplifiedMemberSym)); + // Remove the old and more complex symbol. + State = find(State, MemberSym).removeMember(State, MemberSym); + + // Query the class constraint again b/c that may have changed during the + // merge above. + const RangeSet *ClassConstraint = getConstraint(State, Class); + + // Re-evaluate an SVal with top-level `State->assume`, this ignites + // a RECURSIVE algorithm that will reach a FIXPOINT. + // + // About performance and complexity: Let us assume that in a State we + // have N non-trivial equivalence classes and that all constraints and + // disequality info is related to non-trivial classes. In the worst case, + // we can simplify only one symbol of one class in each iteration. The + // number of symbols in one class cannot grow b/c we replace the old + // symbol with the simplified one. Also, the number of the equivalence + // classes can decrease only, b/c the algorithm does a merge operation + // optionally. We need N iterations in this case to reach the fixpoint. + // Thus, the steps needed to be done in the worst case is proportional to + // N*N. + // + // This worst case scenario can be extended to that case when we have + // trivial classes in the constraints and in the disequality map. This + // case can be reduced to the case with a State where there are only + // non-trivial classes. This is because a merge operation on two trivial + // classes results in one non-trivial class. + State = reAssume(State, ClassConstraint, SimplifiedMemberVal); + if (!State) + return nullptr; } } return State; diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp index d227c025fb20..892d64ea4e4e 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp @@ -41,7 +41,12 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, return assumeSymRel(State, SIE->getLHS(), op, SIE->getRHS()); } - } else if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) { + // Handle adjustment with non-comparison ops. + const llvm::APSInt &Zero = getBasicVals().getValue(0, SIE->getType()); + return assumeSymRel(State, SIE, (Assumption ? BO_NE : BO_EQ), Zero); + } + + if (const auto *SSE = dyn_cast<SymSymExpr>(Sym)) { BinaryOperator::Opcode Op = SSE->getOpcode(); assert(BinaryOperator::isComparisonOp(Op)); @@ -226,9 +231,13 @@ void RangedConstraintManager::computeAdjustment(SymbolRef &Sym, } } -SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) { +SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym) { SValBuilder &SVB = State->getStateManager().getSValBuilder(); - SVal SimplifiedVal = SVB.simplifySVal(State, SVB.makeSymbolVal(Sym)); + return SVB.simplifySVal(State, SVB.makeSymbolVal(Sym)); +} + +SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) { + SVal SimplifiedVal = simplifyToSVal(State, Sym); if (SymbolRef SimplifiedSym = SimplifiedVal.getAsSymbol()) return SimplifiedSym; return Sym; diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 4ffa1aacb41f..135130b35ba7 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -62,8 +62,8 @@ private: : P(r, k), Data(offset) { assert(r && "Must have known regions."); assert(getOffset() == offset && "Failed to store offset"); - assert((r == r->getBaseRegion() || isa<ObjCIvarRegion>(r) || - isa <CXXDerivedObjectRegion>(r)) && + assert((r == r->getBaseRegion() || + isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) && "Not a base"); } public: @@ -437,6 +437,15 @@ public: RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B, const SubRegion *R); + Optional<SVal> + getConstantValFromConstArrayInitializer(RegionBindingsConstRef B, + const ElementRegion *R); + Optional<SVal> + getSValFromInitListExpr(const InitListExpr *ILE, + const SmallVector<uint64_t, 2> &ConcreteOffsets, + QualType ElemT); + SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, + QualType ElemT); public: // Part of public interface to class. @@ -1135,7 +1144,7 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, if (Regions) Regions->push_back(baseR); - if (isa<AllocaRegion>(baseR) || isa<SymbolicRegion>(baseR)) { + if (isa<AllocaRegion, SymbolicRegion>(baseR)) { // Invalidate the region by setting its default value to // conjured symbol. The type of the symbol is irrelevant. DefinedOrUnknownSVal V = @@ -1224,7 +1233,7 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, // detection. SVal V = I.getData(); const MemRegion *R = V.getAsRegion(); - if (R && isa<SymbolicRegion>(R)) + if (isa_and_nonnull<SymbolicRegion>(R)) VisitBinding(V); } } @@ -1625,6 +1634,280 @@ RegionStoreManager::findLazyBinding(RegionBindingsConstRef B, return Result; } +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return an array of extents of the declared array type. +/// +/// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }. +static SmallVector<uint64_t, 2> +getConstantArrayExtents(const ConstantArrayType *CAT) { + assert(CAT && "ConstantArrayType should not be null"); + CAT = cast<ConstantArrayType>(CAT->getCanonicalTypeInternal()); + SmallVector<uint64_t, 2> Extents; + do { + Extents.push_back(CAT->getSize().getZExtValue()); + } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType()))); + return Extents; +} + +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return an array of offsets from nested ElementRegions and a root base +/// region. The array is never empty and a base region is never null. +/// +/// E.g. for `Element{Element{Element{VarRegion},1},2},3}` returns { 3, 2, 1 }. +/// This represents an access through indirection: `arr[1][2][3];` +/// +/// \param ER The given (possibly nested) ElementRegion. +/// +/// \note The result array is in the reverse order of indirection expression: +/// arr[1][2][3] -> { 3, 2, 1 }. This helps to provide complexity O(n), where n +/// is a number of indirections. It may not affect performance in real-life +/// code, though. +static std::pair<SmallVector<SVal, 2>, const MemRegion *> +getElementRegionOffsetsWithBase(const ElementRegion *ER) { + assert(ER && "ConstantArrayType should not be null"); + const MemRegion *Base; + SmallVector<SVal, 2> SValOffsets; + do { + SValOffsets.push_back(ER->getIndex()); + Base = ER->getSuperRegion(); + ER = dyn_cast<ElementRegion>(Base); + } while (ER); + return {SValOffsets, Base}; +} + +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Convert array of offsets from `SVal` to `uint64_t` in consideration of +/// respective array extents. +/// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed +/// order (expectedly received from `getElementRegionOffsetsWithBase`). +/// \param ArrayExtents [in] The array of extents. +/// \param DstOffsets [out] The array of offsets of type `uint64_t`. +/// \returns: +/// - `None` for successful convertion. +/// - `UndefinedVal` or `UnknownVal` otherwise. It's expected that this SVal +/// will be returned as a suitable value of the access operation. +/// which should be returned as a correct +/// +/// \example: +/// const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 } +/// int x1 = arr[4][5][6]; // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) } +/// // DstOffsets { 4, 5, 6 } +/// // returns None +/// int x2 = arr[42][5][-6]; // returns UndefinedVal +/// int x3 = arr[4][5][x2]; // returns UnknownVal +static Optional<SVal> +convertOffsetsFromSvalToUnsigneds(const SmallVector<SVal, 2> &SrcOffsets, + const SmallVector<uint64_t, 2> ArrayExtents, + SmallVector<uint64_t, 2> &DstOffsets) { + // Check offsets for being out of bounds. + // C++20 [expr.add] 7.6.6.4 (excerpt): + // If P points to an array element i of an array object x with n + // elements, where i < 0 or i > n, the behavior is undefined. + // Dereferencing is not allowed on the "one past the last + // element", when i == n. + // Example: + // const int arr[3][2] = {{1, 2}, {3, 4}}; + // arr[0][0]; // 1 + // arr[0][1]; // 2 + // arr[0][2]; // UB + // arr[1][0]; // 3 + // arr[1][1]; // 4 + // arr[1][-1]; // UB + // arr[2][0]; // 0 + // arr[2][1]; // 0 + // arr[-2][0]; // UB + DstOffsets.resize(SrcOffsets.size()); + auto ExtentIt = ArrayExtents.begin(); + auto OffsetIt = DstOffsets.begin(); + // Reverse `SValOffsets` to make it consistent with `ArrayExtents`. + for (SVal V : llvm::reverse(SrcOffsets)) { + if (auto CI = V.getAs<nonloc::ConcreteInt>()) { + // When offset is out of array's bounds, result is UB. + const llvm::APSInt &Offset = CI->getValue(); + if (Offset.isNegative() || Offset.uge(*(ExtentIt++))) + return UndefinedVal(); + // Store index in a reversive order. + *(OffsetIt++) = Offset.getZExtValue(); + continue; + } + // Symbolic index presented. Return Unknown value. + // FIXME: We also need to take ElementRegions with symbolic indexes into + // account. + return UnknownVal(); + } + return None; +} + +Optional<SVal> RegionStoreManager::getConstantValFromConstArrayInitializer( + RegionBindingsConstRef B, const ElementRegion *R) { + assert(R && "ElementRegion should not be null"); + + // Treat an n-dimensional array. + SmallVector<SVal, 2> SValOffsets; + const MemRegion *Base; + std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R); + const VarRegion *VR = dyn_cast<VarRegion>(Base); + if (!VR) + return None; + + assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the " + "offsets vector is not empty."); + + // Check if the containing array has an initialized value that we can trust. + // We can trust a const value or a value of a global initializer in main(). + const VarDecl *VD = VR->getDecl(); + if (!VD->getType().isConstQualified() && + !R->getElementType().isConstQualified() && + (!B.isMainAnalysis() || !VD->hasGlobalStorage())) + return None; + + // Array's declaration should have `ConstantArrayType` type, because only this + // type contains an array extent. It may happen that array type can be of + // `IncompleteArrayType` type. To get the declaration of `ConstantArrayType` + // type, we should find the declaration in the redeclarations chain that has + // the initialization expression. + // NOTE: `getAnyInitializer` has an out-parameter, which returns a new `VD` + // from which an initializer is obtained. We replace current `VD` with the new + // `VD`. If the return value of the function is null than `VD` won't be + // replaced. + const Expr *Init = VD->getAnyInitializer(VD); + // NOTE: If `Init` is non-null, then a new `VD` is non-null for sure. So check + // `Init` for null only and don't worry about the replaced `VD`. + if (!Init) + return None; + + // Array's declaration should have ConstantArrayType type, because only this + // type contains an array extent. + const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(VD->getType()); + if (!CAT) + return None; + + // Get array extents. + SmallVector<uint64_t, 2> Extents = getConstantArrayExtents(CAT); + + // The number of offsets should equal to the numbers of extents, + // otherwise wrong type punning occured. For instance: + // int arr[1][2][3]; + // auto ptr = (int(*)[42])arr; + // auto x = ptr[4][2]; // UB + // FIXME: Should return UndefinedVal. + if (SValOffsets.size() != Extents.size()) + return None; + + SmallVector<uint64_t, 2> ConcreteOffsets; + if (Optional<SVal> V = convertOffsetsFromSvalToUnsigneds(SValOffsets, Extents, + ConcreteOffsets)) + return *V; + + // Handle InitListExpr. + // Example: + // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 }; + if (const auto *ILE = dyn_cast<InitListExpr>(Init)) + return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType()); + + // Handle StringLiteral. + // Example: + // const char arr[] = "abc"; + if (const auto *SL = dyn_cast<StringLiteral>(Init)) + return getSValFromStringLiteral(SL, ConcreteOffsets.front(), + R->getElementType()); + + // FIXME: Handle CompoundLiteralExpr. + + return None; +} + +/// Returns an SVal, if possible, for the specified position of an +/// initialization list. +/// +/// \param ILE The given initialization list. +/// \param Offsets The array of unsigned offsets. E.g. for the expression +/// `int x = arr[1][2][3];` an array should be { 1, 2, 3 }. +/// \param ElemT The type of the result SVal expression. +/// \return Optional SVal for the particular position in the initialization +/// list. E.g. for the list `{{1, 2},[3, 4],{5, 6}, {}}` offsets: +/// - {1, 1} returns SVal{4}, because it's the second position in the second +/// sublist; +/// - {3, 0} returns SVal{0}, because there's no explicit value at this +/// position in the sublist. +/// +/// NOTE: Inorder to get a valid SVal, a caller shall guarantee valid offsets +/// for the given initialization list. Otherwise SVal can be an equivalent to 0 +/// or lead to assertion. +Optional<SVal> RegionStoreManager::getSValFromInitListExpr( + const InitListExpr *ILE, const SmallVector<uint64_t, 2> &Offsets, + QualType ElemT) { + assert(ILE && "InitListExpr should not be null"); + + for (uint64_t Offset : Offsets) { + // C++20 [dcl.init.string] 9.4.2.1: + // An array of ordinary character type [...] can be initialized by [...] + // an appropriately-typed string-literal enclosed in braces. + // Example: + // const char arr[] = { "abc" }; + if (ILE->isStringLiteralInit()) + if (const auto *SL = dyn_cast<StringLiteral>(ILE->getInit(0))) + return getSValFromStringLiteral(SL, Offset, ElemT); + + // C++20 [expr.add] 9.4.17.5 (excerpt): + // i-th array element is value-initialized for each k < i ≤ n, + // where k is an expression-list size and n is an array extent. + if (Offset >= ILE->getNumInits()) + return svalBuilder.makeZeroVal(ElemT); + + const Expr *E = ILE->getInit(Offset); + const auto *IL = dyn_cast<InitListExpr>(E); + if (!IL) + // Return a constant value, if it is presented. + // FIXME: Support other SVals. + return svalBuilder.getConstantVal(E); + + // Go to the nested initializer list. + ILE = IL; + } + llvm_unreachable( + "Unhandled InitListExpr sub-expressions or invalid offsets."); +} + +/// Returns an SVal, if possible, for the specified position in a string +/// literal. +/// +/// \param SL The given string literal. +/// \param Offset The unsigned offset. E.g. for the expression +/// `char x = str[42];` an offset should be 42. +/// E.g. for the string "abc" offset: +/// - 1 returns SVal{b}, because it's the second position in the string. +/// - 42 returns SVal{0}, because there's no explicit value at this +/// position in the string. +/// \param ElemT The type of the result SVal expression. +/// +/// NOTE: We return `0` for every offset >= the literal length for array +/// declarations, like: +/// const char str[42] = "123"; // Literal length is 4. +/// char c = str[41]; // Offset is 41. +/// FIXME: Nevertheless, we can't do the same for pointer declaraions, like: +/// const char * const str = "123"; // Literal length is 4. +/// char c = str[41]; // Offset is 41. Returns `0`, but Undef +/// // expected. +/// It should be properly handled before reaching this point. +/// The main problem is that we can't distinguish between these declarations, +/// because in case of array we can get the Decl from VarRegion, but in case +/// of pointer the region is a StringRegion, which doesn't contain a Decl. +/// Possible solution could be passing an array extent along with the offset. +SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL, + uint64_t Offset, + QualType ElemT) { + assert(SL && "StringLiteral should not be null"); + // C++20 [dcl.init.string] 9.4.2.3: + // If there are fewer initializers than there are array elements, each + // element not explicitly initialized shall be zero-initialized [dcl.init]. + uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset); + return svalBuilder.makeIntVal(Code, ElemT); +} + SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, const ElementRegion* R) { // Check if the region has a binding. @@ -1636,59 +1919,21 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, // Check if the region is an element region of a string literal. if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) { // FIXME: Handle loads from strings where the literal is treated as - // an integer, e.g., *((unsigned int*)"hello") + // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according + // to C++20 7.2.1.11 [basic.lval]. QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType(); if (!Ctx.hasSameUnqualifiedType(T, R->getElementType())) return UnknownVal(); - - const StringLiteral *Str = StrR->getStringLiteral(); - SVal Idx = R->getIndex(); - if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) { - int64_t i = CI->getValue().getSExtValue(); - // Abort on string underrun. This can be possible by arbitrary - // clients of getBindingForElement(). - if (i < 0) + if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) { + const llvm::APSInt &Idx = CI->getValue(); + if (Idx < 0) return UndefinedVal(); - int64_t length = Str->getLength(); - // Technically, only i == length is guaranteed to be null. - // However, such overflows should be caught before reaching this point; - // the only time such an access would be made is if a string literal was - // used to initialize a larger array. - char c = (i >= length) ? '\0' : Str->getCodeUnit(i); - return svalBuilder.makeIntVal(c, T); - } - } else if (const VarRegion *VR = dyn_cast<VarRegion>(superR)) { - // Check if the containing array has an initialized value that we can trust. - // We can trust a const value or a value of a global initializer in main(). - const VarDecl *VD = VR->getDecl(); - if (VD->getType().isConstQualified() || - R->getElementType().isConstQualified() || - (B.isMainAnalysis() && VD->hasGlobalStorage())) { - if (const Expr *Init = VD->getAnyInitializer()) { - if (const auto *InitList = dyn_cast<InitListExpr>(Init)) { - // The array index has to be known. - if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) { - int64_t i = CI->getValue().getSExtValue(); - // If it is known that the index is out of bounds, we can return - // an undefined value. - if (i < 0) - return UndefinedVal(); - - if (auto CAT = Ctx.getAsConstantArrayType(VD->getType())) - if (CAT->getSize().sle(i)) - return UndefinedVal(); - - // If there is a list, but no init, it must be zero. - if (i >= InitList->getNumInits()) - return svalBuilder.makeZeroVal(R->getElementType()); - - if (const Expr *ElemInit = InitList->getInit(i)) - if (Optional<SVal> V = svalBuilder.getConstantVal(ElemInit)) - return *V; - } - } - } + const StringLiteral *SL = StrR->getStringLiteral(); + return getSValFromStringLiteral(SL, Idx.getZExtValue(), T); } + } else if (isa<ElementRegion, VarRegion>(superR)) { + if (Optional<SVal> V = getConstantValFromConstArrayInitializer(B, R)) + return *V; } // Check for loads from a code text region. For such loads, just give up. diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp index 7395622a659c..04165a443fff 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp @@ -1,9 +1,8 @@ //== SMTConstraintManager.cpp -----------------------------------*- C++ -*--==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index b459b5adb511..71bfc86ab8f7 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -49,6 +49,16 @@ using namespace ento; void SValBuilder::anchor() {} +SValBuilder::SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, + ProgramStateManager &stateMgr) + : Context(context), BasicVals(context, alloc), + SymMgr(context, BasicVals, alloc), MemMgr(context, alloc), + StateMgr(stateMgr), + AnOpts( + stateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions()), + ArrayIndexTy(context.LongLongTy), + ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {} + DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) { if (Loc::isLocType(type)) return makeNull(); @@ -244,8 +254,7 @@ SValBuilder::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol, } DefinedSVal SValBuilder::getMemberPointer(const NamedDecl *ND) { - assert(!ND || isa<CXXMethodDecl>(ND) || isa<FieldDecl>(ND) || - isa<IndirectFieldDecl>(ND)); + assert(!ND || (isa<CXXMethodDecl, FieldDecl, IndirectFieldDecl>(ND))); if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(ND)) { // Sema treats pointers to static member functions as have function pointer @@ -405,9 +414,7 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op, // TODO: When the Max Complexity is reached, we should conjure a symbol // instead of generating an Unknown value and propagate the taint info to it. - const unsigned MaxComp = StateMgr.getOwningEngine() - .getAnalysisManager() - .options.MaxSymbolComplexity; + const unsigned MaxComp = AnOpts.MaxSymbolComplexity; if (symLHS && symRHS && (symLHS->computeComplexity() + symRHS->computeComplexity()) < MaxComp) @@ -725,16 +732,12 @@ SVal SValBuilder::evalCastSubKind(loc::MemRegionVal V, QualType CastTy, // This change is needed for architectures with varying // pointer widths. See the amdgcn opencl reproducer with // this change as an example: solver-sym-simplification-ptr-bool.cl - // FIXME: We could encounter a reference here, - // try returning a concrete 'true' since it might - // be easier on the solver. // FIXME: Cleanup remainder of `getZeroWithPtrWidth ()` // and `getIntWithPtrWidth()` functions to prevent future // confusion - const llvm::APSInt &Zero = Ty->isReferenceType() - ? BasicVals.getZeroWithPtrWidth() - : BasicVals.getZeroWithTypeSize(Ty); - return makeNonLoc(Sym, BO_NE, Zero, CastTy); + if (!Ty->isReferenceType()) + return makeNonLoc(Sym, BO_NE, BasicVals.getZeroWithTypeSize(Ty), + CastTy); } // Non-symbolic memory regions are always true. return makeTruthVal(true, CastTy); diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index e57d92fbcebb..681a1f64eadc 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" @@ -25,7 +24,7 @@ class SimpleSValBuilder : public SValBuilder { public: SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, ProgramStateManager &stateMgr) - : SValBuilder(alloc, context, stateMgr) {} + : SValBuilder(alloc, context, stateMgr) {} ~SimpleSValBuilder() override {} SVal evalMinus(NonLoc val) override; @@ -129,14 +128,14 @@ SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, // a&0 and a&(~0) if (RHS == 0) return makeIntVal(0, resultTy); - else if (RHS.isAllOnesValue()) + else if (RHS.isAllOnes()) isIdempotent = true; break; case BO_Or: // a|0 and a|(~0) if (RHS == 0) isIdempotent = true; - else if (RHS.isAllOnesValue()) { + else if (RHS.isAllOnes()) { const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS); return nonloc::ConcreteInt(Result); } @@ -320,13 +319,10 @@ static Optional<NonLoc> tryRearrange(ProgramStateRef State, // We expect everything to be of the same type - this type. QualType SingleTy; - auto &Opts = - StateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions(); - // FIXME: After putting complexity threshold to the symbols we can always // rearrange additive operations but rearrange comparisons only if // option is set. - if(!Opts.ShouldAggressivelySimplifyBinaryOperation) + if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation) return None; SymbolRef LSym = Lhs.getAsSymbol(); @@ -513,7 +509,7 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, continue; case BO_Shr: // (~0)>>a - if (LHSValue.isAllOnesValue() && LHSValue.isSigned()) + if (LHSValue.isAllOnes() && LHSValue.isSigned()) return evalCast(lhs, resultTy, QualType{}); LLVM_FALLTHROUGH; case BO_Shl: diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Store.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Store.cpp index b867b0746f90..3cc0cd224d7a 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -84,7 +84,7 @@ Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R, // involved. Blocks can be casted to/from 'id', as they can be treated // as Objective-C objects. This could possibly be handled by enhancing // our reasoning of downcasts of symbolic objects. - if (isa<CodeTextRegion>(R) || isa<SymbolicRegion>(R)) + if (isa<CodeTextRegion, SymbolicRegion>(R)) return R; // We don't know what to make of it. Return a NULL region, which @@ -96,18 +96,24 @@ Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R, // already be handled. QualType PointeeTy = CastToTy->getPointeeType(); QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy); + CanonPointeeTy = CanonPointeeTy.getLocalUnqualifiedType(); // Handle casts to void*. We just pass the region through. - if (CanonPointeeTy.getLocalUnqualifiedType() == Ctx.VoidTy) + if (CanonPointeeTy == Ctx.VoidTy) return R; - // Handle casts from compatible types. - if (R->isBoundable()) + const auto IsSameRegionType = [&Ctx](const MemRegion *R, QualType OtherTy) { if (const auto *TR = dyn_cast<TypedValueRegion>(R)) { QualType ObjTy = Ctx.getCanonicalType(TR->getValueType()); - if (CanonPointeeTy == ObjTy) - return R; + if (OtherTy == ObjTy.getLocalUnqualifiedType()) + return true; } + return false; + }; + + // Handle casts from compatible types. + if (R->isBoundable() && IsSameRegionType(R, CanonPointeeTy)) + return R; // Process region cast according to the kind of the region being cast. switch (R->getKind()) { @@ -174,16 +180,11 @@ Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R, CharUnits off = rawOff.getOffset(); if (off.isZero()) { - // Edge case: we are at 0 bytes off the beginning of baseR. We - // check to see if type we are casting to is the same as the base - // region. If so, just return the base region. - if (const auto *TR = dyn_cast<TypedValueRegion>(baseR)) { - QualType ObjTy = Ctx.getCanonicalType(TR->getValueType()); - QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy); - if (CanonPointeeTy == ObjTy) - return baseR; - } - + // Edge case: we are at 0 bytes off the beginning of baseR. We check to + // see if the type we are casting to is the same as the type of the base + // region. If so, just return the base region. + if (IsSameRegionType(baseR, CanonPointeeTy)) + return baseR; // Otherwise, create a new ElementRegion at offset 0. return MakeElementRegion(cast<SubRegion>(baseR), PointeeTy); } @@ -442,6 +443,19 @@ SVal StoreManager::getLValueIvar(const ObjCIvarDecl *decl, SVal base) { SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset, SVal Base) { + + // Special case, if index is 0, return the same type as if + // this was not an array dereference. + if (Offset.isZeroConstant()) { + QualType BT = Base.getType(this->Ctx); + if (!BT.isNull() && !elementType.isNull()) { + QualType PointeeTy = BT->getPointeeType(); + if (!PointeeTy.isNull() && + PointeeTy.getCanonicalType() == elementType.getCanonicalType()) + return Base; + } + } + // If the base is an unknown or undefined value, just return it back. // FIXME: For absolute pointer addresses, we just return that value back as // well, although in reality we should return the offset added to that diff --git a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index 79a8eef30576..1ae1f97efd2e 100644 --- a/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/contrib/llvm-project/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -425,19 +425,7 @@ bool SymbolReaper::isLiveRegion(const MemRegion *MR) { // tell if anything still refers to this region. Unlike SymbolicRegions, // AllocaRegions don't have associated symbols, though, so we don't actually // have a way to track their liveness. - if (isa<AllocaRegion>(MR)) - return true; - - if (isa<CXXThisRegion>(MR)) - return true; - - if (isa<MemSpaceRegion>(MR)) - return true; - - if (isa<CodeTextRegion>(MR)) - return true; - - return false; + return isa<AllocaRegion, CXXThisRegion, MemSpaceRegion, CodeTextRegion>(MR); } bool SymbolReaper::isLive(SymbolRef sym) { |