diff options
Diffstat (limited to 'lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp')
-rw-r--r-- | lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp | 264 |
1 files changed, 217 insertions, 47 deletions
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index d3ab98033236..d442b26b3959 100644 --- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -15,16 +15,18 @@ //===----------------------------------------------------------------------===// #include "Taint.h" -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "Yaml.h" #include "clang/AST/Attr.h" #include "clang/Basic/Builtins.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include <climits> -#include <initializer_list> +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/YAMLTraits.h" +#include <limits> #include <utility> using namespace clang; @@ -44,14 +46,51 @@ public: void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; - void printState(raw_ostream &Out, ProgramStateRef State, - const char *NL, const char *Sep) const override; + void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, + const char *Sep) const override; -private: - static const unsigned InvalidArgIndex = UINT_MAX; + using ArgVector = SmallVector<unsigned, 2>; + using SignedArgVector = SmallVector<int, 2>; + + enum class VariadicType { None, Src, Dst }; + + /// Used to parse the configuration file. + struct TaintConfiguration { + using NameArgsPair = std::pair<std::string, ArgVector>; + + struct Propagation { + std::string Name; + ArgVector SrcArgs; + SignedArgVector DstArgs; + VariadicType VarType; + unsigned VarIndex; + }; + + std::vector<Propagation> Propagations; + std::vector<NameArgsPair> Filters; + std::vector<NameArgsPair> Sinks; + + TaintConfiguration() = default; + TaintConfiguration(const TaintConfiguration &) = default; + TaintConfiguration(TaintConfiguration &&) = default; + TaintConfiguration &operator=(const TaintConfiguration &) = default; + TaintConfiguration &operator=(TaintConfiguration &&) = default; + }; + + /// Convert SignedArgVector to ArgVector. + ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, + SignedArgVector Args); + + /// Parse the config. + void parseConfiguration(CheckerManager &Mgr, const std::string &Option, + TaintConfiguration &&Config); + + static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; /// Denotes the return vale. - static const unsigned ReturnValueIndex = UINT_MAX - 1; + static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - + 1}; +private: mutable std::unique_ptr<BugType> BT; void initBugType() const { if (!BT) @@ -76,28 +115,43 @@ private: static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); /// Check for CWE-134: Uncontrolled Format String. - static const char MsgUncontrolledFormatString[]; + static constexpr llvm::StringLiteral MsgUncontrolledFormatString = + "Untrusted data is used as a format string " + "(CWE-134: Uncontrolled Format String)"; bool checkUncontrolledFormatString(const CallExpr *CE, CheckerContext &C) const; /// Check for: /// CERT/STR02-C. "Sanitize data passed to complex subsystems" /// CWE-78, "Failure to Sanitize Data into an OS Command" - static const char MsgSanitizeSystemArgs[]; + static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = + "Untrusted data is passed to a system call " + "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; bool checkSystemCall(const CallExpr *CE, StringRef Name, CheckerContext &C) const; /// Check if tainted data is used as a buffer size ins strn.. functions, /// and allocators. - static const char MsgTaintedBufferSize[]; + static constexpr llvm::StringLiteral MsgTaintedBufferSize = + "Untrusted data is used to specify the buffer size " + "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " + "for character data and the null terminator)"; bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, CheckerContext &C) const; + /// Check if tainted data is used as a custom sink's parameter. + static constexpr llvm::StringLiteral MsgCustomSink = + "Untrusted data is passed to a user-defined sink"; + bool checkCustomSinks(const CallExpr *CE, StringRef Name, + CheckerContext &C) const; + /// Generate a report if the expression is tainted or points to tainted data. - bool generateReportIfTainted(const Expr *E, const char Msg[], + bool generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const; - using ArgVector = SmallVector<unsigned, 2>; + struct TaintPropagationRule; + using NameRuleMap = llvm::StringMap<TaintPropagationRule>; + using NameArgMap = llvm::StringMap<ArgVector>; /// A struct used to specify taint propagation rules for a function. /// @@ -109,8 +163,6 @@ private: /// ReturnValueIndex is added to the dst list, the return value will be /// tainted. struct TaintPropagationRule { - enum class VariadicType { None, Src, Dst }; - using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *, CheckerContext &C); @@ -131,8 +183,7 @@ private: : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), PropagationFunc(nullptr) {} - TaintPropagationRule(std::initializer_list<unsigned> &&Src, - std::initializer_list<unsigned> &&Dst, + TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, VariadicType Var = VariadicType::None, unsigned VarIndex = InvalidArgIndex, PropagationFuncType Func = nullptr) @@ -141,7 +192,8 @@ private: /// Get the propagation rule for a given function. static TaintPropagationRule - getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name, + getTaintPropagationRule(const NameRuleMap &CustomPropagations, + const FunctionDecl *FDecl, StringRef Name, CheckerContext &C); void addSrcArg(unsigned A) { SrcArgs.push_back(A); } @@ -176,25 +228,71 @@ private: static bool postSocket(bool IsTainted, const CallExpr *CE, CheckerContext &C); }; + + /// Defines a map between the propagation function's name and + /// TaintPropagationRule. + NameRuleMap CustomPropagations; + + /// Defines a map between the filter function's name and filtering args. + NameArgMap CustomFilters; + + /// Defines a map between the sink function's name and sinking args. + NameArgMap CustomSinks; }; const unsigned GenericTaintChecker::ReturnValueIndex; const unsigned GenericTaintChecker::InvalidArgIndex; -const char GenericTaintChecker::MsgUncontrolledFormatString[] = - "Untrusted data is used as a format string " - "(CWE-134: Uncontrolled Format String)"; +// FIXME: these lines can be removed in C++17 +constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; +constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; +constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; +constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; +} // end of anonymous namespace -const char GenericTaintChecker::MsgSanitizeSystemArgs[] = - "Untrusted data is passed to a system call " - "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; +using TaintConfig = GenericTaintChecker::TaintConfiguration; -const char GenericTaintChecker::MsgTaintedBufferSize[] = - "Untrusted data is used to specify the buffer size " - "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " - "for character data and the null terminator)"; +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) +LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair) -} // end of anonymous namespace +namespace llvm { +namespace yaml { +template <> struct MappingTraits<TaintConfig> { + static void mapping(IO &IO, TaintConfig &Config) { + IO.mapOptional("Propagations", Config.Propagations); + IO.mapOptional("Filters", Config.Filters); + IO.mapOptional("Sinks", Config.Sinks); + } +}; + +template <> struct MappingTraits<TaintConfig::Propagation> { + static void mapping(IO &IO, TaintConfig::Propagation &Propagation) { + IO.mapRequired("Name", Propagation.Name); + IO.mapOptional("SrcArgs", Propagation.SrcArgs); + IO.mapOptional("DstArgs", Propagation.DstArgs); + IO.mapOptional("VariadicType", Propagation.VarType, + GenericTaintChecker::VariadicType::None); + IO.mapOptional("VariadicIndex", Propagation.VarIndex, + GenericTaintChecker::InvalidArgIndex); + } +}; + +template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { + static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { + IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); + IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); + IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); + } +}; + +template <> struct MappingTraits<TaintConfig::NameArgsPair> { + static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) { + IO.mapRequired("Name", NameArg.first); + IO.mapRequired("Args", NameArg.second); + } +}; +} // namespace yaml +} // namespace llvm /// A set which is used to pass information from call pre-visit instruction /// to the call post-visit. The values are unsigned integers, which are either @@ -202,9 +300,46 @@ const char GenericTaintChecker::MsgTaintedBufferSize[] = /// points to data, which should be tainted on return. REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) +GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector( + CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) { + ArgVector Result; + for (int Arg : Args) { + if (Arg == -1) + Result.push_back(ReturnValueIndex); + else if (Arg < -1) { + Result.push_back(InvalidArgIndex); + Mgr.reportInvalidCheckerOptionValue( + this, Option, + "an argument number for propagation rules greater or equal to -1"); + } else + Result.push_back(static_cast<unsigned>(Arg)); + } + return Result; +} + +void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, + const std::string &Option, + TaintConfiguration &&Config) { + for (auto &P : Config.Propagations) { + GenericTaintChecker::CustomPropagations.try_emplace( + P.Name, std::move(P.SrcArgs), + convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex); + } + + for (auto &F : Config.Filters) { + GenericTaintChecker::CustomFilters.try_emplace(F.first, + std::move(F.second)); + } + + for (auto &S : Config.Sinks) { + GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second)); + } +} + GenericTaintChecker::TaintPropagationRule GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( - const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) { + const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl, + StringRef Name, CheckerContext &C) { // TODO: Currently, we might lose precision here: we always mark a return // value as tainted even if it's just a pointer, pointing to tainted data. @@ -218,7 +353,8 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getch", TaintPropagationRule({}, {ReturnValueIndex})) .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex})) - .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex})) + .Case("getchar_unlocked", + TaintPropagationRule({}, {ReturnValueIndex})) .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex})) .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1)) @@ -297,6 +433,10 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( // or smart memory copy: // - memccpy - copying until hitting a special character. + auto It = CustomPropagations.find(Name); + if (It != CustomPropagations.end()) + return It->getValue(); + return TaintPropagationRule(); } @@ -336,8 +476,8 @@ void GenericTaintChecker::addSourcesPre(const CallExpr *CE, return; // First, try generating a propagation rule for this function. - TaintPropagationRule Rule = - TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); + TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( + this->CustomPropagations, FDecl, Name, C); if (!Rule.isNull()) { State = Rule.process(CE, C); if (!State) @@ -409,6 +549,9 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE, if (checkTaintedBufferSize(CE, FDecl, C)) return true; + if (checkCustomSinks(CE, Name, C)) + return true; + return false; } @@ -446,7 +589,8 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, bool IsTainted = true; for (unsigned ArgNum : SrcArgs) { if (ArgNum >= CE->getNumArgs()) - return State; + continue; + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) break; } @@ -454,7 +598,7 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, // Check for taint in variadic arguments. if (!IsTainted && VariadicType::Src == VarType) { // Check if any of the arguments is tainted - for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) break; } @@ -474,8 +618,10 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, continue; } + if (ArgNum >= CE->getNumArgs()) + continue; + // Mark the given argument. - assert(ArgNum < CE->getNumArgs()); State = State->add<TaintArgsOnPostVisit>(ArgNum); } @@ -485,7 +631,7 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, // If they are not pointing to const data, mark data as tainted. // TODO: So far we are just going one level down; ideally we'd need to // recurse here. - for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) { const Expr *Arg = CE->getArg(i); // Process pointer argument. const Type *ArgTy = Arg->getType().getTypePtr(); @@ -550,7 +696,7 @@ bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { static bool getPrintfFormatArgumentNum(const CallExpr *CE, const CheckerContext &C, - unsigned int &ArgNum) { + unsigned &ArgNum) { // Find if the function contains a format string argument. // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, // vsnprintf, syslog, custom annotated functions. @@ -572,8 +718,7 @@ static bool getPrintfFormatArgumentNum(const CallExpr *CE, return false; } -bool GenericTaintChecker::generateReportIfTainted(const Expr *E, - const char Msg[], +bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, CheckerContext &C) const { assert(E); @@ -591,9 +736,9 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, // Generate diagnostic. if (ExplodedNode *N = C.generateNonFatalErrorNode()) { initBugType(); - auto report = llvm::make_unique<BugReport>(*BT, Msg, N); + auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); report->addRange(E->getSourceRange()); - report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal)); + report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); C.emitReport(std::move(report)); return true; } @@ -603,7 +748,7 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E, bool GenericTaintChecker::checkUncontrolledFormatString( const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. - unsigned int ArgNum = 0; + unsigned ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) return false; @@ -629,9 +774,9 @@ bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, .Case("execvP", 0) .Case("execve", 0) .Case("dlopen", 0) - .Default(UINT_MAX); + .Default(InvalidArgIndex); - if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) + if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1)) return false; return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C); @@ -676,8 +821,33 @@ bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C); } -void ento::registerGenericTaintChecker(CheckerManager &mgr) { - mgr.registerChecker<GenericTaintChecker>(); +bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name, + CheckerContext &C) const { + auto It = CustomSinks.find(Name); + if (It == CustomSinks.end()) + return false; + + const GenericTaintChecker::ArgVector &Args = It->getValue(); + for (unsigned ArgNum : Args) { + if (ArgNum >= CE->getNumArgs()) + continue; + + if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C)) + return true; + } + + return false; +} + +void ento::registerGenericTaintChecker(CheckerManager &Mgr) { + auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); + std::string Option{"Config"}; + StringRef ConfigFile = + Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); + llvm::Optional<TaintConfig> Config = + getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); + if (Config) + Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); } bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) { |