aboutsummaryrefslogtreecommitdiff
path: root/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp')
-rw-r--r--lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp264
1 files changed, 217 insertions, 47 deletions
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index d3ab98033236..d442b26b3959 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -15,16 +15,18 @@
//===----------------------------------------------------------------------===//
#include "Taint.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include <climits>
-#include <initializer_list>
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <limits>
#include <utility>
using namespace clang;
@@ -44,14 +46,51 @@ public:
void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
- void printState(raw_ostream &Out, ProgramStateRef State,
- const char *NL, const char *Sep) const override;
+ void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
+ const char *Sep) const override;
-private:
- static const unsigned InvalidArgIndex = UINT_MAX;
+ using ArgVector = SmallVector<unsigned, 2>;
+ using SignedArgVector = SmallVector<int, 2>;
+
+ enum class VariadicType { None, Src, Dst };
+
+ /// Used to parse the configuration file.
+ struct TaintConfiguration {
+ using NameArgsPair = std::pair<std::string, ArgVector>;
+
+ struct Propagation {
+ std::string Name;
+ ArgVector SrcArgs;
+ SignedArgVector DstArgs;
+ VariadicType VarType;
+ unsigned VarIndex;
+ };
+
+ std::vector<Propagation> Propagations;
+ std::vector<NameArgsPair> Filters;
+ std::vector<NameArgsPair> Sinks;
+
+ TaintConfiguration() = default;
+ TaintConfiguration(const TaintConfiguration &) = default;
+ TaintConfiguration(TaintConfiguration &&) = default;
+ TaintConfiguration &operator=(const TaintConfiguration &) = default;
+ TaintConfiguration &operator=(TaintConfiguration &&) = default;
+ };
+
+ /// Convert SignedArgVector to ArgVector.
+ ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
+ SignedArgVector Args);
+
+ /// Parse the config.
+ void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
+ TaintConfiguration &&Config);
+
+ static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
/// Denotes the return vale.
- static const unsigned ReturnValueIndex = UINT_MAX - 1;
+ static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
+ 1};
+private:
mutable std::unique_ptr<BugType> BT;
void initBugType() const {
if (!BT)
@@ -76,28 +115,43 @@ private:
static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
/// Check for CWE-134: Uncontrolled Format String.
- static const char MsgUncontrolledFormatString[];
+ static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
+ "Untrusted data is used as a format string "
+ "(CWE-134: Uncontrolled Format String)";
bool checkUncontrolledFormatString(const CallExpr *CE,
CheckerContext &C) const;
/// Check for:
/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
/// CWE-78, "Failure to Sanitize Data into an OS Command"
- static const char MsgSanitizeSystemArgs[];
+ static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
+ "Untrusted data is passed to a system call "
+ "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
bool checkSystemCall(const CallExpr *CE, StringRef Name,
CheckerContext &C) const;
/// Check if tainted data is used as a buffer size ins strn.. functions,
/// and allocators.
- static const char MsgTaintedBufferSize[];
+ static constexpr llvm::StringLiteral MsgTaintedBufferSize =
+ "Untrusted data is used to specify the buffer size "
+ "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
+ "for character data and the null terminator)";
bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
CheckerContext &C) const;
+ /// Check if tainted data is used as a custom sink's parameter.
+ static constexpr llvm::StringLiteral MsgCustomSink =
+ "Untrusted data is passed to a user-defined sink";
+ bool checkCustomSinks(const CallExpr *CE, StringRef Name,
+ CheckerContext &C) const;
+
/// Generate a report if the expression is tainted or points to tainted data.
- bool generateReportIfTainted(const Expr *E, const char Msg[],
+ bool generateReportIfTainted(const Expr *E, StringRef Msg,
CheckerContext &C) const;
- using ArgVector = SmallVector<unsigned, 2>;
+ struct TaintPropagationRule;
+ using NameRuleMap = llvm::StringMap<TaintPropagationRule>;
+ using NameArgMap = llvm::StringMap<ArgVector>;
/// A struct used to specify taint propagation rules for a function.
///
@@ -109,8 +163,6 @@ private:
/// ReturnValueIndex is added to the dst list, the return value will be
/// tainted.
struct TaintPropagationRule {
- enum class VariadicType { None, Src, Dst };
-
using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
CheckerContext &C);
@@ -131,8 +183,7 @@ private:
: VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
PropagationFunc(nullptr) {}
- TaintPropagationRule(std::initializer_list<unsigned> &&Src,
- std::initializer_list<unsigned> &&Dst,
+ TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
VariadicType Var = VariadicType::None,
unsigned VarIndex = InvalidArgIndex,
PropagationFuncType Func = nullptr)
@@ -141,7 +192,8 @@ private:
/// Get the propagation rule for a given function.
static TaintPropagationRule
- getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
+ getTaintPropagationRule(const NameRuleMap &CustomPropagations,
+ const FunctionDecl *FDecl, StringRef Name,
CheckerContext &C);
void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
@@ -176,25 +228,71 @@ private:
static bool postSocket(bool IsTainted, const CallExpr *CE,
CheckerContext &C);
};
+
+ /// Defines a map between the propagation function's name and
+ /// TaintPropagationRule.
+ NameRuleMap CustomPropagations;
+
+ /// Defines a map between the filter function's name and filtering args.
+ NameArgMap CustomFilters;
+
+ /// Defines a map between the sink function's name and sinking args.
+ NameArgMap CustomSinks;
};
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;
-const char GenericTaintChecker::MsgUncontrolledFormatString[] =
- "Untrusted data is used as a format string "
- "(CWE-134: Uncontrolled Format String)";
+// FIXME: these lines can be removed in C++17
+constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
+constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
+constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
+constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
+} // end of anonymous namespace
-const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
- "Untrusted data is passed to a system call "
- "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+using TaintConfig = GenericTaintChecker::TaintConfiguration;
-const char GenericTaintChecker::MsgTaintedBufferSize[] =
- "Untrusted data is used to specify the buffer size "
- "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
- "for character data and the null terminator)";
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameArgsPair)
-} // end of anonymous namespace
+namespace llvm {
+namespace yaml {
+template <> struct MappingTraits<TaintConfig> {
+ static void mapping(IO &IO, TaintConfig &Config) {
+ IO.mapOptional("Propagations", Config.Propagations);
+ IO.mapOptional("Filters", Config.Filters);
+ IO.mapOptional("Sinks", Config.Sinks);
+ }
+};
+
+template <> struct MappingTraits<TaintConfig::Propagation> {
+ static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
+ IO.mapRequired("Name", Propagation.Name);
+ IO.mapOptional("SrcArgs", Propagation.SrcArgs);
+ IO.mapOptional("DstArgs", Propagation.DstArgs);
+ IO.mapOptional("VariadicType", Propagation.VarType,
+ GenericTaintChecker::VariadicType::None);
+ IO.mapOptional("VariadicIndex", Propagation.VarIndex,
+ GenericTaintChecker::InvalidArgIndex);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
+ static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
+ IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
+ IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
+ IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
+ }
+};
+
+template <> struct MappingTraits<TaintConfig::NameArgsPair> {
+ static void mapping(IO &IO, TaintConfig::NameArgsPair &NameArg) {
+ IO.mapRequired("Name", NameArg.first);
+ IO.mapRequired("Args", NameArg.second);
+ }
+};
+} // namespace yaml
+} // namespace llvm
/// A set which is used to pass information from call pre-visit instruction
/// to the call post-visit. The values are unsigned integers, which are either
@@ -202,9 +300,46 @@ const char GenericTaintChecker::MsgTaintedBufferSize[] =
/// points to data, which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
+GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
+ CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
+ ArgVector Result;
+ for (int Arg : Args) {
+ if (Arg == -1)
+ Result.push_back(ReturnValueIndex);
+ else if (Arg < -1) {
+ Result.push_back(InvalidArgIndex);
+ Mgr.reportInvalidCheckerOptionValue(
+ this, Option,
+ "an argument number for propagation rules greater or equal to -1");
+ } else
+ Result.push_back(static_cast<unsigned>(Arg));
+ }
+ return Result;
+}
+
+void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
+ const std::string &Option,
+ TaintConfiguration &&Config) {
+ for (auto &P : Config.Propagations) {
+ GenericTaintChecker::CustomPropagations.try_emplace(
+ P.Name, std::move(P.SrcArgs),
+ convertToArgVector(Mgr, Option, P.DstArgs), P.VarType, P.VarIndex);
+ }
+
+ for (auto &F : Config.Filters) {
+ GenericTaintChecker::CustomFilters.try_emplace(F.first,
+ std::move(F.second));
+ }
+
+ for (auto &S : Config.Sinks) {
+ GenericTaintChecker::CustomSinks.try_emplace(S.first, std::move(S.second));
+ }
+}
+
GenericTaintChecker::TaintPropagationRule
GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
- const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
+ const NameRuleMap &CustomPropagations, const FunctionDecl *FDecl,
+ StringRef Name, CheckerContext &C) {
// TODO: Currently, we might lose precision here: we always mark a return
// value as tainted even if it's just a pointer, pointing to tainted data.
@@ -218,7 +353,8 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
.Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
- .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
+ .Case("getchar_unlocked",
+ TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
.Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
@@ -297,6 +433,10 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
// or smart memory copy:
// - memccpy - copying until hitting a special character.
+ auto It = CustomPropagations.find(Name);
+ if (It != CustomPropagations.end())
+ return It->getValue();
+
return TaintPropagationRule();
}
@@ -336,8 +476,8 @@ void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
return;
// First, try generating a propagation rule for this function.
- TaintPropagationRule Rule =
- TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
+ TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
+ this->CustomPropagations, FDecl, Name, C);
if (!Rule.isNull()) {
State = Rule.process(CE, C);
if (!State)
@@ -409,6 +549,9 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE,
if (checkTaintedBufferSize(CE, FDecl, C))
return true;
+ if (checkCustomSinks(CE, Name, C))
+ return true;
+
return false;
}
@@ -446,7 +589,8 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
bool IsTainted = true;
for (unsigned ArgNum : SrcArgs) {
if (ArgNum >= CE->getNumArgs())
- return State;
+ continue;
+
if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
break;
}
@@ -454,7 +598,7 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
// Check for taint in variadic arguments.
if (!IsTainted && VariadicType::Src == VarType) {
// Check if any of the arguments is tainted
- for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+ for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
break;
}
@@ -474,8 +618,10 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
continue;
}
+ if (ArgNum >= CE->getNumArgs())
+ continue;
+
// Mark the given argument.
- assert(ArgNum < CE->getNumArgs());
State = State->add<TaintArgsOnPostVisit>(ArgNum);
}
@@ -485,7 +631,7 @@ GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
// If they are not pointing to const data, mark data as tainted.
// TODO: So far we are just going one level down; ideally we'd need to
// recurse here.
- for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
+ for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
const Expr *Arg = CE->getArg(i);
// Process pointer argument.
const Type *ArgTy = Arg->getType().getTypePtr();
@@ -550,7 +696,7 @@ bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
static bool getPrintfFormatArgumentNum(const CallExpr *CE,
const CheckerContext &C,
- unsigned int &ArgNum) {
+ unsigned &ArgNum) {
// Find if the function contains a format string argument.
// Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
// vsnprintf, syslog, custom annotated functions.
@@ -572,8 +718,7 @@ static bool getPrintfFormatArgumentNum(const CallExpr *CE,
return false;
}
-bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
- const char Msg[],
+bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
CheckerContext &C) const {
assert(E);
@@ -591,9 +736,9 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
// Generate diagnostic.
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
initBugType();
- auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
+ auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
report->addRange(E->getSourceRange());
- report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
+ report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
C.emitReport(std::move(report));
return true;
}
@@ -603,7 +748,7 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE, CheckerContext &C) const {
// Check if the function contains a format string argument.
- unsigned int ArgNum = 0;
+ unsigned ArgNum = 0;
if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
return false;
@@ -629,9 +774,9 @@ bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
.Case("execvP", 0)
.Case("execve", 0)
.Case("dlopen", 0)
- .Default(UINT_MAX);
+ .Default(InvalidArgIndex);
- if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
+ if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
return false;
return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
@@ -676,8 +821,33 @@ bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
}
-void ento::registerGenericTaintChecker(CheckerManager &mgr) {
- mgr.registerChecker<GenericTaintChecker>();
+bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE, StringRef Name,
+ CheckerContext &C) const {
+ auto It = CustomSinks.find(Name);
+ if (It == CustomSinks.end())
+ return false;
+
+ const GenericTaintChecker::ArgVector &Args = It->getValue();
+ for (unsigned ArgNum : Args) {
+ if (ArgNum >= CE->getNumArgs())
+ continue;
+
+ if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
+ return true;
+ }
+
+ return false;
+}
+
+void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
+ auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
+ std::string Option{"Config"};
+ StringRef ConfigFile =
+ Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
+ llvm::Optional<TaintConfig> Config =
+ getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
+ if (Config)
+ Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
}
bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {