aboutsummaryrefslogtreecommitdiff
path: root/lib/Analysis/CloneDetection.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Analysis/CloneDetection.cpp')
-rw-r--r--lib/Analysis/CloneDetection.cpp155
1 files changed, 99 insertions, 56 deletions
diff --git a/lib/Analysis/CloneDetection.cpp b/lib/Analysis/CloneDetection.cpp
index 5ea74989a7ec..098803f9a417 100644
--- a/lib/Analysis/CloneDetection.cpp
+++ b/lib/Analysis/CloneDetection.cpp
@@ -13,16 +13,12 @@
#include "clang/Analysis/CloneDetection.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/RecursiveASTVisitor.h"
-#include "clang/AST/Stmt.h"
-#include "clang/Lex/Lexer.h"
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/DeclTemplate.h"
#include "llvm/Support/MD5.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Path.h"
using namespace clang;
-using namespace clang::clone_detection;
StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D,
unsigned StartIndex, unsigned EndIndex)
@@ -91,34 +87,6 @@ SourceRange StmtSequence::getSourceRange() const {
return SourceRange(getStartLoc(), getEndLoc());
}
-/// Prints the macro name that contains the given SourceLocation into the given
-/// raw_string_ostream.
-static void printMacroName(llvm::raw_string_ostream &MacroStack,
- ASTContext &Context, SourceLocation Loc) {
- MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(),
- Context.getLangOpts());
-
- // Add an empty space at the end as a padding to prevent
- // that macro names concatenate to the names of other macros.
- MacroStack << " ";
-}
-
-std::string clone_detection::getMacroStack(SourceLocation Loc,
- ASTContext &Context) {
- std::string MacroStack;
- llvm::raw_string_ostream MacroStackStream(MacroStack);
- SourceManager &SM = Context.getSourceManager();
-
- // Iterate over all macros that expanded into the given SourceLocation.
- while (Loc.isMacroID()) {
- // Add the macro name to the stream.
- printMacroName(MacroStackStream, Context, Loc);
- Loc = SM.getImmediateMacroCallerLoc(Loc);
- }
- MacroStackStream.flush();
- return MacroStack;
-}
-
void CloneDetector::analyzeCodeBody(const Decl *D) {
assert(D);
assert(D->hasBody());
@@ -184,16 +152,17 @@ void OnlyLargestCloneConstraint::constrain(
}
}
-bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) {
+bool FilenamePatternConstraint::isAutoGenerated(
+ const CloneDetector::CloneGroup &Group) {
std::string Error;
- if (IgnoredFilesPattern.empty() || Group.empty() ||
+ if (IgnoredFilesPattern.empty() || Group.empty() ||
!IgnoredFilesRegex->isValid(Error))
return false;
for (const StmtSequence &S : Group) {
const SourceManager &SM = S.getASTContext().getSourceManager();
- StringRef Filename = llvm::sys::path::filename(SM.getFilename(
- S.getContainingDecl()->getLocation()));
+ StringRef Filename = llvm::sys::path::filename(
+ SM.getFilename(S.getContainingDecl()->getLocation()));
if (IgnoredFilesRegex->match(Filename))
return true;
}
@@ -201,6 +170,59 @@ bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup
return false;
}
+/// This class defines what a type II code clone is: If it collects for two
+/// statements the same data, then those two statements are considered to be
+/// clones of each other.
+///
+/// All collected data is forwarded to the given data consumer of the type T.
+/// The data consumer class needs to provide a member method with the signature:
+/// update(StringRef Str)
+namespace {
+template <class T>
+class CloneTypeIIStmtDataCollector
+ : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> {
+ ASTContext &Context;
+ /// The data sink to which all data is forwarded.
+ T &DataConsumer;
+
+ template <class Ty> void addData(const Ty &Data) {
+ data_collection::addDataToConsumer(DataConsumer, Data);
+ }
+
+public:
+ CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,
+ T &DataConsumer)
+ : Context(Context), DataConsumer(DataConsumer) {
+ this->Visit(S);
+ }
+
+// Define a visit method for each class to collect data and subsequently visit
+// all parent classes. This uses a template so that custom visit methods by us
+// take precedence.
+#define DEF_ADD_DATA(CLASS, CODE) \
+ template <class = void> void Visit##CLASS(const CLASS *S) { \
+ CODE; \
+ ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
+ }
+
+#include "clang/AST/StmtDataCollectors.inc"
+
+// Type II clones ignore variable names and literals, so let's skip them.
+#define SKIP(CLASS) \
+ void Visit##CLASS(const CLASS *S) { \
+ ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \
+ }
+ SKIP(DeclRefExpr)
+ SKIP(MemberExpr)
+ SKIP(IntegerLiteral)
+ SKIP(FloatingLiteral)
+ SKIP(StringLiteral)
+ SKIP(CXXBoolLiteralExpr)
+ SKIP(CharacterLiteral)
+#undef SKIP
+};
+} // end anonymous namespace
+
static size_t createHash(llvm::MD5 &Hash) {
size_t HashCode;
@@ -216,13 +238,22 @@ static size_t createHash(llvm::MD5 &Hash) {
return HashCode;
}
-size_t RecursiveCloneTypeIIConstraint::saveHash(
- const Stmt *S, const Decl *D,
- std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
+/// Generates and saves a hash code for the given Stmt.
+/// \param S The given Stmt.
+/// \param D The Decl containing S.
+/// \param StmtsByHash Output parameter that will contain the hash codes for
+/// each StmtSequence in the given Stmt.
+/// \return The hash code of the given Stmt.
+///
+/// If the given Stmt is a CompoundStmt, this method will also generate
+/// hashes for all possible StmtSequences in the children of this Stmt.
+static size_t
+saveHash(const Stmt *S, const Decl *D,
+ std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
llvm::MD5 Hash;
ASTContext &Context = D->getASTContext();
- StmtDataCollector<llvm::MD5>(S, Context, Hash);
+ CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash);
auto CS = dyn_cast<CompoundStmt>(S);
SmallVector<size_t, 8> ChildHashes;
@@ -288,8 +319,8 @@ public:
static void CollectStmtSequenceData(const StmtSequence &Sequence,
FoldingSetNodeIDWrapper &OutputData) {
for (const Stmt *S : Sequence) {
- StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(),
- OutputData);
+ CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>(
+ S, Sequence.getASTContext(), OutputData);
for (const Stmt *Child : S->children()) {
if (!Child)
@@ -318,7 +349,7 @@ static bool areSequencesClones(const StmtSequence &LHS,
return DataLHS == DataRHS;
}
-void RecursiveCloneTypeIIConstraint::constrain(
+void RecursiveCloneTypeIIHashConstraint::constrain(
std::vector<CloneDetector::CloneGroup> &Sequences) {
// FIXME: Maybe we can do this in-place and don't need this additional vector.
std::vector<CloneDetector::CloneGroup> Result;
@@ -339,7 +370,7 @@ void RecursiveCloneTypeIIConstraint::constrain(
// Sort hash_codes in StmtsByHash.
std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(),
[](std::pair<size_t, StmtSequence> LHS,
- std::pair<size_t, StmtSequence> RHS) {
+ std::pair<size_t, StmtSequence> RHS) {
return LHS.first < RHS.first;
});
@@ -359,8 +390,7 @@ void RecursiveCloneTypeIIConstraint::constrain(
for (; i < StmtsByHash.size(); ++i) {
// A different hash value means we have reached the end of the sequence.
- if (PrototypeHash != StmtsByHash[i].first ||
- !areSequencesClones(StmtsByHash[i].second, Current.second)) {
+ if (PrototypeHash != StmtsByHash[i].first) {
// The current sequence could be the start of a new CloneGroup. So we
// decrement i so that we visit it again in the outer loop.
// Note: i can never be 0 at this point because we are just comparing
@@ -383,8 +413,17 @@ void RecursiveCloneTypeIIConstraint::constrain(
Sequences = Result;
}
+void RecursiveCloneTypeIIVerifyConstraint::constrain(
+ std::vector<CloneDetector::CloneGroup> &Sequences) {
+ CloneConstraint::splitCloneGroups(
+ Sequences, [](const StmtSequence &A, const StmtSequence &B) {
+ return areSequencesClones(A, B);
+ });
+}
+
size_t MinComplexityConstraint::calculateStmtComplexity(
- const StmtSequence &Seq, const std::string &ParentMacroStack) {
+ const StmtSequence &Seq, std::size_t Limit,
+ const std::string &ParentMacroStack) {
if (Seq.empty())
return 0;
@@ -393,8 +432,8 @@ size_t MinComplexityConstraint::calculateStmtComplexity(
ASTContext &Context = Seq.getASTContext();
// Look up what macros expanded into the current statement.
- std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context);
- std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context);
+ std::string MacroStack =
+ data_collection::getMacroStack(Seq.getStartLoc(), Context);
// First, check if ParentMacroStack is not empty which means we are currently
// dealing with a parent statement which was expanded from a macro.
@@ -404,8 +443,7 @@ size_t MinComplexityConstraint::calculateStmtComplexity(
// macro expansion will only increase the total complexity by one.
// Note: This is not the final complexity of this statement as we still
// add the complexity of the child statements to the complexity value.
- if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack &&
- EndMacroStack == ParentMacroStack)) {
+ if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
Complexity = 0;
}
@@ -414,12 +452,16 @@ size_t MinComplexityConstraint::calculateStmtComplexity(
if (Seq.holdsSequence()) {
for (const Stmt *S : Seq) {
Complexity += calculateStmtComplexity(
- StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
+ StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
+ if (Complexity >= Limit)
+ return Limit;
}
} else {
for (const Stmt *S : Seq.front()->children()) {
Complexity += calculateStmtComplexity(
- StmtSequence(S, Seq.getContainingDecl()), StartMacroStack);
+ StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
+ if (Complexity >= Limit)
+ return Limit;
}
}
return Complexity;
@@ -437,7 +479,8 @@ void MatchingVariablePatternConstraint::constrain(
void CloneConstraint::splitCloneGroups(
std::vector<CloneDetector::CloneGroup> &CloneGroups,
- std::function<bool(const StmtSequence &, const StmtSequence &)> Compare) {
+ llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)>
+ Compare) {
std::vector<CloneDetector::CloneGroup> Result;
for (auto &HashGroup : CloneGroups) {
// Contains all indexes in HashGroup that were already added to a