diff options
Diffstat (limited to 'lib/Analysis/CloneDetection.cpp')
-rw-r--r-- | lib/Analysis/CloneDetection.cpp | 155 |
1 files changed, 99 insertions, 56 deletions
diff --git a/lib/Analysis/CloneDetection.cpp b/lib/Analysis/CloneDetection.cpp index 5ea74989a7ec..098803f9a417 100644 --- a/lib/Analysis/CloneDetection.cpp +++ b/lib/Analysis/CloneDetection.cpp @@ -13,16 +13,12 @@ #include "clang/Analysis/CloneDetection.h" -#include "clang/AST/ASTContext.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/AST/Stmt.h" -#include "clang/Lex/Lexer.h" +#include "clang/AST/DataCollection.h" +#include "clang/AST/DeclTemplate.h" #include "llvm/Support/MD5.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" using namespace clang; -using namespace clang::clone_detection; StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D, unsigned StartIndex, unsigned EndIndex) @@ -91,34 +87,6 @@ SourceRange StmtSequence::getSourceRange() const { return SourceRange(getStartLoc(), getEndLoc()); } -/// Prints the macro name that contains the given SourceLocation into the given -/// raw_string_ostream. -static void printMacroName(llvm::raw_string_ostream &MacroStack, - ASTContext &Context, SourceLocation Loc) { - MacroStack << Lexer::getImmediateMacroName(Loc, Context.getSourceManager(), - Context.getLangOpts()); - - // Add an empty space at the end as a padding to prevent - // that macro names concatenate to the names of other macros. - MacroStack << " "; -} - -std::string clone_detection::getMacroStack(SourceLocation Loc, - ASTContext &Context) { - std::string MacroStack; - llvm::raw_string_ostream MacroStackStream(MacroStack); - SourceManager &SM = Context.getSourceManager(); - - // Iterate over all macros that expanded into the given SourceLocation. - while (Loc.isMacroID()) { - // Add the macro name to the stream. - printMacroName(MacroStackStream, Context, Loc); - Loc = SM.getImmediateMacroCallerLoc(Loc); - } - MacroStackStream.flush(); - return MacroStack; -} - void CloneDetector::analyzeCodeBody(const Decl *D) { assert(D); assert(D->hasBody()); @@ -184,16 +152,17 @@ void OnlyLargestCloneConstraint::constrain( } } -bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup &Group) { +bool FilenamePatternConstraint::isAutoGenerated( + const CloneDetector::CloneGroup &Group) { std::string Error; - if (IgnoredFilesPattern.empty() || Group.empty() || + if (IgnoredFilesPattern.empty() || Group.empty() || !IgnoredFilesRegex->isValid(Error)) return false; for (const StmtSequence &S : Group) { const SourceManager &SM = S.getASTContext().getSourceManager(); - StringRef Filename = llvm::sys::path::filename(SM.getFilename( - S.getContainingDecl()->getLocation())); + StringRef Filename = llvm::sys::path::filename( + SM.getFilename(S.getContainingDecl()->getLocation())); if (IgnoredFilesRegex->match(Filename)) return true; } @@ -201,6 +170,59 @@ bool FilenamePatternConstraint::isAutoGenerated(const CloneDetector::CloneGroup return false; } +/// This class defines what a type II code clone is: If it collects for two +/// statements the same data, then those two statements are considered to be +/// clones of each other. +/// +/// All collected data is forwarded to the given data consumer of the type T. +/// The data consumer class needs to provide a member method with the signature: +/// update(StringRef Str) +namespace { +template <class T> +class CloneTypeIIStmtDataCollector + : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> { + ASTContext &Context; + /// The data sink to which all data is forwarded. + T &DataConsumer; + + template <class Ty> void addData(const Ty &Data) { + data_collection::addDataToConsumer(DataConsumer, Data); + } + +public: + CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context, + T &DataConsumer) + : Context(Context), DataConsumer(DataConsumer) { + this->Visit(S); + } + +// Define a visit method for each class to collect data and subsequently visit +// all parent classes. This uses a template so that custom visit methods by us +// take precedence. +#define DEF_ADD_DATA(CLASS, CODE) \ + template <class = void> void Visit##CLASS(const CLASS *S) { \ + CODE; \ + ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ + } + +#include "clang/AST/StmtDataCollectors.inc" + +// Type II clones ignore variable names and literals, so let's skip them. +#define SKIP(CLASS) \ + void Visit##CLASS(const CLASS *S) { \ + ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ + } + SKIP(DeclRefExpr) + SKIP(MemberExpr) + SKIP(IntegerLiteral) + SKIP(FloatingLiteral) + SKIP(StringLiteral) + SKIP(CXXBoolLiteralExpr) + SKIP(CharacterLiteral) +#undef SKIP +}; +} // end anonymous namespace + static size_t createHash(llvm::MD5 &Hash) { size_t HashCode; @@ -216,13 +238,22 @@ static size_t createHash(llvm::MD5 &Hash) { return HashCode; } -size_t RecursiveCloneTypeIIConstraint::saveHash( - const Stmt *S, const Decl *D, - std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) { +/// Generates and saves a hash code for the given Stmt. +/// \param S The given Stmt. +/// \param D The Decl containing S. +/// \param StmtsByHash Output parameter that will contain the hash codes for +/// each StmtSequence in the given Stmt. +/// \return The hash code of the given Stmt. +/// +/// If the given Stmt is a CompoundStmt, this method will also generate +/// hashes for all possible StmtSequences in the children of this Stmt. +static size_t +saveHash(const Stmt *S, const Decl *D, + std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) { llvm::MD5 Hash; ASTContext &Context = D->getASTContext(); - StmtDataCollector<llvm::MD5>(S, Context, Hash); + CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash); auto CS = dyn_cast<CompoundStmt>(S); SmallVector<size_t, 8> ChildHashes; @@ -288,8 +319,8 @@ public: static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData) { for (const Stmt *S : Sequence) { - StmtDataCollector<FoldingSetNodeIDWrapper>(S, Sequence.getASTContext(), - OutputData); + CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>( + S, Sequence.getASTContext(), OutputData); for (const Stmt *Child : S->children()) { if (!Child) @@ -318,7 +349,7 @@ static bool areSequencesClones(const StmtSequence &LHS, return DataLHS == DataRHS; } -void RecursiveCloneTypeIIConstraint::constrain( +void RecursiveCloneTypeIIHashConstraint::constrain( std::vector<CloneDetector::CloneGroup> &Sequences) { // FIXME: Maybe we can do this in-place and don't need this additional vector. std::vector<CloneDetector::CloneGroup> Result; @@ -339,7 +370,7 @@ void RecursiveCloneTypeIIConstraint::constrain( // Sort hash_codes in StmtsByHash. std::stable_sort(StmtsByHash.begin(), StmtsByHash.end(), [](std::pair<size_t, StmtSequence> LHS, - std::pair<size_t, StmtSequence> RHS) { + std::pair<size_t, StmtSequence> RHS) { return LHS.first < RHS.first; }); @@ -359,8 +390,7 @@ void RecursiveCloneTypeIIConstraint::constrain( for (; i < StmtsByHash.size(); ++i) { // A different hash value means we have reached the end of the sequence. - if (PrototypeHash != StmtsByHash[i].first || - !areSequencesClones(StmtsByHash[i].second, Current.second)) { + if (PrototypeHash != StmtsByHash[i].first) { // The current sequence could be the start of a new CloneGroup. So we // decrement i so that we visit it again in the outer loop. // Note: i can never be 0 at this point because we are just comparing @@ -383,8 +413,17 @@ void RecursiveCloneTypeIIConstraint::constrain( Sequences = Result; } +void RecursiveCloneTypeIIVerifyConstraint::constrain( + std::vector<CloneDetector::CloneGroup> &Sequences) { + CloneConstraint::splitCloneGroups( + Sequences, [](const StmtSequence &A, const StmtSequence &B) { + return areSequencesClones(A, B); + }); +} + size_t MinComplexityConstraint::calculateStmtComplexity( - const StmtSequence &Seq, const std::string &ParentMacroStack) { + const StmtSequence &Seq, std::size_t Limit, + const std::string &ParentMacroStack) { if (Seq.empty()) return 0; @@ -393,8 +432,8 @@ size_t MinComplexityConstraint::calculateStmtComplexity( ASTContext &Context = Seq.getASTContext(); // Look up what macros expanded into the current statement. - std::string StartMacroStack = getMacroStack(Seq.getStartLoc(), Context); - std::string EndMacroStack = getMacroStack(Seq.getEndLoc(), Context); + std::string MacroStack = + data_collection::getMacroStack(Seq.getStartLoc(), Context); // First, check if ParentMacroStack is not empty which means we are currently // dealing with a parent statement which was expanded from a macro. @@ -404,8 +443,7 @@ size_t MinComplexityConstraint::calculateStmtComplexity( // macro expansion will only increase the total complexity by one. // Note: This is not the final complexity of this statement as we still // add the complexity of the child statements to the complexity value. - if (!ParentMacroStack.empty() && (StartMacroStack == ParentMacroStack && - EndMacroStack == ParentMacroStack)) { + if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) { Complexity = 0; } @@ -414,12 +452,16 @@ size_t MinComplexityConstraint::calculateStmtComplexity( if (Seq.holdsSequence()) { for (const Stmt *S : Seq) { Complexity += calculateStmtComplexity( - StmtSequence(S, Seq.getContainingDecl()), StartMacroStack); + StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack); + if (Complexity >= Limit) + return Limit; } } else { for (const Stmt *S : Seq.front()->children()) { Complexity += calculateStmtComplexity( - StmtSequence(S, Seq.getContainingDecl()), StartMacroStack); + StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack); + if (Complexity >= Limit) + return Limit; } } return Complexity; @@ -437,7 +479,8 @@ void MatchingVariablePatternConstraint::constrain( void CloneConstraint::splitCloneGroups( std::vector<CloneDetector::CloneGroup> &CloneGroups, - std::function<bool(const StmtSequence &, const StmtSequence &)> Compare) { + llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)> + Compare) { std::vector<CloneDetector::CloneGroup> Result; for (auto &HashGroup : CloneGroups) { // Contains all indexes in HashGroup that were already added to a |