diff options
Diffstat (limited to 'clang/lib/Tooling')
36 files changed, 2646 insertions, 685 deletions
diff --git a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp index 4d495228cb51..0821863adcc6 100644 --- a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp +++ b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp @@ -11,8 +11,9 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/ASTDiff/ASTDiff.h" - +#include "clang/AST/ParentMapContext.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/PriorityQueue.h" @@ -116,12 +117,12 @@ public: Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST); template <class T> Impl(SyntaxTree *Parent, - typename std::enable_if<std::is_base_of<Stmt, T>::value, T>::type *Node, + std::enable_if_t<std::is_base_of<Stmt, T>::value, T> *Node, ASTContext &AST) : Impl(Parent, dyn_cast<Stmt>(Node), AST) {} template <class T> Impl(SyntaxTree *Parent, - typename std::enable_if<std::is_base_of<Decl, T>::value, T>::type *Node, + std::enable_if_t<std::is_base_of<Decl, T>::value, T> *Node, ASTContext &AST) : Impl(Parent, dyn_cast<Decl>(Node), AST) {} @@ -397,7 +398,7 @@ static const DeclContext *getEnclosingDeclContext(ASTContext &AST, static std::string getInitializerValue(const CXXCtorInitializer *Init, const PrintingPolicy &TypePP) { if (Init->isAnyMemberInitializer()) - return Init->getAnyMember()->getName(); + return std::string(Init->getAnyMember()->getName()); if (Init->isBaseInitializer()) return QualType(Init->getBaseClass(), 0).getAsString(TypePP); if (Init->isDelegatingInitializer()) @@ -434,36 +435,36 @@ std::string SyntaxTree::Impl::getDeclValue(const Decl *D) const { T->getTypeForDecl()->getCanonicalTypeInternal().getAsString(TypePP) + ";"; if (auto *U = dyn_cast<UsingDirectiveDecl>(D)) - return U->getNominatedNamespace()->getName(); + return std::string(U->getNominatedNamespace()->getName()); if (auto *A = dyn_cast<AccessSpecDecl>(D)) { CharSourceRange Range(A->getSourceRange(), false); - return 
Lexer::getSourceText(Range, AST.getSourceManager(), - AST.getLangOpts()); + return std::string( + Lexer::getSourceText(Range, AST.getSourceManager(), AST.getLangOpts())); } return Value; } std::string SyntaxTree::Impl::getStmtValue(const Stmt *S) const { if (auto *U = dyn_cast<UnaryOperator>(S)) - return UnaryOperator::getOpcodeStr(U->getOpcode()); + return std::string(UnaryOperator::getOpcodeStr(U->getOpcode())); if (auto *B = dyn_cast<BinaryOperator>(S)) - return B->getOpcodeStr(); + return std::string(B->getOpcodeStr()); if (auto *M = dyn_cast<MemberExpr>(S)) return getRelativeName(M->getMemberDecl()); if (auto *I = dyn_cast<IntegerLiteral>(S)) { SmallString<256> Str; I->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false); - return Str.str(); + return std::string(Str.str()); } if (auto *F = dyn_cast<FloatingLiteral>(S)) { SmallString<256> Str; F->getValue().toString(Str); - return Str.str(); + return std::string(Str.str()); } if (auto *D = dyn_cast<DeclRefExpr>(S)) return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S)); if (auto *String = dyn_cast<StringLiteral>(S)) - return String->getString(); + return std::string(String->getString()); if (auto *B = dyn_cast<CXXBoolLiteralExpr>(S)) return B->getValue() ? 
"true" : "false"; return ""; @@ -683,9 +684,7 @@ private: } }; -ast_type_traits::ASTNodeKind Node::getType() const { - return ASTNode.getNodeKind(); -} +ASTNodeKind Node::getType() const { return ASTNode.getNodeKind(); } StringRef Node::getTypeLabel() const { return getType().asStringRef(); } diff --git a/clang/lib/Tooling/AllTUsExecution.cpp b/clang/lib/Tooling/AllTUsExecution.cpp index d85075f59607..7707c99c21d0 100644 --- a/clang/lib/Tooling/AllTUsExecution.cpp +++ b/clang/lib/Tooling/AllTUsExecution.cpp @@ -8,8 +8,9 @@ #include "clang/Tooling/AllTUsExecution.h" #include "clang/Tooling/ToolExecutorPluginRegistry.h" -#include "llvm/Support/Threading.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/VirtualFileSystem.h" namespace clang { @@ -114,8 +115,7 @@ llvm::Error AllTUsToolExecutor::execute( auto &Action = Actions.front(); { - llvm::ThreadPool Pool(ThreadCount == 0 ? llvm::hardware_concurrency() - : ThreadCount); + llvm::ThreadPool Pool(llvm::hardware_concurrency(ThreadCount)); for (std::string File : Files) { Pool.async( [&](std::string Path) { diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp index a609e4ed2469..a857b57fbf7b 100644 --- a/clang/lib/Tooling/ArgumentsAdjusters.cpp +++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp @@ -26,7 +26,7 @@ ArgumentsAdjuster getClangSyntaxOnlyAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { CommandLineArguments AdjustedArgs; bool HasSyntaxOnly = false; - const std::vector<llvm::StringRef> OutputCommands = { + constexpr llvm::StringRef OutputCommands[] = { // FIXME: Add other options that generate output. 
"-save-temps", "--save-temps", @@ -42,6 +42,12 @@ ArgumentsAdjuster getClangSyntaxOnlyAdjuster() { if (!Arg.startswith("-fcolor-diagnostics") && !Arg.startswith("-fdiagnostics-color")) AdjustedArgs.push_back(Args[i]); + // If we strip a color option, make sure we strip any preceeding `-Xclang` + // option as well. + // FIXME: This should be added to most argument adjusters! + else if (!AdjustedArgs.empty() && AdjustedArgs.back() == "-Xclang") + AdjustedArgs.pop_back(); + if (Arg == "-fsyntax-only") HasSyntaxOnly = true; } @@ -92,7 +98,8 @@ ArgumentsAdjuster getClangStripDependencyFileAdjuster() { StringRef Arg = Args[i]; // All dependency-file options begin with -M. These include -MM, // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD. - if (!Arg.startswith("-M")) { + if (!Arg.startswith("-M") && !Arg.startswith("/showIncludes") && + !Arg.startswith("-showIncludes")) { AdjustedArgs.push_back(Args[i]); continue; } diff --git a/clang/lib/Tooling/CompilationDatabase.cpp b/clang/lib/Tooling/CompilationDatabase.cpp index c453e8d7df19..2b4c26dab96f 100644 --- a/clang/lib/Tooling/CompilationDatabase.cpp +++ b/clang/lib/Tooling/CompilationDatabase.cpp @@ -64,16 +64,14 @@ std::unique_ptr<CompilationDatabase> CompilationDatabase::loadFromDirectory(StringRef BuildDirectory, std::string &ErrorMessage) { llvm::raw_string_ostream ErrorStream(ErrorMessage); - for (CompilationDatabasePluginRegistry::iterator - It = CompilationDatabasePluginRegistry::begin(), - Ie = CompilationDatabasePluginRegistry::end(); - It != Ie; ++It) { + for (const CompilationDatabasePluginRegistry::entry &Database : + CompilationDatabasePluginRegistry::entries()) { std::string DatabaseErrorMessage; - std::unique_ptr<CompilationDatabasePlugin> Plugin(It->instantiate()); + std::unique_ptr<CompilationDatabasePlugin> Plugin(Database.instantiate()); if (std::unique_ptr<CompilationDatabase> DB = Plugin->loadFromDirectory(BuildDirectory, DatabaseErrorMessage)) return DB; - ErrorStream << It->getName() << ": " << 
DatabaseErrorMessage << "\n"; + ErrorStream << Database.getName() << ": " << DatabaseErrorMessage << "\n"; } return nullptr; } @@ -164,7 +162,7 @@ private: case driver::Action::InputClass: if (Collect) { const auto *IA = cast<driver::InputAction>(A); - Inputs.push_back(IA->getInputArg().getSpelling()); + Inputs.push_back(std::string(IA->getInputArg().getSpelling())); } break; @@ -233,7 +231,7 @@ std::string GetClangToolCommand() { SmallString<128> ClangToolPath; ClangToolPath = llvm::sys::path::parent_path(ClangExecutable); llvm::sys::path::append(ClangToolPath, "clang-tool"); - return ClangToolPath.str(); + return std::string(ClangToolPath.str()); } } // namespace @@ -368,8 +366,14 @@ FixedCompilationDatabase::loadFromFile(StringRef Path, std::string &ErrorMsg) { ErrorMsg = "Error while opening fixed database: " + Result.message(); return nullptr; } - std::vector<std::string> Args{llvm::line_iterator(**File), - llvm::line_iterator()}; + std::vector<std::string> Args; + for (llvm::StringRef Line : + llvm::make_range(llvm::line_iterator(**File), llvm::line_iterator())) { + // Stray whitespace is almost certainly unintended. 
+ Line = Line.trim(); + if (!Line.empty()) + Args.push_back(Line.str()); + } return std::make_unique<FixedCompilationDatabase>( llvm::sys::path::parent_path(Path), std::move(Args)); } @@ -387,8 +391,8 @@ FixedCompilationDatabase(Twine Directory, ArrayRef<std::string> CommandLine) { std::vector<CompileCommand> FixedCompilationDatabase::getCompileCommands(StringRef FilePath) const { std::vector<CompileCommand> Result(CompileCommands); - Result[0].CommandLine.push_back(FilePath); - Result[0].Filename = FilePath; + Result[0].CommandLine.push_back(std::string(FilePath)); + Result[0].Filename = std::string(FilePath); return Result; } diff --git a/clang/lib/Tooling/Core/Diagnostic.cpp b/clang/lib/Tooling/Core/Diagnostic.cpp index 235bd7fc1433..b0c4ea8c5608 100644 --- a/clang/lib/Tooling/Core/Diagnostic.cpp +++ b/clang/lib/Tooling/Core/Diagnostic.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Core/Diagnostic.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/STLExtras.h" @@ -25,7 +26,7 @@ DiagnosticMessage::DiagnosticMessage(llvm::StringRef Message, SourceLocation Loc) : Message(Message), FileOffset(0) { assert(Loc.isValid() && Loc.isFileID()); - FilePath = Sources.getFilename(Loc); + FilePath = std::string(Sources.getFilename(Loc)); // Don't store offset in the scratch space. It doesn't tell anything to the // user. 
Moreover, it depends on the history of macro expansions and thus @@ -34,6 +35,16 @@ DiagnosticMessage::DiagnosticMessage(llvm::StringRef Message, FileOffset = Sources.getFileOffset(Loc); } +FileByteRange::FileByteRange( + const SourceManager &Sources, CharSourceRange Range) + : FileOffset(0), Length(0) { + FilePath = std::string(Sources.getFilename(Range.getBegin())); + if (!FilePath.empty()) { + FileOffset = Sources.getFileOffset(Range.getBegin()); + Length = Sources.getFileOffset(Range.getEnd()) - FileOffset; + } +} + Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, Diagnostic::Level DiagLevel, StringRef BuildDirectory) : DiagnosticName(DiagnosticName), DiagLevel(DiagLevel), @@ -42,9 +53,10 @@ Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, const DiagnosticMessage &Message, const SmallVector<DiagnosticMessage, 1> &Notes, - Level DiagLevel, llvm::StringRef BuildDirectory) + Level DiagLevel, llvm::StringRef BuildDirectory, + const SmallVector<FileByteRange, 1> &Ranges) : DiagnosticName(DiagnosticName), Message(Message), Notes(Notes), - DiagLevel(DiagLevel), BuildDirectory(BuildDirectory) {} + DiagLevel(DiagLevel), BuildDirectory(BuildDirectory), Ranges(Ranges) {} const llvm::StringMap<Replacements> *selectFirstFix(const Diagnostic& D) { if (!D.Message.Fix.empty()) diff --git a/clang/lib/Tooling/Core/Lookup.cpp b/clang/lib/Tooling/Core/Lookup.cpp index 735a5df5ed21..712724a268fb 100644 --- a/clang/lib/Tooling/Core/Lookup.cpp +++ b/clang/lib/Tooling/Core/Lookup.cpp @@ -11,10 +11,12 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Core/Lookup.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclarationName.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" using namespace clang; using namespace clang::tooling; 
@@ -129,7 +131,7 @@ static std::string disambiguateSpellingInScope(StringRef Spelling, assert(QName.startswith("::")); assert(QName.endswith(Spelling)); if (Spelling.startswith("::")) - return Spelling; + return std::string(Spelling); auto UnspelledSpecifier = QName.drop_back(Spelling.size()); llvm::SmallVector<llvm::StringRef, 2> UnspelledScopes; @@ -168,7 +170,7 @@ static std::string disambiguateSpellingInScope(StringRef Spelling, }; // Add more qualifiers until the spelling is not ambiguous. - std::string Disambiguated = Spelling; + std::string Disambiguated = std::string(Spelling); while (IsAmbiguousSpelling(Disambiguated)) { if (UnspelledScopes.empty()) { Disambiguated = "::" + Disambiguated; @@ -206,8 +208,9 @@ std::string tooling::replaceNestedName(const NestedNameSpecifier *Use, !usingFromDifferentCanonicalNamespace(FromDecl->getDeclContext(), UseContext)) { auto Pos = ReplacementString.rfind("::"); - return Pos != StringRef::npos ? ReplacementString.substr(Pos + 2) - : ReplacementString; + return std::string(Pos != StringRef::npos + ? ReplacementString.substr(Pos + 2) + : ReplacementString); } // We did not match this because of a using statement, so we will need to // figure out how good a namespace match we have with our destination type. 
diff --git a/clang/lib/Tooling/Core/Replacement.cpp b/clang/lib/Tooling/Core/Replacement.cpp index 9ed03655bf2c..ab8e20539559 100644 --- a/clang/lib/Tooling/Core/Replacement.cpp +++ b/clang/lib/Tooling/Core/Replacement.cpp @@ -46,8 +46,8 @@ Replacement::Replacement() : FilePath(InvalidLocation) {} Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length, StringRef ReplacementText) - : FilePath(FilePath), ReplacementRange(Offset, Length), - ReplacementText(ReplacementText) {} + : FilePath(std::string(FilePath)), ReplacementRange(Offset, Length), + ReplacementText(std::string(ReplacementText)) {} Replacement::Replacement(const SourceManager &Sources, SourceLocation Start, unsigned Length, StringRef ReplacementText) { @@ -123,9 +123,9 @@ void Replacement::setFromSourceLocation(const SourceManager &Sources, const std::pair<FileID, unsigned> DecomposedLocation = Sources.getDecomposedLoc(Start); const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first); - this->FilePath = Entry ? Entry->getName() : InvalidLocation; + this->FilePath = std::string(Entry ? Entry->getName() : InvalidLocation); this->ReplacementRange = Range(DecomposedLocation.second, Length); - this->ReplacementText = ReplacementText; + this->ReplacementText = std::string(ReplacementText); } // FIXME: This should go into the Lexer, but we need to figure out how @@ -367,8 +367,8 @@ class MergedReplacement { public: MergedReplacement(const Replacement &R, bool MergeSecond, int D) : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()), - Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()), - Text(R.getReplacementText()) { + Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), + Length(R.getLength()), Text(std::string(R.getReplacementText())) { Delta += MergeSecond ? 0 : Text.size() - Length; DeltaFirst = MergeSecond ? 
Text.size() - Length : 0; } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index b4d5a29ca695..b1b87e7fa573 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -106,7 +106,8 @@ DependencyScanningFilesystemSharedCache:: // sharding gives a performance edge by reducing the lock contention. // FIXME: A better heuristic might also consider the OS to account for // the different cost of lock contention on different OSes. - NumShards = std::max(2u, llvm::hardware_concurrency() / 4); + NumShards = + std::max(2u, llvm::hardware_concurrency().compute_thread_count() / 4); CacheShards = std::make_unique<CacheShard[]>(NumShards); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp index f643c538f8f9..16040c2f4626 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp @@ -8,24 +8,25 @@ #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" #include "clang/Frontend/Utils.h" -#include "llvm/Support/JSON.h" - -static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) { - std::vector<llvm::StringRef> Strings; - for (auto &&I : Set) - Strings.push_back(I.getKey()); - std::sort(Strings.begin(), Strings.end()); - return llvm::json::Array(Strings); -} namespace clang{ namespace tooling{ namespace dependencies{ +std::vector<std::string> FullDependencies::getAdditionalCommandLine( + std::function<StringRef(ClangModuleDep)> LookupPCMPath, + std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps) const { + std::vector<std::string> Ret = AdditionalNonPathCommandLine; + + dependencies::detail::appendCommonModuleArguments( + ClangModuleDeps, LookupPCMPath, 
LookupModuleDeps, Ret); + + return Ret; +} + DependencyScanningTool::DependencyScanningTool( DependencyScanningService &Service) - : Format(Service.getFormat()), Worker(Service) { -} + : Worker(Service) {} llvm::Expected<std::string> DependencyScanningTool::getDependencyFile( const tooling::CompilationDatabase &Compilations, StringRef CWD) { @@ -36,7 +37,7 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile( StringRef File) override { if (!this->Opts) this->Opts = std::make_unique<DependencyOutputOptions>(Opts); - Dependencies.push_back(File); + Dependencies.push_back(std::string(File)); } void handleModuleDependency(ModuleDeps MD) override { @@ -75,11 +76,36 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile( std::vector<std::string> Dependencies; }; + // We expect a single command here because if a source file occurs multiple + // times in the original CDB, then `computeDependencies` would run the + // `DependencyScanningAction` once for every time the input occured in the + // CDB. Instead we split up the CDB into single command chunks to avoid this + // behavior. 
+ assert(Compilations.getAllCompileCommands().size() == 1 && + "Expected a compilation database with a single command!"); + std::string Input = Compilations.getAllCompileCommands().front().Filename; + + MakeDependencyPrinterConsumer Consumer; + auto Result = Worker.computeDependencies(Input, CWD, Compilations, Consumer); + if (Result) + return std::move(Result); + std::string Output; + Consumer.printDependencies(Output); + return Output; +} + +llvm::Expected<FullDependenciesResult> +DependencyScanningTool::getFullDependencies( + const tooling::CompilationDatabase &Compilations, StringRef CWD, + const llvm::StringSet<> &AlreadySeen) { class FullDependencyPrinterConsumer : public DependencyConsumer { public: + FullDependencyPrinterConsumer(const llvm::StringSet<> &AlreadySeen) + : AlreadySeen(AlreadySeen) {} + void handleFileDependency(const DependencyOutputOptions &Opts, StringRef File) override { - Dependencies.push_back(File); + Dependencies.push_back(std::string(File)); } void handleModuleDependency(ModuleDeps MD) override { @@ -90,55 +116,41 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile( ContextHash = std::move(Hash); } - void printDependencies(std::string &S, StringRef MainFile) { - // Sort the modules by name to get a deterministic order. 
- std::vector<StringRef> Modules; - for (auto &&Dep : ClangModuleDeps) - Modules.push_back(Dep.first); - std::sort(Modules.begin(), Modules.end()); + FullDependenciesResult getFullDependencies() const { + FullDependencies FD; - llvm::raw_string_ostream OS(S); + FD.ContextHash = std::move(ContextHash); - using namespace llvm::json; + FD.FileDeps.assign(Dependencies.begin(), Dependencies.end()); - Array Imports; - for (auto &&ModName : Modules) { - auto &MD = ClangModuleDeps[ModName]; + for (auto &&M : ClangModuleDeps) { + auto &MD = M.second; if (MD.ImportedByMainFile) - Imports.push_back(MD.ModuleName); + FD.ClangModuleDeps.push_back({MD.ModuleName, ContextHash}); } - Array Mods; - for (auto &&ModName : Modules) { - auto &MD = ClangModuleDeps[ModName]; - Object Mod{ - {"name", MD.ModuleName}, - {"file-deps", toJSONSorted(MD.FileDeps)}, - {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)}, - {"clang-modulemap-file", MD.ClangModuleMapFile}, - }; - Mods.push_back(std::move(Mod)); - } + FullDependenciesResult FDR; - Object O{ - {"input-file", MainFile}, - {"clang-context-hash", ContextHash}, - {"file-deps", Dependencies}, - {"clang-module-deps", std::move(Imports)}, - {"clang-modules", std::move(Mods)}, - }; + for (auto &&M : ClangModuleDeps) { + // TODO: Avoid handleModuleDependency even being called for modules + // we've already seen. 
+ if (AlreadySeen.count(M.first)) + continue; + FDR.DiscoveredModules.push_back(std::move(M.second)); + } - S = llvm::formatv("{0:2},\n", Value(std::move(O))).str(); - return; + FDR.FullDeps = std::move(FD); + return FDR; } private: std::vector<std::string> Dependencies; std::unordered_map<std::string, ModuleDeps> ClangModuleDeps; std::string ContextHash; + std::vector<std::string> OutputPaths; + const llvm::StringSet<> &AlreadySeen; }; - // We expect a single command here because if a source file occurs multiple // times in the original CDB, then `computeDependencies` would run the // `DependencyScanningAction` once for every time the input occured in the @@ -147,26 +159,13 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile( assert(Compilations.getAllCompileCommands().size() == 1 && "Expected a compilation database with a single command!"); std::string Input = Compilations.getAllCompileCommands().front().Filename; - - if (Format == ScanningOutputFormat::Make) { - MakeDependencyPrinterConsumer Consumer; - auto Result = - Worker.computeDependencies(Input, CWD, Compilations, Consumer); - if (Result) - return std::move(Result); - std::string Output; - Consumer.printDependencies(Output); - return Output; - } else { - FullDependencyPrinterConsumer Consumer; - auto Result = - Worker.computeDependencies(Input, CWD, Compilations, Consumer); - if (Result) - return std::move(Result); - std::string Output; - Consumer.printDependencies(Output, Input); - return Output; - } + + FullDependencyPrinterConsumer Consumer(AlreadySeen); + llvm::Error Result = + Worker.computeDependencies(Input, CWD, Compilations, Consumer); + if (Result) + return std::move(Result); + return Consumer.getFullDependencies(); } } // end namespace dependencies diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index edf2cf8bd70f..32bbc578d2db 100644 --- 
a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -118,7 +118,7 @@ public: .ExcludedConditionalDirectiveSkipMappings = PPSkipMappings; } - FileMgr->getFileSystemOpts().WorkingDir = WorkingDirectory; + FileMgr->getFileSystemOpts().WorkingDir = std::string(WorkingDirectory); Compiler.setFileManager(FileMgr); Compiler.createSourceManager(*FileMgr); @@ -142,12 +142,17 @@ public: Consumer)); break; case ScanningOutputFormat::Full: - Compiler.addDependencyCollector( - std::make_shared<ModuleDepCollector>(Compiler, Consumer)); + Compiler.addDependencyCollector(std::make_shared<ModuleDepCollector>( + std::move(Opts), Compiler, Consumer)); break; } - Consumer.handleContextHash(Compiler.getInvocation().getModuleHash()); + // Consider different header search and diagnostic options to create + // different modules. This avoids the unsound aliasing of module PCMs. + // + // TODO: Implement diagnostic bucketing and header search pruning to reduce + // the impact of strict context hashing. + Compiler.getHeaderSearchOpts().ModulesStrictContextHash = true; auto Action = std::make_unique<PreprocessOnlyAction>(); const bool Result = Compiler.ExecuteAction(*Action); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 422940047f2d..4f6eff799f22 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -17,12 +17,60 @@ using namespace clang; using namespace tooling; using namespace dependencies; +std::vector<std::string> ModuleDeps::getFullCommandLine( + std::function<StringRef(ClangModuleDep)> LookupPCMPath, + std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps) const { + std::vector<std::string> Ret = NonPathCommandLine; + + // TODO: Build full command line. 
That also means capturing the original + // command line into NonPathCommandLine. + + dependencies::detail::appendCommonModuleArguments( + ClangModuleDeps, LookupPCMPath, LookupModuleDeps, Ret); + + return Ret; +} + +void dependencies::detail::appendCommonModuleArguments( + llvm::ArrayRef<ClangModuleDep> Modules, + std::function<StringRef(ClangModuleDep)> LookupPCMPath, + std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps, + std::vector<std::string> &Result) { + llvm::StringSet<> AlreadyAdded; + + std::function<void(llvm::ArrayRef<ClangModuleDep>)> AddArgs = + [&](llvm::ArrayRef<ClangModuleDep> Modules) { + for (const ClangModuleDep &CMD : Modules) { + if (!AlreadyAdded.insert(CMD.ModuleName + CMD.ContextHash).second) + continue; + const ModuleDeps &M = LookupModuleDeps(CMD); + // Depth first traversal. + AddArgs(M.ClangModuleDeps); + Result.push_back(("-fmodule-file=" + LookupPCMPath(CMD)).str()); + if (!M.ClangModuleMapFile.empty()) { + Result.push_back("-fmodule-map-file=" + M.ClangModuleMapFile); + } + } + }; + + Result.push_back("-fno-implicit-modules"); + Result.push_back("-fno-implicit-module-maps"); + AddArgs(Modules); +} + void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) { if (Reason != PPCallbacks::EnterFile) return; + + // This has to be delayed as the context hash can change at the start of + // `CompilerInstance::ExecuteAction`. 
+ if (MDC.ContextHash.empty()) { + MDC.ContextHash = Instance.getInvocation().getModuleHash(); + MDC.Consumer.handleContextHash(MDC.ContextHash); + } SourceManager &SM = Instance.getSourceManager(); @@ -37,7 +85,7 @@ void ModuleDepCollectorPP::FileChanged(SourceLocation Loc, StringRef FileName = llvm::sys::path::remove_leading_dotslash(File->getName()); - MDC.MainDeps.push_back(FileName); + MDC.MainDeps.push_back(std::string(FileName)); } void ModuleDepCollectorPP::InclusionDirective( @@ -48,9 +96,18 @@ void ModuleDepCollectorPP::InclusionDirective( if (!File && !Imported) { // This is a non-modular include that HeaderSearch failed to find. Add it // here as `FileChanged` will never see it. - MDC.MainDeps.push_back(FileName); + MDC.MainDeps.push_back(std::string(FileName)); } + handleImport(Imported); +} +void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, + ModuleIdPath Path, + const Module *Imported) { + handleImport(Imported); +} + +void ModuleDepCollectorPP::handleImport(const Module *Imported) { if (!Imported) return; @@ -61,8 +118,8 @@ void ModuleDepCollectorPP::InclusionDirective( void ModuleDepCollectorPP::EndOfMainFile() { FileID MainFileID = Instance.getSourceManager().getMainFileID(); - MDC.MainFile = - Instance.getSourceManager().getFileEntryForID(MainFileID)->getName(); + MDC.MainFile = std::string( + Instance.getSourceManager().getFileEntryForID(MainFileID)->getName()); for (const Module *M : DirectDeps) { handleTopLevelModule(M); @@ -71,9 +128,8 @@ void ModuleDepCollectorPP::EndOfMainFile() { for (auto &&I : MDC.Deps) MDC.Consumer.handleModuleDependency(I.second); - DependencyOutputOptions Opts; for (auto &&I : MDC.MainDeps) - MDC.Consumer.handleFileDependency(Opts, I); + MDC.Consumer.handleFileDependency(*MDC.Opts, I); } void ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { @@ -92,9 +148,9 @@ void ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { .getModuleMap() .getContainingModuleMapFile(M); - 
MD.ClangModuleMapFile = ModuleMap ? ModuleMap->getName() : ""; + MD.ClangModuleMapFile = std::string(ModuleMap ? ModuleMap->getName() : ""); MD.ModuleName = M->getFullModuleName(); - MD.ModulePCMPath = M->getASTFile()->getName(); + MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName()); MD.ContextHash = MDC.ContextHash; serialization::ModuleFile *MF = MDC.Instance.getASTReader()->getModuleManager().lookup(M->getASTFile()); @@ -103,30 +159,37 @@ void ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MD.FileDeps.insert(IF.getFile()->getName()); }); - addAllSubmoduleDeps(M, MD); + llvm::DenseSet<const Module *> AddedModules; + addAllSubmoduleDeps(M, MD, AddedModules); } -void ModuleDepCollectorPP::addAllSubmoduleDeps(const Module *M, - ModuleDeps &MD) { - addModuleDep(M, MD); +void ModuleDepCollectorPP::addAllSubmoduleDeps( + const Module *M, ModuleDeps &MD, + llvm::DenseSet<const Module *> &AddedModules) { + addModuleDep(M, MD, AddedModules); for (const Module *SubM : M->submodules()) - addAllSubmoduleDeps(SubM, MD); + addAllSubmoduleDeps(SubM, MD, AddedModules); } -void ModuleDepCollectorPP::addModuleDep(const Module *M, ModuleDeps &MD) { +void ModuleDepCollectorPP::addModuleDep( + const Module *M, ModuleDeps &MD, + llvm::DenseSet<const Module *> &AddedModules) { for (const Module *Import : M->Imports) { if (Import->getTopLevelModule() != M->getTopLevelModule()) { - MD.ClangModuleDeps.insert(Import->getTopLevelModuleName()); + if (AddedModules.insert(Import->getTopLevelModule()).second) + MD.ClangModuleDeps.push_back( + {std::string(Import->getTopLevelModuleName()), + Instance.getInvocation().getModuleHash()}); handleTopLevelModule(Import->getTopLevelModule()); } } } -ModuleDepCollector::ModuleDepCollector(CompilerInstance &I, - DependencyConsumer &C) - : Instance(I), Consumer(C), ContextHash(I.getInvocation().getModuleHash()) { -} +ModuleDepCollector::ModuleDepCollector( + std::unique_ptr<DependencyOutputOptions> Opts, CompilerInstance &I, + 
DependencyConsumer &C) + : Instance(I), Consumer(C), Opts(std::move(Opts)) {} void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(Instance, *this)); diff --git a/clang/lib/Tooling/Execution.cpp b/clang/lib/Tooling/Execution.cpp index c39a4fcdac82..247b260b97ed 100644 --- a/clang/lib/Tooling/Execution.cpp +++ b/clang/lib/Tooling/Execution.cpp @@ -63,18 +63,16 @@ createExecutorFromCommandLineArgsImpl(int &argc, const char **argv, /*Overview=*/Overview); if (!OptionsParser) return OptionsParser.takeError(); - for (auto I = ToolExecutorPluginRegistry::begin(), - E = ToolExecutorPluginRegistry::end(); - I != E; ++I) { - if (I->getName() != ExecutorName) { + for (const auto &TEPlugin : ToolExecutorPluginRegistry::entries()) { + if (TEPlugin.getName() != ExecutorName) { continue; } - std::unique_ptr<ToolExecutorPlugin> Plugin(I->instantiate()); + std::unique_ptr<ToolExecutorPlugin> Plugin(TEPlugin.instantiate()); llvm::Expected<std::unique_ptr<ToolExecutor>> Executor = Plugin->create(*OptionsParser); if (!Executor) { return llvm::make_error<llvm::StringError>( - llvm::Twine("Failed to create '") + I->getName() + + llvm::Twine("Failed to create '") + TEPlugin.getName() + "': " + llvm::toString(Executor.takeError()) + "\n", llvm::inconvertibleErrorCode()); } diff --git a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp index 99298316718b..f1ab2aed54c0 100644 --- a/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp +++ b/clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp @@ -12,6 +12,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/StringSaver.h" diff --git a/clang/lib/Tooling/FileMatchTrie.cpp 
b/clang/lib/Tooling/FileMatchTrie.cpp index 7df5a16fd88f..88dea6bb6c9f 100644 --- a/clang/lib/Tooling/FileMatchTrie.cpp +++ b/clang/lib/Tooling/FileMatchTrie.cpp @@ -63,7 +63,7 @@ public: return; if (Path.empty()) { // This is an empty leaf. Store NewPath and return. - Path = NewPath; + Path = std::string(NewPath); return; } if (Children.empty()) { diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index 37a0816c803e..681fcc5c762a 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Inclusions/HeaderIncludes.h" +#include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" #include "llvm/ADT/Optional.h" @@ -319,7 +320,7 @@ HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const { (!IsAngled && StringRef(Inc.Name).startswith("\""))) return llvm::None; std::string Quoted = - llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName); + std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName)); StringRef QuotedName = Quoted; int Priority = Categories.getIncludePriority( QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0); @@ -336,7 +337,8 @@ HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const { } } assert(InsertOffset <= Code.size()); - std::string NewInclude = llvm::formatv("#include {0}\n", QuotedName); + std::string NewInclude = + std::string(llvm::formatv("#include {0}\n", QuotedName)); // When inserting headers at end of the code, also append '\n' to the code // if it does not end with '\n'. 
// FIXME: when inserting multiple #includes at the end of code, only one diff --git a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp index 2cc819a498c6..fa61560e5123 100644 --- a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -114,6 +114,9 @@ static types::ID foldType(types::ID Lang) { case types::TY_ObjCXX: case types::TY_ObjCXXHeader: return types::TY_ObjCXX; + case types::TY_CUDA: + case types::TY_CUDA_DEVICE: + return types::TY_CUDA; default: return types::TY_INVALID; } @@ -203,7 +206,7 @@ struct TransferableCommand { // Produce a CompileCommand for \p filename, based on this one. CompileCommand transferTo(StringRef Filename) const { CompileCommand Result = Cmd; - Result.Filename = Filename; + Result.Filename = std::string(Filename); bool TypeCertain; auto TargetType = guessType(Filename, &TypeCertain); // If the filename doesn't determine the language (.h), transfer with -x. @@ -217,7 +220,7 @@ struct TransferableCommand { if (ClangCLMode) { const StringRef Flag = toCLFlag(TargetType); if (!Flag.empty()) - Result.CommandLine.push_back(Flag); + Result.CommandLine.push_back(std::string(Flag)); } else { Result.CommandLine.push_back("-x"); Result.CommandLine.push_back(types::getTypeName(TargetType)); @@ -230,7 +233,7 @@ struct TransferableCommand { llvm::Twine(ClangCLMode ? 
"/std:" : "-std=") + LangStandard::getLangStandardForKind(Std).getName()).str()); } - Result.CommandLine.push_back(Filename); + Result.CommandLine.push_back(std::string(Filename)); Result.Heuristic = "inferred from " + Cmd.Filename; return Result; } diff --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp b/clang/lib/Tooling/JSONCompilationDatabase.cpp index 04dd4dbf6248..4af361f538cb 100644 --- a/clang/lib/Tooling/JSONCompilationDatabase.cpp +++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp @@ -305,7 +305,7 @@ nodeToCommandLine(JSONCommandLineSyntax Syntax, Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage)); else for (const auto *Node : Nodes) - Arguments.push_back(Node->getValue(Storage)); + Arguments.push_back(std::string(Node->getValue(Storage))); // There may be multiple wrappers: using distcc and ccache together is common. while (unwrapCommand(Arguments)) ; diff --git a/clang/lib/Tooling/Refactoring/ASTSelection.cpp b/clang/lib/Tooling/Refactoring/ASTSelection.cpp index 64e57af59011..af1eb491a20a 100644 --- a/clang/lib/Tooling/Refactoring/ASTSelection.cpp +++ b/clang/lib/Tooling/Refactoring/ASTSelection.cpp @@ -13,7 +13,6 @@ using namespace clang; using namespace tooling; -using ast_type_traits::DynTypedNode; namespace { diff --git a/clang/lib/Tooling/Refactoring/AtomicChange.cpp b/clang/lib/Tooling/Refactoring/AtomicChange.cpp index 4cf63306d262..069e9c1eb36e 100644 --- a/clang/lib/Tooling/Refactoring/AtomicChange.cpp +++ b/clang/lib/Tooling/Refactoring/AtomicChange.cpp @@ -200,10 +200,16 @@ AtomicChange::AtomicChange(const SourceManager &SM, FullKeyPosition.getSpellingLoc().getDecomposedLoc(); const FileEntry *FE = SM.getFileEntryForID(FileIDAndOffset.first); assert(FE && "Cannot create AtomicChange with invalid location."); - FilePath = FE->getName(); + FilePath = std::string(FE->getName()); Key = FilePath + ":" + std::to_string(FileIDAndOffset.second); } +AtomicChange::AtomicChange(const SourceManager &SM, SourceLocation KeyPosition, 
+ llvm::Any M) + : AtomicChange(SM, KeyPosition) { + Metadata = std::move(M); +} + AtomicChange::AtomicChange(std::string Key, std::string FilePath, std::string Error, std::vector<std::string> InsertedHeaders, @@ -284,11 +290,11 @@ llvm::Error AtomicChange::insert(const SourceManager &SM, SourceLocation Loc, } void AtomicChange::addHeader(llvm::StringRef Header) { - InsertedHeaders.push_back(Header); + InsertedHeaders.push_back(std::string(Header)); } void AtomicChange::removeHeader(llvm::StringRef Header) { - RemovedHeaders.push_back(Header); + RemovedHeaders.push_back(std::string(Header)); } llvm::Expected<std::string> diff --git a/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp b/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp index b0634912e3fc..72598601d47d 100644 --- a/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp +++ b/clang/lib/Tooling/Refactoring/Rename/RenamingAction.cpp @@ -170,7 +170,8 @@ static void convertChangesToFileReplacements( std::map<std::string, tooling::Replacements> *FileToReplaces) { for (const auto &AtomicChange : AtomicChanges) { for (const auto &Replace : AtomicChange.getReplacements()) { - llvm::Error Err = (*FileToReplaces)[Replace.getFilePath()].add(Replace); + llvm::Error Err = + (*FileToReplaces)[std::string(Replace.getFilePath())].add(Replace); if (Err) { llvm::errs() << "Renaming failed in " << Replace.getFilePath() << "! 
" << llvm::toString(std::move(Err)) << "\n"; diff --git a/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp b/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp index 55111202ac88..23f567f1c9ec 100644 --- a/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp +++ b/clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp @@ -15,6 +15,7 @@ #include "clang/AST/AST.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/SourceManager.h" #include "clang/Index/USRGeneration.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Refactoring/RecursiveSymbolVisitor.h" diff --git a/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp b/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp index d966a5ef23c2..43dc32e158d3 100644 --- a/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp +++ b/clang/lib/Tooling/Refactoring/Rename/USRFindingAction.cpp @@ -126,15 +126,24 @@ private: addUSRsOfCtorDtors(TemplateDecl->getTemplatedDecl()); } - void addUSRsOfCtorDtors(const CXXRecordDecl *RecordDecl) { - RecordDecl = RecordDecl->getDefinition(); + void addUSRsOfCtorDtors(const CXXRecordDecl *RD) { + const auto* RecordDecl = RD->getDefinition(); // Skip if the CXXRecordDecl doesn't have definition. - if (!RecordDecl) + if (!RecordDecl) { + USRSet.insert(getUSRForDecl(RD)); return; + } for (const auto *CtorDecl : RecordDecl->ctors()) USRSet.insert(getUSRForDecl(CtorDecl)); + // Add template constructor decls, they are not in ctors() unfortunately. 
+ if (RecordDecl->hasUserDeclaredConstructor()) + for (const auto *D : RecordDecl->decls()) + if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(D)) + if (const auto *Ctor = + dyn_cast<CXXConstructorDecl>(FTD->getTemplatedDecl())) + USRSet.insert(getUSRForDecl(Ctor)); USRSet.insert(getUSRForDecl(RecordDecl->getDestructor())); USRSet.insert(getUSRForDecl(RecordDecl)); diff --git a/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp b/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp index 408e184f5bf5..dfc319dd0639 100644 --- a/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp +++ b/clang/lib/Tooling/Refactoring/Rename/USRLocFinder.cpp @@ -15,6 +15,7 @@ #include "clang/Tooling/Refactoring/Rename/USRLocFinder.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ParentMapContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" @@ -426,8 +427,7 @@ public: StartLoc, EndLoc, TemplateSpecType->getTemplateName().getAsTemplateDecl(), - getClosestAncestorDecl( - ast_type_traits::DynTypedNode::create(TargetLoc)), + getClosestAncestorDecl(DynTypedNode::create(TargetLoc)), GetNestedNameForType(TargetLoc), /*IgnorePrefixQualifers=*/false}; RenameInfos.push_back(Info); @@ -466,8 +466,7 @@ private: // FIXME: figure out how to handle it when there are multiple parents. if (Parents.size() != 1) return nullptr; - if (ast_type_traits::ASTNodeKind::getFromNodeKind<Decl>().isBaseOf( - Parents[0].getNodeKind())) + if (ASTNodeKind::getFromNodeKind<Decl>().isBaseOf(Parents[0].getNodeKind())) return Parents[0].template get<Decl>(); return getClosestAncestorDecl(Parents[0]); } @@ -536,7 +535,7 @@ createRenameAtomicChanges(llvm::ArrayRef<std::string> USRs, // Get the name without prefix qualifiers from NewName. 
size_t LastColonPos = NewName.find_last_of(':'); if (LastColonPos != std::string::npos) - ReplacedName = NewName.substr(LastColonPos + 1); + ReplacedName = std::string(NewName.substr(LastColonPos + 1)); } else { if (RenameInfo.FromDecl && RenameInfo.Context) { if (!llvm::isa<clang::TranslationUnitDecl>( diff --git a/clang/lib/Tooling/RefactoringCallbacks.cpp b/clang/lib/Tooling/RefactoringCallbacks.cpp index 919b83beb357..e3fc91afeb59 100644 --- a/clang/lib/Tooling/RefactoringCallbacks.cpp +++ b/clang/lib/Tooling/RefactoringCallbacks.cpp @@ -50,8 +50,8 @@ public: for (const auto &Callback : Refactoring.Callbacks) { for (const auto &Replacement : Callback->getReplacements()) { llvm::Error Err = - Refactoring.FileToReplaces[Replacement.getFilePath()].add( - Replacement); + Refactoring.FileToReplaces[std::string(Replacement.getFilePath())] + .add(Replacement); if (Err) { llvm::errs() << "Skipping replacement " << Replacement.toString() << " due to this error:\n" @@ -83,7 +83,7 @@ static Replacement replaceStmtWithStmt(SourceManager &Sources, const Stmt &From, } ReplaceStmtWithText::ReplaceStmtWithText(StringRef FromId, StringRef ToText) - : FromId(FromId), ToText(ToText) {} + : FromId(std::string(FromId)), ToText(std::string(ToText)) {} void ReplaceStmtWithText::run( const ast_matchers::MatchFinder::MatchResult &Result) { @@ -101,7 +101,7 @@ void ReplaceStmtWithText::run( } ReplaceStmtWithStmt::ReplaceStmtWithStmt(StringRef FromId, StringRef ToId) - : FromId(FromId), ToId(ToId) {} + : FromId(std::string(FromId)), ToId(std::string(ToId)) {} void ReplaceStmtWithStmt::run( const ast_matchers::MatchFinder::MatchResult &Result) { @@ -121,7 +121,7 @@ void ReplaceStmtWithStmt::run( ReplaceIfStmtWithItsBody::ReplaceIfStmtWithItsBody(StringRef Id, bool PickTrueBranch) - : Id(Id), PickTrueBranch(PickTrueBranch) {} + : Id(std::string(Id)), PickTrueBranch(PickTrueBranch) {} void ReplaceIfStmtWithItsBody::run( const ast_matchers::MatchFinder::MatchResult &Result) { @@ -153,7 
+153,7 @@ void ReplaceIfStmtWithItsBody::run( ReplaceNodeWithTemplate::ReplaceNodeWithTemplate( llvm::StringRef FromId, std::vector<TemplateElement> Template) - : FromId(FromId), Template(std::move(Template)) {} + : FromId(std::string(FromId)), Template(std::move(Template)) {} llvm::Expected<std::unique_ptr<ReplaceNodeWithTemplate>> ReplaceNodeWithTemplate::create(StringRef FromId, StringRef ToTemplate) { @@ -172,8 +172,8 @@ ReplaceNodeWithTemplate::create(StringRef FromId, StringRef ToTemplate) { ToTemplate.substr(Index), llvm::inconvertibleErrorCode()); } - std::string SourceNodeName = - ToTemplate.substr(Index + 2, EndOfIdentifier - Index - 2); + std::string SourceNodeName = std::string( + ToTemplate.substr(Index + 2, EndOfIdentifier - Index - 2)); ParsedTemplate.push_back( TemplateElement{TemplateElement::Identifier, SourceNodeName}); Index = EndOfIdentifier + 1; @@ -185,9 +185,9 @@ ReplaceNodeWithTemplate::create(StringRef FromId, StringRef ToTemplate) { } } else { size_t NextIndex = ToTemplate.find('$', Index + 1); - ParsedTemplate.push_back( - TemplateElement{TemplateElement::Literal, - ToTemplate.substr(Index, NextIndex - Index)}); + ParsedTemplate.push_back(TemplateElement{ + TemplateElement::Literal, + std::string(ToTemplate.substr(Index, NextIndex - Index))}); Index = NextIndex; } } diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index aa8844771d37..1f192180ec45 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -6,20 +6,32 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/ASTFwd.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" +#include 
"clang/AST/TypeLoc.h" +#include "clang/AST/TypeLocVisitor.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/Specifiers.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/LiteralSupport.h" #include "clang/Tooling/Syntax/Nodes.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -27,6 +39,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include <cstddef> #include <map> using namespace clang; @@ -34,6 +47,207 @@ using namespace clang; LLVM_ATTRIBUTE_UNUSED static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } +namespace { +/// Get start location of the Declarator from the TypeLoc. +/// E.g.: +/// loc of `(` in `int (a)` +/// loc of `*` in `int *(a)` +/// loc of the first `(` in `int (*a)(int)` +/// loc of the `*` in `int *(a)(int)` +/// loc of the first `*` in `const int *const *volatile a;` +/// +/// It is non-trivial to get the start location because TypeLocs are stored +/// inside out. In the example above `*volatile` is the TypeLoc returned +/// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` +/// returns. +struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { + SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { + auto L = Visit(T.getInnerLoc()); + if (L.isValid()) + return L; + return T.getLParenLoc(); + } + + // Types spelled in the prefix part of the declarator. 
+ SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { + return HandlePointer(T); + } + + SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { + return HandlePointer(T); + } + + // All other cases are not important, as they are either part of declaration + // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on + // existing declarators (e.g. QualifiedTypeLoc). They cannot start the + // declarator themselves, but their underlying type can. + SourceLocation VisitTypeLoc(TypeLoc T) { + auto N = T.getNextTypeLoc(); + if (!N) + return SourceLocation(); + return Visit(N); + } + + SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { + if (T.getTypePtr()->hasTrailingReturn()) + return SourceLocation(); // avoid recursing into the suffix of declarator. 
+ return VisitTypeLoc(T); + } + +private: + template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { + auto L = Visit(T.getPointeeLoc()); + if (L.isValid()) + return L; + return T.getLocalSourceRange().getBegin(); + } +}; +} // namespace + +static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { + switch (E.getOperator()) { + // Comparison + case OO_EqualEqual: + case OO_ExclaimEqual: + case OO_Greater: + case OO_GreaterEqual: + case OO_Less: + case OO_LessEqual: + case OO_Spaceship: + // Assignment + case OO_Equal: + case OO_SlashEqual: + case OO_PercentEqual: + case OO_CaretEqual: + case OO_PipeEqual: + case OO_LessLessEqual: + case OO_GreaterGreaterEqual: + case OO_PlusEqual: + case OO_MinusEqual: + case OO_StarEqual: + case OO_AmpEqual: + // Binary computation + case OO_Slash: + case OO_Percent: + case OO_Caret: + case OO_Pipe: + case OO_LessLess: + case OO_GreaterGreater: + case OO_AmpAmp: + case OO_PipePipe: + case OO_ArrowStar: + case OO_Comma: + return syntax::NodeKind::BinaryOperatorExpression; + case OO_Tilde: + case OO_Exclaim: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + // Prefix/Postfix increment/decrement + case OO_PlusPlus: + case OO_MinusMinus: + switch (E.getNumArgs()) { + case 1: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + case 2: + return syntax::NodeKind::PostfixUnaryOperatorExpression; + default: + llvm_unreachable("Invalid number of arguments for operator"); + } + // Operators that can be unary or binary + case OO_Plus: + case OO_Minus: + case OO_Star: + case OO_Amp: + switch (E.getNumArgs()) { + case 1: + return syntax::NodeKind::PrefixUnaryOperatorExpression; + case 2: + return syntax::NodeKind::BinaryOperatorExpression; + default: + llvm_unreachable("Invalid number of arguments for operator"); + } + return syntax::NodeKind::BinaryOperatorExpression; + // Not yet supported by SyntaxTree + case OO_New: + case OO_Delete: + case OO_Array_New: + case OO_Array_Delete: + case OO_Coawait: 
+ case OO_Call: + case OO_Subscript: + case OO_Arrow: + return syntax::NodeKind::UnknownExpression; + case OO_Conditional: // not overloadable + case NUM_OVERLOADED_OPERATORS: + case OO_None: + llvm_unreachable("Not an overloadable operator"); + } + llvm_unreachable("Unknown OverloadedOperatorKind enum"); +} + +/// Gets the range of declarator as defined by the C++ grammar. E.g. +/// `int a;` -> range of `a`, +/// `int *a;` -> range of `*a`, +/// `int a[10];` -> range of `a[10]`, +/// `int a[1][2][3];` -> range of `a[1][2][3]`, +/// `int *a = nullptr` -> range of `*a = nullptr`. +/// FIMXE: \p Name must be a source range, e.g. for `operator+`. +static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, + SourceLocation Name, + SourceRange Initializer) { + SourceLocation Start = GetStartLoc().Visit(T); + SourceLocation End = T.getSourceRange().getEnd(); + assert(End.isValid()); + if (Name.isValid()) { + if (Start.isInvalid()) + Start = Name; + if (SM.isBeforeInTranslationUnit(End, Name)) + End = Name; + } + if (Initializer.isValid()) { + auto InitializerEnd = Initializer.getEnd(); + assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || + End == InitializerEnd); + End = InitializerEnd; + } + return SourceRange(Start, End); +} + +namespace { +/// All AST hierarchy roots that can be represented as pointers. +using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; +/// Maintains a mapping from AST to syntax tree nodes. This class will get more +/// complicated as we support more kinds of AST nodes, e.g. TypeLocs. +/// FIXME: expose this as public API. 
+class ASTToSyntaxMapping { +public: + void add(ASTPtr From, syntax::Tree *To) { + assert(To != nullptr); + assert(!From.isNull()); + + bool Added = Nodes.insert({From, To}).second; + (void)Added; + assert(Added && "mapping added twice"); + } + + syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } + +private: + llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; +}; +} // namespace + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -57,30 +271,44 @@ public: } llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } + const SourceManager &sourceManager() const { return Arena.sourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. - void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New); - - /// Must be called with the range of each `DeclaratorDecl`. Ensures the - /// corresponding declarator nodes are covered by `SimpleDeclaration`. - void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range); + void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, + ASTPtr From) { + assert(New); + Pending.foldChildren(Arena, Range, New); + if (From) + Mapping.add(From, New); + } + void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, + TypeLoc L) { + // FIXME: add mapping for TypeLocs + foldNode(Range, New, nullptr); + } /// Notifies that we should not consume trailing semicolon when computing /// token range of \p D. - void noticeDeclaratorWithoutSemicolon(Decl *D); + void noticeDeclWithoutSemicolon(Decl *D); /// Mark the \p Child node with a corresponding \p Role. All marked children /// should be consumed by foldNode. - /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), - /// wraps expressions into expression statement. + /// When called on expressions (clang::Expr is derived from clang::Stmt), + /// wraps expressions into expression statement. 
void markStmtChild(Stmt *Child, NodeRole Role); /// Should be called for expressions in non-statement position to avoid /// wrapping into expression statement. void markExprChild(Expr *Child, NodeRole Role); - /// Set role for a token starting at \p Loc. void markChildToken(SourceLocation Loc, NodeRole R); + /// Set role for \p T. + void markChildToken(const syntax::Token *T, NodeRole R); + + /// Set role for \p N. + void markChild(syntax::Node *N, NodeRole R); + /// Set role for the syntax node matching \p N. + void markChild(ASTPtr N, NodeRole R); /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { @@ -97,8 +325,16 @@ public: return TU; } - /// getRange() finds the syntax tokens corresponding to the passed source - /// locations. + /// Finds a token starting at \p L. The token must exist if \p L is valid. + const syntax::Token *findToken(SourceLocation L) const; + + /// Finds the syntax tokens corresponding to the \p SourceRange. + llvm::ArrayRef<syntax::Token> getRange(SourceRange Range) const { + assert(Range.isValid()); + return getRange(Range.getBegin(), Range.getEnd()); + } + + /// Finds the syntax tokens corresponding to the passed source locations. /// \p First is the start position of the first token and \p Last is the start /// position of the last token. llvm::ArrayRef<syntax::Token> getRange(SourceLocation First, @@ -109,23 +345,62 @@ public: Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } - llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const { - auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); - if (llvm::isa<NamespaceDecl>(D)) - return Tokens; - if (DeclsWithoutSemicolons.count(D)) - return Tokens; - // FIXME: do not consume trailing semicolon on function definitions. - // Most declarations own a semicolon in syntax trees, but not in clang AST. 
- return withTrailingSemicolon(Tokens); + + llvm::ArrayRef<syntax::Token> + getTemplateRange(const ClassTemplateSpecializationDecl *D) const { + auto Tokens = getRange(D->getSourceRange()); + return maybeAppendSemicolon(Tokens, D); } + + /// Returns true if \p D is the last declarator in a chain and is thus + /// reponsible for creating SimpleDeclaration for the whole chain. + template <class T> + bool isResponsibleForCreatingDeclaration(const T *D) const { + static_assert((std::is_base_of<DeclaratorDecl, T>::value || + std::is_base_of<TypedefNameDecl, T>::value), + "only DeclaratorDecl and TypedefNameDecl are supported."); + + const Decl *Next = D->getNextDeclInContext(); + + // There's no next sibling, this one is responsible. + if (Next == nullptr) { + return true; + } + const auto *NextT = llvm::dyn_cast<T>(Next); + + // Next sibling is not the same type, this one is responsible. + if (NextT == nullptr) { + return true; + } + // Next sibling doesn't begin at the same loc, it must be a different + // declaration, so this declarator is responsible. + if (NextT->getBeginLoc() != D->getBeginLoc()) { + return true; + } + + // NextT is a member of the same declaration, and we need the last member to + // create declaration. This one is not responsible. + return false; + } + + llvm::ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { + llvm::ArrayRef<clang::syntax::Token> Tokens; + // We want to drop the template parameters for specializations. + if (const auto *S = llvm::dyn_cast<TagDecl>(D)) + Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); + else + Tokens = getRange(D->getSourceRange()); + return maybeAppendSemicolon(Tokens, D); + } + llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const { - return getRange(E->getBeginLoc(), E->getEndLoc()); + return getRange(E->getSourceRange()); } + /// Find the adjusted range for the statement, consuming the trailing /// semicolon when needed. 
llvm::ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { - auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + auto Tokens = getRange(S->getSourceRange()); if (isa<CompoundStmt>(S)) return Tokens; @@ -138,17 +413,31 @@ public: private: llvm::ArrayRef<syntax::Token> + maybeAppendSemicolon(llvm::ArrayRef<syntax::Token> Tokens, + const Decl *D) const { + if (llvm::isa<NamespaceDecl>(D)) + return Tokens; + if (DeclsWithoutSemicolons.count(D)) + return Tokens; + // FIXME: do not consume trailing semicolon on function definitions. + // Most declarations own a semicolon in syntax trees, but not in clang AST. + return withTrailingSemicolon(Tokens); + } + + llvm::ArrayRef<syntax::Token> withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const { assert(!Tokens.empty()); assert(Tokens.back().kind() != tok::eof); - // (!) we never consume 'eof', so looking at the next token is ok. + // We never consume 'eof', so looking at the next token is ok. if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); return Tokens; } - /// Finds a token starting at \p L. The token must exist. - const syntax::Token *findToken(SourceLocation L) const; + void setRole(syntax::Node *N, NodeRole R) { + assert(N->role() == NodeRole::Detached); + N->setRole(R); + } /// A collection of trees covering the input tokens. /// When created, each tree corresponds to a single token in the file. 
@@ -166,12 +455,10 @@ private: auto *L = new (A.allocator()) syntax::Leaf(&T); L->Original = true; L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); - Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + Trees.insert(Trees.end(), {&T, L}); } } - ~Forest() { assert(DelayedFolds.empty()); } - void assignRole(llvm::ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { assert(!Range.empty()); @@ -181,56 +468,49 @@ private: assert((std::next(It) == Trees.end() || std::next(It)->first == Range.end()) && "no child with the specified range"); - It->second.Role = Role; + assert(It->second->role() == NodeRole::Detached && + "re-assigning role for a child"); + It->second->setRole(Role); } /// Add \p Node to the forest and attach child nodes based on \p Tokens. void foldChildren(const syntax::Arena &A, llvm::ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) { - // Execute delayed folds inside `Tokens`. - auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); - auto It = BeginExecuted; - for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) - foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), - It->second.Node); - DelayedFolds.erase(BeginExecuted, It); - // Attach children to `Node`. - foldChildrenEager(A, Tokens, Node); - } + assert(Node->firstChild() == nullptr && "node already has children"); - /// Schedule a call to `foldChildren` that will only be executed when - /// containing node is folded. The range of delayed nodes can be extended by - /// calling `extendDelayedFold`. Only one delayed node for each starting - /// token is allowed. 
- void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { - assert(!Tokens.empty()); - bool Inserted = - DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) - .second; - (void)Inserted; - assert(Inserted && "Multiple delayed folds start at the same token"); - } + auto *FirstToken = Tokens.begin(); + auto BeginChildren = Trees.lower_bound(FirstToken); - /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends - /// its endpoint to `ExtendedRange.end()` and returns true. - /// Otherwise, returns false. - bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) { - assert(!ExtendedRange.empty()); - auto It = DelayedFolds.find(ExtendedRange.data()); - if (It == DelayedFolds.end()) - return false; - assert(It->second.End <= ExtendedRange.end()); - It->second.End = ExtendedRange.end(); - return true; + assert((BeginChildren == Trees.end() || + BeginChildren->first == FirstToken) && + "fold crosses boundaries of existing subtrees"); + auto EndChildren = Trees.lower_bound(Tokens.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && + "fold crosses boundaries of existing subtrees"); + + // We need to go in reverse order, because we can only prepend. + for (auto It = EndChildren; It != BeginChildren; --It) { + auto *C = std::prev(It)->second; + if (C->role() == NodeRole::Detached) + C->setRole(NodeRole::Unknown); + Node->prependChildLowLevel(C); + } + + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + + Trees.erase(BeginChildren, EndChildren); + Trees.insert({FirstToken, Node}); } // EXPECTS: all tokens were consumed and are owned by a single root node. 
syntax::Node *finalize() && { assert(Trees.size() == 1); - auto *Root = Trees.begin()->second.Node; + auto *Root = Trees.begin()->second; Trees = {}; return Root; } @@ -243,66 +523,19 @@ private: ? (std::next(It)->first - It->first) : A.tokenBuffer().expandedTokens().end() - It->first; - R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n", - It->second.Node->kind(), - It->first->text(A.sourceManager()), CoveredTokens); - R += It->second.Node->dump(A); + R += std::string(llvm::formatv( + "- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), + It->first->text(A.sourceManager()), CoveredTokens)); + R += It->second->dump(A); } return R; } private: - /// Implementation detail of `foldChildren`, does acutal folding ignoring - /// delayed folds. - void foldChildrenEager(const syntax::Arena &A, - llvm::ArrayRef<syntax::Token> Tokens, - syntax::Tree *Node) { - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = Tokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert((BeginChildren == Trees.end() || - BeginChildren->first == FirstToken) && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(Tokens.end()); - assert( - (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && - "fold crosses boundaries of existing subtrees"); - - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); - - // Mark that this node came from the AST and is backed by the source code. - Node->Original = true; - Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); - - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); - } - /// A with a role that should be assigned to it when adding to a parent. 
- struct NodeAndRole { - explicit NodeAndRole(syntax::Node *Node) - : Node(Node), Role(NodeRole::Unknown) {} - - syntax::Node *Node; - NodeRole Role; - }; - /// Maps from the start token to a subtree starting at that token. /// Keys in the map are pointers into the array of expanded tokens, so /// pointer order corresponds to the order of preprocessor tokens. - /// FIXME: storing the end tokens is redundant. - /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. - std::map<const syntax::Token *, NodeAndRole> Trees; - - /// See documentation of `foldChildrenDelayed` for details. - struct DelayedFold { - const syntax::Token *End = nullptr; - syntax::Tree *Node = nullptr; - }; - std::map<const syntax::Token *, DelayedFold> DelayedFolds; + std::map<const syntax::Token *, syntax::Node *> Trees; }; /// For debugging purposes. @@ -314,49 +547,91 @@ private: LocationToToken; Forest Pending; llvm::DenseSet<Decl *> DeclsWithoutSemicolons; + ASTToSyntaxMapping Mapping; }; namespace { class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { public: - explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder) - : Builder(Builder), LangOpts(Ctx.getLangOpts()) {} + explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) + : Builder(Builder), Context(Context) {} bool shouldTraversePostOrder() const { return true; } - bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { - // Ensure declarators are covered by SimpleDeclaration. - Builder.noticeDeclaratorRange(Builder.getRange(D)); - // FIXME: build nodes for the declarator too. - return true; + bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { + return processDeclaratorAndDeclaration(DD); } - bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { - // Also a declarator. - Builder.noticeDeclaratorRange(Builder.getRange(D)); - // FIXME: build nodes for the declarator too. 
- return true; + + bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { + return processDeclaratorAndDeclaration(TD); } bool VisitDecl(Decl *D) { assert(!D->isImplicit()); - Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::UnknownDeclaration()); + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::UnknownDeclaration(), D); + return true; + } + + // RAV does not call WalkUpFrom* on explicit instantiations, so we have to + // override Traverse. + // FIXME: make RAV call WalkUpFrom* instead. + bool + TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { + if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) + return false; + if (C->isExplicitSpecialization()) + return true; // we are only interested in explicit instantiations. + auto *Declaration = + cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); + foldExplicitTemplateInstantiation( + Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), + Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); + return true; + } + + bool WalkUpFromTemplateDecl(TemplateDecl *S) { + foldTemplateDeclaration( + Builder.getDeclarationRange(S), + Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), + Builder.getDeclarationRange(S->getTemplatedDecl()), S); return true; } bool WalkUpFromTagDecl(TagDecl *C) { // FIXME: build the ClassSpecifier node. - if (C->isFreeStanding()) { - // Class is a declaration specifier and needs a spanning declaration node. - Builder.foldNode(Builder.getRange(C), - new (allocator()) syntax::SimpleDeclaration); + if (!C->isFreeStanding()) { + assert(C->getNumTemplateParameterLists() == 0); return true; } + handleFreeStandingTagDecl(C); return true; } + syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { + assert(C->isFreeStanding()); + // Class is a declaration specifier and needs a spanning declaration node. 
+ auto DeclarationRange = Builder.getDeclarationRange(C); + syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; + Builder.foldNode(DeclarationRange, Result, nullptr); + + // Build TemplateDeclaration nodes if we had template parameters. + auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { + const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); + auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); + Result = + foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); + DeclarationRange = R; + }; + if (auto *S = llvm::dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) + ConsumeTemplateParameters(*S->getTemplateParameters()); + for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) + ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); + return Result; + } + bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { - // (!) we do not want to call VisitDecl(), the declaration for translation + // We do not want to call VisitDecl(), the declaration for translation // unit is built by finalize(). return true; } @@ -370,14 +645,14 @@ public: Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::CompoundStatement); + new (allocator()) syntax::CompoundStatement, S); return true; } // Some statements are not yet handled by syntax trees. bool WalkUpFromStmt(Stmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::UnknownStatement); + new (allocator()) syntax::UnknownStatement, S); return true; } @@ -386,27 +661,28 @@ public: // RAV traverses it as a statement, we produce invalid node kinds in that // case. // FIXME: should do this in RAV instead? 
- if (S->getInit() && !TraverseStmt(S->getInit())) - return false; - if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) - return false; - if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) - return false; - if (S->getBody() && !TraverseStmt(S->getBody())) - return false; - return true; + bool Result = [&, this]() { + if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + }(); + WalkUpFromCXXForRangeStmt(S); + return Result; } bool TraverseStmt(Stmt *S) { if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) { // We want to consume the semicolon, make sure SimpleDeclaration does not. for (auto *D : DS->decls()) - Builder.noticeDeclaratorWithoutSemicolon(D); + Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) { - // (!) do not recurse into subexpressions. - // we do not have syntax trees for expressions yet, so we only want to see - // the first top-level expression. 
- return WalkUpFromExpr(E->IgnoreImplicit()); + return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); } return RecursiveASTVisitor::TraverseStmt(S); } @@ -415,19 +691,306 @@ public: bool WalkUpFromExpr(Expr *E) { assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); Builder.foldNode(Builder.getExprRange(E), - new (allocator()) syntax::UnknownExpression); + new (allocator()) syntax::UnknownExpression, E); + return true; + } + + syntax::NestedNameSpecifier * + BuildNestedNameSpecifier(NestedNameSpecifierLoc QualifierLoc) { + if (!QualifierLoc) + return nullptr; + for (auto it = QualifierLoc; it; it = it.getPrefix()) { + auto *NS = new (allocator()) syntax::NameSpecifier; + Builder.foldNode(Builder.getRange(it.getLocalSourceRange()), NS, nullptr); + Builder.markChild(NS, syntax::NodeRole::NestedNameSpecifier_specifier); + } + auto *NNS = new (allocator()) syntax::NestedNameSpecifier; + Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), NNS, + nullptr); + return NNS; + } + + bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { + // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node + // referencing the location of the UDL suffix (`_w` in `1.2_w`). The + // UDL suffix location does not point to the beginning of a token, so we + // can't represent the UDL suffix as a separate syntax tree node. 
+ + return WalkUpFromUserDefinedLiteral(S); + } + + syntax::UserDefinedLiteralExpression * + buildUserDefinedLiteral(UserDefinedLiteral *S) { + switch (S->getLiteralOperatorKind()) { + case clang::UserDefinedLiteral::LOK_Integer: + return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Floating: + return new (allocator()) syntax::FloatUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Character: + return new (allocator()) syntax::CharUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_String: + return new (allocator()) syntax::StringUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Raw: + case clang::UserDefinedLiteral::LOK_Template: + // For raw literal operator and numeric literal operator template we + // cannot get the type of the operand in the semantic AST. We get this + // information from the token. As integer and floating point have the same + // token kind, we run `NumericLiteralParser` again to distinguish them. 
+ auto TokLoc = S->getBeginLoc(); + auto TokSpelling = + Builder.findToken(TokLoc)->text(Context.getSourceManager()); + auto Literal = + NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), + Context.getLangOpts(), Context.getTargetInfo(), + Context.getDiagnostics()); + if (Literal.isIntegerLiteral()) + return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; + else { + assert(Literal.isFloatingLiteral()); + return new (allocator()) syntax::FloatUserDefinedLiteralExpression; + } + } + llvm_unreachable("Unknown literal operator kind."); + } + + bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); + return true; + } + + bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { + if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) + Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); + + auto *unqualifiedId = new (allocator()) syntax::UnqualifiedId; + // Get `UnqualifiedId` from `DeclRefExpr`. + // FIXME: Extract this logic so that it can be used by `MemberExpr`, + // and other semantic constructs, now it is tied to `DeclRefExpr`. 
+ if (!S->hasExplicitTemplateArgs()) { + Builder.foldNode(Builder.getRange(S->getNameInfo().getSourceRange()), + unqualifiedId, nullptr); + } else { + auto templateIdSourceRange = + SourceRange(S->getNameInfo().getBeginLoc(), S->getRAngleLoc()); + Builder.foldNode(Builder.getRange(templateIdSourceRange), unqualifiedId, + nullptr); + } + Builder.markChild(unqualifiedId, syntax::NodeRole::IdExpression_id); + + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::IdExpression, S); + return true; + } + + bool WalkUpFromParenExpr(ParenExpr *S) { + Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); + Builder.markExprChild(S->getSubExpr(), + syntax::NodeRole::ParenExpression_subExpression); + Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::ParenExpression, S); + return true; + } + + bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::IntegerLiteralExpression, S); + return true; + } + + bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::CharacterLiteralExpression, S); + return true; + } + + bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::FloatingLiteralExpression, S); + return true; + } + + bool WalkUpFromStringLiteral(StringLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::StringLiteralExpression, S); + return true; + } + + bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { + 
Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BoolLiteralExpression, S); + return true; + } + + bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { + Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::CxxNullPtrExpression, S); + return true; + } + + bool WalkUpFromUnaryOperator(UnaryOperator *S) { + Builder.markChildToken(S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getSubExpr(), + syntax::NodeRole::UnaryOperatorExpression_operand); + + if (S->isPostfix()) + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PostfixUnaryOperatorExpression, + S); + else + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PrefixUnaryOperatorExpression, + S); + + return true; + } + + bool WalkUpFromBinaryOperator(BinaryOperator *S) { + Builder.markExprChild( + S->getLHS(), syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken(S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild( + S->getRHS(), syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); return true; } + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { + if (getOperatorNodeKind(*S) == + syntax::NodeKind::PostfixUnaryOperatorExpression) { + // A postfix unary operator is declared as taking two operands. The + // second operand is used to distinguish from its prefix counterpart. In + // the semantic AST this "phantom" operand is represented as a + // `IntegerLiteral` with invalid `SourceLocation`. 
We skip visiting this + // operand because it does not correspond to anything written in source + // code + for (auto *child : S->children()) { + if (child->getSourceRange().isInvalid()) + continue; + if (!TraverseStmt(child)) + return false; + } + return WalkUpFromCXXOperatorCallExpr(S); + } else + return RecursiveASTVisitor::TraverseCXXOperatorCallExpr(S); + } + + bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { + switch (getOperatorNodeKind(*S)) { + case syntax::NodeKind::BinaryOperatorExpression: + Builder.markExprChild( + S->getArg(0), + syntax::NodeRole::BinaryOperatorExpression_leftHandSide); + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild( + S->getArg(1), + syntax::NodeRole::BinaryOperatorExpression_rightHandSide); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::BinaryOperatorExpression, S); + return true; + case syntax::NodeKind::PrefixUnaryOperatorExpression: + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getArg(0), + syntax::NodeRole::UnaryOperatorExpression_operand); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PrefixUnaryOperatorExpression, + S); + return true; + case syntax::NodeKind::PostfixUnaryOperatorExpression: + Builder.markChildToken( + S->getOperatorLoc(), + syntax::NodeRole::OperatorExpression_operatorToken); + Builder.markExprChild(S->getArg(0), + syntax::NodeRole::UnaryOperatorExpression_operand); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::PostfixUnaryOperatorExpression, + S); + return true; + case syntax::NodeKind::UnknownExpression: + return RecursiveASTVisitor::WalkUpFromCXXOperatorCallExpr(S); + default: + llvm_unreachable("getOperatorNodeKind() does not return this value"); + } + } + bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { - auto Tokens = Builder.getRange(S); + auto Tokens = 
Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { // Handle nested namespace definitions. Those start at '::' token, e.g. // namespace a^::b {} // FIXME: build corresponding nodes for the name of this namespace. return true; } - Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition); + Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); + return true; + } + + bool TraverseParenTypeLoc(ParenTypeLoc L) { + // We reverse order of traversal to get the proper syntax structure. + if (!WalkUpFromParenTypeLoc(L)) + return false; + return TraverseTypeLoc(L.getInnerLoc()); + } + + bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { + Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), + new (allocator()) syntax::ParenDeclarator, L); + return true; + } + + // Declarator chunks, they are produced by type locs and some clang::Decls. 
+ bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { + Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); + Builder.markExprChild(L.getSizeExpr(), + syntax::NodeRole::ArraySubscript_sizeExpression); + Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), + new (allocator()) syntax::ArraySubscript, L); + return true; + } + + bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { + Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); + for (auto *P : L.getParams()) { + Builder.markChild(P, syntax::NodeRole::ParametersAndQualifiers_parameter); + } + Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); + Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), + new (allocator()) syntax::ParametersAndQualifiers, L); + return true; + } + + bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { + if (!L.getTypePtr()->hasTrailingReturn()) + return WalkUpFromFunctionTypeLoc(L); + + auto *TrailingReturnTokens = BuildTrailingReturn(L); + // Finish building the node for parameters. + Builder.markChild(TrailingReturnTokens, + syntax::NodeRole::ParametersAndQualifiers_trailingReturn); + return WalkUpFromFunctionTypeLoc(L); + } + + bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { + auto SR = L.getLocalSourceRange(); + Builder.foldNode(Builder.getRange(SR), + new (allocator()) syntax::MemberPointer, L); return true; } @@ -436,13 +999,13 @@ public: // and fold resulting nodes. 
bool WalkUpFromDeclStmt(DeclStmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::DeclarationStatement); + new (allocator()) syntax::DeclarationStatement, S); return true; } bool WalkUpFromNullStmt(NullStmt *S) { Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::EmptyStatement); + new (allocator()) syntax::EmptyStatement, S); return true; } @@ -451,7 +1014,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::SwitchStatement); + new (allocator()) syntax::SwitchStatement, S); return true; } @@ -461,7 +1024,7 @@ public: Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::CaseStatement); + new (allocator()) syntax::CaseStatement, S); return true; } @@ -470,7 +1033,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::DefaultStatement); + new (allocator()) syntax::DefaultStatement, S); return true; } @@ -483,7 +1046,7 @@ public: Builder.markStmtChild(S->getElse(), syntax::NodeRole::IfStatement_elseStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::IfStatement); + new (allocator()) syntax::IfStatement, S); return true; } @@ -491,7 +1054,7 @@ public: Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ForStatement); + new (allocator()) syntax::ForStatement, S); return true; } @@ -500,7 +1063,7 @@ public: syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), 
syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::WhileStatement); + new (allocator()) syntax::WhileStatement, S); return true; } @@ -508,7 +1071,7 @@ public: Builder.markChildToken(S->getContinueLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ContinueStatement); + new (allocator()) syntax::ContinueStatement, S); return true; } @@ -516,7 +1079,7 @@ public: Builder.markChildToken(S->getBreakLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::BreakStatement); + new (allocator()) syntax::BreakStatement, S); return true; } @@ -526,7 +1089,7 @@ public: Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnStatement_value); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::ReturnStatement); + new (allocator()) syntax::ReturnStatement, S); return true; } @@ -534,13 +1097,13 @@ public: Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), - new (allocator()) syntax::RangeBasedForStatement); + new (allocator()) syntax::RangeBasedForStatement, S); return true; } bool WalkUpFromEmptyDecl(EmptyDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::EmptyDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::EmptyDeclaration, S); return true; } @@ -549,76 +1112,175 @@ public: syntax::NodeRole::StaticAssertDeclaration_condition); Builder.markExprChild(S->getMessage(), syntax::NodeRole::StaticAssertDeclaration_message); - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::StaticAssertDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::StaticAssertDeclaration, S); return true; } bool 
WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::LinkageSpecificationDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::LinkageSpecificationDeclaration, + S); return true; } bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::NamespaceAliasDefinition); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::NamespaceAliasDefinition, S); return true; } bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingNamespaceDirective); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingNamespaceDirective, S); return true; } bool WalkUpFromUsingDecl(UsingDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::UsingDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::UsingDeclaration, S); return true; } bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { - Builder.foldNode(Builder.getRange(S), - new (allocator()) syntax::TypeAliasDeclaration); + Builder.foldNode(Builder.getDeclarationRange(S), + new (allocator()) syntax::TypeAliasDeclaration, S); return true; } private: + template <class T> SourceLocation getQualifiedNameStart(T *D) { + 
static_assert((std::is_base_of<DeclaratorDecl, T>::value || + std::is_base_of<TypedefNameDecl, T>::value), + "only DeclaratorDecl and TypedefNameDecl are supported."); + + auto DN = D->getDeclName(); + bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); + if (IsAnonymous) + return SourceLocation(); + + if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D)) { + if (DD->getQualifierLoc()) { + return DD->getQualifierLoc().getBeginLoc(); + } + } + + return D->getLocation(); + } + + SourceRange getInitializerRange(Decl *D) { + if (auto *V = llvm::dyn_cast<VarDecl>(D)) { + auto *I = V->getInit(); + // Initializers in range-based-for are not part of the declarator + if (I && !V->isCXXForRangeDecl()) + return I->getSourceRange(); + } + + return SourceRange(); + } + + /// Folds SimpleDeclarator node (if present) and in case this is the last + /// declarator in the chain it also folds SimpleDeclaration node. + template <class T> bool processDeclaratorAndDeclaration(T *D) { + SourceRange Initializer = getInitializerRange(D); + auto Range = getDeclaratorRange(Builder.sourceManager(), + D->getTypeSourceInfo()->getTypeLoc(), + getQualifiedNameStart(D), Initializer); + + // There doesn't have to be a declarator (e.g. `void foo(int)` only has + // declaration, but no declarator). + if (Range.getBegin().isValid()) { + auto *N = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(Range), N, nullptr); + Builder.markChild(N, syntax::NodeRole::SimpleDeclaration_declarator); + } + + if (Builder.isResponsibleForCreatingDeclaration(D)) { + Builder.foldNode(Builder.getDeclarationRange(D), + new (allocator()) syntax::SimpleDeclaration, D); + } + return true; + } + + /// Returns the range of the built node. + syntax::TrailingReturnType *BuildTrailingReturn(FunctionProtoTypeLoc L) { + assert(L.getTypePtr()->hasTrailingReturn()); + + auto ReturnedType = L.getReturnLoc(); + // Build node for the declarator, if any. 
+ auto ReturnDeclaratorRange = + getDeclaratorRange(this->Builder.sourceManager(), ReturnedType, + /*Name=*/SourceLocation(), + /*Initializer=*/SourceLocation()); + syntax::SimpleDeclarator *ReturnDeclarator = nullptr; + if (ReturnDeclaratorRange.isValid()) { + ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; + Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), + ReturnDeclarator, nullptr); + } + + // Build node for trailing return type. + auto Return = Builder.getRange(ReturnedType.getSourceRange()); + const auto *Arrow = Return.begin() - 1; + assert(Arrow->kind() == tok::arrow); + auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); + Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); + if (ReturnDeclarator) + Builder.markChild(ReturnDeclarator, + syntax::NodeRole::TrailingReturnType_declarator); + auto *R = new (allocator()) syntax::TrailingReturnType; + Builder.foldNode(Tokens, R, L); + return R; + } + + void foldExplicitTemplateInstantiation( + ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, + const syntax::Token *TemplateKW, + syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { + assert(!ExternKW || ExternKW->kind() == tok::kw_extern); + assert(TemplateKW && TemplateKW->kind() == tok::kw_template); + Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); + Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + Builder.markChild( + InnerDeclaration, + syntax::NodeRole::ExplicitTemplateInstantiation_declaration); + Builder.foldNode( + Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); + } + + syntax::TemplateDeclaration *foldTemplateDeclaration( + ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, + ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { + assert(TemplateKW && TemplateKW->kind() == tok::kw_template); + Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); + + auto *N = new (allocator()) 
syntax::TemplateDeclaration; + Builder.foldNode(Range, N, From); + Builder.markChild(N, syntax::NodeRole::TemplateDeclaration_declaration); + return N; + } + /// A small helper to save some typing. llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } syntax::TreeBuilder &Builder; - const LangOptions &LangOpts; + const ASTContext &Context; }; } // namespace -void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range, - syntax::Tree *New) { - Pending.foldChildren(Arena, Range, New); -} - -void syntax::TreeBuilder::noticeDeclaratorRange( - llvm::ArrayRef<syntax::Token> Range) { - if (Pending.extendDelayedFold(Range)) - return; - Pending.foldChildrenDelayed(Range, - new (allocator()) syntax::SimpleDeclaration); -} - -void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { +void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { DeclsWithoutSemicolons.insert(D); } @@ -628,31 +1290,55 @@ void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { + if (!T) + return; + Pending.assignRole(*T, R); +} + +void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { + assert(N); + setRole(N, R); +} + +void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { + auto *SN = Mapping.find(N); + assert(SN != nullptr); + setRole(SN, R); +} + void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { if (!Child) return; - auto Range = getStmtRange(Child); - // This is an expression in a statement position, consume the trailing - // semicolon and form an 'ExpressionStatement' node. - if (auto *E = dyn_cast<Expr>(Child)) { - Pending.assignRole(getExprRange(E), - NodeRole::ExpressionStatement_expression); - // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. 
- Pending.foldChildren(Arena, Range, - new (allocator()) syntax::ExpressionStatement); - } - Pending.assignRole(Range, Role); + syntax::Tree *ChildNode; + if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + markExprChild(ChildExpr, NodeRole::ExpressionStatement_expression); + ChildNode = new (allocator()) syntax::ExpressionStatement; + // (!) 'getStmtRange()' ensures this covers a trailing semicolon. + Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + } else { + ChildNode = Mapping.find(Child); + } + assert(ChildNode != nullptr); + setRole(ChildNode, Role); } void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { if (!Child) return; + Child = Child->IgnoreImplicit(); - Pending.assignRole(getExprRange(Child), Role); + syntax::Tree *ChildNode = Mapping.find(Child); + assert(ChildNode != nullptr); + setRole(ChildNode, Role); } const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { + if (L.isInvalid()) + return nullptr; auto It = LocationToToken.find(L.getRawEncoding()); assert(It != LocationToToken.end()); return It->second; diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp index 72458528202e..24048b297a11 100644 --- a/clang/lib/Tooling/Syntax/Mutations.cpp +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -35,7 +35,7 @@ public: assert(!New->isDetached()); assert(Role != NodeRole::Detached); - New->Role = static_cast<unsigned>(Role); + New->setRole(Role); auto *P = Anchor->parent(); P->replaceChildRangeLowLevel(Anchor, Anchor, New); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 5b0c5107c134..2435ae0a91dd 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,6 +18,38 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << 
"TranslationUnit"; case NodeKind::UnknownExpression: return OS << "UnknownExpression"; + case NodeKind::ParenExpression: + return OS << "ParenExpression"; + case NodeKind::IntegerLiteralExpression: + return OS << "IntegerLiteralExpression"; + case NodeKind::CharacterLiteralExpression: + return OS << "CharacterLiteralExpression"; + case NodeKind::FloatingLiteralExpression: + return OS << "FloatingLiteralExpression"; + case NodeKind::StringLiteralExpression: + return OS << "StringLiteralExpression"; + case NodeKind::BoolLiteralExpression: + return OS << "BoolLiteralExpression"; + case NodeKind::CxxNullPtrExpression: + return OS << "CxxNullPtrExpression"; + case NodeKind::IntegerUserDefinedLiteralExpression: + return OS << "IntegerUserDefinedLiteralExpression"; + case NodeKind::FloatUserDefinedLiteralExpression: + return OS << "FloatUserDefinedLiteralExpression"; + case NodeKind::CharUserDefinedLiteralExpression: + return OS << "CharUserDefinedLiteralExpression"; + case NodeKind::StringUserDefinedLiteralExpression: + return OS << "StringUserDefinedLiteralExpression"; + case NodeKind::PrefixUnaryOperatorExpression: + return OS << "PrefixUnaryOperatorExpression"; + case NodeKind::PostfixUnaryOperatorExpression: + return OS << "PostfixUnaryOperatorExpression"; + case NodeKind::BinaryOperatorExpression: + return OS << "BinaryOperatorExpression"; + case NodeKind::UnqualifiedId: + return OS << "UnqualifiedId"; + case NodeKind::IdExpression: + return OS << "IdExpression"; case NodeKind::UnknownStatement: return OS << "UnknownStatement"; case NodeKind::DeclarationStatement: @@ -58,6 +90,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "LinkageSpecificationDeclaration"; case NodeKind::SimpleDeclaration: return OS << "SimpleDeclaration"; + case NodeKind::TemplateDeclaration: + return OS << "TemplateDeclaration"; + case NodeKind::ExplicitTemplateInstantiation: + return OS << "ExplicitTemplateInstantiation"; case 
NodeKind::NamespaceDefinition: return OS << "NamespaceDefinition"; case NodeKind::NamespaceAliasDefinition: @@ -68,6 +104,22 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "UsingDeclaration"; case NodeKind::TypeAliasDeclaration: return OS << "TypeAliasDeclaration"; + case NodeKind::SimpleDeclarator: + return OS << "SimpleDeclarator"; + case NodeKind::ParenDeclarator: + return OS << "ParenDeclarator"; + case NodeKind::ArraySubscript: + return OS << "ArraySubscript"; + case NodeKind::TrailingReturnType: + return OS << "TrailingReturnType"; + case NodeKind::ParametersAndQualifiers: + return OS << "ParametersAndQualifiers"; + case NodeKind::MemberPointer: + return OS << "MemberPointer"; + case NodeKind::NameSpecifier: + return OS << "NameSpecifier"; + case NodeKind::NestedNameSpecifier: + return OS << "NestedNameSpecifier"; } llvm_unreachable("unknown node kind"); } @@ -84,6 +136,12 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "CloseParen"; case syntax::NodeRole::IntroducerKeyword: return OS << "IntroducerKeyword"; + case syntax::NodeRole::LiteralToken: + return OS << "LiteralToken"; + case syntax::NodeRole::ArrowToken: + return OS << "ArrowToken"; + case syntax::NodeRole::ExternKeyword: + return OS << "ExternKeyword"; case syntax::NodeRole::BodyStatement: return OS << "BodyStatement"; case syntax::NodeRole::CaseStatement_value: @@ -94,6 +152,14 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "IfStatement_elseKeyword"; case syntax::NodeRole::IfStatement_elseStatement: return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::OperatorExpression_operatorToken: + return OS << "OperatorExpression_operatorToken"; + case syntax::NodeRole::UnaryOperatorExpression_operand: + return OS << "UnaryOperatorExpression_operand"; + case syntax::NodeRole::BinaryOperatorExpression_leftHandSide: + return OS << 
"BinaryOperatorExpression_leftHandSide"; + case syntax::NodeRole::BinaryOperatorExpression_rightHandSide: + return OS << "BinaryOperatorExpression_rightHandSide"; case syntax::NodeRole::ReturnStatement_value: return OS << "ReturnStatement_value"; case syntax::NodeRole::ExpressionStatement_expression: @@ -104,10 +170,126 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { return OS << "StaticAssertDeclaration_condition"; case syntax::NodeRole::StaticAssertDeclaration_message: return OS << "StaticAssertDeclaration_message"; + case syntax::NodeRole::SimpleDeclaration_declarator: + return OS << "SimpleDeclaration_declarator"; + case syntax::NodeRole::TemplateDeclaration_declaration: + return OS << "TemplateDeclaration_declaration"; + case syntax::NodeRole::ExplicitTemplateInstantiation_declaration: + return OS << "ExplicitTemplateInstantiation_declaration"; + case syntax::NodeRole::ArraySubscript_sizeExpression: + return OS << "ArraySubscript_sizeExpression"; + case syntax::NodeRole::TrailingReturnType_declarator: + return OS << "TrailingReturnType_declarator"; + case syntax::NodeRole::ParametersAndQualifiers_parameter: + return OS << "ParametersAndQualifiers_parameter"; + case syntax::NodeRole::ParametersAndQualifiers_trailingReturn: + return OS << "ParametersAndQualifiers_trailingReturn"; + case syntax::NodeRole::IdExpression_id: + return OS << "IdExpression_id"; + case syntax::NodeRole::IdExpression_qualifier: + return OS << "IdExpression_qualifier"; + case syntax::NodeRole::NestedNameSpecifier_specifier: + return OS << "NestedNameSpecifier_specifier"; + case syntax::NodeRole::ParenExpression_subExpression: + return OS << "ParenExpression_subExpression"; } llvm_unreachable("invalid role"); } +std::vector<syntax::NameSpecifier *> syntax::NestedNameSpecifier::specifiers() { + std::vector<syntax::NameSpecifier *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + assert(C->role() == 
syntax::NodeRole::NestedNameSpecifier_specifier); + Children.push_back(llvm::cast<syntax::NameSpecifier>(C)); + } + return Children; +} + +syntax::NestedNameSpecifier *syntax::IdExpression::qualifier() { + return llvm::cast_or_null<syntax::NestedNameSpecifier>( + findChild(syntax::NodeRole::IdExpression_qualifier)); +} + +syntax::UnqualifiedId *syntax::IdExpression::unqualifiedId() { + return llvm::cast_or_null<syntax::UnqualifiedId>( + findChild(syntax::NodeRole::IdExpression_id)); +} + +syntax::Leaf *syntax::ParenExpression::openParen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Expression *syntax::ParenExpression::subExpression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ParenExpression_subExpression)); +} + +syntax::Leaf *syntax::ParenExpression::closeParen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::IntegerLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::CharacterLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::FloatingLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::StringLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::BoolLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() { + 
return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::lhs() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide)); +} + +syntax::Leaf *syntax::UnaryOperatorExpression::operatorToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::UnaryOperatorExpression::operand() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::UnaryOperatorExpression_operand)); +} + +syntax::Leaf *syntax::BinaryOperatorExpression::operatorToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OperatorExpression_operatorToken)); +} + +syntax::Expression *syntax::BinaryOperatorExpression::rhs() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::BinaryOperatorExpression_rightHandSide)); +} + syntax::Leaf *syntax::SwitchStatement::switchKeyword() { return llvm::cast_or_null<syntax::Leaf>( findChild(syntax::NodeRole::IntroducerKeyword)); @@ -226,8 +408,8 @@ syntax::Leaf *syntax::CompoundStatement::lbrace() { std::vector<syntax::Statement *> syntax::CompoundStatement::statements() { std::vector<syntax::Statement *> Children; for (auto *C = firstChild(); C; C = C->nextSibling()) { - if (C->role() == syntax::NodeRole::CompoundStatement_statement) - Children.push_back(llvm::cast<syntax::Statement>(C)); + assert(C->role() == syntax::NodeRole::CompoundStatement_statement); + Children.push_back(llvm::cast<syntax::Statement>(C)); } return Children; } @@ -246,3 +428,98 @@ syntax::Expression *syntax::StaticAssertDeclaration::message() { return llvm::cast_or_null<syntax::Expression>( findChild(syntax::NodeRole::StaticAssertDeclaration_message)); } + +std::vector<syntax::SimpleDeclarator *> +syntax::SimpleDeclaration::declarators() { + 
std::vector<syntax::SimpleDeclarator *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::SimpleDeclaration_declarator) + Children.push_back(llvm::cast<syntax::SimpleDeclarator>(C)); + } + return Children; +} + +syntax::Leaf *syntax::TemplateDeclaration::templateKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Declaration *syntax::TemplateDeclaration::declaration() { + return llvm::cast_or_null<syntax::Declaration>( + findChild(syntax::NodeRole::TemplateDeclaration_declaration)); +} + +syntax::Leaf *syntax::ExplicitTemplateInstantiation::templateKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ExplicitTemplateInstantiation::externKeyword() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::ExternKeyword)); +} + +syntax::Declaration *syntax::ExplicitTemplateInstantiation::declaration() { + return llvm::cast_or_null<syntax::Declaration>( + findChild(syntax::NodeRole::ExplicitTemplateInstantiation_declaration)); +} + +syntax::Leaf *syntax::ParenDeclarator::lparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Leaf *syntax::ParenDeclarator::rparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::ArraySubscript::lbracket() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +syntax::Expression *syntax::ArraySubscript::sizeExpression() { + return llvm::cast_or_null<syntax::Expression>( + findChild(syntax::NodeRole::ArraySubscript_sizeExpression)); +} + +syntax::Leaf *syntax::ArraySubscript::rbracket() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::Leaf *syntax::TrailingReturnType::arrowToken() { + return 
llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::ArrowToken)); +} + +syntax::SimpleDeclarator *syntax::TrailingReturnType::declarator() { + return llvm::cast_or_null<syntax::SimpleDeclarator>( + findChild(syntax::NodeRole::TrailingReturnType_declarator)); +} + +syntax::Leaf *syntax::ParametersAndQualifiers::lparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector<syntax::SimpleDeclaration *> +syntax::ParametersAndQualifiers::parameters() { + std::vector<syntax::SimpleDeclaration *> Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::ParametersAndQualifiers_parameter) + Children.push_back(llvm::cast<syntax::SimpleDeclaration>(C)); + } + return Children; +} + +syntax::Leaf *syntax::ParametersAndQualifiers::rparen() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::CloseParen)); +} + +syntax::TrailingReturnType *syntax::ParametersAndQualifiers::trailingReturn() { + return llvm::cast_or_null<syntax::TrailingReturnType>( + findChild(syntax::NodeRole::ParametersAndQualifiers_trailingReturn)); +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 3df1c064923a..c6b904822b8b 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -35,6 +35,69 @@ using namespace clang; using namespace clang::syntax; +namespace { +// Finds the smallest consecutive subsuquence of Toks that covers R. 
+llvm::ArrayRef<syntax::Token> +getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R, + const SourceManager &SM) { + if (R.isInvalid()) + return {}; + const syntax::Token *Begin = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return SM.isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const syntax::Token *End = + llvm::partition_point(Toks, [&](const syntax::Token &T) { + return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + +// Finds the smallest expansion range that contains expanded tokens First and +// Last, e.g.: +// #define ID(x) x +// ID(ID(ID(a1) a2)) +// ~~ -> a1 +// ~~ -> a2 +// ~~~~~~~~~ -> a1 a2 +SourceRange findCommonRangeForMacroArgs(const syntax::Token &First, + const syntax::Token &Last, + const SourceManager &SM) { + SourceRange Res; + auto FirstLoc = First.location(), LastLoc = Last.location(); + // Keep traversing up the spelling chain as longs as tokens are part of the + // same expansion. + while (!FirstLoc.isFileID() && !LastLoc.isFileID()) { + auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion(); + auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion(); + // Stop if expansions have diverged. + if (ExpInfoFirst.getExpansionLocStart() != + ExpInfoLast.getExpansionLocStart()) + break; + // Do not continue into macro bodies. + if (!ExpInfoFirst.isMacroArgExpansion() || + !ExpInfoLast.isMacroArgExpansion()) + break; + FirstLoc = SM.getImmediateSpellingLoc(FirstLoc); + LastLoc = SM.getImmediateSpellingLoc(LastLoc); + // Update the result afterwards, as we want the tokens that triggered the + // expansion. + Res = {FirstLoc, LastLoc}; + } + // Normally mapping back to expansion location here only changes FileID, as + // we've already found some tokens expanded from the same macro argument, and + // they should map to a consecutive subset of spelled tokens. 
Unfortunately + // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations + // based on their FileID in addition to offsets. So even though we are + // referring to same tokens, SourceManager might tell us that one is before + // the other if they've got different FileIDs. + return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange(); +} + +} // namespace + syntax::Token::Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind) : Location(Location), Length(Length), Kind(Kind) { @@ -67,7 +130,8 @@ FileRange syntax::Token::range(const SourceManager &SM, auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens"); + assert((F == L || F.endOffset() <= L.beginOffset()) && + "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -120,19 +184,7 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { } llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { - if (R.isInvalid()) - return {}; - const Token *Begin = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); - }); - const Token *End = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); - }); - if (Begin > End) - return {}; - return {Begin, End}; + return getTokensCovering(expandedTokens(), R, *SourceMgr); } CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { @@ -161,19 +213,109 @@ TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { // Our token could only be produced by the previous mapping. if (It == File.Mappings.begin()) { // No previous mapping, no need to modify offsets. 
- return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr}; + return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], + /*Mapping=*/nullptr}; } --It; // 'It' now points to last mapping that started before our token. // Check if the token is part of the mapping. if (ExpandedIndex < It->EndExpanded) - return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It}; + return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; // Not part of the mapping, use the index from previous mapping to compute the // corresponding spelled token. return { &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], - /*Mapping*/ nullptr}; + /*Mapping=*/nullptr}; +} + +const TokenBuffer::Mapping * +TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, + const syntax::Token *Spelled) { + assert(F.SpelledTokens.data() <= Spelled); + unsigned SpelledI = Spelled - F.SpelledTokens.data(); + assert(SpelledI < F.SpelledTokens.size()); + + auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { + return M.BeginSpelled <= SpelledI; + }); + if (It == F.Mappings.begin()) + return nullptr; + --It; + return &*It; +} + +llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1> +TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { + if (Spelled.empty()) + return {}; + assert(Spelled.front().location().isFileID()); + + auto FID = sourceManager().getFileID(Spelled.front().location()); + auto It = Files.find(FID); + assert(It != Files.end()); + + const MarkedFile &File = It->second; + // `Spelled` must be a subrange of `File.SpelledTokens`. 
+ assert(File.SpelledTokens.data() <= Spelled.data()); + assert(&Spelled.back() <= + File.SpelledTokens.data() + File.SpelledTokens.size()); +#ifndef NDEBUG + auto T1 = Spelled.back().location(); + auto T2 = File.SpelledTokens.back().location(); + assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2)); +#endif + + auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); + unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); + assert(SpelledFrontI < File.SpelledTokens.size()); + unsigned ExpandedBegin; + if (!FrontMapping) { + // No mapping that starts before the first token of Spelled, we don't have + // to modify offsets. + ExpandedBegin = File.BeginExpanded + SpelledFrontI; + } else if (SpelledFrontI < FrontMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledFrontI != FrontMapping->BeginSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. + } + // Spelled tokens start at the beginning of this mapping. + ExpandedBegin = FrontMapping->BeginExpanded; + } else { + // Spelled tokens start after the mapping ends (they start in the hole + // between 2 mappings, or between a mapping and end of the file). + ExpandedBegin = + FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); + } + + auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); + unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); + unsigned ExpandedEnd; + if (!BackMapping) { + // No mapping that starts before the last token of Spelled, we don't have to + // modify offsets. + ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; + } else if (SpelledBackI < BackMapping->EndSpelled) { + // This mapping applies to Spelled tokens. + if (SpelledBackI + 1 != BackMapping->EndSpelled) { + // Spelled tokens don't cover the entire mapping, returning empty result. + return {}; // FIXME: support macro arguments. 
+ } + ExpandedEnd = BackMapping->EndExpanded; + } else { + // Spelled tokens end after the mapping ends. + ExpandedEnd = + BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; + } + + assert(ExpandedBegin < ExpandedTokens.size()); + assert(ExpandedEnd < ExpandedTokens.size()); + // Avoid returning empty ranges. + if (ExpandedBegin == ExpandedEnd) + return {}; + return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, + ExpandedTokens.data() + ExpandedEnd)}; } llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { @@ -182,9 +324,20 @@ llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { return It->second.SpelledTokens; } +const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const { + assert(Loc.isFileID()); + const auto *Tok = llvm::partition_point( + spelledTokens(SourceMgr->getFileID(Loc)), + [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + if (!Tok || Tok->location() != Loc) + return nullptr; + return Tok; +} + std::string TokenBuffer::Mapping::str() const { - return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", - BeginSpelled, EndSpelled, BeginExpanded, EndExpanded); + return std::string( + llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", + BeginSpelled, EndSpelled, BeginExpanded, EndExpanded)); } llvm::Optional<llvm::ArrayRef<syntax::Token>> @@ -194,8 +347,6 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { if (Expanded.empty()) return llvm::None; - // FIXME: also allow changes uniquely mapping to macro arguments. - const syntax::Token *BeginSpelled; const Mapping *BeginMapping; std::tie(BeginSpelled, BeginMapping) = @@ -213,12 +364,28 @@ TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { const MarkedFile &File = Files.find(FID)->second; - // Do not allow changes that cross macro expansion boundaries. 
+ // If both tokens are coming from a macro argument expansion, try and map to + // smallest part of the macro argument. BeginMapping && LastMapping check is + // only for performance, they are a prerequisite for Expanded.front() and + // Expanded.back() being part of a macro arg expansion. + if (BeginMapping && LastMapping && + SourceMgr->isMacroArgExpansion(Expanded.front().location()) && + SourceMgr->isMacroArgExpansion(Expanded.back().location())) { + auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(), + Expanded.back(), *SourceMgr); + // It might be the case that tokens are arguments of different macro calls, + // in that case we should continue with the logic below instead of returning + // an empty range. + if (CommonRange.isValid()) + return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr); + } + + // Do not allow changes that doesn't cover full expansion. unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data(); unsigned EndExpanded = Expanded.end() - ExpandedTokens.data(); - if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded) + if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded) return llvm::None; - if (LastMapping && EndExpanded < LastMapping->EndExpanded) + if (LastMapping && LastMapping->EndExpanded != EndExpanded) return llvm::None; // All is good, return the result. 
return llvm::makeArrayRef( @@ -253,24 +420,30 @@ TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { ExpandedTokens.data() + M->EndExpanded); return E; } - llvm::ArrayRef<syntax::Token> syntax::spelledTokensTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef<syntax::Token> Tokens) { assert(Loc.isFileID()); - llvm::ArrayRef<syntax::Token> All = - Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)); + auto *Right = llvm::partition_point( - All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); - bool AcceptRight = Right != All.end() && Right->location() <= Loc; - bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc; + Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc; + bool AcceptLeft = + Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc; return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0), Right + (AcceptRight ? 
1 : 0)); } +llvm::ArrayRef<syntax::Token> +syntax::spelledTokensTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledTokensTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + const syntax::Token * syntax::spelledIdentifierTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef<syntax::Token> Tokens) { for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { if (Tok.kind() == tok::identifier) return &Tok; @@ -278,6 +451,13 @@ syntax::spelledIdentifierTouching(SourceLocation Loc, return nullptr; } +const syntax::Token * +syntax::spelledIdentifierTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledIdentifierTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + std::vector<const syntax::Token *> TokenBuffer::macroExpansions(FileID FID) const { auto FileIt = Files.find(FID); @@ -293,7 +473,8 @@ TokenBuffer::macroExpansions(FileID FID) const { return Expansions; } -std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, +std::vector<syntax::Token> syntax::tokenize(const FileRange &FR, + const SourceManager &SM, const LangOptions &LO) { std::vector<syntax::Token> Tokens; IdentifierTable Identifiers(LO); @@ -308,18 +489,28 @@ std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, Tokens.push_back(syntax::Token(T)); }; - Lexer L(FID, SM.getBuffer(FID), SM, LO); + auto SrcBuffer = SM.getBufferData(FR.file()); + Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(), + SrcBuffer.data() + FR.beginOffset(), + // We can't make BufEnd point to FR.endOffset, as Lexer requires a + // null terminated buffer. + SrcBuffer.data() + SrcBuffer.size()); clang::Token T; - while (!L.LexFromRawLexer(T)) + while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset()) AddToken(T); - // 'eof' is only the last token if the input is null-terminated. 
Never store - // it, for consistency. - if (T.getKind() != tok::eof) + // LexFromRawLexer returns true when it parses the last token of the file, add + // it iff it starts within the range we are interested in. + if (SM.getFileOffset(T.getLocation()) < FR.endOffset()) AddToken(T); return Tokens; } +std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, + const LangOptions &LO) { + return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO); +} + /// Records information reqired to construct mappings for the token buffer that /// we are collecting. class TokenCollector::CollectPPExpansions : public PPCallbacks { @@ -335,14 +526,38 @@ public: SourceRange Range, const MacroArgs *Args) override { if (!Collector) return; - // Only record top-level expansions, not those where: + const auto &SM = Collector->PP.getSourceManager(); + // Only record top-level expansions that directly produce expanded tokens. + // This excludes those where: // - the macro use is inside a macro body, // - the macro appears in an argument to another macro. - if (!MacroNameTok.getLocation().isFileID() || - (LastExpansionEnd.isValid() && - Collector->PP.getSourceManager().isBeforeInTranslationUnit( - Range.getBegin(), LastExpansionEnd))) + // However macro expansion isn't really a tree, it's token rewrite rules, + // so there are other cases, e.g. + // #define B(X) X + // #define A 1 + B + // A(2) + // Both A and B produce expanded tokens, though the macro name 'B' comes + // from an expansion. The best we can do is merge the mappings for both. + + // The *last* token of any top-level macro expansion must be in a file. + // (In the example above, see the closing paren of the expansion of B). + if (!Range.getEnd().isFileID()) + return; + // If there's a current expansion that encloses this one, this one can't be + // top-level. 
+ if (LastExpansionEnd.isValid() && + !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())) return; + + // If the macro invocation (B) starts in a macro (A) but ends in a file, + // we'll create a merged mapping for A + B by overwriting the endpoint for + // A's startpoint. + if (!Range.getBegin().isFileID()) { + Range.setBegin(SM.getExpansionLoc(Range.getBegin())); + assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) && + "Overlapping macros should have same expansion location"); + } + Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd(); LastExpansionEnd = Range.getEnd(); } @@ -399,197 +614,180 @@ public: } TokenBuffer build() && { - buildSpelledTokens(); - - // Walk over expanded tokens and spelled tokens in parallel, building the - // mappings between those using source locations. - // To correctly recover empty macro expansions, we also take locations - // reported to PPCallbacks::MacroExpands into account as we do not have any - // expanded tokens with source locations to guide us. - - // The 'eof' token is special, it is not part of spelled token stream. We - // handle it separately at the end. assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); - for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) { - // (!) I might be updated by the following call. - processExpandedToken(I); - } - // 'eof' not handled in the loop, do it here. - assert(SM.getMainFileID() == - SM.getFileID(Result.ExpandedTokens.back().location())); - fillGapUntil(Result.Files[SM.getMainFileID()], - Result.ExpandedTokens.back().location(), - Result.ExpandedTokens.size() - 1); - Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size(); + // Tokenize every file that contributed tokens to the expanded stream. 
+ buildSpelledTokens(); - // Some files might have unaccounted spelled tokens at the end, add an empty - // mapping for those as they did not have expanded counterparts. - fillGapsAtEndOfFiles(); + // The expanded token stream consists of runs of tokens that came from + // the same source (a macro expansion, part of a file etc). + // Between these runs are the logical positions of spelled tokens that + // didn't expand to anything. + while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { + // Create empty mappings for spelled tokens that expanded to nothing here. + // May advance NextSpelled, but NextExpanded is unchanged. + discard(); + // Create mapping for a contiguous run of expanded tokens. + // Advances NextExpanded past the run, and NextSpelled accordingly. + unsigned OldPosition = NextExpanded; + advance(); + if (NextExpanded == OldPosition) + diagnoseAdvanceFailure(); + } + // If any tokens remain in any of the files, they didn't expand to anything. + // Create empty mappings up until the end of the file. + for (const auto &File : Result.Files) + discard(File.first); + +#ifndef NDEBUG + for (auto &pair : Result.Files) { + auto &mappings = pair.second.Mappings; + assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1, + const TokenBuffer::Mapping &M2) { + return M1.BeginSpelled < M2.BeginSpelled && + M1.EndSpelled < M2.EndSpelled && + M1.BeginExpanded < M2.BeginExpanded && + M1.EndExpanded < M2.EndExpanded; + })); + } +#endif return std::move(Result); } private: - /// Process the next token in an expanded stream and move corresponding - /// spelled tokens, record any mapping if needed. - /// (!) \p I will be updated if this had to skip tokens, e.g. for macros. - void processExpandedToken(unsigned &I) { - auto L = Result.ExpandedTokens[I].location(); - if (L.isMacroID()) { - processMacroExpansion(SM.getExpansionRange(L), I); - return; + // Consume a sequence of spelled tokens that didn't expand to anything. 
+ // In the simplest case, skips spelled tokens until finding one that produced + // the NextExpanded token, and creates an empty mapping for them. + // If Drain is provided, skips remaining tokens from that file instead. + void discard(llvm::Optional<FileID> Drain = llvm::None) { + SourceLocation Target = + Drain ? SM.getLocForEndOfFile(*Drain) + : SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()); + FileID File = SM.getFileID(Target); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + TokenBuffer::Mapping Mapping; + Mapping.BeginSpelled = NextSpelled; + // When dropping trailing tokens from a file, the empty mapping should + // be positioned within the file's expanded-token range (at the end). + Mapping.BeginExpanded = Mapping.EndExpanded = + Drain ? Result.Files[*Drain].EndExpanded : NextExpanded; + // We may want to split into several adjacent empty mappings. + // FlushMapping() emits the current mapping and starts a new one. + auto FlushMapping = [&, this] { + Mapping.EndSpelled = NextSpelled; + if (Mapping.BeginSpelled != Mapping.EndSpelled) + Result.Files[File].Mappings.push_back(Mapping); + Mapping.BeginSpelled = NextSpelled; + }; + + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() < Target) { + // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) + // then we want to partition our (empty) mapping. + // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) + SourceLocation KnownEnd = CollectedExpansions.lookup( + SpelledTokens[NextSpelled].location().getRawEncoding()); + if (KnownEnd.isValid()) { + FlushMapping(); // Emits [Start, NextSpelled) + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= KnownEnd) + ++NextSpelled; + FlushMapping(); // Emits [NextSpelled, KnownEnd] + // Now the loop contitues and will emit (KnownEnd, Target). 
+ } else { + ++NextSpelled; + } } - if (L.isFileID()) { - auto FID = SM.getFileID(L); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, L, I); + FlushMapping(); + } - // Skip the token. - assert(File.SpelledTokens[NextSpelled[FID]].location() == L && - "no corresponding token in the spelled stream"); - ++NextSpelled[FID]; - return; + // Consumes the NextExpanded token and others that are part of the same run. + // Increases NextExpanded and NextSpelled by at least one, and adds a mapping + // (unless this is a run of file tokens, which we represent with no mapping). + void advance() { + const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; + SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); + FileID File = SM.getFileID(Expansion); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + if (Tok.location().isFileID()) { + // A run of file tokens continues while the expanded/spelled tokens match. + while (NextSpelled < SpelledTokens.size() && + NextExpanded < Result.ExpandedTokens.size() && + SpelledTokens[NextSpelled].location() == + Result.ExpandedTokens[NextExpanded].location()) { + ++NextSpelled; + ++NextExpanded; + } + // We need no mapping for file tokens copied to the expanded stream. + } else { + // We found a new macro expansion. We should have its spelling bounds. + auto End = CollectedExpansions.lookup(Expansion.getRawEncoding()); + assert(End.isValid() && "Macro expansion wasn't captured?"); + + // Mapping starts here... + TokenBuffer::Mapping Mapping; + Mapping.BeginExpanded = NextExpanded; + Mapping.BeginSpelled = NextSpelled; + // ... consumes spelled tokens within bounds we captured ... + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= End) + ++NextSpelled; + // ... consumes expanded tokens rooted at the same expansion ... 
+ while (NextExpanded < Result.ExpandedTokens.size() && + SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()) == Expansion) + ++NextExpanded; + // ... and ends here. + Mapping.EndExpanded = NextExpanded; + Mapping.EndSpelled = NextSpelled; + Result.Files[File].Mappings.push_back(Mapping); } } - /// Skipped expanded and spelled tokens of a macro expansion that covers \p - /// SpelledRange. Add a corresponding mapping. - /// (!) \p I will be the index of the last token in an expansion after this - /// function returns. - void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) { - auto FID = SM.getFileID(SpelledRange.getBegin()); - assert(FID == SM.getFileID(SpelledRange.getEnd())); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, SpelledRange.getBegin(), I); - - // Skip all expanded tokens from the same macro expansion. - unsigned BeginExpanded = I; - for (; I + 1 < Result.ExpandedTokens.size(); ++I) { - auto NextL = Result.ExpandedTokens[I + 1].location(); - if (!NextL.isMacroID() || - SM.getExpansionLoc(NextL) != SpelledRange.getBegin()) - break; + // advance() is supposed to consume at least one token - if not, we crash. + void diagnoseAdvanceFailure() { +#ifndef NDEBUG + // Show the failed-to-map token in context. + for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; + I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { + const char *L = + (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; + llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; } - unsigned EndExpanded = I + 1; - consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded, - EndExpanded, NextSpelled[FID]); +#endif + llvm_unreachable("Couldn't map expanded token to spelled tokens!"); } /// Initializes TokenBuffer::Files and fills spelled tokens and expanded /// ranges for each of the files. 
void buildSpelledTokens() { for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) { - auto FID = - SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location())); + const auto &Tok = Result.ExpandedTokens[I]; + auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); auto It = Result.Files.try_emplace(FID); TokenBuffer::MarkedFile &File = It.first->second; - File.EndExpanded = I + 1; + // The eof token should not be considered part of the main-file's range. + File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1; + if (!It.second) continue; // we have seen this file before. - // This is the first time we see this file. File.BeginExpanded = I; File.SpelledTokens = tokenize(FID, SM, LangOpts); } } - void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned ExpandedIndex, unsigned &SpelledIndex) { - consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex); - } - - /// Consumes spelled tokens that form a macro expansion and adds a entry to - /// the resulting token buffer. - /// (!) SpelledIndex is updated in-place. - void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned BeginExpanded, unsigned EndExpanded, - unsigned &SpelledIndex) { - // We need to record this mapping before continuing. - unsigned MappingBegin = SpelledIndex; - ++SpelledIndex; - - bool HitMapping = - tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue(); - (void)HitMapping; - assert(!HitMapping && "recursive macro expansion?"); - - TokenBuffer::Mapping M; - M.BeginExpanded = BeginExpanded; - M.EndExpanded = EndExpanded; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - - File.Mappings.push_back(M); - } - - /// Consumes spelled tokens until location \p L is reached and adds a mapping - /// covering the consumed tokens. The mapping will point to an empty expanded - /// range at position \p ExpandedIndex. 
- void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L, - unsigned ExpandedIndex) { - assert(L.isFileID()); - FileID FID; - unsigned Offset; - std::tie(FID, Offset) = SM.getDecomposedLoc(L); - - unsigned &SpelledIndex = NextSpelled[FID]; - unsigned MappingBegin = SpelledIndex; - while (true) { - auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex); - if (SpelledIndex != MappingBegin) { - TokenBuffer::Mapping M; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - M.BeginExpanded = M.EndExpanded = ExpandedIndex; - File.Mappings.push_back(M); - } - if (!EndLoc) - break; - consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex, - SpelledIndex); - - MappingBegin = SpelledIndex; - } - }; - - /// Consumes spelled tokens until it reaches Offset or a mapping boundary, - /// i.e. a name of a macro expansion or the start '#' token of a PP directive. - /// (!) NextSpelled is updated in place. - /// - /// returns None if \p Offset was reached, otherwise returns the end location - /// of a mapping that starts at \p NextSpelled. - llvm::Optional<SourceLocation> - tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset, - unsigned &NextSpelled) { - for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) { - auto L = File.SpelledTokens[NextSpelled].location(); - if (Offset <= SM.getFileOffset(L)) - return llvm::None; // reached the offset we are looking for. - auto Mapping = CollectedExpansions.find(L.getRawEncoding()); - if (Mapping != CollectedExpansions.end()) - return Mapping->second; // found a mapping before the offset. - } - return llvm::None; // no more tokens, we "reached" the offset. - } - - /// Adds empty mappings for unconsumed spelled tokens at the end of each file. 
- void fillGapsAtEndOfFiles() { - for (auto &F : Result.Files) { - if (F.second.SpelledTokens.empty()) - continue; - fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(), - F.second.EndExpanded); - } - } - TokenBuffer Result; - /// For each file, a position of the next spelled token we will consume. - llvm::DenseMap<FileID, unsigned> NextSpelled; + unsigned NextExpanded = 0; // cursor in ExpandedTokens + llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens PPExpansions CollectedExpansions; const SourceManager &SM; const LangOptions &LangOpts; @@ -604,19 +802,20 @@ TokenBuffer TokenCollector::consume() && { } std::string syntax::Token::str() const { - return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()), - length()); + return std::string(llvm::formatv("Token({0}, length = {1})", + tok::getTokenName(kind()), length())); } std::string syntax::Token::dumpForTests(const SourceManager &SM) const { - return llvm::formatv("{0} {1}", tok::getTokenName(kind()), text(SM)); + return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM), + tok::getTokenName(kind()), length())); } std::string TokenBuffer::dumpForTests() const { auto PrintToken = [this](const syntax::Token &T) -> std::string { if (T.kind() == tok::eof) return "<eof>"; - return T.text(*SourceMgr); + return std::string(T.text(*SourceMgr)); }; auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS, diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp index 9a6270ec4cce..37579e6145b6 100644 --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -58,22 +58,33 @@ bool syntax::Leaf::classof(const Node *N) { syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)), - Role(static_cast<unsigned>(NodeRole::Detached)), Original(false), - CanModify(false) {} + Role(0), Original(false), CanModify(false) { + this->setRole(NodeRole::Detached); 
+} bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } +void syntax::Node::setRole(NodeRole NR) { + this->Role = static_cast<unsigned>(NR); +} + bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) { - assert(Child->Parent == nullptr); - assert(Child->NextSibling == nullptr); assert(Child->role() == NodeRole::Detached); assert(Role != NodeRole::Detached); + Child->setRole(Role); + prependChildLowLevel(Child); +} + +void syntax::Tree::prependChildLowLevel(Node *Child) { + assert(Child->Parent == nullptr); + assert(Child->NextSibling == nullptr); + assert(Child->role() != NodeRole::Detached); + Child->Parent = this; Child->NextSibling = this->FirstChild; - Child->Role = static_cast<unsigned>(Role); this->FirstChild = Child; } @@ -94,7 +105,7 @@ void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, N != End;) { auto *Next = N->NextSibling; - N->Role = static_cast<unsigned>(NodeRole::Detached); + N->setRole(NodeRole::Detached); N->Parent = nullptr; N->NextSibling = nullptr; if (N->Original) diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp index 4a0618c50e42..40b6cff0d627 100644 --- a/clang/lib/Tooling/Tooling.cpp +++ b/clang/lib/Tooling/Tooling.cpp @@ -141,11 +141,13 @@ namespace clang { namespace tooling { /// Returns a clang build invocation initialized from the CC1 flags. 
-CompilerInvocation *newInvocation( - DiagnosticsEngine *Diagnostics, const llvm::opt::ArgStringList &CC1Args) { +CompilerInvocation *newInvocation(DiagnosticsEngine *Diagnostics, + const llvm::opt::ArgStringList &CC1Args, + const char *const BinaryName) { assert(!CC1Args.empty() && "Must at least contain the program name!"); CompilerInvocation *Invocation = new CompilerInvocation; - CompilerInvocation::CreateFromArgs(*Invocation, CC1Args, *Diagnostics); + CompilerInvocation::CreateFromArgs(*Invocation, CC1Args, *Diagnostics, + BinaryName); Invocation->getFrontendOpts().DisableFree = false; Invocation->getCodeGenOpts().DisableFree = false; return Invocation; @@ -234,7 +236,7 @@ llvm::Expected<std::string> getAbsolutePath(llvm::vfs::FileSystem &FS, if (auto EC = FS.makeAbsolute(AbsolutePath)) return llvm::errorCodeToError(EC); llvm::sys::path::native(AbsolutePath); - return AbsolutePath.str(); + return std::string(AbsolutePath.str()); } std::string getAbsolutePath(StringRef File) { @@ -345,7 +347,7 @@ bool ToolInvocation::run() { if (!CC1Args) return false; std::unique_ptr<CompilerInvocation> Invocation( - newInvocation(&Diagnostics, *CC1Args)); + newInvocation(&Diagnostics, *CC1Args, BinaryName)); // FIXME: remove this when all users have migrated! for (const auto &It : MappedFileContents) { // Inject the code as the given file name into the preprocessor options. 
@@ -619,7 +621,8 @@ buildASTFromCode(StringRef Code, StringRef FileName, std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs( StringRef Code, const std::vector<std::string> &Args, StringRef FileName, StringRef ToolName, std::shared_ptr<PCHContainerOperations> PCHContainerOps, - ArgumentsAdjuster Adjuster, const FileContentMappings &VirtualMappedFiles) { + ArgumentsAdjuster Adjuster, const FileContentMappings &VirtualMappedFiles, + DiagnosticConsumer *DiagConsumer) { std::vector<std::unique_ptr<ASTUnit>> ASTs; ASTBuilderAction Action(ASTs); llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem( @@ -633,6 +636,7 @@ std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs( ToolInvocation Invocation( getSyntaxOnlyToolArgs(ToolName, Adjuster(Args, FileName), FileName), &Action, Files.get(), std::move(PCHContainerOps)); + Invocation.setDiagnosticConsumer(DiagConsumer); InMemoryFileSystem->addFile(FileName, 0, llvm::MemoryBuffer::getMemBufferCopy(Code)); diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp new file mode 100644 index 000000000000..1579115b9313 --- /dev/null +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -0,0 +1,279 @@ +//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Transformer/Parsing.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "clang/Tooling/Transformer/SourceCode.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include <string> +#include <utility> +#include <vector> + +using namespace clang; +using namespace transformer; + +// FIXME: This implementation is entirely separate from that of the AST +// matchers. Given the similarity of the languages and uses of the two parsers, +// the two should share a common parsing infrastructure, as should other +// Transformer types. We intend to unify this implementation soon to share as +// much as possible with the AST Matchers parsing. + +namespace { +using llvm::Error; +using llvm::Expected; + +template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); + +struct ParseState { + // The remaining input to be processed. + StringRef Input; + // The original input. Not modified during parsing; only for reference in + // error reporting. + StringRef OriginalInput; +}; + +// Represents an intermediate result returned by a parsing function. Functions +// that don't generate values should use `llvm::None` +template <typename ResultType> struct ParseProgress { + ParseState State; + // Intermediate result generated by the Parser. 
+ ResultType Value; +}; + +template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; +template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); + +class ParseError : public llvm::ErrorInfo<ParseError> { +public: + // Required field for all ErrorInfo derivatives. + static char ID; + + ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) + : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), + Excerpt(std::move(InputExcerpt)) {} + + void log(llvm::raw_ostream &OS) const override { + OS << "parse error at position (" << Pos << "): " << ErrorMsg + << ": " + Excerpt; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + + // Position of the error in the input string. + size_t Pos; + std::string ErrorMsg; + // Excerpt of the input starting at the error position. + std::string Excerpt; +}; + +char ParseError::ID; +} // namespace + +static const llvm::StringMap<RangeSelectorOp<std::string>> & +getUnaryStringSelectors() { + static const llvm::StringMap<RangeSelectorOp<std::string>> M = { + {"name", name}, + {"node", node}, + {"statement", statement}, + {"statements", statements}, + {"member", member}, + {"callArgs", callArgs}, + {"elseBranch", elseBranch}, + {"initListElements", initListElements}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & +getUnaryRangeSelectors() { + static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { + {"before", before}, {"after", after}, {"expansion", expansion}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & +getBinaryStringSelectors() { + static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { + {"encloseNodes", range}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & +getBinaryRangeSelectors() { + static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> + M 
= {{"enclose", range}}; + return M; +} + +template <typename Element> +llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, + llvm::StringRef Key) { + auto it = Map.find(Key); + if (it == Map.end()) + return llvm::None; + return it->second; +} + +template <typename ResultType> +ParseProgress<ResultType> makeParseProgress(ParseState State, + ResultType Result) { + return ParseProgress<ResultType>{State, std::move(Result)}; +} + +static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { + size_t Pos = S.OriginalInput.size() - S.Input.size(); + return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), + S.OriginalInput.substr(Pos, 20).str()); +} + +// Returns a new ParseState that advances \c S by \c N characters. +static ParseState advance(ParseState S, size_t N) { + S.Input = S.Input.drop_front(N); + return S; +} + +static StringRef consumeWhitespace(StringRef S) { + return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); }); +} + +// Parses a single expected character \c c from \c State, skipping preceding +// whitespace. Error if the expected character isn't found. +static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty() || State.Input.front() != c) + return makeParseError(State, + ("expected char not found: " + llvm::Twine(c)).str()); + return makeParseProgress(advance(State, 1), llvm::None); +} + +// Parses an identifier "token" -- handles preceding whitespace. +static ExpectedProgress<std::string> parseId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + auto Id = State.Input.take_while( + [](char c) { return c >= 0 && isIdentifierBody(c); }); + if (Id.empty()) + return makeParseError(State, "failed to parse name"); + return makeParseProgress(advance(State, Id.size()), Id.str()); +} + +// For consistency with the AST matcher parser and C++ code, node ids are +// written as strings. 
However, we do not support escaping in the string. +static ExpectedProgress<std::string> parseStringId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return makeParseError(State, "unexpected end of input"); + if (!State.Input.consume_front("\"")) + return makeParseError( + State, + "expecting string, but encountered other character or end of input"); + + StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); + if (State.Input.size() == Id.size()) + return makeParseError(State, "unterminated string"); + // Advance past the trailing quote as well. + return makeParseProgress(advance(State, Id.size() + 1), Id.str()); +} + +// Parses a single element surrounded by parens. `Op` is applied to the parsed +// result to create the result of this function call. +template <typename T> +ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, + RangeSelectorOp<T> Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto E = ParseElement(P->State); + if (!E) + return E.takeError(); + + P = parseChar(')', E->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, Op(std::move(E->Value))); +} + +// Parses a pair of elements surrounded by parens and separated by comma. `Op` +// is applied to the parsed results to create the result of this function call. 
+template <typename T> +ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, + RangeSelectorOp<T, T> Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto Left = ParseElement(P->State); + if (!Left) + return Left.takeError(); + + P = parseChar(',', Left->State); + if (!P) + return P.takeError(); + + auto Right = ParseElement(P->State); + if (!Right) + return Right.takeError(); + + P = parseChar(')', Right->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, + Op(std::move(Left->Value), std::move(Right->Value))); +} + +// Parses input for a stencil operator(single arg ops like AsValue, MemberOp or +// Id operator). Returns StencilType representing the operator on success and +// error if it fails to parse input for an operator. +static ExpectedProgress<RangeSelector> +parseRangeSelectorImpl(ParseState State) { + auto Id = parseId(State); + if (!Id) + return Id.takeError(); + + std::string OpName = std::move(Id->Value); + if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) + return parseSingle(parseStringId, *Op, Id->State); + + if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) + return parseSingle(parseRangeSelectorImpl, *Op, Id->State); + + if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) + return parsePair(parseStringId, *Op, Id->State); + + if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) + return parsePair(parseRangeSelectorImpl, *Op, Id->State); + + return makeParseError(State, "unknown selector name: " + OpName); +} + +Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { + ParseState State = {Input, Input}; + ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); + if (!Result) + return Result.takeError(); + State = Result->State; + // Discard any potentially trailing whitespace. 
+ State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return Result->Value; + return makeParseError(State, "unexpected input after selector"); +} diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 9f81423c9022..29b1a5b0372e 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -23,8 +23,6 @@ using namespace clang; using namespace transformer; using ast_matchers::MatchFinder; -using ast_type_traits::ASTNodeKind; -using ast_type_traits::DynTypedNode; using llvm::Error; using llvm::StringError; @@ -148,7 +146,7 @@ RangeSelector transformer::statement(std::string ID) { }; } -RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) { +RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { Expected<CharSourceRange> BeginRange = Begin(Result); if (!BeginRange) @@ -167,8 +165,9 @@ RangeSelector transformer::range(RangeSelector Begin, RangeSelector End) { }; } -RangeSelector transformer::range(std::string BeginID, std::string EndID) { - return transformer::range(node(std::move(BeginID)), node(std::move(EndID))); +RangeSelector transformer::encloseNodes(std::string BeginID, + std::string EndID) { + return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID))); } RangeSelector transformer::member(std::string ID) { diff --git a/clang/lib/Tooling/Transformer/RewriteRule.cpp b/clang/lib/Tooling/Transformer/RewriteRule.cpp index 20d3a371950a..995bec03cd66 100644 --- a/clang/lib/Tooling/Transformer/RewriteRule.cpp +++ b/clang/lib/Tooling/Transformer/RewriteRule.cpp @@ -25,16 +25,14 @@ using namespace transformer; using ast_matchers::MatchFinder; using ast_matchers::internal::DynTypedMatcher; -using ast_type_traits::ASTNodeKind; using MatchResult = MatchFinder::MatchResult; 
-Expected<SmallVector<transformer::detail::Transformation, 1>> -transformer::detail::translateEdits(const MatchResult &Result, - llvm::ArrayRef<ASTEdit> Edits) { - SmallVector<transformer::detail::Transformation, 1> Transformations; - for (const auto &Edit : Edits) { - Expected<CharSourceRange> Range = Edit.TargetRange(Result); +static Expected<SmallVector<transformer::Edit, 1>> +translateEdits(const MatchResult &Result, ArrayRef<ASTEdit> ASTEdits) { + SmallVector<transformer::Edit, 1> Edits; + for (const auto &E : ASTEdits) { + Expected<CharSourceRange> Range = E.TargetRange(Result); if (!Range) return Range.takeError(); llvm::Optional<CharSourceRange> EditRange = @@ -42,21 +40,34 @@ transformer::detail::translateEdits(const MatchResult &Result, // FIXME: let user specify whether to treat this case as an error or ignore // it as is currently done. if (!EditRange) - return SmallVector<Transformation, 0>(); - auto Replacement = Edit.Replacement->eval(Result); + return SmallVector<Edit, 0>(); + auto Replacement = E.Replacement->eval(Result); if (!Replacement) return Replacement.takeError(); - transformer::detail::Transformation T; + transformer::Edit T; T.Range = *EditRange; T.Replacement = std::move(*Replacement); - Transformations.push_back(std::move(T)); + T.Metadata = E.Metadata; + Edits.push_back(std::move(T)); } - return Transformations; + return Edits; } -ASTEdit transformer::changeTo(RangeSelector S, TextGenerator Replacement) { +EditGenerator transformer::editList(SmallVector<ASTEdit, 1> Edits) { + return [Edits = std::move(Edits)](const MatchResult &Result) { + return translateEdits(Result, Edits); + }; +} + +EditGenerator transformer::edit(ASTEdit Edit) { + return [Edit = std::move(Edit)](const MatchResult &Result) { + return translateEdits(Result, {Edit}); + }; +} + +ASTEdit transformer::changeTo(RangeSelector Target, TextGenerator Replacement) { ASTEdit E; - E.TargetRange = std::move(S); + E.TargetRange = std::move(Target); E.Replacement = 
std::move(Replacement); return E; } @@ -83,8 +94,9 @@ ASTEdit transformer::remove(RangeSelector S) { return change(std::move(S), std::make_shared<SimpleTextGenerator>("")); } -RewriteRule transformer::makeRule(DynTypedMatcher M, SmallVector<ASTEdit, 1> Edits, - TextGenerator Explanation) { +RewriteRule transformer::makeRule(ast_matchers::internal::DynTypedMatcher M, + EditGenerator Edits, + TextGenerator Explanation) { return RewriteRule{{RewriteRule::Case{ std::move(M), std::move(Edits), std::move(Explanation), {}}}}; } @@ -105,10 +117,13 @@ static bool hasValidKind(const DynTypedMatcher &M) { #endif // Binds each rule's matcher to a unique (and deterministic) tag based on -// `TagBase` and the id paired with the case. +// `TagBase` and the id paired with the case. All of the returned matchers have +// their traversal kind explicitly set, either based on a pre-set kind or to the +// provided `DefaultTraversalKind`. static std::vector<DynTypedMatcher> taggedMatchers( StringRef TagBase, - const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases) { + const SmallVectorImpl<std::pair<size_t, RewriteRule::Case>> &Cases, + ast_type_traits::TraversalKind DefaultTraversalKind) { std::vector<DynTypedMatcher> Matchers; Matchers.reserve(Cases.size()); for (const auto &Case : Cases) { @@ -116,8 +131,10 @@ static std::vector<DynTypedMatcher> taggedMatchers( // HACK: Many matchers are not bindable, so ensure that tryBind will work. DynTypedMatcher BoundMatcher(Case.second.Matcher); BoundMatcher.setAllowBind(true); - auto M = BoundMatcher.tryBind(Tag); - Matchers.push_back(*std::move(M)); + auto M = *BoundMatcher.tryBind(Tag); + Matchers.push_back(!M.getTraversalKind() + ? M.withTraversalKind(DefaultTraversalKind) + : std::move(M)); } return Matchers; } @@ -147,14 +164,21 @@ transformer::detail::buildMatchers(const RewriteRule &Rule) { Buckets[Cases[I].Matcher.getSupportedKind()].emplace_back(I, Cases[I]); } + // Each anyOf explicitly controls the traversal kind. 
The anyOf itself is set + // to `TK_AsIs` to ensure no nodes are skipped, thereby deferring to the kind + // of the branches. Then, each branch is either left as is, if the kind is + // already set, or explicitly set to `TK_IgnoreUnlessSpelledInSource`. We + // choose this setting, because we think it is the one most friendly to + // beginners, who are (largely) the target audience of Transformer. std::vector<DynTypedMatcher> Matchers; for (const auto &Bucket : Buckets) { DynTypedMatcher M = DynTypedMatcher::constructVariadic( DynTypedMatcher::VO_AnyOf, Bucket.first, - taggedMatchers("Tag", Bucket.second)); + taggedMatchers("Tag", Bucket.second, TK_IgnoreUnlessSpelledInSource)); M.setAllowBind(true); // `tryBind` is guaranteed to succeed, because `AllowBind` was set to true. - Matchers.push_back(*M.tryBind(RewriteRule::RootID)); + Matchers.push_back( + M.tryBind(RewriteRule::RootID)->withTraversalKind(TK_AsIs)); } return Matchers; } diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp index 836401d1e605..26b204851f05 100644 --- a/clang/lib/Tooling/Transformer/SourceCode.cpp +++ b/clang/lib/Tooling/Transformer/SourceCode.cpp @@ -10,10 +10,24 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Transformer/SourceCode.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Comment.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include <set> using namespace clang; +using llvm::errc; +using llvm::StringError; + StringRef clang::tooling::getText(CharSourceRange Range, const ASTContext &Context) { return Lexer::getSourceText(Range, Context.getSourceManager(), @@ -23,11 +37,45 @@ StringRef 
clang::tooling::getText(CharSourceRange Range, CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range, tok::TokenKind Next, ASTContext &Context) { - Optional<Token> Tok = Lexer::findNextToken( - Range.getEnd(), Context.getSourceManager(), Context.getLangOpts()); - if (!Tok || !Tok->is(Next)) + CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(), + Context.getLangOpts()); + if (R.isInvalid()) + return Range; + Token Tok; + bool Err = + Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(), + Context.getLangOpts(), /*IgnoreWhiteSpace=*/true); + if (Err || !Tok.is(Next)) return Range; - return CharSourceRange::getTokenRange(Range.getBegin(), Tok->getLocation()); + return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation()); +} + +llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range, + const SourceManager &SM) { + if (Range.isInvalid()) + return llvm::make_error<StringError>(errc::invalid_argument, + "Invalid range"); + + if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) + return llvm::make_error<StringError>( + errc::invalid_argument, "Range starts or ends in a macro expansion"); + + if (SM.isInSystemHeader(Range.getBegin()) || + SM.isInSystemHeader(Range.getEnd())) + return llvm::make_error<StringError>(errc::invalid_argument, + "Range is in system header"); + + std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin()); + std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd()); + if (BeginInfo.first != EndInfo.first) + return llvm::make_error<StringError>( + errc::invalid_argument, "Range begins and ends in different files"); + + if (BeginInfo.second > EndInfo.second) + return llvm::make_error<StringError>( + errc::invalid_argument, "Range's begin is past its end"); + + return llvm::Error::success(); } llvm::Optional<CharSourceRange> @@ -46,20 +94,308 @@ clang::tooling::getRangeForEdit(const CharSourceRange &EditRange, // 
foo(DO_NOTHING(6)) // Decide whether the current behavior is desirable and modify if not. CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts); - if (Range.isInvalid()) - return None; + bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM)); + if (IsInvalid) + return llvm::None; + return Range; - if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) - return None; - if (SM.isInSystemHeader(Range.getBegin()) || - SM.isInSystemHeader(Range.getEnd())) - return None; +} - std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin()); - std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd()); - if (BeginInfo.first != EndInfo.first || - BeginInfo.second > EndInfo.second) - return None; +static bool startsWithNewline(const SourceManager &SM, const Token &Tok) { + return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]); +} - return Range; +static bool contains(const std::set<tok::TokenKind> &Terminators, + const Token &Tok) { + return Terminators.count(Tok.getKind()) > 0; +} + +// Returns the exclusive, *file* end location of the entity whose last token is +// at location 'EntityLast'. That is, it returns the location one past the last +// relevant character. +// +// Associated tokens include comments, horizontal whitespace and 'Terminators' +// -- optional tokens, which, if any are found, will be included; if +// 'Terminators' is empty, we will not include any extra tokens beyond comments +// and horizontal whitespace. +static SourceLocation +getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast, + const std::set<tok::TokenKind> &Terminators, + const LangOptions &LangOpts) { + assert(EntityLast.isValid() && "Invalid end location found."); + + // We remember the last location of a non-horizontal-whitespace token we have + // lexed; this is the location up to which we will want to delete. 
+ // FIXME: Support using the spelling loc here for cases where we want to + // analyze the macro text. + + CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast); + // FIXME: Should check isTokenRange(), for the (rare) case that + // `ExpansionRange` is a character range. + std::unique_ptr<Lexer> Lexer = [&]() { + bool Invalid = false; + auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd()); + llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid); + assert(!Invalid && "Cannot get file/offset"); + return std::make_unique<clang::Lexer>( + SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(), + File.data() + FileOffset.second, File.end()); + }(); + + // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown). + Lexer->SetKeepWhitespaceMode(true); + + // Generally, the code we want to include looks like this ([] are optional), + // If Terminators is empty: + // [ <comment> ] [ <newline> ] + // Otherwise: + // ... <terminator> [ <comment> ] [ <newline> ] + + Token Tok; + bool Terminated = false; + + // First, lex to the current token (which is the last token of the range that + // is definitely associated with the decl). Then, we process the first token + // separately from the rest based on conditions that hold specifically for + // that first token. + // + // We do not search for a terminator if none is required or we've already + // encountered it. Otherwise, if the original `EntityLast` location was in a + // macro expansion, we don't have visibility into the text, so we assume we've + // already terminated. However, we note this assumption with + // `TerminatedByMacro`, because we'll want to handle it somewhat differently + // for the terminators semicolon and comma. 
These terminators can be safely + // associated with the entity when they appear after the macro -- extra + // semicolons have no effect on the program and a well-formed program won't + // have multiple commas in a row, so we're guaranteed that there is only one. + // + // FIXME: This handling of macros is more conservative than necessary. When + // the end of the expansion coincides with the end of the node, we can still + // safely analyze the code. But, it is more complicated, because we need to + // start by lexing the spelling loc for the first token and then switch to the + // expansion loc. + bool TerminatedByMacro = false; + Lexer->LexFromRawLexer(Tok); + if (Terminators.empty() || contains(Terminators, Tok)) + Terminated = true; + else if (EntityLast.isMacroID()) { + Terminated = true; + TerminatedByMacro = true; + } + + // We save the most recent candidate for the exclusive end location. + SourceLocation End = Tok.getEndLoc(); + + while (!Terminated) { + // Lex the next token we want to possibly expand the range with. + Lexer->LexFromRawLexer(Tok); + + switch (Tok.getKind()) { + case tok::eof: + // Unexpected separators. + case tok::l_brace: + case tok::r_brace: + case tok::comma: + return End; + // Whitespace pseudo-tokens. + case tok::unknown: + if (startsWithNewline(SM, Tok)) + // Include at least until the end of the line. + End = Tok.getEndLoc(); + break; + default: + if (contains(Terminators, Tok)) + Terminated = true; + End = Tok.getEndLoc(); + break; + } + } + + do { + // Lex the next token we want to possibly expand the range with. + Lexer->LexFromRawLexer(Tok); + + switch (Tok.getKind()) { + case tok::unknown: + if (startsWithNewline(SM, Tok)) + // We're done, but include this newline. + return Tok.getEndLoc(); + break; + case tok::comment: + // Include any comments we find on the way. 
+ End = Tok.getEndLoc(); + break; + case tok::semi: + case tok::comma: + if (TerminatedByMacro && contains(Terminators, Tok)) { + End = Tok.getEndLoc(); + // We've found a real terminator. + TerminatedByMacro = false; + break; + } + // Found an unrelated token; stop and don't include it. + return End; + default: + // Found an unrelated token; stop and don't include it. + return End; + } + } while (true); +} + +// Returns the expected terminator tokens for the given declaration. +// +// If we do not know the correct terminator token, returns an empty set. +// +// There are cases where we have more than one possible terminator (for example, +// we find either a comma or a semicolon after a VarDecl). +static std::set<tok::TokenKind> getTerminators(const Decl &D) { + if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D)) + return {tok::semi}; + + if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D)) + return {tok::r_brace, tok::semi}; + + if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D)) + return {tok::comma, tok::semi}; + + return {}; +} + +// Starting from `Loc`, skips whitespace up to, and including, a single +// newline. Returns the (exclusive) end of any skipped whitespace (that is, the +// location immediately after the whitespace). +static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM, + SourceLocation Loc, + const LangOptions &LangOpts) { + const char *LocChars = SM.getCharacterData(Loc); + int i = 0; + while (isHorizontalWhitespace(LocChars[i])) + ++i; + if (isVerticalWhitespace(LocChars[i])) + ++i; + return Loc.getLocWithOffset(i); +} + +// Is `Loc` separated from any following decl by something meaningful (e.g. an +// empty line, a comment), ignoring horizontal whitespace? Since this is a +// heuristic, we return false when in doubt. `Loc` cannot be the first location +// in the file. 
+static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc, + const LangOptions &LangOpts) { + // If the preceding character is a newline, we'll check for an empty line as a + // separator. However, we can't identify an empty line using tokens, so we + // analyse the characters. If we try to use tokens, we'll just end up with a + // whitespace token, whose characters we'd have to analyse anyhow. + bool Invalid = false; + const char *LocChars = + SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid); + assert(!Invalid && + "Loc must be a valid character and not the first of the source file."); + if (isVerticalWhitespace(LocChars[0])) { + for (int i = 1; isWhitespace(LocChars[i]); ++i) + if (isVerticalWhitespace(LocChars[i])) + return true; + } + // We didn't find an empty line, so lex the next token, skipping past any + // whitespace we just scanned. + Token Tok; + bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts, + /*IgnoreWhiteSpace=*/true); + if (Failed) + // Any text that confuses the lexer seems fair to consider a separation. + return true; + + switch (Tok.getKind()) { + case tok::comment: + case tok::l_brace: + case tok::r_brace: + case tok::eof: + return true; + default: + return false; + } +} + +CharSourceRange tooling::getAssociatedRange(const Decl &Decl, + ASTContext &Context) { + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange()); + + // First, expand to the start of the template<> declaration if necessary. 
+ if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) { + if (const auto *T = Record->getDescribedClassTemplate()) + if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin())) + Range.setBegin(T->getBeginLoc()); + } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) { + if (const auto *T = F->getDescribedFunctionTemplate()) + if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin())) + Range.setBegin(T->getBeginLoc()); + } + + // Next, expand the end location past trailing comments to include a potential + // newline at the end of the decl's line. + Range.setEnd( + getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts)); + Range.setTokenRange(false); + + // Expand to include preceeding associated comments. We ignore any comments + // that are not preceeding the decl, since we've already skipped trailing + // comments with getEntityEndLoc. + if (const RawComment *Comment = + Decl.getASTContext().getRawCommentForDeclNoCache(&Decl)) + // Only include a preceding comment if: + // * it is *not* separate from the declaration (not including any newline + // that immediately follows the comment), + // * the decl *is* separate from any following entity (so, there are no + // other entities the comment could refer to), and + // * it is not a IfThisThenThat lint check. + if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(), + Range.getBegin()) && + !atOrBeforeSeparation( + SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts), + LangOpts) && + atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) { + const StringRef CommentText = Comment->getRawText(SM); + if (!CommentText.contains("LINT.IfChange") && + !CommentText.contains("LINT.ThenChange")) + Range.setBegin(Comment->getBeginLoc()); + } + // Add leading attributes. 
+ for (auto *Attr : Decl.attrs()) { + if (Attr->getLocation().isInvalid() || + !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin())) + continue; + Range.setBegin(Attr->getLocation()); + + // Extend to the left '[[' or '__attribute((' if we saw the attribute, + // unless it is not a valid location. + bool Invalid; + StringRef Source = + SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid); + if (Invalid) + continue; + llvm::StringRef BeforeAttr = + Source.substr(0, SM.getFileOffset(Range.getBegin())); + llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim(); + + for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) { + // Handle whitespace between attribute prefix and attribute value. + if (BeforeAttrStripped.endswith(Prefix)) { + // Move start to start position of prefix, which is + // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix) + // positions to the left. + Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>( + -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size()))); + break; + // If we didn't see '[[' or '__attribute' it's probably coming from a + // macro expansion which is already handled by makeFileCharRange(), + // below. + } + } + } + + // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But, + // Range.getBegin() may be inside an expansion. 
+ return Lexer::makeFileCharRange(Range, SM, LangOpts); } diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp index 8710e3cdf60f..2670bf7adabf 100644 --- a/clang/lib/Tooling/Transformer/Stencil.cpp +++ b/clang/lib/Tooling/Transformer/Stencil.cpp @@ -12,12 +12,14 @@ #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/Transformer/SourceCode.h" #include "clang/Tooling/Transformer/SourceCodeBuilders.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include <atomic> #include <memory> #include <string> @@ -26,7 +28,6 @@ using namespace clang; using namespace transformer; using ast_matchers::MatchFinder; -using ast_type_traits::DynTypedNode; using llvm::errc; using llvm::Error; using llvm::Expected; @@ -81,14 +82,14 @@ struct SelectorData { // A stencil operation to build a member access `e.m` or `e->m`, as appropriate. 
struct AccessData { AccessData(StringRef BaseId, Stencil Member) - : BaseId(BaseId), Member(std::move(Member)) {} + : BaseId(std::string(BaseId)), Member(std::move(Member)) {} std::string BaseId; Stencil Member; }; struct IfBoundData { IfBoundData(StringRef Id, Stencil TrueStencil, Stencil FalseStencil) - : Id(Id), TrueStencil(std::move(TrueStencil)), + : Id(std::string(Id)), TrueStencil(std::move(TrueStencil)), FalseStencil(std::move(FalseStencil)) {} std::string Id; Stencil TrueStencil; @@ -227,10 +228,37 @@ Error evalData(const UnaryOperationData &Data, Error evalData(const SelectorData &Data, const MatchFinder::MatchResult &Match, std::string *Result) { - auto Range = Data.Selector(Match); - if (!Range) - return Range.takeError(); - *Result += tooling::getText(*Range, *Match.Context); + auto RawRange = Data.Selector(Match); + if (!RawRange) + return RawRange.takeError(); + CharSourceRange Range = Lexer::makeFileCharRange( + *RawRange, *Match.SourceManager, Match.Context->getLangOpts()); + if (Range.isInvalid()) { + // Validate the original range to attempt to get a meaningful error message. + // If it's valid, then something else is the cause and we just return the + // generic failure message. + if (auto Err = tooling::validateEditRange(*RawRange, *Match.SourceManager)) + return handleErrors(std::move(Err), [](std::unique_ptr<StringError> E) { + assert(E->convertToErrorCode() == + llvm::make_error_code(errc::invalid_argument) && + "Validation errors must carry the invalid_argument code"); + return llvm::createStringError( + errc::invalid_argument, + "selected range could not be resolved to a valid source range; " + + E->getMessage()); + }); + return llvm::createStringError( + errc::invalid_argument, + "selected range could not be resolved to a valid source range"); + } + // Validate `Range`, because `makeFileCharRange` accepts some ranges that + // `validateEditRange` rejects. 
+ if (auto Err = tooling::validateEditRange(Range, *Match.SourceManager)) + return joinErrors( + llvm::createStringError(errc::invalid_argument, + "selected range is not valid for editing"), + std::move(Err)); + *Result += tooling::getText(Range, *Match.Context); return Error::success(); } @@ -294,47 +322,41 @@ public: }; } // namespace -Stencil transformer::detail::makeStencil(StringRef Text) { return text(Text); } - -Stencil transformer::detail::makeStencil(RangeSelector Selector) { - return selection(std::move(Selector)); +Stencil transformer::detail::makeStencil(StringRef Text) { + return std::make_shared<StencilImpl<RawTextData>>(std::string(Text)); } -Stencil transformer::text(StringRef Text) { - return std::make_shared<StencilImpl<RawTextData>>(Text); -} - -Stencil transformer::selection(RangeSelector Selector) { +Stencil transformer::detail::makeStencil(RangeSelector Selector) { return std::make_shared<StencilImpl<SelectorData>>(std::move(Selector)); } Stencil transformer::dPrint(StringRef Id) { - return std::make_shared<StencilImpl<DebugPrintNodeData>>(Id); + return std::make_shared<StencilImpl<DebugPrintNodeData>>(std::string(Id)); } Stencil transformer::expression(llvm::StringRef Id) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::Parens, Id); + UnaryNodeOperator::Parens, std::string(Id)); } Stencil transformer::deref(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::Deref, ExprId); + UnaryNodeOperator::Deref, std::string(ExprId)); } Stencil transformer::maybeDeref(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::MaybeDeref, ExprId); + UnaryNodeOperator::MaybeDeref, std::string(ExprId)); } Stencil transformer::addressOf(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::AddressOf, ExprId); + UnaryNodeOperator::AddressOf, std::string(ExprId)); } Stencil 
transformer::maybeAddressOf(llvm::StringRef ExprId) { return std::make_shared<StencilImpl<UnaryOperationData>>( - UnaryNodeOperator::MaybeAddressOf, ExprId); + UnaryNodeOperator::MaybeAddressOf, std::string(ExprId)); } Stencil transformer::access(StringRef BaseId, Stencil Member) { diff --git a/clang/lib/Tooling/Transformer/Transformer.cpp b/clang/lib/Tooling/Transformer/Transformer.cpp index 71f0646f4c0e..e8fc00c4e953 100644 --- a/clang/lib/Tooling/Transformer/Transformer.cpp +++ b/clang/lib/Tooling/Transformer/Transformer.cpp @@ -12,6 +12,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Refactoring/AtomicChange.h" #include "llvm/Support/Error.h" +#include <map> #include <utility> #include <vector> @@ -31,7 +32,7 @@ void Transformer::run(const MatchFinder::MatchResult &Result) { transformer::RewriteRule::Case Case = transformer::detail::findSelectedCase(Result, Rule); - auto Transformations = transformer::detail::translateEdits(Result, Case.Edits); + auto Transformations = Case.Edits(Result); if (!Transformations) { Consumer(Transformations.takeError()); return; @@ -45,28 +46,39 @@ void Transformer::run(const MatchFinder::MatchResult &Result) { return; } - // Record the results in the AtomicChange, anchored at the location of the - // first change. - AtomicChange AC(*Result.SourceManager, - (*Transformations)[0].Range.getBegin()); + // Group the transformations, by file, into AtomicChanges, each anchored by + // the location of the first change in that file. 
+ std::map<FileID, AtomicChange> ChangesByFileID; for (const auto &T : *Transformations) { + auto ID = Result.SourceManager->getFileID(T.Range.getBegin()); + auto Iter = ChangesByFileID + .emplace(ID, AtomicChange(*Result.SourceManager, + T.Range.getBegin(), T.Metadata)) + .first; + auto &AC = Iter->second; if (auto Err = AC.replace(*Result.SourceManager, T.Range, T.Replacement)) { Consumer(std::move(Err)); return; } } - for (const auto &I : Case.AddedIncludes) { - auto &Header = I.first; - switch (I.second) { - case transformer::IncludeFormat::Quoted: - AC.addHeader(Header); - break; - case transformer::IncludeFormat::Angled: - AC.addHeader((llvm::Twine("<") + Header + ">").str()); - break; + for (auto &IDChangePair : ChangesByFileID) { + auto &AC = IDChangePair.second; + // FIXME: this will add includes to *all* changed files, which may not be + // the intent. We should upgrade the representation to allow associating + // headers with specific edits. + for (const auto &I : Case.AddedIncludes) { + auto &Header = I.first; + switch (I.second) { + case transformer::IncludeFormat::Quoted: + AC.addHeader(Header); + break; + case transformer::IncludeFormat::Angled: + AC.addHeader((llvm::Twine("<") + Header + ">").str()); + break; + } } - } - Consumer(std::move(AC)); + Consumer(std::move(AC)); + } } |