aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/Basic/Sarif.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/Basic/Sarif.cpp')
-rw-r--r--contrib/llvm-project/clang/lib/Basic/Sarif.cpp389
1 files changed, 389 insertions, 0 deletions
diff --git a/contrib/llvm-project/clang/lib/Basic/Sarif.cpp b/contrib/llvm-project/clang/lib/Basic/Sarif.cpp
new file mode 100644
index 000000000000..faca9c508c08
--- /dev/null
+++ b/contrib/llvm-project/clang/lib/Basic/Sarif.cpp
@@ -0,0 +1,389 @@
+//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the definition of the SarifDocumentWriter class, and
+/// associated builders such as:
+/// - \ref SarifArtifact
+/// - \ref SarifArtifactLocation
+/// - \ref SarifRule
+/// - \ref SarifResult
+//===----------------------------------------------------------------------===//
+#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+
+#include <string>
+#include <utility>
+
+using namespace clang;
+using namespace llvm;
+
+using clang::detail::SarifArtifact;
+using clang::detail::SarifArtifactLocation;
+
+/// Choose the name used to identify \p FE: the value of
+/// \c tryGetRealPathName() when it is non-empty, otherwise \c getName().
+static StringRef getFileName(const FileEntry &FE) {
+  StringRef RealPath = FE.tryGetRealPathName();
+  return RealPath.empty() ? FE.getName() : RealPath;
+}
+/// \name URI
+/// @{
+
+/// \internal
+/// \brief
+/// Percent-encode a single character for use in a URI path component.
+///
+/// Characters accepted by \c llvm::isAlnum and the characters in
+/// \c "-._~:@!$&'()*+,;=" are returned unchanged. Any other character is
+/// returned as \c '%' followed by the result of \c llvm::toHex for that byte.
+///
+/// \param C Character to encode.
+///
+/// \return \c C itself, or its percent-encoded form, as a string.
+static std::string percentEncodeURICharacter(char C) {
+  // Besides alphanumerics, these are the characters that RFC 3986 allows a
+  // path component to carry verbatim.
+  const StringRef PassThrough("-._~:@!$&'()*+,;=");
+  const bool NeedsEscape =
+      !llvm::isAlnum(C) && PassThrough.find(C) == StringRef::npos;
+  if (NeedsEscape)
+    return "%" + llvm::toHex(StringRef(&C, 1));
+  return std::string(1, C);
+}
+
+/// \internal
+/// \brief Build a \c file:// URI from a file name.
+///
+/// The root name of the path (as reported by \c sys::path::root_name) is
+/// appended without encoding; every path component after the first one is
+/// appended behind a \c '/' with each of its characters percent-encoded.
+///
+/// \param Filename The filename to be represented as URI.
+///
+/// \return The URI string built for \p Filename.
+static std::string fileNameToURI(StringRef Filename) {
+  SmallString<32> URI = StringRef("file://");
+
+  // A root name that begins with "//" carries a URI authority, which is
+  // appended as-is. Any other non-empty root name has no authority, so the
+  // authority component is ended with "/" before the root name is added.
+  const StringRef RootName = sys::path::root_name(Filename);
+  if (RootName.startswith("//"))
+    URI += RootName.drop_front(2).str();
+  else if (!RootName.empty())
+    URI += Twine("/" + RootName).str();
+
+  auto Iter = sys::path::begin(Filename);
+  const auto End = sys::path::end(Filename);
+  assert(Iter != End && "Expected there to be a non-root path component.");
+
+  // Add the remaining path components, encoding any reserved characters. The
+  // first path component is skipped because it was handled above.
+  for (++Iter; Iter != End; ++Iter) {
+    const StringRef Component = *Iter;
+
+    // Windows native paths may produce a lone backslash for the separator
+    // that follows the drive component; it is not a URI path part.
+    if (Component == "\\")
+      continue;
+
+    // Separator between the previous path part and this one, followed by the
+    // URI-encoded part itself.
+    URI += "/";
+    for (char C : Component)
+      URI += percentEncodeURICharacter(C);
+  }
+
+  return std::string(URI);
+}
+/// @}
+
+/// \brief Calculate the 1-based column of a source location, counting UTF-8
+/// encoded characters instead of bytes.
+///
+/// The file, the byte offset and the column used to find the start of the
+/// line are all taken from the expansion location of \p Loc, so they describe
+/// the same position even when \p Loc points into a macro expansion.
+///
+/// \param Loc The source location whose column needs to be calculated.
+/// \param TokenLen Optional number of bytes past \p Loc to include in the
+/// count, for when the token is multiple bytes long.
+///
+/// \return One plus the number of UTF-8 sequences stepped over between the
+/// start of the line and the byte at offset (location + \p TokenLen).
+static unsigned int adjustColumnPos(FullSourceLoc Loc,
+                                    unsigned int TokenLen = 0) {
+  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
+
+  // Decompose the expansion location rather than the raw one: the column used
+  // below comes from getExpansionColumnNumber(), so the file, buffer and byte
+  // offset must describe that same location. Mixing the raw decomposed
+  // location with the expansion column yields a bogus line start (or no
+  // buffer at all) for locations inside macro expansions.
+  const SourceManager &SM = Loc.getManager();
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc);
+  Optional<MemoryBufferRef> Buf = SM.getBufferOrNone(LocInfo.first);
+  assert(Buf && "got an invalid buffer for the location's file");
+  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
+         "token extends past end of buffer?");
+
+  // Move the offset back to the start of the line, then walk forward one
+  // UTF-8 sequence at a time until the target offset is reached. The column
+  // starts at 1 and grows by one per sequence stepped over.
+  unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
+  unsigned int Ret = 1;
+  while (Off < (LocInfo.second + TokenLen)) {
+    Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
+    Ret++;
+  }
+
+  return Ret;
+}
+
+/// \name SARIF Utilities
+/// @{
+
+/// \internal
+/// Wrap \p Text in a JSON object under the key \c "text".
+///
+/// NOTE(review): unlike the neighbouring helpers this function is not declared
+/// \c static; confirm whether external linkage is intended.
+json::Object createMessage(StringRef Text) {
+  json::Object Message;
+  Message["text"] = Text.str();
+  return Message;
+}
+
+/// \internal
+/// Build the SARIF region object describing \p R.
+///
+/// \c startLine, \c startColumn and \c endColumn are always present;
+/// \c endLine is added only when the begin and end locations differ.
+///
+/// NOTE(review): the caller visible in this file, createPhysicalLocation,
+/// asserts \c R.isCharRange(), so \p R is expected to be a character range
+/// here - confirm against the other callers, if any.
+static json::Object createTextRegion(const SourceManager &SM,
+                                     const CharSourceRange &R) {
+  const FullSourceLoc Begin(R.getBegin(), SM);
+  const FullSourceLoc End(R.getEnd(), SM);
+
+  json::Object Region;
+  Region["startLine"] = Begin.getExpansionLineNumber();
+  Region["startColumn"] = adjustColumnPos(Begin);
+  Region["endColumn"] = adjustColumnPos(End);
+  if (Begin != End)
+    Region["endLine"] = End.getExpansionLineNumber();
+  return Region;
+}
+
+/// Wrap \p PhysicalLocation in a location object under the key
+/// \c "physicalLocation". When \p Message is non-empty it is attached as the
+/// location's \c "message".
+static json::Object createLocation(json::Object &&PhysicalLocation,
+                                   StringRef Message = "") {
+  json::Object Location;
+  Location["physicalLocation"] = std::move(PhysicalLocation);
+  if (Message.empty())
+    return Location;
+
+  Location["message"] = createMessage(Message);
+  return Location;
+}
+
+/// Map a ThreadFlowImportance enumerator to its lower-case string spelling.
+static StringRef importanceToStr(ThreadFlowImportance I) {
+  switch (I) {
+  case ThreadFlowImportance::Essential:
+    return "essential";
+  case ThreadFlowImportance::Important:
+    return "important";
+  case ThreadFlowImportance::Unimportant:
+    return "unimportant";
+  }
+  // Every enumerator handled above returns, so control only gets here for a
+  // value outside the enumeration.
+  llvm_unreachable("Fully covered switch is not so fully covered");
+}
+
+/// Build a thread-flow location object holding \p Location under
+/// \c "location" and the string form of \p Importance under \c "importance".
+static json::Object
+createThreadFlowLocation(json::Object &&Location,
+                         const ThreadFlowImportance &Importance) {
+  json::Object ThreadFlowLoc;
+  ThreadFlowLoc["location"] = std::move(Location);
+  ThreadFlowLoc["importance"] = importanceToStr(Importance);
+  return ThreadFlowLoc;
+}
+/// @}
+
+/// Build the physicalLocation object for the character range \p R.
+///
+/// The file containing the start of \p R is looked up in CurrentArtifacts by
+/// its URI and registered there the first time it is seen. The returned object
+/// refers to that artifact by index and carries the text region of \p R.
+json::Object
+SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
+  assert(R.isValid() &&
+         "Cannot create a physicalLocation from invalid SourceRange!");
+  assert(R.isCharRange() &&
+         "Cannot create a physicalLocation from a token range!");
+  const FullSourceLoc Begin(R.getBegin(), SourceMgr);
+  const FileEntry *FE = Begin.getExpansionLoc().getFileEntry();
+  assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
+
+  const std::string &FileURI = fileNameToURI(getFileName(*FE));
+  auto I = CurrentArtifacts.find(FileURI);
+
+  if (I == CurrentArtifacts.end()) {
+    // First time this file is referenced: its index is the number of
+    // artifacts recorded so far.
+    const auto NewIdx = static_cast<uint32_t>(CurrentArtifacts.size());
+    const SarifArtifactLocation &NewLocation =
+        SarifArtifactLocation::create(FileURI).setIndex(NewIdx);
+    const SarifArtifact &NewArtifact = SarifArtifact::create(NewLocation)
+                                           .setRoles({"resultFile"})
+                                           .setLength(FE->getSize())
+                                           .setMimeType("text/plain");
+    auto Inserted = CurrentArtifacts.insert({FileURI, NewArtifact});
+    // On successful insertion, point the lookup iterator at the new element
+    // so the code below can treat both cases alike.
+    if (Inserted.second)
+      I = Inserted.first;
+  }
+  assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
+
+  const SarifArtifactLocation &Location = I->second.Location;
+  const uint32_t Idx = Location.Index.value();
+
+  json::Object ArtifactRef;
+  ArtifactRef["index"] = Idx;
+
+  json::Object PhysicalLocation;
+  PhysicalLocation["artifactLocation"] = std::move(ArtifactRef);
+  PhysicalLocation["region"] = createTextRegion(SourceMgr, R);
+  return PhysicalLocation;
+}
+
+/// Return the \c "tool" object of the last run in \c Runs.
+///
+/// Asserts that the document is not closed and that at least one run exists.
+json::Object &SarifDocumentWriter::getCurrentTool() {
+  assert(!Closed && "SARIF Document is closed. "
+                    "Need to call createRun() before using getcurrentTool!");
+
+  // With Closed == false there must be at least one run; anything else is an
+  // invalid state.
+  assert(!Runs.empty() && "There are no runs associated with the document!");
+
+  json::Object *CurrentRun = Runs.back().getAsObject();
+  json::Value *Tool = CurrentRun->get("tool");
+  return *Tool->getAsObject();
+}
+
+/// Discard the rules and artifacts accumulated so far.
+void SarifDocumentWriter::reset() {
+  CurrentArtifacts.clear();
+  CurrentRules.clear();
+}
+
+/// Finish the current run, if one is open.
+///
+/// The accumulated rules are written to the \c "rules" entry of the current
+/// tool's \c "driver", and the accumulated artifacts are appended to the
+/// current run's \c "artifacts" array. The per-run state is then cleared and
+/// the document marked closed. When the document is already closed, only the
+/// per-run state is cleared.
+void SarifDocumentWriter::endRun() {
+  // Nothing to flush for a closed document; just drop any per-run state.
+  if (Closed) {
+    reset();
+    return;
+  }
+
+  // With Closed == false there must be at least one run; anything else is an
+  // invalid state.
+  assert(!Runs.empty() && "There are no runs associated with the document!");
+
+  // Flush the rules into the tool driver.
+  json::Object &Tool = getCurrentTool();
+  json::Array RuleList;
+  for (const SarifRule &R : CurrentRules) {
+    json::Object Entry;
+    Entry["name"] = R.Name;
+    Entry["id"] = R.Id;
+    Entry["fullDescription"] = json::Object{{"text", R.Description}};
+    if (!R.HelpURI.empty())
+      Entry["helpUri"] = R.HelpURI;
+    RuleList.emplace_back(std::move(Entry));
+  }
+  json::Object &Driver = *Tool.getObject("driver");
+  Driver["rules"] = std::move(RuleList);
+
+  // Flush the artifacts into the run. Optional properties are emitted only
+  // when they have a value or are non-empty.
+  json::Array *Artifacts = getCurrentRun().getArray("artifacts");
+  for (const auto &Entry : CurrentArtifacts) {
+    const SarifArtifact &A = Entry.getValue();
+
+    json::Object Loc;
+    Loc["uri"] = A.Location.URI;
+    if (A.Location.Index.has_value())
+      Loc["index"] = static_cast<int64_t>(A.Location.Index.value());
+
+    json::Object Artifact;
+    Artifact["location"] = std::move(Loc);
+    if (A.Length.has_value())
+      Artifact["length"] = static_cast<int64_t>(A.Length.value());
+    if (!A.Roles.empty())
+      Artifact["roles"] = json::Array(A.Roles);
+    if (!A.MimeType.empty())
+      Artifact["mimeType"] = A.MimeType;
+    if (A.Offset.has_value())
+      Artifact["offset"] = A.Offset;
+    Artifacts->push_back(json::Value(std::move(Artifact)));
+  }
+
+  // Clear the temporaries before the next run, then mark the document closed.
+  reset();
+  Closed = true;
+}
+
+/// Convert \p ThreadFlows into a one-element array whose single object lists
+/// one thread-flow location per input entry under the key \c "locations".
+json::Array
+SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
+  json::Array Locs;
+  for (const auto &TF : ThreadFlows) {
+    json::Object Physical = createPhysicalLocation(TF.Range);
+    json::Object Logical = createLocation(std::move(Physical), TF.Message);
+    Locs.emplace_back(
+        createThreadFlowLocation(std::move(Logical), TF.Importance));
+  }
+
+  json::Object Ret;
+  Ret["locations"] = std::move(Locs);
+  return json::Array{std::move(Ret)};
+}
+
+/// Build a code-flow object holding the thread flows created from
+/// \p ThreadFlows under the key \c "threadFlows".
+json::Object
+SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
+  json::Object CodeFlow;
+  CodeFlow["threadFlows"] = createThreadFlows(ThreadFlows);
+  return CodeFlow;
+}
+
+/// Start a new run, ending the previous one first.
+///
+/// The new run gets a \c "tool" object whose \c "driver" is described by the
+/// three arguments, empty \c "results" and \c "artifacts" arrays, and a
+/// \c "columnKind" of \c "unicodeCodePoints". It is appended to \c Runs.
+///
+/// \param ShortToolName Stored as the driver's \c "name".
+/// \param LongToolName Stored as the driver's \c "fullName".
+/// \param ToolVersion Stored as the driver's \c "version".
+void SarifDocumentWriter::createRun(StringRef ShortToolName,
+                                    StringRef LongToolName,
+                                    StringRef ToolVersion) {
+  // Clear resources associated with a previous run, then signal that a new
+  // run has begun.
+  endRun();
+  Closed = false;
+
+  json::Object Driver;
+  Driver["name"] = ShortToolName;
+  Driver["fullName"] = LongToolName;
+  Driver["language"] = "en-US";
+  Driver["version"] = ToolVersion;
+  Driver["informationUri"] = "https://clang.llvm.org/docs/UsersManual.html";
+
+  json::Object Tool;
+  Tool["driver"] = std::move(Driver);
+
+  json::Object TheRun;
+  TheRun["tool"] = std::move(Tool);
+  TheRun["results"] = json::Array{};
+  TheRun["artifacts"] = json::Array{};
+  TheRun["columnKind"] = "unicodeCodePoints";
+  Runs.emplace_back(std::move(TheRun));
+}
+
+/// Return the last run in \c Runs as a JSON object.
+///
+/// Asserts that the document is not closed and that at least one run exists.
+json::Object &SarifDocumentWriter::getCurrentRun() {
+  assert(!Closed &&
+         "SARIF Document is closed. "
+         "Can only getCurrentRun() if document is opened via createRun(), "
+         "create a run first");
+
+  // With Closed == false there must be at least one run; anything else is an
+  // invalid state.
+  assert(!Runs.empty() && "There are no runs associated with the document!");
+
+  json::Value &LastRun = Runs.back();
+  return *LastRun.getAsObject();
+}
+
+/// Append \p Rule to \c CurrentRules.
+///
+/// \return The position of the new rule in \c CurrentRules, i.e. the number of
+/// rules that were present before the call.
+size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
+  const size_t NewRuleIdx = CurrentRules.size();
+  CurrentRules.emplace_back(Rule);
+  return NewRuleIdx;
+}
+
+/// Serialize \p Result and append it to the current run's \c "results" array.
+///
+/// The emitted object always carries \c "message", \c "ruleIndex" and
+/// \c "ruleId". \c "locations" and \c "codeFlows" are added only when
+/// \p Result has locations or thread flows respectively.
+void SarifDocumentWriter::appendResult(const SarifResult &Result) {
+  size_t RuleIdx = Result.RuleIdx;
+  assert(RuleIdx < CurrentRules.size() &&
+         "Trying to reference a rule that doesn't exist");
+
+  json::Object Entry;
+  Entry["message"] = createMessage(Result.DiagnosticMessage);
+  Entry["ruleIndex"] = static_cast<int64_t>(RuleIdx);
+  Entry["ruleId"] = CurrentRules[RuleIdx].Id;
+
+  if (!Result.Locations.empty()) {
+    json::Array Locs;
+    for (const auto &Range : Result.Locations)
+      Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
+    Entry["locations"] = std::move(Locs);
+  }
+
+  if (!Result.ThreadFlows.empty())
+    Entry["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
+
+  json::Array *Results = getCurrentRun().getArray("results");
+  Results->emplace_back(std::move(Entry));
+}
+
+/// End the current run and return the whole document as a JSON object.
+///
+/// The object holds \c "$schema" and \c "version"; \c "runs" is included only
+/// when at least one run has been created.
+json::Object SarifDocumentWriter::createDocument() {
+  // Flush all temporaries to their destinations if needed.
+  endRun();
+
+  json::Object Doc;
+  Doc["$schema"] = SchemaURI;
+  Doc["version"] = SchemaVersion;
+  if (!Runs.empty())
+    Doc["runs"] = json::Array(Runs);
+  return Doc;
+}