aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/tools/clang/lib/Format
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Format')
-rw-r--r--contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp150
-rw-r--r--contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h67
-rw-r--r--contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp922
-rw-r--r--contrib/llvm/tools/clang/lib/Format/BreakableToken.h413
-rw-r--r--contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp1403
-rw-r--r--contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h382
-rw-r--r--contrib/llvm/tools/clang/lib/Format/Encoding.h128
-rw-r--r--contrib/llvm/tools/clang/lib/Format/Format.cpp2113
-rw-r--r--contrib/llvm/tools/clang/lib/Format/FormatToken.cpp306
-rw-r--r--contrib/llvm/tools/clang/lib/Format/FormatToken.h726
-rw-r--r--contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp670
-rw-r--r--contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h115
-rw-r--r--contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp207
-rw-r--r--contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h37
-rw-r--r--contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp456
-rw-r--r--contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h36
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp146
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h103
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp2794
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h179
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp1043
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h74
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp2403
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h251
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp144
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h37
-rw-r--r--contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp701
-rw-r--r--contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h214
28 files changed, 16220 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp
new file mode 100644
index 000000000000..5d4df1941209
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp
@@ -0,0 +1,150 @@
+//===--- AffectedRangeManager.cpp - Format C++ code -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements AffectRangeManager class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AffectedRangeManager.h"
+
+#include "FormatToken.h"
+#include "TokenAnnotator.h"
+
+namespace clang {
+namespace format {
+
+bool AffectedRangeManager::computeAffectedLines(
+ SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E) {
+ bool SomeLineAffected = false;
+ const AnnotatedLine *PreviousLine = nullptr;
+ while (I != E) {
+ AnnotatedLine *Line = *I;
+ Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
+
+ // If a line is part of a preprocessor directive, it needs to be formatted
+ // if any token within the directive is affected.
+ if (Line->InPPDirective) {
+ FormatToken *Last = Line->Last;
+ SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
+ while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
+ Last = (*PPEnd)->Last;
+ ++PPEnd;
+ }
+
+ if (affectsTokenRange(*Line->First, *Last,
+ /*IncludeLeadingNewlines=*/false)) {
+ SomeLineAffected = true;
+ markAllAsAffected(I, PPEnd);
+ }
+ I = PPEnd;
+ continue;
+ }
+
+ if (nonPPLineAffected(Line, PreviousLine))
+ SomeLineAffected = true;
+
+ PreviousLine = Line;
+ ++I;
+ }
+ return SomeLineAffected;
+}
+
+bool AffectedRangeManager::affectsCharSourceRange(
+ const CharSourceRange &Range) {
+ for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+ E = Ranges.end();
+ I != E; ++I) {
+ if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+ !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+ return true;
+ }
+ return false;
+}
+
+bool AffectedRangeManager::affectsTokenRange(const FormatToken &First,
+ const FormatToken &Last,
+ bool IncludeLeadingNewlines) {
+ SourceLocation Start = First.WhitespaceRange.getBegin();
+ if (!IncludeLeadingNewlines)
+ Start = Start.getLocWithOffset(First.LastNewlineOffset);
+ SourceLocation End = Last.getStartOfNonWhitespace();
+ End = End.getLocWithOffset(Last.TokenText.size());
+ CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
+ return affectsCharSourceRange(Range);
+}
+
+bool AffectedRangeManager::affectsLeadingEmptyLines(const FormatToken &Tok) {
+ CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
+ Tok.WhitespaceRange.getBegin(),
+ Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
+ return affectsCharSourceRange(EmptyLineRange);
+}
+
+void AffectedRangeManager::markAllAsAffected(
+ SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E) {
+ while (I != E) {
+ (*I)->Affected = true;
+ markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
+ ++I;
+ }
+}
+
+bool AffectedRangeManager::nonPPLineAffected(
+ AnnotatedLine *Line, const AnnotatedLine *PreviousLine) {
+ bool SomeLineAffected = false;
+ Line->ChildrenAffected =
+ computeAffectedLines(Line->Children.begin(), Line->Children.end());
+ if (Line->ChildrenAffected)
+ SomeLineAffected = true;
+
+ // Stores whether one of the line's tokens is directly affected.
+ bool SomeTokenAffected = false;
+ // Stores whether we need to look at the leading newlines of the next token
+ // in order to determine whether it was affected.
+ bool IncludeLeadingNewlines = false;
+
+ // Stores whether the first child line of any of this line's tokens is
+ // affected.
+ bool SomeFirstChildAffected = false;
+
+ for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
+ // Determine whether 'Tok' was affected.
+ if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
+ SomeTokenAffected = true;
+
+ // Determine whether the first child of 'Tok' was affected.
+ if (!Tok->Children.empty() && Tok->Children.front()->Affected)
+ SomeFirstChildAffected = true;
+
+ IncludeLeadingNewlines = Tok->Children.empty();
+ }
+
+ // Was this line moved, i.e. has it previously been on the same line as an
+ // affected line?
+ bool LineMoved = PreviousLine && PreviousLine->Affected &&
+ Line->First->NewlinesBefore == 0;
+
+ bool IsContinuedComment =
+ Line->First->is(tok::comment) && Line->First->Next == nullptr &&
+ Line->First->NewlinesBefore < 2 && PreviousLine &&
+ PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
+
+ if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
+ IsContinuedComment) {
+ Line->Affected = true;
+ SomeLineAffected = true;
+ }
+ return SomeLineAffected;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h
new file mode 100644
index 000000000000..d8d5ee55acd8
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/AffectedRangeManager.h
@@ -0,0 +1,67 @@
+//===--- AffectedRangeManager.h - Format C++ code ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief AffectedRangeManager class manages affected ranges in the code.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
+#define LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
+
+#include "clang/Basic/SourceManager.h"
+
+namespace clang {
+namespace format {
+
+struct FormatToken;
+class AnnotatedLine;
+
+class AffectedRangeManager {
+public:
+ AffectedRangeManager(const SourceManager &SourceMgr,
+ const ArrayRef<CharSourceRange> Ranges)
+ : SourceMgr(SourceMgr), Ranges(Ranges.begin(), Ranges.end()) {}
+
+ // Determines which lines are affected by the SourceRanges given as input.
+ // Returns \c true if at least one line between I and E or one of their
+ // children is affected.
+ bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E);
+
+ // Returns true if 'Range' intersects with one of the input ranges.
+ bool affectsCharSourceRange(const CharSourceRange &Range);
+
+private:
+ // Returns true if the range from 'First' to 'Last' intersects with one of the
+ // input ranges.
+ bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
+ bool IncludeLeadingNewlines);
+
+ // Returns true if one of the input ranges intersect the leading empty lines
+ // before 'Tok'.
+ bool affectsLeadingEmptyLines(const FormatToken &Tok);
+
+ // Marks all lines between I and E as well as all their children as affected.
+ void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
+ SmallVectorImpl<AnnotatedLine *>::iterator E);
+
+ // Determines whether 'Line' is affected by the SourceRanges given as input.
+ // Returns \c true if line or one if its children is affected.
+ bool nonPPLineAffected(AnnotatedLine *Line,
+ const AnnotatedLine *PreviousLine);
+
+ const SourceManager &SourceMgr;
+ const SmallVector<CharSourceRange, 8> Ranges;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H
diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp
new file mode 100644
index 000000000000..3c9df62f80dc
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp
@@ -0,0 +1,922 @@
+//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Contains implementation of BreakableToken class and classes derived
+/// from it.
+///
+//===----------------------------------------------------------------------===//
+
+#include "BreakableToken.h"
+#include "ContinuationIndenter.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "format-token-breaker"
+
+namespace clang {
+namespace format {
+
+static const char *const Blanks = " \t\v\f\r";
+static bool IsBlank(char C) {
+ switch (C) {
+ case ' ':
+ case '\t':
+ case '\v':
+ case '\f':
+ case '\r':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static StringRef getLineCommentIndentPrefix(StringRef Comment) {
+ static const char *const KnownPrefixes[] = {
+ "///<", "//!<", "///", "//", "//!"};
+ StringRef LongestPrefix;
+ for (StringRef KnownPrefix : KnownPrefixes) {
+ if (Comment.startswith(KnownPrefix)) {
+ size_t PrefixLength = KnownPrefix.size();
+ while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
+ ++PrefixLength;
+ if (PrefixLength > LongestPrefix.size())
+ LongestPrefix = Comment.substr(0, PrefixLength);
+ }
+ }
+ return LongestPrefix;
+}
+
+static BreakableToken::Split getCommentSplit(StringRef Text,
+ unsigned ContentStartColumn,
+ unsigned ColumnLimit,
+ unsigned TabWidth,
+ encoding::Encoding Encoding) {
+ if (ColumnLimit <= ContentStartColumn + 1)
+ return BreakableToken::Split(StringRef::npos, 0);
+
+ unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
+ unsigned MaxSplitBytes = 0;
+
+ for (unsigned NumChars = 0;
+ NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
+ unsigned BytesInChar =
+ encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
+ NumChars +=
+ encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
+ ContentStartColumn, TabWidth, Encoding);
+ MaxSplitBytes += BytesInChar;
+ }
+
+ StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
+
+ // Do not split before a number followed by a dot: this would be interpreted
+ // as a numbered list, which would prevent re-flowing in subsequent passes.
+ static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
+ if (SpaceOffset != StringRef::npos &&
+ kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks)))
+ SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
+
+ if (SpaceOffset == StringRef::npos ||
+ // Don't break at leading whitespace.
+ Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
+ // Make sure that we don't break at leading whitespace that
+ // reaches past MaxSplit.
+ StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
+ if (FirstNonWhitespace == StringRef::npos)
+ // If the comment is only whitespace, we cannot split.
+ return BreakableToken::Split(StringRef::npos, 0);
+ SpaceOffset = Text.find_first_of(
+ Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
+ }
+ if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
+ StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
+ StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
+ return BreakableToken::Split(BeforeCut.size(),
+ AfterCut.begin() - BeforeCut.end());
+ }
+ return BreakableToken::Split(StringRef::npos, 0);
+}
+
+static BreakableToken::Split
+getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
+ unsigned TabWidth, encoding::Encoding Encoding) {
+ // FIXME: Reduce unit test case.
+ if (Text.empty())
+ return BreakableToken::Split(StringRef::npos, 0);
+ if (ColumnLimit <= UsedColumns)
+ return BreakableToken::Split(StringRef::npos, 0);
+ unsigned MaxSplit = ColumnLimit - UsedColumns;
+ StringRef::size_type SpaceOffset = 0;
+ StringRef::size_type SlashOffset = 0;
+ StringRef::size_type WordStartOffset = 0;
+ StringRef::size_type SplitPoint = 0;
+ for (unsigned Chars = 0;;) {
+ unsigned Advance;
+ if (Text[0] == '\\') {
+ Advance = encoding::getEscapeSequenceLength(Text);
+ Chars += Advance;
+ } else {
+ Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
+ Chars += encoding::columnWidthWithTabs(
+ Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
+ }
+
+ if (Chars > MaxSplit || Text.size() <= Advance)
+ break;
+
+ if (IsBlank(Text[0]))
+ SpaceOffset = SplitPoint;
+ if (Text[0] == '/')
+ SlashOffset = SplitPoint;
+ if (Advance == 1 && !isAlphanumeric(Text[0]))
+ WordStartOffset = SplitPoint;
+
+ SplitPoint += Advance;
+ Text = Text.substr(Advance);
+ }
+
+ if (SpaceOffset != 0)
+ return BreakableToken::Split(SpaceOffset + 1, 0);
+ if (SlashOffset != 0)
+ return BreakableToken::Split(SlashOffset + 1, 0);
+ if (WordStartOffset != 0)
+ return BreakableToken::Split(WordStartOffset + 1, 0);
+ if (SplitPoint != 0)
+ return BreakableToken::Split(SplitPoint, 0);
+ return BreakableToken::Split(StringRef::npos, 0);
+}
+
+bool switchesFormatting(const FormatToken &Token) {
+ assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
+ "formatting regions are switched by comment tokens");
+ StringRef Content = Token.TokenText.substr(2).ltrim();
+ return Content.startswith("clang-format on") ||
+ Content.startswith("clang-format off");
+}
+
+unsigned
+BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const {
+ // Example: consider the content
+ // lala lala
+ // - RemainingTokenColumns is the original number of columns, 10;
+ // - Split is (4, 2), denoting the two spaces between the two words;
+ //
+ // We compute the number of columns when the split is compressed into a single
+ // space, like:
+ // lala lala
+ return RemainingTokenColumns + 1 - Split.second;
+}
+
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ return StartColumn + Prefix.size() + Postfix.size() +
+ encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
+ StartColumn + Prefix.size(),
+ Style.TabWidth, Encoding);
+}
+
+BreakableSingleLineToken::BreakableSingleLineToken(
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableToken(Tok, InPPDirective, Encoding, Style),
+ StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+ Line = Tok.TokenText.substr(
+ Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
+}
+
+BreakableStringLiteral::BreakableStringLiteral(
+ const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+ StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
+ Encoding, Style) {}
+
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ return getStringSplit(Line.substr(TailOffset),
+ StartColumn + Prefix.size() + Postfix.size(),
+ ColumnLimit, Style.TabWidth, Encoding);
+}
+
+void BreakableStringLiteral::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ Whitespaces.replaceWhitespaceInToken(
+ Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
+ Prefix, InPPDirective, 1, StartColumn);
+}
+
+BreakableComment::BreakableComment(const FormatToken &Token,
+ unsigned StartColumn,
+ bool InPPDirective,
+ encoding::Encoding Encoding,
+ const FormatStyle &Style)
+ : BreakableToken(Token, InPPDirective, Encoding, Style),
+ StartColumn(StartColumn) {}
+
+unsigned BreakableComment::getLineCount() const { return Lines.size(); }
+
+BreakableToken::Split
+BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ // Don't break lines matching the comment pragmas regex.
+ if (CommentPragmasRegex.match(Content[LineIndex]))
+ return Split(StringRef::npos, 0);
+ return getCommentSplit(Content[LineIndex].substr(TailOffset),
+ getContentStartColumn(LineIndex, TailOffset),
+ ColumnLimit, Style.TabWidth, Encoding);
+}
+
+void BreakableComment::compressWhitespace(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ // Text is relative to the content line, but Whitespaces operates relative to
+ // the start of the corresponding token, so compute the start of the Split
+ // that needs to be compressed into a single space relative to the start of
+ // its token.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
+ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
+}
+
+BreakableToken::Split
+BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit) const {
+ unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
+ StringRef TrimmedText = Text.rtrim(Blanks);
+ // This is the width of the resulting line in case the full line of Text gets
+ // reflown up starting at ReflowStartColumn.
+ unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
+ TrimmedText, ReflowStartColumn,
+ Style.TabWidth, Encoding);
+ // If the full line fits up, we return a reflow split after it,
+ // otherwise we compute the largest piece of text that fits after
+ // ReflowStartColumn.
+ Split ReflowSplit =
+ FullWidth <= ColumnLimit
+ ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
+ : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
+ Style.TabWidth, Encoding);
+
+ // We need to be extra careful here, because while it's OK to keep a long line
+ // if it can't be broken into smaller pieces (like when the first word of a
+ // long line is longer than the column limit), it's not OK to reflow that long
+ // word up. So we recompute the size of the previous line after reflowing and
+ // only return the reflow split if that's under the line limit.
+ if (ReflowSplit.first != StringRef::npos &&
+ // Check if the width of the newly reflown line is under the limit.
+ PreviousEndColumn + ReflowPrefix.size() +
+ encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
+ PreviousEndColumn +
+ ReflowPrefix.size(),
+ Style.TabWidth, Encoding) <=
+ ColumnLimit) {
+ return ReflowSplit;
+ }
+ return Split(StringRef::npos, 0);
+}
+
+const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
+ return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
+}
+
+static bool mayReflowContent(StringRef Content) {
+ Content = Content.trim(Blanks);
+ // Lines starting with '@' commonly have special meaning.
+ // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
+ static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {
+ "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };
+ bool hasSpecialMeaningPrefix = false;
+ for (StringRef Prefix : kSpecialMeaningPrefixes) {
+ if (Content.startswith(Prefix)) {
+ hasSpecialMeaningPrefix = true;
+ break;
+ }
+ }
+
+ // Numbered lists may also start with a number followed by '.'
+ // To avoid issues if a line starts with a number which is actually the end
+ // of a previous line, we only consider numbers with up to 2 digits.
+ static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
+ hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||
+ kNumberedListRegexp.match(Content);
+
+ // Simple heuristic for what to reflow: content should contain at least two
+ // characters and either the first or second character must be
+ // non-punctuation.
+ return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
+ !Content.endswith("\\") &&
+ // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
+ // true, then the first code point must be 1 byte long.
+ (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
+}
+
+BreakableBlockComment::BreakableBlockComment(
+ const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ assert(Tok.is(TT_BlockComment) &&
+ "block comment section must start with a block comment");
+
+ StringRef TokenText(Tok.TokenText);
+ assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+ TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
+
+ int IndentDelta = StartColumn - OriginalStartColumn;
+ Content.resize(Lines.size());
+ Content[0] = Lines[0];
+ ContentColumn.resize(Lines.size());
+ // Account for the initial '/*'.
+ ContentColumn[0] = StartColumn + 2;
+ Tokens.resize(Lines.size());
+ for (size_t i = 1; i < Lines.size(); ++i)
+ adjustWhitespace(i, IndentDelta);
+
+ // Align decorations with the column of the star on the first line,
+ // that is one column after the start "/*".
+ DecorationColumn = StartColumn + 1;
+
+ // Account for comment decoration patterns like this:
+ //
+ // /*
+ // ** blah blah blah
+ // */
+ if (Lines.size() >= 2 && Content[1].startswith("**") &&
+ static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
+ DecorationColumn = StartColumn;
+ }
+
+ Decoration = "* ";
+ if (Lines.size() == 1 && !FirstInLine) {
+ // Comments for which FirstInLine is false can start on arbitrary column,
+ // and available horizontal space can be too small to align consecutive
+ // lines with the first one.
+ // FIXME: We could, probably, align them to current indentation level, but
+ // now we just wrap them without stars.
+ Decoration = "";
+ }
+ for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
+ // If the last line is empty, the closing "*/" will have a star.
+ if (i + 1 == e && Content[i].empty())
+ break;
+ if (!Content[i].empty() && i + 1 != e &&
+ Decoration.startswith(Content[i]))
+ continue;
+ while (!Content[i].startswith(Decoration))
+ Decoration = Decoration.substr(0, Decoration.size() - 1);
+ }
+
+ LastLineNeedsDecoration = true;
+ IndentAtLineBreak = ContentColumn[0] + 1;
+ for (size_t i = 1, e = Lines.size(); i < e; ++i) {
+ if (Content[i].empty()) {
+ if (i + 1 == e) {
+ // Empty last line means that we already have a star as a part of the
+ // trailing */. We also need to preserve whitespace, so that */ is
+ // correctly indented.
+ LastLineNeedsDecoration = false;
+ // Align the star in the last '*/' with the stars on the previous lines.
+ if (e >= 2 && !Decoration.empty()) {
+ ContentColumn[i] = DecorationColumn;
+ }
+ } else if (Decoration.empty()) {
+ // For all other lines, set the start column to 0 if they're empty, so
+ // we do not insert trailing whitespace anywhere.
+ ContentColumn[i] = 0;
+ }
+ continue;
+ }
+
+ // The first line already excludes the star.
+ // The last line excludes the star if LastLineNeedsDecoration is false.
+ // For all other lines, adjust the line to exclude the star and
+ // (optionally) the first whitespace.
+ unsigned DecorationSize = Decoration.startswith(Content[i])
+ ? Content[i].size()
+ : Decoration.size();
+ if (DecorationSize) {
+ ContentColumn[i] = DecorationColumn + DecorationSize;
+ }
+ Content[i] = Content[i].substr(DecorationSize);
+ if (!Decoration.startswith(Content[i]))
+ IndentAtLineBreak =
+ std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
+ }
+ IndentAtLineBreak =
+ std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+
+ DEBUG({
+ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
+ for (size_t i = 0; i < Lines.size(); ++i) {
+ llvm::dbgs() << i << " |" << Content[i] << "| "
+ << "CC=" << ContentColumn[i] << "| "
+ << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
+ }
+ });
+}
+
+void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
+ int IndentDelta) {
+ // When in a preprocessor directive, the trailing backslash in a block comment
+ // is not needed, but can serve a purpose of uniformity with necessary escaped
+ // newlines outside the comment. In this case we remove it here before
+ // trimming the trailing whitespace. The backslash will be re-added later when
+ // inserting a line break.
+ size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
+ if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
+ --EndOfPreviousLine;
+
+ // Calculate the end of the non-whitespace text in the previous line.
+ EndOfPreviousLine =
+ Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
+ if (EndOfPreviousLine == StringRef::npos)
+ EndOfPreviousLine = 0;
+ else
+ ++EndOfPreviousLine;
+ // Calculate the start of the non-whitespace text in the current line.
+ size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
+ if (StartOfLine == StringRef::npos)
+ StartOfLine = Lines[LineIndex].rtrim("\r\n").size();
+
+ StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
+ // Adjust Lines to only contain relevant text.
+ size_t PreviousContentOffset =
+ Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
+ Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
+ PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
+ Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+
+ // Adjust the start column uniformly across all lines.
+ ContentColumn[LineIndex] =
+ encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
+ IndentDelta;
+}
+
+unsigned BreakableBlockComment::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+ unsigned LineLength =
+ ContentStartColumn + encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(TailOffset, Length),
+ ContentStartColumn, Style.TabWidth, Encoding);
+ // The last line gets a "*/" postfix.
+ if (LineIndex + 1 == Lines.size()) {
+ LineLength += 2;
+ // We never need a decoration when breaking just the trailing "*/" postfix.
+ // Note that checking that Length == 0 is not enough, since Length could
+ // also be StringRef::npos.
+ if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+ LineLength -= Decoration.size();
+ }
+ }
+ return LineLength;
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ StringRef Prefix = Decoration;
+ // We need this to account for the case when we have a decoration "* " for all
+ // the lines except for the last one, where the star in "*/" acts as a
+ // decoration.
+ unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
+ if (LineIndex + 1 == Lines.size() &&
+ Text.size() == Split.first + Split.second) {
+ // For the last line we need to break before "*/", but not to add "* ".
+ Prefix = "";
+ if (LocalIndentAtLineBreak >= 2)
+ LocalIndentAtLineBreak -= 2;
+ }
+ // The split offset is from the beginning of the line. Convert it to an offset
+ // from the beginning of the token text.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ assert(LocalIndentAtLineBreak >= Prefix.size());
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,
+ InPPDirective, /*Newlines=*/1,
+ /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
+}
+
+BreakableToken::Split BreakableBlockComment::getSplitBefore(
+ unsigned LineIndex,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ if (!mayReflow(LineIndex, CommentPragmasRegex))
+ return Split(StringRef::npos, 0);
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
+ ColumnLimit);
+}
+
+unsigned BreakableBlockComment::getReflownColumn(
+ StringRef Content,
+ unsigned LineIndex,
+ unsigned PreviousEndColumn) const {
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+ // If this is the last line, it will carry around its '*/' postfix.
+ unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
+ // The line is composed of previous text, reflow prefix, reflown text and
+ // postfix.
+ unsigned ReflownColumn =
+ StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
+ Style.TabWidth, Encoding) +
+ PostfixLength;
+ return ReflownColumn;
+}
+
+unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
+ unsigned LineIndex, unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ if (SplitBefore.first == StringRef::npos ||
+ // Block comment line contents contain the trailing whitespace after the
+ // decoration, so the need of left trim. Note that this behavior is
+ // consistent with the breaking of block comments where the indentation of
+ // a broken line is uniform across all the lines of the block comment.
+ SplitBefore.first + SplitBefore.second <
+ Content[LineIndex].ltrim().size()) {
+ // A piece of line, not the whole, gets reflown.
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ } else {
+ // The whole line gets reflown, need to check if we need to insert a break
+ // for the postfix or not.
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ unsigned ReflownColumn =
+ getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+ if (ReflownColumn <= ColumnLimit) {
+ return ReflownColumn;
+ }
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ }
+}
+void BreakableBlockComment::replaceWhitespaceBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ Split SplitBefore, WhitespaceManager &Whitespaces) {
+ if (LineIndex == 0) return;
+ StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+ if (SplitBefore.first != StringRef::npos) {
+ // Here we need to reflow.
+ assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+ "Reflowing whitespace within a token");
+ // This is the offset of the end of the last line relative to the start of
+ // the token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = TrimmedContent.data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), WhitespaceOffsetInToken,
+ /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
+ /*Spaces=*/0);
+ // Check if we need to also insert a break at the whitespace range.
+ // For this we first adapt the reflow split relative to the beginning of the
+ // content.
+ // Note that we don't need a penalty for this break, since it doesn't change
+ // the total number of lines.
+ Split BreakSplit = SplitBefore;
+ BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
+ unsigned ReflownColumn =
+ getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+ if (ReflownColumn > ColumnLimit) {
+ insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
+ }
+ return;
+ }
+
+ // Here no reflow with the previous line will happen.
+ // Fix the decoration of the line at LineIndex.
+ StringRef Prefix = Decoration;
+ if (Content[LineIndex].empty()) {
+ if (LineIndex + 1 == Lines.size()) {
+ if (!LastLineNeedsDecoration) {
+ // If the last line was empty, we don't need a prefix, as the */ will
+ // line up with the decoration (if it exists).
+ Prefix = "";
+ }
+ } else if (!Decoration.empty()) {
+ // For other empty lines, if we do have a decoration, adapt it to not
+ // contain a trailing whitespace.
+ Prefix = Prefix.substr(0, 1);
+ }
+ } else {
+ if (ContentColumn[LineIndex] == 1) {
+ // This line starts immediately after the decorating *.
+ Prefix = Prefix.substr(0, 1);
+ }
+ }
+ // This is the offset of the end of the last line relative to the start of the
+ // token text in the token.
+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+ Content[LineIndex - 1].size() -
+ tokenAt(LineIndex).TokenText.data();
+ unsigned WhitespaceLength = Content[LineIndex].data() -
+ tokenAt(LineIndex).TokenText.data() -
+ WhitespaceOffsetInToken;
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
+ InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
+}
+
+bool BreakableBlockComment::mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const {
+ // Content[LineIndex] may exclude the indent after the '*' decoration. In that
+ // case, we compute the start of the comment pragma manually.
+ StringRef IndentContent = Content[LineIndex];
+ if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
+ IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
+ }
+ return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+ mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+ !switchesFormatting(tokenAt(LineIndex));
+}
+
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ // If we break, we always break at the predefined indent.
+ if (TailOffset != 0)
+ return IndentAtLineBreak;
+ return std::max(0, ContentColumn[LineIndex]);
+}
+
+BreakableLineCommentSection::BreakableLineCommentSection(
+ const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+ assert(Tok.is(TT_LineComment) &&
+ "line comment section must start with a line comment");
+ FormatToken *LineTok = nullptr;
+ for (const FormatToken *CurrentTok = &Tok;
+ CurrentTok && CurrentTok->is(TT_LineComment);
+ CurrentTok = CurrentTok->Next) {
+ LastLineTok = LineTok;
+ StringRef TokenText(CurrentTok->TokenText);
+ assert(TokenText.startswith("//"));
+ size_t FirstLineIndex = Lines.size();
+ TokenText.split(Lines, "\n");
+ Content.resize(Lines.size());
+ ContentColumn.resize(Lines.size());
+ OriginalContentColumn.resize(Lines.size());
+ Tokens.resize(Lines.size());
+ Prefix.resize(Lines.size());
+ OriginalPrefix.resize(Lines.size());
+ for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+ // We need to trim the blanks in case this is not the first line in a
+ // multiline comment. Then the indent is included in Lines[i].
+ StringRef IndentPrefix =
+ getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
+ assert(IndentPrefix.startswith("//"));
+ OriginalPrefix[i] = Prefix[i] = IndentPrefix;
+ if (Lines[i].size() > Prefix[i].size() &&
+ isAlphanumeric(Lines[i][Prefix[i].size()])) {
+ if (Prefix[i] == "//")
+ Prefix[i] = "// ";
+ else if (Prefix[i] == "///")
+ Prefix[i] = "/// ";
+ else if (Prefix[i] == "//!")
+ Prefix[i] = "//! ";
+ else if (Prefix[i] == "///<")
+ Prefix[i] = "///< ";
+ else if (Prefix[i] == "//!<")
+ Prefix[i] = "//!< ";
+ }
+
+ Tokens[i] = LineTok;
+ Content[i] = Lines[i].substr(IndentPrefix.size());
+ OriginalContentColumn[i] =
+ StartColumn +
+ encoding::columnWidthWithTabs(OriginalPrefix[i],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+ ContentColumn[i] =
+ StartColumn +
+ encoding::columnWidthWithTabs(Prefix[i],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+
+ // Calculate the end of the non-whitespace text in this line.
+ size_t EndOfLine = Content[i].find_last_not_of(Blanks);
+ if (EndOfLine == StringRef::npos)
+ EndOfLine = Content[i].size();
+ else
+ ++EndOfLine;
+ Content[i] = Content[i].substr(0, EndOfLine);
+ }
+ LineTok = CurrentTok->Next;
+ if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
+ // A line comment section needs to broken by a line comment that is
+ // preceded by at least two newlines. Note that we put this break here
+ // instead of breaking at a previous stage during parsing, since that
+ // would split the contents of the enum into two unwrapped lines in this
+ // example, which is undesirable:
+ // enum A {
+ // a, // comment about a
+ //
+ // // comment about b
+ // b
+ // };
+ //
+ // FIXME: Consider putting separate line comment sections as children to
+ // the unwrapped line instead.
+ break;
+ }
+ }
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
+ unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const {
+ unsigned ContentStartColumn =
+ (TailOffset == 0 ? ContentColumn[LineIndex]
+ : OriginalContentColumn[LineIndex]);
+ return ContentStartColumn + encoding::columnWidthWithTabs(
+ Content[LineIndex].substr(TailOffset, Length),
+ ContentStartColumn, Style.TabWidth, Encoding);
+}
+
+void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
+ unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) {
+ StringRef Text = Content[LineIndex].substr(TailOffset);
+ // Compute the offset of the split relative to the beginning of the token
+ // text.
+ unsigned BreakOffsetInToken =
+ Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+ unsigned CharsToRemove = Split.second;
+ // Compute the size of the new indent, including the size of the new prefix of
+ // the newly broken line.
+ unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
+ Prefix[LineIndex].size() -
+ OriginalPrefix[LineIndex].size();
+ assert(IndentAtLineBreak >= Prefix[LineIndex].size());
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
+ Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
+ /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
+}
+
+BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ if (!mayReflow(LineIndex, CommentPragmasRegex))
+ return Split(StringRef::npos, 0);
+ return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
+ ColumnLimit);
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
+ unsigned LineIndex, unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ if (SplitBefore.first == StringRef::npos ||
+ SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+ // A piece of line, not the whole line, gets reflown.
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ } else {
+ // The whole line gets reflown.
+ unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+ return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
+ StartColumn,
+ Style.TabWidth,
+ Encoding);
+ }
+}
+
+void BreakableLineCommentSection::replaceWhitespaceBefore(
+ unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+ Split SplitBefore, WhitespaceManager &Whitespaces) {
+ // If this is the first line of a token, we need to inform Whitespace Manager
+ // about it: either adapt the whitespace range preceding it, or mark it as an
+ // untouchable token.
+ // This happens for instance here:
+ // // line 1 \
+ // // line 2
+ if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
+ if (SplitBefore.first != StringRef::npos) {
+ // Reflow happens between tokens. Replace the whitespace between the
+ // tokens by the empty string.
+ Whitespaces.replaceWhitespace(
+ *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
+ /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
+ // Replace the indent and prefix of the token with the reflow prefix.
+ unsigned WhitespaceLength =
+ Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+ Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+ /*Offset=*/0,
+ /*ReplaceChars=*/WhitespaceLength,
+ /*PreviousPostfix=*/"",
+ /*CurrentPrefix=*/ReflowPrefix,
+ /*InPPDirective=*/false,
+ /*Newlines=*/0,
+ /*Spaces=*/0);
+ } else {
+ // This is the first line for the current token, but no reflow with the
+ // previous token is necessary. However, we still may need to adjust the
+ // start column. Note that ContentColumn[LineIndex] is the expected
+ // content column after a possible update to the prefix, hence the prefix
+ // length change is included.
+ unsigned LineColumn =
+ ContentColumn[LineIndex] -
+ (Content[LineIndex].data() - Lines[LineIndex].data()) +
+ (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
+
+ // We always want to create a replacement instead of adding an untouchable
+ // token, even if LineColumn is the same as the original column of the
+ // token. This is because WhitespaceManager doesn't align trailing
+ // comments if they are untouchable.
+ Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+ /*Newlines=*/1,
+ /*Spaces=*/LineColumn,
+ /*StartOfTokenColumn=*/LineColumn,
+ /*InPPDirective=*/false);
+ }
+ }
+ if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
+ // Adjust the prefix if necessary.
+
+ // Take care of the space possibly introduced after a decoration.
+ assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
+ "Expecting a line comment prefix to differ from original by at most "
+ "a space");
+ Whitespaces.replaceWhitespaceInToken(
+ tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
+ /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
+ }
+ // Add a break after a reflow split has been introduced, if necessary.
+ // Note that this break doesn't need to be penalized, since it doesn't change
+ // the number of lines.
+ if (SplitBefore.first != StringRef::npos &&
+ SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+ insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
+ }
+}
+
+void BreakableLineCommentSection::updateNextToken(LineState& State) const {
+ if (LastLineTok) {
+ State.NextToken = LastLineTok->Next;
+ }
+}
+
+bool BreakableLineCommentSection::mayReflow(
+ unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
+ // Line comments have the indent as part of the prefix, so we need to
+ // recompute the start of the line.
+ StringRef IndentContent = Content[LineIndex];
+ if (Lines[LineIndex].startswith("//")) {
+ IndentContent = Lines[LineIndex].substr(2);
+ }
+ return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+ mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+ !switchesFormatting(tokenAt(LineIndex)) &&
+ OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
+}
+
+unsigned
+BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const {
+ if (TailOffset != 0) {
+ return OriginalContentColumn[LineIndex];
+ }
+ return ContentColumn[LineIndex];
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/BreakableToken.h b/contrib/llvm/tools/clang/lib/Format/BreakableToken.h
new file mode 100644
index 000000000000..e642a538e21c
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/BreakableToken.h
@@ -0,0 +1,413 @@
+//===--- BreakableToken.h - Format C++ code -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment,
+/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
+/// token type-specific logic to break long lines in tokens and reflow content
+/// between tokens.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
+#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
+
+#include "Encoding.h"
+#include "TokenAnnotator.h"
+#include "WhitespaceManager.h"
+#include "llvm/Support/Regex.h"
+#include <utility>
+
+namespace clang {
+namespace format {
+
+/// \brief Checks if \p Token switches formatting, like /* clang-format off */.
+/// \p Token must be a comment.
+bool switchesFormatting(const FormatToken &Token);
+
+struct FormatStyle;
+
+/// \brief Base class for strategies on how to break tokens.
+///
+/// This is organised around the concept of a \c Split, which is a whitespace
+/// range that signifies a position of the content of a token where a
+/// reformatting might be done. Operating with splits is divided into 3
+/// operations:
+/// - getSplit, for finding a split starting at a position,
+/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
+/// of the content after a split has been used for breaking, and
+/// - insertBreak, for executing the split using a whitespace manager.
+///
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompression, for calculating the size in columns of the
+/// line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+/// whitespace manager; note that the compressed whitespace may be in the
+/// middle of the original line and of the reformatted line.
+///
+/// For tokens where the whitespace before each line needs to be also
+/// reformatted, for example for tokens supporting reflow, there are analogous
+/// operations that might be executed before the main line breaking occurs:
+/// - getSplitBefore, for finding a split such that the content preceding it
+/// needs to be specially reflown,
+/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
+/// of the remainder of the content after the beginning of the content has
+/// been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+/// manager.
+///
+/// FIXME: The interface seems set in stone, so we might want to just pull the
+/// strategy into the class, instead of controlling it from the outside.
+class BreakableToken {
+public:
+ /// \brief Contains starting character index and length of split.
+ typedef std::pair<StringRef::size_type, unsigned> Split;
+
+ virtual ~BreakableToken() {}
+
+ /// \brief Returns the number of lines in this token in the original code.
+ virtual unsigned getLineCount() const = 0;
+
+ /// \brief Returns the number of columns required to format the piece of line
+ /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
+ ///
+ /// Note that previous breaks are not taken into account. \p TailOffset is
+ /// always specified from the start of the (original) line.
+ /// \p Length can be set to StringRef::npos, which means "to the end of line".
+ virtual unsigned
+ getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const = 0;
+
+ /// \brief Returns a range (offset, length) at which to break the line at
+ /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
+ /// violate \p ColumnLimit.
+ virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const = 0;
+
+ /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
+ virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) = 0;
+
+ /// \brief Returns the number of columns required to format the piece of line
+ /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
+ /// \p Split has been compressed into a single space.
+ unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+ Split Split) const;
+
+ /// \brief Replaces the whitespace range described by \p Split with a single
+ /// space.
+ virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
+ Split Split,
+ WhitespaceManager &Whitespaces) = 0;
+
+ /// \brief Returns a whitespace range (offset, length) of the content at
+ /// \p LineIndex such that the content preceding this range needs to be
+ /// reformatted before any breaks are made to this line.
+ ///
+ /// \p PreviousEndColumn is the end column of the previous line after
+ /// formatting.
+ ///
+ /// A result having offset == StringRef::npos means that no piece of the line
+ /// needs to be reformatted before any breaks are made.
+ virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const {
+ return Split(StringRef::npos, 0);
+ }
+
+ /// \brief Returns the number of columns required to format the piece of line
+ /// at \p LineIndex after the content preceding the whitespace range specified
+ /// \p SplitBefore has been reformatted, but before any breaks are made to
+ /// this line.
+ virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+ unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const {
+ return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+ }
+
+ /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+ /// Performs a reformatting of the content at \p LineIndex preceding the
+ /// whitespace range \p SplitBefore.
+ virtual void replaceWhitespaceBefore(unsigned LineIndex,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit, Split SplitBefore,
+ WhitespaceManager &Whitespaces) {}
+
+ /// \brief Updates the next token of \p State to the next token after this
+ /// one. This can be used when this token manages a set of underlying tokens
+ /// as a unit and is responsible for the formatting of the them.
+ virtual void updateNextToken(LineState &State) const {}
+
+protected:
+ BreakableToken(const FormatToken &Tok, bool InPPDirective,
+ encoding::Encoding Encoding, const FormatStyle &Style)
+ : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding),
+ Style(Style) {}
+
+ const FormatToken &Tok;
+ const bool InPPDirective;
+ const encoding::Encoding Encoding;
+ const FormatStyle &Style;
+};
+
+/// \brief Base class for single line tokens that can be broken.
+///
+/// \c getSplit() needs to be implemented by child classes.
+class BreakableSingleLineToken : public BreakableToken {
+public:
+ unsigned getLineCount() const override;
+ unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const override;
+
+protected:
+ BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
+
+ // The column in which the token starts.
+ unsigned StartColumn;
+ // The prefix a line needs after a break in the token.
+ StringRef Prefix;
+ // The postfix a line needs before introducing a break.
+ StringRef Postfix;
+ // The token text excluding the prefix and postfix.
+ StringRef Line;
+};
+
+class BreakableStringLiteral : public BreakableSingleLineToken {
+public:
+ /// \brief Creates a breakable token for a single line string literal.
+ ///
+ /// \p StartColumn specifies the column in which the token will start
+ /// after formatting.
+ BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
+ StringRef Prefix, StringRef Postfix,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
+
+ Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override;
+ void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override {}
+};
+
+class BreakableComment : public BreakableToken {
+protected:
+ /// \brief Creates a breakable token for a comment.
+ ///
+ /// \p StartColumn specifies the column in which the comment will start after
+ /// formatting.
+ BreakableComment(const FormatToken &Token, unsigned StartColumn,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
+
+public:
+ unsigned getLineCount() const override;
+ Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override;
+
+protected:
+ virtual unsigned getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const = 0;
+
+ // Returns a split that divides Text into a left and right parts, such that
+ // the left part is suitable for reflowing after PreviousEndColumn.
+ Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+ unsigned PreviousEndColumn, unsigned ColumnLimit) const;
+
+ // Returns the token containing the line at LineIndex.
+ const FormatToken &tokenAt(unsigned LineIndex) const;
+
+ // Checks if the content of line LineIndex may be reflown with the previous
+ // line.
+ virtual bool mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const = 0;
+
+ // Contains the original text of the lines of the block comment.
+ //
+ // In case of a block comments, excludes the leading /* in the first line and
+ // trailing */ in the last line. In case of line comments, excludes the
+ // leading // and spaces.
+ SmallVector<StringRef, 16> Lines;
+
+ // Contains the text of the lines excluding all leading and trailing
+ // whitespace between the lines. Note that the decoration (if present) is also
+ // not considered part of the text.
+ SmallVector<StringRef, 16> Content;
+
+ // Tokens[i] contains a reference to the token containing Lines[i] if the
+ // whitespace range before that token is managed by this block.
+ // Otherwise, Tokens[i] is a null pointer.
+ SmallVector<FormatToken *, 16> Tokens;
+
+ // ContentColumn[i] is the target column at which Content[i] should be.
+ // Note that this excludes a leading "* " or "*" in case of block comments
+ // where all lines have a "*" prefix, or the leading "// " or "//" in case of
+ // line comments.
+ //
+ // In block comments, the first line's target column is always positive. The
+ // remaining lines' target columns are relative to the first line to allow
+ // correct indentation of comments in \c WhitespaceManager. Thus they can be
+ // negative as well (in case the first line needs to be unindented more than
+ // there's actual whitespace in another line).
+ SmallVector<int, 16> ContentColumn;
+
+ // The intended start column of the first line of text from this section.
+ unsigned StartColumn;
+
+ // The prefix to use in front a line that has been reflown up.
+ // For example, when reflowing the second line after the first here:
+ // // comment 1
+ // // comment 2
+ // we expect:
+ // // comment 1 comment 2
+ // and not:
+ // // comment 1comment 2
+ StringRef ReflowPrefix = " ";
+};
+
+class BreakableBlockComment : public BreakableComment {
+public:
+ BreakableBlockComment(const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
+
+ unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const override;
+ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override;
+ Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+ unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const override;
+ void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit, Split SplitBefore,
+ WhitespaceManager &Whitespaces) override;
+ bool mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const override;
+
+private:
+ // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
+ //
+ // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
+ // leading and trailing whitespace.
+ //
+ // Sets ContentColumn to the intended column in which the text at
+ // Lines[LineIndex] starts (note that the decoration, if present, is not
+ // considered part of the text).
+ void adjustWhitespace(unsigned LineIndex, int IndentDelta);
+
+ // Computes the end column if the full Content from LineIndex gets reflown
+ // after PreviousEndColumn.
+ unsigned getReflownColumn(StringRef Content, unsigned LineIndex,
+ unsigned PreviousEndColumn) const;
+
+ unsigned getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const override;
+
+ // The column at which the text of a broken line should start.
+ // Note that an optional decoration would go before that column.
+ // IndentAtLineBreak is a uniform position for all lines in a block comment,
+ // regardless of their relative position.
+ // FIXME: Revisit the decision to do this; the main reason was to support
+ // patterns like
+ // /**************//**
+ // * Comment
+ // We could also support such patterns by special casing the first line
+ // instead.
+ unsigned IndentAtLineBreak;
+
+ // This is to distinguish between the case when the last line was empty and
+ // the case when it started with a decoration ("*" or "* ").
+ bool LastLineNeedsDecoration;
+
+ // Either "* " if all lines begin with a "*", or empty.
+ StringRef Decoration;
+
+ // If this block comment has decorations, this is the column of the start of
+ // the decorations.
+ unsigned DecorationColumn;
+};
+
+class BreakableLineCommentSection : public BreakableComment {
+public:
+ BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn,
+ unsigned OriginalStartColumn, bool FirstInLine,
+ bool InPPDirective, encoding::Encoding Encoding,
+ const FormatStyle &Style);
+
+ unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+ StringRef::size_type Length) const override;
+ void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+ WhitespaceManager &Whitespaces) override;
+ Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ llvm::Regex &CommentPragmasRegex) const override;
+ unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+ unsigned TailOffset,
+ unsigned PreviousEndColumn,
+ unsigned ColumnLimit,
+ Split SplitBefore) const override;
+ void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+ unsigned ColumnLimit, Split SplitBefore,
+ WhitespaceManager &Whitespaces) override;
+ void updateNextToken(LineState &State) const override;
+ bool mayReflow(unsigned LineIndex,
+ llvm::Regex &CommentPragmasRegex) const override;
+
+private:
+ unsigned getContentStartColumn(unsigned LineIndex,
+ unsigned TailOffset) const override;
+
+ // OriginalPrefix[i] contains the original prefix of line i, including
+ // trailing whitespace before the start of the content. The indentation
+ // preceding the prefix is not included.
+ // For example, if the line is:
+ // // content
+ // then the original prefix is "// ".
+ SmallVector<StringRef, 16> OriginalPrefix;
+
+ // Prefix[i] contains the intended leading "//" with trailing spaces to
+ // account for the indentation of content within the comment at line i after
+ // formatting. It can be different than the original prefix when the original
+ // line starts like this:
+ // //content
+ // Then the original prefix is "//", but the prefix is "// ".
+ SmallVector<StringRef, 16> Prefix;
+
+ SmallVector<unsigned, 16> OriginalContentColumn;
+
+ /// \brief The token to which the last line of this breakable token belongs
+ /// to; nullptr if that token is the initial token.
+ ///
+ /// The distinction is because if the token of the last line of this breakable
+ /// token is distinct from the initial token, this breakable token owns the
+ /// whitespace before the token of the last line, and the whitespace manager
+ /// must be able to modify it.
+ FormatToken *LastLineTok = nullptr;
+};
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
new file mode 100644
index 000000000000..cca773cfe3cb
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp
@@ -0,0 +1,1403 @@
+//===--- ContinuationIndenter.cpp - Format C++ code -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the continuation indenter.
+///
+//===----------------------------------------------------------------------===//
+
+#include "BreakableToken.h"
+#include "ContinuationIndenter.h"
+#include "WhitespaceManager.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-indenter"
+
+namespace clang {
+namespace format {
+
+// Returns the length of everything up to the first possible line break after
+// the ), ], } or > matching \c Tok.
+static unsigned getLengthToMatchingParen(const FormatToken &Tok) {
+ if (!Tok.MatchingParen)
+ return 0;
+ FormatToken *End = Tok.MatchingParen;
+ while (End->Next && !End->Next->CanBreakBefore) {
+ End = End->Next;
+ }
+ return End->TotalLength - Tok.TotalLength + 1;
+}
+
+static unsigned getLengthToNextOperator(const FormatToken &Tok) {
+ if (!Tok.NextOperator)
+ return 0;
+ return Tok.NextOperator->TotalLength - Tok.TotalLength;
+}
+
+// Returns \c true if \c Tok is the "." or "->" of a call and starts the next
+// segment of a builder type call.
+static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok) {
+ return Tok.isMemberAccess() && Tok.Previous && Tok.Previous->closesScope();
+}
+
+// Returns \c true if \c Current starts a new parameter.
+static bool startsNextParameter(const FormatToken &Current,
+ const FormatStyle &Style) {
+ const FormatToken &Previous = *Current.Previous;
+ if (Current.is(TT_CtorInitializerComma) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
+ return true;
+ return Previous.is(tok::comma) && !Current.isTrailingComment() &&
+ ((Previous.isNot(TT_CtorInitializerComma) ||
+ Style.BreakConstructorInitializers !=
+ FormatStyle::BCIS_BeforeComma) &&
+ (Previous.isNot(TT_InheritanceComma) ||
+ !Style.BreakBeforeInheritanceComma));
+}
+
+ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
+ const AdditionalKeywords &Keywords,
+ const SourceManager &SourceMgr,
+ WhitespaceManager &Whitespaces,
+ encoding::Encoding Encoding,
+ bool BinPackInconclusiveFunctions)
+ : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
+ Whitespaces(Whitespaces), Encoding(Encoding),
+ BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
+ CommentPragmasRegex(Style.CommentPragmas) {}
+
+LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
+ const AnnotatedLine *Line,
+ bool DryRun) {
+ LineState State;
+ State.FirstIndent = FirstIndent;
+ State.Column = FirstIndent;
+ State.Line = Line;
+ State.NextToken = Line->First;
+ State.Stack.push_back(ParenState(FirstIndent, FirstIndent,
+ /*AvoidBinPacking=*/false,
+ /*NoLineBreak=*/false));
+ State.LineContainsContinuedForLoopSection = false;
+ State.StartOfStringLiteral = 0;
+ State.StartOfLineLevel = 0;
+ State.LowestLevelOnLine = 0;
+ State.IgnoreStackForComparison = false;
+
+ // The first token has already been indented and thus consumed.
+ moveStateToNextToken(State, DryRun, /*Newline=*/false);
+ return State;
+}
+
+bool ContinuationIndenter::canBreak(const LineState &State) {
+ const FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *Current.Previous;
+ assert(&Previous == Current.Previous);
+ if (!Current.CanBreakBefore &&
+ !(State.Stack.back().BreakBeforeClosingBrace &&
+ Current.closesBlockOrBlockTypeList(Style)))
+ return false;
+ // The opening "{" of a braced list has to be on the same line as the first
+ // element if it is nested in another braced init list or function call.
+ if (!Current.MustBreakBefore && Previous.is(tok::l_brace) &&
+ Previous.isNot(TT_DictLiteral) && Previous.BlockKind == BK_BracedInit &&
+ Previous.Previous &&
+ Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma))
+ return false;
+ // This prevents breaks like:
+ // ...
+ // SomeParameter, OtherParameter).DoSomething(
+ // ...
+ // As they hide "DoSomething" and are generally bad for readability.
+ if (Previous.opensScope() && Previous.isNot(tok::l_brace) &&
+ State.LowestLevelOnLine < State.StartOfLineLevel &&
+ State.LowestLevelOnLine < Current.NestingLevel)
+ return false;
+ if (Current.isMemberAccess() && State.Stack.back().ContainsUnwrappedBuilder)
+ return false;
+
+ // Don't create a 'hanging' indent if there are multiple blocks in a single
+ // statement.
+ if (Previous.is(tok::l_brace) && State.Stack.size() > 1 &&
+ State.Stack[State.Stack.size() - 2].NestedBlockInlined &&
+ State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks)
+ return false;
+
+ // Don't break after very short return types (e.g. "void") as that is often
+ // unexpected.
+ if (Current.is(TT_FunctionDeclarationName) && State.Column < 6) {
+ if (Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None)
+ return false;
+ }
+
+ // If binary operators are moved to the next line (including commas for some
+ // styles of constructor initializers), that's always ok.
+ if (!Current.isOneOf(TT_BinaryOperator, tok::comma) &&
+ State.Stack.back().NoLineBreakInOperand)
+ return false;
+
+ return !State.Stack.back().NoLineBreak;
+}
+
+bool ContinuationIndenter::mustBreak(const LineState &State) {
+ const FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *Current.Previous;
+ if (Current.MustBreakBefore || Current.is(TT_InlineASMColon))
+ return true;
+ if (State.Stack.back().BreakBeforeClosingBrace &&
+ Current.closesBlockOrBlockTypeList(Style))
+ return true;
+ if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
+ return true;
+ if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
+ (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
+ Style.isCpp() &&
+ // FIXME: This is a temporary workaround for the case where clang-format
+ // sets BreakBeforeParameter to avoid bin packing and this creates a
+ // completely unnecessary line break after a template type that isn't
+ // line-wrapped.
+ (Previous.NestingLevel == 1 || Style.BinPackParameters)) ||
+ (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
+ Previous.isNot(tok::question)) ||
+ (!Style.BreakBeforeTernaryOperators &&
+ Previous.is(TT_ConditionalExpr))) &&
+ State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() &&
+ !Current.isOneOf(tok::r_paren, tok::r_brace))
+ return true;
+ if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) ||
+ (Previous.is(TT_ArrayInitializerLSquare) &&
+ Previous.ParameterCount > 1)) &&
+ Style.ColumnLimit > 0 &&
+ getLengthToMatchingParen(Previous) + State.Column - 1 >
+ getColumnLimit(State))
+ return true;
+
+ const FormatToken &BreakConstructorInitializersToken =
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon
+ ? Previous
+ : Current;
+ if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) &&
+ (State.Column + State.Line->Last->TotalLength - Previous.TotalLength >
+ getColumnLimit(State) ||
+ State.Stack.back().BreakBeforeParameter) &&
+ (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All ||
+ Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon ||
+ Style.ColumnLimit != 0))
+ return true;
+
+ if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) &&
+ State.Line->startsWith(TT_ObjCMethodSpecifier))
+ return true;
+ if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound &&
+ State.Stack.back().BreakBeforeParameter)
+ return true;
+
+ unsigned NewLineColumn = getNewLineColumn(State);
+ if (Current.isMemberAccess() && Style.ColumnLimit != 0 &&
+ State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit &&
+ (State.Column > NewLineColumn ||
+ Current.NestingLevel < State.StartOfLineLevel))
+ return true;
+
+ if (startsSegmentOfBuilderTypeCall(Current) &&
+ (State.Stack.back().CallContinuation != 0 ||
+ State.Stack.back().BreakBeforeParameter) &&
+ // JavaScript is treated different here as there is a frequent pattern:
+ // SomeFunction(function() {
+ // ...
+ // }.bind(...));
+ // FIXME: We should find a more generic solution to this problem.
+ !(State.Column <= NewLineColumn &&
+ Style.Language == FormatStyle::LK_JavaScript))
+ return true;
+
+ if (State.Column <= NewLineColumn)
+ return false;
+
+ if (Style.AlwaysBreakBeforeMultilineStrings &&
+ (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth ||
+ Previous.is(tok::comma) || Current.NestingLevel < 2) &&
+ !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) &&
+ !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
+ nextIsMultilineString(State))
+ return true;
+
+ // Using CanBreakBefore here and below takes care of the decision whether the
+ // current style uses wrapping before or after operators for the given
+ // operator.
+ if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) {
+ // If we need to break somewhere inside the LHS of a binary expression, we
+ // should also break after the operator. Otherwise, the formatting would
+ // hide the operator precedence, e.g. in:
+ // if (aaaaaaaaaaaaaa ==
+ // bbbbbbbbbbbbbb && c) {..
+ // For comparisons, we only apply this rule, if the LHS is a binary
+ // expression itself as otherwise, the line breaks seem superfluous.
+ // We need special cases for ">>" which we have split into two ">" while
+ // lexing in order to make template parsing easier.
+ bool IsComparison = (Previous.getPrecedence() == prec::Relational ||
+ Previous.getPrecedence() == prec::Equality) &&
+ Previous.Previous &&
+ Previous.Previous->isNot(TT_BinaryOperator); // For >>.
+ bool LHSIsBinaryExpr =
+ Previous.Previous && Previous.Previous->EndsBinaryExpression;
+ if ((!IsComparison || LHSIsBinaryExpr) && !Current.isTrailingComment() &&
+ Previous.getPrecedence() != prec::Assignment &&
+ State.Stack.back().BreakBeforeParameter)
+ return true;
+ } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore &&
+ State.Stack.back().BreakBeforeParameter) {
+ return true;
+ }
+
+ // Same as above, but for the first "<<" operator.
+ if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator) &&
+ State.Stack.back().BreakBeforeParameter &&
+ State.Stack.back().FirstLessLess == 0)
+ return true;
+
+ if (Current.NestingLevel == 0 && !Current.isTrailingComment()) {
+ // Always break after "template <...>" and leading annotations. This is only
+ // for cases where the entire line does not fit on a single line as a
+ // different LineFormatter would be used otherwise.
+ if (Previous.ClosesTemplateDeclaration)
+ return true;
+ if (Previous.is(TT_FunctionAnnotationRParen))
+ return true;
+ if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) &&
+ Current.isNot(TT_LeadingJavaAnnotation))
+ return true;
+ }
+
+ // If the return type spans multiple lines, wrap before the function name.
+ if ((Current.is(TT_FunctionDeclarationName) ||
+ (Current.is(tok::kw_operator) && !Previous.is(tok::coloncolon))) &&
+ !Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter)
+ return true;
+
+ // The following could be precomputed as they do not depend on the state.
+ // However, as they should take effect only if the UnwrappedLine does not fit
+ // into the ColumnLimit, they are checked here in the ContinuationIndenter.
+ if (Style.ColumnLimit != 0 && Previous.BlockKind == BK_Block &&
+ Previous.is(tok::l_brace) && !Current.isOneOf(tok::r_brace, tok::comment))
+ return true;
+
+ if (Current.is(tok::lessless) &&
+ ((Previous.is(tok::identifier) && Previous.TokenText == "endl") ||
+ (Previous.Tok.isLiteral() && (Previous.TokenText.endswith("\\n\"") ||
+ Previous.TokenText == "\'\\n\'"))))
+ return true;
+
+ return false;
+}
+
+unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
+ bool DryRun,
+ unsigned ExtraSpaces) {
+ const FormatToken &Current = *State.NextToken;
+
+ assert(!State.Stack.empty());
+ if ((Current.is(TT_ImplicitStringLiteral) &&
+ (Current.Previous->Tok.getIdentifierInfo() == nullptr ||
+ Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp_not_keyword))) {
+ unsigned EndColumn =
+ SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd());
+ if (Current.LastNewlineOffset != 0) {
+ // If there is a newline within this token, the final column will solely
+ // determined by the current end column.
+ State.Column = EndColumn;
+ } else {
+ unsigned StartColumn =
+ SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getBegin());
+ assert(EndColumn >= StartColumn);
+ State.Column += EndColumn - StartColumn;
+ }
+ moveStateToNextToken(State, DryRun, /*Newline=*/false);
+ return 0;
+ }
+
+ unsigned Penalty = 0;
+ if (Newline)
+ Penalty = addTokenOnNewLine(State, DryRun);
+ else
+ addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
+
+ return moveStateToNextToken(State, DryRun, Newline) + Penalty;
+}
+
+void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
+ unsigned ExtraSpaces) {
+ FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *State.NextToken->Previous;
+ if (Current.is(tok::equal) &&
+ (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) &&
+ State.Stack.back().VariablePos == 0) {
+ State.Stack.back().VariablePos = State.Column;
+ // Move over * and & if they are bound to the variable name.
+ const FormatToken *Tok = &Previous;
+ while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) {
+ State.Stack.back().VariablePos -= Tok->ColumnWidth;
+ if (Tok->SpacesRequiredBefore != 0)
+ break;
+ Tok = Tok->Previous;
+ }
+ if (Previous.PartOfMultiVariableDeclStmt)
+ State.Stack.back().LastSpace = State.Stack.back().VariablePos;
+ }
+
+ unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
+
+ if (!DryRun)
+ Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
+ State.Column + Spaces);
+
+ // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance
+ // declaration unless there is multiple inheritance.
+ if (Style.BreakBeforeInheritanceComma && Current.is(TT_InheritanceColon))
+ State.Stack.back().NoLineBreak = true;
+
+ if (Current.is(TT_SelectorName) &&
+ !State.Stack.back().ObjCSelectorNameFound) {
+ unsigned MinIndent =
+ std::max(State.FirstIndent + Style.ContinuationIndentWidth,
+ State.Stack.back().Indent);
+ unsigned FirstColonPos = State.Column + Spaces + Current.ColumnWidth;
+ if (Current.LongestObjCSelectorName == 0)
+ State.Stack.back().AlignColons = false;
+ else if (MinIndent + Current.LongestObjCSelectorName > FirstColonPos)
+ State.Stack.back().ColonPos = MinIndent + Current.LongestObjCSelectorName;
+ else
+ State.Stack.back().ColonPos = FirstColonPos;
+ }
+
+ // In "AlwaysBreak" mode, enforce wrapping directly after the parenthesis by
+ // disallowing any further line breaks if there is no line break after the
+ // opening parenthesis. Don't break if it doesn't conserve columns.
+ if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak &&
+ Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
+ State.Column > getNewLineColumn(State) &&
+ (!Previous.Previous ||
+ !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while,
+ tok::kw_switch)) &&
+ // Don't do this for simple (no expressions) one-argument function calls
+ // as that feels like needlessly wasting whitespace, e.g.:
+ //
+ // caaaaaaaaaaaall(
+ // caaaaaaaaaaaall(
+ // caaaaaaaaaaaall(
+ // caaaaaaaaaaaaaaaaaaaaaaall(aaaaaaaaaaaaaa, aaaaaaaaa))));
+ Current.FakeLParens.size() > 0 &&
+ Current.FakeLParens.back() > prec::Unknown)
+ State.Stack.back().NoLineBreak = true;
+ if (Previous.is(TT_TemplateString) && Previous.opensScope())
+ State.Stack.back().NoLineBreak = true;
+
+ if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
+ Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) &&
+ (Current.isNot(TT_LineComment) || Previous.BlockKind == BK_BracedInit))
+ State.Stack.back().Indent = State.Column + Spaces;
+ if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style))
+ State.Stack.back().NoLineBreak = true;
+ if (startsSegmentOfBuilderTypeCall(Current) &&
+ State.Column > getNewLineColumn(State))
+ State.Stack.back().ContainsUnwrappedBuilder = true;
+
+ if (Current.is(TT_LambdaArrow) && Style.Language == FormatStyle::LK_Java)
+ State.Stack.back().NoLineBreak = true;
+ if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
+ (Previous.MatchingParen &&
+ (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10)))
+ // If there is a function call with long parameters, break before trailing
+ // calls. This prevents things like:
+ // EXPECT_CALL(SomeLongParameter).Times(
+ // 2);
+ // We don't want to do this for short parameters as they can just be
+ // indexes.
+ State.Stack.back().NoLineBreak = true;
+
+ // Don't allow the RHS of an operator to be split over multiple lines unless
+ // there is a line-break right after the operator.
+ // Exclude relational operators, as there, it is always more desirable to
+ // have the LHS 'left' of the RHS.
+ const FormatToken *P = Current.getPreviousNonComment();
+ if (!Current.is(tok::comment) && P &&
+ (P->isOneOf(TT_BinaryOperator, tok::comma) ||
+ (P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
+ !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
+ P->getPrecedence() != prec::Assignment &&
+ P->getPrecedence() != prec::Relational) {
+ bool BreakBeforeOperator =
+ P->MustBreakBefore || P->is(tok::lessless) ||
+ (P->is(TT_BinaryOperator) &&
+ Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) ||
+ (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators);
+ // Don't do this if there are only two operands. In these cases, there is
+ // always a nice vertical separation between them and the extra line break
+ // does not help.
+ bool HasTwoOperands =
+ P->OperatorIndex == 0 && !P->NextOperator && !P->is(TT_ConditionalExpr);
+ if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands)) ||
+ (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator))
+ State.Stack.back().NoLineBreakInOperand = true;
+ }
+
+ State.Column += Spaces;
+ if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) &&
+ Previous.Previous &&
+ (Previous.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
+ Previous.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) {
+ // Treat the condition inside an if as if it was a second function
+ // parameter, i.e. let nested calls have a continuation indent.
+ State.Stack.back().LastSpace = State.Column;
+ State.Stack.back().NestedBlockIndent = State.Column;
+ } else if (!Current.isOneOf(tok::comment, tok::caret) &&
+ ((Previous.is(tok::comma) &&
+ !Previous.is(TT_OverloadedOperator)) ||
+ (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) {
+ State.Stack.back().LastSpace = State.Column;
+ } else if (Previous.is(TT_CtorInitializerColon) &&
+ Style.BreakConstructorInitializers ==
+ FormatStyle::BCIS_AfterColon) {
+ State.Stack.back().Indent = State.Column;
+ State.Stack.back().LastSpace = State.Column;
+ } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
+ TT_CtorInitializerColon)) &&
+ ((Previous.getPrecedence() != prec::Assignment &&
+ (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 ||
+ Previous.NextOperator)) ||
+ Current.StartsBinaryExpression)) {
+ // Indent relative to the RHS of the expression unless this is a simple
+ // assignment without binary expression on the RHS. Also indent relative to
+ // unary operators and the colons of constructor initializers.
+ State.Stack.back().LastSpace = State.Column;
+ } else if (Previous.is(TT_InheritanceColon)) {
+ State.Stack.back().Indent = State.Column;
+ State.Stack.back().LastSpace = State.Column;
+ } else if (Previous.opensScope()) {
+ // If a function has a trailing call, indent all parameters from the
+ // opening parenthesis. This avoids confusing indents like:
+ // OuterFunction(InnerFunctionCall( // break
+ // ParameterToInnerFunction)) // break
+ // .SecondInnerFunctionCall();
+ bool HasTrailingCall = false;
+ if (Previous.MatchingParen) {
+ const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
+ HasTrailingCall = Next && Next->isMemberAccess();
+ }
+ if (HasTrailingCall && State.Stack.size() > 1 &&
+ State.Stack[State.Stack.size() - 2].CallContinuation == 0)
+ State.Stack.back().LastSpace = State.Column;
+ }
+}
+
+unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
+ bool DryRun) {
+ FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *State.NextToken->Previous;
+
+ // Extra penalty that needs to be added because of the way certain line
+ // breaks are chosen.
+ unsigned Penalty = 0;
+
+ const FormatToken *PreviousNonComment = Current.getPreviousNonComment();
+ const FormatToken *NextNonComment = Previous.getNextNonComment();
+ if (!NextNonComment)
+ NextNonComment = &Current;
+ // The first line break on any NestingLevel causes an extra penalty in order
+ // prefer similar line breaks.
+ if (!State.Stack.back().ContainsLineBreak)
+ Penalty += 15;
+ State.Stack.back().ContainsLineBreak = true;
+
+ Penalty += State.NextToken->SplitPenalty;
+
+ // Breaking before the first "<<" is generally not desirable if the LHS is
+ // short. Also always add the penalty if the LHS is split over multiple lines
+ // to avoid unnecessary line breaks that just work around this penalty.
+ if (NextNonComment->is(tok::lessless) &&
+ State.Stack.back().FirstLessLess == 0 &&
+ (State.Column <= Style.ColumnLimit / 3 ||
+ State.Stack.back().BreakBeforeParameter))
+ Penalty += Style.PenaltyBreakFirstLessLess;
+
+ State.Column = getNewLineColumn(State);
+
+ // Indent nested blocks relative to this column, unless in a very specific
+ // JavaScript special case where:
+ //
+ // var loooooong_name =
+ // function() {
+ // // code
+ // }
+ //
+ // is common and should be formatted like a free-standing function. The same
+ // goes for wrapping before the lambda return type arrow.
+ if (!Current.is(TT_LambdaArrow) &&
+ (Style.Language != FormatStyle::LK_JavaScript ||
+ Current.NestingLevel != 0 || !PreviousNonComment ||
+ !PreviousNonComment->is(tok::equal) ||
+ !Current.isOneOf(Keywords.kw_async, Keywords.kw_function)))
+ State.Stack.back().NestedBlockIndent = State.Column;
+
+ if (NextNonComment->isMemberAccess()) {
+ if (State.Stack.back().CallContinuation == 0)
+ State.Stack.back().CallContinuation = State.Column;
+ } else if (NextNonComment->is(TT_SelectorName)) {
+ if (!State.Stack.back().ObjCSelectorNameFound) {
+ if (NextNonComment->LongestObjCSelectorName == 0) {
+ State.Stack.back().AlignColons = false;
+ } else {
+ State.Stack.back().ColonPos =
+ (Style.IndentWrappedFunctionNames
+ ? std::max(State.Stack.back().Indent,
+ State.FirstIndent + Style.ContinuationIndentWidth)
+ : State.Stack.back().Indent) +
+ NextNonComment->LongestObjCSelectorName;
+ }
+ } else if (State.Stack.back().AlignColons &&
+ State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) {
+ State.Stack.back().ColonPos = State.Column + NextNonComment->ColumnWidth;
+ }
+ } else if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
+ PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) {
+ // FIXME: This is hacky, find a better way. The problem is that in an ObjC
+ // method expression, the block should be aligned to the line starting it,
+ // e.g.:
+ // [aaaaaaaaaaaaaaa aaaaaaaaa: \\ break for some reason
+ // ^(int *i) {
+ // // ...
+ // }];
+ // Thus, we set LastSpace of the next higher NestingLevel, to which we move
+ // when we consume all of the "}"'s FakeRParens at the "{".
+ if (State.Stack.size() > 1)
+ State.Stack[State.Stack.size() - 2].LastSpace =
+ std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) +
+ Style.ContinuationIndentWidth;
+ }
+
+ if ((PreviousNonComment &&
+ PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
+ !State.Stack.back().AvoidBinPacking) ||
+ Previous.is(TT_BinaryOperator))
+ State.Stack.back().BreakBeforeParameter = false;
+ if (Previous.isOneOf(TT_TemplateCloser, TT_JavaAnnotation) &&
+ Current.NestingLevel == 0)
+ State.Stack.back().BreakBeforeParameter = false;
+ if (NextNonComment->is(tok::question) ||
+ (PreviousNonComment && PreviousNonComment->is(tok::question)))
+ State.Stack.back().BreakBeforeParameter = true;
+ if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore)
+ State.Stack.back().BreakBeforeParameter = false;
+
+ if (!DryRun) {
+ unsigned Newlines = std::max(
+ 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));
+ bool ContinuePPDirective =
+ State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
+ Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
+ ContinuePPDirective);
+ }
+
+ if (!Current.isTrailingComment())
+ State.Stack.back().LastSpace = State.Column;
+ if (Current.is(tok::lessless))
+ // If we are breaking before a "<<", we always want to indent relative to
+ // RHS. This is necessary only for "<<", as we special-case it and don't
+ // always indent relative to the RHS.
+ State.Stack.back().LastSpace += 3; // 3 -> width of "<< ".
+
+ State.StartOfLineLevel = Current.NestingLevel;
+ State.LowestLevelOnLine = Current.NestingLevel;
+
+ // Any break on this level means that the parent level has been broken
+ // and we need to avoid bin packing there.
+ bool NestedBlockSpecialCase =
+ !Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 &&
+ State.Stack[State.Stack.size() - 2].NestedBlockInlined;
+ if (!NestedBlockSpecialCase)
+ for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i)
+ State.Stack[i].BreakBeforeParameter = true;
+
+ if (PreviousNonComment &&
+ !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) &&
+ (PreviousNonComment->isNot(TT_TemplateCloser) ||
+ Current.NestingLevel != 0) &&
+ !PreviousNonComment->isOneOf(
+ TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
+ TT_LeadingJavaAnnotation) &&
+ Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope())
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // If we break after { or the [ of an array initializer, we should also break
+ // before the corresponding } or ].
+ if (PreviousNonComment &&
+ (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+ (PreviousNonComment->is(TT_TemplateString) &&
+ PreviousNonComment->opensScope())))
+ State.Stack.back().BreakBeforeClosingBrace = true;
+
+ if (State.Stack.back().AvoidBinPacking) {
+ // If we are breaking after '(', '{', '<', this is not bin packing
+ // unless AllowAllParametersOfDeclarationOnNextLine is false or this is a
+ // dict/object literal.
+ if (!Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) ||
+ (!Style.AllowAllParametersOfDeclarationOnNextLine &&
+ State.Line->MustBeDeclaration) ||
+ Previous.is(TT_DictLiteral))
+ State.Stack.back().BreakBeforeParameter = true;
+ }
+
+ return Penalty;
+}
+
+unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
+ if (!State.NextToken || !State.NextToken->Previous)
+ return 0;
+ FormatToken &Current = *State.NextToken;
+ const FormatToken &Previous = *Current.Previous;
+ // If we are continuing an expression, we want to use the continuation indent.
+ unsigned ContinuationIndent =
+ std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) +
+ Style.ContinuationIndentWidth;
+ const FormatToken *PreviousNonComment = Current.getPreviousNonComment();
+ const FormatToken *NextNonComment = Previous.getNextNonComment();
+ if (!NextNonComment)
+ NextNonComment = &Current;
+
+ // Java specific bits.
+ if (Style.Language == FormatStyle::LK_Java &&
+ Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends))
+ return std::max(State.Stack.back().LastSpace,
+ State.Stack.back().Indent + Style.ContinuationIndentWidth);
+
+ if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block)
+ return Current.NestingLevel == 0 ? State.FirstIndent
+ : State.Stack.back().Indent;
+ if (Current.isOneOf(tok::r_brace, tok::r_square) && State.Stack.size() > 1) {
+ if (Current.closesBlockOrBlockTypeList(Style))
+ return State.Stack[State.Stack.size() - 2].NestedBlockIndent;
+ if (Current.MatchingParen &&
+ Current.MatchingParen->BlockKind == BK_BracedInit)
+ return State.Stack[State.Stack.size() - 2].LastSpace;
+ return State.FirstIndent;
+ }
+ // Indent a closing parenthesis at the previous level if followed by a semi or
+ // opening brace. This allows indentations such as:
+ // foo(
+ // a,
+ // );
+ // function foo(
+ // a,
+ // ) {
+ // code(); //
+ // }
+ if (Current.is(tok::r_paren) && State.Stack.size() > 1 &&
+ (!Current.Next || Current.Next->isOneOf(tok::semi, tok::l_brace)))
+ return State.Stack[State.Stack.size() - 2].LastSpace;
+ if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
+ return State.Stack[State.Stack.size() - 2].LastSpace;
+ if (Current.is(tok::identifier) && Current.Next &&
+ Current.Next->is(TT_DictLiteral))
+ return State.Stack.back().Indent;
+ if (NextNonComment->is(TT_ObjCStringLiteral) &&
+ State.StartOfStringLiteral != 0)
+ return State.StartOfStringLiteral - 1;
+ if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
+ return State.StartOfStringLiteral;
+ if (NextNonComment->is(tok::lessless) &&
+ State.Stack.back().FirstLessLess != 0)
+ return State.Stack.back().FirstLessLess;
+ if (NextNonComment->isMemberAccess()) {
+ if (State.Stack.back().CallContinuation == 0)
+ return ContinuationIndent;
+ return State.Stack.back().CallContinuation;
+ }
+ if (State.Stack.back().QuestionColumn != 0 &&
+ ((NextNonComment->is(tok::colon) &&
+ NextNonComment->is(TT_ConditionalExpr)) ||
+ Previous.is(TT_ConditionalExpr)))
+ return State.Stack.back().QuestionColumn;
+ if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0)
+ return State.Stack.back().VariablePos;
+ if ((PreviousNonComment &&
+ (PreviousNonComment->ClosesTemplateDeclaration ||
+ PreviousNonComment->isOneOf(
+ TT_AttributeParen, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
+ TT_LeadingJavaAnnotation))) ||
+ (!Style.IndentWrappedFunctionNames &&
+ NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName)))
+ return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent);
+ if (NextNonComment->is(TT_SelectorName)) {
+ if (!State.Stack.back().ObjCSelectorNameFound) {
+ if (NextNonComment->LongestObjCSelectorName == 0)
+ return State.Stack.back().Indent;
+ return (Style.IndentWrappedFunctionNames
+ ? std::max(State.Stack.back().Indent,
+ State.FirstIndent + Style.ContinuationIndentWidth)
+ : State.Stack.back().Indent) +
+ NextNonComment->LongestObjCSelectorName -
+ NextNonComment->ColumnWidth;
+ }
+ if (!State.Stack.back().AlignColons)
+ return State.Stack.back().Indent;
+ if (State.Stack.back().ColonPos > NextNonComment->ColumnWidth)
+ return State.Stack.back().ColonPos - NextNonComment->ColumnWidth;
+ return State.Stack.back().Indent;
+ }
+ if (NextNonComment->is(tok::colon) && NextNonComment->is(TT_ObjCMethodExpr))
+ return State.Stack.back().ColonPos;
+ if (NextNonComment->is(TT_ArraySubscriptLSquare)) {
+ if (State.Stack.back().StartOfArraySubscripts != 0)
+ return State.Stack.back().StartOfArraySubscripts;
+ return ContinuationIndent;
+ }
+
+ // This ensure that we correctly format ObjC methods calls without inputs,
+ // i.e. where the last element isn't selector like: [callee method];
+ if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 &&
+ NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr))
+ return State.Stack.back().Indent;
+
+ if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) ||
+ Previous.isOneOf(tok::coloncolon, tok::equal, TT_JsTypeColon))
+ return ContinuationIndent;
+ if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
+ PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))
+ return ContinuationIndent;
+ if (NextNonComment->is(TT_CtorInitializerComma))
+ return State.Stack.back().Indent;
+ if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon)
+ return State.Stack.back().Indent;
+ if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon,
+ TT_InheritanceComma))
+ return State.FirstIndent + Style.ConstructorInitializerIndentWidth;
+ if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() &&
+ !Current.isOneOf(tok::colon, tok::comment))
+ return ContinuationIndent;
+ if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment &&
+ PreviousNonComment->isNot(tok::r_brace))
+ // Ensure that we fall back to the continuation indent width instead of
+ // just flushing continuations left.
+ return State.Stack.back().Indent + Style.ContinuationIndentWidth;
+ return State.Stack.back().Indent;
+}
+
+unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
+ bool DryRun, bool Newline) {
+ assert(State.Stack.size());
+ const FormatToken &Current = *State.NextToken;
+
+ if (Current.isOneOf(tok::comma, TT_BinaryOperator))
+ State.Stack.back().NoLineBreakInOperand = false;
+ if (Current.is(TT_InheritanceColon))
+ State.Stack.back().AvoidBinPacking = true;
+ if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) {
+ if (State.Stack.back().FirstLessLess == 0)
+ State.Stack.back().FirstLessLess = State.Column;
+ else
+ State.Stack.back().LastOperatorWrapped = Newline;
+ }
+ if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless))
+ State.Stack.back().LastOperatorWrapped = Newline;
+ if (Current.is(TT_ConditionalExpr) && Current.Previous &&
+ !Current.Previous->is(TT_ConditionalExpr))
+ State.Stack.back().LastOperatorWrapped = Newline;
+ if (Current.is(TT_ArraySubscriptLSquare) &&
+ State.Stack.back().StartOfArraySubscripts == 0)
+ State.Stack.back().StartOfArraySubscripts = State.Column;
+ if (Style.BreakBeforeTernaryOperators && Current.is(tok::question))
+ State.Stack.back().QuestionColumn = State.Column;
+ if (!Style.BreakBeforeTernaryOperators && Current.isNot(tok::colon)) {
+ const FormatToken *Previous = Current.Previous;
+ while (Previous && Previous->isTrailingComment())
+ Previous = Previous->Previous;
+ if (Previous && Previous->is(tok::question))
+ State.Stack.back().QuestionColumn = State.Column;
+ }
+ if (!Current.opensScope() && !Current.closesScope() &&
+ !Current.is(TT_PointerOrReference))
+ State.LowestLevelOnLine =
+ std::min(State.LowestLevelOnLine, Current.NestingLevel);
+ if (Current.isMemberAccess())
+ State.Stack.back().StartOfFunctionCall =
+ !Current.NextOperator ? 0 : State.Column;
+ if (Current.is(TT_SelectorName)) {
+ State.Stack.back().ObjCSelectorNameFound = true;
+ if (Style.IndentWrappedFunctionNames) {
+ State.Stack.back().Indent =
+ State.FirstIndent + Style.ContinuationIndentWidth;
+ }
+ }
+ if (Current.is(TT_CtorInitializerColon) &&
+ Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) {
+ // Indent 2 from the column, so:
+ // SomeClass::SomeClass()
+ // : First(...), ...
+ // Next(...)
+ // ^ line up here.
+ State.Stack.back().Indent =
+ State.Column + (Style.BreakConstructorInitializers ==
+ FormatStyle::BCIS_BeforeComma ? 0 : 2);
+ State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
+ if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ State.Stack.back().AvoidBinPacking = true;
+ State.Stack.back().BreakBeforeParameter = false;
+ }
+ if (Current.is(TT_CtorInitializerColon) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) {
+ State.Stack.back().Indent =
+ State.FirstIndent + Style.ConstructorInitializerIndentWidth;
+ State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
+ if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ State.Stack.back().AvoidBinPacking = true;
+ }
+ if (Current.is(TT_InheritanceColon))
+ State.Stack.back().Indent =
+ State.FirstIndent + Style.ContinuationIndentWidth;
+ if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
+ State.Stack.back().NestedBlockIndent =
+ State.Column + Current.ColumnWidth + 1;
+ if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow))
+ State.Stack.back().LastSpace = State.Column;
+
+ // Insert scopes created by fake parenthesis.
+ const FormatToken *Previous = Current.getPreviousNonComment();
+
+ // Add special behavior to support a format commonly used for JavaScript
+ // closures:
+ // SomeFunction(function() {
+ // foo();
+ // bar();
+ // }, a, b, c);
+ if (Current.isNot(tok::comment) && Previous &&
+ Previous->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
+ !Previous->is(TT_DictLiteral) && State.Stack.size() > 1) {
+ if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline)
+ for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i)
+ State.Stack[i].NoLineBreak = true;
+ State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;
+ }
+ if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||
+ Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&
+ !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
+ State.Stack.back().NestedBlockInlined =
+ !Newline &&
+ (Previous->isNot(tok::l_paren) || Previous->ParameterCount > 1);
+ }
+
+ moveStatePastFakeLParens(State, Newline);
+ moveStatePastScopeCloser(State);
+ if (Current.is(TT_TemplateString) && Current.opensScope())
+ State.Stack.back().LastSpace =
+ (Current.IsMultiline ? Current.LastLineColumnWidth
+ : State.Column + Current.ColumnWidth) -
+ strlen("${");
+ bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
+ !State.Stack.back().NoLineBreakInOperand;
+ moveStatePastScopeOpener(State, Newline);
+ moveStatePastFakeRParens(State);
+
+ if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
+ State.StartOfStringLiteral = State.Column + 1;
+ else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
+ State.StartOfStringLiteral = State.Column;
+ else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
+ !Current.isStringLiteral())
+ State.StartOfStringLiteral = 0;
+
+ State.Column += Current.ColumnWidth;
+ State.NextToken = State.NextToken->Next;
+ unsigned Penalty = 0;
+ if (CanBreakProtrudingToken)
+ Penalty = breakProtrudingToken(Current, State, DryRun);
+ if (State.Column > getColumnLimit(State)) {
+ unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+ Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+ }
+
+ if (Current.Role)
+ Current.Role->formatFromToken(State, this, DryRun);
+ // If the previous has a special role, let it consume tokens as appropriate.
+ // It is necessary to start at the previous token for the only implemented
+ // role (comma separated list). That way, the decision whether or not to break
+ // after the "{" is already done and both options are tried and evaluated.
+ // FIXME: This is ugly, find a better way.
+ if (Previous && Previous->Role)
+ Penalty += Previous->Role->formatAfterToken(State, this, DryRun);
+
+ return Penalty;
+}
+
+void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
+ bool Newline) {
+ const FormatToken &Current = *State.NextToken;
+ const FormatToken *Previous = Current.getPreviousNonComment();
+
+ // Don't add extra indentation for the first fake parenthesis after
+ // 'return', assignments or opening <({[. The indentation for these cases
+ // is special cased.
+ bool SkipFirstExtraIndent =
+ (Previous && (Previous->opensScope() ||
+ Previous->isOneOf(tok::semi, tok::kw_return) ||
+ (Previous->getPrecedence() == prec::Assignment &&
+ Style.AlignOperands) ||
+ Previous->is(TT_ObjCMethodExpr)));
+ for (SmallVectorImpl<prec::Level>::const_reverse_iterator
+ I = Current.FakeLParens.rbegin(),
+ E = Current.FakeLParens.rend();
+ I != E; ++I) {
+ ParenState NewParenState = State.Stack.back();
+ NewParenState.ContainsLineBreak = false;
+ NewParenState.LastOperatorWrapped = true;
+ NewParenState.NoLineBreak =
+ NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand;
+
+ // Don't propagate AvoidBinPacking into subexpressions of arg/param lists.
+ if (*I > prec::Comma)
+ NewParenState.AvoidBinPacking = false;
+
+ // Indent from 'LastSpace' unless these are fake parentheses encapsulating
+ // a builder type call after 'return' or, if the alignment after opening
+ // brackets is disabled.
+ if (!Current.isTrailingComment() &&
+ (Style.AlignOperands || *I < prec::Assignment) &&
+ (!Previous || Previous->isNot(tok::kw_return) ||
+ (Style.Language != FormatStyle::LK_Java && *I > 0)) &&
+ (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
+ *I != prec::Comma || Current.NestingLevel == 0))
+ NewParenState.Indent =
+ std::max(std::max(State.Column, NewParenState.Indent),
+ State.Stack.back().LastSpace);
+
+ // Do not indent relative to the fake parentheses inserted for "." or "->".
+ // This is a special case to make the following to statements consistent:
+ // OuterFunction(InnerFunctionCall( // break
+ // ParameterToInnerFunction));
+ // OuterFunction(SomeObject.InnerFunctionCall( // break
+ // ParameterToInnerFunction));
+ if (*I > prec::Unknown)
+ NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
+ if (*I != prec::Conditional && !Current.is(TT_UnaryOperator) &&
+ Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
+ NewParenState.StartOfFunctionCall = State.Column;
+
+ // Always indent conditional expressions. Never indent expression where
+ // the 'operator' is ',', ';' or an assignment (i.e. *I <=
+ // prec::Assignment) as those have different indentation rules. Indent
+ // other expression, unless the indentation needs to be skipped.
+ if (*I == prec::Conditional ||
+ (!SkipFirstExtraIndent && *I > prec::Assignment &&
+ !Current.isTrailingComment()))
+ NewParenState.Indent += Style.ContinuationIndentWidth;
+ if ((Previous && !Previous->opensScope()) || *I != prec::Comma)
+ NewParenState.BreakBeforeParameter = false;
+ State.Stack.push_back(NewParenState);
+ SkipFirstExtraIndent = false;
+ }
+}
+
+void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) {
+ for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) {
+ unsigned VariablePos = State.Stack.back().VariablePos;
+ if (State.Stack.size() == 1) {
+ // Do not pop the last element.
+ break;
+ }
+ State.Stack.pop_back();
+ State.Stack.back().VariablePos = VariablePos;
+ }
+}
+
+void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
+ bool Newline) {
+ const FormatToken &Current = *State.NextToken;
+ if (!Current.opensScope())
+ return;
+
+ if (Current.MatchingParen && Current.BlockKind == BK_Block) {
+ moveStateToNewBlock(State);
+ return;
+ }
+
+ unsigned NewIndent;
+ unsigned LastSpace = State.Stack.back().LastSpace;
+ bool AvoidBinPacking;
+ bool BreakBeforeParameter = false;
+ unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall,
+ State.Stack.back().NestedBlockIndent);
+ if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
+ if (Current.opensBlockOrBlockTypeList(Style)) {
+ NewIndent = Style.IndentWidth +
+ std::min(State.Column, State.Stack.back().NestedBlockIndent);
+ } else {
+ NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth;
+ }
+ const FormatToken *NextNoComment = Current.getNextNonComment();
+ bool EndsInComma = Current.MatchingParen &&
+ Current.MatchingParen->Previous &&
+ Current.MatchingParen->Previous->is(tok::comma);
+ AvoidBinPacking =
+ (Current.is(TT_ArrayInitializerLSquare) && EndsInComma) ||
+ Current.is(TT_DictLiteral) ||
+ Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments ||
+ (NextNoComment &&
+ NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,
+ TT_DesignatedInitializerLSquare));
+ if (Current.ParameterCount > 1)
+ NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);
+ } else {
+ NewIndent = Style.ContinuationIndentWidth +
+ std::max(State.Stack.back().LastSpace,
+ State.Stack.back().StartOfFunctionCall);
+
+ // Ensure that different different brackets force relative alignment, e.g.:
+ // void SomeFunction(vector< // break
+ // int> v);
+ // FIXME: We likely want to do this for more combinations of brackets.
+ // Verify that it is wanted for ObjC, too.
+ if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) {
+ NewIndent = std::max(NewIndent, State.Stack.back().Indent);
+ LastSpace = std::max(LastSpace, State.Stack.back().Indent);
+ }
+
+ // JavaScript template strings are special as we always want to indent
+ // nested expressions relative to the ${}. Otherwise, this can create quite
+ // a mess.
+ if (Current.is(TT_TemplateString)) {
+ unsigned Column = Current.IsMultiline
+ ? Current.LastLineColumnWidth
+ : State.Column + Current.ColumnWidth;
+ NewIndent = Column;
+ LastSpace = Column;
+ NestedBlockIndent = Column;
+ }
+
+ bool EndsInComma =
+ Current.MatchingParen &&
+ Current.MatchingParen->getPreviousNonComment() &&
+ Current.MatchingParen->getPreviousNonComment()->is(tok::comma);
+
+ AvoidBinPacking =
+ (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) ||
+ (State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
+ (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
+ (Style.ExperimentalAutoDetectBinPacking &&
+ (Current.PackingKind == PPK_OnePerLine ||
+ (!BinPackInconclusiveFunctions &&
+ Current.PackingKind == PPK_Inconclusive)));
+
+ if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) {
+ if (Style.ColumnLimit) {
+ // If this '[' opens an ObjC call, determine whether all parameters fit
+ // into one line and put one per line if they don't.
+ if (getLengthToMatchingParen(Current) + State.Column >
+ getColumnLimit(State))
+ BreakBeforeParameter = true;
+ } else {
+ // For ColumnLimit = 0, we have to figure out whether there is or has to
+ // be a line break within this call.
+ for (const FormatToken *Tok = &Current;
+ Tok && Tok != Current.MatchingParen; Tok = Tok->Next) {
+ if (Tok->MustBreakBefore ||
+ (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) {
+ BreakBeforeParameter = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (Style.Language == FormatStyle::LK_JavaScript && EndsInComma)
+ BreakBeforeParameter = true;
+ }
+ // Generally inherit NoLineBreak from the current scope to nested scope.
+ // However, don't do this for non-empty nested blocks, dict literals and
+ // array literals as these follow different indentation rules.
+ bool NoLineBreak =
+ Current.Children.empty() &&
+ !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) &&
+ (State.Stack.back().NoLineBreak ||
+ State.Stack.back().NoLineBreakInOperand ||
+ (Current.is(TT_TemplateOpener) &&
+ State.Stack.back().ContainsUnwrappedBuilder));
+ State.Stack.push_back(
+ ParenState(NewIndent, LastSpace, AvoidBinPacking, NoLineBreak));
+ State.Stack.back().NestedBlockIndent = NestedBlockIndent;
+ State.Stack.back().BreakBeforeParameter = BreakBeforeParameter;
+ State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1;
+}
+
+void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) {
+ const FormatToken &Current = *State.NextToken;
+ if (!Current.closesScope())
+ return;
+
+ // If we encounter a closing ), ], } or >, we can remove a level from our
+ // stacks.
+ if (State.Stack.size() > 1 &&
+ (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) ||
+ (Current.is(tok::r_brace) && State.NextToken != State.Line->First) ||
+ State.NextToken->is(TT_TemplateCloser)))
+ State.Stack.pop_back();
+
+ if (Current.is(tok::r_square)) {
+ // If this ends the array subscript expr, reset the corresponding value.
+ const FormatToken *NextNonComment = Current.getNextNonComment();
+ if (NextNonComment && NextNonComment->isNot(tok::l_square))
+ State.Stack.back().StartOfArraySubscripts = 0;
+ }
+}
+
+void ContinuationIndenter::moveStateToNewBlock(LineState &State) {
+ unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent;
+ // ObjC block sometimes follow special indentation rules.
+ unsigned NewIndent =
+ NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace)
+ ? Style.ObjCBlockIndentWidth
+ : Style.IndentWidth);
+ State.Stack.push_back(ParenState(NewIndent, State.Stack.back().LastSpace,
+ /*AvoidBinPacking=*/true,
+ /*NoLineBreak=*/false));
+ State.Stack.back().NestedBlockIndent = NestedBlockIndent;
+ State.Stack.back().BreakBeforeParameter = true;
+}
+
+unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
+ LineState &State) {
+ if (!Current.IsMultiline)
+ return 0;
+
+ // Break before further function parameters on all levels.
+ for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
+ State.Stack[i].BreakBeforeParameter = true;
+
+ unsigned ColumnsUsed = State.Column;
+ // We can only affect layout of the first and the last line, so the penalty
+ // for all other lines is constant, and we ignore it.
+ State.Column = Current.LastLineColumnWidth;
+
+ if (ColumnsUsed > getColumnLimit(State))
+ return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
+ return 0;
+}
+
+unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+ LineState &State,
+ bool DryRun) {
+ // Don't break multi-line tokens other than block comments. Instead, just
+ // update the state.
+ if (Current.isNot(TT_BlockComment) && Current.IsMultiline)
+ return addMultilineToken(Current, State);
+
+ // Don't break implicit string literals or import statements.
+ if (Current.is(TT_ImplicitStringLiteral) ||
+ State.Line->Type == LT_ImportStatement)
+ return 0;
+
+ if (!Current.isStringLiteral() && !Current.is(tok::comment))
+ return 0;
+
+ std::unique_ptr<BreakableToken> Token;
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
+ unsigned ColumnLimit = getColumnLimit(State);
+
+ if (Current.isStringLiteral()) {
+ // FIXME: String literal breaking is currently disabled for Java and JS, as
+ // it requires strings to be merged using "+" which we don't support.
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript ||
+ !Style.BreakStringLiterals)
+ return 0;
+
+ // Don't break string literals inside preprocessor directives (except for
+ // #define directives, as their contents are stored in separate lines and
+ // are not affected by this check).
+ // This way we avoid breaking code with line directives and unknown
+ // preprocessor directives that contain long string literals.
+ if (State.Line->Type == LT_PreprocessorDirective)
+ return 0;
+ // Exempts unterminated string literals from line breaking. The user will
+ // likely want to terminate the string before any line breaking is done.
+ if (Current.IsUnterminatedLiteral)
+ return 0;
+
+ StringRef Text = Current.TokenText;
+ StringRef Prefix;
+ StringRef Postfix;
+ // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
+ // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
+ // reduce the overhead) for each FormatToken, which is a string, so that we
+ // don't run multiple checks here on the hot path.
+ if ((Text.endswith(Postfix = "\"") &&
+ (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") ||
+ Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") ||
+ Text.startswith(Prefix = "u8\"") ||
+ Text.startswith(Prefix = "L\""))) ||
+ (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
+ Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
+ Postfix, State.Line->InPPDirective,
+ Encoding, Style));
+ } else {
+ return 0;
+ }
+ } else if (Current.is(TT_BlockComment)) {
+ if (!Current.isTrailingComment() || !Style.ReflowComments ||
+ // If a comment token switches formatting, like
+ // /* clang-format on */, we don't want to break it further,
+ // but we may still want to adjust its indentation.
+ switchesFormatting(Current))
+ return addMultilineToken(Current, State);
+ Token.reset(new BreakableBlockComment(
+ Current, StartColumn, Current.OriginalColumn, !Current.Previous,
+ State.Line->InPPDirective, Encoding, Style));
+ } else if (Current.is(TT_LineComment) &&
+ (Current.Previous == nullptr ||
+ Current.Previous->isNot(TT_ImplicitStringLiteral))) {
+ if (!Style.ReflowComments ||
+ CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+ switchesFormatting(Current))
+ return 0;
+ Token.reset(new BreakableLineCommentSection(
+ Current, StartColumn, Current.OriginalColumn, !Current.Previous,
+ /*InPPDirective=*/false, Encoding, Style));
+ // We don't insert backslashes when breaking line comments.
+ ColumnLimit = Style.ColumnLimit;
+ } else {
+ return 0;
+ }
+ if (Current.UnbreakableTailLength >= ColumnLimit)
+ return 0;
+
+ unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+ bool BreakInserted = false;
+ // We use a conservative reflowing strategy. Reflow starts after a line is
+ // broken or the corresponding whitespace compressed. Reflow ends as soon as a
+ // line that doesn't get reflown with the previous line is reached.
+ bool ReflowInProgress = false;
+ unsigned Penalty = 0;
+ unsigned RemainingTokenColumns = 0;
+ for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
+ LineIndex != EndIndex; ++LineIndex) {
+ BreakableToken::Split SplitBefore(StringRef::npos, 0);
+ if (ReflowInProgress) {
+ SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
+ RemainingSpace, CommentPragmasRegex);
+ }
+ ReflowInProgress = SplitBefore.first != StringRef::npos;
+ unsigned TailOffset =
+ ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
+ if (!DryRun)
+ Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
+ RemainingSpace, SplitBefore, Whitespaces);
+ RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
+ LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
+ while (RemainingTokenColumns > RemainingSpace) {
+ BreakableToken::Split Split = Token->getSplit(
+ LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
+ if (Split.first == StringRef::npos) {
+ // The last line's penalty is handled in addNextStateToQueue().
+ if (LineIndex < EndIndex - 1)
+ Penalty += Style.PenaltyExcessCharacter *
+ (RemainingTokenColumns - RemainingSpace);
+ break;
+ }
+ assert(Split.first != 0);
+
+ // Check if compressing the whitespace range will bring the line length
+ // under the limit. If that is the case, we perform whitespace compression
+ // instead of inserting a line break.
+ unsigned RemainingTokenColumnsAfterCompression =
+ Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
+ if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
+ RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
+ ReflowInProgress = true;
+ if (!DryRun)
+ Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
+ break;
+ }
+
+ unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
+ LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+
+ // When breaking before a tab character, it may be moved by a few columns,
+ // but will still be expanded to the next tab stop, so we don't save any
+ // columns.
+ if (NewRemainingTokenColumns == RemainingTokenColumns)
+ break;
+
+ assert(NewRemainingTokenColumns < RemainingTokenColumns);
+ if (!DryRun)
+ Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
+ Penalty += Current.SplitPenalty;
+ unsigned ColumnsUsed =
+ Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
+ if (ColumnsUsed > ColumnLimit) {
+ Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
+ }
+ TailOffset += Split.first + Split.second;
+ RemainingTokenColumns = NewRemainingTokenColumns;
+ ReflowInProgress = true;
+ BreakInserted = true;
+ }
+ }
+
+ State.Column = RemainingTokenColumns;
+
+ if (BreakInserted) {
+ // If we break the token inside a parameter list, we need to break before
+ // the next parameter on all levels, so that the next parameter is clearly
+ // visible. Line comments already introduce a break.
+ if (Current.isNot(TT_LineComment)) {
+ for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
+ State.Stack[i].BreakBeforeParameter = true;
+ }
+
+ Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString
+ : Style.PenaltyBreakComment;
+
+ State.Stack.back().LastSpace = StartColumn;
+ }
+
+ Token->updateNextToken(State);
+
+ return Penalty;
+}
+
+unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
+ // In preprocessor directives reserve two chars for trailing " \"
+ return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0);
+}
+
+bool ContinuationIndenter::nextIsMultilineString(const LineState &State) {
+ const FormatToken &Current = *State.NextToken;
+ if (!Current.isStringLiteral() || Current.is(TT_ImplicitStringLiteral))
+ return false;
+ // We never consider raw string literals "multiline" for the purpose of
+ // AlwaysBreakBeforeMultilineStrings implementation as they are special-cased
+ // (see TokenAnnotator::mustBreakBefore().
+ if (Current.TokenText.startswith("R\""))
+ return false;
+ if (Current.IsMultiline)
+ return true;
+ if (Current.getNextNonComment() &&
+ Current.getNextNonComment()->isStringLiteral())
+ return true; // Implicit concatenation.
+ if (Style.ColumnLimit != 0 &&
+ State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
+ Style.ColumnLimit)
+ return true; // String will be split.
+ return false;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
new file mode 100644
index 000000000000..9a06aa6f6267
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.h
@@ -0,0 +1,382 @@
+//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an indenter that manages the indentation of
+/// continuations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
+#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+namespace clang {
+class SourceManager;
+
+namespace format {
+
+class AnnotatedLine;
+struct FormatToken;
+struct LineState;
+struct ParenState;
+class WhitespaceManager;
+
+class ContinuationIndenter {
+public:
+ /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
+ /// column \p FirstIndent.
+ ContinuationIndenter(const FormatStyle &Style,
+ const AdditionalKeywords &Keywords,
+ const SourceManager &SourceMgr,
+ WhitespaceManager &Whitespaces,
+ encoding::Encoding Encoding,
+ bool BinPackInconclusiveFunctions);
+
+ /// \brief Get the initial state, i.e. the state after placing \p Line's
+ /// first token at \p FirstIndent.
+ LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
+ bool DryRun);
+
+ // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
+ // better home.
+ /// \brief Returns \c true, if a line break after \p State is allowed.
+ bool canBreak(const LineState &State);
+
+ /// \brief Returns \c true, if a line break after \p State is mandatory.
+ bool mustBreak(const LineState &State);
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Puts the token on the current line if \p Newline is \c false and adds a
+ /// line break and necessary indentation otherwise.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
+ unsigned ExtraSpaces = 0);
+
+ /// \brief Get the column limit for this line. This is the style's column
+ /// limit, potentially reduced for preprocessor definitions.
+ unsigned getColumnLimit(const LineState &State) const;
+
+private:
+ /// \brief Mark the next token as consumed in \p State and modify its stacks
+ /// accordingly.
+ unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
+
+ /// \brief Update 'State' according to the next token's fake left parentheses.
+ void moveStatePastFakeLParens(LineState &State, bool Newline);
+ /// \brief Update 'State' according to the next token's fake r_parens.
+ void moveStatePastFakeRParens(LineState &State);
+
+ /// \brief Update 'State' according to the next token being one of "(<{[".
+ void moveStatePastScopeOpener(LineState &State, bool Newline);
+ /// \brief Update 'State' according to the next token being one of ")>}]".
+ void moveStatePastScopeCloser(LineState &State);
+ /// \brief Update 'State' with the next token opening a nested block.
+ void moveStateToNewBlock(LineState &State);
+
+ /// \brief If the current token sticks out over the end of the line, break
+ /// it if possible.
+ ///
+ /// \returns An extra penalty if a token was broken, otherwise 0.
+ ///
+ /// The returned penalty will cover the cost of the additional line breaks and
+ /// column limit violation in all lines except for the last one. The penalty
+ /// for the column limit violation in the last line (and in single line
+ /// tokens) is handled in \c addNextStateToQueue.
+ unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
+ bool DryRun);
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Puts the token on the current line.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ void addTokenOnCurrentLine(LineState &State, bool DryRun,
+ unsigned ExtraSpaces);
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Adds a line break and necessary indentation.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ unsigned addTokenOnNewLine(LineState &State, bool DryRun);
+
+ /// \brief Calculate the new column for a line wrap before the next token.
+ unsigned getNewLineColumn(const LineState &State);
+
+ /// \brief Adds a multiline token to the \p State.
+ ///
+ /// \returns Extra penalty for the first line of the literal: last line is
+ /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
+ /// matter, as we don't change them.
+ unsigned addMultilineToken(const FormatToken &Current, LineState &State);
+
+ /// \brief Returns \c true if the next token starts a multiline string
+ /// literal.
+ ///
+ /// This includes implicitly concatenated strings, strings that will be broken
+ /// by clang-format and string literals with escaped newlines.
+ bool nextIsMultilineString(const LineState &State);
+
+ FormatStyle Style;
+ const AdditionalKeywords &Keywords;
+ const SourceManager &SourceMgr;
+ WhitespaceManager &Whitespaces;
+ encoding::Encoding Encoding;
+ bool BinPackInconclusiveFunctions;
+ llvm::Regex CommentPragmasRegex;
+};
+
+struct ParenState {
+ ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
+ bool NoLineBreak)
+ : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
+ BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
+ BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
+ NoLineBreakInOperand(false), LastOperatorWrapped(true),
+ ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
+ AlignColons(true), ObjCSelectorNameFound(false),
+ HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
+
+ /// \brief The position to which a specific parenthesis level needs to be
+ /// indented.
+ unsigned Indent;
+
+ /// \brief The position of the last space on each level.
+ ///
+ /// Used e.g. to break like:
+ /// functionCall(Parameter, otherCall(
+ /// OtherParameter));
+ unsigned LastSpace;
+
+ /// \brief If a block relative to this parenthesis level gets wrapped, indent
+ /// it this much.
+ unsigned NestedBlockIndent;
+
+ /// \brief The position the first "<<" operator encountered on each level.
+ ///
+ /// Used to align "<<" operators. 0 if no such operator has been encountered
+ /// on a level.
+ unsigned FirstLessLess = 0;
+
+ /// \brief The column of a \c ? in a conditional expression;
+ unsigned QuestionColumn = 0;
+
+ /// \brief The position of the colon in an ObjC method declaration/call.
+ unsigned ColonPos = 0;
+
+ /// \brief The start of the most recent function in a builder-type call.
+ unsigned StartOfFunctionCall = 0;
+
+ /// \brief Contains the start of array subscript expressions, so that they
+ /// can be aligned.
+ unsigned StartOfArraySubscripts = 0;
+
+ /// \brief If a nested name specifier was broken over multiple lines, this
+ /// contains the start column of the second line. Otherwise 0.
+ unsigned NestedNameSpecifierContinuation = 0;
+
+ /// \brief If a call expression was broken over multiple lines, this
+ /// contains the start column of the second line. Otherwise 0.
+ unsigned CallContinuation = 0;
+
+ /// \brief The column of the first variable name in a variable declaration.
+ ///
+ /// Used to align further variables if necessary.
+ unsigned VariablePos = 0;
+
+ /// \brief Whether a newline needs to be inserted before the block's closing
+ /// brace.
+ ///
+ /// We only want to insert a newline before the closing brace if there also
+ /// was a newline after the beginning left brace.
+ bool BreakBeforeClosingBrace : 1;
+
+ /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
+ /// lines, in this context.
+ bool AvoidBinPacking : 1;
+
+ /// \brief Break after the next comma (or all the commas in this context if
+ /// \c AvoidBinPacking is \c true).
+ bool BreakBeforeParameter : 1;
+
+ /// \brief Line breaking in this context would break a formatting rule.
+ bool NoLineBreak : 1;
+
+ /// \brief Same as \c NoLineBreak, but is restricted until the end of the
+ /// operand (including the next ",").
+ bool NoLineBreakInOperand : 1;
+
+ /// \brief True if the last binary operator on this level was wrapped to the
+ /// next line.
+ bool LastOperatorWrapped : 1;
+
+ /// \brief \c true if this \c ParenState already contains a line-break.
+ ///
+ /// The first line break in a certain \c ParenState causes extra penalty so
+ /// that clang-format prefers similar breaks, i.e. breaks in the same
+ /// parenthesis.
+ bool ContainsLineBreak : 1;
+
+ /// \brief \c true if this \c ParenState contains multiple segments of a
+ /// builder-type call on one line.
+ bool ContainsUnwrappedBuilder : 1;
+
+ /// \brief \c true if the colons of the curren ObjC method expression should
+ /// be aligned.
+ ///
+ /// Not considered for memoization as it will always have the same value at
+ /// the same token.
+ bool AlignColons : 1;
+
+ /// \brief \c true if at least one selector name was found in the current
+ /// ObjC method expression.
+ ///
+ /// Not considered for memoization as it will always have the same value at
+ /// the same token.
+ bool ObjCSelectorNameFound : 1;
+
+ /// \brief \c true if there are multiple nested blocks inside these parens.
+ ///
+ /// Not considered for memoization as it will always have the same value at
+ /// the same token.
+ bool HasMultipleNestedBlocks : 1;
+
+ // \brief The start of a nested block (e.g. lambda introducer in C++ or
+ // "function" in JavaScript) is not wrapped to a new line.
+ bool NestedBlockInlined : 1;
+
+ bool operator<(const ParenState &Other) const {
+ if (Indent != Other.Indent)
+ return Indent < Other.Indent;
+ if (LastSpace != Other.LastSpace)
+ return LastSpace < Other.LastSpace;
+ if (NestedBlockIndent != Other.NestedBlockIndent)
+ return NestedBlockIndent < Other.NestedBlockIndent;
+ if (FirstLessLess != Other.FirstLessLess)
+ return FirstLessLess < Other.FirstLessLess;
+ if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
+ return BreakBeforeClosingBrace;
+ if (QuestionColumn != Other.QuestionColumn)
+ return QuestionColumn < Other.QuestionColumn;
+ if (AvoidBinPacking != Other.AvoidBinPacking)
+ return AvoidBinPacking;
+ if (BreakBeforeParameter != Other.BreakBeforeParameter)
+ return BreakBeforeParameter;
+ if (NoLineBreak != Other.NoLineBreak)
+ return NoLineBreak;
+ if (LastOperatorWrapped != Other.LastOperatorWrapped)
+ return LastOperatorWrapped;
+ if (ColonPos != Other.ColonPos)
+ return ColonPos < Other.ColonPos;
+ if (StartOfFunctionCall != Other.StartOfFunctionCall)
+ return StartOfFunctionCall < Other.StartOfFunctionCall;
+ if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
+ return StartOfArraySubscripts < Other.StartOfArraySubscripts;
+ if (CallContinuation != Other.CallContinuation)
+ return CallContinuation < Other.CallContinuation;
+ if (VariablePos != Other.VariablePos)
+ return VariablePos < Other.VariablePos;
+ if (ContainsLineBreak != Other.ContainsLineBreak)
+ return ContainsLineBreak;
+ if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
+ return ContainsUnwrappedBuilder;
+ if (NestedBlockInlined != Other.NestedBlockInlined)
+ return NestedBlockInlined;
+ return false;
+ }
+};
+
+/// \brief The current state when indenting a unwrapped line.
+///
+/// As the indenting tries different combinations this is copied by value.
+struct LineState {
+ /// \brief The number of used columns in the current line.
+ unsigned Column;
+
+ /// \brief The token that needs to be next formatted.
+ FormatToken *NextToken;
+
+ /// \brief \c true if this line contains a continued for-loop section.
+ bool LineContainsContinuedForLoopSection;
+
+ /// \brief The \c NestingLevel at the start of this line.
+ unsigned StartOfLineLevel;
+
+ /// \brief The lowest \c NestingLevel on the current line.
+ unsigned LowestLevelOnLine;
+
+ /// \brief The start column of the string literal, if we're in a string
+ /// literal sequence, 0 otherwise.
+ unsigned StartOfStringLiteral;
+
+ /// \brief A stack keeping track of properties applying to parenthesis
+ /// levels.
+ std::vector<ParenState> Stack;
+
+ /// \brief Ignore the stack of \c ParenStates for state comparison.
+ ///
+ /// In long and deeply nested unwrapped lines, the current algorithm can
+ /// be insufficient for finding the best formatting with a reasonable amount
+ /// of time and memory. Setting this flag will effectively lead to the
+ /// algorithm not analyzing some combinations. However, these combinations
+ /// rarely contain the optimal solution: In short, accepting a higher
+ /// penalty early would need to lead to different values in the \c
+ /// ParenState stack (in an otherwise identical state) and these different
+ /// values would need to lead to a significant amount of avoided penalty
+ /// later.
+ ///
+ /// FIXME: Come up with a better algorithm instead.
+ bool IgnoreStackForComparison;
+
+ /// \brief The indent of the first token.
+ unsigned FirstIndent;
+
+ /// \brief The line that is being formatted.
+ ///
+ /// Does not need to be considered for memoization because it doesn't change.
+ const AnnotatedLine *Line;
+
+ /// \brief Comparison operator to be able to used \c LineState in \c map.
+ bool operator<(const LineState &Other) const {
+ if (NextToken != Other.NextToken)
+ return NextToken < Other.NextToken;
+ if (Column != Other.Column)
+ return Column < Other.Column;
+ if (LineContainsContinuedForLoopSection !=
+ Other.LineContainsContinuedForLoopSection)
+ return LineContainsContinuedForLoopSection;
+ if (StartOfLineLevel != Other.StartOfLineLevel)
+ return StartOfLineLevel < Other.StartOfLineLevel;
+ if (LowestLevelOnLine != Other.LowestLevelOnLine)
+ return LowestLevelOnLine < Other.LowestLevelOnLine;
+ if (StartOfStringLiteral != Other.StartOfStringLiteral)
+ return StartOfStringLiteral < Other.StartOfStringLiteral;
+ if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
+ return false;
+ return Stack < Other.Stack;
+ }
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/Encoding.h b/contrib/llvm/tools/clang/lib/Format/Encoding.h
new file mode 100644
index 000000000000..3339597b4edd
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/Encoding.h
@@ -0,0 +1,128 @@
+//===--- Encoding.h - Format C++ code -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Contains functions for text encoding manipulation. Supports UTF-8,
+/// 8-bit encodings and escape sequences in C++ string literals.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_ENCODING_H
+#define LLVM_CLANG_LIB_FORMAT_ENCODING_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"
+
+namespace clang {
+namespace format {
+namespace encoding {
+
+enum Encoding {
+ Encoding_UTF8,
+ Encoding_Unknown // We treat all other encodings as 8-bit encodings.
+};
+
+/// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8,
+/// it is considered UTF8, otherwise we treat it as some 8-bit encoding.
+inline Encoding detectEncoding(StringRef Text) {
+ const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin());
+ const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end());
+ if (llvm::isLegalUTF8String(&Ptr, BufEnd))
+ return Encoding_UTF8;
+ return Encoding_Unknown;
+}
+
+/// \brief Returns the number of columns required to display the \p Text on a
+/// generic Unicode-capable terminal. Text is assumed to use the specified
+/// \p Encoding.
+inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
+ if (Encoding == Encoding_UTF8) {
+ int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
+ // FIXME: Figure out the correct way to handle this in the presence of both
+ // printable and unprintable multi-byte UTF-8 characters. Falling back to
+ // returning the number of bytes may cause problems, as columnWidth suddenly
+ // becomes non-additive.
+ if (ContentWidth >= 0)
+ return ContentWidth;
+ }
+ return Text.size();
+}
+
+/// \brief Returns the number of columns required to display the \p Text,
+/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
+/// text is assumed to use the specified \p Encoding.
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+ unsigned TabWidth, Encoding Encoding) {
+ unsigned TotalWidth = 0;
+ StringRef Tail = Text;
+ for (;;) {
+ StringRef::size_type TabPos = Tail.find('\t');
+ if (TabPos == StringRef::npos)
+ return TotalWidth + columnWidth(Tail, Encoding);
+ TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+ TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+ Tail = Tail.substr(TabPos + 1);
+ }
+}
+
+/// \brief Gets the number of bytes in a sequence representing a single
+/// codepoint and starting with FirstChar in the specified Encoding.
+inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {
+ switch (Encoding) {
+ case Encoding_UTF8:
+ return llvm::getNumBytesForUTF8(FirstChar);
+ default:
+ return 1;
+ }
+}
+
+inline bool isOctDigit(char c) { return '0' <= c && c <= '7'; }
+
+inline bool isHexDigit(char c) {
+ return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F');
+}
+
+/// \brief Gets the length of an escape sequence inside a C++ string literal.
+/// Text should span from the beginning of the escape sequence (starting with a
+/// backslash) to the end of the string literal.
+inline unsigned getEscapeSequenceLength(StringRef Text) {
+ assert(Text[0] == '\\');
+ if (Text.size() < 2)
+ return 1;
+
+ switch (Text[1]) {
+ case 'u':
+ return 6;
+ case 'U':
+ return 10;
+ case 'x': {
+ unsigned I = 2; // Point after '\x'.
+ while (I < Text.size() && isHexDigit(Text[I]))
+ ++I;
+ return I;
+ }
+ default:
+ if (isOctDigit(Text[1])) {
+ unsigned I = 1;
+ while (I < Text.size() && I < 4 && isOctDigit(Text[I]))
+ ++I;
+ return I;
+ }
+ return 1 + llvm::getNumBytesForUTF8(Text[1]);
+ }
+}
+
+} // namespace encoding
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp
new file mode 100644
index 000000000000..7659d56adf25
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp
@@ -0,0 +1,2113 @@
+//===--- Format.cpp - Format C++ code -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements functions declared in Format.h. This will be
+/// split into separate files as we go.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+#include "AffectedRangeManager.h"
+#include "ContinuationIndenter.h"
+#include "FormatTokenLexer.h"
+#include "NamespaceEndCommentsFixer.h"
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineFormatter.h"
+#include "UnwrappedLineParser.h"
+#include "UsingDeclarationsSorter.h"
+#include "WhitespaceManager.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <algorithm>
+#include <memory>
+#include <string>
+
+#define DEBUG_TYPE "format-formatter"
+
+using clang::format::FormatStyle;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
+ static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
+ IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
+ IO.enumCase(Value, "Java", FormatStyle::LK_Java);
+ IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
+ IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC);
+ IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
+ IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
+ static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
+ IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
+ IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
+ IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
+ IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
+ IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
+ static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
+ IO.enumCase(Value, "Never", FormatStyle::UT_Never);
+ IO.enumCase(Value, "false", FormatStyle::UT_Never);
+ IO.enumCase(Value, "Always", FormatStyle::UT_Always);
+ IO.enumCase(Value, "true", FormatStyle::UT_Always);
+ IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
+ IO.enumCase(Value, "ForContinuationAndIndentation",
+ FormatStyle::UT_ForContinuationAndIndentation);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> {
+ static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) {
+ IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave);
+ IO.enumCase(Value, "Single", FormatStyle::JSQS_Single);
+ IO.enumCase(Value, "Double", FormatStyle::JSQS_Double);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
+ static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
+ IO.enumCase(Value, "None", FormatStyle::SFS_None);
+ IO.enumCase(Value, "false", FormatStyle::SFS_None);
+ IO.enumCase(Value, "All", FormatStyle::SFS_All);
+ IO.enumCase(Value, "true", FormatStyle::SFS_All);
+ IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
+ IO.enumCase(Value, "InlineOnly", FormatStyle::SFS_InlineOnly);
+ IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
+ static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
+ IO.enumCase(Value, "All", FormatStyle::BOS_All);
+ IO.enumCase(Value, "true", FormatStyle::BOS_All);
+ IO.enumCase(Value, "None", FormatStyle::BOS_None);
+ IO.enumCase(Value, "false", FormatStyle::BOS_None);
+ IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
+ static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
+ IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
+ IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
+ IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla);
+ IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
+ IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
+ IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
+ IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit);
+ IO.enumCase(Value, "Custom", FormatStyle::BS_Custom);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> {
+ static void enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {
+ IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon);
+ IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma);
+ IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {
+ static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {
+ IO.enumCase(Value, "None", FormatStyle::RTBS_None);
+ IO.enumCase(Value, "All", FormatStyle::RTBS_All);
+ IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel);
+ IO.enumCase(Value, "TopLevelDefinitions",
+ FormatStyle::RTBS_TopLevelDefinitions);
+ IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
+ static void
+ enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) {
+ IO.enumCase(Value, "None", FormatStyle::DRTBS_None);
+ IO.enumCase(Value, "All", FormatStyle::DRTBS_All);
+ IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel);
+
+ // For backward compatibility.
+ IO.enumCase(Value, "false", FormatStyle::DRTBS_None);
+ IO.enumCase(Value, "true", FormatStyle::DRTBS_All);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
+ static void enumeration(IO &IO,
+ FormatStyle::NamespaceIndentationKind &Value) {
+ IO.enumCase(Value, "None", FormatStyle::NI_None);
+ IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
+ IO.enumCase(Value, "All", FormatStyle::NI_All);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {
+ static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) {
+ IO.enumCase(Value, "Align", FormatStyle::BAS_Align);
+ IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign);
+ IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak);
+
+ // For backward compatibility.
+ IO.enumCase(Value, "true", FormatStyle::BAS_Align);
+ IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {
+ static void enumeration(IO &IO, FormatStyle::EscapedNewlineAlignmentStyle &Value) {
+ IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign);
+ IO.enumCase(Value, "Left", FormatStyle::ENAS_Left);
+ IO.enumCase(Value, "Right", FormatStyle::ENAS_Right);
+
+ // For backward compatibility.
+ IO.enumCase(Value, "true", FormatStyle::ENAS_Left);
+ IO.enumCase(Value, "false", FormatStyle::ENAS_Right);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
+ static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) {
+ IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
+ IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
+ IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
+
+ // For backward compatibility.
+ IO.enumCase(Value, "true", FormatStyle::PAS_Left);
+ IO.enumCase(Value, "false", FormatStyle::PAS_Right);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
+ static void enumeration(IO &IO,
+ FormatStyle::SpaceBeforeParensOptions &Value) {
+ IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
+ IO.enumCase(Value, "ControlStatements",
+ FormatStyle::SBPO_ControlStatements);
+ IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
+
+ // For backward compatibility.
+ IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
+ IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
+ }
+};
+
+template <> struct MappingTraits<FormatStyle> {
+ static void mapping(IO &IO, FormatStyle &Style) {
+ // When reading, read the language first, we need it for getPredefinedStyle.
+ IO.mapOptional("Language", Style.Language);
+
+ if (IO.outputting()) {
+ StringRef StylesArray[] = {"LLVM", "Google", "Chromium",
+ "Mozilla", "WebKit", "GNU"};
+ ArrayRef<StringRef> Styles(StylesArray);
+ for (size_t i = 0, e = Styles.size(); i < e; ++i) {
+ StringRef StyleName(Styles[i]);
+ FormatStyle PredefinedStyle;
+ if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
+ Style == PredefinedStyle) {
+ IO.mapOptional("# BasedOnStyle", StyleName);
+ break;
+ }
+ }
+ } else {
+ StringRef BasedOnStyle;
+ IO.mapOptional("BasedOnStyle", BasedOnStyle);
+ if (!BasedOnStyle.empty()) {
+ FormatStyle::LanguageKind OldLanguage = Style.Language;
+ FormatStyle::LanguageKind Language =
+ ((FormatStyle *)IO.getContext())->Language;
+ if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
+ IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
+ return;
+ }
+ Style.Language = OldLanguage;
+ }
+ }
+
+ // For backward compatibility.
+ if (!IO.outputting()) {
+ IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines);
+ IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
+ IO.mapOptional("IndentFunctionDeclarationAfterType",
+ Style.IndentWrappedFunctionNames);
+ IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
+ IO.mapOptional("SpaceAfterControlStatementKeyword",
+ Style.SpaceBeforeParens);
+ }
+
+ IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
+ IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
+ IO.mapOptional("AlignConsecutiveAssignments",
+ Style.AlignConsecutiveAssignments);
+ IO.mapOptional("AlignConsecutiveDeclarations",
+ Style.AlignConsecutiveDeclarations);
+ IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines);
+ IO.mapOptional("AlignOperands", Style.AlignOperands);
+ IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
+ IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
+ Style.AllowAllParametersOfDeclarationOnNextLine);
+ IO.mapOptional("AllowShortBlocksOnASingleLine",
+ Style.AllowShortBlocksOnASingleLine);
+ IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
+ Style.AllowShortCaseLabelsOnASingleLine);
+ IO.mapOptional("AllowShortFunctionsOnASingleLine",
+ Style.AllowShortFunctionsOnASingleLine);
+ IO.mapOptional("AllowShortIfStatementsOnASingleLine",
+ Style.AllowShortIfStatementsOnASingleLine);
+ IO.mapOptional("AllowShortLoopsOnASingleLine",
+ Style.AllowShortLoopsOnASingleLine);
+ IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
+ Style.AlwaysBreakAfterDefinitionReturnType);
+ IO.mapOptional("AlwaysBreakAfterReturnType",
+ Style.AlwaysBreakAfterReturnType);
+ // If AlwaysBreakAfterDefinitionReturnType was specified but
+ // AlwaysBreakAfterReturnType was not, initialize the latter from the
+ // former for backwards compatibility.
+ if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None &&
+ Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) {
+ if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All)
+ Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
+ else if (Style.AlwaysBreakAfterDefinitionReturnType ==
+ FormatStyle::DRTBS_TopLevel)
+ Style.AlwaysBreakAfterReturnType =
+ FormatStyle::RTBS_TopLevelDefinitions;
+ }
+
+ IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
+ Style.AlwaysBreakBeforeMultilineStrings);
+ IO.mapOptional("AlwaysBreakTemplateDeclarations",
+ Style.AlwaysBreakTemplateDeclarations);
+ IO.mapOptional("BinPackArguments", Style.BinPackArguments);
+ IO.mapOptional("BinPackParameters", Style.BinPackParameters);
+ IO.mapOptional("BraceWrapping", Style.BraceWrapping);
+ IO.mapOptional("BreakBeforeBinaryOperators",
+ Style.BreakBeforeBinaryOperators);
+ IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
+ IO.mapOptional("BreakBeforeInheritanceComma",
+ Style.BreakBeforeInheritanceComma);
+ IO.mapOptional("BreakBeforeTernaryOperators",
+ Style.BreakBeforeTernaryOperators);
+
+ bool BreakConstructorInitializersBeforeComma = false;
+ IO.mapOptional("BreakConstructorInitializersBeforeComma",
+ BreakConstructorInitializersBeforeComma);
+ IO.mapOptional("BreakConstructorInitializers",
+ Style.BreakConstructorInitializers);
+ // If BreakConstructorInitializersBeforeComma was specified but
+ // BreakConstructorInitializers was not, initialize the latter from the
+ // former for backwards compatibility.
+ if (BreakConstructorInitializersBeforeComma &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon)
+ Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+
+ IO.mapOptional("BreakAfterJavaFieldAnnotations",
+ Style.BreakAfterJavaFieldAnnotations);
+ IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
+ IO.mapOptional("ColumnLimit", Style.ColumnLimit);
+ IO.mapOptional("CommentPragmas", Style.CommentPragmas);
+ IO.mapOptional("CompactNamespaces", Style.CompactNamespaces);
+ IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
+ Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
+ IO.mapOptional("ConstructorInitializerIndentWidth",
+ Style.ConstructorInitializerIndentWidth);
+ IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
+ IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
+ IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
+ IO.mapOptional("DisableFormat", Style.DisableFormat);
+ IO.mapOptional("ExperimentalAutoDetectBinPacking",
+ Style.ExperimentalAutoDetectBinPacking);
+ IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
+ IO.mapOptional("ForEachMacros", Style.ForEachMacros);
+ IO.mapOptional("IncludeCategories", Style.IncludeCategories);
+ IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);
+ IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
+ IO.mapOptional("IndentWidth", Style.IndentWidth);
+ IO.mapOptional("IndentWrappedFunctionNames",
+ Style.IndentWrappedFunctionNames);
+ IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes);
+ IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports);
+ IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
+ Style.KeepEmptyLinesAtTheStartOfBlocks);
+ IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin);
+ IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd);
+ IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
+ IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
+ IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
+ IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
+ IO.mapOptional("ObjCSpaceBeforeProtocolList",
+ Style.ObjCSpaceBeforeProtocolList);
+ IO.mapOptional("PenaltyBreakAssignment",
+ Style.PenaltyBreakAssignment);
+ IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
+ Style.PenaltyBreakBeforeFirstCallParameter);
+ IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
+ IO.mapOptional("PenaltyBreakFirstLessLess",
+ Style.PenaltyBreakFirstLessLess);
+ IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
+ IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
+ IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
+ Style.PenaltyReturnTypeOnItsOwnLine);
+ IO.mapOptional("PointerAlignment", Style.PointerAlignment);
+ IO.mapOptional("ReflowComments", Style.ReflowComments);
+ IO.mapOptional("SortIncludes", Style.SortIncludes);
+ IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);
+ IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
+ IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword);
+ IO.mapOptional("SpaceBeforeAssignmentOperators",
+ Style.SpaceBeforeAssignmentOperators);
+ IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
+ IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
+ IO.mapOptional("SpacesBeforeTrailingComments",
+ Style.SpacesBeforeTrailingComments);
+ IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
+ IO.mapOptional("SpacesInContainerLiterals",
+ Style.SpacesInContainerLiterals);
+ IO.mapOptional("SpacesInCStyleCastParentheses",
+ Style.SpacesInCStyleCastParentheses);
+ IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
+ IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
+ IO.mapOptional("Standard", Style.Standard);
+ IO.mapOptional("TabWidth", Style.TabWidth);
+ IO.mapOptional("UseTab", Style.UseTab);
+ }
+};
+
+template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
+ static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) {
+ IO.mapOptional("AfterClass", Wrapping.AfterClass);
+ IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement);
+ IO.mapOptional("AfterEnum", Wrapping.AfterEnum);
+ IO.mapOptional("AfterFunction", Wrapping.AfterFunction);
+ IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace);
+ IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);
+ IO.mapOptional("AfterStruct", Wrapping.AfterStruct);
+ IO.mapOptional("AfterUnion", Wrapping.AfterUnion);
+ IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);
+ IO.mapOptional("BeforeElse", Wrapping.BeforeElse);
+ IO.mapOptional("IndentBraces", Wrapping.IndentBraces);
+ IO.mapOptional("SplitEmptyFunctionBody", Wrapping.SplitEmptyFunctionBody);
+ }
+};
+
+template <> struct MappingTraits<FormatStyle::IncludeCategory> {
+ static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) {
+ IO.mapOptional("Regex", Category.Regex);
+ IO.mapOptional("Priority", Category.Priority);
+ }
+};
+
+// Allows to read vector<FormatStyle> while keeping default values.
+// IO.getContext() should contain a pointer to the FormatStyle structure, that
+// will be used to get default values for missing keys.
+// If the first element has no Language specified, it will be treated as the
+// default one for the following elements.
+template <> struct DocumentListTraits<std::vector<FormatStyle>> {
+ static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
+ return Seq.size();
+ }
+ static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
+ size_t Index) {
+ if (Index >= Seq.size()) {
+ assert(Index == Seq.size());
+ FormatStyle Template;
+ if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
+ Template = Seq[0];
+ } else {
+ Template = *((const FormatStyle *)IO.getContext());
+ Template.Language = FormatStyle::LK_None;
+ }
+ Seq.resize(Index + 1, Template);
+ }
+ return Seq[Index];
+ }
+};
+} // namespace yaml
+} // namespace llvm
+
+namespace clang {
+namespace format {
+
+const std::error_category &getParseCategory() {
+ static ParseErrorCategory C;
+ return C;
+}
+std::error_code make_error_code(ParseError e) {
+ return std::error_code(static_cast<int>(e), getParseCategory());
+}
+
+inline llvm::Error make_string_error(const llvm::Twine &Message) {
+ return llvm::make_error<llvm::StringError>(Message,
+ llvm::inconvertibleErrorCode());
+}
+
+const char *ParseErrorCategory::name() const noexcept {
+ return "clang-format.parse_error";
+}
+
+std::string ParseErrorCategory::message(int EV) const {
+ switch (static_cast<ParseError>(EV)) {
+ case ParseError::Success:
+ return "Success";
+ case ParseError::Error:
+ return "Invalid argument";
+ case ParseError::Unsuitable:
+ return "Unsuitable";
+ }
+ llvm_unreachable("unexpected parse error");
+}
+
+static FormatStyle expandPresets(const FormatStyle &Style) {
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
+ return Style;
+ FormatStyle Expanded = Style;
+ Expanded.BraceWrapping = {false, false, false, false, false, false,
+ false, false, false, false, false, true};
+ switch (Style.BreakBeforeBraces) {
+ case FormatStyle::BS_Linux:
+ Expanded.BraceWrapping.AfterClass = true;
+ Expanded.BraceWrapping.AfterFunction = true;
+ Expanded.BraceWrapping.AfterNamespace = true;
+ break;
+ case FormatStyle::BS_Mozilla:
+ Expanded.BraceWrapping.AfterClass = true;
+ Expanded.BraceWrapping.AfterEnum = true;
+ Expanded.BraceWrapping.AfterFunction = true;
+ Expanded.BraceWrapping.AfterStruct = true;
+ Expanded.BraceWrapping.AfterUnion = true;
+ Expanded.BraceWrapping.SplitEmptyFunctionBody = false;
+ break;
+ case FormatStyle::BS_Stroustrup:
+ Expanded.BraceWrapping.AfterFunction = true;
+ Expanded.BraceWrapping.BeforeCatch = true;
+ Expanded.BraceWrapping.BeforeElse = true;
+ break;
+ case FormatStyle::BS_Allman:
+ Expanded.BraceWrapping.AfterClass = true;
+ Expanded.BraceWrapping.AfterControlStatement = true;
+ Expanded.BraceWrapping.AfterEnum = true;
+ Expanded.BraceWrapping.AfterFunction = true;
+ Expanded.BraceWrapping.AfterNamespace = true;
+ Expanded.BraceWrapping.AfterObjCDeclaration = true;
+ Expanded.BraceWrapping.AfterStruct = true;
+ Expanded.BraceWrapping.BeforeCatch = true;
+ Expanded.BraceWrapping.BeforeElse = true;
+ break;
+ case FormatStyle::BS_GNU:
+ Expanded.BraceWrapping = {true, true, true, true, true, true,
+ true, true, true, true, true, true};
+ break;
+ case FormatStyle::BS_WebKit:
+ Expanded.BraceWrapping.AfterFunction = true;
+ break;
+ default:
+ break;
+ }
+ return Expanded;
+}
+
+FormatStyle getLLVMStyle() {
+ FormatStyle LLVMStyle;
+ LLVMStyle.Language = FormatStyle::LK_Cpp;
+ LLVMStyle.AccessModifierOffset = -2;
+ LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right;
+ LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align;
+ LLVMStyle.AlignOperands = true;
+ LLVMStyle.AlignTrailingComments = true;
+ LLVMStyle.AlignConsecutiveAssignments = false;
+ LLVMStyle.AlignConsecutiveDeclarations = false;
+ LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
+ LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
+ LLVMStyle.AllowShortBlocksOnASingleLine = false;
+ LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
+ LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
+ LLVMStyle.AllowShortLoopsOnASingleLine = false;
+ LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None;
+ LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None;
+ LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
+ LLVMStyle.AlwaysBreakTemplateDeclarations = false;
+ LLVMStyle.BinPackArguments = true;
+ LLVMStyle.BinPackParameters = true;
+ LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
+ LLVMStyle.BreakBeforeTernaryOperators = true;
+ LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
+ LLVMStyle.BraceWrapping = {false, false, false, false, false, false,
+ false, false, false, false, false, true};
+ LLVMStyle.BreakAfterJavaFieldAnnotations = false;
+ LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;
+ LLVMStyle.BreakBeforeInheritanceComma = false;
+ LLVMStyle.BreakStringLiterals = true;
+ LLVMStyle.ColumnLimit = 80;
+ LLVMStyle.CommentPragmas = "^ IWYU pragma:";
+ LLVMStyle.CompactNamespaces = false;
+ LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
+ LLVMStyle.ConstructorInitializerIndentWidth = 4;
+ LLVMStyle.ContinuationIndentWidth = 4;
+ LLVMStyle.Cpp11BracedListStyle = true;
+ LLVMStyle.DerivePointerAlignment = false;
+ LLVMStyle.ExperimentalAutoDetectBinPacking = false;
+ LLVMStyle.FixNamespaceComments = true;
+ LLVMStyle.ForEachMacros.push_back("foreach");
+ LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
+ LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
+ LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2},
+ {"^(<|\"(gtest|isl|json)/)", 3},
+ {".*", 1}};
+ LLVMStyle.IncludeIsMainRegex = "$";
+ LLVMStyle.IndentCaseLabels = false;
+ LLVMStyle.IndentWrappedFunctionNames = false;
+ LLVMStyle.IndentWidth = 2;
+ LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;
+ LLVMStyle.JavaScriptWrapImports = true;
+ LLVMStyle.TabWidth = 8;
+ LLVMStyle.MaxEmptyLinesToKeep = 1;
+ LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
+ LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
+ LLVMStyle.ObjCBlockIndentWidth = 2;
+ LLVMStyle.ObjCSpaceAfterProperty = false;
+ LLVMStyle.ObjCSpaceBeforeProtocolList = true;
+ LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
+ LLVMStyle.SpacesBeforeTrailingComments = 1;
+ LLVMStyle.Standard = FormatStyle::LS_Cpp11;
+ LLVMStyle.UseTab = FormatStyle::UT_Never;
+ LLVMStyle.ReflowComments = true;
+ LLVMStyle.SpacesInParentheses = false;
+ LLVMStyle.SpacesInSquareBrackets = false;
+ LLVMStyle.SpaceInEmptyParentheses = false;
+ LLVMStyle.SpacesInContainerLiterals = true;
+ LLVMStyle.SpacesInCStyleCastParentheses = false;
+ LLVMStyle.SpaceAfterCStyleCast = false;
+ LLVMStyle.SpaceAfterTemplateKeyword = true;
+ LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
+ LLVMStyle.SpaceBeforeAssignmentOperators = true;
+ LLVMStyle.SpacesInAngles = false;
+
+ LLVMStyle.PenaltyBreakAssignment = prec::Assignment;
+ LLVMStyle.PenaltyBreakComment = 300;
+ LLVMStyle.PenaltyBreakFirstLessLess = 120;
+ LLVMStyle.PenaltyBreakString = 1000;
+ LLVMStyle.PenaltyExcessCharacter = 1000000;
+ LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
+ LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
+
+ LLVMStyle.DisableFormat = false;
+ LLVMStyle.SortIncludes = true;
+ LLVMStyle.SortUsingDeclarations = true;
+
+ return LLVMStyle;
+}
+
+FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
+ FormatStyle GoogleStyle = getLLVMStyle();
+ GoogleStyle.Language = Language;
+
+ GoogleStyle.AccessModifierOffset = -1;
+ GoogleStyle.AlignEscapedNewlines = FormatStyle::ENAS_Left;
+ GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
+ GoogleStyle.AllowShortLoopsOnASingleLine = true;
+ GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
+ GoogleStyle.AlwaysBreakTemplateDeclarations = true;
+ GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
+ GoogleStyle.DerivePointerAlignment = true;
+ GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};
+ GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";
+ GoogleStyle.IndentCaseLabels = true;
+ GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
+ GoogleStyle.ObjCSpaceAfterProperty = false;
+ GoogleStyle.ObjCSpaceBeforeProtocolList = false;
+ GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
+ GoogleStyle.SpacesBeforeTrailingComments = 2;
+ GoogleStyle.Standard = FormatStyle::LS_Auto;
+
+ GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
+ GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
+
+ if (Language == FormatStyle::LK_Java) {
+ GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
+ GoogleStyle.AlignOperands = false;
+ GoogleStyle.AlignTrailingComments = false;
+ GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
+ GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
+ GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
+ GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
+ GoogleStyle.ColumnLimit = 100;
+ GoogleStyle.SpaceAfterCStyleCast = true;
+ GoogleStyle.SpacesBeforeTrailingComments = 1;
+ } else if (Language == FormatStyle::LK_JavaScript) {
+ GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak;
+ GoogleStyle.AlignOperands = false;
+ GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
+ GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
+ GoogleStyle.BreakBeforeTernaryOperators = false;
+ // taze:, triple slash directives (`/// <...`), @tag followed by { for a lot
+ // of JSDoc tags, and @see, which is commonly followed by overlong URLs.
+ GoogleStyle.CommentPragmas =
+ "(taze:|^/[ \t]*<|(@[A-Za-z_0-9-]+[ \\t]*{)|@see)";
+ GoogleStyle.MaxEmptyLinesToKeep = 3;
+ GoogleStyle.NamespaceIndentation = FormatStyle::NI_All;
+ GoogleStyle.SpacesInContainerLiterals = false;
+ GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single;
+ GoogleStyle.JavaScriptWrapImports = false;
+ } else if (Language == FormatStyle::LK_Proto) {
+ GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
+ GoogleStyle.SpacesInContainerLiterals = false;
+ } else if (Language == FormatStyle::LK_ObjC) {
+ GoogleStyle.ColumnLimit = 100;
+ }
+
+ return GoogleStyle;
+}
+
+FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
+ FormatStyle ChromiumStyle = getGoogleStyle(Language);
+ if (Language == FormatStyle::LK_Java) {
+ ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
+ ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
+ ChromiumStyle.ContinuationIndentWidth = 8;
+ ChromiumStyle.IndentWidth = 4;
+ } else if (Language == FormatStyle::LK_JavaScript) {
+ ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
+ ChromiumStyle.AllowShortLoopsOnASingleLine = false;
+ } else {
+ ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
+ ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
+ ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
+ ChromiumStyle.AllowShortLoopsOnASingleLine = false;
+ ChromiumStyle.BinPackParameters = false;
+ ChromiumStyle.DerivePointerAlignment = false;
+ if (Language == FormatStyle::LK_ObjC)
+ ChromiumStyle.ColumnLimit = 80;
+ }
+ return ChromiumStyle;
+}
+
+FormatStyle getMozillaStyle() {
+ FormatStyle MozillaStyle = getLLVMStyle();
+ MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
+ MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
+ MozillaStyle.AlwaysBreakAfterReturnType =
+ FormatStyle::RTBS_TopLevel;
+ MozillaStyle.AlwaysBreakAfterDefinitionReturnType =
+ FormatStyle::DRTBS_TopLevel;
+ MozillaStyle.AlwaysBreakTemplateDeclarations = true;
+ MozillaStyle.BinPackParameters = false;
+ MozillaStyle.BinPackArguments = false;
+ MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla;
+ MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+ MozillaStyle.BreakBeforeInheritanceComma = true;
+ MozillaStyle.ConstructorInitializerIndentWidth = 2;
+ MozillaStyle.ContinuationIndentWidth = 2;
+ MozillaStyle.Cpp11BracedListStyle = false;
+ MozillaStyle.FixNamespaceComments = false;
+ MozillaStyle.IndentCaseLabels = true;
+ MozillaStyle.ObjCSpaceAfterProperty = true;
+ MozillaStyle.ObjCSpaceBeforeProtocolList = false;
+ MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
+ MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
+ MozillaStyle.SpaceAfterTemplateKeyword = false;
+ return MozillaStyle;
+}
+
+FormatStyle getWebKitStyle() {
+ FormatStyle Style = getLLVMStyle();
+ Style.AccessModifierOffset = -4;
+ Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign;
+ Style.AlignOperands = false;
+ Style.AlignTrailingComments = false;
+ Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
+ Style.BreakBeforeBraces = FormatStyle::BS_WebKit;
+ Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma;
+ Style.Cpp11BracedListStyle = false;
+ Style.ColumnLimit = 0;
+ Style.FixNamespaceComments = false;
+ Style.IndentWidth = 4;
+ Style.NamespaceIndentation = FormatStyle::NI_Inner;
+ Style.ObjCBlockIndentWidth = 4;
+ Style.ObjCSpaceAfterProperty = true;
+ Style.PointerAlignment = FormatStyle::PAS_Left;
+ return Style;
+}
+
+FormatStyle getGNUStyle() {
+ FormatStyle Style = getLLVMStyle();
+ Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All;
+ Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions;
+ Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
+ Style.BreakBeforeBraces = FormatStyle::BS_GNU;
+ Style.BreakBeforeTernaryOperators = true;
+ Style.Cpp11BracedListStyle = false;
+ Style.ColumnLimit = 79;
+ Style.FixNamespaceComments = false;
+ Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
+ Style.Standard = FormatStyle::LS_Cpp03;
+ return Style;
+}
+
+FormatStyle getNoStyle() {
+ FormatStyle NoStyle = getLLVMStyle();
+ NoStyle.DisableFormat = true;
+ NoStyle.SortIncludes = false;
+ NoStyle.SortUsingDeclarations = false;
+ return NoStyle;
+}
+
+bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
+ FormatStyle *Style) {
+ if (Name.equals_lower("llvm")) {
+ *Style = getLLVMStyle();
+ } else if (Name.equals_lower("chromium")) {
+ *Style = getChromiumStyle(Language);
+ } else if (Name.equals_lower("mozilla")) {
+ *Style = getMozillaStyle();
+ } else if (Name.equals_lower("google")) {
+ *Style = getGoogleStyle(Language);
+ } else if (Name.equals_lower("webkit")) {
+ *Style = getWebKitStyle();
+ } else if (Name.equals_lower("gnu")) {
+ *Style = getGNUStyle();
+ } else if (Name.equals_lower("none")) {
+ *Style = getNoStyle();
+ } else {
+ return false;
+ }
+
+ Style->Language = Language;
+ return true;
+}
+
+std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
+ assert(Style);
+ FormatStyle::LanguageKind Language = Style->Language;
+ assert(Language != FormatStyle::LK_None);
+ if (Text.trim().empty())
+ return make_error_code(ParseError::Error);
+
+ std::vector<FormatStyle> Styles;
+ llvm::yaml::Input Input(Text);
+ // DocumentListTraits<vector<FormatStyle>> uses the context to get default
+ // values for the fields, keys for which are missing from the configuration.
+ // Mapping also uses the context to get the language to find the correct
+ // base style.
+ Input.setContext(Style);
+ Input >> Styles;
+ if (Input.error())
+ return Input.error();
+
+ for (unsigned i = 0; i < Styles.size(); ++i) {
+ // Ensures that only the first configuration can skip the Language option.
+ if (Styles[i].Language == FormatStyle::LK_None && i != 0)
+ return make_error_code(ParseError::Error);
+ // Ensure that each language is configured at most once.
+ for (unsigned j = 0; j < i; ++j) {
+ if (Styles[i].Language == Styles[j].Language) {
+ DEBUG(llvm::dbgs()
+ << "Duplicate languages in the config file on positions " << j
+ << " and " << i << "\n");
+ return make_error_code(ParseError::Error);
+ }
+ }
+ }
+ // Look for a suitable configuration starting from the end, so we can
+ // find the configuration for the specific language first, and the default
+ // configuration (which can only be at slot 0) after it.
+ for (int i = Styles.size() - 1; i >= 0; --i) {
+ if (Styles[i].Language == Language ||
+ Styles[i].Language == FormatStyle::LK_None) {
+ *Style = Styles[i];
+ Style->Language = Language;
+ return make_error_code(ParseError::Success);
+ }
+ }
+ return make_error_code(ParseError::Unsuitable);
+}
+
+std::string configurationAsText(const FormatStyle &Style) {
+ std::string Text;
+ llvm::raw_string_ostream Stream(Text);
+ llvm::yaml::Output Output(Stream);
+ // We use the same mapping method for input and output, so we need a non-const
+ // reference here.
+ FormatStyle NonConstStyle = expandPresets(Style);
+ Output << NonConstStyle;
+ return Stream.str();
+}
+
+namespace {
+
+class JavaScriptRequoter : public TokenAnalyzer {
+public:
+ JavaScriptRequoter(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override {
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+ tooling::Replacements Result;
+ requoteJSStringLiteral(AnnotatedLines, Result);
+ return Result;
+ }
+
+private:
+ // Replaces double/single-quoted string literal as appropriate, re-escaping
+ // the contents in the process.
+ void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
+ tooling::Replacements &Result) {
+ for (AnnotatedLine *Line : Lines) {
+ requoteJSStringLiteral(Line->Children, Result);
+ if (!Line->Affected)
+ continue;
+ for (FormatToken *FormatTok = Line->First; FormatTok;
+ FormatTok = FormatTok->Next) {
+ StringRef Input = FormatTok->TokenText;
+ if (FormatTok->Finalized || !FormatTok->isStringLiteral() ||
+ // NB: testing for not starting with a double quote to avoid
+ // breaking `template strings`.
+ (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
+ !Input.startswith("\"")) ||
+ (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
+ !Input.startswith("\'")))
+ continue;
+
+ // Change start and end quote.
+ bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
+ SourceLocation Start = FormatTok->Tok.getLocation();
+ auto Replace = [&](SourceLocation Start, unsigned Length,
+ StringRef ReplacementText) {
+ auto Err = Result.add(tooling::Replacement(
+ Env.getSourceManager(), Start, Length, ReplacementText));
+ // FIXME: handle error. For now, print error message and skip the
+ // replacement for release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false);
+ }
+ };
+ Replace(Start, 1, IsSingle ? "'" : "\"");
+ Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
+ IsSingle ? "'" : "\"");
+
+ // Escape internal quotes.
+ bool Escaped = false;
+ for (size_t i = 1; i < Input.size() - 1; i++) {
+ switch (Input[i]) {
+ case '\\':
+ if (!Escaped && i + 1 < Input.size() &&
+ ((IsSingle && Input[i + 1] == '"') ||
+ (!IsSingle && Input[i + 1] == '\''))) {
+ // Remove this \, it's escaping a " or ' that no longer needs
+ // escaping
+ Replace(Start.getLocWithOffset(i), 1, "");
+ continue;
+ }
+ Escaped = !Escaped;
+ break;
+ case '\"':
+ case '\'':
+ if (!Escaped && IsSingle == (Input[i] == '\'')) {
+ // Escape the quote.
+ Replace(Start.getLocWithOffset(i), 0, "\\");
+ }
+ Escaped = false;
+ break;
+ default:
+ Escaped = false;
+ break;
+ }
+ }
+ }
+ }
+ }
+};
+
+class Formatter : public TokenAnalyzer {
+public:
+ Formatter(const Environment &Env, const FormatStyle &Style,
+ FormattingAttemptStatus *Status)
+ : TokenAnalyzer(Env, Style), Status(Status) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override {
+ tooling::Replacements Result;
+ deriveLocalStyle(AnnotatedLines);
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
+ }
+ Annotator.setCommentLineLevels(AnnotatedLines);
+
+ WhitespaceManager Whitespaces(
+ Env.getSourceManager(), Style,
+ inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID())));
+ ContinuationIndenter Indenter(Style, Tokens.getKeywords(),
+ Env.getSourceManager(), Whitespaces, Encoding,
+ BinPackInconclusiveFunctions);
+ UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
+ Env.getSourceManager(), Status)
+ .format(AnnotatedLines);
+ for (const auto &R : Whitespaces.generateReplacements())
+ if (Result.add(R))
+ return Result;
+ return Result;
+ }
+
+private:
+
+ static bool inputUsesCRLF(StringRef Text) {
+ return Text.count('\r') * 2 > Text.count('\n');
+ }
+
+ bool
+ hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
+ for (const AnnotatedLine *Line : Lines) {
+ if (hasCpp03IncompatibleFormat(Line->Children))
+ return true;
+ for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
+ if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
+ if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
+ return true;
+ if (Tok->is(TT_TemplateCloser) &&
+ Tok->Previous->is(TT_TemplateCloser))
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
+ int AlignmentDiff = 0;
+ for (const AnnotatedLine *Line : Lines) {
+ AlignmentDiff += countVariableAlignments(Line->Children);
+ for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
+ if (!Tok->is(TT_PointerOrReference))
+ continue;
+ bool SpaceBefore =
+ Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
+ bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
+ Tok->Next->WhitespaceRange.getEnd();
+ if (SpaceBefore && !SpaceAfter)
+ ++AlignmentDiff;
+ if (!SpaceBefore && SpaceAfter)
+ --AlignmentDiff;
+ }
+ }
+ return AlignmentDiff;
+ }
+
+ void
+ deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ bool HasBinPackedFunction = false;
+ bool HasOnePerLineFunction = false;
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ if (!AnnotatedLines[i]->First->Next)
+ continue;
+ FormatToken *Tok = AnnotatedLines[i]->First->Next;
+ while (Tok->Next) {
+ if (Tok->PackingKind == PPK_BinPacked)
+ HasBinPackedFunction = true;
+ if (Tok->PackingKind == PPK_OnePerLine)
+ HasOnePerLineFunction = true;
+
+ Tok = Tok->Next;
+ }
+ }
+ if (Style.DerivePointerAlignment)
+ Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
+ ? FormatStyle::PAS_Left
+ : FormatStyle::PAS_Right;
+ if (Style.Standard == FormatStyle::LS_Auto)
+ Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
+ ? FormatStyle::LS_Cpp11
+ : FormatStyle::LS_Cpp03;
+ BinPackInconclusiveFunctions =
+ HasBinPackedFunction || !HasOnePerLineFunction;
+ }
+
+ bool BinPackInconclusiveFunctions;
+ FormattingAttemptStatus *Status;
+};
+
+// This class clean up the erroneous/redundant code around the given ranges in
+// file.
+class Cleaner : public TokenAnalyzer {
+public:
+ Cleaner(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style),
+ DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}
+
+ // FIXME: eliminate unused parameters.
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override {
+ // FIXME: in the current implementation the granularity of affected range
+ // is an annotated line. However, this is not sufficient. Furthermore,
+ // redundant code introduced by replacements does not necessarily
+ // intercept with ranges of replacements that result in the redundancy.
+ // To determine if some redundant code is actually introduced by
+ // replacements(e.g. deletions), we need to come up with a more
+ // sophisticated way of computing affected ranges.
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+
+ checkEmptyNamespace(AnnotatedLines);
+
+ for (auto &Line : AnnotatedLines) {
+ if (Line->Affected) {
+ cleanupRight(Line->First, tok::comma, tok::comma);
+ cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma);
+ cleanupRight(Line->First, tok::l_paren, tok::comma);
+ cleanupLeft(Line->First, tok::comma, tok::r_paren);
+ cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace);
+ cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace);
+ cleanupLeft(Line->First, TT_CtorInitializerColon, tok::equal);
+ }
+ }
+
+ return generateFixes();
+ }
+
+private:
+ bool containsOnlyComments(const AnnotatedLine &Line) {
+ for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) {
+ if (Tok->isNot(tok::comment))
+ return false;
+ }
+ return true;
+ }
+
+ // Iterate through all lines and remove any empty (nested) namespaces.
+ void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ std::set<unsigned> DeletedLines;
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ auto &Line = *AnnotatedLines[i];
+ if (Line.startsWith(tok::kw_namespace) ||
+ Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
+ checkEmptyNamespace(AnnotatedLines, i, i, DeletedLines);
+ }
+ }
+
+ for (auto Line : DeletedLines) {
+ FormatToken *Tok = AnnotatedLines[Line]->First;
+ while (Tok) {
+ deleteToken(Tok);
+ Tok = Tok->Next;
+ }
+ }
+ }
+
+ // The function checks if the namespace, which starts from \p CurrentLine, and
+ // its nested namespaces are empty and delete them if they are empty. It also
+ // sets \p NewLine to the last line checked.
+ // Returns true if the current namespace is empty.
+ bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ unsigned CurrentLine, unsigned &NewLine,
+ std::set<unsigned> &DeletedLines) {
+ unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
+ if (Style.BraceWrapping.AfterNamespace) {
+ // If the left brace is in a new line, we should consume it first so that
+ // it does not make the namespace non-empty.
+ // FIXME: error handling if there is no left brace.
+ if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) {
+ NewLine = CurrentLine;
+ return false;
+ }
+ } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) {
+ return false;
+ }
+ while (++CurrentLine < End) {
+ if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
+ break;
+
+ if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
+ AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
+ tok::kw_namespace)) {
+ if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine,
+ DeletedLines))
+ return false;
+ CurrentLine = NewLine;
+ continue;
+ }
+
+ if (containsOnlyComments(*AnnotatedLines[CurrentLine]))
+ continue;
+
+ // If there is anything other than comments or nested namespaces in the
+ // current namespace, the namespace cannot be empty.
+ NewLine = CurrentLine;
+ return false;
+ }
+
+ NewLine = CurrentLine;
+ if (CurrentLine >= End)
+ return false;
+
+ // Check if the empty namespace is actually affected by changed ranges.
+ if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange(
+ AnnotatedLines[InitLine]->First->Tok.getLocation(),
+ AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
+ return false;
+
+ for (unsigned i = InitLine; i <= CurrentLine; ++i) {
+ DeletedLines.insert(i);
+ }
+
+ return true;
+ }
+
+ // Checks pairs {start, start->next},..., {end->previous, end} and deletes one
+ // of the token in the pair if the left token has \p LK token kind and the
+ // right token has \p RK token kind. If \p DeleteLeft is true, the left token
+ // is deleted on match; otherwise, the right token is deleted.
+ template <typename LeftKind, typename RightKind>
+ void cleanupPair(FormatToken *Start, LeftKind LK, RightKind RK,
+ bool DeleteLeft) {
+ auto NextNotDeleted = [this](const FormatToken &Tok) -> FormatToken * {
+ for (auto *Res = Tok.Next; Res; Res = Res->Next)
+ if (!Res->is(tok::comment) &&
+ DeletedTokens.find(Res) == DeletedTokens.end())
+ return Res;
+ return nullptr;
+ };
+ for (auto *Left = Start; Left;) {
+ auto *Right = NextNotDeleted(*Left);
+ if (!Right)
+ break;
+ if (Left->is(LK) && Right->is(RK)) {
+ deleteToken(DeleteLeft ? Left : Right);
+ for (auto *Tok = Left->Next; Tok && Tok != Right; Tok = Tok->Next)
+ deleteToken(Tok);
+ // If the right token is deleted, we should keep the left token
+ // unchanged and pair it with the new right token.
+ if (!DeleteLeft)
+ continue;
+ }
+ Left = Right;
+ }
+ }
+
+ template <typename LeftKind, typename RightKind>
+ void cleanupLeft(FormatToken *Start, LeftKind LK, RightKind RK) {
+ cleanupPair(Start, LK, RK, /*DeleteLeft=*/true);
+ }
+
+ template <typename LeftKind, typename RightKind>
+ void cleanupRight(FormatToken *Start, LeftKind LK, RightKind RK) {
+ cleanupPair(Start, LK, RK, /*DeleteLeft=*/false);
+ }
+
+ // Delete the given token.
+ inline void deleteToken(FormatToken *Tok) {
+ if (Tok)
+ DeletedTokens.insert(Tok);
+ }
+
+ tooling::Replacements generateFixes() {
+ tooling::Replacements Fixes;
+ std::vector<FormatToken *> Tokens;
+ std::copy(DeletedTokens.begin(), DeletedTokens.end(),
+ std::back_inserter(Tokens));
+
+ // Merge multiple continuous token deletions into one big deletion so that
+ // the number of replacements can be reduced. This makes computing affected
+ // ranges more efficient when we run reformat on the changed code.
+ unsigned Idx = 0;
+ while (Idx < Tokens.size()) {
+ unsigned St = Idx, End = Idx;
+ while ((End + 1) < Tokens.size() &&
+ Tokens[End]->Next == Tokens[End + 1]) {
+ End++;
+ }
+ auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
+ Tokens[End]->Tok.getEndLoc());
+ auto Err =
+ Fixes.add(tooling::Replacement(Env.getSourceManager(), SR, ""));
+ // FIXME: better error handling. for now just print error message and skip
+ // for the release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false && "Fixes must not conflict!");
+ }
+ Idx = End + 1;
+ }
+
+ return Fixes;
+ }
+
+ // Class for less-than inequality comparason for the set `RedundantTokens`.
+ // We store tokens in the order they appear in the translation unit so that
+ // we do not need to sort them in `generateFixes()`.
+ struct FormatTokenLess {
+ FormatTokenLess(const SourceManager &SM) : SM(SM) {}
+
+ bool operator()(const FormatToken *LHS, const FormatToken *RHS) const {
+ return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
+ RHS->Tok.getLocation());
+ }
+ const SourceManager &SM;
+ };
+
+ // Tokens to be deleted.
+ std::set<FormatToken *, FormatTokenLess> DeletedTokens;
+};
+
+struct IncludeDirective {
+ StringRef Filename;
+ StringRef Text;
+ unsigned Offset;
+ int Category;
+};
+
+} // end anonymous namespace
+
+// Determines whether 'Ranges' intersects with ('Start', 'End').
+static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start,
+ unsigned End) {
+ for (auto Range : Ranges) {
+ if (Range.getOffset() < End &&
+ Range.getOffset() + Range.getLength() > Start)
+ return true;
+ }
+ return false;
+}
+
+// Returns a pair (Index, OffsetToEOL) describing the position of the cursor
+// before sorting/deduplicating. Index is the index of the include under the
+// cursor in the original set of includes. If this include has duplicates, it is
+// the index of the first of the duplicates as the others are going to be
+// removed. OffsetToEOL describes the cursor's position relative to the end of
+// its current line.
+// If `Cursor` is not on any #include, `Index` will be UINT_MAX.
+static std::pair<unsigned, unsigned>
+FindCursorIndex(const SmallVectorImpl<IncludeDirective> &Includes,
+ const SmallVectorImpl<unsigned> &Indices, unsigned Cursor) {
+ unsigned CursorIndex = UINT_MAX;
+ unsigned OffsetToEOL = 0;
+ for (int i = 0, e = Includes.size(); i != e; ++i) {
+ unsigned Start = Includes[Indices[i]].Offset;
+ unsigned End = Start + Includes[Indices[i]].Text.size();
+ if (!(Cursor >= Start && Cursor < End))
+ continue;
+ CursorIndex = Indices[i];
+ OffsetToEOL = End - Cursor;
+ // Put the cursor on the only remaining #include among the duplicate
+ // #includes.
+ while (--i >= 0 && Includes[CursorIndex].Text == Includes[Indices[i]].Text)
+ CursorIndex = i;
+ break;
+ }
+ return std::make_pair(CursorIndex, OffsetToEOL);
+}
+
+// Sorts and deduplicate a block of includes given by 'Includes' alphabetically
+// adding the necessary replacement to 'Replaces'. 'Includes' must be in strict
+// source order.
+// #include directives with the same text will be deduplicated, and only the
+// first #include in the duplicate #includes remains. If the `Cursor` is
+// provided and put on a deleted #include, it will be moved to the remaining
+// #include in the duplicate #includes.
+static void sortCppIncludes(const FormatStyle &Style,
+ const SmallVectorImpl<IncludeDirective> &Includes,
+ ArrayRef<tooling::Range> Ranges, StringRef FileName,
+ tooling::Replacements &Replaces, unsigned *Cursor) {
+ unsigned IncludesBeginOffset = Includes.front().Offset;
+ unsigned IncludesEndOffset =
+ Includes.back().Offset + Includes.back().Text.size();
+ unsigned IncludesBlockSize = IncludesEndOffset - IncludesBeginOffset;
+ if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset))
+ return;
+ SmallVector<unsigned, 16> Indices;
+ for (unsigned i = 0, e = Includes.size(); i != e; ++i)
+ Indices.push_back(i);
+ std::stable_sort(
+ Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) {
+ return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) <
+ std::tie(Includes[RHSI].Category, Includes[RHSI].Filename);
+ });
+ // The index of the include on which the cursor will be put after
+ // sorting/deduplicating.
+ unsigned CursorIndex;
+ // The offset from cursor to the end of line.
+ unsigned CursorToEOLOffset;
+ if (Cursor)
+ std::tie(CursorIndex, CursorToEOLOffset) =
+ FindCursorIndex(Includes, Indices, *Cursor);
+
+ // Deduplicate #includes.
+ Indices.erase(std::unique(Indices.begin(), Indices.end(),
+ [&](unsigned LHSI, unsigned RHSI) {
+ return Includes[LHSI].Text == Includes[RHSI].Text;
+ }),
+ Indices.end());
+
+ // If the #includes are out of order, we generate a single replacement fixing
+ // the entire block. Otherwise, no replacement is generated.
+ if (Indices.size() == Includes.size() &&
+ std::is_sorted(Indices.begin(), Indices.end()))
+ return;
+
+ std::string result;
+ for (unsigned Index : Indices) {
+ if (!result.empty())
+ result += "\n";
+ result += Includes[Index].Text;
+ if (Cursor && CursorIndex == Index)
+ *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset;
+ }
+
+ auto Err = Replaces.add(tooling::Replacement(
+ FileName, Includes.front().Offset, IncludesBlockSize, result));
+ // FIXME: better error handling. For now, just skip the replacement for the
+ // release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false);
+ }
+}
+
+namespace {
+
+// This class manages priorities of #include categories and calculates
+// priorities for headers.
+class IncludeCategoryManager {
+public:
+ IncludeCategoryManager(const FormatStyle &Style, StringRef FileName)
+ : Style(Style), FileName(FileName) {
+ FileStem = llvm::sys::path::stem(FileName);
+ for (const auto &Category : Style.IncludeCategories)
+ CategoryRegexs.emplace_back(Category.Regex);
+ IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
+ FileName.endswith(".cpp") || FileName.endswith(".c++") ||
+ FileName.endswith(".cxx") || FileName.endswith(".m") ||
+ FileName.endswith(".mm");
+ }
+
+ // Returns the priority of the category which \p IncludeName belongs to.
+ // If \p CheckMainHeader is true and \p IncludeName is a main header, returns
+ // 0. Otherwise, returns the priority of the matching category or INT_MAX.
+ int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) {
+ int Ret = INT_MAX;
+ for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
+ if (CategoryRegexs[i].match(IncludeName)) {
+ Ret = Style.IncludeCategories[i].Priority;
+ break;
+ }
+ if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
+ Ret = 0;
+ return Ret;
+ }
+
+private:
+ bool isMainHeader(StringRef IncludeName) const {
+ if (!IncludeName.startswith("\""))
+ return false;
+ StringRef HeaderStem =
+ llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
+ if (FileStem.startswith(HeaderStem)) {
+ llvm::Regex MainIncludeRegex(
+ (HeaderStem + Style.IncludeIsMainRegex).str());
+ if (MainIncludeRegex.match(FileStem))
+ return true;
+ }
+ return false;
+ }
+
+ const FormatStyle &Style;
+ bool IsMainFile;
+ StringRef FileName;
+ StringRef FileStem;
+ SmallVector<llvm::Regex, 4> CategoryRegexs;
+};
+
+const char IncludeRegexPattern[] =
+ R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+
+} // anonymous namespace
+
+tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName,
+ tooling::Replacements &Replaces,
+ unsigned *Cursor) {
+ unsigned Prev = 0;
+ unsigned SearchFrom = 0;
+ llvm::Regex IncludeRegex(IncludeRegexPattern);
+ SmallVector<StringRef, 4> Matches;
+ SmallVector<IncludeDirective, 16> IncludesInBlock;
+
+ // In compiled files, consider the first #include to be the main #include of
+ // the file if it is not a system #include. This ensures that the header
+ // doesn't have hidden dependencies
+ // (http://llvm.org/docs/CodingStandards.html#include-style).
+ //
+ // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
+ // cases where the first #include is unlikely to be the main header.
+ IncludeCategoryManager Categories(Style, FileName);
+ bool FirstIncludeBlock = true;
+ bool MainIncludeFound = false;
+ bool FormattingOff = false;
+
+ for (;;) {
+ auto Pos = Code.find('\n', SearchFrom);
+ StringRef Line =
+ Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
+
+ StringRef Trimmed = Line.trim();
+ if (Trimmed == "// clang-format off")
+ FormattingOff = true;
+ else if (Trimmed == "// clang-format on")
+ FormattingOff = false;
+
+ if (!FormattingOff && !Line.endswith("\\")) {
+ if (IncludeRegex.match(Line, &Matches)) {
+ StringRef IncludeName = Matches[2];
+ int Category = Categories.getIncludePriority(
+ IncludeName,
+ /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock);
+ if (Category == 0)
+ MainIncludeFound = true;
+ IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
+ } else if (!IncludesInBlock.empty()) {
+ sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
+ Cursor);
+ IncludesInBlock.clear();
+ FirstIncludeBlock = false;
+ }
+ Prev = Pos + 1;
+ }
+ if (Pos == StringRef::npos || Pos + 1 == Code.size())
+ break;
+ SearchFrom = Pos + 1;
+ }
+ if (!IncludesInBlock.empty())
+ sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor);
+ return Replaces;
+}
+
+bool isMpegTS(StringRef Code) {
+ // MPEG transport streams use the ".ts" file extension. clang-format should
+ // not attempt to format those. MPEG TS' frame format starts with 0x47 every
+ // 189 bytes - detect that and return.
+ return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47;
+}
+
+tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName, unsigned *Cursor) {
+ tooling::Replacements Replaces;
+ if (!Style.SortIncludes)
+ return Replaces;
+ if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript &&
+ isMpegTS(Code))
+ return Replaces;
+ if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript)
+ return sortJavaScriptImports(Style, Code, Ranges, FileName);
+ sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor);
+ return Replaces;
+}
+
+template <typename T>
+static llvm::Expected<tooling::Replacements>
+processReplacements(T ProcessFunc, StringRef Code,
+ const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ if (Replaces.empty())
+ return tooling::Replacements();
+
+ auto NewCode = applyAllReplacements(Code, Replaces);
+ if (!NewCode)
+ return NewCode.takeError();
+ std::vector<tooling::Range> ChangedRanges = Replaces.getAffectedRanges();
+ StringRef FileName = Replaces.begin()->getFilePath();
+
+ tooling::Replacements FormatReplaces =
+ ProcessFunc(Style, *NewCode, ChangedRanges, FileName);
+
+ return Replaces.merge(FormatReplaces);
+}
+
+llvm::Expected<tooling::Replacements>
+formatReplacements(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ // We need to use lambda function here since there are two versions of
+ // `sortIncludes`.
+ auto SortIncludes = [](const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) -> tooling::Replacements {
+ return sortIncludes(Style, Code, Ranges, FileName);
+ };
+ auto SortedReplaces =
+ processReplacements(SortIncludes, Code, Replaces, Style);
+ if (!SortedReplaces)
+ return SortedReplaces.takeError();
+
+ // We need to use lambda function here since there are two versions of
+ // `reformat`.
+ auto Reformat = [](const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) -> tooling::Replacements {
+ return reformat(Style, Code, Ranges, FileName);
+ };
+ return processReplacements(Reformat, Code, *SortedReplaces, Style);
+}
+
+namespace {
+
+inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
+ return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 &&
+ llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
+}
+
+inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
+ return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
+}
+
+// Returns the offset after skipping a sequence of tokens, matched by \p
+// GetOffsetAfterSequence, from the start of the code.
+// \p GetOffsetAfterSequence should be a function that matches a sequence of
+// tokens and returns an offset after the sequence.
+unsigned getOffsetAfterTokenSequence(
+ StringRef FileName, StringRef Code, const FormatStyle &Style,
+ llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
+ GetOffsetAfterSequence) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+ const SourceManager &SourceMgr = Env->getSourceManager();
+ Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+ getFormattingLangOpts(Style));
+ Token Tok;
+ // Get the first token.
+ Lex.LexFromRawLexer(Tok);
+ return GetOffsetAfterSequence(SourceMgr, Lex, Tok);
+}
+
+// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
+// \p Tok will be the token after this directive; otherwise, it can be any token
+// after the given \p Tok (including \p Tok).
+bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) {
+ bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier) &&
+ Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier);
+ if (Matched)
+ Lex.LexFromRawLexer(Tok);
+ return Matched;
+}
+
+void skipComments(Lexer &Lex, Token &Tok) {
+ while (Tok.is(tok::comment))
+ if (Lex.LexFromRawLexer(Tok))
+ return;
+}
+
+// Returns the offset after header guard directives and any comments
+// before/after header guards. If no header guard presents in the code, this
+// will returns the offset after skipping all comments from the start of the
+// code.
+unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
+ StringRef Code,
+ const FormatStyle &Style) {
+ return getOffsetAfterTokenSequence(
+ FileName, Code, Style,
+ [](const SourceManager &SM, Lexer &Lex, Token Tok) {
+ skipComments(Lex, Tok);
+ unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
+ if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+ skipComments(Lex, Tok);
+ if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
+ return SM.getFileOffset(Tok.getLocation());
+ }
+ return InitialOffset;
+ });
+}
+
+// Check if a sequence of tokens is like
+// "#include ("header.h" | <header.h>)".
+// If it is, \p Tok will be the token after this directive; otherwise, it can be
+// any token after the given \p Tok (including \p Tok).
+bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
+ auto Matched = [&]() {
+ Lex.LexFromRawLexer(Tok);
+ return true;
+ };
+ if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
+ if (Lex.LexFromRawLexer(Tok))
+ return false;
+ if (Tok.is(tok::string_literal))
+ return Matched();
+ if (Tok.is(tok::less)) {
+ while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
+ }
+ if (Tok.is(tok::greater))
+ return Matched();
+ }
+ }
+ return false;
+}
+
+// Returns the offset of the last #include directive after which a new
+// #include can be inserted. This ignores #include's after the #include block(s)
+// in the beginning of a file to avoid inserting headers into code sections
+// where new #include's should not be added by default.
+// These code sections include:
+// - raw string literals (containing #include).
+// - #if blocks.
+// - Special #include's among declarations (e.g. functions).
+//
+// If no #include after which a new #include can be inserted, this returns the
+// offset after skipping all comments from the start of the code.
+// Inserting after an #include is not allowed if it comes after code that is not
+// #include (e.g. pre-processing directive that is not #include, declarations).
+unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
+ const FormatStyle &Style) {
+ return getOffsetAfterTokenSequence(
+ FileName, Code, Style,
+ [](const SourceManager &SM, Lexer &Lex, Token Tok) {
+ skipComments(Lex, Tok);
+ unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
+ while (checkAndConsumeInclusiveDirective(Lex, Tok))
+ MaxOffset = SM.getFileOffset(Tok.getLocation());
+ return MaxOffset;
+ });
+}
+
+bool isDeletedHeader(llvm::StringRef HeaderName,
+ const std::set<llvm::StringRef> &HeadersToDelete) {
+ return HeadersToDelete.count(HeaderName) ||
+ HeadersToDelete.count(HeaderName.trim("\"<>"));
+}
+
+// FIXME: insert empty lines between newly created blocks.
+tooling::Replacements
+fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ if (!Style.isCpp())
+ return Replaces;
+
+ tooling::Replacements HeaderInsertions;
+ std::set<llvm::StringRef> HeadersToDelete;
+ tooling::Replacements Result;
+ for (const auto &R : Replaces) {
+ if (isHeaderInsertion(R)) {
+ // Replacements from \p Replaces must be conflict-free already, so we can
+ // simply consume the error.
+ llvm::consumeError(HeaderInsertions.add(R));
+ } else if (isHeaderDeletion(R)) {
+ HeadersToDelete.insert(R.getReplacementText());
+ } else if (R.getOffset() == UINT_MAX) {
+ llvm::errs() << "Insertions other than header #include insertion are "
+ "not supported! "
+ << R.getReplacementText() << "\n";
+ } else {
+ llvm::consumeError(Result.add(R));
+ }
+ }
+ if (HeaderInsertions.empty() && HeadersToDelete.empty())
+ return Replaces;
+
+ llvm::Regex IncludeRegex(IncludeRegexPattern);
+ llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
+ SmallVector<StringRef, 4> Matches;
+
+ StringRef FileName = Replaces.begin()->getFilePath();
+ IncludeCategoryManager Categories(Style, FileName);
+
+ // Record the offset of the end of the last include in each category.
+ std::map<int, int> CategoryEndOffsets;
+ // All possible priorities.
+ // Add 0 for main header and INT_MAX for headers that are not in any category.
+ std::set<int> Priorities = {0, INT_MAX};
+ for (const auto &Category : Style.IncludeCategories)
+ Priorities.insert(Category.Priority);
+ int FirstIncludeOffset = -1;
+ // All new headers should be inserted after this offset.
+ unsigned MinInsertOffset =
+ getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
+ StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
+ // Max insertion offset in the original code.
+ unsigned MaxInsertOffset =
+ MinInsertOffset +
+ getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style);
+ SmallVector<StringRef, 32> Lines;
+ TrimmedCode.split(Lines, '\n');
+ unsigned Offset = MinInsertOffset;
+ unsigned NextLineOffset;
+ std::set<StringRef> ExistingIncludes;
+ for (auto Line : Lines) {
+ NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
+ if (IncludeRegex.match(Line, &Matches)) {
+ // The header name with quotes or angle brackets.
+ StringRef IncludeName = Matches[2];
+ ExistingIncludes.insert(IncludeName);
+ // Only record the offset of current #include if we can insert after it.
+ if (Offset <= MaxInsertOffset) {
+ int Category = Categories.getIncludePriority(
+ IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+ CategoryEndOffsets[Category] = NextLineOffset;
+ if (FirstIncludeOffset < 0)
+ FirstIncludeOffset = Offset;
+ }
+ if (isDeletedHeader(IncludeName, HeadersToDelete)) {
+ // If this is the last line without trailing newline, we need to make
+ // sure we don't delete across the file boundary.
+ unsigned Length = std::min(Line.size() + 1, Code.size() - Offset);
+ llvm::Error Err =
+ Result.add(tooling::Replacement(FileName, Offset, Length, ""));
+ if (Err) {
+ // Ignore the deletion on conflict.
+ llvm::errs() << "Failed to add header deletion replacement for "
+ << IncludeName << ": " << llvm::toString(std::move(Err))
+ << "\n";
+ }
+ }
+ }
+ Offset = NextLineOffset;
+ }
+
+ // Populate CategoryEndOfssets:
+ // - Ensure that CategoryEndOffset[Highest] is always populated.
+ // - If CategoryEndOffset[Priority] isn't set, use the next higher value that
+ // is set, up to CategoryEndOffset[Highest].
+ auto Highest = Priorities.begin();
+ if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
+ if (FirstIncludeOffset >= 0)
+ CategoryEndOffsets[*Highest] = FirstIncludeOffset;
+ else
+ CategoryEndOffsets[*Highest] = MinInsertOffset;
+ }
+ // By this point, CategoryEndOffset[Highest] is always set appropriately:
+ // - to an appropriate location before/after existing #includes, or
+ // - to right after the header guard, or
+ // - to the beginning of the file.
+ for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
+ if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
+ CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
+
+ bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n';
+ for (const auto &R : HeaderInsertions) {
+ auto IncludeDirective = R.getReplacementText();
+ bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
+ assert(Matched && "Header insertion replacement must have replacement text "
+ "'#include ...'");
+ (void)Matched;
+ auto IncludeName = Matches[2];
+ if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) {
+ DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName
+ << "\n");
+ continue;
+ }
+ int Category =
+ Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true);
+ Offset = CategoryEndOffsets[Category];
+ std::string NewInclude = !IncludeDirective.endswith("\n")
+ ? (IncludeDirective + "\n").str()
+ : IncludeDirective.str();
+ // When inserting headers at end of the code, also append '\n' to the code
+ // if it does not end with '\n'.
+ if (NeedNewLineAtEnd && Offset == Code.size()) {
+ NewInclude = "\n" + NewInclude;
+ NeedNewLineAtEnd = false;
+ }
+ auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude);
+ auto Err = Result.add(NewReplace);
+ if (Err) {
+ llvm::consumeError(std::move(Err));
+ unsigned NewOffset = Result.getShiftedCodePosition(Offset);
+ NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude);
+ Result = Result.merge(tooling::Replacements(NewReplace));
+ }
+ }
+ return Result;
+}
+
+} // anonymous namespace
+
+llvm::Expected<tooling::Replacements>
+cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
+ const FormatStyle &Style) {
+ // We need to use lambda function here since there are two versions of
+ // `cleanup`.
+ auto Cleanup = [](const FormatStyle &Style, StringRef Code,
+ std::vector<tooling::Range> Ranges,
+ StringRef FileName) -> tooling::Replacements {
+ return cleanup(Style, Code, Ranges, FileName);
+ };
+ // Make header insertion replacements insert new headers into correct blocks.
+ tooling::Replacements NewReplaces =
+ fixCppIncludeInsertions(Code, Replaces, Style);
+ return processReplacements(Cleanup, Code, NewReplaces, Style);
+}
+
+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName,
+ FormattingAttemptStatus *Status) {
+ FormatStyle Expanded = expandPresets(Style);
+ if (Expanded.DisableFormat)
+ return tooling::Replacements();
+ if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))
+ return tooling::Replacements();
+
+ typedef std::function<tooling::Replacements(const Environment &)>
+ AnalyzerPass;
+ SmallVector<AnalyzerPass, 4> Passes;
+
+ if (Style.Language == FormatStyle::LK_Cpp) {
+ if (Style.FixNamespaceComments)
+ Passes.emplace_back([&](const Environment &Env) {
+ return NamespaceEndCommentsFixer(Env, Expanded).process();
+ });
+
+ if (Style.SortUsingDeclarations)
+ Passes.emplace_back([&](const Environment &Env) {
+ return UsingDeclarationsSorter(Env, Expanded).process();
+ });
+ }
+
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
+ Passes.emplace_back([&](const Environment &Env) {
+ return JavaScriptRequoter(Env, Expanded).process();
+ });
+
+ Passes.emplace_back([&](const Environment &Env) {
+ return Formatter(Env, Expanded, Status).process();
+ });
+
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ llvm::Optional<std::string> CurrentCode = None;
+ tooling::Replacements Fixes;
+ for (size_t I = 0, E = Passes.size(); I < E; ++I) {
+ tooling::Replacements PassFixes = Passes[I](*Env);
+ auto NewCode = applyAllReplacements(
+ CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes);
+ if (NewCode) {
+ Fixes = Fixes.merge(PassFixes);
+ if (I + 1 < E) {
+ CurrentCode = std::move(*NewCode);
+ Env = Environment::CreateVirtualEnvironment(
+ *CurrentCode, FileName,
+ tooling::calculateRangesAfterReplacements(Fixes, Ranges));
+ }
+ }
+ }
+
+ return Fixes;
+}
+
+tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ // cleanups only apply to C++ (they mostly concern ctor commas etc.)
+ if (Style.Language != FormatStyle::LK_Cpp)
+ return tooling::Replacements();
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ Cleaner Clean(*Env, Style);
+ return Clean.process();
+}
+
+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName, bool *IncompleteFormat) {
+ FormattingAttemptStatus Status;
+ auto Result = reformat(Style, Code, Ranges, FileName, &Status);
+ if (!Status.FormatComplete)
+ *IncompleteFormat = true;
+ return Result;
+}
+
+tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ NamespaceEndCommentsFixer Fix(*Env, Style);
+ return Fix.process();
+}
+
+tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ UsingDeclarationsSorter Sorter(*Env, Style);
+ return Sorter.process();
+}
+
+LangOptions getFormattingLangOpts(const FormatStyle &Style) {
+ LangOptions LangOpts;
+ LangOpts.CPlusPlus = 1;
+ LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+ LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+ LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
+ LangOpts.LineComment = 1;
+ bool AlternativeOperators = Style.isCpp();
+ LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
+ LangOpts.Bool = 1;
+ LangOpts.ObjC1 = 1;
+ LangOpts.ObjC2 = 1;
+ LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
+ LangOpts.DeclSpecKeyword = 1; // To get __declspec.
+ return LangOpts;
+}
+
+const char *StyleOptionHelpDescription =
+ "Coding style, currently supports:\n"
+ " LLVM, Google, Chromium, Mozilla, WebKit.\n"
+ "Use -style=file to load style configuration from\n"
+ ".clang-format file located in one of the parent\n"
+ "directories of the source file (or current\n"
+ "directory for stdin).\n"
+ "Use -style=\"{key: value, ...}\" to set specific\n"
+ "parameters, e.g.:\n"
+ " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
+
+static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
+ if (FileName.endswith(".java"))
+ return FormatStyle::LK_Java;
+ if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts"))
+ return FormatStyle::LK_JavaScript; // JavaScript or TypeScript.
+ if (FileName.endswith(".m") || FileName.endswith(".mm"))
+ return FormatStyle::LK_ObjC;
+ if (FileName.endswith_lower(".proto") ||
+ FileName.endswith_lower(".protodevel"))
+ return FormatStyle::LK_Proto;
+ if (FileName.endswith_lower(".td"))
+ return FormatStyle::LK_TableGen;
+ return FormatStyle::LK_Cpp;
+}
+
+llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,
+ StringRef FallbackStyleName,
+ StringRef Code, vfs::FileSystem *FS) {
+ if (!FS) {
+ FS = vfs::getRealFileSystem().get();
+ }
+ FormatStyle Style = getLLVMStyle();
+ Style.Language = getLanguageByFileName(FileName);
+
+ // This is a very crude detection of whether a header contains ObjC code that
+ // should be improved over time and probably be done on tokens, not one the
+ // bare content of the file.
+ if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
+ (Code.contains("\n- (") || Code.contains("\n+ (")))
+ Style.Language = FormatStyle::LK_ObjC;
+
+ FormatStyle FallbackStyle = getNoStyle();
+ if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
+ return make_string_error("Invalid fallback style \"" + FallbackStyleName);
+
+ if (StyleName.startswith("{")) {
+ // Parse YAML/JSON style from the command line.
+ if (std::error_code ec = parseConfiguration(StyleName, &Style))
+ return make_string_error("Error parsing -style: " + ec.message());
+ return Style;
+ }
+
+ if (!StyleName.equals_lower("file")) {
+ if (!getPredefinedStyle(StyleName, Style.Language, &Style))
+ return make_string_error("Invalid value for -style");
+ return Style;
+ }
+
+ // Look for .clang-format/_clang-format file in the file's parent directories.
+ SmallString<128> UnsuitableConfigFiles;
+ SmallString<128> Path(FileName);
+ if (std::error_code EC = FS->makeAbsolute(Path))
+ return make_string_error(EC.message());
+
+ for (StringRef Directory = Path; !Directory.empty();
+ Directory = llvm::sys::path::parent_path(Directory)) {
+
+ auto Status = FS->status(Directory);
+ if (!Status ||
+ Status->getType() != llvm::sys::fs::file_type::directory_file) {
+ continue;
+ }
+
+ SmallString<128> ConfigFile(Directory);
+
+ llvm::sys::path::append(ConfigFile, ".clang-format");
+ DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
+
+ Status = FS->status(ConfigFile.str());
+ bool FoundConfigFile =
+ Status && (Status->getType() == llvm::sys::fs::file_type::regular_file);
+ if (!FoundConfigFile) {
+ // Try _clang-format too, since dotfiles are not commonly used on Windows.
+ ConfigFile = Directory;
+ llvm::sys::path::append(ConfigFile, "_clang-format");
+ DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
+ Status = FS->status(ConfigFile.str());
+ FoundConfigFile = Status && (Status->getType() ==
+ llvm::sys::fs::file_type::regular_file);
+ }
+
+ if (FoundConfigFile) {
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+ FS->getBufferForFile(ConfigFile.str());
+ if (std::error_code EC = Text.getError())
+ return make_string_error(EC.message());
+ if (std::error_code ec =
+ parseConfiguration(Text.get()->getBuffer(), &Style)) {
+ if (ec == ParseError::Unsuitable) {
+ if (!UnsuitableConfigFiles.empty())
+ UnsuitableConfigFiles.append(", ");
+ UnsuitableConfigFiles.append(ConfigFile);
+ continue;
+ }
+ return make_string_error("Error reading " + ConfigFile + ": " +
+ ec.message());
+ }
+ DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
+ return Style;
+ }
+ }
+ if (!UnsuitableConfigFiles.empty())
+ return make_string_error("Configuration file(s) do(es) not support " +
+ getLanguageName(Style.Language) + ": " +
+ UnsuitableConfigFiles);
+ return FallbackStyle;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
new file mode 100644
index 000000000000..ba5bf03a6346
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.cpp
@@ -0,0 +1,306 @@
+//===--- FormatToken.cpp - Format C++ code --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements specific functions of \c FormatTokens and their
+/// roles.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatToken.h"
+#include "ContinuationIndenter.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include <climits>
+
+namespace clang {
+namespace format {
+
+const char *getTokenTypeName(TokenType Type) {
+ static const char *const TokNames[] = {
+#define TYPE(X) #X,
+LIST_TOKEN_TYPES
+#undef TYPE
+ nullptr
+ };
+
+ if (Type < NUM_TOKEN_TYPES)
+ return TokNames[Type];
+ llvm_unreachable("unknown TokenType");
+ return nullptr;
+}
+
+// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
+// duplication.
+bool FormatToken::isSimpleTypeSpecifier() const {
+ switch (Tok.getKind()) {
+ case tok::kw_short:
+ case tok::kw_long:
+ case tok::kw___int64:
+ case tok::kw___int128:
+ case tok::kw_signed:
+ case tok::kw_unsigned:
+ case tok::kw_void:
+ case tok::kw_char:
+ case tok::kw_int:
+ case tok::kw_half:
+ case tok::kw_float:
+ case tok::kw_double:
+ case tok::kw___float128:
+ case tok::kw_wchar_t:
+ case tok::kw_bool:
+ case tok::kw___underlying_type:
+ case tok::annot_typename:
+ case tok::kw_char16_t:
+ case tok::kw_char32_t:
+ case tok::kw_typeof:
+ case tok::kw_decltype:
+ return true;
+ default:
+ return false;
+ }
+}
+
+TokenRole::~TokenRole() {}
+
+void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
+
+unsigned CommaSeparatedList::formatAfterToken(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ if (State.NextToken == nullptr || !State.NextToken->Previous)
+ return 0;
+
+ if (Formats.size() == 1)
+ return 0; // Handled by formatFromToken
+
+ // Ensure that we start on the opening brace.
+ const FormatToken *LBrace =
+ State.NextToken->Previous->getPreviousNonComment();
+ if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+ LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
+ LBrace->Next->Type == TT_DesignatedInitializerPeriod)
+ return 0;
+
+ // Calculate the number of code points we have to format this list. As the
+ // first token is already placed, we have to subtract it.
+ unsigned RemainingCodePoints =
+ Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
+
+ // Find the best ColumnFormat, i.e. the best number of columns to use.
+ const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
+
+ // If no ColumnFormat can be used, the braced list would generally be
+ // bin-packed. Add a severe penalty to this so that column layouts are
+ // preferred if possible.
+ if (!Format)
+ return 10000;
+
+ // Format the entire list.
+ unsigned Penalty = 0;
+ unsigned Column = 0;
+ unsigned Item = 0;
+ while (State.NextToken != LBrace->MatchingParen) {
+ bool NewLine = false;
+ unsigned ExtraSpaces = 0;
+
+ // If the previous token was one of our commas, we are now on the next item.
+ if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
+ if (!State.NextToken->isTrailingComment()) {
+ ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
+ ++Column;
+ }
+ ++Item;
+ }
+
+ if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
+ Column = 0;
+ NewLine = true;
+ }
+
+ // Place token using the continuation indenter and store the penalty.
+ Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
+ }
+ return Penalty;
+}
+
+unsigned CommaSeparatedList::formatFromToken(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ // Formatting with 1 Column isn't really a column layout, so we don't need the
+ // special logic here. We can just avoid bin packing any of the parameters.
+ if (Formats.size() == 1 || HasNestedBracedList)
+ State.Stack.back().AvoidBinPacking = true;
+ return 0;
+}
+
+// Returns the lengths in code points between Begin and End (both included),
+// assuming that the entire sequence is put on a single line.
+static unsigned CodePointsBetween(const FormatToken *Begin,
+ const FormatToken *End) {
+ assert(End->TotalLength >= Begin->TotalLength);
+ return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
+}
+
+void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
+ // FIXME: At some point we might want to do this for other lists, too.
+ if (!Token->MatchingParen ||
+ !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
+ return;
+
+ // In C++11 braced list style, we should not format in columns unless they
+ // have many items (20 or more) or we allow bin-packing of function call
+ // arguments.
+ if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
+ Commas.size() < 19)
+ return;
+
+ // Limit column layout for JavaScript array initializers to 20 or more items
+ // for now to introduce it carefully. We can become more aggressive if this
+ // necessary.
+ if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
+ return;
+
+ // Column format doesn't really make sense if we don't align after brackets.
+ if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
+ return;
+
+ FormatToken *ItemBegin = Token->Next;
+ while (ItemBegin->isTrailingComment())
+ ItemBegin = ItemBegin->Next;
+ SmallVector<bool, 8> MustBreakBeforeItem;
+
+ // The lengths of an item if it is put at the end of the line. This includes
+ // trailing comments which are otherwise ignored for column alignment.
+ SmallVector<unsigned, 8> EndOfLineItemLength;
+
+ bool HasSeparatingComment = false;
+ for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
+ // Skip comments on their own line.
+ while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
+ ItemBegin = ItemBegin->Next;
+ HasSeparatingComment = i > 0;
+ }
+
+ MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
+ if (ItemBegin->is(tok::l_brace))
+ HasNestedBracedList = true;
+ const FormatToken *ItemEnd = nullptr;
+ if (i == Commas.size()) {
+ ItemEnd = Token->MatchingParen;
+ const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
+ ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
+ if (Style.Cpp11BracedListStyle &&
+ !ItemEnd->Previous->isTrailingComment()) {
+ // In Cpp11 braced list style, the } and possibly other subsequent
+ // tokens will need to stay on a line with the last element.
+ while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
+ ItemEnd = ItemEnd->Next;
+ } else {
+ // In other braced lists styles, the "}" can be wrapped to the new line.
+ ItemEnd = Token->MatchingParen->Previous;
+ }
+ } else {
+ ItemEnd = Commas[i];
+ // The comma is counted as part of the item when calculating the length.
+ ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
+
+ // Consume trailing comments so the are included in EndOfLineItemLength.
+ if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
+ ItemEnd->Next->isTrailingComment())
+ ItemEnd = ItemEnd->Next;
+ }
+ EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
+ // If there is a trailing comma in the list, the next item will start at the
+ // closing brace. Don't create an extra item for this.
+ if (ItemEnd->getNextNonComment() == Token->MatchingParen)
+ break;
+ ItemBegin = ItemEnd->Next;
+ }
+
+ // Don't use column layout for lists with few elements and in presence of
+ // separating comments.
+ if (Commas.size() < 5 || HasSeparatingComment)
+ return;
+
+ if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
+ return;
+
+ // We can never place more than ColumnLimit / 3 items in a row (because of the
+ // spaces and the comma).
+ unsigned MaxItems = Style.ColumnLimit / 3;
+ std::vector<unsigned> MinSizeInColumn;
+ MinSizeInColumn.reserve(MaxItems);
+ for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
+ ColumnFormat Format;
+ Format.Columns = Columns;
+ Format.ColumnSizes.resize(Columns);
+ MinSizeInColumn.assign(Columns, UINT_MAX);
+ Format.LineCount = 1;
+ bool HasRowWithSufficientColumns = false;
+ unsigned Column = 0;
+ for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
+ assert(i < MustBreakBeforeItem.size());
+ if (MustBreakBeforeItem[i] || Column == Columns) {
+ ++Format.LineCount;
+ Column = 0;
+ }
+ if (Column == Columns - 1)
+ HasRowWithSufficientColumns = true;
+ unsigned Length =
+ (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
+ Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
+ MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
+ ++Column;
+ }
+ // If all rows are terminated early (e.g. by trailing comments), we don't
+ // need to look further.
+ if (!HasRowWithSufficientColumns)
+ break;
+ Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
+
+ for (unsigned i = 0; i < Columns; ++i)
+ Format.TotalWidth += Format.ColumnSizes[i];
+
+ // Don't use this Format, if the difference between the longest and shortest
+ // element in a column exceeds a threshold to avoid excessive spaces.
+ if ([&] {
+ for (unsigned i = 0; i < Columns - 1; ++i)
+ if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
+ return true;
+ return false;
+ }())
+ continue;
+
+ // Ignore layouts that are bound to violate the column limit.
+ if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
+ continue;
+
+ Formats.push_back(Format);
+ }
+}
+
+const CommaSeparatedList::ColumnFormat *
+CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
+ const ColumnFormat *BestFormat = nullptr;
+ for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
+ I = Formats.rbegin(),
+ E = Formats.rend();
+ I != E; ++I) {
+ if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
+ if (BestFormat && I->LineCount > BestFormat->LineCount)
+ break;
+ BestFormat = &*I;
+ }
+ }
+ return BestFormat;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatToken.h b/contrib/llvm/tools/clang/lib/Format/FormatToken.h
new file mode 100644
index 000000000000..c5bf48cdcc06
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/FormatToken.h
@@ -0,0 +1,726 @@
+//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the FormatToken, a wrapper
+/// around Token with additional information related to formatting.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
+
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
+#include "clang/Lex/Lexer.h"
+#include <memory>
+
+namespace clang {
+namespace format {
+
+#define LIST_TOKEN_TYPES \
+ TYPE(ArrayInitializerLSquare) \
+ TYPE(ArraySubscriptLSquare) \
+ TYPE(AttributeParen) \
+ TYPE(BinaryOperator) \
+ TYPE(BitFieldColon) \
+ TYPE(BlockComment) \
+ TYPE(CastRParen) \
+ TYPE(ConditionalExpr) \
+ TYPE(ConflictAlternative) \
+ TYPE(ConflictEnd) \
+ TYPE(ConflictStart) \
+ TYPE(CtorInitializerColon) \
+ TYPE(CtorInitializerComma) \
+ TYPE(DesignatedInitializerLSquare) \
+ TYPE(DesignatedInitializerPeriod) \
+ TYPE(DictLiteral) \
+ TYPE(ForEachMacro) \
+ TYPE(FunctionAnnotationRParen) \
+ TYPE(FunctionDeclarationName) \
+ TYPE(FunctionLBrace) \
+ TYPE(FunctionTypeLParen) \
+ TYPE(ImplicitStringLiteral) \
+ TYPE(InheritanceColon) \
+ TYPE(InheritanceComma) \
+ TYPE(InlineASMBrace) \
+ TYPE(InlineASMColon) \
+ TYPE(JavaAnnotation) \
+ TYPE(JsComputedPropertyName) \
+ TYPE(JsExponentiation) \
+ TYPE(JsExponentiationEqual) \
+ TYPE(JsFatArrow) \
+ TYPE(JsNonNullAssertion) \
+ TYPE(JsTypeColon) \
+ TYPE(JsTypeOperator) \
+ TYPE(JsTypeOptionalQuestion) \
+ TYPE(LambdaArrow) \
+ TYPE(LambdaLSquare) \
+ TYPE(LeadingJavaAnnotation) \
+ TYPE(LineComment) \
+ TYPE(MacroBlockBegin) \
+ TYPE(MacroBlockEnd) \
+ TYPE(ObjCBlockLBrace) \
+ TYPE(ObjCBlockLParen) \
+ TYPE(ObjCDecl) \
+ TYPE(ObjCForIn) \
+ TYPE(ObjCMethodExpr) \
+ TYPE(ObjCMethodSpecifier) \
+ TYPE(ObjCProperty) \
+ TYPE(ObjCStringLiteral) \
+ TYPE(OverloadedOperator) \
+ TYPE(OverloadedOperatorLParen) \
+ TYPE(PointerOrReference) \
+ TYPE(PureVirtualSpecifier) \
+ TYPE(RangeBasedForLoopColon) \
+ TYPE(RegexLiteral) \
+ TYPE(SelectorName) \
+ TYPE(StartOfName) \
+ TYPE(TemplateCloser) \
+ TYPE(TemplateOpener) \
+ TYPE(TemplateString) \
+ TYPE(TrailingAnnotation) \
+ TYPE(TrailingReturnArrow) \
+ TYPE(TrailingUnaryOperator) \
+ TYPE(UnaryOperator) \
+ TYPE(Unknown)
+
+enum TokenType {
+#define TYPE(X) TT_##X,
+LIST_TOKEN_TYPES
+#undef TYPE
+ NUM_TOKEN_TYPES
+};
+
+/// \brief Determines the name of a token type.
+const char *getTokenTypeName(TokenType Type);
+
+// Represents what type of block a set of braces open.
+enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };
+
+// The packing kind of a function's parameters.
+enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
+
+enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
+
+class TokenRole;
+class AnnotatedLine;
+
+/// \brief A wrapper around a \c Token storing information about the
+/// whitespace characters preceding it.
+struct FormatToken {
+ FormatToken() {}
+
+ /// \brief The \c Token.
+ Token Tok;
+
+ /// \brief The number of newlines immediately before the \c Token.
+ ///
+ /// This can be used to determine what the user wrote in the original code
+ /// and thereby e.g. leave an empty line between two function definitions.
+ unsigned NewlinesBefore = 0;
+
+ /// \brief Whether there is at least one unescaped newline before the \c
+ /// Token.
+ bool HasUnescapedNewline = false;
+
+ /// \brief The range of the whitespace immediately preceding the \c Token.
+ SourceRange WhitespaceRange;
+
+ /// \brief The offset just past the last '\n' in this token's leading
+ /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
+ unsigned LastNewlineOffset = 0;
+
+ /// \brief The width of the non-whitespace parts of the token (or its first
+ /// line for multi-line tokens) in columns.
+ /// We need this to correctly measure number of columns a token spans.
+ unsigned ColumnWidth = 0;
+
+ /// \brief Contains the width in columns of the last line of a multi-line
+ /// token.
+ unsigned LastLineColumnWidth = 0;
+
+ /// \brief Whether the token text contains newlines (escaped or not).
+ bool IsMultiline = false;
+
+ /// \brief Indicates that this is the first token of the file.
+ bool IsFirst = false;
+
+ /// \brief Whether there must be a line break before this token.
+ ///
+ /// This happens for example when a preprocessor directive ended directly
+ /// before the token.
+ bool MustBreakBefore = false;
+
+ /// \brief The raw text of the token.
+ ///
+ /// Contains the raw token text without leading whitespace and without leading
+ /// escaped newlines.
+ StringRef TokenText;
+
+ /// \brief Set to \c true if this token is an unterminated literal.
+ bool IsUnterminatedLiteral = 0;
+
+ /// \brief Contains the kind of block if this token is a brace.
+ BraceBlockKind BlockKind = BK_Unknown;
+
+ TokenType Type = TT_Unknown;
+
+ /// \brief The number of spaces that should be inserted before this token.
+ unsigned SpacesRequiredBefore = 0;
+
+ /// \brief \c true if it is allowed to break before this token.
+ bool CanBreakBefore = false;
+
+ /// \brief \c true if this is the ">" of "template<..>".
+ bool ClosesTemplateDeclaration = false;
+
+ /// \brief Number of parameters, if this is "(", "[" or "<".
+ ///
+ /// This is initialized to 1 as we don't need to distinguish functions with
+ /// 0 parameters from functions with 1 parameter. Thus, we can simply count
+ /// the number of commas.
+ unsigned ParameterCount = 0;
+
+ /// \brief Number of parameters that are nested blocks,
+ /// if this is "(", "[" or "<".
+ unsigned BlockParameterCount = 0;
+
+ /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of
+ /// the surrounding bracket.
+ tok::TokenKind ParentBracket = tok::unknown;
+
+ /// \brief A token can have a special role that can carry extra information
+ /// about the token's formatting.
+ std::unique_ptr<TokenRole> Role;
+
+ /// \brief If this is an opening parenthesis, how are the parameters packed?
+ ParameterPackingKind PackingKind = PPK_Inconclusive;
+
+ /// \brief The total length of the unwrapped line up to and including this
+ /// token.
+ unsigned TotalLength = 0;
+
+ /// \brief The original 0-based column of this token, including expanded tabs.
+ /// The configured TabWidth is used as tab width.
+ unsigned OriginalColumn = 0;
+
+ /// \brief The length of following tokens until the next natural split point,
+ /// or the next token that can be broken.
+ unsigned UnbreakableTailLength = 0;
+
+ // FIXME: Come up with a 'cleaner' concept.
+ /// \brief The binding strength of a token. This is a combined value of
+ /// operator precedence, parenthesis nesting, etc.
+ unsigned BindingStrength = 0;
+
+ /// \brief The nesting level of this token, i.e. the number of surrounding (),
+ /// [], {} or <>.
+ unsigned NestingLevel = 0;
+
+ /// \brief The indent level of this token. Copied from the surrounding line.
+ unsigned IndentLevel = 0;
+
+ /// \brief Penalty for inserting a line break before this token.
+ unsigned SplitPenalty = 0;
+
+ /// \brief If this is the first ObjC selector name in an ObjC method
+ /// definition or call, this contains the length of the longest name.
+ ///
+ /// This being set to 0 means that the selectors should not be colon-aligned,
+ /// e.g. because several of them are block-type.
+ unsigned LongestObjCSelectorName = 0;
+
+ /// \brief Stores the number of required fake parentheses and the
+ /// corresponding operator precedence.
+ ///
+ /// If multiple fake parentheses start at a token, this vector stores them in
+ /// reverse order, i.e. inner fake parenthesis first.
+ SmallVector<prec::Level, 4> FakeLParens;
+ /// \brief Insert this many fake ) after this token for correct indentation.
+ unsigned FakeRParens = 0;
+
+ /// \brief \c true if this token starts a binary expression, i.e. has at least
+ /// one fake l_paren with a precedence greater than prec::Unknown.
+ bool StartsBinaryExpression = false;
+ /// \brief \c true if this token ends a binary expression.
+ bool EndsBinaryExpression = false;
+
+ /// \brief Is this is an operator (or "."/"->") in a sequence of operators
+ /// with the same precedence, contains the 0-based operator index.
+ unsigned OperatorIndex = 0;
+
+ /// \brief If this is an operator (or "."/"->") in a sequence of operators
+ /// with the same precedence, points to the next operator.
+ FormatToken *NextOperator = nullptr;
+
+ /// \brief Is this token part of a \c DeclStmt defining multiple variables?
+ ///
+ /// Only set if \c Type == \c TT_StartOfName.
+ bool PartOfMultiVariableDeclStmt = false;
+
+ /// \brief Does this line comment continue a line comment section?
+ ///
+ /// Only set to true if \c Type == \c TT_LineComment.
+ bool ContinuesLineCommentSection = false;
+
+ /// \brief If this is a bracket, this points to the matching one.
+ FormatToken *MatchingParen = nullptr;
+
+ /// \brief The previous token in the unwrapped line.
+ FormatToken *Previous = nullptr;
+
+ /// \brief The next token in the unwrapped line.
+ FormatToken *Next = nullptr;
+
+ /// \brief If this token starts a block, this contains all the unwrapped lines
+ /// in it.
+ SmallVector<AnnotatedLine *, 1> Children;
+
+ /// \brief Stores the formatting decision for the token once it was made.
+ FormatDecision Decision = FD_Unformatted;
+
+ /// \brief If \c true, this token has been fully formatted (indented and
+ /// potentially re-formatted inside), and we do not allow further formatting
+ /// changes.
+ bool Finalized = false;
+
+ bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
+ bool is(TokenType TT) const { return Type == TT; }
+ bool is(const IdentifierInfo *II) const {
+ return II && II == Tok.getIdentifierInfo();
+ }
+ bool is(tok::PPKeywordKind Kind) const {
+ return Tok.getIdentifierInfo() &&
+ Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
+ }
+ template <typename A, typename B> bool isOneOf(A K1, B K2) const {
+ return is(K1) || is(K2);
+ }
+ template <typename A, typename B, typename... Ts>
+ bool isOneOf(A K1, B K2, Ts... Ks) const {
+ return is(K1) || isOneOf(K2, Ks...);
+ }
+ template <typename T> bool isNot(T Kind) const { return !is(Kind); }
+
+ /// \c true if this token starts a sequence with the given tokens in order,
+ /// following the ``Next`` pointers, ignoring comments.
+ template <typename A, typename... Ts>
+ bool startsSequence(A K1, Ts... Tokens) const {
+ return startsSequenceInternal(K1, Tokens...);
+ }
+
+ /// \c true if this token ends a sequence with the given tokens in order,
+ /// following the ``Previous`` pointers, ignoring comments.
+ template <typename A, typename... Ts>
+ bool endsSequence(A K1, Ts... Tokens) const {
+ return endsSequenceInternal(K1, Tokens...);
+ }
+
+ bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
+
+ bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
+ return Tok.isObjCAtKeyword(Kind);
+ }
+
+ bool isAccessSpecifier(bool ColonRequired = true) const {
+ return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
+ (!ColonRequired || (Next && Next->is(tok::colon)));
+ }
+
+ /// \brief Determine whether the token is a simple-type-specifier.
+ bool isSimpleTypeSpecifier() const;
+
+ bool isObjCAccessSpecifier() const {
+ return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||
+ Next->isObjCAtKeyword(tok::objc_protected) ||
+ Next->isObjCAtKeyword(tok::objc_package) ||
+ Next->isObjCAtKeyword(tok::objc_private));
+ }
+
+ /// \brief Returns whether \p Tok is ([{ or a template opening <.
+ bool opensScope() const {
+ if (is(TT_TemplateString) && TokenText.endswith("${"))
+ return true;
+ return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
+ TT_TemplateOpener);
+ }
+ /// \brief Returns whether \p Tok is )]} or a template closing >.
+ bool closesScope() const {
+ if (is(TT_TemplateString) && TokenText.startswith("}"))
+ return true;
+ return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
+ TT_TemplateCloser);
+ }
+
+ /// \brief Returns \c true if this is a "." or "->" accessing a member.
+ bool isMemberAccess() const {
+ return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
+ !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
+ TT_LambdaArrow);
+ }
+
+ bool isUnaryOperator() const {
+ switch (Tok.getKind()) {
+ case tok::plus:
+ case tok::plusplus:
+ case tok::minus:
+ case tok::minusminus:
+ case tok::exclaim:
+ case tok::tilde:
+ case tok::kw_sizeof:
+ case tok::kw_alignof:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool isBinaryOperator() const {
+ // Comma is a binary operator, but does not behave as such wrt. formatting.
+ return getPrecedence() > prec::Comma;
+ }
+
+ bool isTrailingComment() const {
+ return is(tok::comment) &&
+ (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
+ }
+
+ /// \brief Returns \c true if this is a keyword that can be used
+ /// like a function call (e.g. sizeof, typeid, ...).
+ bool isFunctionLikeKeyword() const {
+ switch (Tok.getKind()) {
+ case tok::kw_throw:
+ case tok::kw_typeid:
+ case tok::kw_return:
+ case tok::kw_sizeof:
+ case tok::kw_alignof:
+ case tok::kw_alignas:
+ case tok::kw_decltype:
+ case tok::kw_noexcept:
+ case tok::kw_static_assert:
+ case tok::kw___attribute:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /// \brief Returns \c true if this is a string literal that's like a label,
+ /// e.g. ends with "=" or ":".
+ bool isLabelString() const {
+ if (!is(tok::string_literal))
+ return false;
+ StringRef Content = TokenText;
+ if (Content.startswith("\"") || Content.startswith("'"))
+ Content = Content.drop_front(1);
+ if (Content.endswith("\"") || Content.endswith("'"))
+ Content = Content.drop_back(1);
+ Content = Content.trim();
+ return Content.size() > 1 &&
+ (Content.back() == ':' || Content.back() == '=');
+ }
+
+ /// \brief Returns actual token start location without leading escaped
+ /// newlines and whitespace.
+ ///
+ /// This can be different to Tok.getLocation(), which includes leading escaped
+ /// newlines.
+ SourceLocation getStartOfNonWhitespace() const {
+ return WhitespaceRange.getEnd();
+ }
+
+ prec::Level getPrecedence() const {
+ return getBinOpPrecedence(Tok.getKind(), true, true);
+ }
+
+ /// \brief Returns the previous token ignoring comments.
+ FormatToken *getPreviousNonComment() const {
+ FormatToken *Tok = Previous;
+ while (Tok && Tok->is(tok::comment))
+ Tok = Tok->Previous;
+ return Tok;
+ }
+
+ /// \brief Returns the next token ignoring comments.
+ const FormatToken *getNextNonComment() const {
+ const FormatToken *Tok = Next;
+ while (Tok && Tok->is(tok::comment))
+ Tok = Tok->Next;
+ return Tok;
+ }
+
+ /// \brief Returns \c true if this tokens starts a block-type list, i.e. a
+ /// list that should be indented with a block indent.
+ bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
+ if (is(TT_TemplateString) && opensScope())
+ return true;
+ return is(TT_ArrayInitializerLSquare) ||
+ (is(tok::l_brace) &&
+ (BlockKind == BK_Block || is(TT_DictLiteral) ||
+ (!Style.Cpp11BracedListStyle && NestingLevel == 0)));
+ }
+
+ /// \brief Same as opensBlockOrBlockTypeList, but for the closing token.
+ bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
+ if (is(TT_TemplateString) && closesScope())
+ return true;
+ return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
+ }
+
+private:
+ // Disallow copying.
+ FormatToken(const FormatToken &) = delete;
+ void operator=(const FormatToken &) = delete;
+
+ template <typename A, typename... Ts>
+ bool startsSequenceInternal(A K1, Ts... Tokens) const {
+ if (is(tok::comment) && Next)
+ return Next->startsSequenceInternal(K1, Tokens...);
+ return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
+ }
+
+ template <typename A>
+ bool startsSequenceInternal(A K1) const {
+ if (is(tok::comment) && Next)
+ return Next->startsSequenceInternal(K1);
+ return is(K1);
+ }
+
+ template <typename A, typename... Ts>
+ bool endsSequenceInternal(A K1) const {
+ if (is(tok::comment) && Previous)
+ return Previous->endsSequenceInternal(K1);
+ return is(K1);
+ }
+
+ template <typename A, typename... Ts>
+ bool endsSequenceInternal(A K1, Ts... Tokens) const {
+ if (is(tok::comment) && Previous)
+ return Previous->endsSequenceInternal(K1, Tokens...);
+ return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
+ }
+};
+
+class ContinuationIndenter;
+struct LineState;
+
+class TokenRole {
+public:
+ TokenRole(const FormatStyle &Style) : Style(Style) {}
+ virtual ~TokenRole();
+
+ /// \brief After the \c TokenAnnotator has finished annotating all the tokens,
+ /// this function precomputes required information for formatting.
+ virtual void precomputeFormattingInfos(const FormatToken *Token);
+
+ /// \brief Apply the special formatting that the given role demands.
+ ///
+ /// Assumes that the token having this role is already formatted.
+ ///
+ /// Continues formatting from \p State leaving indentation to \p Indenter and
+ /// returns the total penalty that this formatting incurs.
+ virtual unsigned formatFromToken(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ return 0;
+ }
+
+ /// \brief Same as \c formatFromToken, but assumes that the first token has
+ /// already been set thereby deciding on the first line break.
+ virtual unsigned formatAfterToken(LineState &State,
+ ContinuationIndenter *Indenter,
+ bool DryRun) {
+ return 0;
+ }
+
+ /// \brief Notifies the \c Role that a comma was found.
+ virtual void CommaFound(const FormatToken *Token) {}
+
+protected:
+ const FormatStyle &Style;
+};
+
+class CommaSeparatedList : public TokenRole {
+public:
+ CommaSeparatedList(const FormatStyle &Style)
+ : TokenRole(Style), HasNestedBracedList(false) {}
+
+ void precomputeFormattingInfos(const FormatToken *Token) override;
+
+ unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun) override;
+
+ unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
+ bool DryRun) override;
+
+ /// \brief Adds \p Token as the next comma to the \c CommaSeparated list.
+ void CommaFound(const FormatToken *Token) override {
+ Commas.push_back(Token);
+ }
+
+private:
+ /// \brief A struct that holds information on how to format a given list with
+ /// a specific number of columns.
+ struct ColumnFormat {
+ /// \brief The number of columns to use.
+ unsigned Columns;
+
+ /// \brief The total width in characters.
+ unsigned TotalWidth;
+
+ /// \brief The number of lines required for this format.
+ unsigned LineCount;
+
+ /// \brief The size of each column in characters.
+ SmallVector<unsigned, 8> ColumnSizes;
+ };
+
+ /// \brief Calculate which \c ColumnFormat fits best into
+ /// \p RemainingCharacters.
+ const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
+
+ /// \brief The ordered \c FormatTokens making up the commas of this list.
+ SmallVector<const FormatToken *, 8> Commas;
+
+ /// \brief The length of each of the list's items in characters including the
+ /// trailing comma.
+ SmallVector<unsigned, 8> ItemLengths;
+
+ /// \brief Precomputed formats that can be used for this list.
+ SmallVector<ColumnFormat, 4> Formats;
+
+ bool HasNestedBracedList;
+};
+
+/// \brief Encapsulates keywords that are context sensitive or for languages not
+/// properly supported by Clang's lexer.
+struct AdditionalKeywords {
+ AdditionalKeywords(IdentifierTable &IdentTable) {
+ kw_final = &IdentTable.get("final");
+ kw_override = &IdentTable.get("override");
+ kw_in = &IdentTable.get("in");
+ kw_of = &IdentTable.get("of");
+ kw_CF_ENUM = &IdentTable.get("CF_ENUM");
+ kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
+ kw_NS_ENUM = &IdentTable.get("NS_ENUM");
+ kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
+
+ kw_as = &IdentTable.get("as");
+ kw_async = &IdentTable.get("async");
+ kw_await = &IdentTable.get("await");
+ kw_declare = &IdentTable.get("declare");
+ kw_finally = &IdentTable.get("finally");
+ kw_from = &IdentTable.get("from");
+ kw_function = &IdentTable.get("function");
+ kw_get = &IdentTable.get("get");
+ kw_import = &IdentTable.get("import");
+ kw_is = &IdentTable.get("is");
+ kw_let = &IdentTable.get("let");
+ kw_module = &IdentTable.get("module");
+ kw_set = &IdentTable.get("set");
+ kw_type = &IdentTable.get("type");
+ kw_var = &IdentTable.get("var");
+ kw_yield = &IdentTable.get("yield");
+
+ kw_abstract = &IdentTable.get("abstract");
+ kw_assert = &IdentTable.get("assert");
+ kw_extends = &IdentTable.get("extends");
+ kw_implements = &IdentTable.get("implements");
+ kw_instanceof = &IdentTable.get("instanceof");
+ kw_interface = &IdentTable.get("interface");
+ kw_native = &IdentTable.get("native");
+ kw_package = &IdentTable.get("package");
+ kw_synchronized = &IdentTable.get("synchronized");
+ kw_throws = &IdentTable.get("throws");
+ kw___except = &IdentTable.get("__except");
+ kw___has_include = &IdentTable.get("__has_include");
+ kw___has_include_next = &IdentTable.get("__has_include_next");
+
+ kw_mark = &IdentTable.get("mark");
+
+ kw_extend = &IdentTable.get("extend");
+ kw_option = &IdentTable.get("option");
+ kw_optional = &IdentTable.get("optional");
+ kw_repeated = &IdentTable.get("repeated");
+ kw_required = &IdentTable.get("required");
+ kw_returns = &IdentTable.get("returns");
+
+ kw_signals = &IdentTable.get("signals");
+ kw_qsignals = &IdentTable.get("Q_SIGNALS");
+ kw_slots = &IdentTable.get("slots");
+ kw_qslots = &IdentTable.get("Q_SLOTS");
+ }
+
+ // Context sensitive keywords.
+ IdentifierInfo *kw_final;
+ IdentifierInfo *kw_override;
+ IdentifierInfo *kw_in;
+ IdentifierInfo *kw_of;
+ IdentifierInfo *kw_CF_ENUM;
+ IdentifierInfo *kw_CF_OPTIONS;
+ IdentifierInfo *kw_NS_ENUM;
+ IdentifierInfo *kw_NS_OPTIONS;
+ IdentifierInfo *kw___except;
+ IdentifierInfo *kw___has_include;
+ IdentifierInfo *kw___has_include_next;
+
+ // JavaScript keywords.
+ IdentifierInfo *kw_as;
+ IdentifierInfo *kw_async;
+ IdentifierInfo *kw_await;
+ IdentifierInfo *kw_declare;
+ IdentifierInfo *kw_finally;
+ IdentifierInfo *kw_from;
+ IdentifierInfo *kw_function;
+ IdentifierInfo *kw_get;
+ IdentifierInfo *kw_import;
+ IdentifierInfo *kw_is;
+ IdentifierInfo *kw_let;
+ IdentifierInfo *kw_module;
+ IdentifierInfo *kw_set;
+ IdentifierInfo *kw_type;
+ IdentifierInfo *kw_var;
+ IdentifierInfo *kw_yield;
+
+ // Java keywords.
+ IdentifierInfo *kw_abstract;
+ IdentifierInfo *kw_assert;
+ IdentifierInfo *kw_extends;
+ IdentifierInfo *kw_implements;
+ IdentifierInfo *kw_instanceof;
+ IdentifierInfo *kw_interface;
+ IdentifierInfo *kw_native;
+ IdentifierInfo *kw_package;
+ IdentifierInfo *kw_synchronized;
+ IdentifierInfo *kw_throws;
+
+ // Pragma keywords.
+ IdentifierInfo *kw_mark;
+
+ // Proto keywords.
+ IdentifierInfo *kw_extend;
+ IdentifierInfo *kw_option;
+ IdentifierInfo *kw_optional;
+ IdentifierInfo *kw_repeated;
+ IdentifierInfo *kw_required;
+ IdentifierInfo *kw_returns;
+
+ // QT keywords.
+ IdentifierInfo *kw_signals;
+ IdentifierInfo *kw_qsignals;
+ IdentifierInfo *kw_slots;
+ IdentifierInfo *kw_qslots;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp
new file mode 100644
index 000000000000..45c3ae1afe5f
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp
@@ -0,0 +1,670 @@
+//===--- FormatTokenLexer.cpp - Lex FormatTokens -------------*- C++ ----*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements FormatTokenLexer, which tokenizes a source file
+/// into a FormatToken stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#include "FormatTokenLexer.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+namespace clang {
+namespace format {
+
+FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ const FormatStyle &Style,
+ encoding::Encoding Encoding)
+ : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
+ Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
+ Style(Style), IdentTable(getFormattingLangOpts(Style)),
+ Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
+ FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
+ MacroBlockEndRegex(Style.MacroBlockEnd) {
+ Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
+ getFormattingLangOpts(Style)));
+ Lex->SetKeepWhitespaceMode(true);
+
+ for (const std::string &ForEachMacro : Style.ForEachMacros)
+ ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
+ std::sort(ForEachMacros.begin(), ForEachMacros.end());
+}
+
+ArrayRef<FormatToken *> FormatTokenLexer::lex() {
+ assert(Tokens.empty());
+ assert(FirstInLineIndex == 0);
+ do {
+ Tokens.push_back(getNextToken());
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ tryParseJSRegexLiteral();
+ handleTemplateStrings();
+ }
+ tryMergePreviousTokens();
+ if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
+ FirstInLineIndex = Tokens.size() - 1;
+ } while (Tokens.back()->Tok.isNot(tok::eof));
+ return Tokens;
+}
+
+void FormatTokenLexer::tryMergePreviousTokens() {
+ if (tryMerge_TMacro())
+ return;
+ if (tryMergeConflictMarkers())
+ return;
+ if (tryMergeLessLess())
+ return;
+ if (tryMergeNSStringLiteral())
+ return;
+
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
+ static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
+ tok::equal};
+ static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
+ tok::greaterequal};
+ static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
+ static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
+ static const tok::TokenKind JSExponentiationEqual[] = {tok::star,
+ tok::starequal};
+
+ // FIXME: Investigate what token type gives the correct operator priority.
+ if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
+ return;
+ if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
+ return;
+ if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
+ Tokens.back()->Tok.setKind(tok::starequal);
+ return;
+ }
+ }
+
+ if (Style.Language == FormatStyle::LK_Java) {
+ static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater,
+ tok::greater,
+ tok::greater};
+ static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater,
+ tok::greater,
+ tok::greaterequal};
+ if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator))
+ return;
+ if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
+ return;
+ }
+}
+
+bool FormatTokenLexer::tryMergeNSStringLiteral() {
+ if (Tokens.size() < 2)
+ return false;
+ auto &At = *(Tokens.end() - 2);
+ auto &String = *(Tokens.end() - 1);
+ if (!At->is(tok::at) || !String->is(tok::string_literal))
+ return false;
+ At->Tok.setKind(tok::string_literal);
+ At->TokenText = StringRef(At->TokenText.begin(),
+ String->TokenText.end() - At->TokenText.begin());
+ At->ColumnWidth += String->ColumnWidth;
+ At->Type = TT_ObjCStringLiteral;
+ Tokens.erase(Tokens.end() - 1);
+ return true;
+}
+
+bool FormatTokenLexer::tryMergeLessLess() {
+ // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
+ if (Tokens.size() < 3)
+ return false;
+
+ bool FourthTokenIsLess = false;
+ if (Tokens.size() > 3)
+ FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
+
+ auto First = Tokens.end() - 3;
+ if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
+ First[0]->isNot(tok::less) || FourthTokenIsLess)
+ return false;
+
+ // Only merge if there currently is no whitespace between the two "<".
+ if (First[1]->WhitespaceRange.getBegin() !=
+ First[1]->WhitespaceRange.getEnd())
+ return false;
+
+ First[0]->Tok.setKind(tok::lessless);
+ First[0]->TokenText = "<<";
+ First[0]->ColumnWidth += 1;
+ Tokens.erase(Tokens.end() - 2);
+ return true;
+}
+
+bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
+ TokenType NewType) {
+ if (Tokens.size() < Kinds.size())
+ return false;
+
+ SmallVectorImpl<FormatToken *>::const_iterator First =
+ Tokens.end() - Kinds.size();
+ if (!First[0]->is(Kinds[0]))
+ return false;
+ unsigned AddLength = 0;
+ for (unsigned i = 1; i < Kinds.size(); ++i) {
+ if (!First[i]->is(Kinds[i]) ||
+ First[i]->WhitespaceRange.getBegin() !=
+ First[i]->WhitespaceRange.getEnd())
+ return false;
+ AddLength += First[i]->TokenText.size();
+ }
+ Tokens.resize(Tokens.size() - Kinds.size() + 1);
+ First[0]->TokenText = StringRef(First[0]->TokenText.data(),
+ First[0]->TokenText.size() + AddLength);
+ First[0]->ColumnWidth += AddLength;
+ First[0]->Type = NewType;
+ return true;
+}
+
+// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
+ // NB: This is not entirely correct, as an r_paren can introduce an operand
+ // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
+ // corner case to not matter in practice, though.
+ return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
+ tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
+ tok::colon, tok::question, tok::tilde) ||
+ Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
+ tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
+ tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
+ Tok->isBinaryOperator();
+}
+
+bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
+ if (!Prev)
+ return true;
+
+ // Regex literals can only follow after prefix unary operators, not after
+ // postfix unary operators. If the '++' is followed by a non-operand
+ // introducing token, the slash here is the operand and not the start of a
+ // regex.
+ // `!` is an unary prefix operator, but also a post-fix operator that casts
+ // away nullability, so the same check applies.
+ if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))
+ return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
+
+ // The previous token must introduce an operand location where regex
+ // literals can occur.
+ if (!precedesOperand(Prev))
+ return false;
+
+ return true;
+}
+
+// Tries to parse a JavaScript Regex literal starting at the current token,
+// if that begins with a slash and is in a location where JavaScript allows
+// regex literals. Changes the current token to a regex literal and updates
+// its text if successful.
+void FormatTokenLexer::tryParseJSRegexLiteral() {
+ FormatToken *RegexToken = Tokens.back();
+ if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
+ return;
+
+ FormatToken *Prev = nullptr;
+ for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
+ // NB: Because previous pointers are not initialized yet, this cannot use
+ // Token.getPreviousNonComment.
+ if ((*I)->isNot(tok::comment)) {
+ Prev = *I;
+ break;
+ }
+ }
+
+ if (!canPrecedeRegexLiteral(Prev))
+ return;
+
+ // 'Manually' lex ahead in the current file buffer.
+ const char *Offset = Lex->getBufferLocation();
+ const char *RegexBegin = Offset - RegexToken->TokenText.size();
+ StringRef Buffer = Lex->getBuffer();
+ bool InCharacterClass = false;
+ bool HaveClosingSlash = false;
+ for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
+ // Regular expressions are terminated with a '/', which can only be
+ // escaped using '\' or a character class between '[' and ']'.
+ // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
+ switch (*Offset) {
+ case '\\':
+ // Skip the escaped character.
+ ++Offset;
+ break;
+ case '[':
+ InCharacterClass = true;
+ break;
+ case ']':
+ InCharacterClass = false;
+ break;
+ case '/':
+ if (!InCharacterClass)
+ HaveClosingSlash = true;
+ break;
+ }
+ }
+
+ RegexToken->Type = TT_RegexLiteral;
+ // Treat regex literals like other string_literals.
+ RegexToken->Tok.setKind(tok::string_literal);
+ RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
+ RegexToken->ColumnWidth = RegexToken->TokenText.size();
+
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
+}
+
+void FormatTokenLexer::handleTemplateStrings() {
+ FormatToken *BacktickToken = Tokens.back();
+
+ if (BacktickToken->is(tok::l_brace)) {
+ StateStack.push(LexerState::NORMAL);
+ return;
+ }
+ if (BacktickToken->is(tok::r_brace)) {
+ if (StateStack.size() == 1)
+ return;
+ StateStack.pop();
+ if (StateStack.top() != LexerState::TEMPLATE_STRING)
+ return;
+ // If back in TEMPLATE_STRING, fallthrough and continue parsing the
+ } else if (BacktickToken->is(tok::unknown) &&
+ BacktickToken->TokenText == "`") {
+ StateStack.push(LexerState::TEMPLATE_STRING);
+ } else {
+ return; // Not actually a template
+ }
+
+ // 'Manually' lex ahead in the current file buffer.
+ const char *Offset = Lex->getBufferLocation();
+ const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
+ for (; Offset != Lex->getBuffer().end(); ++Offset) {
+ if (Offset[0] == '`') {
+ StateStack.pop();
+ break;
+ }
+ if (Offset[0] == '\\') {
+ ++Offset; // Skip the escaped character.
+ } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
+ Offset[1] == '{') {
+ // '${' introduces an expression interpolation in the template string.
+ StateStack.push(LexerState::NORMAL);
+ ++Offset;
+ break;
+ }
+ }
+
+ StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
+ BacktickToken->Type = TT_TemplateString;
+ BacktickToken->Tok.setKind(tok::string_literal);
+ BacktickToken->TokenText = LiteralText;
+
+ // Adjust width for potentially multiline string literals.
+ size_t FirstBreak = LiteralText.find('\n');
+ StringRef FirstLineText = FirstBreak == StringRef::npos
+ ? LiteralText
+ : LiteralText.substr(0, FirstBreak);
+ BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
+ FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
+ size_t LastBreak = LiteralText.rfind('\n');
+ if (LastBreak != StringRef::npos) {
+ BacktickToken->IsMultiline = true;
+ unsigned StartColumn = 0; // The template tail spans the entire line.
+ BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
+ Style.TabWidth, Encoding);
+ }
+
+ SourceLocation loc = Offset < Lex->getBuffer().end()
+ ? Lex->getSourceLocation(Offset + 1)
+ : SourceMgr.getLocForEndOfFile(ID);
+ resetLexer(SourceMgr.getFileOffset(loc));
+}
+
+bool FormatTokenLexer::tryMerge_TMacro() {
+ if (Tokens.size() < 4)
+ return false;
+ FormatToken *Last = Tokens.back();
+ if (!Last->is(tok::r_paren))
+ return false;
+
+ FormatToken *String = Tokens[Tokens.size() - 2];
+ if (!String->is(tok::string_literal) || String->IsMultiline)
+ return false;
+
+ if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
+ return false;
+
+ FormatToken *Macro = Tokens[Tokens.size() - 4];
+ if (Macro->TokenText != "_T")
+ return false;
+
+ const char *Start = Macro->TokenText.data();
+ const char *End = Last->TokenText.data() + Last->TokenText.size();
+ String->TokenText = StringRef(Start, End - Start);
+ String->IsFirst = Macro->IsFirst;
+ String->LastNewlineOffset = Macro->LastNewlineOffset;
+ String->WhitespaceRange = Macro->WhitespaceRange;
+ String->OriginalColumn = Macro->OriginalColumn;
+ String->ColumnWidth = encoding::columnWidthWithTabs(
+ String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
+ String->NewlinesBefore = Macro->NewlinesBefore;
+ String->HasUnescapedNewline = Macro->HasUnescapedNewline;
+
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.pop_back();
+ Tokens.back() = String;
+ return true;
+}
+
+bool FormatTokenLexer::tryMergeConflictMarkers() {
+ if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
+ return false;
+
+ // Conflict lines look like:
+ // <marker> <text from the vcs>
+ // For example:
+ // >>>>>>> /file/in/file/system at revision 1234
+ //
+ // We merge all tokens in a line that starts with a conflict marker
+ // into a single token with a special token type that the unwrapped line
+ // parser will use to correctly rebuild the underlying code.
+
+ FileID ID;
+ // Get the position of the first token in the line.
+ unsigned FirstInLineOffset;
+ std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
+ Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
+ StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
+ // Calculate the offset of the start of the current line.
+ auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
+ if (LineOffset == StringRef::npos) {
+ LineOffset = 0;
+ } else {
+ ++LineOffset;
+ }
+
+ auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
+ StringRef LineStart;
+ if (FirstSpace == StringRef::npos) {
+ LineStart = Buffer.substr(LineOffset);
+ } else {
+ LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
+ }
+
+ TokenType Type = TT_Unknown;
+ if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
+ Type = TT_ConflictStart;
+ } else if (LineStart == "|||||||" || LineStart == "=======" ||
+ LineStart == "====") {
+ Type = TT_ConflictAlternative;
+ } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
+ Type = TT_ConflictEnd;
+ }
+
+ if (Type != TT_Unknown) {
+ FormatToken *Next = Tokens.back();
+
+ Tokens.resize(FirstInLineIndex + 1);
+ // We do not need to build a complete token here, as we will skip it
+ // during parsing anyway (as we must not touch whitespace around conflict
+ // markers).
+ Tokens.back()->Type = Type;
+ Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
+
+ Tokens.push_back(Next);
+ return true;
+ }
+
+ return false;
+}
+
+FormatToken *FormatTokenLexer::getStashedToken() {
+ // Create a synthesized second '>' or '<' token.
+ Token Tok = FormatTok->Tok;
+ StringRef TokenText = FormatTok->TokenText;
+
+ unsigned OriginalColumn = FormatTok->OriginalColumn;
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ FormatTok->Tok = Tok;
+ SourceLocation TokLocation =
+ FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
+ FormatTok->Tok.setLocation(TokLocation);
+ FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
+ FormatTok->TokenText = TokenText;
+ FormatTok->ColumnWidth = 1;
+ FormatTok->OriginalColumn = OriginalColumn + 1;
+
+ return FormatTok;
+}
+
+FormatToken *FormatTokenLexer::getNextToken() {
+ if (StateStack.top() == LexerState::TOKEN_STASHED) {
+ StateStack.pop();
+ return getStashedToken();
+ }
+
+ FormatTok = new (Allocator.Allocate()) FormatToken;
+ readRawToken(*FormatTok);
+ SourceLocation WhitespaceStart =
+ FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
+ FormatTok->IsFirst = IsFirstToken;
+ IsFirstToken = false;
+
+ // Consume and record whitespace until we find a significant token.
+ unsigned WhitespaceLength = TrailingWhitespace;
+ while (FormatTok->Tok.is(tok::unknown)) {
+ StringRef Text = FormatTok->TokenText;
+ auto EscapesNewline = [&](int pos) {
+ // A '\r' here is just part of '\r\n'. Skip it.
+ if (pos >= 0 && Text[pos] == '\r')
+ --pos;
+ // See whether there is an odd number of '\' before this.
+ // FIXME: This is wrong. A '\' followed by a newline is always removed,
+ // regardless of whether there is another '\' before it.
+ // FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph.
+ unsigned count = 0;
+ for (; pos >= 0; --pos, ++count)
+ if (Text[pos] != '\\')
+ break;
+ return count & 1;
+ };
+ // FIXME: This miscounts tok:unknown tokens that are not just
+ // whitespace, e.g. a '`' character.
+ for (int i = 0, e = Text.size(); i != e; ++i) {
+ switch (Text[i]) {
+ case '\n':
+ ++FormatTok->NewlinesBefore;
+ FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\r':
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case '\f':
+ case '\v':
+ Column = 0;
+ break;
+ case ' ':
+ ++Column;
+ break;
+ case '\t':
+ Column += Style.TabWidth - Column % Style.TabWidth;
+ break;
+ case '\\':
+ if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ default:
+ FormatTok->Type = TT_ImplicitStringLiteral;
+ break;
+ }
+ if (FormatTok->Type == TT_ImplicitStringLiteral)
+ break;
+ }
+
+ if (FormatTok->is(TT_ImplicitStringLiteral))
+ break;
+ WhitespaceLength += FormatTok->Tok.getLength();
+
+ readRawToken(*FormatTok);
+ }
+
+ // In case the token starts with escaped newlines, we want to
+ // take them into account as whitespace - this pattern is quite frequent
+ // in macro definitions.
+ // FIXME: Add a more explicit test.
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+ FormatTok->TokenText[1] == '\n') {
+ ++FormatTok->NewlinesBefore;
+ WhitespaceLength += 2;
+ FormatTok->LastNewlineOffset = 2;
+ Column = 0;
+ FormatTok->TokenText = FormatTok->TokenText.substr(2);
+ }
+
+ FormatTok->WhitespaceRange = SourceRange(
+ WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+
+ FormatTok->OriginalColumn = Column;
+
+ TrailingWhitespace = 0;
+ if (FormatTok->Tok.is(tok::comment)) {
+ // FIXME: Add the trimmed whitespace to Column.
+ StringRef UntrimmedText = FormatTok->TokenText;
+ FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
+ TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
+ } else if (FormatTok->Tok.is(tok::raw_identifier)) {
+ IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
+ FormatTok->Tok.setIdentifierInfo(&Info);
+ FormatTok->Tok.setKind(Info.getTokenID());
+ if (Style.Language == FormatStyle::LK_Java &&
+ FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
+ tok::kw_operator)) {
+ FormatTok->Tok.setKind(tok::identifier);
+ FormatTok->Tok.setIdentifierInfo(nullptr);
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
+ tok::kw_operator)) {
+ FormatTok->Tok.setKind(tok::identifier);
+ FormatTok->Tok.setIdentifierInfo(nullptr);
+ }
+ } else if (FormatTok->Tok.is(tok::greatergreater)) {
+ FormatTok->Tok.setKind(tok::greater);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ ++Column;
+ StateStack.push(LexerState::TOKEN_STASHED);
+ } else if (FormatTok->Tok.is(tok::lessless)) {
+ FormatTok->Tok.setKind(tok::less);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
+ ++Column;
+ StateStack.push(LexerState::TOKEN_STASHED);
+ }
+
+ // Now FormatTok is the next non-whitespace token.
+
+ StringRef Text = FormatTok->TokenText;
+ size_t FirstNewlinePos = Text.find('\n');
+ if (FirstNewlinePos == StringRef::npos) {
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth =
+ encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
+ Column += FormatTok->ColumnWidth;
+ } else {
+ FormatTok->IsMultiline = true;
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
+
+ // The last line of the token always starts in column 0.
+ // Thus, the length can be precomputed even in the presence of tabs.
+ FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
+ Column = FormatTok->LastLineColumnWidth;
+ }
+
+ if (Style.isCpp()) {
+ if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
+ Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
+ tok::pp_define) &&
+ std::find(ForEachMacros.begin(), ForEachMacros.end(),
+ FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
+ FormatTok->Type = TT_ForEachMacro;
+ } else if (FormatTok->is(tok::identifier)) {
+ if (MacroBlockBeginRegex.match(Text)) {
+ FormatTok->Type = TT_MacroBlockBegin;
+ } else if (MacroBlockEndRegex.match(Text)) {
+ FormatTok->Type = TT_MacroBlockEnd;
+ }
+ }
+ }
+
+ return FormatTok;
+}
+
+void FormatTokenLexer::readRawToken(FormatToken &Tok) {
+ Lex->LexFromRawLexer(Tok.Tok);
+ Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+ Tok.Tok.getLength());
+ // For formatting, treat unterminated string literals like normal string
+ // literals.
+ if (Tok.is(tok::unknown)) {
+ if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
+ Tok.Tok.setKind(tok::string_literal);
+ Tok.IsUnterminatedLiteral = true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.TokenText == "''") {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+ }
+
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.is(tok::char_constant)) {
+ Tok.Tok.setKind(tok::string_literal);
+ }
+
+ if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
+ Tok.TokenText == "/* clang-format on */")) {
+ FormattingDisabled = false;
+ }
+
+ Tok.Finalized = FormattingDisabled;
+
+ if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
+ Tok.TokenText == "/* clang-format off */")) {
+ FormattingDisabled = true;
+ }
+}
+
+void FormatTokenLexer::resetLexer(unsigned Offset) {
+ StringRef Buffer = SourceMgr.getBufferData(ID);
+ Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
+ getFormattingLangOpts(Style), Buffer.begin(),
+ Buffer.begin() + Offset, Buffer.end()));
+ Lex->SetKeepWhitespaceMode(true);
+ TrailingWhitespace = 0;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h
new file mode 100644
index 000000000000..bf10f09cd11e
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.h
@@ -0,0 +1,115 @@
+//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains FormatTokenLexer, which tokenizes a source file
+/// into a token stream suitable for ClangFormat.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+
+#include <stack>
+
+namespace clang {
+namespace format {
+
+enum LexerState {
+ NORMAL,
+ TEMPLATE_STRING,
+ TOKEN_STASHED,
+};
+
+class FormatTokenLexer {
+public:
+ FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
+ const FormatStyle &Style, encoding::Encoding Encoding);
+
+ ArrayRef<FormatToken *> lex();
+
+ const AdditionalKeywords &getKeywords() { return Keywords; }
+
+private:
+ void tryMergePreviousTokens();
+
+ bool tryMergeLessLess();
+ bool tryMergeNSStringLiteral();
+
+ bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
+
+ // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
+ bool precedesOperand(FormatToken *Tok);
+
+ bool canPrecedeRegexLiteral(FormatToken *Prev);
+
+ // Tries to parse a JavaScript Regex literal starting at the current token,
+ // if that begins with a slash and is in a location where JavaScript allows
+ // regex literals. Changes the current token to a regex literal and updates
+ // its text if successful.
+ void tryParseJSRegexLiteral();
+
+ // Handles JavaScript template strings.
+ //
+ // JavaScript template strings use backticks ('`') as delimiters, and allow
+ // embedding expressions nested in ${expr-here}. Template strings can be
+ // nested recursively, i.e. expressions can contain template strings in turn.
+ //
+ // The code below parses starting from a backtick, up to a closing backtick or
+ // an opening ${. It also maintains a stack of lexing contexts to handle
+ // nested template parts by balancing curly braces.
+ void handleTemplateStrings();
+
+ bool tryMerge_TMacro();
+
+ bool tryMergeConflictMarkers();
+
+ FormatToken *getStashedToken();
+
+ FormatToken *getNextToken();
+
+ FormatToken *FormatTok;
+ bool IsFirstToken;
+ std::stack<LexerState> StateStack;
+ unsigned Column;
+ unsigned TrailingWhitespace;
+ std::unique_ptr<Lexer> Lex;
+ const SourceManager &SourceMgr;
+ FileID ID;
+ const FormatStyle &Style;
+ IdentifierTable IdentTable;
+ AdditionalKeywords Keywords;
+ encoding::Encoding Encoding;
+ llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+ // Index (in 'Tokens') of the last token that starts a new line.
+ unsigned FirstInLineIndex;
+ SmallVector<FormatToken *, 16> Tokens;
+ SmallVector<IdentifierInfo *, 8> ForEachMacros;
+
+ bool FormattingDisabled;
+
+ llvm::Regex MacroBlockBeginRegex;
+ llvm::Regex MacroBlockEndRegex;
+
+ void readRawToken(FormatToken &Tok);
+
+ void resetLexer(unsigned Offset);
+};
+
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp
new file mode 100644
index 000000000000..1bbb41f757ae
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp
@@ -0,0 +1,207 @@
+//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
+/// fixes namespace end comments.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NamespaceEndCommentsFixer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Regex.h"
+
+#define DEBUG_TYPE "namespace-end-comments-fixer"
+
+namespace clang {
+namespace format {
+
+namespace {
+// The maximal number of unwrapped lines that a short namespace spans.
+// Short namespaces don't need an end comment.
+static const int kShortNamespaceMaxLines = 1;
+
+// Matches a valid namespace end comment.
+// Valid namespace end comments don't need to be edited.
+static llvm::Regex kNamespaceCommentPattern =
+ llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
+ "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
+ llvm::Regex::IgnoreCase);
+
+// Computes the name of a namespace given the namespace token.
+// Returns "" for anonymous namespace.
+std::string computeName(const FormatToken *NamespaceTok) {
+ assert(NamespaceTok && NamespaceTok->is(tok::kw_namespace) &&
+ "expecting a namespace token");
+ std::string name = "";
+ // Collects all the non-comment tokens between 'namespace' and '{'.
+ const FormatToken *Tok = NamespaceTok->getNextNonComment();
+ while (Tok && !Tok->is(tok::l_brace)) {
+ name += Tok->TokenText;
+ Tok = Tok->getNextNonComment();
+ }
+ return name;
+}
+
+std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline) {
+ std::string text = "// namespace";
+ if (!NamespaceName.empty()) {
+ text += ' ';
+ text += NamespaceName;
+ }
+ if (AddNewline)
+ text += '\n';
+ return text;
+}
+
+bool hasEndComment(const FormatToken *RBraceTok) {
+ return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
+}
+
+bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) {
+ assert(hasEndComment(RBraceTok));
+ const FormatToken *Comment = RBraceTok->Next;
+ SmallVector<StringRef, 7> Groups;
+ if (kNamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
+ StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
+ // Anonymous namespace comments must not mention a namespace name.
+ if (NamespaceName.empty() && !NamespaceNameInComment.empty())
+ return false;
+ StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
+ // Named namespace comments must not mention anonymous namespace.
+ if (!NamespaceName.empty() && !AnonymousInComment.empty())
+ return false;
+ return NamespaceNameInComment == NamespaceName;
+ }
+ return false;
+}
+
+void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
+ const SourceManager &SourceMgr,
+ tooling::Replacements *Fixes) {
+ auto EndLoc = RBraceTok->Tok.getEndLoc();
+ auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
+ if (Err) {
+ llvm::errs() << "Error while adding namespace end comment: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+}
+
+void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
+ const SourceManager &SourceMgr,
+ tooling::Replacements *Fixes) {
+ assert(hasEndComment(RBraceTok));
+ const FormatToken *Comment = RBraceTok->Next;
+ auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
+ Comment->Tok.getEndLoc());
+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
+ if (Err) {
+ llvm::errs() << "Error while updating namespace end comment: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+}
+
+const FormatToken *
+getNamespaceToken(const AnnotatedLine *line,
+ const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ if (!line->Affected || line->InPPDirective || !line->startsWith(tok::r_brace))
+ return nullptr;
+ size_t StartLineIndex = line->MatchingOpeningBlockLineIndex;
+ if (StartLineIndex == UnwrappedLine::kInvalidIndex)
+ return nullptr;
+ assert(StartLineIndex < AnnotatedLines.size());
+ const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
+ // Detect "(inline)? namespace" in the beginning of a line.
+ if (NamespaceTok->is(tok::kw_inline))
+ NamespaceTok = NamespaceTok->getNextNonComment();
+ if (!NamespaceTok || NamespaceTok->isNot(tok::kw_namespace))
+ return nullptr;
+ return NamespaceTok;
+}
+} // namespace
+
+NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
+ const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style) {}
+
+tooling::Replacements NamespaceEndCommentsFixer::analyze(
+ TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) {
+ const SourceManager &SourceMgr = Env.getSourceManager();
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+ tooling::Replacements Fixes;
+ std::string AllNamespaceNames = "";
+ size_t StartLineIndex = SIZE_MAX;
+ unsigned int CompactedNamespacesCount = 0;
+ for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
+ const AnnotatedLine *EndLine = AnnotatedLines[I];
+ const FormatToken *NamespaceTok =
+ getNamespaceToken(EndLine, AnnotatedLines);
+ if (!NamespaceTok)
+ continue;
+ FormatToken *RBraceTok = EndLine->First;
+ if (RBraceTok->Finalized)
+ continue;
+ RBraceTok->Finalized = true;
+ const FormatToken *EndCommentPrevTok = RBraceTok;
+ // Namespaces often end with '};'. In that case, attach namespace end
+ // comments to the semicolon tokens.
+ if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) {
+ EndCommentPrevTok = RBraceTok->Next;
+ }
+ if (StartLineIndex == SIZE_MAX)
+ StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
+ std::string NamespaceName = computeName(NamespaceTok);
+ if (Style.CompactNamespaces) {
+ if ((I + 1 < E) &&
+ getNamespaceToken(AnnotatedLines[I + 1], AnnotatedLines) &&
+ StartLineIndex - CompactedNamespacesCount - 1 ==
+ AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
+ !AnnotatedLines[I + 1]->First->Finalized) {
+ if (hasEndComment(EndCommentPrevTok)) {
+ // remove end comment, it will be merged in next one
+ updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
+ }
+ CompactedNamespacesCount++;
+ AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
+ continue;
+ }
+ NamespaceName += std::move(AllNamespaceNames);
+ CompactedNamespacesCount = 0;
+ AllNamespaceNames = std::string();
+ }
+ // The next token in the token stream after the place where the end comment
+ // token must be. This is either the next token on the current line or the
+ // first token on the next line.
+ const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
+ if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
+ EndCommentNextTok = EndCommentNextTok->Next;
+ if (!EndCommentNextTok && I + 1 < E)
+ EndCommentNextTok = AnnotatedLines[I + 1]->First;
+ bool AddNewline = EndCommentNextTok &&
+ EndCommentNextTok->NewlinesBefore == 0 &&
+ EndCommentNextTok->isNot(tok::eof);
+ const std::string EndCommentText =
+ computeEndCommentText(NamespaceName, AddNewline);
+ if (!hasEndComment(EndCommentPrevTok)) {
+ bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1;
+ if (!isShort)
+ addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
+ } else if (!validEndComment(EndCommentPrevTok, NamespaceName)) {
+ updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
+ }
+ StartLineIndex = SIZE_MAX;
+ }
+ return Fixes;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h
new file mode 100644
index 000000000000..7790668a2e82
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h
@@ -0,0 +1,37 @@
+//===--- NamespaceEndCommentsFixer.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that
+/// fixes namespace end comments.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H
+#define LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+class NamespaceEndCommentsFixer : public TokenAnalyzer {
+public:
+ NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style);
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp
new file mode 100644
index 000000000000..e73695ca8477
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp
@@ -0,0 +1,456 @@
+//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a sort operation for JavaScript ES6 imports.
+///
+//===----------------------------------------------------------------------===//
+
+#include "SortJavaScriptImports.h"
+#include "TokenAnalyzer.h"
+#include "TokenAnnotator.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <string>
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+class FormatTokenLexer;
+
+using clang::format::FormatStyle;
+
+// An imported symbol in a JavaScript ES6 import/export, possibly aliased.
+struct JsImportedSymbol {
+ StringRef Symbol;
+ StringRef Alias;
+ SourceRange Range;
+
+ bool operator==(const JsImportedSymbol &RHS) const {
+ // Ignore Range for comparison, it is only used to stitch code together,
+ // but imports at different code locations are still conceptually the same.
+ return Symbol == RHS.Symbol && Alias == RHS.Alias;
+ }
+};
+
+// An ES6 module reference.
+//
+// ES6 implements a module system, where individual modules (~= source files)
+// can reference other modules, either importing symbols from them, or exporting
+// symbols from them:
+// import {foo} from 'foo';
+// export {foo};
+// export {bar} from 'bar';
+//
+// `export`s with URLs are syntactic sugar for an import of the symbol from the
+// URL, followed by an export of the symbol, allowing this code to treat both
+// statements more or less identically, with the exception being that `export`s
+// are sorted last.
+//
+// imports and exports support individual symbols, but also a wildcard syntax:
+// import * as prefix from 'foo';
+// export * from 'bar';
+//
+// This struct represents both exports and imports to build up the information
+// required for sorting module references.
+struct JsModuleReference {
+ bool IsExport = false;
+ // Module references are sorted into these categories, in order.
+ enum ReferenceCategory {
+ SIDE_EFFECT, // "import 'something';"
+ ABSOLUTE, // from 'something'
+ RELATIVE_PARENT, // from '../*'
+ RELATIVE, // from './*'
+ };
+ ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
+ // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
+ StringRef URL;
+ // Prefix from "import * as prefix". Empty for symbol imports and `export *`.
+ // Implies an empty names list.
+ StringRef Prefix;
+ // Symbols from `import {SymbolA, SymbolB, ...} from ...;`.
+ SmallVector<JsImportedSymbol, 1> Symbols;
+ // Textual position of the import/export, including preceding and trailing
+ // comments.
+ SourceRange Range;
+};
+
+bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
+ if (LHS.IsExport != RHS.IsExport)
+ return LHS.IsExport < RHS.IsExport;
+ if (LHS.Category != RHS.Category)
+ return LHS.Category < RHS.Category;
+ if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
+ // Side effect imports might be ordering sensitive. Consider them equal so
+ // that they maintain their relative order in the stable sort below.
+ // This retains transitivity because LHS.Category == RHS.Category here.
+ return false;
+ // Empty URLs sort *last* (for export {...};).
+ if (LHS.URL.empty() != RHS.URL.empty())
+ return LHS.URL.empty() < RHS.URL.empty();
+ if (int Res = LHS.URL.compare_lower(RHS.URL))
+ return Res < 0;
+ // '*' imports (with prefix) sort before {a, b, ...} imports.
+ if (LHS.Prefix.empty() != RHS.Prefix.empty())
+ return LHS.Prefix.empty() < RHS.Prefix.empty();
+ if (LHS.Prefix != RHS.Prefix)
+ return LHS.Prefix > RHS.Prefix;
+ return false;
+}
+
+// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is
+// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic
+// structure, making it messy to sort them using regular expressions.
+class JavaScriptImportSorter : public TokenAnalyzer {
+public:
+ JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style),
+ FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override {
+ tooling::Replacements Result;
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+
+ const AdditionalKeywords &Keywords = Tokens.getKeywords();
+ SmallVector<JsModuleReference, 16> References;
+ AnnotatedLine *FirstNonImportLine;
+ std::tie(References, FirstNonImportLine) =
+ parseModuleReferences(Keywords, AnnotatedLines);
+
+ if (References.empty())
+ return Result;
+
+ SmallVector<unsigned, 16> Indices;
+ for (unsigned i = 0, e = References.size(); i != e; ++i)
+ Indices.push_back(i);
+ std::stable_sort(Indices.begin(), Indices.end(),
+ [&](unsigned LHSI, unsigned RHSI) {
+ return References[LHSI] < References[RHSI];
+ });
+ bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end());
+
+ std::string ReferencesText;
+ bool SymbolsInOrder = true;
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ JsModuleReference Reference = References[Indices[i]];
+ if (appendReference(ReferencesText, Reference))
+ SymbolsInOrder = false;
+ if (i + 1 < e) {
+ // Insert breaks between imports and exports.
+ ReferencesText += "\n";
+ // Separate imports groups with two line breaks, but keep all exports
+ // in a single group.
+ if (!Reference.IsExport &&
+ (Reference.IsExport != References[Indices[i + 1]].IsExport ||
+ Reference.Category != References[Indices[i + 1]].Category))
+ ReferencesText += "\n";
+ }
+ }
+
+ if (ReferencesInOrder && SymbolsInOrder)
+ return Result;
+
+ SourceRange InsertionPoint = References[0].Range;
+ InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());
+
+ // The loop above might collapse previously existing line breaks between
+ // import blocks, and thus shrink the file. SortIncludes must not shrink
+ // overall source length as there is currently no re-calculation of ranges
+ // after applying source sorting.
+ // This loop just backfills trailing spaces after the imports, which are
+ // harmless and will be stripped by the subsequent formatting pass.
+ // FIXME: A better long term fix is to re-calculate Ranges after sorting.
+ unsigned PreviousSize = getSourceText(InsertionPoint).size();
+ while (ReferencesText.size() < PreviousSize) {
+ ReferencesText += " ";
+ }
+
+ // Separate references from the main code body of the file.
+ if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2)
+ ReferencesText += "\n";
+
+ DEBUG(llvm::dbgs() << "Replacing imports:\n"
+ << getSourceText(InsertionPoint) << "\nwith:\n"
+ << ReferencesText << "\n");
+ auto Err = Result.add(tooling::Replacement(
+ Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint),
+ ReferencesText));
+ // FIXME: better error handling. For now, just print error message and skip
+ // the replacement for the release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false);
+ }
+
+ return Result;
+ }
+
+private:
+ FormatToken *Current;
+ FormatToken *LineEnd;
+
+ FormatToken invalidToken;
+
+ StringRef FileContents;
+
+ void skipComments() { Current = skipComments(Current); }
+
+ FormatToken *skipComments(FormatToken *Tok) {
+ while (Tok && Tok->is(tok::comment))
+ Tok = Tok->Next;
+ return Tok;
+ }
+
+ void nextToken() {
+ Current = Current->Next;
+ skipComments();
+ if (!Current || Current == LineEnd->Next) {
+ // Set the current token to an invalid token, so that further parsing on
+ // this line fails.
+ invalidToken.Tok.setKind(tok::unknown);
+ Current = &invalidToken;
+ }
+ }
+
+ StringRef getSourceText(SourceRange Range) {
+ return getSourceText(Range.getBegin(), Range.getEnd());
+ }
+
+ StringRef getSourceText(SourceLocation Begin, SourceLocation End) {
+ const SourceManager &SM = Env.getSourceManager();
+ return FileContents.substr(SM.getFileOffset(Begin),
+ SM.getFileOffset(End) - SM.getFileOffset(Begin));
+ }
+
+ // Appends ``Reference`` to ``Buffer``, returning true if text within the
+ // ``Reference`` changed (e.g. symbol order).
+ bool appendReference(std::string &Buffer, JsModuleReference &Reference) {
+ // Sort the individual symbols within the import.
+ // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';`
+ SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols;
+ std::stable_sort(
+ Symbols.begin(), Symbols.end(),
+ [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) {
+ return LHS.Symbol.compare_lower(RHS.Symbol) < 0;
+ });
+ if (Symbols == Reference.Symbols) {
+ // No change in symbol order.
+ StringRef ReferenceStmt = getSourceText(Reference.Range);
+ Buffer += ReferenceStmt;
+ return false;
+ }
+ // Stitch together the module reference start...
+ SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin();
+ SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd();
+ Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart);
+ // ... then the references in order ...
+ for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
+ if (I != Symbols.begin())
+ Buffer += ",";
+ Buffer += getSourceText(I->Range);
+ }
+ // ... followed by the module reference end.
+ Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd());
+ return true;
+ }
+
+ // Parses module references in the given lines. Returns the module references,
+ // and a pointer to the first "main code" line if that is adjacent to the
+ // affected lines of module references, nullptr otherwise.
+ std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine*>
+ parseModuleReferences(const AdditionalKeywords &Keywords,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ SmallVector<JsModuleReference, 16> References;
+ SourceLocation Start;
+ AnnotatedLine *FirstNonImportLine = nullptr;
+ bool AnyImportAffected = false;
+ for (auto Line : AnnotatedLines) {
+ Current = Line->First;
+ LineEnd = Line->Last;
+ skipComments();
+ if (Start.isInvalid() || References.empty())
+ // After the first file level comment, consider line comments to be part
+ // of the import that immediately follows them by using the previously
+ // set Start.
+ Start = Line->First->Tok.getLocation();
+ if (!Current) {
+ // Only comments on this line. Could be the first non-import line.
+ FirstNonImportLine = Line;
+ continue;
+ }
+ JsModuleReference Reference;
+ Reference.Range.setBegin(Start);
+ if (!parseModuleReference(Keywords, Reference)) {
+ if (!FirstNonImportLine)
+ FirstNonImportLine = Line; // if no comment before.
+ break;
+ }
+ FirstNonImportLine = nullptr;
+ AnyImportAffected = AnyImportAffected || Line->Affected;
+ Reference.Range.setEnd(LineEnd->Tok.getEndLoc());
+ DEBUG({
+ llvm::dbgs() << "JsModuleReference: {"
+ << "is_export: " << Reference.IsExport
+ << ", cat: " << Reference.Category
+ << ", url: " << Reference.URL
+ << ", prefix: " << Reference.Prefix;
+ for (size_t i = 0; i < Reference.Symbols.size(); ++i)
+ llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as "
+ << Reference.Symbols[i].Alias;
+ llvm::dbgs() << ", text: " << getSourceText(Reference.Range);
+ llvm::dbgs() << "}\n";
+ });
+ References.push_back(Reference);
+ Start = SourceLocation();
+ }
+ // Sort imports if any import line was affected.
+ if (!AnyImportAffected)
+ References.clear();
+ return std::make_pair(References, FirstNonImportLine);
+ }
+
+ // Parses a JavaScript/ECMAScript 6 module reference.
+ // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules
+ // for grammar EBNF (production ModuleItem).
+ bool parseModuleReference(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export))
+ return false;
+ Reference.IsExport = Current->is(tok::kw_export);
+
+ nextToken();
+ if (Current->isStringLiteral() && !Reference.IsExport) {
+ // "import 'side-effect';"
+ Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT;
+ Reference.URL =
+ Current->TokenText.substr(1, Current->TokenText.size() - 2);
+ return true;
+ }
+
+ if (!parseModuleBindings(Keywords, Reference))
+ return false;
+
+ if (Current->is(Keywords.kw_from)) {
+ // imports have a 'from' clause, exports might not.
+ nextToken();
+ if (!Current->isStringLiteral())
+ return false;
+ // URL = TokenText without the quotes.
+ Reference.URL =
+ Current->TokenText.substr(1, Current->TokenText.size() - 2);
+ if (Reference.URL.startswith(".."))
+ Reference.Category =
+ JsModuleReference::ReferenceCategory::RELATIVE_PARENT;
+ else if (Reference.URL.startswith("."))
+ Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
+ else
+ Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
+ } else {
+ // w/o URL groups with "empty".
+ Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
+ }
+ return true;
+ }
+
+ bool parseModuleBindings(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (parseStarBinding(Keywords, Reference))
+ return true;
+ return parseNamedBindings(Keywords, Reference);
+ }
+
+ bool parseStarBinding(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ // * as prefix from '...';
+ if (Current->isNot(tok::star))
+ return false;
+ nextToken();
+ if (Current->isNot(Keywords.kw_as))
+ return false;
+ nextToken();
+ if (Current->isNot(tok::identifier))
+ return false;
+ Reference.Prefix = Current->TokenText;
+ nextToken();
+ return true;
+ }
+
+ bool parseNamedBindings(const AdditionalKeywords &Keywords,
+ JsModuleReference &Reference) {
+ if (Current->is(tok::identifier)) {
+ nextToken();
+ if (Current->is(Keywords.kw_from))
+ return true;
+ if (Current->isNot(tok::comma))
+ return false;
+ nextToken(); // eat comma.
+ }
+ if (Current->isNot(tok::l_brace))
+ return false;
+
+ // {sym as alias, sym2 as ...} from '...';
+ while (Current->isNot(tok::r_brace)) {
+ nextToken();
+ if (Current->is(tok::r_brace))
+ break;
+ if (Current->isNot(tok::identifier))
+ return false;
+
+ JsImportedSymbol Symbol;
+ Symbol.Symbol = Current->TokenText;
+ // Make sure to include any preceding comments.
+ Symbol.Range.setBegin(
+ Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin());
+ nextToken();
+
+ if (Current->is(Keywords.kw_as)) {
+ nextToken();
+ if (Current->isNot(tok::identifier))
+ return false;
+ Symbol.Alias = Current->TokenText;
+ nextToken();
+ }
+ Symbol.Range.setEnd(Current->Tok.getLocation());
+ Reference.Symbols.push_back(Symbol);
+
+ if (!Current->isOneOf(tok::r_brace, tok::comma))
+ return false;
+ }
+ nextToken(); // consume r_brace
+ return true;
+ }
+};
+
+tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName) {
+ // FIXME: Cursor support.
+ std::unique_ptr<Environment> Env =
+ Environment::CreateVirtualEnvironment(Code, FileName, Ranges);
+ JavaScriptImportSorter Sorter(*Env, Style);
+ return Sorter.process();
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h
new file mode 100644
index 000000000000..f22a051008f0
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/SortJavaScriptImports.h
@@ -0,0 +1,36 @@
+//===--- SortJavaScriptImports.h - Sort ES6 Imports -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a sorter for JavaScript ES6 imports.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
+#define LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H
+
+#include "clang/Basic/LLVM.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+// Sort JavaScript ES6 imports/exports in ``Code``. The generated replacements
+// only monotonically increase the length of the given code.
+tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
+ StringRef Code,
+ ArrayRef<tooling::Range> Ranges,
+ StringRef FileName);
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp
new file mode 100644
index 000000000000..f2e4e8ef0819
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp
@@ -0,0 +1,146 @@
+//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an abstract TokenAnalyzer and associated helper
+/// classes. TokenAnalyzer can be extended to generate replacements based on
+/// an annotated and pre-processed token stream.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TokenAnalyzer.h"
+#include "AffectedRangeManager.h"
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+// This sets up an virtual file system with file \p FileName containing \p
+// Code.
+std::unique_ptr<Environment>
+Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges) {
+ // This is referenced by `FileMgr` and will be released by `FileMgr` when it
+ // is deleted.
+ IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+ new vfs::InMemoryFileSystem);
+ // This is passed to `SM` as reference, so the pointer has to be referenced
+ // in `Environment` so that `FileMgr` can out-live this function scope.
+ std::unique_ptr<FileManager> FileMgr(
+ new FileManager(FileSystemOptions(), InMemoryFileSystem));
+ // This is passed to `SM` as reference, so the pointer has to be referenced
+ // by `Environment` due to the same reason above.
+ std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
+ IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
+ new DiagnosticOptions));
+ // This will be stored as reference, so the pointer has to be stored in
+ // due to the same reason above.
+ std::unique_ptr<SourceManager> VirtualSM(
+ new SourceManager(*Diagnostics, *FileMgr));
+ InMemoryFileSystem->addFile(
+ FileName, 0, llvm::MemoryBuffer::getMemBuffer(
+ Code, FileName, /*RequiresNullTerminator=*/false));
+ FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
+ SourceLocation(), clang::SrcMgr::C_User);
+ assert(ID.isValid());
+ SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
+ std::vector<CharSourceRange> CharRanges;
+ for (const tooling::Range &Range : Ranges) {
+ SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+ SourceLocation End = Start.getLocWithOffset(Range.getLength());
+ CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+ }
+ return llvm::make_unique<Environment>(ID, std::move(FileMgr),
+ std::move(VirtualSM),
+ std::move(Diagnostics), CharRanges);
+}
+
+TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
+ : Style(Style), Env(Env),
+ AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
+ UnwrappedLines(1),
+ Encoding(encoding::detectEncoding(
+ Env.getSourceManager().getBufferData(Env.getFileID()))) {
+ DEBUG(
+ llvm::dbgs() << "File encoding: "
+ << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
+ << "\n");
+ DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+ << "\n");
+}
+
+tooling::Replacements TokenAnalyzer::process() {
+ tooling::Replacements Result;
+ FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,
+ Encoding);
+
+ UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);
+ Parser.parse();
+ assert(UnwrappedLines.rbegin()->empty());
+ for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
+ DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+ SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+
+ TokenAnnotator Annotator(Style, Tokens.getKeywords());
+ for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+ AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+ Annotator.annotate(*AnnotatedLines.back());
+ }
+
+ tooling::Replacements RunResult =
+ analyze(Annotator, AnnotatedLines, Tokens);
+
+ DEBUG({
+ llvm::dbgs() << "Replacements for run " << Run << ":\n";
+ for (tooling::Replacements::const_iterator I = RunResult.begin(),
+ E = RunResult.end();
+ I != E; ++I) {
+ llvm::dbgs() << I->toString() << "\n";
+ }
+ });
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ delete AnnotatedLines[i];
+ }
+ for (const auto &R : RunResult) {
+ auto Err = Result.add(R);
+ // FIXME: better error handling here. For now, simply return an empty
+ // Replacements to indicate failure.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ return tooling::Replacements();
+ }
+ }
+ }
+ return Result;
+}
+
+void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
+ assert(!UnwrappedLines.empty());
+ UnwrappedLines.back().push_back(TheLine);
+}
+
+void TokenAnalyzer::finishRun() {
+ UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h
new file mode 100644
index 000000000000..78a3d1bc8d9e
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnalyzer.h
@@ -0,0 +1,103 @@
+//===--- TokenAnalyzer.h - Analyze Token Streams ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares an abstract TokenAnalyzer, and associated helper
+/// classes. TokenAnalyzer can be extended to generate replacements based on
+/// an annotated and pre-processed token stream.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
+#define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
+
+#include "AffectedRangeManager.h"
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+namespace clang {
+namespace format {
+
+class Environment {
+public:
+ Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)
+ : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}
+
+ Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,
+ std::unique_ptr<SourceManager> VirtualSM,
+ std::unique_ptr<DiagnosticsEngine> Diagnostics,
+ const std::vector<CharSourceRange> &CharRanges)
+ : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),
+ SM(*VirtualSM), FileMgr(std::move(FileMgr)),
+ VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}
+
+ // This sets up an virtual file system with file \p FileName containing \p
+ // Code.
+ static std::unique_ptr<Environment>
+ CreateVirtualEnvironment(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges);
+
+ FileID getFileID() const { return ID; }
+
+ ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
+
+ const SourceManager &getSourceManager() const { return SM; }
+
+private:
+ FileID ID;
+ SmallVector<CharSourceRange, 8> CharRanges;
+ SourceManager &SM;
+
+ // The order of these fields are important - they should be in the same order
+ // as they are created in `CreateVirtualEnvironment` so that they can be
+ // deleted in the reverse order as they are created.
+ std::unique_ptr<FileManager> FileMgr;
+ std::unique_ptr<SourceManager> VirtualSM;
+ std::unique_ptr<DiagnosticsEngine> Diagnostics;
+};
+
+class TokenAnalyzer : public UnwrappedLineConsumer {
+public:
+ TokenAnalyzer(const Environment &Env, const FormatStyle &Style);
+
+ tooling::Replacements process();
+
+protected:
+ virtual tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) = 0;
+
+ void consumeUnwrappedLine(const UnwrappedLine &TheLine) override;
+
+ void finishRun() override;
+
+ FormatStyle Style;
+ // Stores Style, FileID and SourceManager etc.
+ const Environment &Env;
+ // AffectedRangeMgr stores ranges to be fixed.
+ AffectedRangeManager AffectedRangeMgr;
+ SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+ encoding::Encoding Encoding;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
new file mode 100644
index 000000000000..505f42ec9a06
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
@@ -0,0 +1,2794 @@
+//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TokenAnnotator.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-token-annotator"
+
+namespace clang {
+namespace format {
+
+namespace {
+
+/// \brief A parser that gathers additional information about tokens.
+///
+/// The \c TokenAnnotator tries to match parenthesis and square brakets and
+/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
+/// into template parameter lists.
+class AnnotatingParser {
+public:
+ AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
+ const AdditionalKeywords &Keywords)
+ : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
+ Keywords(Keywords) {
+ Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
+ resetTokenMetadata(CurrentToken);
+ }
+
+private:
+ bool parseAngle() {
+ if (!CurrentToken || !CurrentToken->Previous)
+ return false;
+ if (NonTemplateLess.count(CurrentToken->Previous))
+ return false;
+
+ const FormatToken& Previous = *CurrentToken->Previous;
+ if (Previous.Previous) {
+ if (Previous.Previous->Tok.isLiteral())
+ return false;
+ if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
+ (!Previous.Previous->MatchingParen ||
+ !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
+ return false;
+ }
+
+ FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
+ ScopedContextCreator ContextCreator(*this, tok::less, 12);
+
+ // If this angle is in the context of an expression, we need to be more
+ // hesitant to detect it as opening template parameters.
+ bool InExprContext = Contexts.back().IsExpression;
+
+ Contexts.back().IsExpression = false;
+ // If there's a template keyword before the opening angle bracket, this is a
+ // template parameter, not an argument.
+ Contexts.back().InTemplateArgument =
+ Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
+
+ if (Style.Language == FormatStyle::LK_Java &&
+ CurrentToken->is(tok::question))
+ next();
+
+ while (CurrentToken) {
+ if (CurrentToken->is(tok::greater)) {
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+ CurrentToken->Type = TT_TemplateCloser;
+ next();
+ return true;
+ }
+ if (CurrentToken->is(tok::question) &&
+ Style.Language == FormatStyle::LK_Java) {
+ next();
+ continue;
+ }
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
+ (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
+ return false;
+ // If a && or || is found and interpreted as a binary operator, this set
+ // of angles is likely part of something like "a < b && c > d". If the
+ // angles are inside an expression, the ||/&& might also be a binary
+ // operator that was misinterpreted because we are parsing template
+ // parameters.
+ // FIXME: This is getting out of hand, write a decent parser.
+ if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
+ CurrentToken->Previous->is(TT_BinaryOperator) &&
+ Contexts[Contexts.size() - 2].IsExpression &&
+ !Line.startsWith(tok::kw_template))
+ return false;
+ updateParameterCount(Left, CurrentToken);
+ if (!consumeToken())
+ return false;
+ }
+ return false;
+ }
+
+ bool parseParens(bool LookForDecls = false) {
+ if (!CurrentToken)
+ return false;
+ FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
+ ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
+
+ // FIXME: This is a bit of a hack. Do better.
+ Contexts.back().ColonIsForRangeExpr =
+ Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
+
+ bool StartsObjCMethodExpr = false;
+ if (CurrentToken->is(tok::caret)) {
+ // (^ can start a block type.
+ Left->Type = TT_ObjCBlockLParen;
+ } else if (FormatToken *MaybeSel = Left->Previous) {
+ // @selector( starts a selector.
+ if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
+ MaybeSel->Previous->is(tok::at)) {
+ StartsObjCMethodExpr = true;
+ }
+ }
+
+ if (Left->is(TT_OverloadedOperatorLParen)) {
+ Contexts.back().IsExpression = false;
+ } else if (Style.Language == FormatStyle::LK_JavaScript &&
+ (Line.startsWith(Keywords.kw_type, tok::identifier) ||
+ Line.startsWith(tok::kw_export, Keywords.kw_type,
+ tok::identifier))) {
+ // type X = (...);
+ // export type X = (...);
+ Contexts.back().IsExpression = false;
+ } else if (Left->Previous &&
+ (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
+ tok::kw_if, tok::kw_while, tok::l_paren,
+ tok::comma) ||
+ Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
+ Left->Previous->is(TT_BinaryOperator))) {
+ // static_assert, if and while usually contain expressions.
+ Contexts.back().IsExpression = true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
+ (Left->Previous->is(Keywords.kw_function) ||
+ (Left->Previous->endsSequence(tok::identifier,
+ Keywords.kw_function)))) {
+ // function(...) or function f(...)
+ Contexts.back().IsExpression = false;
+ } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
+ Left->Previous->is(TT_JsTypeColon)) {
+ // let x: (SomeType);
+ Contexts.back().IsExpression = false;
+ } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
+ Left->Previous->MatchingParen &&
+ Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
+ // This is a parameter list of a lambda expression.
+ Contexts.back().IsExpression = false;
+ } else if (Line.InPPDirective &&
+ (!Left->Previous || !Left->Previous->is(tok::identifier))) {
+ Contexts.back().IsExpression = true;
+ } else if (Contexts[Contexts.size() - 2].CaretFound) {
+ // This is the parameter list of an ObjC block.
+ Contexts.back().IsExpression = false;
+ } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
+ Left->Type = TT_AttributeParen;
+ } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
+ // The first argument to a foreach macro is a declaration.
+ Contexts.back().IsForEachMacro = true;
+ Contexts.back().IsExpression = false;
+ } else if (Left->Previous && Left->Previous->MatchingParen &&
+ Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
+ Contexts.back().IsExpression = false;
+ } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
+ bool IsForOrCatch =
+ Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
+ Contexts.back().IsExpression = !IsForOrCatch;
+ }
+
+ if (StartsObjCMethodExpr) {
+ Contexts.back().ColonIsObjCMethodExpr = true;
+ Left->Type = TT_ObjCMethodExpr;
+ }
+
+ bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
+ bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp);
+ bool HasMultipleLines = false;
+ bool HasMultipleParametersOnALine = false;
+ bool MightBeObjCForRangeLoop =
+ Left->Previous && Left->Previous->is(tok::kw_for);
+ while (CurrentToken) {
+ // LookForDecls is set when "if (" has been seen. Check for
+ // 'identifier' '*' 'identifier' followed by not '=' -- this
+ // '*' has to be a binary operator but determineStarAmpUsage() will
+ // categorize it as an unary operator, so set the right type here.
+ if (LookForDecls && CurrentToken->Next) {
+ FormatToken *Prev = CurrentToken->getPreviousNonComment();
+ if (Prev) {
+ FormatToken *PrevPrev = Prev->getPreviousNonComment();
+ FormatToken *Next = CurrentToken->Next;
+ if (PrevPrev && PrevPrev->is(tok::identifier) &&
+ Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
+ CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
+ Prev->Type = TT_BinaryOperator;
+ LookForDecls = false;
+ }
+ }
+ }
+
+ if (CurrentToken->Previous->is(TT_PointerOrReference) &&
+ CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
+ tok::coloncolon))
+ ProbablyFunctionType = true;
+ if (CurrentToken->is(tok::comma))
+ MightBeFunctionType = false;
+ if (CurrentToken->Previous->is(TT_BinaryOperator))
+ Contexts.back().IsExpression = true;
+ if (CurrentToken->is(tok::r_paren)) {
+ if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
+ (CurrentToken->Next->is(tok::l_paren) ||
+ (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
+ Left->Type = TT_FunctionTypeLParen;
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+
+ if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
+ Left->Previous && Left->Previous->is(tok::l_paren)) {
+ // Detect the case where macros are used to generate lambdas or
+ // function bodies, e.g.:
+ // auto my_lambda = MARCO((Type *type, int i) { .. body .. });
+ for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
+ if (Tok->is(TT_BinaryOperator) &&
+ Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
+ Tok->Type = TT_PointerOrReference;
+ }
+ }
+
+ if (StartsObjCMethodExpr) {
+ CurrentToken->Type = TT_ObjCMethodExpr;
+ if (Contexts.back().FirstObjCSelectorName) {
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
+ Contexts.back().LongestObjCSelectorName;
+ }
+ }
+
+ if (Left->is(TT_AttributeParen))
+ CurrentToken->Type = TT_AttributeParen;
+ if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
+ CurrentToken->Type = TT_JavaAnnotation;
+ if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
+ CurrentToken->Type = TT_LeadingJavaAnnotation;
+
+ if (!HasMultipleLines)
+ Left->PackingKind = PPK_Inconclusive;
+ else if (HasMultipleParametersOnALine)
+ Left->PackingKind = PPK_BinPacked;
+ else
+ Left->PackingKind = PPK_OnePerLine;
+
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
+ return false;
+
+ if (CurrentToken->is(tok::l_brace))
+ Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
+ if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
+ !CurrentToken->Next->HasUnescapedNewline &&
+ !CurrentToken->Next->isTrailingComment())
+ HasMultipleParametersOnALine = true;
+ if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
+ CurrentToken->Previous->isSimpleTypeSpecifier()) &&
+ !CurrentToken->is(tok::l_brace))
+ Contexts.back().IsExpression = false;
+ if (CurrentToken->isOneOf(tok::semi, tok::colon))
+ MightBeObjCForRangeLoop = false;
+ if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
+ CurrentToken->Type = TT_ObjCForIn;
+ // When we discover a 'new', we set CanBeExpression to 'false' in order to
+ // parse the type correctly. Reset that after a comma.
+ if (CurrentToken->is(tok::comma))
+ Contexts.back().CanBeExpression = true;
+
+ FormatToken *Tok = CurrentToken;
+ if (!consumeToken())
+ return false;
+ updateParameterCount(Left, Tok);
+ if (CurrentToken && CurrentToken->HasUnescapedNewline)
+ HasMultipleLines = true;
+ }
+ return false;
+ }
+
+ bool parseSquare() {
+ if (!CurrentToken)
+ return false;
+
+ // A '[' could be an index subscript (after an identifier or after
+ // ')' or ']'), it could be the start of an Objective-C method
+ // expression, or it could the start of an Objective-C array literal.
+ FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
+ FormatToken *Parent = Left->getPreviousNonComment();
+
+ // Cases where '>' is followed by '['.
+ // In C++, this can happen either in array of templates (foo<int>[10])
+ // or when array is a nested template type (unique_ptr<type1<type2>[]>).
+ bool CppArrayTemplates =
+ Style.isCpp() && Parent &&
+ Parent->is(TT_TemplateCloser) &&
+ (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
+ Contexts.back().InTemplateArgument);
+
+ bool StartsObjCMethodExpr =
+ !CppArrayTemplates && Style.isCpp() &&
+ Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
+ CurrentToken->isNot(tok::l_brace) &&
+ (!Parent ||
+ Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
+ tok::kw_return, tok::kw_throw) ||
+ Parent->isUnaryOperator() ||
+ Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
+ getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
+ bool ColonFound = false;
+
+ unsigned BindingIncrease = 1;
+ if (Left->is(TT_Unknown)) {
+ if (StartsObjCMethodExpr) {
+ Left->Type = TT_ObjCMethodExpr;
+ } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
+ Contexts.back().ContextKind == tok::l_brace &&
+ Parent->isOneOf(tok::l_brace, tok::comma)) {
+ Left->Type = TT_JsComputedPropertyName;
+ } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
+ Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
+ Left->Type = TT_DesignatedInitializerLSquare;
+ } else if (CurrentToken->is(tok::r_square) && Parent &&
+ Parent->is(TT_TemplateCloser)) {
+ Left->Type = TT_ArraySubscriptLSquare;
+ } else if (Style.Language == FormatStyle::LK_Proto ||
+ (!CppArrayTemplates && Parent &&
+ Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
+ tok::comma, tok::l_paren, tok::l_square,
+ tok::question, tok::colon, tok::kw_return,
+ // Should only be relevant to JavaScript:
+ tok::kw_default))) {
+ Left->Type = TT_ArrayInitializerLSquare;
+ } else {
+ BindingIncrease = 10;
+ Left->Type = TT_ArraySubscriptLSquare;
+ }
+ }
+
+ ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
+ Contexts.back().IsExpression = true;
+ Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
+
+ while (CurrentToken) {
+ if (CurrentToken->is(tok::r_square)) {
+ if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
+ Left->is(TT_ObjCMethodExpr)) {
+ // An ObjC method call is rarely followed by an open parenthesis.
+ // FIXME: Do we incorrectly label ":" with this?
+ StartsObjCMethodExpr = false;
+ Left->Type = TT_Unknown;
+ }
+ if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
+ CurrentToken->Type = TT_ObjCMethodExpr;
+ // determineStarAmpUsage() thinks that '*' '[' is allocating an
+ // array of pointers, but if '[' starts a selector then '*' is a
+ // binary operator.
+ if (Parent && Parent->is(TT_PointerOrReference))
+ Parent->Type = TT_BinaryOperator;
+ }
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+ if (Contexts.back().FirstObjCSelectorName) {
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
+ Contexts.back().LongestObjCSelectorName;
+ if (Left->BlockParameterCount > 1)
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
+ }
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
+ return false;
+ if (CurrentToken->is(tok::colon)) {
+ if (Left->isOneOf(TT_ArraySubscriptLSquare,
+ TT_DesignatedInitializerLSquare)) {
+ Left->Type = TT_ObjCMethodExpr;
+ StartsObjCMethodExpr = true;
+ Contexts.back().ColonIsObjCMethodExpr = true;
+ if (Parent && Parent->is(tok::r_paren))
+ Parent->Type = TT_CastRParen;
+ }
+ ColonFound = true;
+ }
+ if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
+ !ColonFound)
+ Left->Type = TT_ArrayInitializerLSquare;
+ FormatToken *Tok = CurrentToken;
+ if (!consumeToken())
+ return false;
+ updateParameterCount(Left, Tok);
+ }
+ return false;
+ }
+
+ bool parseBrace() {
+ if (CurrentToken) {
+ FormatToken *Left = CurrentToken->Previous;
+ Left->ParentBracket = Contexts.back().ContextKind;
+
+ if (Contexts.back().CaretFound)
+ Left->Type = TT_ObjCBlockLBrace;
+ Contexts.back().CaretFound = false;
+
+ ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
+ Contexts.back().ColonIsDictLiteral = true;
+ if (Left->BlockKind == BK_BracedInit)
+ Contexts.back().IsExpression = true;
+
+ while (CurrentToken) {
+ if (CurrentToken->is(tok::r_brace)) {
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
+ return false;
+ updateParameterCount(Left, CurrentToken);
+ if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
+ FormatToken *Previous = CurrentToken->getPreviousNonComment();
+ if (((CurrentToken->is(tok::colon) &&
+ (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
+ Style.Language == FormatStyle::LK_Proto) &&
+ (Previous->Tok.getIdentifierInfo() ||
+ Previous->is(tok::string_literal)))
+ Previous->Type = TT_SelectorName;
+ if (CurrentToken->is(tok::colon) ||
+ Style.Language == FormatStyle::LK_JavaScript)
+ Left->Type = TT_DictLiteral;
+ }
+ if (CurrentToken->is(tok::comma) &&
+ Style.Language == FormatStyle::LK_JavaScript)
+ Left->Type = TT_DictLiteral;
+ if (!consumeToken())
+ return false;
+ }
+ }
+ return true;
+ }
+
+ void updateParameterCount(FormatToken *Left, FormatToken *Current) {
+ if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
+ ++Left->BlockParameterCount;
+ if (Current->is(tok::comma)) {
+ ++Left->ParameterCount;
+ if (!Left->Role)
+ Left->Role.reset(new CommaSeparatedList(Style));
+ Left->Role->CommaFound(Current);
+ } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
+ Left->ParameterCount = 1;
+ }
+ }
+
+ bool parseConditional() {
+ while (CurrentToken) {
+ if (CurrentToken->is(tok::colon)) {
+ CurrentToken->Type = TT_ConditionalExpr;
+ next();
+ return true;
+ }
+ if (!consumeToken())
+ return false;
+ }
+ return false;
+ }
+
+ bool parseTemplateDeclaration() {
+ if (CurrentToken && CurrentToken->is(tok::less)) {
+ CurrentToken->Type = TT_TemplateOpener;
+ next();
+ if (!parseAngle())
+ return false;
+ if (CurrentToken)
+ CurrentToken->Previous->ClosesTemplateDeclaration = true;
+ return true;
+ }
+ return false;
+ }
+
+ bool consumeToken() {
+ FormatToken *Tok = CurrentToken;
+ next();
+ switch (Tok->Tok.getKind()) {
+ case tok::plus:
+ case tok::minus:
+ if (!Tok->Previous && Line.MustBeDeclaration)
+ Tok->Type = TT_ObjCMethodSpecifier;
+ break;
+ case tok::colon:
+ if (!Tok->Previous)
+ return false;
+ // Colons from ?: are handled in parseConditional().
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
+ (Contexts.size() == 1 && // switch/case labels
+ !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
+ Contexts.back().ContextKind == tok::l_paren || // function params
+ Contexts.back().ContextKind == tok::l_square || // array type
+ (Contexts.size() == 1 &&
+ Line.MustBeDeclaration)) { // method/property declaration
+ Tok->Type = TT_JsTypeColon;
+ break;
+ }
+ }
+ if (Contexts.back().ColonIsDictLiteral ||
+ Style.Language == FormatStyle::LK_Proto) {
+ Tok->Type = TT_DictLiteral;
+ } else if (Contexts.back().ColonIsObjCMethodExpr ||
+ Line.startsWith(TT_ObjCMethodSpecifier)) {
+ Tok->Type = TT_ObjCMethodExpr;
+ const FormatToken *BeforePrevious = Tok->Previous->Previous;
+ if (!BeforePrevious ||
+ !(BeforePrevious->is(TT_CastRParen) ||
+ (BeforePrevious->is(TT_ObjCMethodExpr) &&
+ BeforePrevious->is(tok::colon))) ||
+ BeforePrevious->is(tok::r_square) ||
+ Contexts.back().LongestObjCSelectorName == 0) {
+ Tok->Previous->Type = TT_SelectorName;
+ if (Tok->Previous->ColumnWidth >
+ Contexts.back().LongestObjCSelectorName)
+ Contexts.back().LongestObjCSelectorName =
+ Tok->Previous->ColumnWidth;
+ if (!Contexts.back().FirstObjCSelectorName)
+ Contexts.back().FirstObjCSelectorName = Tok->Previous;
+ }
+ } else if (Contexts.back().ColonIsForRangeExpr) {
+ Tok->Type = TT_RangeBasedForLoopColon;
+ } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
+ Tok->Type = TT_BitFieldColon;
+ } else if (Contexts.size() == 1 &&
+ !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
+ if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
+ tok::kw_noexcept))
+ Tok->Type = TT_CtorInitializerColon;
+ else
+ Tok->Type = TT_InheritanceColon;
+ } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
+ Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
+ // This handles a special macro in ObjC code where selectors including
+ // the colon are passed as macro arguments.
+ Tok->Type = TT_ObjCMethodExpr;
+ } else if (Contexts.back().ContextKind == tok::l_paren) {
+ Tok->Type = TT_InlineASMColon;
+ }
+ break;
+ case tok::pipe:
+ case tok::amp:
+ // | and & in declarations/type expressions represent union and
+ // intersection types, respectively.
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ !Contexts.back().IsExpression)
+ Tok->Type = TT_JsTypeOperator;
+ break;
+ case tok::kw_if:
+ case tok::kw_while:
+ if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->is(tok::kw_constexpr))
+ next();
+ if (CurrentToken && CurrentToken->is(tok::l_paren)) {
+ next();
+ if (!parseParens(/*LookForDecls=*/true))
+ return false;
+ }
+ break;
+ case tok::kw_for:
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Tok->Previous && Tok->Previous->is(tok::period))
+ break;
+ // JS' for await ( ...
+ if (CurrentToken && CurrentToken->is(Keywords.kw_await))
+ next();
+ }
+ Contexts.back().ColonIsForRangeExpr = true;
+ next();
+ if (!parseParens())
+ return false;
+ break;
+ case tok::l_paren:
+ // When faced with 'operator()()', the kw_operator handler incorrectly
+ // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
+ // the first two parens OverloadedOperators and the second l_paren an
+ // OverloadedOperatorLParen.
+ if (Tok->Previous &&
+ Tok->Previous->is(tok::r_paren) &&
+ Tok->Previous->MatchingParen &&
+ Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
+ Tok->Previous->Type = TT_OverloadedOperator;
+ Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
+ Tok->Type = TT_OverloadedOperatorLParen;
+ }
+
+ if (!parseParens())
+ return false;
+ if (Line.MustBeDeclaration && Contexts.size() == 1 &&
+ !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
+ (!Tok->Previous ||
+ !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
+ TT_LeadingJavaAnnotation)))
+ Line.MightBeFunctionDecl = true;
+ break;
+ case tok::l_square:
+ if (!parseSquare())
+ return false;
+ break;
+ case tok::l_brace:
+ if (!parseBrace())
+ return false;
+ break;
+ case tok::less:
+ if (parseAngle()) {
+ Tok->Type = TT_TemplateOpener;
+ } else {
+ Tok->Type = TT_BinaryOperator;
+ NonTemplateLess.insert(Tok);
+ CurrentToken = Tok;
+ next();
+ }
+ break;
+ case tok::r_paren:
+ case tok::r_square:
+ return false;
+ case tok::r_brace:
+ // Lines can start with '}'.
+ if (Tok->Previous)
+ return false;
+ break;
+ case tok::greater:
+ Tok->Type = TT_BinaryOperator;
+ break;
+ case tok::kw_operator:
+ while (CurrentToken &&
+ !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
+ if (CurrentToken->isOneOf(tok::star, tok::amp))
+ CurrentToken->Type = TT_PointerOrReference;
+ consumeToken();
+ if (CurrentToken &&
+ CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))
+ CurrentToken->Previous->Type = TT_OverloadedOperator;
+ }
+ if (CurrentToken) {
+ CurrentToken->Type = TT_OverloadedOperatorLParen;
+ if (CurrentToken->Previous->is(TT_BinaryOperator))
+ CurrentToken->Previous->Type = TT_OverloadedOperator;
+ }
+ break;
+ case tok::question:
+ if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
+ Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
+ tok::r_brace)) {
+ // Question marks before semicolons, colons, etc. indicate optional
+ // types (fields, parameters), e.g.
+ // function(x?: string, y?) {...}
+ // class X { y?; }
+ Tok->Type = TT_JsTypeOptionalQuestion;
+ break;
+ }
+ // Declarations cannot be conditional expressions, this can only be part
+ // of a type declaration.
+ if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
+ Style.Language == FormatStyle::LK_JavaScript)
+ break;
+ parseConditional();
+ break;
+ case tok::kw_template:
+ parseTemplateDeclaration();
+ break;
+ case tok::comma:
+ if (Contexts.back().InCtorInitializer)
+ Tok->Type = TT_CtorInitializerComma;
+ else if (Contexts.back().InInheritanceList)
+ Tok->Type = TT_InheritanceComma;
+ else if (Contexts.back().FirstStartOfName &&
+ (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
+ Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
+ Line.IsMultiVariableDeclStmt = true;
+ }
+ if (Contexts.back().IsForEachMacro)
+ Contexts.back().IsExpression = true;
+ break;
+ case tok::identifier:
+ if (Tok->isOneOf(Keywords.kw___has_include,
+ Keywords.kw___has_include_next)) {
+ parseHasInclude();
+ }
+ break;
+ default:
+ break;
+ }
+ return true;
+ }
+
+ void parseIncludeDirective() {
+ if (CurrentToken && CurrentToken->is(tok::less)) {
+ next();
+ while (CurrentToken) {
+ // Mark tokens up to the trailing line comments as implicit string
+ // literals.
+ if (CurrentToken->isNot(tok::comment) &&
+ !CurrentToken->TokenText.startswith("//"))
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ }
+ }
+
+ void parseWarningOrError() {
+ next();
+ // We still want to format the whitespace left of the first token of the
+ // warning or error.
+ next();
+ while (CurrentToken) {
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ }
+
+ void parsePragma() {
+ next(); // Consume "pragma".
+ if (CurrentToken &&
+ CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
+ bool IsMark = CurrentToken->is(Keywords.kw_mark);
+ next(); // Consume "mark".
+ next(); // Consume first token (so we fix leading whitespace).
+ while (CurrentToken) {
+ if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ }
+ }
+
+ void parseHasInclude() {
+ if (!CurrentToken || !CurrentToken->is(tok::l_paren))
+ return;
+ next(); // '('
+ parseIncludeDirective();
+ next(); // ')'
+ }
+
+ LineType parsePreprocessorDirective() {
+ bool IsFirstToken = CurrentToken->IsFirst;
+ LineType Type = LT_PreprocessorDirective;
+ next();
+ if (!CurrentToken)
+ return Type;
+
+ if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
+ // JavaScript files can contain shebang lines of the form:
+ // #!/usr/bin/env node
+ // Treat these like C++ #include directives.
+ while (CurrentToken) {
+ // Tokens cannot be comments here.
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ return LT_ImportStatement;
+ }
+
+ if (CurrentToken->Tok.is(tok::numeric_constant)) {
+ CurrentToken->SpacesRequiredBefore = 1;
+ return Type;
+ }
+ // Hashes in the middle of a line can lead to any strange token
+ // sequence.
+ if (!CurrentToken->Tok.getIdentifierInfo())
+ return Type;
+ switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_include:
+ case tok::pp_include_next:
+ case tok::pp_import:
+ next();
+ parseIncludeDirective();
+ Type = LT_ImportStatement;
+ break;
+ case tok::pp_error:
+ case tok::pp_warning:
+ parseWarningOrError();
+ break;
+ case tok::pp_pragma:
+ parsePragma();
+ break;
+ case tok::pp_if:
+ case tok::pp_elif:
+ Contexts.back().IsExpression = true;
+ parseLine();
+ break;
+ default:
+ break;
+ }
+ while (CurrentToken) {
+ FormatToken *Tok = CurrentToken;
+ next();
+ if (Tok->is(tok::l_paren))
+ parseParens();
+ else if (Tok->isOneOf(Keywords.kw___has_include,
+ Keywords.kw___has_include_next))
+ parseHasInclude();
+ }
+ return Type;
+ }
+
+public:
+ LineType parseLine() {
+ NonTemplateLess.clear();
+ if (CurrentToken->is(tok::hash))
+ return parsePreprocessorDirective();
+
+ // Directly allow to 'import <string-literal>' to support protocol buffer
+ // definitions (code.google.com/p/protobuf) or missing "#" (either way we
+ // should not break the line).
+ IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
+ if ((Style.Language == FormatStyle::LK_Java &&
+ CurrentToken->is(Keywords.kw_package)) ||
+ (Info && Info->getPPKeywordID() == tok::pp_import &&
+ CurrentToken->Next &&
+ CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
+ tok::kw_static))) {
+ next();
+ parseIncludeDirective();
+ return LT_ImportStatement;
+ }
+
+ // If this line starts and ends in '<' and '>', respectively, it is likely
+ // part of "#define <a/b.h>".
+ if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
+ parseIncludeDirective();
+ return LT_ImportStatement;
+ }
+
+ // In .proto files, top-level options are very similar to import statements
+ // and should not be line-wrapped.
+ if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
+ CurrentToken->is(Keywords.kw_option)) {
+ next();
+ if (CurrentToken && CurrentToken->is(tok::identifier))
+ return LT_ImportStatement;
+ }
+
+ bool KeywordVirtualFound = false;
+ bool ImportStatement = false;
+
+ // import {...} from '...';
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ CurrentToken->is(Keywords.kw_import))
+ ImportStatement = true;
+
+ while (CurrentToken) {
+ if (CurrentToken->is(tok::kw_virtual))
+ KeywordVirtualFound = true;
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ // export {...} from '...';
+ // An export followed by "from 'some string';" is a re-export from
+ // another module identified by a URI and is treated as a
+ // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
+ // Just "export {...};" or "export class ..." should not be treated as
+ // an import in this sense.
+ if (Line.First->is(tok::kw_export) &&
+ CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
+ CurrentToken->Next->isStringLiteral())
+ ImportStatement = true;
+ if (isClosureImportStatement(*CurrentToken))
+ ImportStatement = true;
+ }
+ if (!consumeToken())
+ return LT_Invalid;
+ }
+ if (KeywordVirtualFound)
+ return LT_VirtualFunctionDecl;
+ if (ImportStatement)
+ return LT_ImportStatement;
+
+ if (Line.startsWith(TT_ObjCMethodSpecifier)) {
+ if (Contexts.back().FirstObjCSelectorName)
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
+ Contexts.back().LongestObjCSelectorName;
+ return LT_ObjCMethodDecl;
+ }
+
+ return LT_Other;
+ }
+
+private:
+ bool isClosureImportStatement(const FormatToken &Tok) {
+ // FIXME: Closure-library specific stuff should not be hard-coded but be
+ // configurable.
+ return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
+ Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||
+ Tok.Next->Next->TokenText == "provide" ||
+ Tok.Next->Next->TokenText == "require" ||
+ Tok.Next->Next->TokenText == "setTestOnly" ||
+ Tok.Next->Next->TokenText == "forwardDeclare") &&
+ Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
+ }
+
+ void resetTokenMetadata(FormatToken *Token) {
+ if (!Token)
+ return;
+
+ // Reset token type in case we have already looked at it and then
+ // recovered from an error (e.g. failure to find the matching >).
+ if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
+ TT_FunctionLBrace, TT_ImplicitStringLiteral,
+ TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
+ TT_OverloadedOperator, TT_RegexLiteral,
+ TT_TemplateString, TT_ObjCStringLiteral))
+ CurrentToken->Type = TT_Unknown;
+ CurrentToken->Role.reset();
+ CurrentToken->MatchingParen = nullptr;
+ CurrentToken->FakeLParens.clear();
+ CurrentToken->FakeRParens = 0;
+ }
+
+ void next() {
+ if (CurrentToken) {
+ CurrentToken->NestingLevel = Contexts.size() - 1;
+ CurrentToken->BindingStrength = Contexts.back().BindingStrength;
+ modifyContext(*CurrentToken);
+ determineTokenType(*CurrentToken);
+ CurrentToken = CurrentToken->Next;
+ }
+
+ resetTokenMetadata(CurrentToken);
+ }
+
+ /// \brief A struct to hold information valid in a specific context, e.g.
+ /// a pair of parenthesis.
+ struct Context {
+ Context(tok::TokenKind ContextKind, unsigned BindingStrength,
+ bool IsExpression)
+ : ContextKind(ContextKind), BindingStrength(BindingStrength),
+ IsExpression(IsExpression) {}
+
+ tok::TokenKind ContextKind;
+ unsigned BindingStrength;
+ bool IsExpression;
+ unsigned LongestObjCSelectorName = 0;
+ bool ColonIsForRangeExpr = false;
+ bool ColonIsDictLiteral = false;
+ bool ColonIsObjCMethodExpr = false;
+ FormatToken *FirstObjCSelectorName = nullptr;
+ FormatToken *FirstStartOfName = nullptr;
+ bool CanBeExpression = true;
+ bool InTemplateArgument = false;
+ bool InCtorInitializer = false;
+ bool InInheritanceList = false;
+ bool CaretFound = false;
+ bool IsForEachMacro = false;
+ };
+
+ /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
+ /// of each instance.
+ struct ScopedContextCreator {
+ AnnotatingParser &P;
+
+ ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
+ unsigned Increase)
+ : P(P) {
+ P.Contexts.push_back(Context(ContextKind,
+ P.Contexts.back().BindingStrength + Increase,
+ P.Contexts.back().IsExpression));
+ }
+
+ ~ScopedContextCreator() { P.Contexts.pop_back(); }
+ };
+
+ void modifyContext(const FormatToken &Current) {
+ if (Current.getPrecedence() == prec::Assignment &&
+ !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
+ // Type aliases use `type X = ...;` in TypeScript and can be exported
+ // using `export type ...`.
+ !(Style.Language == FormatStyle::LK_JavaScript &&
+ (Line.startsWith(Keywords.kw_type, tok::identifier) ||
+ Line.startsWith(tok::kw_export, Keywords.kw_type,
+ tok::identifier))) &&
+ (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
+ Contexts.back().IsExpression = true;
+ if (!Line.startsWith(TT_UnaryOperator)) {
+ for (FormatToken *Previous = Current.Previous;
+ Previous && Previous->Previous &&
+ !Previous->Previous->isOneOf(tok::comma, tok::semi);
+ Previous = Previous->Previous) {
+ if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
+ Previous = Previous->MatchingParen;
+ if (!Previous)
+ break;
+ }
+ if (Previous->opensScope())
+ break;
+ if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
+ Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
+ Previous->Previous && Previous->Previous->isNot(tok::equal))
+ Previous->Type = TT_PointerOrReference;
+ }
+ }
+ } else if (Current.is(tok::lessless) &&
+ (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
+ Contexts.back().IsExpression = true;
+ } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
+ Contexts.back().IsExpression = true;
+ } else if (Current.is(TT_TrailingReturnArrow)) {
+ Contexts.back().IsExpression = false;
+ } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
+ Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
+ } else if (Current.Previous &&
+ Current.Previous->is(TT_CtorInitializerColon)) {
+ Contexts.back().IsExpression = true;
+ Contexts.back().InCtorInitializer = true;
+ } else if (Current.Previous &&
+ Current.Previous->is(TT_InheritanceColon)) {
+ Contexts.back().InInheritanceList = true;
+ } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
+ for (FormatToken *Previous = Current.Previous;
+ Previous && Previous->isOneOf(tok::star, tok::amp);
+ Previous = Previous->Previous)
+ Previous->Type = TT_PointerOrReference;
+ if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
+ Contexts.back().IsExpression = false;
+ } else if (Current.is(tok::kw_new)) {
+ Contexts.back().CanBeExpression = false;
+ } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
+ // This should be the condition or increment in a for-loop.
+ Contexts.back().IsExpression = true;
+ }
+ }
+
+ void determineTokenType(FormatToken &Current) {
+ if (!Current.is(TT_Unknown))
+ // The token type is already known.
+ return;
+
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Current.is(tok::exclaim)) {
+ if (Current.Previous &&
+ (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
+ tok::r_paren, tok::r_square,
+ tok::r_brace) ||
+ Current.Previous->Tok.isLiteral())) {
+ Current.Type = TT_JsNonNullAssertion;
+ return;
+ }
+ if (Current.Next &&
+ Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
+ Current.Type = TT_JsNonNullAssertion;
+ return;
+ }
+ }
+ }
+
+ // Line.MightBeFunctionDecl can only be true after the parentheses of a
+ // function declaration have been found. In this case, 'Current' is a
+ // trailing token of this declaration and thus cannot be a name.
+ if (Current.is(Keywords.kw_instanceof)) {
+ Current.Type = TT_BinaryOperator;
+ } else if (isStartOfName(Current) &&
+ (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
+ Contexts.back().FirstStartOfName = &Current;
+ Current.Type = TT_StartOfName;
+ } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
+ AutoFound = true;
+ } else if (Current.is(tok::arrow) &&
+ Style.Language == FormatStyle::LK_Java) {
+ Current.Type = TT_LambdaArrow;
+ } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
+ Current.NestingLevel == 0) {
+ Current.Type = TT_TrailingReturnArrow;
+ } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
+ Current.Type =
+ determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
+ Contexts.back().IsExpression,
+ Contexts.back().InTemplateArgument);
+ } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
+ Current.Type = determinePlusMinusCaretUsage(Current);
+ if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
+ Contexts.back().CaretFound = true;
+ } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
+ Current.Type = determineIncrementUsage(Current);
+ } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
+ Current.Type = TT_UnaryOperator;
+ } else if (Current.is(tok::question)) {
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Line.MustBeDeclaration && !Contexts.back().IsExpression) {
+ // In JavaScript, `interface X { foo?(): bar; }` is an optional method
+ // on the interface, not a ternary expression.
+ Current.Type = TT_JsTypeOptionalQuestion;
+ } else {
+ Current.Type = TT_ConditionalExpr;
+ }
+ } else if (Current.isBinaryOperator() &&
+ (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
+ Current.Type = TT_BinaryOperator;
+ } else if (Current.is(tok::comment)) {
+ if (Current.TokenText.startswith("/*")) {
+ if (Current.TokenText.endswith("*/"))
+ Current.Type = TT_BlockComment;
+ else
+ // The lexer has for some reason determined a comment here. But we
+ // cannot really handle it, if it isn't properly terminated.
+ Current.Tok.setKind(tok::unknown);
+ } else {
+ Current.Type = TT_LineComment;
+ }
+ } else if (Current.is(tok::r_paren)) {
+ if (rParenEndsCast(Current))
+ Current.Type = TT_CastRParen;
+ if (Current.MatchingParen && Current.Next &&
+ !Current.Next->isBinaryOperator() &&
+ !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
+ tok::comma, tok::period, tok::arrow,
+ tok::coloncolon))
+ if (FormatToken *AfterParen = Current.MatchingParen->Next) {
+ // Make sure this isn't the return type of an Obj-C block declaration
+ if (AfterParen->Tok.isNot(tok::caret)) {
+ if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
+ if (BeforeParen->is(tok::identifier) &&
+ BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
+ (!BeforeParen->Previous ||
+ BeforeParen->Previous->ClosesTemplateDeclaration))
+ Current.Type = TT_FunctionAnnotationRParen;
+ }
+ }
+ } else if (Current.is(tok::at) && Current.Next &&
+ Style.Language != FormatStyle::LK_JavaScript &&
+ Style.Language != FormatStyle::LK_Java) {
+ // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
+ // marks declarations and properties that need special formatting.
+ switch (Current.Next->Tok.getObjCKeywordID()) {
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ case tok::objc_protocol:
+ Current.Type = TT_ObjCDecl;
+ break;
+ case tok::objc_property:
+ Current.Type = TT_ObjCProperty;
+ break;
+ default:
+ break;
+ }
+ } else if (Current.is(tok::period)) {
+ FormatToken *PreviousNoComment = Current.getPreviousNonComment();
+ if (PreviousNoComment &&
+ PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
+ Current.Type = TT_DesignatedInitializerPeriod;
+ else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
+ Current.Previous->isOneOf(TT_JavaAnnotation,
+ TT_LeadingJavaAnnotation)) {
+ Current.Type = Current.Previous->Type;
+ }
+ } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
+ Current.Previous &&
+ !Current.Previous->isOneOf(tok::equal, tok::at) &&
+ Line.MightBeFunctionDecl && Contexts.size() == 1) {
+ // Line.MightBeFunctionDecl can only be true after the parentheses of a
+ // function declaration have been found.
+ Current.Type = TT_TrailingAnnotation;
+ } else if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ Current.Previous) {
+ if (Current.Previous->is(tok::at) &&
+ Current.isNot(Keywords.kw_interface)) {
+ const FormatToken &AtToken = *Current.Previous;
+ const FormatToken *Previous = AtToken.getPreviousNonComment();
+ if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
+ Current.Type = TT_LeadingJavaAnnotation;
+ else
+ Current.Type = TT_JavaAnnotation;
+ } else if (Current.Previous->is(tok::period) &&
+ Current.Previous->isOneOf(TT_JavaAnnotation,
+ TT_LeadingJavaAnnotation)) {
+ Current.Type = Current.Previous->Type;
+ }
+ }
+ }
+
+ /// \brief Take a guess at whether \p Tok starts a name of a function or
+ /// variable declaration.
+ ///
+ /// This is a heuristic based on whether \p Tok is an identifier following
+ /// something that is likely a type.
+ bool isStartOfName(const FormatToken &Tok) {
+ if (Tok.isNot(tok::identifier) || !Tok.Previous)
+ return false;
+
+ if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
+ Keywords.kw_as))
+ return false;
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Tok.Previous->is(Keywords.kw_in))
+ return false;
+
+ // Skip "const" as it does not have an influence on whether this is a name.
+ FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
+ while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
+ PreviousNotConst = PreviousNotConst->getPreviousNonComment();
+
+ if (!PreviousNotConst)
+ return false;
+
+ bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
+ PreviousNotConst->Previous &&
+ PreviousNotConst->Previous->is(tok::hash);
+
+ if (PreviousNotConst->is(TT_TemplateCloser))
+ return PreviousNotConst && PreviousNotConst->MatchingParen &&
+ PreviousNotConst->MatchingParen->Previous &&
+ PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
+ PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
+
+ if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
+ PreviousNotConst->MatchingParen->Previous &&
+ PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
+ return true;
+
+ return (!IsPPKeyword &&
+ PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
+ PreviousNotConst->is(TT_PointerOrReference) ||
+ PreviousNotConst->isSimpleTypeSpecifier();
+ }
+
+ /// \brief Determine whether ')' is ending a cast.
+ bool rParenEndsCast(const FormatToken &Tok) {
+ // C-style casts are only used in C++ and Java.
+ if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
+ return false;
+
+ // Empty parens aren't casts and there are no casts at the end of the line.
+ if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
+ return false;
+
+ FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
+ if (LeftOfParens) {
+ // If there is a closing parenthesis left of the current parentheses,
+ // look past it as these might be chained casts.
+ if (LeftOfParens->is(tok::r_paren)) {
+ if (!LeftOfParens->MatchingParen ||
+ !LeftOfParens->MatchingParen->Previous)
+ return false;
+ LeftOfParens = LeftOfParens->MatchingParen->Previous;
+ }
+
+ // If there is an identifier (or with a few exceptions a keyword) right
+ // before the parentheses, this is unlikely to be a cast.
+ if (LeftOfParens->Tok.getIdentifierInfo() &&
+ !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
+ tok::kw_delete))
+ return false;
+
+ // Certain other tokens right before the parentheses are also signals that
+ // this cannot be a cast.
+ if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
+ TT_TemplateCloser, tok::ellipsis))
+ return false;
+ }
+
+ if (Tok.Next->is(tok::question))
+ return false;
+
+ // As Java has no function types, a "(" after the ")" likely means that this
+ // is a cast.
+ if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
+ return true;
+
+ // If a (non-string) literal follows, this is likely a cast.
+ if (Tok.Next->isNot(tok::string_literal) &&
+ (Tok.Next->Tok.isLiteral() ||
+ Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
+ return true;
+
+ // Heuristically try to determine whether the parentheses contain a type.
+ bool ParensAreType =
+ !Tok.Previous ||
+ Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
+ Tok.Previous->isSimpleTypeSpecifier();
+ bool ParensCouldEndDecl =
+ Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
+ if (ParensAreType && !ParensCouldEndDecl)
+ return true;
+
+ // At this point, we heuristically assume that there are no casts at the
+ // start of the line. We assume that we have found most cases where there
+ // are by the logic above, e.g. "(void)x;".
+ if (!LeftOfParens)
+ return false;
+
+ // Certain token types inside the parentheses mean that this can't be a
+ // cast.
+ for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
+ Token = Token->Next)
+ if (Token->is(TT_BinaryOperator))
+ return false;
+
+ // If the following token is an identifier or 'this', this is a cast. All
+ // cases where this can be something else are handled above.
+ if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
+ return true;
+
+ if (!Tok.Next->Next)
+ return false;
+
+ // If the next token after the parenthesis is a unary operator, assume
+ // that this is cast, unless there are unexpected tokens inside the
+ // parenthesis.
+ bool NextIsUnary =
+ Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
+ if (!NextIsUnary || Tok.Next->is(tok::plus) ||
+ !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
+ return false;
+ // Search for unexpected tokens.
+ for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
+ Prev = Prev->Previous) {
+ if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
+ return false;
+ }
+ return true;
+ }
+
+ /// \brief Return the type of the given token assuming it is * or &.
+ TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
+ bool InTemplateArgument) {
+ if (Style.Language == FormatStyle::LK_JavaScript)
+ return TT_BinaryOperator;
+
+ const FormatToken *PrevToken = Tok.getPreviousNonComment();
+ if (!PrevToken)
+ return TT_UnaryOperator;
+
+ const FormatToken *NextToken = Tok.getNextNonComment();
+ if (!NextToken ||
+ NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
+ (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
+ return TT_PointerOrReference;
+
+ if (PrevToken->is(tok::coloncolon))
+ return TT_PointerOrReference;
+
+ if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
+ tok::comma, tok::semi, tok::kw_return, tok::colon,
+ tok::equal, tok::kw_delete, tok::kw_sizeof) ||
+ PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
+ TT_UnaryOperator, TT_CastRParen))
+ return TT_UnaryOperator;
+
+ if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
+ return TT_PointerOrReference;
+ if (NextToken->is(tok::kw_operator) && !IsExpression)
+ return TT_PointerOrReference;
+ if (NextToken->isOneOf(tok::comma, tok::semi))
+ return TT_PointerOrReference;
+
+ if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
+ PrevToken->MatchingParen->Previous &&
+ PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
+ tok::kw_decltype))
+ return TT_PointerOrReference;
+
+ if (PrevToken->Tok.isLiteral() ||
+ PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
+ tok::kw_false, tok::r_brace) ||
+ NextToken->Tok.isLiteral() ||
+ NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
+ NextToken->isUnaryOperator() ||
+ // If we know we're in a template argument, there are no named
+ // declarations. Thus, having an identifier on the right-hand side
+ // indicates a binary operator.
+ (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
+ return TT_BinaryOperator;
+
+ // "&&(" is quite unlikely to be two successive unary "&".
+ if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
+ return TT_BinaryOperator;
+
+ // This catches some cases where evaluation order is used as control flow:
+ // aaa && aaa->f();
+ const FormatToken *NextNextToken = NextToken->getNextNonComment();
+ if (NextNextToken && NextNextToken->is(tok::arrow))
+ return TT_BinaryOperator;
+
+ // It is very unlikely that we are going to find a pointer or reference type
+ // definition on the RHS of an assignment.
+ if (IsExpression && !Contexts.back().CaretFound)
+ return TT_BinaryOperator;
+
+ return TT_PointerOrReference;
+ }
+
+ TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
+ const FormatToken *PrevToken = Tok.getPreviousNonComment();
+ if (!PrevToken)
+ return TT_UnaryOperator;
+
+ if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) &&
+ !PrevToken->is(tok::exclaim))
+ // There aren't any trailing unary operators except for TypeScript's
+ // non-null operator (!). Thus, this must be squence of leading operators.
+ return TT_UnaryOperator;
+
+ // Use heuristics to recognize unary operators.
+ if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
+ tok::question, tok::colon, tok::kw_return,
+ tok::kw_case, tok::at, tok::l_brace))
+ return TT_UnaryOperator;
+
+ // There can't be two consecutive binary operators.
+ if (PrevToken->is(TT_BinaryOperator))
+ return TT_UnaryOperator;
+
+ // Fall back to marking the token as binary operator.
+ return TT_BinaryOperator;
+ }
+
+ /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
+ TokenType determineIncrementUsage(const FormatToken &Tok) {
+ const FormatToken *PrevToken = Tok.getPreviousNonComment();
+ if (!PrevToken || PrevToken->is(TT_CastRParen))
+ return TT_UnaryOperator;
+ if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
+ return TT_TrailingUnaryOperator;
+
+ return TT_UnaryOperator;
+ }
+
+ SmallVector<Context, 8> Contexts;
+
+ const FormatStyle &Style;
+ AnnotatedLine &Line;
+ FormatToken *CurrentToken;
+ bool AutoFound;
+ const AdditionalKeywords &Keywords;
+
+ // Set of "<" tokens that do not open a template parameter list. If parseAngle
+ // determines that a specific token can't be a template opener, it will make
+ // same decision irrespective of the decisions for tokens leading up to it.
+ // Store this information to prevent this from causing exponential runtime.
+ llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
+};
+
+static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
+static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
+
+/// \brief Parses binary expressions by inserting fake parenthesis based on
+/// operator precedence.
+class ExpressionParser {
+public:
+ ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
+ AnnotatedLine &Line)
+ : Style(Style), Keywords(Keywords), Current(Line.First) {}
+
+ /// \brief Parse expressions with the given operatore precedence.
+ void parse(int Precedence = 0) {
+ // Skip 'return' and ObjC selector colons as they are not part of a binary
+ // expression.
+ while (Current && (Current->is(tok::kw_return) ||
+ (Current->is(tok::colon) &&
+ Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
+ next();
+
+ if (!Current || Precedence > PrecedenceArrowAndPeriod)
+ return;
+
+ // Conditional expressions need to be parsed separately for proper nesting.
+ if (Precedence == prec::Conditional) {
+ parseConditionalExpr();
+ return;
+ }
+
+ // Parse unary operators, which all have a higher precedence than binary
+ // operators.
+ if (Precedence == PrecedenceUnaryOperator) {
+ parseUnaryOperator();
+ return;
+ }
+
+ FormatToken *Start = Current;
+ FormatToken *LatestOperator = nullptr;
+ unsigned OperatorIndex = 0;
+
+ while (Current) {
+ // Consume operators with higher precedence.
+ parse(Precedence + 1);
+
+ int CurrentPrecedence = getCurrentPrecedence();
+
+ if (Current && Current->is(TT_SelectorName) &&
+ Precedence == CurrentPrecedence) {
+ if (LatestOperator)
+ addFakeParenthesis(Start, prec::Level(Precedence));
+ Start = Current;
+ }
+
+ // At the end of the line or when an operator with higher precedence is
+ // found, insert fake parenthesis and return.
+ if (!Current ||
+ (Current->closesScope() &&
+ (Current->MatchingParen || Current->is(TT_TemplateString))) ||
+ (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
+ (CurrentPrecedence == prec::Conditional &&
+ Precedence == prec::Assignment && Current->is(tok::colon))) {
+ break;
+ }
+
+ // Consume scopes: (), [], <> and {}
+ if (Current->opensScope()) {
+ // In fragment of a JavaScript template string can look like '}..${' and
+ // thus close a scope and open a new one at the same time.
+ while (Current && (!Current->closesScope() || Current->opensScope())) {
+ next();
+ parse();
+ }
+ next();
+ } else {
+ // Operator found.
+ if (CurrentPrecedence == Precedence) {
+ if (LatestOperator)
+ LatestOperator->NextOperator = Current;
+ LatestOperator = Current;
+ Current->OperatorIndex = OperatorIndex;
+ ++OperatorIndex;
+ }
+ next(/*SkipPastLeadingComments=*/Precedence > 0);
+ }
+ }
+
+ if (LatestOperator && (Current || Precedence > 0)) {
+ // LatestOperator->LastOperator = true;
+ if (Precedence == PrecedenceArrowAndPeriod) {
+ // Call expressions don't have a binary operator precedence.
+ addFakeParenthesis(Start, prec::Unknown);
+ } else {
+ addFakeParenthesis(Start, prec::Level(Precedence));
+ }
+ }
+ }
+
+private:
+ /// \brief Gets the precedence (+1) of the given token for binary operators
+ /// and other tokens that we treat like binary operators.
+ int getCurrentPrecedence() {
+ if (Current) {
+ const FormatToken *NextNonComment = Current->getNextNonComment();
+ if (Current->is(TT_ConditionalExpr))
+ return prec::Conditional;
+ if (NextNonComment && NextNonComment->is(tok::colon) &&
+ NextNonComment->is(TT_DictLiteral))
+ return prec::Assignment;
+ if (Current->is(TT_JsComputedPropertyName))
+ return prec::Assignment;
+ if (Current->is(TT_LambdaArrow))
+ return prec::Comma;
+ if (Current->is(TT_JsFatArrow))
+ return prec::Assignment;
+ if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
+ (Current->is(tok::comment) && NextNonComment &&
+ NextNonComment->is(TT_SelectorName)))
+ return 0;
+ if (Current->is(TT_RangeBasedForLoopColon))
+ return prec::Comma;
+ if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ Current->is(Keywords.kw_instanceof))
+ return prec::Relational;
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
+ return prec::Relational;
+ if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
+ return Current->getPrecedence();
+ if (Current->isOneOf(tok::period, tok::arrow))
+ return PrecedenceArrowAndPeriod;
+ if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
+ Keywords.kw_throws))
+ return 0;
+ }
+ return -1;
+ }
+
+ void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
+ Start->FakeLParens.push_back(Precedence);
+ if (Precedence > prec::Unknown)
+ Start->StartsBinaryExpression = true;
+ if (Current) {
+ FormatToken *Previous = Current->Previous;
+ while (Previous->is(tok::comment) && Previous->Previous)
+ Previous = Previous->Previous;
+ ++Previous->FakeRParens;
+ if (Precedence > prec::Unknown)
+ Previous->EndsBinaryExpression = true;
+ }
+ }
+
+ /// \brief Parse unary operator expressions and surround them with fake
+ /// parentheses if appropriate.
+ void parseUnaryOperator() {
+ if (!Current || Current->isNot(TT_UnaryOperator)) {
+ parse(PrecedenceArrowAndPeriod);
+ return;
+ }
+
+ FormatToken *Start = Current;
+ next();
+ parseUnaryOperator();
+
+ // The actual precedence doesn't matter.
+ addFakeParenthesis(Start, prec::Unknown);
+ }
+
+ void parseConditionalExpr() {
+ while (Current && Current->isTrailingComment()) {
+ next();
+ }
+ FormatToken *Start = Current;
+ parse(prec::LogicalOr);
+ if (!Current || !Current->is(tok::question))
+ return;
+ next();
+ parse(prec::Assignment);
+ if (!Current || Current->isNot(TT_ConditionalExpr))
+ return;
+ next();
+ parse(prec::Assignment);
+ addFakeParenthesis(Start, prec::Conditional);
+ }
+
+ void next(bool SkipPastLeadingComments = true) {
+ if (Current)
+ Current = Current->Next;
+ while (Current &&
+ (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
+ Current->isTrailingComment())
+ Current = Current->Next;
+ }
+
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ FormatToken *Current;
+};
+
+} // end anonymous namespace
+
+void TokenAnnotator::setCommentLineLevels(
+ SmallVectorImpl<AnnotatedLine *> &Lines) {
+ const AnnotatedLine *NextNonCommentLine = nullptr;
+ for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
+ E = Lines.rend();
+ I != E; ++I) {
+ bool CommentLine = (*I)->First;
+ for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
+ if (!Tok->is(tok::comment)) {
+ CommentLine = false;
+ break;
+ }
+ }
+ if (NextNonCommentLine && CommentLine)
+ (*I)->Level = NextNonCommentLine->Level;
+ else
+ NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
+
+ setCommentLineLevels((*I)->Children);
+ }
+}
+
+static unsigned maxNestingDepth(const AnnotatedLine &Line) {
+ unsigned Result = 0;
+ for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
+ Result = std::max(Result, Tok->NestingLevel);
+ return Result;
+}
+
+void TokenAnnotator::annotate(AnnotatedLine &Line) {
+ for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
+ E = Line.Children.end();
+ I != E; ++I) {
+ annotate(**I);
+ }
+ AnnotatingParser Parser(Style, Line, Keywords);
+ Line.Type = Parser.parseLine();
+
+ // With very deep nesting, ExpressionParser uses lots of stack and the
+ // formatting algorithm is very slow. We're not going to do a good job here
+ // anyway - it's probably generated code being formatted by mistake.
+ // Just skip the whole line.
+ if (maxNestingDepth(Line) > 50)
+ Line.Type = LT_Invalid;
+
+ if (Line.Type == LT_Invalid)
+ return;
+
+ ExpressionParser ExprParser(Style, Keywords, Line);
+ ExprParser.parse();
+
+ if (Line.startsWith(TT_ObjCMethodSpecifier))
+ Line.Type = LT_ObjCMethodDecl;
+ else if (Line.startsWith(TT_ObjCDecl))
+ Line.Type = LT_ObjCDecl;
+ else if (Line.startsWith(TT_ObjCProperty))
+ Line.Type = LT_ObjCProperty;
+
+ Line.First->SpacesRequiredBefore = 1;
+ Line.First->CanBreakBefore = Line.First->MustBreakBefore;
+}
+
+// This function heuristically determines whether 'Current' starts the name of a
+// function declaration.
+static bool isFunctionDeclarationName(const FormatToken &Current,
+ const AnnotatedLine &Line) {
+ auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {
+ for (; Next; Next = Next->Next) {
+ if (Next->is(TT_OverloadedOperatorLParen))
+ return Next;
+ if (Next->is(TT_OverloadedOperator))
+ continue;
+ if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
+ // For 'new[]' and 'delete[]'.
+ if (Next->Next && Next->Next->is(tok::l_square) &&
+ Next->Next->Next && Next->Next->Next->is(tok::r_square))
+ Next = Next->Next->Next;
+ continue;
+ }
+
+ break;
+ }
+ return nullptr;
+ };
+
+ // Find parentheses of parameter list.
+ const FormatToken *Next = Current.Next;
+ if (Current.is(tok::kw_operator)) {
+ if (Current.Previous && Current.Previous->is(tok::coloncolon))
+ return false;
+ Next = skipOperatorName(Next);
+ } else {
+ if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
+ return false;
+ for (; Next; Next = Next->Next) {
+ if (Next->is(TT_TemplateOpener)) {
+ Next = Next->MatchingParen;
+ } else if (Next->is(tok::coloncolon)) {
+ Next = Next->Next;
+ if (!Next)
+ return false;
+ if (Next->is(tok::kw_operator)) {
+ Next = skipOperatorName(Next->Next);
+ break;
+ }
+ if (!Next->is(tok::identifier))
+ return false;
+ } else if (Next->is(tok::l_paren)) {
+ break;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ // Check whether parameter list can belong to a function declaration.
+ if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
+ return false;
+ // If the lines ends with "{", this is likely an function definition.
+ if (Line.Last->is(tok::l_brace))
+ return true;
+ if (Next->Next == Next->MatchingParen)
+ return true; // Empty parentheses.
+ // If there is an &/&& after the r_paren, this is likely a function.
+ if (Next->MatchingParen->Next &&
+ Next->MatchingParen->Next->is(TT_PointerOrReference))
+ return true;
+ for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
+ Tok = Tok->Next) {
+ if (Tok->is(tok::l_paren) && Tok->MatchingParen) {
+ Tok = Tok->MatchingParen;
+ continue;
+ }
+ if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
+ Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
+ return true;
+ if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
+ Tok->Tok.isLiteral())
+ return false;
+ }
+ return false;
+}
+
+bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
+ assert(Line.MightBeFunctionDecl);
+
+ if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
+ Style.AlwaysBreakAfterReturnType ==
+ FormatStyle::RTBS_TopLevelDefinitions) &&
+ Line.Level > 0)
+ return false;
+
+ switch (Style.AlwaysBreakAfterReturnType) {
+ case FormatStyle::RTBS_None:
+ return false;
+ case FormatStyle::RTBS_All:
+ case FormatStyle::RTBS_TopLevel:
+ return true;
+ case FormatStyle::RTBS_AllDefinitions:
+ case FormatStyle::RTBS_TopLevelDefinitions:
+ return Line.mightBeFunctionDefinition();
+ }
+
+ return false;
+}
+
+void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
+ for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
+ E = Line.Children.end();
+ I != E; ++I) {
+ calculateFormattingInformation(**I);
+ }
+
+ Line.First->TotalLength =
+ Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
+ FormatToken *Current = Line.First->Next;
+ bool InFunctionDecl = Line.MightBeFunctionDecl;
+ while (Current) {
+ if (isFunctionDeclarationName(*Current, Line))
+ Current->Type = TT_FunctionDeclarationName;
+ if (Current->is(TT_LineComment)) {
+ if (Current->Previous->BlockKind == BK_BracedInit &&
+ Current->Previous->opensScope())
+ Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
+ else
+ Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
+
+ // If we find a trailing comment, iterate backwards to determine whether
+ // it seems to relate to a specific parameter. If so, break before that
+ // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
+ // to the previous line in:
+ // SomeFunction(a,
+ // b, // comment
+ // c);
+ if (!Current->HasUnescapedNewline) {
+ for (FormatToken *Parameter = Current->Previous; Parameter;
+ Parameter = Parameter->Previous) {
+ if (Parameter->isOneOf(tok::comment, tok::r_brace))
+ break;
+ if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
+ if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
+ Parameter->HasUnescapedNewline)
+ Parameter->MustBreakBefore = true;
+ break;
+ }
+ }
+ }
+ } else if (Current->SpacesRequiredBefore == 0 &&
+ spaceRequiredBefore(Line, *Current)) {
+ Current->SpacesRequiredBefore = 1;
+ }
+
+ Current->MustBreakBefore =
+ Current->MustBreakBefore || mustBreakBefore(Line, *Current);
+
+ if (!Current->MustBreakBefore && InFunctionDecl &&
+ Current->is(TT_FunctionDeclarationName))
+ Current->MustBreakBefore = mustBreakForReturnType(Line);
+
+ Current->CanBreakBefore =
+ Current->MustBreakBefore || canBreakBefore(Line, *Current);
+ unsigned ChildSize = 0;
+ if (Current->Previous->Children.size() == 1) {
+ FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
+ ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
+ : LastOfChild.TotalLength + 1;
+ }
+ const FormatToken *Prev = Current->Previous;
+ if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
+ (Prev->Children.size() == 1 &&
+ Prev->Children[0]->First->MustBreakBefore) ||
+ Current->IsMultiline)
+ Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
+ else
+ Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
+ ChildSize + Current->SpacesRequiredBefore;
+
+ if (Current->is(TT_CtorInitializerColon))
+ InFunctionDecl = false;
+
+ // FIXME: Only calculate this if CanBreakBefore is true once static
+ // initializers etc. are sorted out.
+ // FIXME: Move magic numbers to a better place.
+ Current->SplitPenalty = 20 * Current->BindingStrength +
+ splitPenalty(Line, *Current, InFunctionDecl);
+
+ Current = Current->Next;
+ }
+
+ calculateUnbreakableTailLengths(Line);
+ unsigned IndentLevel = Line.Level;
+ for (Current = Line.First; Current != nullptr; Current = Current->Next) {
+ if (Current->Role)
+ Current->Role->precomputeFormattingInfos(Current);
+ if (Current->MatchingParen &&
+ Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
+ assert(IndentLevel > 0);
+ --IndentLevel;
+ }
+ Current->IndentLevel = IndentLevel;
+ if (Current->opensBlockOrBlockTypeList(Style))
+ ++IndentLevel;
+ }
+
+ DEBUG({ printDebugInfo(Line); });
+}
+
+void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
+ unsigned UnbreakableTailLength = 0;
+ FormatToken *Current = Line.Last;
+ while (Current) {
+ Current->UnbreakableTailLength = UnbreakableTailLength;
+ if (Current->CanBreakBefore ||
+ Current->isOneOf(tok::comment, tok::string_literal)) {
+ UnbreakableTailLength = 0;
+ } else {
+ UnbreakableTailLength +=
+ Current->ColumnWidth + Current->SpacesRequiredBefore;
+ }
+ Current = Current->Previous;
+ }
+}
+
+unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
+ const FormatToken &Tok,
+ bool InFunctionDecl) {
+ const FormatToken &Left = *Tok.Previous;
+ const FormatToken &Right = Tok;
+
+ if (Left.is(tok::semi))
+ return 0;
+
+ if (Style.Language == FormatStyle::LK_Java) {
+ if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
+ return 1;
+ if (Right.is(Keywords.kw_implements))
+ return 2;
+ if (Left.is(tok::comma) && Left.NestingLevel == 0)
+ return 3;
+ } else if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
+ return 100;
+ if (Left.is(TT_JsTypeColon))
+ return 35;
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return 100;
+ }
+
+ if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
+ return 1;
+ if (Right.is(tok::l_square)) {
+ if (Style.Language == FormatStyle::LK_Proto)
+ return 1;
+ if (Left.is(tok::r_square))
+ return 200;
+ // Slightly prefer formatting local lambda definitions like functions.
+ if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
+ return 35;
+ if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
+ TT_ArrayInitializerLSquare,
+ TT_DesignatedInitializerLSquare))
+ return 500;
+ }
+
+ if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
+ Right.is(tok::kw_operator)) {
+ if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
+ return 3;
+ if (Left.is(TT_StartOfName))
+ return 110;
+ if (InFunctionDecl && Right.NestingLevel == 0)
+ return Style.PenaltyReturnTypeOnItsOwnLine;
+ return 200;
+ }
+ if (Right.is(TT_PointerOrReference))
+ return 190;
+ if (Right.is(TT_LambdaArrow))
+ return 110;
+ if (Left.is(tok::equal) && Right.is(tok::l_brace))
+ return 160;
+ if (Left.is(TT_CastRParen))
+ return 100;
+ if (Left.is(tok::coloncolon) ||
+ (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
+ return 500;
+ if (Left.isOneOf(tok::kw_class, tok::kw_struct))
+ return 5000;
+ if (Left.is(tok::comment))
+ return 1000;
+
+ if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon))
+ return 2;
+
+ if (Right.isMemberAccess()) {
+ // Breaking before the "./->" of a chained call/member access is reasonably
+ // cheap, as formatting those with one call per line is generally
+ // desirable. In particular, it should be cheaper to break before the call
+ // than it is to break inside a call's parameters, which could lead to weird
+ // "hanging" indents. The exception is the very last "./->" to support this
+ // frequent pattern:
+ //
+ // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
+ // dddddddd);
+ //
+ // which might otherwise be blown up onto many lines. Here, clang-format
+ // won't produce "hanging" indents anyway as there is no other trailing
+ // call.
+ //
+ // Also apply higher penalty is not a call as that might lead to a wrapping
+ // like:
+ //
+ // aaaaaaa
+ // .aaaaaaaaa.bbbbbbbb(cccccccc);
+ return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
+ ? 150
+ : 35;
+ }
+
+ if (Right.is(TT_TrailingAnnotation) &&
+ (!Right.Next || Right.Next->isNot(tok::l_paren))) {
+ // Moving trailing annotations to the next line is fine for ObjC method
+ // declarations.
+ if (Line.startsWith(TT_ObjCMethodSpecifier))
+ return 10;
+ // Generally, breaking before a trailing annotation is bad unless it is
+ // function-like. It seems to be especially preferable to keep standard
+ // annotations (i.e. "const", "final" and "override") on the same line.
+ // Use a slightly higher penalty after ")" so that annotations like
+ // "const override" are kept together.
+ bool is_short_annotation = Right.TokenText.size() < 10;
+ return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
+ }
+
+ // In for-loops, prefer breaking at ',' and ';'.
+ if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
+ return 4;
+
+ // In Objective-C method expressions, prefer breaking before "param:" over
+ // breaking after it.
+ if (Right.is(TT_SelectorName))
+ return 0;
+ if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
+ return Line.MightBeFunctionDecl ? 50 : 500;
+
+ if (Left.is(tok::l_paren) && InFunctionDecl &&
+ Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
+ return 100;
+ if (Left.is(tok::l_paren) && Left.Previous &&
+ (Left.Previous->isOneOf(tok::kw_if, tok::kw_for)
+ || Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
+ return 1000;
+ if (Left.is(tok::equal) && InFunctionDecl)
+ return 110;
+ if (Right.is(tok::r_brace))
+ return 1;
+ if (Left.is(TT_TemplateOpener))
+ return 100;
+ if (Left.opensScope()) {
+ if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
+ return 0;
+ return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
+ : 19;
+ }
+ if (Left.is(TT_JavaAnnotation))
+ return 50;
+
+ if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
+ Left.Previous->isLabelString() &&
+ (Left.NextOperator || Left.OperatorIndex != 0))
+ return 45;
+ if (Right.is(tok::plus) && Left.isLabelString() &&
+ (Right.NextOperator || Right.OperatorIndex != 0))
+ return 25;
+ if (Left.is(tok::comma))
+ return 1;
+ if (Right.is(tok::lessless) && Left.isLabelString() &&
+ (Right.NextOperator || Right.OperatorIndex != 1))
+ return 25;
+ if (Right.is(tok::lessless)) {
+ // Breaking at a << is really cheap.
+ if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
+ // Slightly prefer to break before the first one in log-like statements.
+ return 2;
+ return 1;
+ }
+ if (Left.is(TT_ConditionalExpr))
+ return prec::Conditional;
+ prec::Level Level = Left.getPrecedence();
+ if (Level == prec::Unknown)
+ Level = Right.getPrecedence();
+ if (Level == prec::Assignment)
+ return Style.PenaltyBreakAssignment;
+ if (Level != prec::Unknown)
+ return Level;
+
+ return 3;
+}
+
+bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
+ const FormatToken &Left,
+ const FormatToken &Right) {
+ if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
+ return true;
+ if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
+ Left.Tok.getObjCKeywordID() == tok::objc_property)
+ return true;
+ if (Right.is(tok::hashhash))
+ return Left.is(tok::hash);
+ if (Left.isOneOf(tok::hashhash, tok::hash))
+ return Right.is(tok::hash);
+ if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
+ return Style.SpaceInEmptyParentheses;
+ if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
+ return (Right.is(TT_CastRParen) ||
+ (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
+ ? Style.SpacesInCStyleCastParentheses
+ : Style.SpacesInParentheses;
+ if (Right.isOneOf(tok::semi, tok::comma))
+ return false;
+ if (Right.is(tok::less) &&
+ Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)
+ return true;
+ if (Right.is(tok::less) && Left.is(tok::kw_template))
+ return Style.SpaceAfterTemplateKeyword;
+ if (Left.isOneOf(tok::exclaim, tok::tilde))
+ return false;
+ if (Left.is(tok::at) &&
+ Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
+ tok::numeric_constant, tok::l_paren, tok::l_brace,
+ tok::kw_true, tok::kw_false))
+ return false;
+ if (Left.is(tok::colon))
+ return !Left.is(TT_ObjCMethodExpr);
+ if (Left.is(tok::coloncolon))
+ return false;
+ if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
+ return false;
+ if (Right.is(tok::ellipsis))
+ return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
+ Left.Previous->is(tok::kw_case));
+ if (Left.is(tok::l_square) && Right.is(tok::amp))
+ return false;
+ if (Right.is(TT_PointerOrReference))
+ return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) ||
+ (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous &&
+ Left.Previous->is(tok::r_paren)) ||
+ (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
+ (Style.PointerAlignment != FormatStyle::PAS_Left ||
+ (Line.IsMultiVariableDeclStmt &&
+ (Left.NestingLevel == 0 ||
+ (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
+ if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
+ (!Left.is(TT_PointerOrReference) ||
+ (Style.PointerAlignment != FormatStyle::PAS_Right &&
+ !Line.IsMultiVariableDeclStmt)))
+ return true;
+ if (Left.is(TT_PointerOrReference))
+ return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
+ (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
+ !Right.is(TT_StartOfName)) ||
+ (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
+ (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
+ tok::l_paren) &&
+ (Style.PointerAlignment != FormatStyle::PAS_Right &&
+ !Line.IsMultiVariableDeclStmt) &&
+ Left.Previous &&
+ !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
+ if (Right.is(tok::star) && Left.is(tok::l_paren))
+ return false;
+ if (Left.is(tok::l_square))
+ return (Left.is(TT_ArrayInitializerLSquare) &&
+ Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
+ (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&
+ Right.isNot(tok::r_square));
+ if (Right.is(tok::r_square))
+ return Right.MatchingParen &&
+ ((Style.SpacesInContainerLiterals &&
+ Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||
+ (Style.SpacesInSquareBrackets &&
+ Right.MatchingParen->is(TT_ArraySubscriptLSquare)));
+ if (Right.is(tok::l_square) &&
+ !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
+ TT_DesignatedInitializerLSquare) &&
+ !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
+ return false;
+ if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
+ return !Left.Children.empty(); // No spaces in "{}".
+ if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
+ (Right.is(tok::r_brace) && Right.MatchingParen &&
+ Right.MatchingParen->BlockKind != BK_Block))
+ return !Style.Cpp11BracedListStyle;
+ if (Left.is(TT_BlockComment))
+ return !Left.TokenText.endswith("=*/");
+ if (Right.is(tok::l_paren)) {
+ if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen))
+ return true;
+ return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
+ (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
+ (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
+ tok::kw_switch, tok::kw_case, TT_ForEachMacro,
+ TT_ObjCForIn) ||
+ Left.endsSequence(tok::kw_constexpr, tok::kw_if) ||
+ (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
+ tok::kw_new, tok::kw_delete) &&
+ (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
+ (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
+ (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
+ Left.is(tok::r_paren)) &&
+ Line.Type != LT_PreprocessorDirective);
+ }
+ if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
+ return false;
+ if (Right.is(TT_UnaryOperator))
+ return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
+ (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
+ if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
+ tok::r_paren) ||
+ Left.isSimpleTypeSpecifier()) &&
+ Right.is(tok::l_brace) && Right.getNextNonComment() &&
+ Right.BlockKind != BK_Block)
+ return false;
+ if (Left.is(tok::period) || Right.is(tok::period))
+ return false;
+ if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
+ return false;
+ if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
+ Left.MatchingParen->Previous &&
+ Left.MatchingParen->Previous->is(tok::period))
+ // A.<B>DoSomething();
+ return false;
+ if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
+ return false;
+ return true;
+}
+
+bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
+ const FormatToken &Right) {
+ const FormatToken &Left = *Right.Previous;
+ if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
+ return true; // Never ever merge two identifiers.
+ if (Style.isCpp()) {
+ if (Left.is(tok::kw_operator))
+ return Right.is(tok::coloncolon);
+ } else if (Style.Language == FormatStyle::LK_Proto) {
+ if (Right.is(tok::period) &&
+ Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
+ Keywords.kw_repeated, Keywords.kw_extend))
+ return true;
+ if (Right.is(tok::l_paren) &&
+ Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
+ return true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (Left.is(TT_JsFatArrow))
+ return true;
+ // for await ( ...
+ if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) &&
+ Left.Previous && Left.Previous->is(tok::kw_for))
+ return true;
+ if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
+ Right.MatchingParen) {
+ const FormatToken *Next = Right.MatchingParen->getNextNonComment();
+ // An async arrow function, for example: `x = async () => foo();`,
+ // as opposed to calling a function called async: `x = async();`
+ if (Next && Next->is(TT_JsFatArrow))
+ return true;
+ }
+ if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
+ (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
+ return false;
+ if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
+ return false;
+ if (Right.is(tok::star) &&
+ Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
+ return false;
+ if (Right.isOneOf(tok::l_brace, tok::l_square) &&
+ Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
+ return true;
+ // JS methods can use some keywords as names (e.g. `delete()`).
+ if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&
+ Left.Tok.getIdentifierInfo())
+ return false;
+ if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
+ tok::kw_const) ||
+ // "of" is only a keyword if it appears after another identifier
+ // (e.g. as "const x of y" in a for loop).
+ (Left.is(Keywords.kw_of) && Left.Previous &&
+ Left.Previous->Tok.getIdentifierInfo())) &&
+ (!Left.Previous || !Left.Previous->is(tok::period)))
+ return true;
+ if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
+ Left.Previous->is(tok::period) && Right.is(tok::l_paren))
+ return false;
+ if (Left.is(Keywords.kw_as) &&
+ Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
+ return true;
+ if (Left.is(tok::kw_default) && Left.Previous &&
+ Left.Previous->is(tok::kw_export))
+ return true;
+ if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
+ return true;
+ if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
+ return false;
+ if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
+ return false;
+ if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
+ Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
+ return false;
+ if (Left.is(tok::ellipsis))
+ return false;
+ if (Left.is(TT_TemplateCloser) &&
+ !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
+ Keywords.kw_implements, Keywords.kw_extends))
+ // Type assertions ('<type>expr') are not followed by whitespace. Other
+ // locations that should have whitespace following are identified by the
+ // above set of follower tokens.
+ return false;
+ if (Right.is(TT_JsNonNullAssertion))
+ return false;
+ if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as))
+ return true; // "x! as string"
+ } else if (Style.Language == FormatStyle::LK_Java) {
+ if (Left.is(tok::r_square) && Right.is(tok::l_brace))
+ return true;
+ if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
+ return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
+ if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
+ tok::kw_protected) ||
+ Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
+ Keywords.kw_native)) &&
+ Right.is(TT_TemplateOpener))
+ return true;
+ }
+ if (Left.is(TT_ImplicitStringLiteral))
+ return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
+ if (Line.Type == LT_ObjCMethodDecl) {
+ if (Left.is(TT_ObjCMethodSpecifier))
+ return true;
+ if (Left.is(tok::r_paren) && Right.is(tok::identifier))
+ // Don't space between ')' and <id>
+ return false;
+ }
+ if (Line.Type == LT_ObjCProperty &&
+ (Right.is(tok::equal) || Left.is(tok::equal)))
+ return false;
+
+ if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
+ Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
+ return true;
+ if (Right.is(TT_OverloadedOperatorLParen))
+ return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
+ if (Left.is(tok::comma))
+ return true;
+ if (Right.is(tok::comma))
+ return false;
+ if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen))
+ return true;
+ if (Right.is(tok::colon)) {
+ if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
+ !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
+ return false;
+ if (Right.is(TT_ObjCMethodExpr))
+ return false;
+ if (Left.is(tok::question))
+ return false;
+ if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
+ return false;
+ if (Right.is(TT_DictLiteral))
+ return Style.SpacesInContainerLiterals;
+ return true;
+ }
+ if (Left.is(TT_UnaryOperator))
+ return Right.is(TT_BinaryOperator);
+
+ // If the next token is a binary operator or a selector name, we have
+ // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
+ if (Left.is(TT_CastRParen))
+ return Style.SpaceAfterCStyleCast ||
+ Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
+
+ if (Left.is(tok::greater) && Right.is(tok::greater))
+ return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
+ (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
+ if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
+ Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
+ (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
+ return false;
+ if (!Style.SpaceBeforeAssignmentOperators &&
+ Right.getPrecedence() == prec::Assignment)
+ return false;
+ if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
+ // Generally don't remove existing spaces between an identifier and "::".
+ // The identifier might actually be a macro name such as ALWAYS_INLINE. If
+ // this turns out to be too lenient, add analysis of the identifier itself.
+ return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
+ if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
+ return (Left.is(TT_TemplateOpener) &&
+ Style.Standard == FormatStyle::LS_Cpp03) ||
+ !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
+ tok::kw___super, TT_TemplateCloser, TT_TemplateOpener));
+ if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
+ return Style.SpacesInAngles;
+ if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
+ (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
+ !Right.is(tok::r_paren)))
+ return true;
+ if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
+ Right.isNot(TT_FunctionTypeLParen))
+ return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
+ if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
+ Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
+ return false;
+ if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
+ Line.startsWith(tok::hash))
+ return true;
+ if (Right.is(TT_TrailingUnaryOperator))
+ return false;
+ if (Left.is(TT_RegexLiteral))
+ return false;
+ return spaceRequiredBetween(Line, Left, Right);
+}
+
+// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
+static bool isAllmanBrace(const FormatToken &Tok) {
+ return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
+ !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
+}
+
+bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
+ const FormatToken &Right) {
+ const FormatToken &Left = *Right.Previous;
+ if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
+ return true;
+
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ // FIXME: This might apply to other languages and token kinds.
+ if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
+ Left.Previous->is(tok::string_literal))
+ return true;
+ if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
+ Left.Previous && Left.Previous->is(tok::equal) &&
+ Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
+ tok::kw_const) &&
+ // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
+ // above.
+ !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
+ // Object literals on the top level of a file are treated as "enum-style".
+ // Each key/value pair is put on a separate line, instead of bin-packing.
+ return true;
+ if (Left.is(tok::l_brace) && Line.Level == 0 &&
+ (Line.startsWith(tok::kw_enum) ||
+ Line.startsWith(tok::kw_export, tok::kw_enum)))
+ // JavaScript top-level enum key/value pairs are put on separate lines
+ // instead of bin-packing.
+ return true;
+ if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
+ !Left.Children.empty())
+ // Support AllowShortFunctionsOnASingleLine for JavaScript.
+ return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
+ Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
+ (Left.NestingLevel == 0 && Line.Level == 0 &&
+ Style.AllowShortFunctionsOnASingleLine &
+ FormatStyle::SFS_InlineOnly);
+ } else if (Style.Language == FormatStyle::LK_Java) {
+ if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
+ Right.Next->is(tok::string_literal))
+ return true;
+ } else if (Style.Language == FormatStyle::LK_Cpp ||
+ Style.Language == FormatStyle::LK_ObjC ||
+ Style.Language == FormatStyle::LK_Proto) {
+ if (Left.isStringLiteral() && Right.isStringLiteral())
+ return true;
+ }
+
+ // If the last token before a '}', ']', or ')' is a comma or a trailing
+ // comment, the intention is to insert a line break after it in order to make
+ // shuffling around entries easier. Import statements, especially in
+ // JavaScript, can be an exception to this rule.
+ if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
+ const FormatToken *BeforeClosingBrace = nullptr;
+ if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ Left.is(tok::l_paren))) &&
+ Left.BlockKind != BK_Block && Left.MatchingParen)
+ BeforeClosingBrace = Left.MatchingParen->Previous;
+ else if (Right.MatchingParen &&
+ (Right.MatchingParen->isOneOf(tok::l_brace,
+ TT_ArrayInitializerLSquare) ||
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ Right.MatchingParen->is(tok::l_paren))))
+ BeforeClosingBrace = &Left;
+ if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
+ BeforeClosingBrace->isTrailingComment()))
+ return true;
+ }
+
+ if (Right.is(tok::comment))
+ return Left.BlockKind != BK_BracedInit &&
+ Left.isNot(TT_CtorInitializerColon) &&
+ (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
+ if (Left.isTrailingComment())
+ return true;
+ if (Right.Previous->IsUnterminatedLiteral)
+ return true;
+ if (Right.is(tok::lessless) && Right.Next &&
+ Right.Previous->is(tok::string_literal) &&
+ Right.Next->is(tok::string_literal))
+ return true;
+ if (Right.Previous->ClosesTemplateDeclaration &&
+ Right.Previous->MatchingParen &&
+ Right.Previous->MatchingParen->NestingLevel == 0 &&
+ Style.AlwaysBreakTemplateDeclarations)
+ return true;
+ if (Right.is(TT_CtorInitializerComma) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
+ !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ return true;
+ if (Right.is(TT_CtorInitializerColon) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
+ !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ return true;
+ // Break only if we have multiple inheritance.
+ if (Style.BreakBeforeInheritanceComma &&
+ Right.is(TT_InheritanceComma))
+ return true;
+ if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
+ // Raw string literals are special wrt. line breaks. The author has made a
+ // deliberate choice and might have aligned the contents of the string
+ // literal accordingly. Thus, we try keep existing line breaks.
+ return Right.NewlinesBefore > 0;
+ if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
+ Style.Language == FormatStyle::LK_Proto)
+ // Don't put enums onto single lines in protocol buffers.
+ return true;
+ if (Right.is(TT_InlineASMBrace))
+ return Right.HasUnescapedNewline;
+ if (isAllmanBrace(Left) || isAllmanBrace(Right))
+ return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
+ (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
+ (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
+ if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
+ return true;
+
+ if ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ Left.is(TT_LeadingJavaAnnotation) &&
+ Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
+ (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
+ return true;
+
+ return false;
+}
+
+bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
+ const FormatToken &Right) {
+ const FormatToken &Left = *Right.Previous;
+
+ // Language-specific stuff.
+ if (Style.Language == FormatStyle::LK_Java) {
+ if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
+ Keywords.kw_implements))
+ return false;
+ if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
+ Keywords.kw_implements))
+ return true;
+ } else if (Style.Language == FormatStyle::LK_JavaScript) {
+ const FormatToken *NonComment = Right.getPreviousNonComment();
+ if (NonComment &&
+ NonComment->isOneOf(
+ tok::kw_return, tok::kw_continue, tok::kw_break, tok::kw_throw,
+ Keywords.kw_interface, Keywords.kw_type, tok::kw_static,
+ tok::kw_public, tok::kw_private, tok::kw_protected,
+ Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set))
+ return false; // Otherwise automatic semicolon insertion would trigger.
+ if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
+ return false;
+ if (Left.is(TT_JsTypeColon))
+ return true;
+ if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is))
+ return false;
+ if (Left.is(Keywords.kw_in))
+ return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
+ if (Right.is(Keywords.kw_in))
+ return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
+ if (Right.is(Keywords.kw_as))
+ return false; // must not break before as in 'x as type' casts
+ if (Left.is(Keywords.kw_as))
+ return true;
+ if (Left.is(TT_JsNonNullAssertion))
+ return true;
+ if (Left.is(Keywords.kw_declare) &&
+ Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
+ Keywords.kw_function, tok::kw_class, tok::kw_enum,
+ Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
+ Keywords.kw_let, tok::kw_const))
+ // See grammar for 'declare' statements at:
+ // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
+ return false;
+ if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
+ Right.isOneOf(tok::identifier, tok::string_literal))
+ return false; // must not break in "module foo { ...}"
+ if (Right.is(TT_TemplateString) && Right.closesScope())
+ return false;
+ if (Left.is(TT_TemplateString) && Left.opensScope())
+ return true;
+ }
+
+ if (Left.is(tok::at))
+ return false;
+ if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
+ return false;
+ if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
+ return !Right.is(tok::l_paren);
+ if (Right.is(TT_PointerOrReference))
+ return Line.IsMultiVariableDeclStmt ||
+ (Style.PointerAlignment == FormatStyle::PAS_Right &&
+ (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
+ if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
+ Right.is(tok::kw_operator))
+ return true;
+ if (Left.is(TT_PointerOrReference))
+ return false;
+ if (Right.isTrailingComment())
+ // We rely on MustBreakBefore being set correctly here as we should not
+ // change the "binding" behavior of a comment.
+ // The first comment in a braced lists is always interpreted as belonging to
+ // the first list element. Otherwise, it should be placed outside of the
+ // list.
+ return Left.BlockKind == BK_BracedInit ||
+ (Left.is(TT_CtorInitializerColon) &&
+ Style.BreakConstructorInitializers ==
+ FormatStyle::BCIS_AfterColon);
+ if (Left.is(tok::question) && Right.is(tok::colon))
+ return false;
+ if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
+ return Style.BreakBeforeTernaryOperators;
+ if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
+ return !Style.BreakBeforeTernaryOperators;
+ if (Right.is(TT_InheritanceColon))
+ return true;
+ if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
+ Left.isNot(TT_SelectorName))
+ return true;
+ if (Right.is(tok::colon) &&
+ !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
+ return false;
+ if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr))
+ return true;
+ if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
+ Right.Next->is(TT_ObjCMethodExpr)))
+ return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
+ if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
+ return true;
+ if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
+ return true;
+ if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
+ TT_OverloadedOperator))
+ return false;
+ if (Left.is(TT_RangeBasedForLoopColon))
+ return true;
+ if (Right.is(TT_RangeBasedForLoopColon))
+ return false;
+ if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
+ return true;
+ if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
+ Left.is(tok::kw_operator))
+ return false;
+ if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
+ Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
+ return false;
+ if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
+ return false;
+ if (Left.is(tok::l_paren) && Left.Previous &&
+ (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
+ return false;
+ if (Right.is(TT_ImplicitStringLiteral))
+ return false;
+
+ if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
+ return false;
+ if (Right.is(tok::r_square) && Right.MatchingParen &&
+ Right.MatchingParen->is(TT_LambdaLSquare))
+ return false;
+
+ // We only break before r_brace if there was a corresponding break before
+ // the l_brace, which is tracked by BreakBeforeClosingBrace.
+ if (Right.is(tok::r_brace))
+ return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
+
+ // Allow breaking after a trailing annotation, e.g. after a method
+ // declaration.
+ if (Left.is(TT_TrailingAnnotation))
+ return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
+ tok::less, tok::coloncolon);
+
+ if (Right.is(tok::kw___attribute))
+ return true;
+
+ if (Left.is(tok::identifier) && Right.is(tok::string_literal))
+ return true;
+
+ if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
+ return true;
+
+ if (Left.is(TT_CtorInitializerColon))
+ return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
+ if (Right.is(TT_CtorInitializerColon))
+ return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
+ if (Left.is(TT_CtorInitializerComma) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
+ return false;
+ if (Right.is(TT_CtorInitializerComma) &&
+ Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
+ return true;
+ if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
+ return false;
+ if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma)
+ return true;
+ if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
+ (Left.is(tok::less) && Right.is(tok::less)))
+ return false;
+ if (Right.is(TT_BinaryOperator) &&
+ Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
+ (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
+ Right.getPrecedence() != prec::Assignment))
+ return true;
+ if (Left.is(TT_ArrayInitializerLSquare))
+ return true;
+ if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
+ return true;
+ if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
+ !Left.isOneOf(tok::arrowstar, tok::lessless) &&
+ Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
+ (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
+ Left.getPrecedence() == prec::Assignment))
+ return true;
+ return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
+ tok::kw_class, tok::kw_struct, tok::comment) ||
+ Right.isMemberAccess() ||
+ Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
+ tok::colon, tok::l_square, tok::at) ||
+ (Left.is(tok::r_paren) &&
+ Right.isOneOf(tok::identifier, tok::kw_const)) ||
+ (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
+ (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
+}
+
+void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
+ llvm::errs() << "AnnotatedTokens:\n";
+ const FormatToken *Tok = Line.First;
+ while (Tok) {
+ llvm::errs() << " M=" << Tok->MustBreakBefore
+ << " C=" << Tok->CanBreakBefore
+ << " T=" << getTokenTypeName(Tok->Type)
+ << " S=" << Tok->SpacesRequiredBefore
+ << " B=" << Tok->BlockParameterCount
+ << " BK=" << Tok->BlockKind
+ << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
+ << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
+ << " FakeLParens=";
+ for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
+ llvm::errs() << Tok->FakeLParens[i] << "/";
+ llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
+ llvm::errs() << " Text='" << Tok->TokenText << "'\n";
+ if (!Tok->Next)
+ assert(Tok == Line.Last);
+ Tok = Tok->Next;
+ }
+ llvm::errs() << "----\n";
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
new file mode 100644
index 000000000000..805509533bf9
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
@@ -0,0 +1,179 @@
+//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
+#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
+
+#include "UnwrappedLineParser.h"
+#include "clang/Format/Format.h"
+
+namespace clang {
+class SourceManager;
+
+namespace format {
+
+enum LineType {
+ LT_Invalid,
+ LT_ImportStatement,
+ LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
+ LT_ObjCMethodDecl,
+ LT_ObjCProperty, // An @property line.
+ LT_Other,
+ LT_PreprocessorDirective,
+ LT_VirtualFunctionDecl
+};
+
+class AnnotatedLine {
+public:
+ AnnotatedLine(const UnwrappedLine &Line)
+ : First(Line.Tokens.front().Tok), Level(Line.Level),
+ MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
+ InPPDirective(Line.InPPDirective),
+ MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
+ IsMultiVariableDeclStmt(false), Affected(false),
+ LeadingEmptyLinesAffected(false), ChildrenAffected(false) {
+ assert(!Line.Tokens.empty());
+
+ // Calculate Next and Previous for all tokens. Note that we must overwrite
+ // Next and Previous for every token, as previous formatting runs might have
+ // left them in a different state.
+ First->Previous = nullptr;
+ FormatToken *Current = First;
+ for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
+ E = Line.Tokens.end();
+ I != E; ++I) {
+ const UnwrappedLineNode &Node = *I;
+ Current->Next = I->Tok;
+ I->Tok->Previous = Current;
+ Current = Current->Next;
+ Current->Children.clear();
+ for (const auto &Child : Node.Children) {
+ Children.push_back(new AnnotatedLine(Child));
+ Current->Children.push_back(Children.back());
+ }
+ }
+ Last = Current;
+ Last->Next = nullptr;
+ }
+
+ ~AnnotatedLine() {
+ for (unsigned i = 0, e = Children.size(); i != e; ++i) {
+ delete Children[i];
+ }
+ FormatToken *Current = First;
+ while (Current) {
+ Current->Children.clear();
+ Current->Role.reset();
+ Current = Current->Next;
+ }
+ }
+
+ /// \c true if this line starts with the given tokens in order, ignoring
+ /// comments.
+ template <typename... Ts> bool startsWith(Ts... Tokens) const {
+ return First && First->startsSequence(Tokens...);
+ }
+
+ /// \c true if this line ends with the given tokens in reversed order,
+ /// ignoring comments.
+ /// For example, given tokens [T1, T2, T3, ...], the function returns true if
+ /// this line is like "... T3 T2 T1".
+ template <typename... Ts> bool endsWith(Ts... Tokens) const {
+ return Last && Last->endsSequence(Tokens...);
+ }
+
+ /// \c true if this line looks like a function definition instead of a
+ /// function declaration. Asserts MightBeFunctionDecl.
+ bool mightBeFunctionDefinition() const {
+ assert(MightBeFunctionDecl);
+ // FIXME: Line.Last points to other characters than tok::semi
+ // and tok::lbrace.
+ return !Last->isOneOf(tok::semi, tok::comment);
+ }
+
+ FormatToken *First;
+ FormatToken *Last;
+
+ SmallVector<AnnotatedLine *, 0> Children;
+
+ LineType Type;
+ unsigned Level;
+ size_t MatchingOpeningBlockLineIndex;
+ bool InPPDirective;
+ bool MustBeDeclaration;
+ bool MightBeFunctionDecl;
+ bool IsMultiVariableDeclStmt;
+
+ /// \c True if this line should be formatted, i.e. intersects directly or
+ /// indirectly with one of the input ranges.
+ bool Affected;
+
+ /// \c True if the leading empty lines of this line intersect with one of the
+ /// input ranges.
+ bool LeadingEmptyLinesAffected;
+
+ /// \c True if one of this line's children intersects with an input range.
+ bool ChildrenAffected;
+
+private:
+ // Disallow copying.
+ AnnotatedLine(const AnnotatedLine &) = delete;
+ void operator=(const AnnotatedLine &) = delete;
+};
+
+/// \brief Determines extra information about the tokens comprising an
+/// \c UnwrappedLine.
+class TokenAnnotator {
+public:
+ TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
+ : Style(Style), Keywords(Keywords) {}
+
+ /// \brief Adapts the indent levels of comment lines to the indent of the
+ /// subsequent line.
+ // FIXME: Can/should this be done in the UnwrappedLineParser?
+ void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);
+
+ void annotate(AnnotatedLine &Line);
+ void calculateFormattingInformation(AnnotatedLine &Line);
+
+private:
+ /// \brief Calculate the penalty for splitting before \c Tok.
+ unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
+ bool InFunctionDecl);
+
+ bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
+ const FormatToken &Right);
+
+ bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Tok);
+
+ bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
+
+ bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
+
+ bool mustBreakForReturnType(const AnnotatedLine &Line) const;
+
+ void printDebugInfo(const AnnotatedLine &Line);
+
+ void calculateUnbreakableTailLengths(AnnotatedLine &Line);
+
+ const FormatStyle &Style;
+
+ const AdditionalKeywords &Keywords;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
new file mode 100644
index 000000000000..8836c07cac71
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -0,0 +1,1043 @@
+//===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "UnwrappedLineFormatter.h"
+#include "WhitespaceManager.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+
+#define DEBUG_TYPE "format-formatter"
+
+namespace clang {
+namespace format {
+
+namespace {
+
+bool startsExternCBlock(const AnnotatedLine &Line) {
+ const FormatToken *Next = Line.First->getNextNonComment();
+ const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
+ return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
+ NextNext && NextNext->is(tok::l_brace);
+}
+
+/// \brief Tracks the indent level of \c AnnotatedLines across levels.
+///
+/// \c nextLine must be called for each \c AnnotatedLine, after which \c
+/// getIndent() will return the indent for the last line \c nextLine was called
+/// with.
+/// If the line is not formatted (and thus the indent does not change), calling
+/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
+/// subsequent lines on the same level to be indented at the same level as the
+/// given line.
+class LevelIndentTracker {
+public:
+ LevelIndentTracker(const FormatStyle &Style,
+ const AdditionalKeywords &Keywords, unsigned StartLevel,
+ int AdditionalIndent)
+ : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
+ for (unsigned i = 0; i != StartLevel; ++i)
+ IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+ }
+
+ /// \brief Returns the indent for the current line.
+ unsigned getIndent() const { return Indent; }
+
+ /// \brief Update the indent state given that \p Line is going to be formatted
+ /// next.
+ void nextLine(const AnnotatedLine &Line) {
+ Offset = getIndentOffset(*Line.First);
+ // Update the indent level cache size so that we can rely on it
+ // having the right size in adjustToUnmodifiedline.
+ while (IndentForLevel.size() <= Line.Level)
+ IndentForLevel.push_back(-1);
+ if (Line.InPPDirective) {
+ Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
+ } else {
+ IndentForLevel.resize(Line.Level + 1);
+ Indent = getIndent(IndentForLevel, Line.Level);
+ }
+ if (static_cast<int>(Indent) + Offset >= 0)
+ Indent += Offset;
+ }
+
+ /// \brief Update the indent state given that \p Line indent should be
+ /// skipped.
+ void skipLine(const AnnotatedLine &Line) {
+ while (IndentForLevel.size() <= Line.Level)
+ IndentForLevel.push_back(Indent);
+ }
+
+ /// \brief Update the level indent to adapt to the given \p Line.
+ ///
+ /// When a line is not formatted, we move the subsequent lines on the same
+ /// level to the same indent.
+ /// Note that \c nextLine must have been called before this method.
+ void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
+ unsigned LevelIndent = Line.First->OriginalColumn;
+ if (static_cast<int>(LevelIndent) - Offset >= 0)
+ LevelIndent -= Offset;
+ if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
+ !Line.InPPDirective)
+ IndentForLevel[Line.Level] = LevelIndent;
+ }
+
+private:
+ /// \brief Get the offset of the line relatively to the level.
+ ///
+ /// For example, 'public:' labels in classes are offset by 1 or 2
+ /// characters to the left from their level.
+ int getIndentOffset(const FormatToken &RootToken) {
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript)
+ return 0;
+ if (RootToken.isAccessSpecifier(false) ||
+ RootToken.isObjCAccessSpecifier() ||
+ (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
+ RootToken.Next && RootToken.Next->is(tok::colon)))
+ return Style.AccessModifierOffset;
+ return 0;
+ }
+
+ /// \brief Get the indent of \p Level from \p IndentForLevel.
+ ///
+ /// \p IndentForLevel must contain the indent for the level \c l
+ /// at \p IndentForLevel[l], or a value < 0 if the indent for
+ /// that level is unknown.
+ unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
+ if (IndentForLevel[Level] != -1)
+ return IndentForLevel[Level];
+ if (Level == 0)
+ return 0;
+ return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
+ }
+
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ const unsigned AdditionalIndent;
+
+ /// \brief The indent in characters for each level.
+ std::vector<int> IndentForLevel;
+
+ /// \brief Offset of the current line relative to the indent level.
+ ///
+ /// For example, the 'public' keywords is often indented with a negative
+ /// offset.
+ int Offset = 0;
+
+ /// \brief The current line's indent.
+ unsigned Indent = 0;
+};
+
+bool isNamespaceDeclaration(const AnnotatedLine *Line) {
+ const FormatToken *NamespaceTok = Line->First;
+ // Detect "(inline)? namespace" in the beginning of a line.
+ if (NamespaceTok->is(tok::kw_inline))
+ NamespaceTok = NamespaceTok->getNextNonComment();
+ return NamespaceTok && NamespaceTok->is(tok::kw_namespace);
+}
+
+bool isEndOfNamespace(const AnnotatedLine *Line,
+ const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
+ if (!Line->startsWith(tok::r_brace))
+ return false;
+ size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
+ if (StartLineIndex == UnwrappedLine::kInvalidIndex)
+ return false;
+ assert(StartLineIndex < AnnotatedLines.size());
+ return isNamespaceDeclaration(AnnotatedLines[StartLineIndex]);
+}
+
+class LineJoiner {
+public:
+ LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
+ const SmallVectorImpl<AnnotatedLine *> &Lines)
+ : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()),
+ AnnotatedLines(Lines) {}
+
+ /// \brief Returns the next line, merging multiple lines into one if possible.
+ const AnnotatedLine *getNextMergedLine(bool DryRun,
+ LevelIndentTracker &IndentTracker) {
+ if (Next == End)
+ return nullptr;
+ const AnnotatedLine *Current = *Next;
+ IndentTracker.nextLine(*Current);
+ unsigned MergedLines =
+ tryFitMultipleLinesInOne(IndentTracker, Next, End);
+ if (MergedLines > 0 && Style.ColumnLimit == 0)
+ // Disallow line merging if there is a break at the start of one of the
+ // input lines.
+ for (unsigned i = 0; i < MergedLines; ++i)
+ if (Next[i + 1]->First->NewlinesBefore > 0)
+ MergedLines = 0;
+ if (!DryRun)
+ for (unsigned i = 0; i < MergedLines; ++i)
+ join(*Next[0], *Next[i + 1]);
+ Next = Next + MergedLines + 1;
+ return Current;
+ }
+
+private:
+ /// \brief Calculates how many lines can be merged into 1 starting at \p I.
+ unsigned
+ tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
+ const unsigned Indent = IndentTracker.getIndent();
+
+ // Can't join the last line with anything.
+ if (I + 1 == E)
+ return 0;
+ // We can never merge stuff if there are trailing line comments.
+ const AnnotatedLine *TheLine = *I;
+ if (TheLine->Last->is(TT_LineComment))
+ return 0;
+ if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
+ return 0;
+ if (TheLine->InPPDirective &&
+ (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
+ return 0;
+
+ if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
+ return 0;
+
+ unsigned Limit =
+ Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
+ // If we already exceed the column limit, we set 'Limit' to 0. The different
+ // tryMerge..() functions can then decide whether to still do merging.
+ Limit = TheLine->Last->TotalLength > Limit
+ ? 0
+ : Limit - TheLine->Last->TotalLength;
+
+ if (TheLine->Last->is(TT_FunctionLBrace) &&
+ TheLine->First == TheLine->Last &&
+ !Style.BraceWrapping.SplitEmptyFunctionBody &&
+ I[1]->First->is(tok::r_brace))
+ return tryMergeSimpleBlock(I, E, Limit);
+
+ // FIXME: TheLine->Level != 0 might or might not be the right check to do.
+ // If necessary, change to something smarter.
+ bool MergeShortFunctions =
+ Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
+ (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
+ I[1]->First->is(tok::r_brace)) ||
+ (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly &&
+ TheLine->Level != 0);
+
+ if (Style.CompactNamespaces) {
+ if (isNamespaceDeclaration(TheLine)) {
+ int i = 0;
+ unsigned closingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
+ for (; I + 1 + i != E && isNamespaceDeclaration(I[i + 1]) &&
+ closingLine == I[i + 1]->MatchingOpeningBlockLineIndex &&
+ I[i + 1]->Last->TotalLength < Limit;
+ i++, closingLine--) {
+ // No extra indent for compacted namespaces
+ IndentTracker.skipLine(*I[i + 1]);
+
+ Limit -= I[i + 1]->Last->TotalLength;
+ }
+ return i;
+ }
+
+ if (isEndOfNamespace(TheLine, AnnotatedLines)) {
+ int i = 0;
+ unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
+ for (; I + 1 + i != E && isEndOfNamespace(I[i + 1], AnnotatedLines) &&
+ openingLine == I[i + 1]->MatchingOpeningBlockLineIndex;
+ i++, openingLine--) {
+ // No space between consecutive braces
+ I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace);
+
+ // Indent like the outer-most namespace
+ IndentTracker.nextLine(*I[i + 1]);
+ }
+ return i;
+ }
+ }
+
+ if (TheLine->Last->is(TT_FunctionLBrace) &&
+ TheLine->First != TheLine->Last) {
+ return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
+ }
+ if (TheLine->Last->is(tok::l_brace)) {
+ return !Style.BraceWrapping.AfterFunction
+ ? tryMergeSimpleBlock(I, E, Limit)
+ : 0;
+ }
+ if (I[1]->First->is(TT_FunctionLBrace) &&
+ Style.BraceWrapping.AfterFunction) {
+ if (I[1]->Last->is(TT_LineComment))
+ return 0;
+
+ // Check for Limit <= 2 to account for the " {".
+ if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
+ return 0;
+ Limit -= 2;
+
+ unsigned MergedLines = 0;
+ if (MergeShortFunctions ||
+ (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
+ I[1]->First == I[1]->Last && I + 2 != E &&
+ I[2]->First->is(tok::r_brace))) {
+ MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
+ // If we managed to merge the block, count the function header, which is
+ // on a separate line.
+ if (MergedLines > 0)
+ ++MergedLines;
+ }
+ return MergedLines;
+ }
+ if (TheLine->First->is(tok::kw_if)) {
+ return Style.AllowShortIfStatementsOnASingleLine
+ ? tryMergeSimpleControlStatement(I, E, Limit)
+ : 0;
+ }
+ if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
+ return Style.AllowShortLoopsOnASingleLine
+ ? tryMergeSimpleControlStatement(I, E, Limit)
+ : 0;
+ }
+ if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
+ return Style.AllowShortCaseLabelsOnASingleLine
+ ? tryMergeShortCaseLabels(I, E, Limit)
+ : 0;
+ }
+ if (TheLine->InPPDirective &&
+ (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
+ return tryMergeSimplePPDirective(I, E, Limit);
+ }
+ return 0;
+ }
+
+ unsigned
+ tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
+ if (Limit == 0)
+ return 0;
+ if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
+ return 0;
+ if (1 + I[1]->Last->TotalLength > Limit)
+ return 0;
+ return 1;
+ }
+
+ unsigned tryMergeSimpleControlStatement(
+ SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
+ if (Limit == 0)
+ return 0;
+ if (Style.BraceWrapping.AfterControlStatement &&
+ (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
+ return 0;
+ if (I[1]->InPPDirective != (*I)->InPPDirective ||
+ (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
+ return 0;
+ Limit = limitConsideringMacros(I + 1, E, Limit);
+ AnnotatedLine &Line = **I;
+ if (Line.Last->isNot(tok::r_paren))
+ return 0;
+ if (1 + I[1]->Last->TotalLength > Limit)
+ return 0;
+ if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
+ TT_LineComment))
+ return 0;
+ // Only inline simple if's (no nested if or else).
+ if (I + 2 != E && Line.startsWith(tok::kw_if) &&
+ I[2]->First->is(tok::kw_else))
+ return 0;
+ return 1;
+ }
+
+ unsigned
+ tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
+ if (Limit == 0 || I + 1 == E ||
+ I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
+ return 0;
+ unsigned NumStmts = 0;
+ unsigned Length = 0;
+ bool InPPDirective = I[0]->InPPDirective;
+ for (; NumStmts < 3; ++NumStmts) {
+ if (I + 1 + NumStmts == E)
+ break;
+ const AnnotatedLine *Line = I[1 + NumStmts];
+ if (Line->InPPDirective != InPPDirective)
+ break;
+ if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
+ break;
+ if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
+ tok::kw_while, tok::comment) ||
+ Line->Last->is(tok::comment))
+ return 0;
+ Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
+ }
+ if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
+ return 0;
+ return NumStmts;
+ }
+
+ unsigned
+ tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
+ AnnotatedLine &Line = **I;
+
+ // Don't merge ObjC @ keywords and methods.
+ // FIXME: If an option to allow short exception handling clauses on a single
+ // line is added, change this to not return for @try and friends.
+ if (Style.Language != FormatStyle::LK_Java &&
+ Line.First->isOneOf(tok::at, tok::minus, tok::plus))
+ return 0;
+
+ // Check that the current line allows merging. This depends on whether we
+ // are in a control flow statements as well as several style flags.
+ if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
+ (Line.First->Next && Line.First->Next->is(tok::kw_else)))
+ return 0;
+ if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
+ tok::kw___try, tok::kw_catch, tok::kw___finally,
+ tok::kw_for, tok::r_brace, Keywords.kw___except)) {
+ if (!Style.AllowShortBlocksOnASingleLine)
+ return 0;
+ if (!Style.AllowShortIfStatementsOnASingleLine &&
+ Line.startsWith(tok::kw_if))
+ return 0;
+ if (!Style.AllowShortLoopsOnASingleLine &&
+ Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
+ return 0;
+ // FIXME: Consider an option to allow short exception handling clauses on
+ // a single line.
+ // FIXME: This isn't covered by tests.
+ // FIXME: For catch, __except, __finally the first token on the line
+ // is '}', so this isn't correct here.
+ if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
+ Keywords.kw___except, tok::kw___finally))
+ return 0;
+ }
+
+ FormatToken *Tok = I[1]->First;
+ if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
+ (Tok->getNextNonComment() == nullptr ||
+ Tok->getNextNonComment()->is(tok::semi))) {
+ // We merge empty blocks even if the line exceeds the column limit.
+ Tok->SpacesRequiredBefore = 0;
+ Tok->CanBreakBefore = true;
+ return 1;
+ } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
+ !startsExternCBlock(Line)) {
+ // We don't merge short records.
+ FormatToken *RecordTok =
+ Line.First->is(tok::kw_typedef) ? Line.First->Next : Line.First;
+ if (RecordTok &&
+ RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
+ Keywords.kw_interface))
+ return 0;
+
+ // Check that we still have three lines and they fit into the limit.
+ if (I + 2 == E || I[2]->Type == LT_Invalid)
+ return 0;
+ Limit = limitConsideringMacros(I + 2, E, Limit);
+
+ if (!nextTwoLinesFitInto(I, Limit))
+ return 0;
+
+ // Second, check that the next line does not contain any braces - if it
+ // does, readability declines when putting it into a single line.
+ if (I[1]->Last->is(TT_LineComment))
+ return 0;
+ do {
+ if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
+ return 0;
+ Tok = Tok->Next;
+ } while (Tok);
+
+ // Last, check that the third line starts with a closing brace.
+ Tok = I[2]->First;
+ if (Tok->isNot(tok::r_brace))
+ return 0;
+
+ // Don't merge "if (a) { .. } else {".
+ if (Tok->Next && Tok->Next->is(tok::kw_else))
+ return 0;
+
+ return 2;
+ }
+ return 0;
+ }
+
+ /// Returns the modified column limit for \p I if it is inside a macro and
+ /// needs a trailing '\'.
+ unsigned
+ limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+ unsigned Limit) {
+ if (I[0]->InPPDirective && I + 1 != E &&
+ !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
+ return Limit < 2 ? 0 : Limit - 2;
+ }
+ return Limit;
+ }
+
+ bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+ unsigned Limit) {
+ if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
+ return false;
+ return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
+ }
+
+ bool containsMustBreak(const AnnotatedLine *Line) {
+ for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
+ if (Tok->MustBreakBefore)
+ return true;
+ }
+ return false;
+ }
+
+ void join(AnnotatedLine &A, const AnnotatedLine &B) {
+ assert(!A.Last->Next);
+ assert(!B.First->Previous);
+ if (B.Affected)
+ A.Affected = true;
+ A.Last->Next = B.First;
+ B.First->Previous = A.Last;
+ B.First->CanBreakBefore = true;
+ unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
+ for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
+ Tok->TotalLength += LengthA;
+ A.Last = Tok;
+ }
+ }
+
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
+
+ SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
+ const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines;
+};
+
+static void markFinalized(FormatToken *Tok) {
+ for (; Tok; Tok = Tok->Next) {
+ Tok->Finalized = true;
+ for (AnnotatedLine *Child : Tok->Children)
+ markFinalized(Child->First);
+ }
+}
+
+#ifndef NDEBUG
+static void printLineState(const LineState &State) {
+ llvm::dbgs() << "State: ";
+ for (const ParenState &P : State.Stack) {
+ llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
+ << " ";
+ }
+ llvm::dbgs() << State.NextToken->TokenText << "\n";
+}
+#endif
+
+/// \brief Base class for classes that format one \c AnnotatedLine.
+class LineFormatter {
+public:
+ LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
+ BlockFormatter(BlockFormatter) {}
+ virtual ~LineFormatter() {}
+
+ /// \brief Formats an \c AnnotatedLine and returns the penalty.
+ ///
+ /// If \p DryRun is \c false, directly applies the changes.
+ virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) = 0;
+
+protected:
+ /// \brief If the \p State's next token is an r_brace closing a nested block,
+ /// format the nested block before it.
+ ///
+ /// Returns \c true if all children could be placed successfully and adapts
+ /// \p Penalty as well as \p State. If \p DryRun is false, also directly
+ /// creates changes using \c Whitespaces.
+ ///
+ /// The crucial idea here is that children always get formatted upon
+ /// encountering the closing brace right after the nested block. Now, if we
+ /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
+ /// \c false), the entire block has to be kept on the same line (which is only
+ /// possible if it fits on the line, only contains a single statement, etc.
+ ///
+ /// If \p NewLine is true, we format the nested block on separate lines, i.e.
+ /// break after the "{", format all lines with correct indentation and the put
+ /// the closing "}" on yet another new line.
+ ///
+ /// This enables us to keep the simple structure of the
+ /// \c UnwrappedLineFormatter, where we only have two options for each token:
+ /// break or don't break.
+ bool formatChildren(LineState &State, bool NewLine, bool DryRun,
+ unsigned &Penalty) {
+ const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+ FormatToken &Previous = *State.NextToken->Previous;
+ if (!LBrace || LBrace->isNot(tok::l_brace) ||
+ LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
+ // The previous token does not open a block. Nothing to do. We don't
+ // assert so that we can simply call this function for all tokens.
+ return true;
+
+ if (NewLine) {
+ int AdditionalIndent = State.Stack.back().Indent -
+ Previous.Children[0]->Level * Style.IndentWidth;
+
+ Penalty +=
+ BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
+ /*FixBadIndentation=*/true);
+ return true;
+ }
+
+ if (Previous.Children[0]->First->MustBreakBefore)
+ return false;
+
+ // Cannot merge into one line if this line ends on a comment.
+ if (Previous.is(tok::comment))
+ return false;
+
+ // Cannot merge multiple statements into a single line.
+ if (Previous.Children.size() > 1)
+ return false;
+
+ const AnnotatedLine *Child = Previous.Children[0];
+ // We can't put the closing "}" on a line with a trailing comment.
+ if (Child->Last->isTrailingComment())
+ return false;
+
+ // If the child line exceeds the column limit, we wouldn't want to merge it.
+ // We add +2 for the trailing " }".
+ if (Style.ColumnLimit > 0 &&
+ Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit)
+ return false;
+
+ if (!DryRun) {
+ Whitespaces->replaceWhitespace(
+ *Child->First, /*Newlines=*/0, /*Spaces=*/1,
+ /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
+ }
+ Penalty += formatLine(*Child, State.Column + 1, DryRun);
+
+ State.Column += 1 + Child->Last->TotalLength;
+ return true;
+ }
+
+ ContinuationIndenter *Indenter;
+
+private:
+ WhitespaceManager *Whitespaces;
+ const FormatStyle &Style;
+ UnwrappedLineFormatter *BlockFormatter;
+};
+
+/// \brief Formatter that keeps the existing line breaks.
+class NoColumnLimitLineFormatter : public LineFormatter {
+public:
+ NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+ /// \brief Formats the line, simply keeping all of the input's line breaking
+ /// decisions.
+ unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) override {
+ assert(!DryRun);
+ LineState State =
+ Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
+ while (State.NextToken) {
+ bool Newline =
+ Indenter->mustBreak(State) ||
+ (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
+ unsigned Penalty = 0;
+ formatChildren(State, Newline, /*DryRun=*/false, Penalty);
+ Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
+ }
+ return 0;
+ }
+};
+
+/// \brief Formatter that puts all tokens into a single line without breaks.
+class NoLineBreakFormatter : public LineFormatter {
+public:
+ NoLineBreakFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces, const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+ /// \brief Puts all tokens into a single line.
+ unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) override {
+ unsigned Penalty = 0;
+ LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+ while (State.NextToken) {
+ formatChildren(State, /*Newline=*/false, DryRun, Penalty);
+ Indenter->addTokenToState(
+ State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
+ }
+ return Penalty;
+ }
+};
+
+/// \brief Finds the best way to break lines.
+class OptimizingLineFormatter : public LineFormatter {
+public:
+ OptimizingLineFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ UnwrappedLineFormatter *BlockFormatter)
+ : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+ /// \brief Formats the line by finding the best line breaks with line lengths
+ /// below the column limit.
+ unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+ bool DryRun) override {
+ LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+
+ // If the ObjC method declaration does not fit on a line, we should format
+ // it with one arg per line.
+ if (State.Line->Type == LT_ObjCMethodDecl)
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // Find best solution in solution space.
+ return analyzeSolutionSpace(State, DryRun);
+ }
+
+private:
+ struct CompareLineStatePointers {
+ bool operator()(LineState *obj1, LineState *obj2) const {
+ return *obj1 < *obj2;
+ }
+ };
+
+ /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
+ ///
+ /// In case of equal penalties, we want to prefer states that were inserted
+ /// first. During state generation we make sure that we insert states first
+ /// that break the line as late as possible.
+ typedef std::pair<unsigned, unsigned> OrderedPenalty;
+
+ /// \brief An edge in the solution space from \c Previous->State to \c State,
+ /// inserting a newline dependent on the \c NewLine.
+ struct StateNode {
+ StateNode(const LineState &State, bool NewLine, StateNode *Previous)
+ : State(State), NewLine(NewLine), Previous(Previous) {}
+ LineState State;
+ bool NewLine;
+ StateNode *Previous;
+ };
+
+ /// \brief An item in the prioritized BFS search queue. The \c StateNode's
+ /// \c State has the given \c OrderedPenalty.
+ typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
+
+ /// \brief The BFS queue type.
+ typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
+ std::greater<QueueItem>> QueueType;
+
+ /// \brief Analyze the entire solution space starting from \p InitialState.
+ ///
+ /// This implements a variant of Dijkstra's algorithm on the graph that spans
+ /// the solution space (\c LineStates are the nodes). The algorithm tries to
+ /// find the shortest path (the one with lowest penalty) from \p InitialState
+ /// to a state where all tokens are placed. Returns the penalty.
+ ///
+ /// If \p DryRun is \c false, directly applies the changes.
+ unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
+ std::set<LineState *, CompareLineStatePointers> Seen;
+
+ // Increasing count of \c StateNode items we have created. This is used to
+ // create a deterministic order independent of the container.
+ unsigned Count = 0;
+ QueueType Queue;
+
+ // Insert start element into queue.
+ StateNode *Node =
+ new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
+ Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
+ ++Count;
+
+ unsigned Penalty = 0;
+
+ // While not empty, take first element and follow edges.
+ while (!Queue.empty()) {
+ Penalty = Queue.top().first.first;
+ StateNode *Node = Queue.top().second;
+ if (!Node->State.NextToken) {
+ DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
+ break;
+ }
+ Queue.pop();
+
+ // Cut off the analysis of certain solutions if the analysis gets too
+ // complex. See description of IgnoreStackForComparison.
+ if (Count > 50000)
+ Node->State.IgnoreStackForComparison = true;
+
+ if (!Seen.insert(&Node->State).second)
+ // State already examined with lower penalty.
+ continue;
+
+ FormatDecision LastFormat = Node->State.NextToken->Decision;
+ if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
+ if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
+ }
+
+ if (Queue.empty()) {
+ // We were unable to find a solution, do nothing.
+ // FIXME: Add diagnostic?
+ DEBUG(llvm::dbgs() << "Could not find a solution.\n");
+ return 0;
+ }
+
+ // Reconstruct the solution.
+ if (!DryRun)
+ reconstructPath(InitialState, Queue.top().second);
+
+ DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
+ DEBUG(llvm::dbgs() << "---\n");
+
+ return Penalty;
+ }
+
+ /// \brief Add the following state to the analysis queue \c Queue.
+ ///
+ /// Assume the current state is \p PreviousNode and has been reached with a
+ /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
+ void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
+ bool NewLine, unsigned *Count, QueueType *Queue) {
+ if (NewLine && !Indenter->canBreak(PreviousNode->State))
+ return;
+ if (!NewLine && Indenter->mustBreak(PreviousNode->State))
+ return;
+
+ StateNode *Node = new (Allocator.Allocate())
+ StateNode(PreviousNode->State, NewLine, PreviousNode);
+ if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
+ return;
+
+ Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
+
+ Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
+ ++(*Count);
+ }
+
+ /// \brief Applies the best formatting by reconstructing the path in the
+ /// solution space that leads to \c Best.
+ void reconstructPath(LineState &State, StateNode *Best) {
+ std::deque<StateNode *> Path;
+ // We do not need a break before the initial token.
+ while (Best->Previous) {
+ Path.push_front(Best);
+ Best = Best->Previous;
+ }
+ for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
+ I != E; ++I) {
+ unsigned Penalty = 0;
+ formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
+ Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
+
+ DEBUG({
+ printLineState((*I)->Previous->State);
+ if ((*I)->NewLine) {
+ llvm::dbgs() << "Penalty for placing "
+ << (*I)->Previous->State.NextToken->Tok.getName() << ": "
+ << Penalty << "\n";
+ }
+ });
+ }
+ }
+
+ llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
+};
+
+} // anonymous namespace
+
+unsigned
+UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
+ bool DryRun, int AdditionalIndent,
+ bool FixBadIndentation) {
+ LineJoiner Joiner(Style, Keywords, Lines);
+
+ // Try to look up already computed penalty in DryRun-mode.
+ std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
+ &Lines, AdditionalIndent);
+ auto CacheIt = PenaltyCache.find(CacheKey);
+ if (DryRun && CacheIt != PenaltyCache.end())
+ return CacheIt->second;
+
+ assert(!Lines.empty());
+ unsigned Penalty = 0;
+ LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
+ AdditionalIndent);
+ const AnnotatedLine *PreviousLine = nullptr;
+ const AnnotatedLine *NextLine = nullptr;
+
+ // The minimum level of consecutive lines that have been formatted.
+ unsigned RangeMinLevel = UINT_MAX;
+
+ for (const AnnotatedLine *Line =
+ Joiner.getNextMergedLine(DryRun, IndentTracker);
+ Line; Line = NextLine) {
+ const AnnotatedLine &TheLine = *Line;
+ unsigned Indent = IndentTracker.getIndent();
+
+ // We continue formatting unchanged lines to adjust their indent, e.g. if a
+ // scope was added. However, we need to carefully stop doing this when we
+ // exit the scope of affected lines to prevent indenting a the entire
+ // remaining file if it currently missing a closing brace.
+ bool ContinueFormatting =
+ TheLine.Level > RangeMinLevel ||
+ (TheLine.Level == RangeMinLevel && !TheLine.startsWith(tok::r_brace));
+
+ bool FixIndentation = (FixBadIndentation || ContinueFormatting) &&
+ Indent != TheLine.First->OriginalColumn;
+ bool ShouldFormat = TheLine.Affected || FixIndentation;
+ // We cannot format this line; if the reason is that the line had a
+ // parsing error, remember that.
+ if (ShouldFormat && TheLine.Type == LT_Invalid && Status) {
+ Status->FormatComplete = false;
+ Status->Line =
+ SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation());
+ }
+
+ if (ShouldFormat && TheLine.Type != LT_Invalid) {
+ if (!DryRun)
+ formatFirstToken(TheLine, PreviousLine, Indent);
+
+ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
+ unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
+ bool FitsIntoOneLine =
+ TheLine.Last->TotalLength + Indent <= ColumnLimit ||
+ (TheLine.Type == LT_ImportStatement &&
+ (Style.Language != FormatStyle::LK_JavaScript ||
+ !Style.JavaScriptWrapImports));
+
+ if (Style.ColumnLimit == 0)
+ NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
+ .formatLine(TheLine, Indent, DryRun);
+ else if (FitsIntoOneLine)
+ Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
+ .formatLine(TheLine, Indent, DryRun);
+ else
+ Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
+ .formatLine(TheLine, Indent, DryRun);
+ RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
+ } else {
+ // If no token in the current line is affected, we still need to format
+ // affected children.
+ if (TheLine.ChildrenAffected)
+ for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
+ if (!Tok->Children.empty())
+ format(Tok->Children, DryRun);
+
+ // Adapt following lines on the current indent level to the same level
+ // unless the current \c AnnotatedLine is not at the beginning of a line.
+ bool StartsNewLine =
+ TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
+ if (StartsNewLine)
+ IndentTracker.adjustToUnmodifiedLine(TheLine);
+ if (!DryRun) {
+ bool ReformatLeadingWhitespace =
+ StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
+ TheLine.LeadingEmptyLinesAffected);
+ // Format the first token.
+ if (ReformatLeadingWhitespace)
+ formatFirstToken(TheLine, PreviousLine,
+ TheLine.First->OriginalColumn);
+ else
+ Whitespaces->addUntouchableToken(*TheLine.First,
+ TheLine.InPPDirective);
+
+ // Notify the WhitespaceManager about the unchanged whitespace.
+ for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
+ Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
+ }
+ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
+ RangeMinLevel = UINT_MAX;
+ }
+ if (!DryRun)
+ markFinalized(TheLine.First);
+ PreviousLine = &TheLine;
+ }
+ PenaltyCache[CacheKey] = Penalty;
+ return Penalty;
+}
+
+void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,
+ const AnnotatedLine *PreviousLine,
+ unsigned Indent) {
+ FormatToken& RootToken = *Line.First;
+ if (RootToken.is(tok::eof)) {
+ unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
+ Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0,
+ /*StartOfTokenColumn=*/0);
+ return;
+ }
+ unsigned Newlines =
+ std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
+ // Remove empty lines before "}" where applicable.
+ if (RootToken.is(tok::r_brace) &&
+ (!RootToken.Next ||
+ (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
+ Newlines = std::min(Newlines, 1u);
+ if (Newlines == 0 && !RootToken.IsFirst)
+ Newlines = 1;
+ if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
+ Newlines = 0;
+
+ // Remove empty lines after "{".
+ if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
+ PreviousLine->Last->is(tok::l_brace) &&
+ PreviousLine->First->isNot(tok::kw_namespace) &&
+ !startsExternCBlock(*PreviousLine))
+ Newlines = 1;
+
+ // Insert extra new line before access specifiers.
+ if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
+ RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
+ ++Newlines;
+
+ // Remove empty lines after access specifiers.
+ if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
+ (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
+ Newlines = std::min(1u, Newlines);
+
+ Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
+ Line.InPPDirective &&
+ !RootToken.HasUnescapedNewline);
+}
+
+unsigned
+UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
+ const AnnotatedLine *NextLine) const {
+ // In preprocessor directives reserve two chars for trailing " \" if the
+ // next line continues the preprocessor directive.
+ bool ContinuesPPDirective =
+ InPPDirective &&
+ // If there is no next line, this is likely a child line and the parent
+ // continues the preprocessor directive.
+ (!NextLine ||
+ (NextLine->InPPDirective &&
+ // If there is an unescaped newline between this line and the next, the
+ // next line starts a new preprocessor directive.
+ !NextLine->First->HasUnescapedNewline));
+ return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h
new file mode 100644
index 000000000000..55f0d1cac689
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h
@@ -0,0 +1,74 @@
+//===--- UnwrappedLineFormatter.h - Format C++ code -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Implements a combinartorial exploration of all the different
+/// linebreaks unwrapped lines can be formatted in.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H
+#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H
+
+#include "ContinuationIndenter.h"
+#include "clang/Format/Format.h"
+#include <map>
+
+namespace clang {
+namespace format {
+
+class ContinuationIndenter;
+class WhitespaceManager;
+
+class UnwrappedLineFormatter {
+public:
+ UnwrappedLineFormatter(ContinuationIndenter *Indenter,
+ WhitespaceManager *Whitespaces,
+ const FormatStyle &Style,
+ const AdditionalKeywords &Keywords,
+ const SourceManager &SourceMgr,
+ FormattingAttemptStatus *Status)
+ : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
+ Keywords(Keywords), SourceMgr(SourceMgr),
+ Status(Status) {}
+
+ /// \brief Format the current block and return the penalty.
+ unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines,
+ bool DryRun = false, int AdditionalIndent = 0,
+ bool FixBadIndentation = false);
+
+private:
+ /// \brief Add a new line and the required indent before the first Token
+ /// of the \c UnwrappedLine if there was no structural parsing error.
+ void formatFirstToken(const AnnotatedLine &Line,
+ const AnnotatedLine *PreviousLine, unsigned Indent);
+
+ /// \brief Returns the column limit for a line, taking into account whether we
+ /// need an escaped newline due to a continued preprocessor directive.
+ unsigned getColumnLimit(bool InPPDirective,
+ const AnnotatedLine *NextLine) const;
+
+ // Cache to store the penalty of formatting a vector of AnnotatedLines
+ // starting from a specific additional offset. Improves performance if there
+ // are many nested blocks.
+ std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>,
+ unsigned>
+ PenaltyCache;
+
+ ContinuationIndenter *Indenter;
+ WhitespaceManager *Whitespaces;
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+ const SourceManager &SourceMgr;
+ FormattingAttemptStatus *Status;
+};
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
new file mode 100644
index 000000000000..f7678bb6b2a5
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
@@ -0,0 +1,2403 @@
+//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the implementation of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "UnwrappedLineParser.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "format-parser"
+
+namespace clang {
+namespace format {
+
+class FormatTokenSource {
+public:
+ virtual ~FormatTokenSource() {}
+ virtual FormatToken *getNextToken() = 0;
+
+ virtual unsigned getPosition() = 0;
+ virtual FormatToken *setPosition(unsigned Position) = 0;
+};
+
+namespace {
+
+class ScopedDeclarationState {
+public:
+ ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
+ bool MustBeDeclaration)
+ : Line(Line), Stack(Stack) {
+ Line.MustBeDeclaration = MustBeDeclaration;
+ Stack.push_back(MustBeDeclaration);
+ }
+ ~ScopedDeclarationState() {
+ Stack.pop_back();
+ if (!Stack.empty())
+ Line.MustBeDeclaration = Stack.back();
+ else
+ Line.MustBeDeclaration = true;
+ }
+
+private:
+ UnwrappedLine &Line;
+ std::vector<bool> &Stack;
+};
+
+static bool isLineComment(const FormatToken &FormatTok) {
+ return FormatTok.is(tok::comment) &&
+ FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// \p Previous. The original column of \p MinColumnToken is used to determine
+// whether \p FormatTok is indented enough to the right to continue \p Previous.
+static bool continuesLineComment(const FormatToken &FormatTok,
+ const FormatToken *Previous,
+ const FormatToken *MinColumnToken) {
+ if (!Previous || !MinColumnToken)
+ return false;
+ unsigned MinContinueColumn =
+ MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
+ return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
+ isLineComment(*Previous) &&
+ FormatTok.OriginalColumn >= MinContinueColumn;
+}
+
+class ScopedMacroState : public FormatTokenSource {
+public:
+ ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
+ FormatToken *&ResetToken)
+ : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
+ PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
+ Token(nullptr), PreviousToken(nullptr) {
+ TokenSource = this;
+ Line.Level = 0;
+ Line.InPPDirective = true;
+ }
+
+ ~ScopedMacroState() override {
+ TokenSource = PreviousTokenSource;
+ ResetToken = Token;
+ Line.InPPDirective = false;
+ Line.Level = PreviousLineLevel;
+ }
+
+ FormatToken *getNextToken() override {
+ // The \c UnwrappedLineParser guards against this by never calling
+ // \c getNextToken() after it has encountered the first eof token.
+ assert(!eof());
+ PreviousToken = Token;
+ Token = PreviousTokenSource->getNextToken();
+ if (eof())
+ return getFakeEOF();
+ return Token;
+ }
+
+ unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
+
+ FormatToken *setPosition(unsigned Position) override {
+ PreviousToken = nullptr;
+ Token = PreviousTokenSource->setPosition(Position);
+ return Token;
+ }
+
+private:
+ bool eof() {
+ return Token && Token->HasUnescapedNewline &&
+ !continuesLineComment(*Token, PreviousToken,
+ /*MinColumnToken=*/PreviousToken);
+ }
+
+ FormatToken *getFakeEOF() {
+ static bool EOFInitialized = false;
+ static FormatToken FormatTok;
+ if (!EOFInitialized) {
+ FormatTok.Tok.startToken();
+ FormatTok.Tok.setKind(tok::eof);
+ EOFInitialized = true;
+ }
+ return &FormatTok;
+ }
+
+ UnwrappedLine &Line;
+ FormatTokenSource *&TokenSource;
+ FormatToken *&ResetToken;
+ unsigned PreviousLineLevel;
+ FormatTokenSource *PreviousTokenSource;
+
+ FormatToken *Token;
+ FormatToken *PreviousToken;
+};
+
+} // end anonymous namespace
+
+class ScopedLineState {
+public:
+ ScopedLineState(UnwrappedLineParser &Parser,
+ bool SwitchToPreprocessorLines = false)
+ : Parser(Parser), OriginalLines(Parser.CurrentLines) {
+ if (SwitchToPreprocessorLines)
+ Parser.CurrentLines = &Parser.PreprocessorDirectives;
+ else if (!Parser.Line->Tokens.empty())
+ Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
+ PreBlockLine = std::move(Parser.Line);
+ Parser.Line = llvm::make_unique<UnwrappedLine>();
+ Parser.Line->Level = PreBlockLine->Level;
+ Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
+ }
+
+ ~ScopedLineState() {
+ if (!Parser.Line->Tokens.empty()) {
+ Parser.addUnwrappedLine();
+ }
+ assert(Parser.Line->Tokens.empty());
+ Parser.Line = std::move(PreBlockLine);
+ if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
+ Parser.MustBreakBeforeNextToken = true;
+ Parser.CurrentLines = OriginalLines;
+ }
+
+private:
+ UnwrappedLineParser &Parser;
+
+ std::unique_ptr<UnwrappedLine> PreBlockLine;
+ SmallVectorImpl<UnwrappedLine> *OriginalLines;
+};
+
+class CompoundStatementIndenter {
+public:
+ CompoundStatementIndenter(UnwrappedLineParser *Parser,
+ const FormatStyle &Style, unsigned &LineLevel)
+ : LineLevel(LineLevel), OldLineLevel(LineLevel) {
+ if (Style.BraceWrapping.AfterControlStatement)
+ Parser->addUnwrappedLine();
+ if (Style.BraceWrapping.IndentBraces)
+ ++LineLevel;
+ }
+ ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
+
+private:
+ unsigned &LineLevel;
+ unsigned OldLineLevel;
+};
+
+namespace {
+
+class IndexedTokenSource : public FormatTokenSource {
+public:
+ IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
+ : Tokens(Tokens), Position(-1) {}
+
+ FormatToken *getNextToken() override {
+ ++Position;
+ return Tokens[Position];
+ }
+
+ unsigned getPosition() override {
+ assert(Position >= 0);
+ return Position;
+ }
+
+ FormatToken *setPosition(unsigned P) override {
+ Position = P;
+ return Tokens[Position];
+ }
+
+ void reset() { Position = -1; }
+
+private:
+ ArrayRef<FormatToken *> Tokens;
+ int Position;
+};
+
+} // end anonymous namespace
+
+UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
+ const AdditionalKeywords &Keywords,
+ ArrayRef<FormatToken *> Tokens,
+ UnwrappedLineConsumer &Callback)
+ : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
+ CurrentLines(&Lines), Style(Style), Keywords(Keywords),
+ CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
+ Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
+
+void UnwrappedLineParser::reset() {
+ PPBranchLevel = -1;
+ Line.reset(new UnwrappedLine);
+ CommentsBeforeNextToken.clear();
+ FormatTok = nullptr;
+ MustBreakBeforeNextToken = false;
+ PreprocessorDirectives.clear();
+ CurrentLines = &Lines;
+ DeclarationScopeStack.clear();
+ PPStack.clear();
+}
+
+void UnwrappedLineParser::parse() {
+ IndexedTokenSource TokenSource(AllTokens);
+ do {
+ DEBUG(llvm::dbgs() << "----\n");
+ reset();
+ Tokens = &TokenSource;
+ TokenSource.reset();
+
+ readToken();
+ parseFile();
+ // Create line with eof token.
+ pushToken(FormatTok);
+ addUnwrappedLine();
+
+ for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
+ E = Lines.end();
+ I != E; ++I) {
+ Callback.consumeUnwrappedLine(*I);
+ }
+ Callback.finishRun();
+ Lines.clear();
+ while (!PPLevelBranchIndex.empty() &&
+ PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
+ PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
+ PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
+ }
+ if (!PPLevelBranchIndex.empty()) {
+ ++PPLevelBranchIndex.back();
+ assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
+ assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
+ }
+ } while (!PPLevelBranchIndex.empty());
+}
+
+void UnwrappedLineParser::parseFile() {
+ // The top-level context in a file always has declarations, except for pre-
+ // processor directives and JavaScript files.
+ bool MustBeDeclaration =
+ !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ MustBeDeclaration);
+ parseLevel(/*HasOpeningBrace=*/false);
+ // Make sure to format the remaining tokens.
+ flushComments(true);
+ addUnwrappedLine();
+}
+
+void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
+ bool SwitchLabelEncountered = false;
+ do {
+ tok::TokenKind kind = FormatTok->Tok.getKind();
+ if (FormatTok->Type == TT_MacroBlockBegin) {
+ kind = tok::l_brace;
+ } else if (FormatTok->Type == TT_MacroBlockEnd) {
+ kind = tok::r_brace;
+ }
+
+ switch (kind) {
+ case tok::comment:
+ nextToken();
+ addUnwrappedLine();
+ break;
+ case tok::l_brace:
+ // FIXME: Add parameter whether this can happen - if this happens, we must
+ // be in a non-declaration context.
+ if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
+ continue;
+ parseBlock(/*MustBeDeclaration=*/false);
+ addUnwrappedLine();
+ break;
+ case tok::r_brace:
+ if (HasOpeningBrace)
+ return;
+ nextToken();
+ addUnwrappedLine();
+ break;
+ case tok::kw_default:
+ case tok::kw_case:
+ if (!SwitchLabelEncountered &&
+ (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
+ ++Line->Level;
+ SwitchLabelEncountered = true;
+ parseStructuralElement();
+ break;
+ default:
+ parseStructuralElement();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
+ // We'll parse forward through the tokens until we hit
+ // a closing brace or eof - note that getNextToken() will
+ // parse macros, so this will magically work inside macro
+ // definitions, too.
+ unsigned StoredPosition = Tokens->getPosition();
+ FormatToken *Tok = FormatTok;
+ const FormatToken *PrevTok = getPreviousToken();
+ // Keep a stack of positions of lbrace tokens. We will
+ // update information about whether an lbrace starts a
+ // braced init list or a different block during the loop.
+ SmallVector<FormatToken *, 8> LBraceStack;
+ assert(Tok->Tok.is(tok::l_brace));
+ do {
+ // Get next non-comment token.
+ FormatToken *NextTok;
+ unsigned ReadTokens = 0;
+ do {
+ NextTok = Tokens->getNextToken();
+ ++ReadTokens;
+ } while (NextTok->is(tok::comment));
+
+ switch (Tok->Tok.getKind()) {
+ case tok::l_brace:
+ if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
+ if (PrevTok->is(tok::colon))
+ // A colon indicates this code is in a type, or a braced list
+ // following a label in an object literal ({a: {b: 1}}). The code
+ // below could be confused by semicolons between the individual
+ // members in a type member list, which would normally trigger
+ // BK_Block. In both cases, this must be parsed as an inline braced
+ // init.
+ Tok->BlockKind = BK_BracedInit;
+ else if (PrevTok->is(tok::r_paren))
+ // `) { }` can only occur in function or method declarations in JS.
+ Tok->BlockKind = BK_Block;
+ } else {
+ Tok->BlockKind = BK_Unknown;
+ }
+ LBraceStack.push_back(Tok);
+ break;
+ case tok::r_brace:
+ if (LBraceStack.empty())
+ break;
+ if (LBraceStack.back()->BlockKind == BK_Unknown) {
+ bool ProbablyBracedList = false;
+ if (Style.Language == FormatStyle::LK_Proto) {
+ ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
+ } else {
+ // Using OriginalColumn to distinguish between ObjC methods and
+ // binary operators is a bit hacky.
+ bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
+ NextTok->OriginalColumn == 0;
+
+ // If there is a comma, semicolon or right paren after the closing
+ // brace, we assume this is a braced initializer list. Note that
+ // regardless how we mark inner braces here, we will overwrite the
+ // BlockKind later if we parse a braced list (where all blocks
+ // inside are by default braced lists), or when we explicitly detect
+ // blocks (for example while parsing lambdas).
+ // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
+ // braced list in JS.
+ ProbablyBracedList =
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
+ Keywords.kw_as)) ||
+ (Style.isCpp() && NextTok->is(tok::l_paren)) ||
+ NextTok->isOneOf(tok::comma, tok::period, tok::colon,
+ tok::r_paren, tok::r_square, tok::l_brace,
+ tok::l_square, tok::ellipsis) ||
+ (NextTok->is(tok::identifier) &&
+ !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
+ (NextTok->is(tok::semi) &&
+ (!ExpectClassBody || LBraceStack.size() != 1)) ||
+ (NextTok->isBinaryOperator() && !NextIsObjCMethod);
+ }
+ if (ProbablyBracedList) {
+ Tok->BlockKind = BK_BracedInit;
+ LBraceStack.back()->BlockKind = BK_BracedInit;
+ } else {
+ Tok->BlockKind = BK_Block;
+ LBraceStack.back()->BlockKind = BK_Block;
+ }
+ }
+ LBraceStack.pop_back();
+ break;
+ case tok::at:
+ case tok::semi:
+ case tok::kw_if:
+ case tok::kw_while:
+ case tok::kw_for:
+ case tok::kw_switch:
+ case tok::kw_try:
+ case tok::kw___try:
+ if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
+ LBraceStack.back()->BlockKind = BK_Block;
+ break;
+ default:
+ break;
+ }
+ PrevTok = Tok;
+ Tok = NextTok;
+ } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
+
+ // Assume other blocks for all unclosed opening braces.
+ for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
+ if (LBraceStack[i]->BlockKind == BK_Unknown)
+ LBraceStack[i]->BlockKind = BK_Block;
+ }
+
+ FormatTok = Tokens->setPosition(StoredPosition);
+}
+
+void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
+ bool MunchSemi) {
+ assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
+ "'{' or macro block token expected");
+ const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
+ FormatTok->BlockKind = BK_Block;
+
+ unsigned InitialLevel = Line->Level;
+ nextToken();
+
+ if (MacroBlock && FormatTok->is(tok::l_paren))
+ parseParens();
+
+ addUnwrappedLine();
+ size_t OpeningLineIndex = CurrentLines->empty()
+ ? (UnwrappedLine::kInvalidIndex)
+ : (CurrentLines->size() - 1);
+
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ MustBeDeclaration);
+ if (AddLevel)
+ ++Line->Level;
+ parseLevel(/*HasOpeningBrace=*/true);
+
+ if (eof())
+ return;
+
+ if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
+ : !FormatTok->is(tok::r_brace)) {
+ Line->Level = InitialLevel;
+ FormatTok->BlockKind = BK_Block;
+ return;
+ }
+
+ nextToken(); // Munch the closing brace.
+
+ if (MacroBlock && FormatTok->is(tok::l_paren))
+ parseParens();
+
+ if (MunchSemi && FormatTok->Tok.is(tok::semi))
+ nextToken();
+ Line->Level = InitialLevel;
+ Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
+ if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
+ // Update the opening line to add the forward reference as well
+ (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
+ CurrentLines->size() - 1;
+ }
+}
+
+static bool isGoogScope(const UnwrappedLine &Line) {
+ // FIXME: Closure-library specific stuff should not be hard-coded but be
+ // configurable.
+ if (Line.Tokens.size() < 4)
+ return false;
+ auto I = Line.Tokens.begin();
+ if (I->Tok->TokenText != "goog")
+ return false;
+ ++I;
+ if (I->Tok->isNot(tok::period))
+ return false;
+ ++I;
+ if (I->Tok->TokenText != "scope")
+ return false;
+ ++I;
+ return I->Tok->is(tok::l_paren);
+}
+
+static bool isIIFE(const UnwrappedLine &Line,
+ const AdditionalKeywords &Keywords) {
+ // Look for the start of an immediately invoked anonymous function.
+ // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
+ // This is commonly done in JavaScript to create a new, anonymous scope.
+ // Example: (function() { ... })()
+ if (Line.Tokens.size() < 3)
+ return false;
+ auto I = Line.Tokens.begin();
+ if (I->Tok->isNot(tok::l_paren))
+ return false;
+ ++I;
+ if (I->Tok->isNot(Keywords.kw_function))
+ return false;
+ ++I;
+ return I->Tok->is(tok::l_paren);
+}
+
+static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
+ const FormatToken &InitialToken) {
+ if (InitialToken.is(tok::kw_namespace))
+ return Style.BraceWrapping.AfterNamespace;
+ if (InitialToken.is(tok::kw_class))
+ return Style.BraceWrapping.AfterClass;
+ if (InitialToken.is(tok::kw_union))
+ return Style.BraceWrapping.AfterUnion;
+ if (InitialToken.is(tok::kw_struct))
+ return Style.BraceWrapping.AfterStruct;
+ return false;
+}
+
+void UnwrappedLineParser::parseChildBlock() {
+ FormatTok->BlockKind = BK_Block;
+ nextToken();
+ {
+ bool SkipIndent =
+ (Style.Language == FormatStyle::LK_JavaScript &&
+ (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
+ ScopedLineState LineState(*this);
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ /*MustBeDeclaration=*/false);
+ Line->Level += SkipIndent ? 0 : 1;
+ parseLevel(/*HasOpeningBrace=*/true);
+ flushComments(isOnNewLine(*FormatTok));
+ Line->Level -= SkipIndent ? 0 : 1;
+ }
+ nextToken();
+}
+
+void UnwrappedLineParser::parsePPDirective() {
+ assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
+ ScopedMacroState MacroState(*Line, Tokens, FormatTok);
+ nextToken();
+
+ if (!FormatTok->Tok.getIdentifierInfo()) {
+ parsePPUnknown();
+ return;
+ }
+
+ switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_define:
+ parsePPDefine();
+ return;
+ case tok::pp_if:
+ parsePPIf(/*IfDef=*/false);
+ break;
+ case tok::pp_ifdef:
+ case tok::pp_ifndef:
+ parsePPIf(/*IfDef=*/true);
+ break;
+ case tok::pp_else:
+ parsePPElse();
+ break;
+ case tok::pp_elif:
+ parsePPElIf();
+ break;
+ case tok::pp_endif:
+ parsePPEndIf();
+ break;
+ default:
+ parsePPUnknown();
+ break;
+ }
+}
+
+void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
+ if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
+ PPStack.push_back(PP_Unreachable);
+ else
+ PPStack.push_back(PP_Conditional);
+}
+
+void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
+ ++PPBranchLevel;
+ assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
+ if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
+ PPLevelBranchIndex.push_back(0);
+ PPLevelBranchCount.push_back(0);
+ }
+ PPChainBranchIndex.push(0);
+ bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
+ conditionalCompilationCondition(Unreachable || Skip);
+}
+
+void UnwrappedLineParser::conditionalCompilationAlternative() {
+ if (!PPStack.empty())
+ PPStack.pop_back();
+ assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
+ if (!PPChainBranchIndex.empty())
+ ++PPChainBranchIndex.top();
+ conditionalCompilationCondition(
+ PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
+ PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
+}
+
+void UnwrappedLineParser::conditionalCompilationEnd() {
+ assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
+ if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
+ if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
+ PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
+ }
+ }
+ // Guard against #endif's without #if.
+ if (PPBranchLevel > 0)
+ --PPBranchLevel;
+ if (!PPChainBranchIndex.empty())
+ PPChainBranchIndex.pop();
+ if (!PPStack.empty())
+ PPStack.pop_back();
+}
+
+void UnwrappedLineParser::parsePPIf(bool IfDef) {
+ bool IfNDef = FormatTok->is(tok::pp_ifndef);
+ nextToken();
+ bool Unreachable = false;
+ if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
+ Unreachable = true;
+ if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
+ Unreachable = true;
+ conditionalCompilationStart(Unreachable);
+ parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPElse() {
+ conditionalCompilationAlternative();
+ parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
+
+void UnwrappedLineParser::parsePPEndIf() {
+ conditionalCompilationEnd();
+ parsePPUnknown();
+}
+
+void UnwrappedLineParser::parsePPDefine() {
+ nextToken();
+
+ if (FormatTok->Tok.getKind() != tok::identifier) {
+ parsePPUnknown();
+ return;
+ }
+ nextToken();
+ if (FormatTok->Tok.getKind() == tok::l_paren &&
+ FormatTok->WhitespaceRange.getBegin() ==
+ FormatTok->WhitespaceRange.getEnd()) {
+ parseParens();
+ }
+ addUnwrappedLine();
+ Line->Level = 1;
+
+ // Errors during a preprocessor directive can only affect the layout of the
+ // preprocessor directive, and thus we ignore them. An alternative approach
+ // would be to use the same approach we use on the file level (no
+ // re-indentation if there was a structural error) within the macro
+ // definition.
+ parseFile();
+}
+
+void UnwrappedLineParser::parsePPUnknown() {
+ do {
+ nextToken();
+ } while (!eof());
+ addUnwrappedLine();
+}
+
+// Here we blacklist certain tokens that are not usually the first token in an
+// unwrapped line. This is used in attempt to distinguish macro calls without
+// trailing semicolons from other constructs split to several lines.
+static bool tokenCanStartNewLine(const clang::Token &Tok) {
+ // Semicolon can be a null-statement, l_square can be a start of a macro or
+ // a C++11 attribute, but this doesn't seem to be common.
+ return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
+ Tok.isNot(tok::l_square) &&
+ // Tokens that can only be used as binary operators and a part of
+ // overloaded operator names.
+ Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
+ Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
+ Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
+ Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
+ Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
+ Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
+ Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
+ Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
+ Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
+ Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
+ Tok.isNot(tok::lesslessequal) &&
+ // Colon is used in labels, base class lists, initializer lists,
+ // range-based for loops, ternary operator, but should never be the
+ // first token in an unwrapped line.
+ Tok.isNot(tok::colon) &&
+ // 'noexcept' is a trailing annotation.
+ Tok.isNot(tok::kw_noexcept);
+}
+
+static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
+ const FormatToken *FormatTok) {
+ // FIXME: This returns true for C/C++ keywords like 'struct'.
+ return FormatTok->is(tok::identifier) &&
+ (FormatTok->Tok.getIdentifierInfo() == nullptr ||
+ !FormatTok->isOneOf(
+ Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
+ Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
+ Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
+ Keywords.kw_let, Keywords.kw_var, tok::kw_const,
+ Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
+ Keywords.kw_instanceof, Keywords.kw_interface,
+ Keywords.kw_throws));
+}
+
+static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
+ const FormatToken *FormatTok) {
+ return FormatTok->Tok.isLiteral() ||
+ FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
+ mustBeJSIdent(Keywords, FormatTok);
+}
+
+// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
+// when encountered after a value (see mustBeJSIdentOrValue).
+static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
+ const FormatToken *FormatTok) {
+ return FormatTok->isOneOf(
+ tok::kw_return, Keywords.kw_yield,
+ // conditionals
+ tok::kw_if, tok::kw_else,
+ // loops
+ tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
+ // switch/case
+ tok::kw_switch, tok::kw_case,
+ // exceptions
+ tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
+ // declaration
+ tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
+ Keywords.kw_async, Keywords.kw_function,
+ // import/export
+ Keywords.kw_import, tok::kw_export);
+}
+
+// readTokenWithJavaScriptASI reads the next token and terminates the current
+// line if JavaScript Automatic Semicolon Insertion must
+// happen between the current token and the next token.
+//
+// This method is conservative - it cannot cover all edge cases of JavaScript,
+// but only aims to correctly handle certain well known cases. It *must not*
+// return true in speculative cases.
+void UnwrappedLineParser::readTokenWithJavaScriptASI() {
+ FormatToken *Previous = FormatTok;
+ readToken();
+ FormatToken *Next = FormatTok;
+
+ bool IsOnSameLine =
+ CommentsBeforeNextToken.empty()
+ ? Next->NewlinesBefore == 0
+ : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
+ if (IsOnSameLine)
+ return;
+
+ bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
+ bool PreviousStartsTemplateExpr =
+ Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
+ if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
+ // If the token before the previous one is an '@', the previous token is an
+ // annotation and can precede another identifier/value.
+ const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
+ if (PrePrevious->is(tok::at))
+ return;
+ }
+ if (Next->is(tok::exclaim) && PreviousMustBeValue)
+ return addUnwrappedLine();
+ bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
+ bool NextEndsTemplateExpr =
+ Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
+ if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
+ (PreviousMustBeValue ||
+ Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
+ tok::minusminus)))
+ return addUnwrappedLine();
+ if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
+ return addUnwrappedLine();
+}
+
+void UnwrappedLineParser::parseStructuralElement() {
+ assert(!FormatTok->is(tok::l_brace));
+ if (Style.Language == FormatStyle::LK_TableGen &&
+ FormatTok->is(tok::pp_include)) {
+ nextToken();
+ if (FormatTok->is(tok::string_literal))
+ nextToken();
+ addUnwrappedLine();
+ return;
+ }
+ switch (FormatTok->Tok.getKind()) {
+ case tok::at:
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ parseBracedList();
+ break;
+ }
+ switch (FormatTok->Tok.getObjCKeywordID()) {
+ case tok::objc_public:
+ case tok::objc_protected:
+ case tok::objc_package:
+ case tok::objc_private:
+ return parseAccessSpecifier();
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ return parseObjCInterfaceOrImplementation();
+ case tok::objc_protocol:
+ return parseObjCProtocol();
+ case tok::objc_end:
+ return; // Handled by the caller.
+ case tok::objc_optional:
+ case tok::objc_required:
+ nextToken();
+ addUnwrappedLine();
+ return;
+ case tok::objc_autoreleasepool:
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BraceWrapping.AfterObjCDeclaration)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/false);
+ }
+ addUnwrappedLine();
+ return;
+ case tok::objc_try:
+ // This branch isn't strictly necessary (the kw_try case below would
+ // do this too after the tok::at is parsed above). But be explicit.
+ parseTryCatch();
+ return;
+ default:
+ break;
+ }
+ break;
+ case tok::kw_asm:
+ nextToken();
+ if (FormatTok->is(tok::l_brace)) {
+ FormatTok->Type = TT_InlineASMBrace;
+ nextToken();
+ while (FormatTok && FormatTok->isNot(tok::eof)) {
+ if (FormatTok->is(tok::r_brace)) {
+ FormatTok->Type = TT_InlineASMBrace;
+ nextToken();
+ addUnwrappedLine();
+ break;
+ }
+ FormatTok->Finalized = true;
+ nextToken();
+ }
+ }
+ break;
+ case tok::kw_namespace:
+ parseNamespace();
+ return;
+ case tok::kw_inline:
+ nextToken();
+ if (FormatTok->Tok.is(tok::kw_namespace)) {
+ parseNamespace();
+ return;
+ }
+ break;
+ case tok::kw_public:
+ case tok::kw_protected:
+ case tok::kw_private:
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript)
+ nextToken();
+ else
+ parseAccessSpecifier();
+ return;
+ case tok::kw_if:
+ parseIfThenElse();
+ return;
+ case tok::kw_for:
+ case tok::kw_while:
+ parseForOrWhileLoop();
+ return;
+ case tok::kw_do:
+ parseDoWhile();
+ return;
+ case tok::kw_switch:
+ parseSwitch();
+ return;
+ case tok::kw_default:
+ nextToken();
+ parseLabel();
+ return;
+ case tok::kw_case:
+ parseCaseLabel();
+ return;
+ case tok::kw_try:
+ case tok::kw___try:
+ parseTryCatch();
+ return;
+ case tok::kw_extern:
+ nextToken();
+ if (FormatTok->Tok.is(tok::string_literal)) {
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
+ addUnwrappedLine();
+ return;
+ }
+ }
+ break;
+ case tok::kw_export:
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ parseJavaScriptEs6ImportExport();
+ return;
+ }
+ break;
+ case tok::identifier:
+ if (FormatTok->is(TT_ForEachMacro)) {
+ parseForOrWhileLoop();
+ return;
+ }
+ if (FormatTok->is(TT_MacroBlockBegin)) {
+ parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
+ /*MunchSemi=*/false);
+ return;
+ }
+ if (FormatTok->is(Keywords.kw_import)) {
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ parseJavaScriptEs6ImportExport();
+ return;
+ }
+ if (Style.Language == FormatStyle::LK_Proto) {
+ nextToken();
+ if (FormatTok->is(tok::kw_public))
+ nextToken();
+ if (!FormatTok->is(tok::string_literal))
+ return;
+ nextToken();
+ if (FormatTok->is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ return;
+ }
+ }
+ if (Style.isCpp() &&
+ FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
+ Keywords.kw_slots, Keywords.kw_qslots)) {
+ nextToken();
+ if (FormatTok->is(tok::colon)) {
+ nextToken();
+ addUnwrappedLine();
+ return;
+ }
+ }
+ // In all other cases, parse the declaration.
+ break;
+ default:
+ break;
+ }
+ do {
+ const FormatToken *Previous = getPreviousToken();
+ switch (FormatTok->Tok.getKind()) {
+ case tok::at:
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace))
+ parseBracedList();
+ break;
+ case tok::kw_enum:
+ // Ignore if this is part of "template <enum ...".
+ if (Previous && Previous->is(tok::less)) {
+ nextToken();
+ break;
+ }
+
+ // parseEnum falls through and does not yet add an unwrapped line as an
+ // enum definition can start a structural element.
+ if (!parseEnum())
+ break;
+ // This only applies for C++.
+ if (!Style.isCpp()) {
+ addUnwrappedLine();
+ return;
+ }
+ break;
+ case tok::kw_typedef:
+ nextToken();
+ if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
+ Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
+ parseEnum();
+ break;
+ case tok::kw_struct:
+ case tok::kw_union:
+ case tok::kw_class:
+ // parseRecord falls through and does not yet add an unwrapped line as a
+ // record declaration or definition can start a structural element.
+ parseRecord();
+ // This does not apply for Java and JavaScript.
+ if (Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) {
+ if (FormatTok->is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ return;
+ }
+ break;
+ case tok::period:
+ nextToken();
+ // In Java, classes have an implicit static member "class".
+ if (Style.Language == FormatStyle::LK_Java && FormatTok &&
+ FormatTok->is(tok::kw_class))
+ nextToken();
+ if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
+ FormatTok->Tok.getIdentifierInfo())
+ // JavaScript only has pseudo keywords, all keywords are allowed to
+ // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
+ nextToken();
+ break;
+ case tok::semi:
+ nextToken();
+ addUnwrappedLine();
+ return;
+ case tok::r_brace:
+ addUnwrappedLine();
+ return;
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::kw_operator:
+ nextToken();
+ if (FormatTok->isBinaryOperator())
+ nextToken();
+ break;
+ case tok::caret:
+ nextToken();
+ if (FormatTok->Tok.isAnyIdentifier() ||
+ FormatTok->isSimpleTypeSpecifier())
+ nextToken();
+ if (FormatTok->is(tok::l_paren))
+ parseParens();
+ if (FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ break;
+ case tok::l_brace:
+ if (!tryToParseBracedList()) {
+ // A block outside of parentheses must be the last part of a
+ // structural element.
+ // FIXME: Figure out cases where this is not true, and add projections
+ // for them (the one we know is missing are lambdas).
+ if (Style.BraceWrapping.AfterFunction)
+ addUnwrappedLine();
+ FormatTok->Type = TT_FunctionLBrace;
+ parseBlock(/*MustBeDeclaration=*/false);
+ addUnwrappedLine();
+ return;
+ }
+ // Otherwise this was a braced init list, and the structural
+ // element continues.
+ break;
+ case tok::kw_try:
+ // We arrive here when parsing function-try blocks.
+ parseTryCatch();
+ return;
+ case tok::identifier: {
+ if (FormatTok->is(TT_MacroBlockEnd)) {
+ addUnwrappedLine();
+ return;
+ }
+
+ // Function declarations (as opposed to function expressions) are parsed
+ // on their own unwrapped line by continuing this loop. Function
+ // expressions (functions that are not on their own line) must not create
+ // a new unwrapped line, so they are special cased below.
+ size_t TokenCount = Line->Tokens.size();
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->is(Keywords.kw_function) &&
+ (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
+ Keywords.kw_async)))) {
+ tryToParseJSFunction();
+ break;
+ }
+ if ((Style.Language == FormatStyle::LK_JavaScript ||
+ Style.Language == FormatStyle::LK_Java) &&
+ FormatTok->is(Keywords.kw_interface)) {
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ // In JavaScript/TypeScript, "interface" can be used as a standalone
+ // identifier, e.g. in `var interface = 1;`. If "interface" is
+ // followed by another identifier, it is very like to be an actual
+ // interface declaration.
+ unsigned StoredPosition = Tokens->getPosition();
+ FormatToken *Next = Tokens->getNextToken();
+ FormatTok = Tokens->setPosition(StoredPosition);
+ if (Next && !mustBeJSIdent(Keywords, Next)) {
+ nextToken();
+ break;
+ }
+ }
+ parseRecord();
+ addUnwrappedLine();
+ return;
+ }
+
+ // See if the following token should start a new unwrapped line.
+ StringRef Text = FormatTok->TokenText;
+ nextToken();
+ if (Line->Tokens.size() == 1 &&
+ // JS doesn't have macros, and within classes colons indicate fields,
+ // not labels.
+ Style.Language != FormatStyle::LK_JavaScript) {
+ if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
+ Line->Tokens.begin()->Tok->MustBreakBefore = true;
+ parseLabel();
+ return;
+ }
+ // Recognize function-like macro usages without trailing semicolon as
+ // well as free-standing macros like Q_OBJECT.
+ bool FunctionLike = FormatTok->is(tok::l_paren);
+ if (FunctionLike)
+ parseParens();
+
+ bool FollowedByNewline =
+ CommentsBeforeNextToken.empty()
+ ? FormatTok->NewlinesBefore > 0
+ : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
+
+ if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
+ tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
+ addUnwrappedLine();
+ return;
+ }
+ }
+ break;
+ }
+ case tok::equal:
+ // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
+ // TT_JsFatArrow. The always start an expression or a child block if
+ // followed by a curly.
+ if (FormatTok->is(TT_JsFatArrow)) {
+ nextToken();
+ if (FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ break;
+ }
+
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ parseBracedList();
+ }
+ break;
+ case tok::l_square:
+ parseSquare();
+ break;
+ case tok::kw_new:
+ parseNew();
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+bool UnwrappedLineParser::tryToParseLambda() {
+ if (!Style.isCpp()) {
+ nextToken();
+ return false;
+ }
+ const FormatToken* Previous = getPreviousToken();
+ if (Previous &&
+ (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
+ tok::kw_delete) ||
+ Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
+ nextToken();
+ return false;
+ }
+ assert(FormatTok->is(tok::l_square));
+ FormatToken &LSquare = *FormatTok;
+ if (!tryToParseLambdaIntroducer())
+ return false;
+
+ while (FormatTok->isNot(tok::l_brace)) {
+ if (FormatTok->isSimpleTypeSpecifier()) {
+ nextToken();
+ continue;
+ }
+ switch (FormatTok->Tok.getKind()) {
+ case tok::l_brace:
+ break;
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::amp:
+ case tok::star:
+ case tok::kw_const:
+ case tok::comma:
+ case tok::less:
+ case tok::greater:
+ case tok::identifier:
+ case tok::numeric_constant:
+ case tok::coloncolon:
+ case tok::kw_mutable:
+ nextToken();
+ break;
+ case tok::arrow:
+ FormatTok->Type = TT_LambdaArrow;
+ nextToken();
+ break;
+ default:
+ return true;
+ }
+ }
+ LSquare.Type = TT_LambdaLSquare;
+ parseChildBlock();
+ return true;
+}
+
+bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
+ nextToken();
+ if (FormatTok->is(tok::equal)) {
+ nextToken();
+ if (FormatTok->is(tok::r_square)) {
+ nextToken();
+ return true;
+ }
+ if (FormatTok->isNot(tok::comma))
+ return false;
+ nextToken();
+ } else if (FormatTok->is(tok::amp)) {
+ nextToken();
+ if (FormatTok->is(tok::r_square)) {
+ nextToken();
+ return true;
+ }
+ if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
+ return false;
+ }
+ if (FormatTok->is(tok::comma))
+ nextToken();
+ } else if (FormatTok->is(tok::r_square)) {
+ nextToken();
+ return true;
+ }
+ do {
+ if (FormatTok->is(tok::amp))
+ nextToken();
+ if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
+ return false;
+ nextToken();
+ if (FormatTok->is(tok::ellipsis))
+ nextToken();
+ if (FormatTok->is(tok::comma)) {
+ nextToken();
+ } else if (FormatTok->is(tok::r_square)) {
+ nextToken();
+ return true;
+ } else {
+ return false;
+ }
+ } while (!eof());
+ return false;
+}
+
+void UnwrappedLineParser::tryToParseJSFunction() {
+ assert(FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
+ if (FormatTok->is(Keywords.kw_async))
+ nextToken();
+ // Consume "function".
+ nextToken();
+
+ // Consume * (generator function). Treat it like C++'s overloaded operators.
+ if (FormatTok->is(tok::star)) {
+ FormatTok->Type = TT_OverloadedOperator;
+ nextToken();
+ }
+
+ // Consume function name.
+ if (FormatTok->is(tok::identifier))
+ nextToken();
+
+ if (FormatTok->isNot(tok::l_paren))
+ return;
+
+ // Parse formal parameter list.
+ parseParens();
+
+ if (FormatTok->is(tok::colon)) {
+ // Parse a type definition.
+ nextToken();
+
+ // Eat the type declaration. For braced inline object types, balance braces,
+ // otherwise just parse until finding an l_brace for the function body.
+ if (FormatTok->is(tok::l_brace))
+ tryToParseBracedList();
+ else
+ while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
+ nextToken();
+ }
+
+ if (FormatTok->is(tok::semi))
+ return;
+
+ parseChildBlock();
+}
+
+bool UnwrappedLineParser::tryToParseBracedList() {
+ if (FormatTok->BlockKind == BK_Unknown)
+ calculateBraceTypes();
+ assert(FormatTok->BlockKind != BK_Unknown);
+ if (FormatTok->BlockKind == BK_Block)
+ return false;
+ parseBracedList();
+ return true;
+}
+
+bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
+ bool HasError = false;
+ nextToken();
+
+ // FIXME: Once we have an expression parser in the UnwrappedLineParser,
+ // replace this by using parseAssigmentExpression() inside.
+ do {
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
+ tryToParseJSFunction();
+ continue;
+ }
+ if (FormatTok->is(TT_JsFatArrow)) {
+ nextToken();
+ // Fat arrows can be followed by simple expressions or by child blocks
+ // in curly braces.
+ if (FormatTok->is(tok::l_brace)) {
+ parseChildBlock();
+ continue;
+ }
+ }
+ if (FormatTok->is(tok::l_brace)) {
+ // Could be a method inside of a braced list `{a() { return 1; }}`.
+ if (tryToParseBracedList())
+ continue;
+ parseChildBlock();
+ }
+ }
+ switch (FormatTok->Tok.getKind()) {
+ case tok::caret:
+ nextToken();
+ if (FormatTok->is(tok::l_brace)) {
+ parseChildBlock();
+ }
+ break;
+ case tok::l_square:
+ tryToParseLambda();
+ break;
+ case tok::l_paren:
+ parseParens();
+ // JavaScript can just have free standing methods and getters/setters in
+ // object literals. Detect them by a "{" following ")".
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ if (FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ break;
+ }
+ break;
+ case tok::l_brace:
+ // Assume there are no blocks inside a braced init list apart
+ // from the ones we explicitly parse out (like lambdas).
+ FormatTok->BlockKind = BK_BracedInit;
+ parseBracedList();
+ break;
+ case tok::r_brace:
+ nextToken();
+ return !HasError;
+ case tok::semi:
+ // JavaScript (or more precisely TypeScript) can have semicolons in braced
+ // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
+ // used for error recovery if we have otherwise determined that this is
+ // a braced list.
+ if (Style.Language == FormatStyle::LK_JavaScript) {
+ nextToken();
+ break;
+ }
+ HasError = true;
+ if (!ContinueOnSemicolons)
+ return !HasError;
+ nextToken();
+ break;
+ case tok::comma:
+ nextToken();
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+ return false;
+}
+
+void UnwrappedLineParser::parseParens() {
+ assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
+ nextToken();
+ do {
+ switch (FormatTok->Tok.getKind()) {
+ case tok::l_paren:
+ parseParens();
+ if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ break;
+ case tok::r_paren:
+ nextToken();
+ return;
+ case tok::r_brace:
+ // A "}" inside parenthesis is an error if there wasn't a matching "{".
+ return;
+ case tok::l_square:
+ tryToParseLambda();
+ break;
+ case tok::l_brace:
+ if (!tryToParseBracedList())
+ parseChildBlock();
+ break;
+ case tok::at:
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace))
+ parseBracedList();
+ break;
+ case tok::kw_class:
+ if (Style.Language == FormatStyle::LK_JavaScript)
+ parseRecord(/*ParseAsExpr=*/true);
+ else
+ nextToken();
+ break;
+ case tok::identifier:
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ (FormatTok->is(Keywords.kw_function) ||
+ FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
+ tryToParseJSFunction();
+ else
+ nextToken();
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseSquare() {
+ assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
+ if (tryToParseLambda())
+ return;
+ do {
+ switch (FormatTok->Tok.getKind()) {
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::r_square:
+ nextToken();
+ return;
+ case tok::r_brace:
+ // A "}" inside parenthesis is an error if there wasn't a matching "{".
+ return;
+ case tok::l_square:
+ parseSquare();
+ break;
+ case tok::l_brace: {
+ if (!tryToParseBracedList())
+ parseChildBlock();
+ break;
+ }
+ case tok::at:
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace))
+ parseBracedList();
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseIfThenElse() {
+ assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
+ nextToken();
+ if (FormatTok->Tok.is(tok::kw_constexpr))
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_paren))
+ parseParens();
+ bool NeedsUnwrappedLine = false;
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (Style.BraceWrapping.BeforeElse)
+ addUnwrappedLine();
+ else
+ NeedsUnwrappedLine = true;
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+ if (FormatTok->Tok.is(tok::kw_else)) {
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ addUnwrappedLine();
+ } else if (FormatTok->Tok.is(tok::kw_if)) {
+ parseIfThenElse();
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ if (FormatTok->is(tok::eof))
+ addUnwrappedLine();
+ --Line->Level;
+ }
+ } else if (NeedsUnwrappedLine) {
+ addUnwrappedLine();
+ }
+}
+
+void UnwrappedLineParser::parseTryCatch() {
+ assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
+ nextToken();
+ bool NeedsUnwrappedLine = false;
+ if (FormatTok->is(tok::colon)) {
+ // We are in a function try block, what comes is an initializer list.
+ nextToken();
+ while (FormatTok->is(tok::identifier)) {
+ nextToken();
+ if (FormatTok->is(tok::l_paren))
+ parseParens();
+ if (FormatTok->is(tok::comma))
+ nextToken();
+ }
+ }
+ // Parse try with resource.
+ if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
+ parseParens();
+ }
+ if (FormatTok->is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (Style.BraceWrapping.BeforeCatch) {
+ addUnwrappedLine();
+ } else {
+ NeedsUnwrappedLine = true;
+ }
+ } else if (!FormatTok->is(tok::kw_catch)) {
+ // The C++ standard requires a compound-statement after a try.
+ // If there's none, we try to assume there's a structuralElement
+ // and try to continue.
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+ while (1) {
+ if (FormatTok->is(tok::at))
+ nextToken();
+ if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
+ tok::kw___finally) ||
+ ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ FormatTok->is(Keywords.kw_finally)) ||
+ (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
+ FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
+ break;
+ nextToken();
+ while (FormatTok->isNot(tok::l_brace)) {
+ if (FormatTok->is(tok::l_paren)) {
+ parseParens();
+ continue;
+ }
+ if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
+ return;
+ nextToken();
+ }
+ NeedsUnwrappedLine = false;
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (Style.BraceWrapping.BeforeCatch)
+ addUnwrappedLine();
+ else
+ NeedsUnwrappedLine = true;
+ }
+ if (NeedsUnwrappedLine)
+ addUnwrappedLine();
+}
+
+void UnwrappedLineParser::parseNamespace() {
+ assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
+
+ const FormatToken &InitialToken = *FormatTok;
+ nextToken();
+ while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (ShouldBreakBeforeBrace(Style, InitialToken))
+ addUnwrappedLine();
+
+ bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
+ (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
+ DeclarationScopeStack.size() > 1);
+ parseBlock(/*MustBeDeclaration=*/true, AddLevel);
+ // Munch the semicolon after a namespace. This is more common than one would
+ // think. Puttin the semicolon into its own line is very ugly.
+ if (FormatTok->Tok.is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ }
+ // FIXME: Add error handling.
+}
+
+void UnwrappedLineParser::parseNew() {
+ assert(FormatTok->is(tok::kw_new) && "'new' expected");
+ nextToken();
+ if (Style.Language != FormatStyle::LK_Java)
+ return;
+
+ // In Java, we can parse everything up to the parens, which aren't optional.
+ do {
+ // There should not be a ;, { or } before the new's open paren.
+ if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
+ return;
+
+ // Consume the parens.
+ if (FormatTok->is(tok::l_paren)) {
+ parseParens();
+
+ // If there is a class body of an anonymous class, consume that as child.
+ if (FormatTok->is(tok::l_brace))
+ parseChildBlock();
+ return;
+ }
+ nextToken();
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseForOrWhileLoop() {
+ assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
+ "'for', 'while' or foreach macro expected");
+ nextToken();
+ // JS' for await ( ...
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->is(Keywords.kw_await))
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_paren))
+ parseParens();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ addUnwrappedLine();
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+}
+
+void UnwrappedLineParser::parseDoWhile() {
+ assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (Style.BraceWrapping.IndentBraces)
+ addUnwrappedLine();
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+
+ // FIXME: Add error handling.
+ if (!FormatTok->Tok.is(tok::kw_while)) {
+ addUnwrappedLine();
+ return;
+ }
+
+ nextToken();
+ parseStructuralElement();
+}
+
+void UnwrappedLineParser::parseLabel() {
+ nextToken();
+ unsigned OldLineLevel = Line->Level;
+ if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
+ --Line->Level;
+ if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ if (FormatTok->Tok.is(tok::kw_break)) {
+ if (Style.BraceWrapping.AfterControlStatement)
+ addUnwrappedLine();
+ parseStructuralElement();
+ }
+ addUnwrappedLine();
+ } else {
+ if (FormatTok->is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ }
+ Line->Level = OldLineLevel;
+ if (FormatTok->isNot(tok::l_brace)) {
+ parseStructuralElement();
+ addUnwrappedLine();
+ }
+}
+
+void UnwrappedLineParser::parseCaseLabel() {
+ assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
+ // FIXME: fix handling of complex expressions here.
+ do {
+ nextToken();
+ } while (!eof() && !FormatTok->Tok.is(tok::colon));
+ parseLabel();
+}
+
+void UnwrappedLineParser::parseSwitch() {
+ assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
+ nextToken();
+ if (FormatTok->Tok.is(tok::l_paren))
+ parseParens();
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ CompoundStatementIndenter Indenter(this, Style, Line->Level);
+ parseBlock(/*MustBeDeclaration=*/false);
+ addUnwrappedLine();
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+}
+
+void UnwrappedLineParser::parseAccessSpecifier() {
+ nextToken();
+ // Understand Qt's slots.
+ if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
+ nextToken();
+ // Otherwise, we don't know what it is, and we'd better keep the next token.
+ if (FormatTok->Tok.is(tok::colon))
+ nextToken();
+ addUnwrappedLine();
+}
+
+bool UnwrappedLineParser::parseEnum() {
+ // Won't be 'enum' for NS_ENUMs.
+ if (FormatTok->Tok.is(tok::kw_enum))
+ nextToken();
+
+ // In TypeScript, "enum" can also be used as property name, e.g. in interface
+ // declarations. An "enum" keyword followed by a colon would be a syntax
+ // error and thus assume it is just an identifier.
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ FormatTok->isOneOf(tok::colon, tok::question))
+ return false;
+
+ // Eat up enum class ...
+ if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
+ nextToken();
+
+ while (FormatTok->Tok.getIdentifierInfo() ||
+ FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
+ tok::greater, tok::comma, tok::question)) {
+ nextToken();
+ // We can have macros or attributes in between 'enum' and the enum name.
+ if (FormatTok->is(tok::l_paren))
+ parseParens();
+ if (FormatTok->is(tok::identifier)) {
+ nextToken();
+ // If there are two identifiers in a row, this is likely an elaborate
+ // return type. In Java, this can be "implements", etc.
+ if (Style.isCpp() && FormatTok->is(tok::identifier))
+ return false;
+ }
+ }
+
+ // Just a declaration or something is wrong.
+ if (FormatTok->isNot(tok::l_brace))
+ return true;
+ FormatTok->BlockKind = BK_Block;
+
+ if (Style.Language == FormatStyle::LK_Java) {
+ // Java enums are different.
+ parseJavaEnumBody();
+ return true;
+ }
+ if (Style.Language == FormatStyle::LK_Proto) {
+ parseBlock(/*MustBeDeclaration=*/true);
+ return true;
+ }
+
+ // Parse enum body.
+ bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
+ if (HasError) {
+ if (FormatTok->is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ }
+ return true;
+
+ // There is no addUnwrappedLine() here so that we fall through to parsing a
+ // structural element afterwards. Thus, in "enum A {} n, m;",
+ // "} n, m;" will end up in one unwrapped line.
+}
+
+void UnwrappedLineParser::parseJavaEnumBody() {
+ // Determine whether the enum is simple, i.e. does not have a semicolon or
+ // constants with class bodies. Simple enums can be formatted like braced
+ // lists, contracted to a single line, etc.
+ unsigned StoredPosition = Tokens->getPosition();
+ bool IsSimple = true;
+ FormatToken *Tok = Tokens->getNextToken();
+ while (Tok) {
+ if (Tok->is(tok::r_brace))
+ break;
+ if (Tok->isOneOf(tok::l_brace, tok::semi)) {
+ IsSimple = false;
+ break;
+ }
+ // FIXME: This will also mark enums with braces in the arguments to enum
+ // constants as "not simple". This is probably fine in practice, though.
+ Tok = Tokens->getNextToken();
+ }
+ FormatTok = Tokens->setPosition(StoredPosition);
+
+ if (IsSimple) {
+ parseBracedList();
+ addUnwrappedLine();
+ return;
+ }
+
+ // Parse the body of a more complex enum.
+ // First add a line for everything up to the "{".
+ nextToken();
+ addUnwrappedLine();
+ ++Line->Level;
+
+ // Parse the enum constants.
+ while (FormatTok) {
+ if (FormatTok->is(tok::l_brace)) {
+ // Parse the constant's class body.
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
+ /*MunchSemi=*/false);
+ } else if (FormatTok->is(tok::l_paren)) {
+ parseParens();
+ } else if (FormatTok->is(tok::comma)) {
+ nextToken();
+ addUnwrappedLine();
+ } else if (FormatTok->is(tok::semi)) {
+ nextToken();
+ addUnwrappedLine();
+ break;
+ } else if (FormatTok->is(tok::r_brace)) {
+ addUnwrappedLine();
+ break;
+ } else {
+ nextToken();
+ }
+ }
+
+ // Parse the class body after the enum's ";" if any.
+ parseLevel(/*HasOpeningBrace=*/true);
+ nextToken();
+ --Line->Level;
+ addUnwrappedLine();
+}
+
+void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
+ const FormatToken &InitialToken = *FormatTok;
+ nextToken();
+
+ // The actual identifier can be a nested name specifier, and in macros
+ // it is often token-pasted.
+ while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
+ tok::kw___attribute, tok::kw___declspec,
+ tok::kw_alignas) ||
+ ((Style.Language == FormatStyle::LK_Java ||
+ Style.Language == FormatStyle::LK_JavaScript) &&
+ FormatTok->isOneOf(tok::period, tok::comma))) {
+ bool IsNonMacroIdentifier =
+ FormatTok->is(tok::identifier) &&
+ FormatTok->TokenText != FormatTok->TokenText.upper();
+ nextToken();
+ // We can have macros or attributes in between 'class' and the class name.
+ if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
+ parseParens();
+ }
+
+ // Note that parsing away template declarations here leads to incorrectly
+ // accepting function declarations as record declarations.
+ // In general, we cannot solve this problem. Consider:
+ // class A<int> B() {}
+ // which can be a function definition or a class definition when B() is a
+ // macro. If we find enough real-world cases where this is a problem, we
+ // can parse for the 'template' keyword in the beginning of the statement,
+ // and thus rule out the record production in case there is no template
+ // (this would still leave us with an ambiguity between template function
+ // and class declarations).
+ if (FormatTok->isOneOf(tok::colon, tok::less)) {
+ while (!eof()) {
+ if (FormatTok->is(tok::l_brace)) {
+ calculateBraceTypes(/*ExpectClassBody=*/true);
+ if (!tryToParseBracedList())
+ break;
+ }
+ if (FormatTok->Tok.is(tok::semi))
+ return;
+ nextToken();
+ }
+ }
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (ParseAsExpr) {
+ parseChildBlock();
+ } else {
+ if (ShouldBreakBeforeBrace(Style, InitialToken))
+ addUnwrappedLine();
+
+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
+ /*MunchSemi=*/false);
+ }
+ }
+ // There is no addUnwrappedLine() here so that we fall through to parsing a
+ // structural element afterwards. Thus, in "class A {} n, m;",
+ // "} n, m;" will end up in one unwrapped line.
+}
+
+void UnwrappedLineParser::parseObjCProtocolList() {
+ assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
+ do
+ nextToken();
+ while (!eof() && FormatTok->Tok.isNot(tok::greater));
+ nextToken(); // Skip '>'.
+}
+
+void UnwrappedLineParser::parseObjCUntilAtEnd() {
+ do {
+ if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
+ nextToken();
+ addUnwrappedLine();
+ break;
+ }
+ if (FormatTok->is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/false);
+ // In ObjC interfaces, nothing should be following the "}".
+ addUnwrappedLine();
+ } else if (FormatTok->is(tok::r_brace)) {
+ // Ignore stray "}". parseStructuralElement doesn't consume them.
+ nextToken();
+ addUnwrappedLine();
+ } else {
+ parseStructuralElement();
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
+ nextToken();
+ nextToken(); // interface name
+
+ // @interface can be followed by either a base class, or a category.
+ if (FormatTok->Tok.is(tok::colon)) {
+ nextToken();
+ nextToken(); // base class name
+ } else if (FormatTok->Tok.is(tok::l_paren))
+ // Skip category, if present.
+ parseParens();
+
+ if (FormatTok->Tok.is(tok::less))
+ parseObjCProtocolList();
+
+ if (FormatTok->Tok.is(tok::l_brace)) {
+ if (Style.BraceWrapping.AfterObjCDeclaration)
+ addUnwrappedLine();
+ parseBlock(/*MustBeDeclaration=*/true);
+ }
+
+ // With instance variables, this puts '}' on its own line. Without instance
+ // variables, this ends the @interface line.
+ addUnwrappedLine();
+
+ parseObjCUntilAtEnd();
+}
+
+void UnwrappedLineParser::parseObjCProtocol() {
+ nextToken();
+ nextToken(); // protocol name
+
+ if (FormatTok->Tok.is(tok::less))
+ parseObjCProtocolList();
+
+ // Check for protocol declaration.
+ if (FormatTok->Tok.is(tok::semi)) {
+ nextToken();
+ return addUnwrappedLine();
+ }
+
+ addUnwrappedLine();
+ parseObjCUntilAtEnd();
+}
+
+void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
+ bool IsImport = FormatTok->is(Keywords.kw_import);
+ assert(IsImport || FormatTok->is(tok::kw_export));
+ nextToken();
+
+ // Consume the "default" in "export default class/function".
+ if (FormatTok->is(tok::kw_default))
+ nextToken();
+
+ // Consume "async function", "function" and "default function", so that these
+ // get parsed as free-standing JS functions, i.e. do not require a trailing
+ // semicolon.
+ if (FormatTok->is(Keywords.kw_async))
+ nextToken();
+ if (FormatTok->is(Keywords.kw_function)) {
+ nextToken();
+ return;
+ }
+
+ // For imports, `export *`, `export {...}`, consume the rest of the line up
+ // to the terminating `;`. For everything else, just return and continue
+ // parsing the structural element, i.e. the declaration or expression for
+ // `export default`.
+ if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
+ !FormatTok->isStringLiteral())
+ return;
+
+ while (!eof()) {
+ if (FormatTok->is(tok::semi))
+ return;
+ if (Line->Tokens.size() == 0) {
+ // Common issue: Automatic Semicolon Insertion wrapped the line, so the
+ // import statement should terminate.
+ return;
+ }
+ if (FormatTok->is(tok::l_brace)) {
+ FormatTok->BlockKind = BK_Block;
+ parseBracedList();
+ } else {
+ nextToken();
+ }
+ }
+}
+
+LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
+ StringRef Prefix = "") {
+ llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
+ << (Line.InPPDirective ? " MACRO" : "") << ": ";
+ for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
+ E = Line.Tokens.end();
+ I != E; ++I) {
+ llvm::dbgs() << I->Tok->Tok.getName() << "["
+ << "T=" << I->Tok->Type
+ << ", OC=" << I->Tok->OriginalColumn << "] ";
+ }
+ for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
+ E = Line.Tokens.end();
+ I != E; ++I) {
+ const UnwrappedLineNode &Node = *I;
+ for (SmallVectorImpl<UnwrappedLine>::const_iterator
+ I = Node.Children.begin(),
+ E = Node.Children.end();
+ I != E; ++I) {
+ printDebugInfo(*I, "\nChild: ");
+ }
+ }
+ llvm::dbgs() << "\n";
+}
+
+void UnwrappedLineParser::addUnwrappedLine() {
+ if (Line->Tokens.empty())
+ return;
+ DEBUG({
+ if (CurrentLines == &Lines)
+ printDebugInfo(*Line);
+ });
+ CurrentLines->push_back(std::move(*Line));
+ Line->Tokens.clear();
+ Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
+ if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
+ CurrentLines->append(
+ std::make_move_iterator(PreprocessorDirectives.begin()),
+ std::make_move_iterator(PreprocessorDirectives.end()));
+ PreprocessorDirectives.clear();
+ }
+}
+
+bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
+
+bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
+ return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
+ FormatTok.NewlinesBefore > 0;
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// section on \p Line.
+static bool continuesLineCommentSection(const FormatToken &FormatTok,
+ const UnwrappedLine &Line,
+ llvm::Regex &CommentPragmasRegex) {
+ if (Line.Tokens.empty())
+ return false;
+
+ StringRef IndentContent = FormatTok.TokenText;
+ if (FormatTok.TokenText.startswith("//") ||
+ FormatTok.TokenText.startswith("/*"))
+ IndentContent = FormatTok.TokenText.substr(2);
+ if (CommentPragmasRegex.match(IndentContent))
+ return false;
+
+ // If Line starts with a line comment, then FormatTok continues the comment
+ // section if its original column is greater or equal to the original start
+ // column of the line.
+ //
+ // Define the min column token of a line as follows: if a line ends in '{' or
+ // contains a '{' followed by a line comment, then the min column token is
+ // that '{'. Otherwise, the min column token of the line is the first token of
+ // the line.
+ //
+ // If Line starts with a token other than a line comment, then FormatTok
+ // continues the comment section if its original column is greater than the
+ // original start column of the min column token of the line.
+ //
+ // For example, the second line comment continues the first in these cases:
+ //
+ // // first line
+ // // second line
+ //
+ // and:
+ //
+ // // first line
+ // // second line
+ //
+ // and:
+ //
+ // int i; // first line
+ // // second line
+ //
+ // and:
+ //
+ // do { // first line
+ // // second line
+ // int i;
+ // } while (true);
+ //
+ // and:
+ //
+ // enum {
+ // a, // first line
+ // // second line
+ // b
+ // };
+ //
+ // The second line comment doesn't continue the first in these cases:
+ //
+ // // first line
+ // // second line
+ //
+ // and:
+ //
+ // int i; // first line
+ // // second line
+ //
+ // and:
+ //
+ // do { // first line
+ // // second line
+ // int i;
+ // } while (true);
+ //
+ // and:
+ //
+ // enum {
+ // a, // first line
+ // // second line
+ // };
+ const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
+
+ // Scan for '{//'. If found, use the column of '{' as a min column for line
+ // comment section continuation.
+ const FormatToken *PreviousToken = nullptr;
+ for (const UnwrappedLineNode &Node : Line.Tokens) {
+ if (PreviousToken && PreviousToken->is(tok::l_brace) &&
+ isLineComment(*Node.Tok)) {
+ MinColumnToken = PreviousToken;
+ break;
+ }
+ PreviousToken = Node.Tok;
+
+ // Grab the last newline preceding a token in this unwrapped line.
+ if (Node.Tok->NewlinesBefore > 0) {
+ MinColumnToken = Node.Tok;
+ }
+ }
+ if (PreviousToken && PreviousToken->is(tok::l_brace)) {
+ MinColumnToken = PreviousToken;
+ }
+
+ return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
+ MinColumnToken);
+}
+
+void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
+ bool JustComments = Line->Tokens.empty();
+ for (SmallVectorImpl<FormatToken *>::const_iterator
+ I = CommentsBeforeNextToken.begin(),
+ E = CommentsBeforeNextToken.end();
+ I != E; ++I) {
+ // Line comments that belong to the same line comment section are put on the
+ // same line since later we might want to reflow content between them.
+ // Additional fine-grained breaking of line comment sections is controlled
+ // by the class BreakableLineCommentSection in case it is desirable to keep
+ // several line comment sections in the same unwrapped line.
+ //
+ // FIXME: Consider putting separate line comment sections as children to the
+ // unwrapped line instead.
+ (*I)->ContinuesLineCommentSection =
+ continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
+ if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
+ addUnwrappedLine();
+ pushToken(*I);
+ }
+ if (NewlineBeforeNext && JustComments)
+ addUnwrappedLine();
+ CommentsBeforeNextToken.clear();
+}
+
+void UnwrappedLineParser::nextToken() {
+ if (eof())
+ return;
+ flushComments(isOnNewLine(*FormatTok));
+ pushToken(FormatTok);
+ if (Style.Language != FormatStyle::LK_JavaScript)
+ readToken();
+ else
+ readTokenWithJavaScriptASI();
+}
+
+const FormatToken *UnwrappedLineParser::getPreviousToken() {
+ // FIXME: This is a dirty way to access the previous token. Find a better
+ // solution.
+ if (!Line || Line->Tokens.empty())
+ return nullptr;
+ return Line->Tokens.back().Tok;
+}
+
+void UnwrappedLineParser::distributeComments(
+ const SmallVectorImpl<FormatToken *> &Comments,
+ const FormatToken *NextTok) {
+ // Whether or not a line comment token continues a line is controlled by
+ // the method continuesLineCommentSection, with the following caveat:
+ //
+ // Define a trail of Comments to be a nonempty proper postfix of Comments such
+ // that each comment line from the trail is aligned with the next token, if
+ // the next token exists. If a trail exists, the beginning of the maximal
+ // trail is marked as a start of a new comment section.
+ //
+ // For example in this code:
+ //
+ // int a; // line about a
+ // // line 1 about b
+ // // line 2 about b
+ // int b;
+ //
+ // the two lines about b form a maximal trail, so there are two sections, the
+ // first one consisting of the single comment "// line about a" and the
+ // second one consisting of the next two comments.
+ if (Comments.empty())
+ return;
+ bool ShouldPushCommentsInCurrentLine = true;
+ bool HasTrailAlignedWithNextToken = false;
+ unsigned StartOfTrailAlignedWithNextToken = 0;
+ if (NextTok) {
+ // We are skipping the first element intentionally.
+ for (unsigned i = Comments.size() - 1; i > 0; --i) {
+ if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
+ HasTrailAlignedWithNextToken = true;
+ StartOfTrailAlignedWithNextToken = i;
+ }
+ }
+ }
+ for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
+ FormatToken *FormatTok = Comments[i];
+ if (HasTrailAlignedWithNextToken &&
+ i == StartOfTrailAlignedWithNextToken) {
+ FormatTok->ContinuesLineCommentSection = false;
+ } else {
+ FormatTok->ContinuesLineCommentSection =
+ continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
+ }
+ if (!FormatTok->ContinuesLineCommentSection &&
+ (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
+ ShouldPushCommentsInCurrentLine = false;
+ }
+ if (ShouldPushCommentsInCurrentLine) {
+ pushToken(FormatTok);
+ } else {
+ CommentsBeforeNextToken.push_back(FormatTok);
+ }
+ }
+}
+
+void UnwrappedLineParser::readToken() {
+ SmallVector<FormatToken *, 1> Comments;
+ do {
+ FormatTok = Tokens->getNextToken();
+ assert(FormatTok);
+ while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
+ (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
+ distributeComments(Comments, FormatTok);
+ Comments.clear();
+ // If there is an unfinished unwrapped line, we flush the preprocessor
+ // directives only after that unwrapped line was finished later.
+ bool SwitchToPreprocessorLines = !Line->Tokens.empty();
+ ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
+ // Comments stored before the preprocessor directive need to be output
+ // before the preprocessor directive, at the same level as the
+ // preprocessor directive, as we consider them to apply to the directive.
+ flushComments(isOnNewLine(*FormatTok));
+ parsePPDirective();
+ }
+ while (FormatTok->Type == TT_ConflictStart ||
+ FormatTok->Type == TT_ConflictEnd ||
+ FormatTok->Type == TT_ConflictAlternative) {
+ if (FormatTok->Type == TT_ConflictStart) {
+ conditionalCompilationStart(/*Unreachable=*/false);
+ } else if (FormatTok->Type == TT_ConflictAlternative) {
+ conditionalCompilationAlternative();
+ } else if (FormatTok->Type == TT_ConflictEnd) {
+ conditionalCompilationEnd();
+ }
+ FormatTok = Tokens->getNextToken();
+ FormatTok->MustBreakBefore = true;
+ }
+
+ if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
+ !Line->InPPDirective) {
+ continue;
+ }
+
+ if (!FormatTok->Tok.is(tok::comment)) {
+ distributeComments(Comments, FormatTok);
+ Comments.clear();
+ return;
+ }
+
+ Comments.push_back(FormatTok);
+ } while (!eof());
+
+ distributeComments(Comments, nullptr);
+ Comments.clear();
+}
+
+void UnwrappedLineParser::pushToken(FormatToken *Tok) {
+ Line->Tokens.push_back(UnwrappedLineNode(Tok));
+ if (MustBreakBeforeNextToken) {
+ Line->Tokens.back().Tok->MustBreakBefore = true;
+ MustBreakBeforeNextToken = false;
+ }
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
new file mode 100644
index 000000000000..15d1d9cda7a2
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
@@ -0,0 +1,251 @@
+//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
+#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
+
+#include "FormatToken.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
+#include <list>
+#include <stack>
+
+namespace clang {
+namespace format {
+
+struct UnwrappedLineNode;
+
+/// \brief An unwrapped line is a sequence of \c Token, that we would like to
+/// put on a single line if there was no column limit.
+///
+/// This is used as a main interface between the \c UnwrappedLineParser and the
+/// \c UnwrappedLineFormatter. The key property is that changing the formatting
+/// within an unwrapped line does not affect any other unwrapped lines.
+struct UnwrappedLine {
+ UnwrappedLine();
+
+ // FIXME: Don't use std::list here.
+ /// \brief The \c Tokens comprising this \c UnwrappedLine.
+ std::list<UnwrappedLineNode> Tokens;
+
+ /// \brief The indent level of the \c UnwrappedLine.
+ unsigned Level;
+
+ /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
+ bool InPPDirective;
+
+ bool MustBeDeclaration;
+
+ /// \brief If this \c UnwrappedLine closes a block in a sequence of lines,
+ /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding
+ /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be
+ /// \c kInvalidIndex.
+ size_t MatchingOpeningBlockLineIndex;
+
+ static const size_t kInvalidIndex = -1;
+};
+
+class UnwrappedLineConsumer {
+public:
+ virtual ~UnwrappedLineConsumer() {}
+ virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
+ virtual void finishRun() = 0;
+};
+
+class FormatTokenSource;
+
+class UnwrappedLineParser {
+public:
+ UnwrappedLineParser(const FormatStyle &Style,
+ const AdditionalKeywords &Keywords,
+ ArrayRef<FormatToken *> Tokens,
+ UnwrappedLineConsumer &Callback);
+
+ void parse();
+
+private:
+ void reset();
+ void parseFile();
+ void parseLevel(bool HasOpeningBrace);
+ void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
+ bool MunchSemi = true);
+ void parseChildBlock();
+ void parsePPDirective();
+ void parsePPDefine();
+ void parsePPIf(bool IfDef);
+ void parsePPElIf();
+ void parsePPElse();
+ void parsePPEndIf();
+ void parsePPUnknown();
+ void readTokenWithJavaScriptASI();
+ void parseStructuralElement();
+ bool tryToParseBracedList();
+ bool parseBracedList(bool ContinueOnSemicolons = false);
+ void parseParens();
+ void parseSquare();
+ void parseIfThenElse();
+ void parseTryCatch();
+ void parseForOrWhileLoop();
+ void parseDoWhile();
+ void parseLabel();
+ void parseCaseLabel();
+ void parseSwitch();
+ void parseNamespace();
+ void parseNew();
+ void parseAccessSpecifier();
+ bool parseEnum();
+ void parseJavaEnumBody();
+ // Parses a record (aka class) as a top level element. If ParseAsExpr is true,
+ // parses the record as a child block, i.e. if the class declaration is an
+ // expression.
+ void parseRecord(bool ParseAsExpr = false);
+ void parseObjCProtocolList();
+ void parseObjCUntilAtEnd();
+ void parseObjCInterfaceOrImplementation();
+ void parseObjCProtocol();
+ void parseJavaScriptEs6ImportExport();
+ bool tryToParseLambda();
+ bool tryToParseLambdaIntroducer();
+ void tryToParseJSFunction();
+ void addUnwrappedLine();
+ bool eof() const;
+ void nextToken();
+ const FormatToken *getPreviousToken();
+ void readToken();
+
+ // Decides which comment tokens should be added to the current line and which
+ // should be added as comments before the next token.
+ //
+ // Comments specifies the sequence of comment tokens to analyze. They get
+ // either pushed to the current line or added to the comments before the next
+ // token.
+ //
+ // NextTok specifies the next token. A null pointer NextTok is supported, and
+ // signifies either the absense of a next token, or that the next token
+ // shouldn't be taken into accunt for the analysis.
+ void distributeComments(const SmallVectorImpl<FormatToken *> &Comments,
+ const FormatToken *NextTok);
+
+ // Adds the comment preceding the next token to unwrapped lines.
+ void flushComments(bool NewlineBeforeNext);
+ void pushToken(FormatToken *Tok);
+ void calculateBraceTypes(bool ExpectClassBody = false);
+
+ // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
+ // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
+ // this branch either cannot be taken (for example '#if false'), or should
+ // not be taken in this round.
+ void conditionalCompilationCondition(bool Unreachable);
+ void conditionalCompilationStart(bool Unreachable);
+ void conditionalCompilationAlternative();
+ void conditionalCompilationEnd();
+
+ bool isOnNewLine(const FormatToken &FormatTok);
+
+ // FIXME: We are constantly running into bugs where Line.Level is incorrectly
+ // subtracted from beyond 0. Introduce a method to subtract from Line.Level
+ // and use that everywhere in the Parser.
+ std::unique_ptr<UnwrappedLine> Line;
+
+ // Comments are sorted into unwrapped lines by whether they are in the same
+ // line as the previous token, or not. If not, they belong to the next token.
+ // Since the next token might already be in a new unwrapped line, we need to
+ // store the comments belonging to that token.
+ SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
+ FormatToken *FormatTok;
+ bool MustBreakBeforeNextToken;
+
+ // The parsed lines. Only added to through \c CurrentLines.
+ SmallVector<UnwrappedLine, 8> Lines;
+
+ // Preprocessor directives are parsed out-of-order from other unwrapped lines.
+ // Thus, we need to keep a list of preprocessor directives to be reported
+ // after an unwarpped line that has been started was finished.
+ SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
+
+ // New unwrapped lines are added via CurrentLines.
+ // Usually points to \c &Lines. While parsing a preprocessor directive when
+ // there is an unfinished previous unwrapped line, will point to
+ // \c &PreprocessorDirectives.
+ SmallVectorImpl<UnwrappedLine> *CurrentLines;
+
+ // We store for each line whether it must be a declaration depending on
+ // whether we are in a compound statement or not.
+ std::vector<bool> DeclarationScopeStack;
+
+ const FormatStyle &Style;
+ const AdditionalKeywords &Keywords;
+
+ llvm::Regex CommentPragmasRegex;
+
+ FormatTokenSource *Tokens;
+ UnwrappedLineConsumer &Callback;
+
+ // FIXME: This is a temporary measure until we have reworked the ownership
+ // of the format tokens. The goal is to have the actual tokens created and
+ // owned outside of and handed into the UnwrappedLineParser.
+ ArrayRef<FormatToken *> AllTokens;
+
+ // Represents preprocessor branch type, so we can find matching
+ // #if/#else/#endif directives.
+ enum PPBranchKind {
+ PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
+ PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
+ };
+
+ // Keeps a stack of currently active preprocessor branching directives.
+ SmallVector<PPBranchKind, 16> PPStack;
+
+ // The \c UnwrappedLineParser re-parses the code for each combination
+ // of preprocessor branches that can be taken.
+ // To that end, we take the same branch (#if, #else, or one of the #elif
+ // branches) for each nesting level of preprocessor branches.
+ // \c PPBranchLevel stores the current nesting level of preprocessor
+ // branches during one pass over the code.
+ int PPBranchLevel;
+
+ // Contains the current branch (#if, #else or one of the #elif branches)
+ // for each nesting level.
+ SmallVector<int, 8> PPLevelBranchIndex;
+
+ // Contains the maximum number of branches at each nesting level.
+ SmallVector<int, 8> PPLevelBranchCount;
+
+ // Contains the number of branches per nesting level we are currently
+ // in while parsing a preprocessor branch sequence.
+ // This is used to update PPLevelBranchCount at the end of a branch
+ // sequence.
+ std::stack<int> PPChainBranchIndex;
+
+ friend class ScopedLineState;
+ friend class CompoundStatementIndenter;
+};
+
+struct UnwrappedLineNode {
+ UnwrappedLineNode() : Tok(nullptr) {}
+ UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
+
+ FormatToken *Tok;
+ SmallVector<UnwrappedLine, 0> Children;
+};
+
+inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false),
+ MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {}
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp
new file mode 100644
index 000000000000..fb4f59fbc9bc
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp
@@ -0,0 +1,144 @@
+//===--- UsingDeclarationsSorter.cpp ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements UsingDeclarationsSorter, a TokenAnalyzer that
+/// sorts consecutive using declarations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "UsingDeclarationsSorter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Regex.h"
+
+#include <algorithm>
+
+#define DEBUG_TYPE "using-declarations-sorter"
+
+namespace clang {
+namespace format {
+
+namespace {
+
+struct UsingDeclaration {
+ const AnnotatedLine *Line;
+ std::string Label;
+
+ UsingDeclaration(const AnnotatedLine *Line, const std::string &Label)
+ : Line(Line), Label(Label) {}
+
+ bool operator<(const UsingDeclaration &Other) const {
+ return Label < Other.Label;
+ }
+};
+
+/// Computes the label of a using declaration starting at tthe using token
+/// \p UsingTok.
+/// If \p UsingTok doesn't begin a using declaration, returns the empty string.
+/// Note that this detects specifically using declarations, as in:
+/// using A::B::C;
+/// and not type aliases, as in:
+/// using A = B::C;
+/// Type aliases are in general not safe to permute.
+std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) {
+ assert(UsingTok && UsingTok->is(tok::kw_using) && "Expecting a using token");
+ std::string Label;
+ const FormatToken *Tok = UsingTok->Next;
+ if (Tok && Tok->is(tok::kw_typename)) {
+ Label.append("typename ");
+ Tok = Tok->Next;
+ }
+ if (Tok && Tok->is(tok::coloncolon)) {
+ Label.append("::");
+ Tok = Tok->Next;
+ }
+ bool HasIdentifier = false;
+ while (Tok && Tok->is(tok::identifier)) {
+ HasIdentifier = true;
+ Label.append(Tok->TokenText.str());
+ Tok = Tok->Next;
+ if (!Tok || Tok->isNot(tok::coloncolon))
+ break;
+ Label.append("::");
+ Tok = Tok->Next;
+ }
+ if (HasIdentifier && Tok && Tok->isOneOf(tok::semi, tok::comma))
+ return Label;
+ return "";
+}
+
+void endUsingDeclarationBlock(
+ SmallVectorImpl<UsingDeclaration> *UsingDeclarations,
+ const SourceManager &SourceMgr, tooling::Replacements *Fixes) {
+ SmallVector<UsingDeclaration, 4> SortedUsingDeclarations(
+ UsingDeclarations->begin(), UsingDeclarations->end());
+ std::sort(SortedUsingDeclarations.begin(), SortedUsingDeclarations.end());
+ for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) {
+ if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line)
+ continue;
+ auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation();
+ auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc();
+ auto SortedBegin =
+ SortedUsingDeclarations[I].Line->First->Tok.getLocation();
+ auto SortedEnd = SortedUsingDeclarations[I].Line->Last->Tok.getEndLoc();
+ StringRef Text(SourceMgr.getCharacterData(SortedBegin),
+ SourceMgr.getCharacterData(SortedEnd) -
+ SourceMgr.getCharacterData(SortedBegin));
+ DEBUG({
+ StringRef OldText(SourceMgr.getCharacterData(Begin),
+ SourceMgr.getCharacterData(End) -
+ SourceMgr.getCharacterData(Begin));
+ llvm::dbgs() << "Replacing '" << OldText << "' with '" << Text << "'\n";
+ });
+ auto Range = CharSourceRange::getCharRange(Begin, End);
+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, Text));
+ if (Err) {
+ llvm::errs() << "Error while sorting using declarations: "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+ }
+ UsingDeclarations->clear();
+}
+
+} // namespace
+
+UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env,
+ const FormatStyle &Style)
+ : TokenAnalyzer(Env, Style) {}
+
+tooling::Replacements UsingDeclarationsSorter::analyze(
+ TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) {
+ const SourceManager &SourceMgr = Env.getSourceManager();
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(),
+ AnnotatedLines.end());
+ tooling::Replacements Fixes;
+ SmallVector<UsingDeclaration, 4> UsingDeclarations;
+ for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
+ if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective ||
+ !AnnotatedLines[I]->startsWith(tok::kw_using) ||
+ AnnotatedLines[I]->First->Finalized) {
+ endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+ continue;
+ }
+ if (AnnotatedLines[I]->First->NewlinesBefore > 1)
+ endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+ std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First);
+ if (Label.empty()) {
+ endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+ continue;
+ }
+ UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label));
+ }
+ endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);
+ return Fixes;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h
new file mode 100644
index 000000000000..f7d5f97e3a2a
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h
@@ -0,0 +1,37 @@
+//===--- UsingDeclarationsSorter.h ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares UsingDeclarationsSorter, a TokenAnalyzer that
+/// sorts consecutive using declarations.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H
+#define LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+class UsingDeclarationsSorter : public TokenAnalyzer {
+public:
+ UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style);
+
+ tooling::Replacements
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
new file mode 100644
index 000000000000..4b4fd13145fb
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.cpp
@@ -0,0 +1,701 @@
+//===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WhitespaceManager class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WhitespaceManager.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace format {
+
+bool WhitespaceManager::Change::IsBeforeInFile::
+operator()(const Change &C1, const Change &C2) const {
+ return SourceMgr.isBeforeInTranslationUnit(
+ C1.OriginalWhitespaceRange.getBegin(),
+ C2.OriginalWhitespaceRange.getBegin());
+}
+
+WhitespaceManager::Change::Change(const FormatToken &Tok,
+ bool CreateReplacement,
+ SourceRange OriginalWhitespaceRange,
+ int Spaces, unsigned StartOfTokenColumn,
+ unsigned NewlinesBefore,
+ StringRef PreviousLinePostfix,
+ StringRef CurrentLinePrefix,
+ bool ContinuesPPDirective, bool IsInsideToken)
+ : Tok(&Tok), CreateReplacement(CreateReplacement),
+ OriginalWhitespaceRange(OriginalWhitespaceRange),
+ StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
+ PreviousLinePostfix(PreviousLinePostfix),
+ CurrentLinePrefix(CurrentLinePrefix),
+ ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces),
+ IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0),
+ PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
+ StartOfBlockComment(nullptr), IndentationOffset(0) {}
+
+void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
+ unsigned Spaces,
+ unsigned StartOfTokenColumn,
+ bool InPPDirective) {
+ if (Tok.Finalized)
+ return;
+ Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
+ Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
+ Spaces, StartOfTokenColumn, Newlines, "", "",
+ InPPDirective && !Tok.IsFirst,
+ /*IsInsideToken=*/false));
+}
+
+void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
+ bool InPPDirective) {
+ if (Tok.Finalized)
+ return;
+ Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
+ Tok.WhitespaceRange, /*Spaces=*/0,
+ Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
+ InPPDirective && !Tok.IsFirst,
+ /*IsInsideToken=*/false));
+}
+
+void WhitespaceManager::replaceWhitespaceInToken(
+ const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
+ StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
+ unsigned Newlines, int Spaces) {
+ if (Tok.Finalized)
+ return;
+ SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
+ Changes.push_back(
+ Change(Tok, /*CreateReplacement=*/true,
+ SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces,
+ std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix,
+ InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true));
+}
+
+const tooling::Replacements &WhitespaceManager::generateReplacements() {
+ if (Changes.empty())
+ return Replaces;
+
+ std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
+ calculateLineBreakInformation();
+ alignConsecutiveDeclarations();
+ alignConsecutiveAssignments();
+ alignTrailingComments();
+ alignEscapedNewlines();
+ generateChanges();
+
+ return Replaces;
+}
+
+void WhitespaceManager::calculateLineBreakInformation() {
+ Changes[0].PreviousEndOfTokenColumn = 0;
+ Change *LastOutsideTokenChange = &Changes[0];
+ for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
+ SourceLocation OriginalWhitespaceStart =
+ Changes[i].OriginalWhitespaceRange.getBegin();
+ SourceLocation PreviousOriginalWhitespaceEnd =
+ Changes[i - 1].OriginalWhitespaceRange.getEnd();
+ unsigned OriginalWhitespaceStartOffset =
+ SourceMgr.getFileOffset(OriginalWhitespaceStart);
+ unsigned PreviousOriginalWhitespaceEndOffset =
+ SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd);
+ assert(PreviousOriginalWhitespaceEndOffset <=
+ OriginalWhitespaceStartOffset);
+ const char *const PreviousOriginalWhitespaceEndData =
+ SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd);
+ StringRef Text(PreviousOriginalWhitespaceEndData,
+ SourceMgr.getCharacterData(OriginalWhitespaceStart) -
+ PreviousOriginalWhitespaceEndData);
+ // Usually consecutive changes would occur in consecutive tokens. This is
+ // not the case however when analyzing some preprocessor runs of the
+ // annotated lines. For example, in this code:
+ //
+ // #if A // line 1
+ // int i = 1;
+ // #else B // line 2
+ // int i = 2;
+ // #endif // line 3
+ //
+ // one of the runs will produce the sequence of lines marked with line 1, 2
+ // and 3. So the two consecutive whitespace changes just before '// line 2'
+ // and before '#endif // line 3' span multiple lines and tokens:
+ //
+ // #else B{change X}[// line 2
+ // int i = 2;
+ // ]{change Y}#endif // line 3
+ //
+ // For this reason, if the text between consecutive changes spans multiple
+ // newlines, the token length must be adjusted to the end of the original
+ // line of the token.
+ auto NewlinePos = Text.find_first_of('\n');
+ if (NewlinePos == StringRef::npos) {
+ Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset -
+ PreviousOriginalWhitespaceEndOffset +
+ Changes[i].PreviousLinePostfix.size() +
+ Changes[i - 1].CurrentLinePrefix.size();
+ } else {
+ Changes[i - 1].TokenLength =
+ NewlinePos + Changes[i - 1].CurrentLinePrefix.size();
+ }
+
+ // If there are multiple changes in this token, sum up all the changes until
+ // the end of the line.
+ if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0)
+ LastOutsideTokenChange->TokenLength +=
+ Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
+ else
+ LastOutsideTokenChange = &Changes[i - 1];
+
+ Changes[i].PreviousEndOfTokenColumn =
+ Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
+
+ Changes[i - 1].IsTrailingComment =
+ (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) ||
+ (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) &&
+ Changes[i - 1].Tok->is(tok::comment) &&
+ // FIXME: This is a dirty hack. The problem is that
+ // BreakableLineCommentSection does comment reflow changes and here is
+ // the aligning of trailing comments. Consider the case where we reflow
+ // the second line up in this example:
+ //
+ // // line 1
+ // // line 2
+ //
+ // That amounts to 2 changes by BreakableLineCommentSection:
+ // - the first, delimited by (), for the whitespace between the tokens,
+ // - and second, delimited by [], for the whitespace at the beginning
+ // of the second token:
+ //
+ // // line 1(
+ // )[// ]line 2
+ //
+ // So in the end we have two changes like this:
+ //
+ // // line1()[ ]line 2
+ //
+ // Note that the OriginalWhitespaceStart of the second change is the
+ // same as the PreviousOriginalWhitespaceEnd of the first change.
+ // In this case, the below check ensures that the second change doesn't
+ // get treated as a trailing comment change here, since this might
+ // trigger additional whitespace to be wrongly inserted before "line 2"
+ // by the comment aligner here.
+ //
+ // For a proper solution we need a mechanism to say to WhitespaceManager
+ // that a particular change breaks the current sequence of trailing
+ // comments.
+ OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
+ }
+ // FIXME: The last token is currently not always an eof token; in those
+ // cases, setting TokenLength of the last token to 0 is wrong.
+ Changes.back().TokenLength = 0;
+ Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment);
+
+ const WhitespaceManager::Change *LastBlockComment = nullptr;
+ for (auto &Change : Changes) {
+ // Reset the IsTrailingComment flag for changes inside of trailing comments
+ // so they don't get realigned later. Comment line breaks however still need
+ // to be aligned.
+ if (Change.IsInsideToken && Change.NewlinesBefore == 0)
+ Change.IsTrailingComment = false;
+ Change.StartOfBlockComment = nullptr;
+ Change.IndentationOffset = 0;
+ if (Change.Tok->is(tok::comment)) {
+ if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken)
+ LastBlockComment = &Change;
+ else {
+ if ((Change.StartOfBlockComment = LastBlockComment))
+ Change.IndentationOffset =
+ Change.StartOfTokenColumn -
+ Change.StartOfBlockComment->StartOfTokenColumn;
+ }
+ } else {
+ LastBlockComment = nullptr;
+ }
+ }
+}
+
+// Align a single sequence of tokens, see AlignTokens below.
+template <typename F>
+static void
+AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
+ SmallVector<WhitespaceManager::Change, 16> &Changes) {
+ bool FoundMatchOnLine = false;
+ int Shift = 0;
+
+ // ScopeStack keeps track of the current scope depth. It contains indices of
+ // the first token on each scope.
+ // We only run the "Matches" function on tokens from the outer-most scope.
+ // However, we do need to pay special attention to one class of tokens
+ // that are not in the outer-most scope, and that is function parameters
+ // which are split across multiple lines, as illustrated by this example:
+ // double a(int x);
+ // int b(int y,
+ // double z);
+ // In the above example, we need to take special care to ensure that
+ // 'double z' is indented along with it's owning function 'b'.
+ SmallVector<unsigned, 16> ScopeStack;
+
+ for (unsigned i = Start; i != End; ++i) {
+ if (ScopeStack.size() != 0 &&
+ Changes[i].nestingAndIndentLevel() <
+ Changes[ScopeStack.back()].nestingAndIndentLevel())
+ ScopeStack.pop_back();
+
+ if (i != Start && Changes[i].nestingAndIndentLevel() >
+ Changes[i - 1].nestingAndIndentLevel())
+ ScopeStack.push_back(i);
+
+ bool InsideNestedScope = ScopeStack.size() != 0;
+
+ if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) {
+ Shift = 0;
+ FoundMatchOnLine = false;
+ }
+
+ // If this is the first matching token to be aligned, remember by how many
+ // spaces it has to be shifted, so the rest of the changes on the line are
+ // shifted by the same amount
+ if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) {
+ FoundMatchOnLine = true;
+ Shift = Column - Changes[i].StartOfTokenColumn;
+ Changes[i].Spaces += Shift;
+ }
+
+ // This is for function parameters that are split across multiple lines,
+ // as mentioned in the ScopeStack comment.
+ if (InsideNestedScope && Changes[i].NewlinesBefore > 0) {
+ unsigned ScopeStart = ScopeStack.back();
+ if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) ||
+ (ScopeStart > Start + 1 &&
+ Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)))
+ Changes[i].Spaces += Shift;
+ }
+
+ assert(Shift >= 0);
+ Changes[i].StartOfTokenColumn += Shift;
+ if (i + 1 != Changes.size())
+ Changes[i + 1].PreviousEndOfTokenColumn += Shift;
+ }
+}
+
+// Walk through a subset of the changes, starting at StartAt, and find
+// sequences of matching tokens to align. To do so, keep track of the lines and
+// whether or not a matching token was found on a line. If a matching token is
+// found, extend the current sequence. If the current line cannot be part of a
+// sequence, e.g. because there is an empty line before it or it contains only
+// non-matching tokens, finalize the previous sequence.
+// The value returned is the token on which we stopped, either because we
+// exhausted all items inside Changes, or because we hit a scope level higher
+// than our initial scope.
+// This function is recursive. Each invocation processes only the scope level
+// equal to the initial level, which is the level of Changes[StartAt].
+// If we encounter a scope level greater than the initial level, then we call
+// ourselves recursively, thereby avoiding the pollution of the current state
+// with the alignment requirements of the nested sub-level. This recursive
+// behavior is necessary for aligning function prototypes that have one or more
+// arguments.
+// If this function encounters a scope level less than the initial level,
+// it returns the current position.
+// There is a non-obvious subtlety in the recursive behavior: Even though we
+// defer processing of nested levels to recursive invocations of this
+// function, when it comes time to align a sequence of tokens, we run the
+// alignment on the entire sequence, including the nested levels.
+// When doing so, most of the nested tokens are skipped, because their
+// alignment was already handled by the recursive invocations of this function.
+// However, the special exception is that we do NOT skip function parameters
+// that are split across multiple lines. See the test case in FormatTest.cpp
+// that mentions "split function parameter alignment" for an example of this.
+template <typename F>
+static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
+ SmallVector<WhitespaceManager::Change, 16> &Changes,
+ unsigned StartAt) {
+ unsigned MinColumn = 0;
+ unsigned MaxColumn = UINT_MAX;
+
+ // Line number of the start and the end of the current token sequence.
+ unsigned StartOfSequence = 0;
+ unsigned EndOfSequence = 0;
+
+ // Measure the scope level (i.e. depth of (), [], {}) of the first token, and
+ // abort when we hit any token in a higher scope than the starting one.
+ auto NestingAndIndentLevel = StartAt < Changes.size()
+ ? Changes[StartAt].nestingAndIndentLevel()
+ : std::pair<unsigned, unsigned>(0, 0);
+
+ // Keep track of the number of commas before the matching tokens, we will only
+ // align a sequence of matching tokens if they are preceded by the same number
+ // of commas.
+ unsigned CommasBeforeLastMatch = 0;
+ unsigned CommasBeforeMatch = 0;
+
+ // Whether a matching token has been found on the current line.
+ bool FoundMatchOnLine = false;
+
+ // Aligns a sequence of matching tokens, on the MinColumn column.
+ //
+ // Sequences start from the first matching token to align, and end at the
+ // first token of the first line that doesn't need to be aligned.
+ //
+ // We need to adjust the StartOfTokenColumn of each Change that is on a line
+ // containing any matching token to be aligned and located after such token.
+ auto AlignCurrentSequence = [&] {
+ if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
+ AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
+ Changes);
+ MinColumn = 0;
+ MaxColumn = UINT_MAX;
+ StartOfSequence = 0;
+ EndOfSequence = 0;
+ };
+
+ unsigned i = StartAt;
+ for (unsigned e = Changes.size(); i != e; ++i) {
+ if (Changes[i].nestingAndIndentLevel() < NestingAndIndentLevel)
+ break;
+
+ if (Changes[i].NewlinesBefore != 0) {
+ CommasBeforeMatch = 0;
+ EndOfSequence = i;
+ // If there is a blank line, or if the last line didn't contain any
+ // matching token, the sequence ends here.
+ if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
+ AlignCurrentSequence();
+
+ FoundMatchOnLine = false;
+ }
+
+ if (Changes[i].Tok->is(tok::comma)) {
+ ++CommasBeforeMatch;
+ } else if (Changes[i].nestingAndIndentLevel() > NestingAndIndentLevel) {
+ // Call AlignTokens recursively, skipping over this scope block.
+ unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
+ i = StoppedAt - 1;
+ continue;
+ }
+
+ if (!Matches(Changes[i]))
+ continue;
+
+ // If there is more than one matching token per line, or if the number of
+ // preceding commas, do not match anymore, end the sequence.
+ if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch)
+ AlignCurrentSequence();
+
+ CommasBeforeLastMatch = CommasBeforeMatch;
+ FoundMatchOnLine = true;
+
+ if (StartOfSequence == 0)
+ StartOfSequence = i;
+
+ unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
+ int LineLengthAfter = -Changes[i].Spaces;
+ for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
+ LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
+ unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
+
+ // If we are restricted by the maximum column width, end the sequence.
+ if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
+ CommasBeforeLastMatch != CommasBeforeMatch) {
+ AlignCurrentSequence();
+ StartOfSequence = i;
+ }
+
+ MinColumn = std::max(MinColumn, ChangeMinColumn);
+ MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
+ }
+
+ EndOfSequence = i;
+ AlignCurrentSequence();
+ return i;
+}
+
+void WhitespaceManager::alignConsecutiveAssignments() {
+ if (!Style.AlignConsecutiveAssignments)
+ return;
+
+ AlignTokens(Style,
+ [&](const Change &C) {
+ // Do not align on equal signs that are first on a line.
+ if (C.NewlinesBefore > 0)
+ return false;
+
+ // Do not align on equal signs that are last on a line.
+ if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
+ return false;
+
+ return C.Tok->is(tok::equal);
+ },
+ Changes, /*StartAt=*/0);
+}
+
+void WhitespaceManager::alignConsecutiveDeclarations() {
+ if (!Style.AlignConsecutiveDeclarations)
+ return;
+
+ // FIXME: Currently we don't handle properly the PointerAlignment: Right
+ // The * and & are not aligned and are left dangling. Something has to be done
+ // about it, but it raises the question of alignment of code like:
+ // const char* const* v1;
+ // float const* v2;
+ // SomeVeryLongType const& v3;
+ AlignTokens(Style,
+ [](Change const &C) {
+ // tok::kw_operator is necessary for aligning operator overload
+ // definitions.
+ return C.Tok->is(TT_StartOfName) ||
+ C.Tok->is(TT_FunctionDeclarationName) ||
+ C.Tok->is(tok::kw_operator);
+ },
+ Changes, /*StartAt=*/0);
+}
+
+void WhitespaceManager::alignTrailingComments() {
+ unsigned MinColumn = 0;
+ unsigned MaxColumn = UINT_MAX;
+ unsigned StartOfSequence = 0;
+ bool BreakBeforeNext = false;
+ unsigned Newlines = 0;
+ for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+ if (Changes[i].StartOfBlockComment)
+ continue;
+ Newlines += Changes[i].NewlinesBefore;
+ if (!Changes[i].IsTrailingComment)
+ continue;
+
+ unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
+ unsigned ChangeMaxColumn = Style.ColumnLimit >= Changes[i].TokenLength
+ ? Style.ColumnLimit - Changes[i].TokenLength
+ : ChangeMinColumn;
+
+ // If we don't create a replacement for this change, we have to consider
+ // it to be immovable.
+ if (!Changes[i].CreateReplacement)
+ ChangeMaxColumn = ChangeMinColumn;
+
+ if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
+ ChangeMaxColumn -= 2;
+ // If this comment follows an } in column 0, it probably documents the
+ // closing of a namespace and we don't want to align it.
+ bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
+ Changes[i - 1].Tok->is(tok::r_brace) &&
+ Changes[i - 1].StartOfTokenColumn == 0;
+ bool WasAlignedWithStartOfNextLine = false;
+ if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
+ unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
+ Changes[i].OriginalWhitespaceRange.getEnd());
+ for (unsigned j = i + 1; j != e; ++j) {
+ if (Changes[j].Tok->is(tok::comment))
+ continue;
+
+ unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
+ Changes[j].OriginalWhitespaceRange.getEnd());
+ // The start of the next token was previously aligned with the
+ // start of this comment.
+ WasAlignedWithStartOfNextLine =
+ CommentColumn == NextColumn ||
+ CommentColumn == NextColumn + Style.IndentWidth;
+ break;
+ }
+ }
+ if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
+ alignTrailingComments(StartOfSequence, i, MinColumn);
+ MinColumn = ChangeMinColumn;
+ MaxColumn = ChangeMinColumn;
+ StartOfSequence = i;
+ } else if (BreakBeforeNext || Newlines > 1 ||
+ (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
+ // Break the comment sequence if the previous line did not end
+ // in a trailing comment.
+ (Changes[i].NewlinesBefore == 1 && i > 0 &&
+ !Changes[i - 1].IsTrailingComment) ||
+ WasAlignedWithStartOfNextLine) {
+ alignTrailingComments(StartOfSequence, i, MinColumn);
+ MinColumn = ChangeMinColumn;
+ MaxColumn = ChangeMaxColumn;
+ StartOfSequence = i;
+ } else {
+ MinColumn = std::max(MinColumn, ChangeMinColumn);
+ MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
+ }
+ BreakBeforeNext =
+ (i == 0) || (Changes[i].NewlinesBefore > 1) ||
+ // Never start a sequence with a comment at the beginning of
+ // the line.
+ (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
+ Newlines = 0;
+ }
+ alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
+}
+
+void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
+ unsigned Column) {
+ for (unsigned i = Start; i != End; ++i) {
+ int Shift = 0;
+ if (Changes[i].IsTrailingComment) {
+ Shift = Column - Changes[i].StartOfTokenColumn;
+ }
+ if (Changes[i].StartOfBlockComment) {
+ Shift = Changes[i].IndentationOffset +
+ Changes[i].StartOfBlockComment->StartOfTokenColumn -
+ Changes[i].StartOfTokenColumn;
+ }
+ assert(Shift >= 0);
+ Changes[i].Spaces += Shift;
+ if (i + 1 != Changes.size())
+ Changes[i + 1].PreviousEndOfTokenColumn += Shift;
+ Changes[i].StartOfTokenColumn += Shift;
+ }
+}
+
+void WhitespaceManager::alignEscapedNewlines() {
+ if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign)
+ return;
+
+ bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left;
+ unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
+ unsigned StartOfMacro = 0;
+ for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
+ Change &C = Changes[i];
+ if (C.NewlinesBefore > 0) {
+ if (C.ContinuesPPDirective) {
+ MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
+ } else {
+ alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
+ MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
+ StartOfMacro = i;
+ }
+ }
+ }
+ alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
+}
+
+void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
+ unsigned Column) {
+ for (unsigned i = Start; i < End; ++i) {
+ Change &C = Changes[i];
+ if (C.NewlinesBefore > 0) {
+ assert(C.ContinuesPPDirective);
+ if (C.PreviousEndOfTokenColumn + 1 > Column)
+ C.EscapedNewlineColumn = 0;
+ else
+ C.EscapedNewlineColumn = Column;
+ }
+ }
+}
+
+void WhitespaceManager::generateChanges() {
+ for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+ const Change &C = Changes[i];
+ if (i > 0) {
+ assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
+ C.OriginalWhitespaceRange.getBegin() &&
+ "Generating two replacements for the same location");
+ }
+ if (C.CreateReplacement) {
+ std::string ReplacementText = C.PreviousLinePostfix;
+ if (C.ContinuesPPDirective)
+ appendNewlineText(ReplacementText, C.NewlinesBefore,
+ C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
+ else
+ appendNewlineText(ReplacementText, C.NewlinesBefore);
+ appendIndentText(ReplacementText, C.Tok->IndentLevel,
+ std::max(0, C.Spaces),
+ C.StartOfTokenColumn - std::max(0, C.Spaces));
+ ReplacementText.append(C.CurrentLinePrefix);
+ storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
+ }
+ }
+}
+
+void WhitespaceManager::storeReplacement(SourceRange Range,
+ StringRef Text) {
+ unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
+ SourceMgr.getFileOffset(Range.getBegin());
+ // Don't create a replacement, if it does not change anything.
+ if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
+ WhitespaceLength) == Text)
+ return;
+ auto Err = Replaces.add(tooling::Replacement(
+ SourceMgr, CharSourceRange::getCharRange(Range), Text));
+ // FIXME: better error handling. For now, just print an error message in the
+ // release version.
+ if (Err) {
+ llvm::errs() << llvm::toString(std::move(Err)) << "\n";
+ assert(false);
+ }
+}
+
+void WhitespaceManager::appendNewlineText(std::string &Text,
+ unsigned Newlines) {
+ for (unsigned i = 0; i < Newlines; ++i)
+ Text.append(UseCRLF ? "\r\n" : "\n");
+}
+
+void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
+ unsigned PreviousEndOfTokenColumn,
+ unsigned EscapedNewlineColumn) {
+ if (Newlines > 0) {
+ unsigned Offset =
+ std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn);
+ for (unsigned i = 0; i < Newlines; ++i) {
+ Text.append(EscapedNewlineColumn - Offset - 1, ' ');
+ Text.append(UseCRLF ? "\\\r\n" : "\\\n");
+ Offset = 0;
+ }
+ }
+}
+
+void WhitespaceManager::appendIndentText(std::string &Text,
+ unsigned IndentLevel, unsigned Spaces,
+ unsigned WhitespaceStartColumn) {
+ switch (Style.UseTab) {
+ case FormatStyle::UT_Never:
+ Text.append(Spaces, ' ');
+ break;
+ case FormatStyle::UT_Always: {
+ unsigned FirstTabWidth =
+ Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
+ // Indent with tabs only when there's at least one full tab.
+ if (FirstTabWidth + Style.TabWidth <= Spaces) {
+ Spaces -= FirstTabWidth;
+ Text.append("\t");
+ }
+ Text.append(Spaces / Style.TabWidth, '\t');
+ Text.append(Spaces % Style.TabWidth, ' ');
+ break;
+ }
+ case FormatStyle::UT_ForIndentation:
+ if (WhitespaceStartColumn == 0) {
+ unsigned Indentation = IndentLevel * Style.IndentWidth;
+ // This happens, e.g. when a line in a block comment is indented less than
+ // the first one.
+ if (Indentation > Spaces)
+ Indentation = Spaces;
+ unsigned Tabs = Indentation / Style.TabWidth;
+ Text.append(Tabs, '\t');
+ Spaces -= Tabs * Style.TabWidth;
+ }
+ Text.append(Spaces, ' ');
+ break;
+ case FormatStyle::UT_ForContinuationAndIndentation:
+ if (WhitespaceStartColumn == 0) {
+ unsigned Tabs = Spaces / Style.TabWidth;
+ Text.append(Tabs, '\t');
+ Spaces -= Tabs * Style.TabWidth;
+ }
+ Text.append(Spaces, ' ');
+ break;
+ }
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
new file mode 100644
index 000000000000..50df59d09641
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/WhitespaceManager.h
@@ -0,0 +1,214 @@
+//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WhitespaceManager class manages whitespace around tokens and their
+/// replacements.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
+#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
+
+#include "TokenAnnotator.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include <string>
+
+namespace clang {
+namespace format {
+
+/// \brief Manages the whitespaces around tokens and their replacements.
+///
+/// This includes special handling for certain constructs, e.g. the alignment of
+/// trailing line comments.
+///
+/// To guarantee correctness of alignment operations, the \c WhitespaceManager
+/// must be informed about every token in the source file; for each token, there
+/// must be exactly one call to either \c replaceWhitespace or
+/// \c addUntouchableToken.
+///
+/// There may be multiple calls to \c breakToken for a given token.
+class WhitespaceManager {
+public:
+ WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
+ bool UseCRLF)
+ : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
+
+ /// \brief Replaces the whitespace in front of \p Tok. Only call once for
+ /// each \c AnnotatedToken.
+ ///
+ /// \p StartOfTokenColumn is the column at which the token will start after
+ /// this replacement. It is needed for determining how \p Spaces is turned
+ /// into tabs and spaces for some format styles.
+ void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
+ unsigned StartOfTokenColumn,
+ bool InPPDirective = false);
+
+ /// \brief Adds information about an unchangeable token's whitespace.
+ ///
+ /// Needs to be called for every token for which \c replaceWhitespace
+ /// was not called.
+ void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
+
+ /// \brief Inserts or replaces whitespace in the middle of a token.
+ ///
+ /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
+ /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
+ /// characters.
+ ///
+ /// Note: \p Spaces can be negative to retain information about initial
+ /// relative column offset between a line of a block comment and the start of
+ /// the comment. This negative offset may be compensated by trailing comment
+ /// alignment here. In all other cases negative \p Spaces will be truncated to
+ /// 0.
+ ///
+ /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
+ /// used to align backslashes correctly.
+ void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
+ unsigned ReplaceChars,
+ StringRef PreviousPostfix,
+ StringRef CurrentPrefix, bool InPPDirective,
+ unsigned Newlines, int Spaces);
+
+ /// \brief Returns all the \c Replacements created during formatting.
+ const tooling::Replacements &generateReplacements();
+
+ /// \brief Represents a change before a token, a break inside a token,
+ /// or the layout of an unchanged token (or whitespace within).
+ struct Change {
+ /// \brief Functor to sort changes in original source order.
+ class IsBeforeInFile {
+ public:
+ IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
+ bool operator()(const Change &C1, const Change &C2) const;
+
+ private:
+ const SourceManager &SourceMgr;
+ };
+
+ /// \brief Creates a \c Change.
+ ///
+ /// The generated \c Change will replace the characters at
+ /// \p OriginalWhitespaceRange with a concatenation of
+ /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
+ /// and \p CurrentLinePrefix.
+ ///
+ /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
+ /// trailing comments and escaped newlines.
+ Change(const FormatToken &Tok, bool CreateReplacement,
+ SourceRange OriginalWhitespaceRange, int Spaces,
+ unsigned StartOfTokenColumn, unsigned NewlinesBefore,
+ StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
+ bool ContinuesPPDirective, bool IsInsideToken);
+
+ // The kind of the token whose whitespace this change replaces, or in which
+ // this change inserts whitespace.
+ // FIXME: Currently this is not set correctly for breaks inside comments, as
+ // the \c BreakableToken is still doing its own alignment.
+ const FormatToken *Tok;
+
+ bool CreateReplacement;
+ // Changes might be in the middle of a token, so we cannot just keep the
+ // FormatToken around to query its information.
+ SourceRange OriginalWhitespaceRange;
+ unsigned StartOfTokenColumn;
+ unsigned NewlinesBefore;
+ std::string PreviousLinePostfix;
+ std::string CurrentLinePrefix;
+ bool ContinuesPPDirective;
+
+ // The number of spaces in front of the token or broken part of the token.
+ // This will be adapted when aligning tokens.
+ // Can be negative to retain information about the initial relative offset
+ // of the lines in a block comment. This is used when aligning trailing
+ // comments. Uncompensated negative offset is truncated to 0.
+ int Spaces;
+
+ // If this change is inside of a token but not at the start of the token or
+ // directly after a newline.
+ bool IsInsideToken;
+
+ // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
+ // \c EscapedNewlineColumn will be calculated in
+ // \c calculateLineBreakInformation.
+ bool IsTrailingComment;
+ unsigned TokenLength;
+ unsigned PreviousEndOfTokenColumn;
+ unsigned EscapedNewlineColumn;
+
+ // These fields are used to retain correct relative line indentation in a
+ // block comment when aligning trailing comments.
+ //
+ // If this Change represents a continuation of a block comment,
+ // \c StartOfBlockComment is pointer to the first Change in the block
+ // comment. \c IndentationOffset is a relative column offset to this
+ // change, so that the correct column can be reconstructed at the end of
+ // the alignment process.
+ const Change *StartOfBlockComment;
+ int IndentationOffset;
+
+ // A combination of nesting level and indent level, which are used in
+ // tandem to compute lexical scope, for the purposes of deciding
+ // when to stop consecutive alignment runs.
+ std::pair<unsigned, unsigned>
+ nestingAndIndentLevel() const {
+ return std::make_pair(Tok->NestingLevel, Tok->IndentLevel);
+ }
+ };
+
+private:
+ /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens
+ /// or token parts in a line and \c PreviousEndOfTokenColumn and
+ /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
+ void calculateLineBreakInformation();
+
+ /// \brief Align consecutive assignments over all \c Changes.
+ void alignConsecutiveAssignments();
+
+ /// \brief Align consecutive declarations over all \c Changes.
+ void alignConsecutiveDeclarations();
+
+ /// \brief Align trailing comments over all \c Changes.
+ void alignTrailingComments();
+
+ /// \brief Align trailing comments from change \p Start to change \p End at
+ /// the specified \p Column.
+ void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
+
+ /// \brief Align escaped newlines over all \c Changes.
+ void alignEscapedNewlines();
+
+ /// \brief Align escaped newlines from change \p Start to change \p End at
+ /// the specified \p Column.
+ void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
+
+ /// \brief Fill \c Replaces with the replacements for all effective changes.
+ void generateChanges();
+
+ /// \brief Stores \p Text as the replacement for the whitespace in \p Range.
+ void storeReplacement(SourceRange Range, StringRef Text);
+ void appendNewlineText(std::string &Text, unsigned Newlines);
+ void appendNewlineText(std::string &Text, unsigned Newlines,
+ unsigned PreviousEndOfTokenColumn,
+ unsigned EscapedNewlineColumn);
+ void appendIndentText(std::string &Text, unsigned IndentLevel,
+ unsigned Spaces, unsigned WhitespaceStartColumn);
+
+ SmallVector<Change, 16> Changes;
+ const SourceManager &SourceMgr;
+ tooling::Replacements Replaces;
+ const FormatStyle &Style;
+ bool UseCRLF;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif