src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:11:37 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-12-18 20:11:37 +0000
commit	461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch)
tree	6942083d7d56bba40ec790a453ca58ad3baf6832 /lib/Format
parent	75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff)

Vendor import of clang trunk r321017 (tag: vendor/clang/clang-trunk-r321017)

https://llvm.org/svn/llvm-project/cfe/trunk@321017

Notes

Notes:
svn path=/vendor/clang/dist/; revision=326941
svn path=/vendor/clang/clang-trunk-r321017/; revision=326942; tag=vendor/clang/clang-trunk-r321017

Diffstat (limited to 'lib/Format')

-rw-r--r--

lib/Format/BreakableToken.cpp

542

-rw-r--r--

lib/Format/BreakableToken.h

317

-rw-r--r--

lib/Format/ContinuationIndenter.cpp

747

-rw-r--r--

lib/Format/ContinuationIndenter.h

-rw-r--r--

lib/Format/Format.cpp

195

-rw-r--r--

lib/Format/FormatInternal.h

-rw-r--r--

lib/Format/FormatToken.cpp

-rw-r--r--

lib/Format/FormatToken.h

167

-rw-r--r--

lib/Format/FormatTokenLexer.cpp

-rw-r--r--

lib/Format/FormatTokenLexer.h

-rw-r--r--

lib/Format/NamespaceEndCommentsFixer.cpp

-rw-r--r--

lib/Format/NamespaceEndCommentsFixer.h

-rw-r--r--

lib/Format/SortJavaScriptImports.cpp

-rw-r--r--

lib/Format/TokenAnalyzer.cpp

-rw-r--r--

lib/Format/TokenAnalyzer.h

-rw-r--r--

lib/Format/TokenAnnotator.cpp

265

-rw-r--r--

lib/Format/TokenAnnotator.h

-rw-r--r--

lib/Format/UnwrappedLineFormatter.cpp

259

-rw-r--r--

lib/Format/UnwrappedLineFormatter.h

-rw-r--r--

lib/Format/UnwrappedLineParser.cpp

320

-rw-r--r--

lib/Format/UnwrappedLineParser.h

-rw-r--r--

lib/Format/UsingDeclarationsSorter.cpp

-rw-r--r--

lib/Format/UsingDeclarationsSorter.h

-rw-r--r--

lib/Format/WhitespaceManager.cpp

-rw-r--r--

lib/Format/WhitespaceManager.h

25 files changed, 2288 insertions, 1088 deletions

diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index 3c9df62f80dc..4735ab3564f0 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp

@@ -40,9 +40,15 @@ static bool IsBlank(char C) {

}

-static StringRef getLineCommentIndentPrefix(StringRef Comment) {

- static const char *const KnownPrefixes[] = {

- "///<", "//!<", "///", "//", "//!"};

+static StringRef getLineCommentIndentPrefix(StringRef Comment,

+ const FormatStyle &Style) {

+ static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",

+ "//!"};

+ static const char *const KnownTextProtoPrefixes[] = {"//", "#"};

+ ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);

+ if (Style.Language == FormatStyle::LK_TextProto)

+ KnownPrefixes = KnownTextProtoPrefixes;

StringRef LongestPrefix;

for (StringRef KnownPrefix : KnownPrefixes) {

if (Comment.startswith(KnownPrefix)) {

@@ -61,6 +67,8 @@ static BreakableToken::Split getCommentSplit(StringRef Text,

unsigned ColumnLimit,

unsigned TabWidth,

encoding::Encoding Encoding) {

+ DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit

+ << "\", Content start: " << ContentStartColumn << "\n");

if (ColumnLimit <= ContentStartColumn + 1)

return BreakableToken::Split(StringRef::npos, 0);

@@ -165,7 +173,7 @@ bool switchesFormatting(const FormatToken &Token) {

}

unsigned

-BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,

+BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,

Split Split) const {

// Example: consider the content

// lala lala

@@ -175,58 +183,64 @@ BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,

// We compute the number of columns when the split is compressed into a single

// space, like:

// lala lala

+ //

+ // FIXME: Correctly measure the length of whitespace in Split.second so it

+ // works with tabs.

return RemainingTokenColumns + 1 - Split.second;

}

-unsigned BreakableSingleLineToken::getLineCount() const { return 1; }

+unsigned BreakableStringLiteral::getLineCount() const { return 1; }

+unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,

+ unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const {

+ llvm_unreachable("Getting the length of a part of the string literal "

+ "indicates that the code tries to reflow it.");

+unsigned

+BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,

+ unsigned StartColumn) const {

+ return UnbreakableTailLength + Postfix.size() +

+ encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos),

+ StartColumn, Style.TabWidth, Encoding);

-unsigned BreakableSingleLineToken::getLineLengthAfterSplit(

- unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const {

- return StartColumn + Prefix.size() + Postfix.size() +

- encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),

- StartColumn + Prefix.size(),

- Style.TabWidth, Encoding);

+unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,

+ bool Break) const {

+ return StartColumn + Prefix.size();

}

-BreakableSingleLineToken::BreakableSingleLineToken(

+BreakableStringLiteral::BreakableStringLiteral(

const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,

StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,

const FormatStyle &Style)

: BreakableToken(Tok, InPPDirective, Encoding, Style),

- StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {

+ StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),

+ UnbreakableTailLength(Tok.UnbreakableTailLength) {

assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));

Line = Tok.TokenText.substr(

Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());

}

-BreakableStringLiteral::BreakableStringLiteral(

- const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,

- StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,

- const FormatStyle &Style)

- : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,

- Encoding, Style) {}

-BreakableToken::Split

-BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,

- unsigned ColumnLimit,

- llvm::Regex &CommentPragmasRegex) const {

- return getStringSplit(Line.substr(TailOffset),

- StartColumn + Prefix.size() + Postfix.size(),

- ColumnLimit, Style.TabWidth, Encoding);

+BreakableToken::Split BreakableStringLiteral::getSplit(

+ unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,

+ unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const {

+ return getStringSplit(Line.substr(TailOffset), ContentStartColumn,

+ ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);

}

void BreakableStringLiteral::insertBreak(unsigned LineIndex,

unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) {

+ WhitespaceManager &Whitespaces) const {

Whitespaces.replaceWhitespaceInToken(

Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,

Prefix, InPPDirective, 1, StartColumn);

}

BreakableComment::BreakableComment(const FormatToken &Token,

- unsigned StartColumn,

- bool InPPDirective,

+ unsigned StartColumn, bool InPPDirective,

encoding::Encoding Encoding,

const FormatStyle &Style)

: BreakableToken(Token, InPPDirective, Encoding, Style),

@@ -236,19 +250,19 @@ unsigned BreakableComment::getLineCount() const { return Lines.size(); }

BreakableToken::Split

BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,

- unsigned ColumnLimit,

+ unsigned ColumnLimit, unsigned ContentStartColumn,

llvm::Regex &CommentPragmasRegex) const {

// Don't break lines matching the comment pragmas regex.

if (CommentPragmasRegex.match(Content[LineIndex]))

return Split(StringRef::npos, 0);

return getCommentSplit(Content[LineIndex].substr(TailOffset),

- getContentStartColumn(LineIndex, TailOffset),

- ColumnLimit, Style.TabWidth, Encoding);

+ ContentStartColumn, ColumnLimit, Style.TabWidth,

+ Encoding);

}

-void BreakableComment::compressWhitespace(unsigned LineIndex,

- unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) {

+void BreakableComment::compressWhitespace(

+ unsigned LineIndex, unsigned TailOffset, Split Split,

+ WhitespaceManager &Whitespaces) const {

StringRef Text = Content[LineIndex].substr(TailOffset);

// Text is relative to the content line, but Whitespaces operates relative to

// the start of the corresponding token, so compute the start of the Split

@@ -262,44 +276,6 @@ void BreakableComment::compressWhitespace(unsigned LineIndex,

/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);

}

-BreakableToken::Split

-BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit) const {

- unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();

- StringRef TrimmedText = Text.rtrim(Blanks);

- // This is the width of the resulting line in case the full line of Text gets

- // reflown up starting at ReflowStartColumn.

- unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(

- TrimmedText, ReflowStartColumn,

- Style.TabWidth, Encoding);

- // If the full line fits up, we return a reflow split after it,

- // otherwise we compute the largest piece of text that fits after

- // ReflowStartColumn.

- Split ReflowSplit =

- FullWidth <= ColumnLimit

- ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())

- : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,

- Style.TabWidth, Encoding);

- // We need to be extra careful here, because while it's OK to keep a long line

- // if it can't be broken into smaller pieces (like when the first word of a

- // long line is longer than the column limit), it's not OK to reflow that long

- // word up. So we recompute the size of the previous line after reflowing and

- // only return the reflow split if that's under the line limit.

- if (ReflowSplit.first != StringRef::npos &&

- // Check if the width of the newly reflown line is under the limit.

- PreviousEndColumn + ReflowPrefix.size() +

- encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),

- PreviousEndColumn +

- ReflowPrefix.size(),

- Style.TabWidth, Encoding) <=

- ColumnLimit) {

- return ReflowSplit;

- }

- return Split(StringRef::npos, 0);

const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {

return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;

}

@@ -309,7 +285,7 @@ static bool mayReflowContent(StringRef Content) {

// Lines starting with '@' commonly have special meaning.

// Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.

static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = {

- "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " };

+ "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "};

bool hasSpecialMeaningPrefix = false;

for (StringRef Prefix : kSpecialMeaningPrefixes) {

if (Content.startswith(Prefix)) {

@@ -322,8 +298,8 @@ static bool mayReflowContent(StringRef Content) {

// To avoid issues if a line starts with a number which is actually the end

// of a previous line, we only consider numbers with up to 2 digits.

static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");

- hasSpecialMeaningPrefix = hasSpecialMeaningPrefix ||

- kNumberedListRegexp.match(Content);

+ hasSpecialMeaningPrefix =

+ hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);

// Simple heuristic for what to reflow: content should contain at least two

// characters and either the first or second character must be

@@ -339,7 +315,9 @@ BreakableBlockComment::BreakableBlockComment(

const FormatToken &Token, unsigned StartColumn,

unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,

encoding::Encoding Encoding, const FormatStyle &Style)

- : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {

+ : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),

+ DelimitersOnNewline(false),

+ UnbreakableTailLength(Token.UnbreakableTailLength) {

assert(Tok.is(TT_BlockComment) &&

"block comment section must start with a block comment");

@@ -384,8 +362,7 @@ BreakableBlockComment::BreakableBlockComment(

// If the last line is empty, the closing "*/" will have a star.

if (i + 1 == e && Content[i].empty())

break;

- if (!Content[i].empty() && i + 1 != e &&

- Decoration.startswith(Content[i]))

+ if (!Content[i].empty() && i + 1 != e && Decoration.startswith(Content[i]))

continue;

while (!Content[i].startswith(Decoration))

Decoration = Decoration.substr(0, Decoration.size() - 1);

@@ -427,11 +404,30 @@ BreakableBlockComment::BreakableBlockComment(

IndentAtLineBreak =

std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));

}

- IndentAtLineBreak =

- std::max<unsigned>(IndentAtLineBreak, Decoration.size());

+ IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());

+ // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.

+ if (Style.Language == FormatStyle::LK_JavaScript ||

+ Style.Language == FormatStyle::LK_Java) {

+ if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) {

+ // This is a multiline jsdoc comment.

+ DelimitersOnNewline = true;

+ } else if (Lines[0].startswith("* ") && Lines.size() == 1) {

+ // Detect a long single-line comment, like:

+ // /** long long long */

+ // Below, '2' is the width of '*/'.

+ unsigned EndColumn =

+ ContentColumn[0] +

+ encoding::columnWidthWithTabs(Lines[0], ContentColumn[0],

+ Style.TabWidth, Encoding) +

+ 2;

+ DelimitersOnNewline = EndColumn > Style.ColumnLimit;

+ }

DEBUG({

llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";

+ llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";

for (size_t i = 0; i < Lines.size(); ++i) {

llvm::dbgs() << i << " |" << Content[i] << "| "

<< "CC=" << ContentColumn[i] << "| "

@@ -477,30 +473,45 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,

IndentDelta;

}

-unsigned BreakableBlockComment::getLineLengthAfterSplit(

- unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const {

- unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);

+unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,

+ unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const {

unsigned LineLength =

- ContentStartColumn + encoding::columnWidthWithTabs(

- Content[LineIndex].substr(TailOffset, Length),

- ContentStartColumn, Style.TabWidth, Encoding);

+ encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length),

+ StartColumn, Style.TabWidth, Encoding);

+ // FIXME: This should go into getRemainingLength instead, but we currently

+ // break tests when putting it there. Investigate how to fix those tests.

// The last line gets a "*/" postfix.

if (LineIndex + 1 == Lines.size()) {

LineLength += 2;

// We never need a decoration when breaking just the trailing "*/" postfix.

// Note that checking that Length == 0 is not enough, since Length could

// also be StringRef::npos.

- if (Content[LineIndex].substr(TailOffset, Length).empty()) {

+ if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) {

LineLength -= Decoration.size();

}

return LineLength;

}

+unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,

+ unsigned Offset,

+ unsigned StartColumn) const {

+ return UnbreakableTailLength +

+ getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);

+unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,

+ bool Break) const {

+ if (Break)

+ return IndentAtLineBreak;

+ return std::max(0, ContentColumn[LineIndex]);

void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

Split Split,

- WhitespaceManager &Whitespaces) {

+ WhitespaceManager &Whitespaces) const {

StringRef Text = Content[LineIndex].substr(TailOffset);

StringRef Prefix = Decoration;

// We need this to account for the case when we have a decoration "* " for all

@@ -526,97 +537,55 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,

/*Spaces=*/LocalIndentAtLineBreak - Prefix.size());

}

-BreakableToken::Split BreakableBlockComment::getSplitBefore(

- unsigned LineIndex,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit,

- llvm::Regex &CommentPragmasRegex) const {

+BreakableToken::Split

+BreakableBlockComment::getReflowSplit(unsigned LineIndex,

+ llvm::Regex &CommentPragmasRegex) const {

if (!mayReflow(LineIndex, CommentPragmasRegex))

return Split(StringRef::npos, 0);

- StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

- return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,

- ColumnLimit);

-unsigned BreakableBlockComment::getReflownColumn(

- StringRef Content,

- unsigned LineIndex,

- unsigned PreviousEndColumn) const {

- unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();

- // If this is the last line, it will carry around its '*/' postfix.

- unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);

- // The line is composed of previous text, reflow prefix, reflown text and

- // postfix.

- unsigned ReflownColumn =

- StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,

- Style.TabWidth, Encoding) +

- PostfixLength;

- return ReflownColumn;

-unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(

- unsigned LineIndex, unsigned TailOffset,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit,

- Split SplitBefore) const {

- if (SplitBefore.first == StringRef::npos ||

- // Block comment line contents contain the trailing whitespace after the

- // decoration, so the need of left trim. Note that this behavior is

- // consistent with the breaking of block comments where the indentation of

- // a broken line is uniform across all the lines of the block comment.

- SplitBefore.first + SplitBefore.second <

- Content[LineIndex].ltrim().size()) {

- // A piece of line, not the whole, gets reflown.

- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

- } else {

- // The whole line gets reflown, need to check if we need to insert a break

- // for the postfix or not.

- StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

- unsigned ReflownColumn =

- getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);

- if (ReflownColumn <= ColumnLimit) {

- return ReflownColumn;

- }

- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

- }

+ size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);

+ return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);

+bool BreakableBlockComment::introducesBreakBeforeToken() const {

+ // A break is introduced when we want delimiters on newline.

+ return DelimitersOnNewline &&

+ Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;

}

-void BreakableBlockComment::replaceWhitespaceBefore(

- unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,

- Split SplitBefore, WhitespaceManager &Whitespaces) {

- if (LineIndex == 0) return;

+void BreakableBlockComment::reflow(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const {

StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

- if (SplitBefore.first != StringRef::npos) {

- // Here we need to reflow.

- assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&

- "Reflowing whitespace within a token");

- // This is the offset of the end of the last line relative to the start of

- // the token text in the token.

- unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

- Content[LineIndex - 1].size() -

- tokenAt(LineIndex).TokenText.data();

- unsigned WhitespaceLength = TrimmedContent.data() -

- tokenAt(LineIndex).TokenText.data() -

- WhitespaceOffsetInToken;

- Whitespaces.replaceWhitespaceInToken(

- tokenAt(LineIndex), WhitespaceOffsetInToken,

- /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",

- /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,

- /*Spaces=*/0);

- // Check if we need to also insert a break at the whitespace range.

- // For this we first adapt the reflow split relative to the beginning of the

- // content.

- // Note that we don't need a penalty for this break, since it doesn't change

- // the total number of lines.

- Split BreakSplit = SplitBefore;

- BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();

- unsigned ReflownColumn =

- getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);

- if (ReflownColumn > ColumnLimit) {

- insertBreak(LineIndex, 0, BreakSplit, Whitespaces);

+ // Here we need to reflow.

+ assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&

+ "Reflowing whitespace within a token");

+ // This is the offset of the end of the last line relative to the start of

+ // the token text in the token.

+ unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

+ Content[LineIndex - 1].size() -

+ tokenAt(LineIndex).TokenText.data();

+ unsigned WhitespaceLength = TrimmedContent.data() -

+ tokenAt(LineIndex).TokenText.data() -

+ WhitespaceOffsetInToken;

+ Whitespaces.replaceWhitespaceInToken(

+ tokenAt(LineIndex), WhitespaceOffsetInToken,

+ /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",

+ /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,

+ /*Spaces=*/0);

+void BreakableBlockComment::adaptStartOfLine(

+ unsigned LineIndex, WhitespaceManager &Whitespaces) const {

+ if (LineIndex == 0) {

+ if (DelimitersOnNewline) {

+ // Since we're breaking at index 1 below, the break position and the

+ // break length are the same.

+ size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);

+ if (BreakLength != StringRef::npos)

+ insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces);

}

return;

}

// Here no reflow with the previous line will happen.

// Fix the decoration of the line at LineIndex.

StringRef Prefix = Decoration;

@@ -651,6 +620,20 @@ void BreakableBlockComment::replaceWhitespaceBefore(

InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());

}

+BreakableToken::Split

+BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {

+ if (DelimitersOnNewline) {

+ // Replace the trailing whitespace of the last line with a newline.

+ // In case the last line is empty, the ending '*/' is already on its own

+ // line.

+ StringRef Line = Content.back().substr(TailOffset);

+ StringRef TrimmedLine = Line.rtrim(Blanks);

+ if (!TrimmedLine.empty())

+ return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());

+ }

+ return Split(StringRef::npos, 0);

bool BreakableBlockComment::mayReflow(unsigned LineIndex,

llvm::Regex &CommentPragmasRegex) const {

// Content[LineIndex] may exclude the indent after the '*' decoration. In that

@@ -664,15 +647,6 @@ bool BreakableBlockComment::mayReflow(unsigned LineIndex,

!switchesFormatting(tokenAt(LineIndex));

}

-unsigned

-BreakableBlockComment::getContentStartColumn(unsigned LineIndex,

- unsigned TailOffset) const {

- // If we break, we always break at the predefined indent.

- if (TailOffset != 0)

- return IndentAtLineBreak;

- return std::max(0, ContentColumn[LineIndex]);

BreakableLineCommentSection::BreakableLineCommentSection(

const FormatToken &Token, unsigned StartColumn,

unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,

@@ -686,7 +660,8 @@ BreakableLineCommentSection::BreakableLineCommentSection(

CurrentTok = CurrentTok->Next) {

LastLineTok = LineTok;

StringRef TokenText(CurrentTok->TokenText);

- assert(TokenText.startswith("//"));

+ assert((TokenText.startswith("//") || TokenText.startswith("#")) &&

+ "unsupported line comment prefix, '//' and '#' are supported");

size_t FirstLineIndex = Lines.size();

TokenText.split(Lines, "\n");

Content.resize(Lines.size());

@@ -696,11 +671,13 @@ BreakableLineCommentSection::BreakableLineCommentSection(

Prefix.resize(Lines.size());

OriginalPrefix.resize(Lines.size());

for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {

+ Lines[i] = Lines[i].ltrim(Blanks);

// We need to trim the blanks in case this is not the first line in a

// multiline comment. Then the indent is included in Lines[i].

StringRef IndentPrefix =

- getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));

- assert(IndentPrefix.startswith("//"));

+ getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);

+ assert((TokenText.startswith("//") || TokenText.startswith("#")) &&

+ "unsupported line comment prefix, '//' and '#' are supported");

OriginalPrefix[i] = Prefix[i] = IndentPrefix;

if (Lines[i].size() > Prefix[i].size() &&

isAlphanumeric(Lines[i][Prefix[i].size()])) {

@@ -714,22 +691,20 @@ BreakableLineCommentSection::BreakableLineCommentSection(

Prefix[i] = "///< ";

else if (Prefix[i] == "//!<")

Prefix[i] = "//!< ";

+ else if (Prefix[i] == "#" &&

+ Style.Language == FormatStyle::LK_TextProto)

+ Prefix[i] = "# ";

}

Tokens[i] = LineTok;

Content[i] = Lines[i].substr(IndentPrefix.size());

OriginalContentColumn[i] =

- StartColumn +

- encoding::columnWidthWithTabs(OriginalPrefix[i],

- StartColumn,

- Style.TabWidth,

- Encoding);

+ StartColumn + encoding::columnWidthWithTabs(OriginalPrefix[i],

+ StartColumn,

+ Style.TabWidth, Encoding);

ContentColumn[i] =

- StartColumn +

- encoding::columnWidthWithTabs(Prefix[i],

- StartColumn,

- Style.TabWidth,

- Encoding);

+ StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,

+ Style.TabWidth, Encoding);

// Calculate the end of the non-whitespace text in this line.

size_t EndOfLine = Content[i].find_last_not_of(Blanks);

@@ -760,20 +735,25 @@ BreakableLineCommentSection::BreakableLineCommentSection(

}

-unsigned BreakableLineCommentSection::getLineLengthAfterSplit(

- unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const {

- unsigned ContentStartColumn =

- (TailOffset == 0 ? ContentColumn[LineIndex]

- : OriginalContentColumn[LineIndex]);

- return ContentStartColumn + encoding::columnWidthWithTabs(

- Content[LineIndex].substr(TailOffset, Length),

- ContentStartColumn, Style.TabWidth, Encoding);

+unsigned

+BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const {

+ return encoding::columnWidthWithTabs(

+ Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,

+ Encoding);

+unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,

+ bool Break) const {

+ if (Break)

+ return OriginalContentColumn[LineIndex];

+ return ContentColumn[LineIndex];

}

-void BreakableLineCommentSection::insertBreak(unsigned LineIndex,

- unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) {

+void BreakableLineCommentSection::insertBreak(

+ unsigned LineIndex, unsigned TailOffset, Split Split,

+ WhitespaceManager &Whitespaces) const {

StringRef Text = Content[LineIndex].substr(TailOffset);

// Compute the offset of the split relative to the beginning of the token

// text.

@@ -792,37 +772,42 @@ void BreakableLineCommentSection::insertBreak(unsigned LineIndex,

/*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());

}

-BreakableComment::Split BreakableLineCommentSection::getSplitBefore(

- unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,

- llvm::Regex &CommentPragmasRegex) const {

+BreakableComment::Split BreakableLineCommentSection::getReflowSplit(

+ unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {

if (!mayReflow(LineIndex, CommentPragmasRegex))

return Split(StringRef::npos, 0);

- return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,

- ColumnLimit);

-unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(

- unsigned LineIndex, unsigned TailOffset,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit,

- Split SplitBefore) const {

- if (SplitBefore.first == StringRef::npos ||

- SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {

- // A piece of line, not the whole line, gets reflown.

- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

- } else {

- // The whole line gets reflown.

- unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();

- return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],

- StartColumn,

- Style.TabWidth,

- Encoding);

- }

-void BreakableLineCommentSection::replaceWhitespaceBefore(

- unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,

- Split SplitBefore, WhitespaceManager &Whitespaces) {

+ size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);

+ // In a line comment section each line is a separate token; thus, after a

+ // split we replace all whitespace before the current line comment token

+ // (which does not need to be included in the split), plus the start of the

+ // line up to where the content starts.

+ return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);

+void BreakableLineCommentSection::reflow(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const {

+ // Reflow happens between tokens. Replace the whitespace between the

+ // tokens by the empty string.

+ Whitespaces.replaceWhitespace(

+ *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,

+ /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);

+ // Replace the indent and prefix of the token with the reflow prefix.

+ unsigned WhitespaceLength =

+ Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();

+ Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],

+ /*Offset=*/0,

+ /*ReplaceChars=*/WhitespaceLength,

+ /*PreviousPostfix=*/"",

+ /*CurrentPrefix=*/ReflowPrefix,

+ /*InPPDirective=*/false,

+ /*Newlines=*/0,

+ /*Spaces=*/0);

+void BreakableLineCommentSection::adaptStartOfLine(

+ unsigned LineIndex, WhitespaceManager &Whitespaces) const {

// If this is the first line of a token, we need to inform Whitespace Manager

// about it: either adapt the whitespace range preceding it, or mark it as an

// untouchable token.

@@ -830,44 +815,25 @@ void BreakableLineCommentSection::replaceWhitespaceBefore(

// // line 1 \

// // line 2

if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {

- if (SplitBefore.first != StringRef::npos) {

- // Reflow happens between tokens. Replace the whitespace between the

- // tokens by the empty string.

- Whitespaces.replaceWhitespace(

- *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,

- /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);

- // Replace the indent and prefix of the token with the reflow prefix.

- unsigned WhitespaceLength =

- Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();

- Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],

- /*Offset=*/0,

- /*ReplaceChars=*/WhitespaceLength,

- /*PreviousPostfix=*/"",

- /*CurrentPrefix=*/ReflowPrefix,

- /*InPPDirective=*/false,

- /*Newlines=*/0,

- /*Spaces=*/0);

- } else {

- // This is the first line for the current token, but no reflow with the

- // previous token is necessary. However, we still may need to adjust the

- // start column. Note that ContentColumn[LineIndex] is the expected

- // content column after a possible update to the prefix, hence the prefix

- // length change is included.

- unsigned LineColumn =

- ContentColumn[LineIndex] -

- (Content[LineIndex].data() - Lines[LineIndex].data()) +

- (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());

- // We always want to create a replacement instead of adding an untouchable

- // token, even if LineColumn is the same as the original column of the

- // token. This is because WhitespaceManager doesn't align trailing

- // comments if they are untouchable.

- Whitespaces.replaceWhitespace(*Tokens[LineIndex],

- /*Newlines=*/1,

- /*Spaces=*/LineColumn,

- /*StartOfTokenColumn=*/LineColumn,

- /*InPPDirective=*/false);

- }

+ // This is the first line for the current token, but no reflow with the

+ // previous token is necessary. However, we still may need to adjust the

+ // start column. Note that ContentColumn[LineIndex] is the expected

+ // content column after a possible update to the prefix, hence the prefix

+ // length change is included.

+ unsigned LineColumn =

+ ContentColumn[LineIndex] -

+ (Content[LineIndex].data() - Lines[LineIndex].data()) +

+ (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());

+ // We always want to create a replacement instead of adding an untouchable

+ // token, even if LineColumn is the same as the original column of the

+ // token. This is because WhitespaceManager doesn't align trailing

+ // comments if they are untouchable.

+ Whitespaces.replaceWhitespace(*Tokens[LineIndex],

+ /*Newlines=*/1,

+ /*Spaces=*/LineColumn,

+ /*StartOfTokenColumn=*/LineColumn,

+ /*InPPDirective=*/false);

}

if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {

// Adjust the prefix if necessary.

@@ -880,16 +846,9 @@ void BreakableLineCommentSection::replaceWhitespaceBefore(

tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",

/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);

}

- // Add a break after a reflow split has been introduced, if necessary.

- // Note that this break doesn't need to be penalized, since it doesn't change

- // the number of lines.

- if (SplitBefore.first != StringRef::npos &&

- SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {

- insertBreak(LineIndex, 0, SplitBefore, Whitespaces);

- }

}

-void BreakableLineCommentSection::updateNextToken(LineState& State) const {

+void BreakableLineCommentSection::updateNextToken(LineState &State) const {

if (LastLineTok) {

State.NextToken = LastLineTok->Next;

}

@@ -903,20 +862,17 @@ bool BreakableLineCommentSection::mayReflow(

if (Lines[LineIndex].startswith("//")) {

IndentContent = Lines[LineIndex].substr(2);

}

+ // FIXME: Decide whether we want to reflow non-regular indents:

+ // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the

+ // OriginalPrefix[LineIndex-1]. That means we don't reflow

+ // // text that protrudes

+ // // into text with different indent

+ // We do reflow in that case in block comments.

return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&

mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&

!switchesFormatting(tokenAt(LineIndex)) &&

OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];

}

-unsigned

-BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,

- unsigned TailOffset) const {

- if (TailOffset != 0) {

- return OriginalContentColumn[LineIndex];

- }

- return ContentColumn[LineIndex];

-}

} // namespace format

} // namespace clang

diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h
index e642a538e21c..8ef26ef464da 100644
--- a/lib/Format/BreakableToken.h
+++ b/lib/Format/BreakableToken.h

@@ -33,19 +33,32 @@ bool switchesFormatting(const FormatToken &Token);

struct FormatStyle;

-/// \brief Base class for strategies on how to break tokens.

+/// \brief Base class for tokens / ranges of tokens that can allow breaking

+/// within the tokens - for example, to avoid whitespace beyond the column

+/// limit, or to reflow text.

///

-/// This is organised around the concept of a \c Split, which is a whitespace

-/// range that signifies a position of the content of a token where a

-/// reformatting might be done. Operating with splits is divided into 3

-/// operations:

+/// Generally, a breakable token consists of logical lines, addressed by a line

+/// index. For example, in a sequence of line comments, each line comment is its

+/// own logical line; similarly, for a block comment, each line in the block

+/// comment is on its own logical line.

+///

+/// There are two methods to compute the layout of the token:

+/// - getRangeLength measures the number of columns needed for a range of text

+/// within a logical line, and

+/// - getContentStartColumn returns the start column at which we want the

+/// content of a logical line to start (potentially after introducing a line

+/// break).

+///

+/// The mechanism to adapt the layout of the breakable token is organised

+/// around the concept of a \c Split, which is a whitespace range that signifies

+/// a position of the content of a token where a reformatting might be done.

+///

+/// Operating with splits is divided into two operations:

/// - getSplit, for finding a split starting at a position,

-/// - getLineLengthAfterSplit, for calculating the size in columns of the rest

-/// of the content after a split has been used for breaking, and

/// - insertBreak, for executing the split using a whitespace manager.

///

/// There is a pair of operations that are used to compress a long whitespace

-/// range with a single space if that will bring the line lenght under the

+/// range with a single space if that will bring the line length under the

/// column limit:

/// - getLineLengthAfterCompression, for calculating the size in columns of the

/// line after a whitespace range has been compressed, and

@@ -56,16 +69,23 @@ struct FormatStyle;

/// For tokens where the whitespace before each line needs to be also

/// reformatted, for example for tokens supporting reflow, there are analogous

/// operations that might be executed before the main line breaking occurs:

-/// - getSplitBefore, for finding a split such that the content preceding it

+/// - getReflowSplit, for finding a split such that the content preceding it

/// needs to be specially reflown,

-/// - getLineLengthAfterSplitBefore, for calculating the line length in columns

-/// of the remainder of the content after the beginning of the content has

-/// been reformatted, and

-/// - replaceWhitespaceBefore, for executing the reflow using a whitespace

+/// - reflow, for executing the split using a whitespace manager,

+/// - introducesBreakBeforeToken, for checking if reformatting the beginning

+/// of the content introduces a line break before it,

+/// - adaptStartOfLine, for executing the reflow using a whitespace

/// manager.

///

-/// FIXME: The interface seems set in stone, so we might want to just pull the

-/// strategy into the class, instead of controlling it from the outside.

+/// For tokens that require the whitespace after the last line to be

+/// reformatted, for example in multiline jsdoc comments that require the

+/// trailing '*/' to be on a line by itself, there are analogous operations

+/// that might be executed after the last line has been reformatted:

+/// - getSplitAfterLastLine, for finding a split after the last line that needs

+/// to be reflown,

+/// - replaceWhitespaceAfterLastLine, for executing the reflow using a

+/// whitespace manager.

+///

class BreakableToken {

public:

/// \brief Contains starting character index and length of split.

@@ -76,73 +96,122 @@ public:

/// \brief Returns the number of lines in this token in the original code.

virtual unsigned getLineCount() const = 0;

- /// \brief Returns the number of columns required to format the piece of line

- /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.

+ /// \brief Returns the number of columns required to format the text in the

+ /// byte range [\p Offset, \p Offset \c + \p Length).

+ ///

+ /// \p Offset is the byte offset from the start of the content of the line

+ /// at \p LineIndex.

+ ///

+ /// \p StartColumn is the column at which the text starts in the formatted

+ /// file, needed to compute tab stops correctly.

+ virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const = 0;

+ /// \brief Returns the number of columns required to format the text following

+ /// the byte \p Offset in the line \p LineIndex, including potentially

+ /// unbreakable sequences of tokens following after the end of the token.

+ ///

+ /// \p Offset is the byte offset from the start of the content of the line

+ /// at \p LineIndex.

+ ///

+ /// \p StartColumn is the column at which the text starts in the formatted

+ /// file, needed to compute tab stops correctly.

///

- /// Note that previous breaks are not taken into account. \p TailOffset is

- /// always specified from the start of the (original) line.

- /// \p Length can be set to StringRef::npos, which means "to the end of line".

- virtual unsigned

- getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const = 0;

+ /// For breakable tokens that never use extra space at the end of a line, this

+ /// is equivalent to getRangeLength with a Length of StringRef::npos.

+ virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,

+ unsigned StartColumn) const {

+ return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);

+ }

+ /// \brief Returns the column at which content in line \p LineIndex starts,

+ /// assuming no reflow.

+ ///

+ /// If \p Break is true, returns the column at which the line should start

+ /// after the line break.

+ /// If \p Break is false, returns the column at which the line itself will

+ /// start.

+ virtual unsigned getContentStartColumn(unsigned LineIndex,

+ bool Break) const = 0;

/// \brief Returns a range (offset, length) at which to break the line at

/// \p LineIndex, if previously broken at \p TailOffset. If possible, do not

- /// violate \p ColumnLimit.

+ /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in

+ /// the token is formatted starting at ContentStartColumn in the reformatted

+ /// file.

virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,

- unsigned ColumnLimit,

+ unsigned ColumnLimit, unsigned ContentStartColumn,

llvm::Regex &CommentPragmasRegex) const = 0;

/// \brief Emits the previously retrieved \p Split via \p Whitespaces.

virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) = 0;

+ WhitespaceManager &Whitespaces) const = 0;

- /// \brief Returns the number of columns required to format the piece of line

- /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range

- /// \p Split has been compressed into a single space.

- unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,

- Split Split) const;

+ /// \brief Returns the number of columns needed to format

+ /// \p RemainingTokenColumns, assuming that Split is within the range measured

+ /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced

+ /// to a single space.

+ unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,

+ Split Split) const;

/// \brief Replaces the whitespace range described by \p Split with a single

/// space.

virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,

Split Split,

- WhitespaceManager &Whitespaces) = 0;

+ WhitespaceManager &Whitespaces) const = 0;

- /// \brief Returns a whitespace range (offset, length) of the content at

- /// \p LineIndex such that the content preceding this range needs to be

- /// reformatted before any breaks are made to this line.

+ /// \brief Returns whether the token supports reflowing text.

+ virtual bool supportsReflow() const { return false; }

+ /// \brief Returns a whitespace range (offset, length) of the content at \p

+ /// LineIndex such that the content of that line is reflown to the end of the

+ /// previous one.

///

- /// \p PreviousEndColumn is the end column of the previous line after

- /// formatting.

+ /// Returning (StringRef::npos, 0) indicates reflowing is not possible.

///

- /// A result having offset == StringRef::npos means that no piece of the line

- /// needs to be reformatted before any breaks are made.

- virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,

- unsigned ColumnLimit,

+ /// The range will include any whitespace preceding the specified line's

+ /// content.

+ ///

+ /// If the split is not contained within one token, for example when reflowing

+ /// line comments, returns (0, <length>).

+ virtual Split getReflowSplit(unsigned LineIndex,

llvm::Regex &CommentPragmasRegex) const {

return Split(StringRef::npos, 0);

}

- /// \brief Returns the number of columns required to format the piece of line

- /// at \p LineIndex after the content preceding the whitespace range specified

- /// \p SplitBefore has been reformatted, but before any breaks are made to

- /// this line.

- virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,

- unsigned TailOffset,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit,

- Split SplitBefore) const {

- return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);

+ /// \brief Reflows the current line into the end of the previous one.

+ virtual void reflow(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const {}

+ /// \brief Returns whether there will be a line break at the start of the

+ /// token.

+ virtual bool introducesBreakBeforeToken() const {

+ return false;

}

/// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.

- /// Performs a reformatting of the content at \p LineIndex preceding the

- /// whitespace range \p SplitBefore.

- virtual void replaceWhitespaceBefore(unsigned LineIndex,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit, Split SplitBefore,

- WhitespaceManager &Whitespaces) {}

+ virtual void adaptStartOfLine(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const {}

+ /// \brief Returns a whitespace range (offset, length) of the content at

+ /// the last line that needs to be reformatted after the last line has been

+ /// reformatted.

+ ///

+ /// A result having offset == StringRef::npos means that no reformat is

+ /// necessary.

+ virtual Split getSplitAfterLastLine(unsigned TailOffset) const {

+ return Split(StringRef::npos, 0);

+ }

+ /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line

+ /// after the last line has been formatted by performing a reformatting.

+ void replaceWhitespaceAfterLastLine(unsigned TailOffset,

+ Split SplitAfterLastLine,

+ WhitespaceManager &Whitespaces) const {

+ insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,

+ Whitespaces);

+ }

/// \brief Updates the next token of \p State to the next token after this

/// one. This can be used when this token manages a set of underlying tokens

@@ -161,32 +230,7 @@ protected:

const FormatStyle &Style;

};

-/// \brief Base class for single line tokens that can be broken.

-///

-/// \c getSplit() needs to be implemented by child classes.

-class BreakableSingleLineToken : public BreakableToken {

-public:

- unsigned getLineCount() const override;

- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const override;

-protected:

- BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,

- StringRef Prefix, StringRef Postfix,

- bool InPPDirective, encoding::Encoding Encoding,

- const FormatStyle &Style);

- // The column in which the token starts.

- unsigned StartColumn;

- // The prefix a line needs after a break in the token.

- StringRef Prefix;

- // The postfix a line needs before introducing a break.

- StringRef Postfix;

- // The token text excluding the prefix and postfix.

- StringRef Line;

-};

-class BreakableStringLiteral : public BreakableSingleLineToken {

+class BreakableStringLiteral : public BreakableToken {

public:

/// \brief Creates a breakable token for a single line string literal.

///

@@ -198,11 +242,32 @@ public:

const FormatStyle &Style);

Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,

+ unsigned ReflowColumn,

llvm::Regex &CommentPragmasRegex) const override;

void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) override;

+ WhitespaceManager &Whitespaces) const override;

void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) override {}

+ WhitespaceManager &Whitespaces) const override {}

+ unsigned getLineCount() const override;

+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const override;

+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,

+ unsigned StartColumn) const override;

+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;

+protected:

+ // The column in which the token starts.

+ unsigned StartColumn;

+ // The prefix a line needs after a break in the token.

+ StringRef Prefix;

+ // The postfix a line needs before introducing a break.

+ StringRef Postfix;

+ // The token text excluding the prefix and postfix.

+ StringRef Line;

+ // Length of the sequence of tokens after this string literal that cannot

+ // contain line breaks.

+ unsigned UnbreakableTailLength;

};

class BreakableComment : public BreakableToken {

@@ -216,21 +281,15 @@ protected:

const FormatStyle &Style);

public:

+ bool supportsReflow() const override { return true; }

unsigned getLineCount() const override;

Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,

+ unsigned ReflowColumn,

llvm::Regex &CommentPragmasRegex) const override;

void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) override;

+ WhitespaceManager &Whitespaces) const override;

protected:

- virtual unsigned getContentStartColumn(unsigned LineIndex,

- unsigned TailOffset) const = 0;

- // Returns a split that divides Text into a left and right parts, such that

- // the left part is suitable for reflowing after PreviousEndColumn.

- Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,

- unsigned PreviousEndColumn, unsigned ColumnLimit) const;

// Returns the token containing the line at LineIndex.

const FormatToken &tokenAt(unsigned LineIndex) const;

@@ -289,21 +348,23 @@ public:

bool InPPDirective, encoding::Encoding Encoding,

const FormatStyle &Style);

- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const override;

+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const override;

+ unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,

+ unsigned StartColumn) const override;

+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;

void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) override;

- Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,

- unsigned ColumnLimit,

+ WhitespaceManager &Whitespaces) const override;

+ Split getReflowSplit(unsigned LineIndex,

llvm::Regex &CommentPragmasRegex) const override;

- unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,

- unsigned TailOffset,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit,

- Split SplitBefore) const override;

- void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,

- unsigned ColumnLimit, Split SplitBefore,

- WhitespaceManager &Whitespaces) override;

+ void reflow(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const override;

+ bool introducesBreakBeforeToken() const override;

+ void adaptStartOfLine(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const override;

+ Split getSplitAfterLastLine(unsigned TailOffset) const override;

bool mayReflow(unsigned LineIndex,

llvm::Regex &CommentPragmasRegex) const override;

@@ -318,14 +379,6 @@ private:

// considered part of the text).

void adjustWhitespace(unsigned LineIndex, int IndentDelta);

- // Computes the end column if the full Content from LineIndex gets reflown

- // after PreviousEndColumn.

- unsigned getReflownColumn(StringRef Content, unsigned LineIndex,

- unsigned PreviousEndColumn) const;

- unsigned getContentStartColumn(unsigned LineIndex,

- unsigned TailOffset) const override;

// The column at which the text of a broken line should start.

// Note that an optional decoration would go before that column.

// IndentAtLineBreak is a uniform position for all lines in a block comment,

@@ -348,6 +401,14 @@ private:

// If this block comment has decorations, this is the column of the start of

// the decorations.

unsigned DecorationColumn;

+ // If true, make sure that the opening '/**' and the closing '*/' each end on

+ // its own line. Styles like jsdoc require this for multiline comments.

+ bool DelimitersOnNewline;

+ // Length of the sequence of tokens after this block comment that cannot

+ // contain line breaks.

+ unsigned UnbreakableTailLength;

};

class BreakableLineCommentSection : public BreakableComment {

@@ -357,29 +418,23 @@ public:

bool InPPDirective, encoding::Encoding Encoding,

const FormatStyle &Style);

- unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,

- StringRef::size_type Length) const override;

+ unsigned getRangeLength(unsigned LineIndex, unsigned Offset,

+ StringRef::size_type Length,

+ unsigned StartColumn) const override;

+ unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;

void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,

- WhitespaceManager &Whitespaces) override;

- Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,

- unsigned ColumnLimit,

+ WhitespaceManager &Whitespaces) const override;

+ Split getReflowSplit(unsigned LineIndex,

llvm::Regex &CommentPragmasRegex) const override;

- unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,

- unsigned TailOffset,

- unsigned PreviousEndColumn,

- unsigned ColumnLimit,

- Split SplitBefore) const override;

- void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,

- unsigned ColumnLimit, Split SplitBefore,

- WhitespaceManager &Whitespaces) override;

+ void reflow(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const override;

+ void adaptStartOfLine(unsigned LineIndex,

+ WhitespaceManager &Whitespaces) const override;

void updateNextToken(LineState &State) const override;

bool mayReflow(unsigned LineIndex,

llvm::Regex &CommentPragmasRegex) const override;

private:

- unsigned getContentStartColumn(unsigned LineIndex,

- unsigned TailOffset) const override;

// OriginalPrefix[i] contains the original prefix of line i, including

// trailing whitespace before the start of the content. The indentation

// preceding the prefix is not included.

diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index 3bf1cd8f7c13..a3d38b244c5c 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp

@@ -12,8 +12,9 @@

///

//===----------------------------------------------------------------------===//

-#include "BreakableToken.h"

#include "ContinuationIndenter.h"

+#include "BreakableToken.h"

+#include "FormatInternal.h"

#include "WhitespaceManager.h"

#include "clang/Basic/OperatorPrecedence.h"

#include "clang/Basic/SourceManager.h"

@@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok,

(LessTok.Previous && LessTok.Previous->is(tok::equal))));

}

+// Returns the delimiter of a raw string literal, or None if TokenText is not

+// the text of a raw string literal. The delimiter could be the empty string.

+// For example, the delimiter of R"deli(cont)deli" is deli.

+static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) {

+ if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'.

+ || !TokenText.startswith("R\"") || !TokenText.endswith("\""))

+ return None;

+ // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has

+ // size at most 16 by the standard, so the first '(' must be among the first

+ // 19 bytes.

+ size_t LParenPos = TokenText.substr(0, 19).find_first_of('(');

+ if (LParenPos == StringRef::npos)

+ return None;

+ StringRef Delimiter = TokenText.substr(2, LParenPos - 2);

+ // Check that the string ends in ')Delimiter"'.

+ size_t RParenPos = TokenText.size() - Delimiter.size() - 2;

+ if (TokenText[RParenPos] != ')')

+ return None;

+ if (!TokenText.substr(RParenPos + 1).startswith(Delimiter))

+ return None;

+ return Delimiter;

+}

+RawStringFormatStyleManager::RawStringFormatStyleManager(

+ const FormatStyle &CodeStyle) {

+ for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {

+ FormatStyle Style;

+ if (!getPredefinedStyle(RawStringFormat.BasedOnStyle,

+ RawStringFormat.Language, &Style)) {

+ Style = getLLVMStyle();

+ Style.Language = RawStringFormat.Language;

+ }

+ Style.ColumnLimit = CodeStyle.ColumnLimit;

+ DelimiterStyle.insert({RawStringFormat.Delimiter, Style});

+ }

+}

+llvm::Optional<FormatStyle>

+RawStringFormatStyleManager::get(StringRef Delimiter) const {

+ auto It = DelimiterStyle.find(Delimiter);

+ if (It == DelimiterStyle.end())

+ return None;

+ return It->second;

+}

ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,

const AdditionalKeywords &Keywords,

const SourceManager &SourceMgr,

@@ -85,20 +133,32 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,

: Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),

Whitespaces(Whitespaces), Encoding(Encoding),

BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),

- CommentPragmasRegex(Style.CommentPragmas) {}

+ CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}

LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,

+ unsigned FirstStartColumn,

const AnnotatedLine *Line,

bool DryRun) {

LineState State;

State.FirstIndent = FirstIndent;

- State.Column = FirstIndent;

+ if (FirstStartColumn && Line->First->NewlinesBefore == 0)

+ State.Column = FirstStartColumn;

+ else

+ State.Column = FirstIndent;

+ // With preprocessor directive indentation, the line starts on column 0

+ // since it's indented after the hash, but FirstIndent is set to the

+ // preprocessor indent.

+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&

+ (Line->Type == LT_PreprocessorDirective ||

+ Line->Type == LT_ImportStatement))

+ State.Column = 0;

State.Line = Line;

State.NextToken = Line->First;

State.Stack.push_back(ParenState(FirstIndent, FirstIndent,

/*AvoidBinPacking=*/false,

/*NoLineBreak=*/false));

State.LineContainsContinuedForLoopSection = false;

+ State.NoContinuation = false;

State.StartOfStringLiteral = 0;

State.StartOfLineLevel = 0;

State.LowestLevelOnLine = 0;

@@ -120,9 +180,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) {

const FormatToken &Current = *State.NextToken;

const FormatToken &Previous = *Current.Previous;

assert(&Previous == Current.Previous);

- if (!Current.CanBreakBefore &&

- !(State.Stack.back().BreakBeforeClosingBrace &&

- Current.closesBlockOrBlockTypeList(Style)))

+ if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace &&

+ Current.closesBlockOrBlockTypeList(Style)))

return false;

// The opening "{" of a braced list has to be on the same line as the first

// element if it is nested in another braced init list or function call.

@@ -264,7 +323,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {

// We need special cases for ">>" which we have split into two ">" while

// lexing in order to make template parsing easier.

bool IsComparison = (Previous.getPrecedence() == prec::Relational ||

- Previous.getPrecedence() == prec::Equality) &&

+ Previous.getPrecedence() == prec::Equality ||

+ Previous.getPrecedence() == prec::Spaceship) &&

Previous.Previous &&

Previous.Previous->isNot(TT_BinaryOperator); // For >>.

bool LHSIsBinaryExpr =

@@ -316,6 +376,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {

Previous.TokenText == "\'\\n\'"))))

return true;

+ if (Previous.is(TT_BlockComment) && Previous.IsMultiline)

+ return true;

+ if (State.NoContinuation)

+ return true;

return false;

}

@@ -325,6 +391,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,

const FormatToken &Current = *State.NextToken;

assert(!State.Stack.empty());

+ State.NoContinuation = false;

if ((Current.is(TT_ImplicitStringLiteral) &&

(Current.Previous->Tok.getIdentifierInfo() == nullptr ||

Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==

@@ -376,9 +444,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,

unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;

+ // Indent preprocessor directives after the hash if required.

+ int PPColumnCorrection = 0;

+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&

+ Previous.is(tok::hash) && State.FirstIndent > 0 &&

+ (State.Line->Type == LT_PreprocessorDirective ||

+ State.Line->Type == LT_ImportStatement)) {

+ Spaces += State.FirstIndent;

+ // For preprocessor indent with tabs, State.Column will be 1 because of the

+ // hash. This causes second-level indents onward to have an extra space

+ // after the tabs. We avoid this misalignment by subtracting 1 from the

+ // column value passed to replaceWhitespace().

+ if (Style.UseTab != FormatStyle::UT_Never)

+ PPColumnCorrection = -1;

+ }

if (!DryRun)

Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,

- State.Column + Spaces);

+ State.Column + Spaces + PPColumnCorrection);

// If "BreakBeforeInheritanceComma" mode, don't break within the inheritance

// declaration unless there is multiple inheritance.

@@ -405,9 +489,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,

if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak &&

Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&

State.Column > getNewLineColumn(State) &&

- (!Previous.Previous ||

- !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while,

- tok::kw_switch)) &&

+ (!Previous.Previous || !Previous.Previous->isOneOf(

+ tok::kw_for, tok::kw_while, tok::kw_switch)) &&

// Don't do this for simple (no expressions) one-argument function calls

// as that feels like needlessly wasting whitespace, e.g.:

@@ -454,7 +537,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,

(P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&

!P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&

P->getPrecedence() != prec::Assignment &&

- P->getPrecedence() != prec::Relational) {

+ P->getPrecedence() != prec::Relational &&

+ P->getPrecedence() != prec::Spaceship) {

bool BreakBeforeOperator =

P->MustBreakBefore || P->is(tok::lessless) ||

(P->is(TT_BinaryOperator) &&

@@ -619,8 +703,18 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,

State.Stack.back().BreakBeforeParameter = false;

if (!DryRun) {

+ unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1;

+ if (Current.is(tok::r_brace) && Current.MatchingParen &&

+ // Only strip trailing empty lines for l_braces that have children, i.e.

+ // for function expressions (lambdas, arrows, etc).

+ !Current.MatchingParen->Children.empty()) {

+ // lambdas and arrow functions are expressions, thus their r_brace is not

+ // on its own line, and thus not covered by UnwrappedLineFormatter's logic

+ // about removing empty lines on closing blocks. Special case them here.

+ MaxEmptyLinesToKeep = 1;

+ }

unsigned Newlines = std::max(

- 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1));

+ 1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));

bool ContinuePPDirective =

State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;

Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,

@@ -661,9 +755,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,

// before the corresponding } or ].

if (PreviousNonComment &&

(PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||

- opensProtoMessageField(*PreviousNonComment, Style) ||

- (PreviousNonComment->is(TT_TemplateString) &&

- PreviousNonComment->opensScope())))

+ opensProtoMessageField(*PreviousNonComment, Style)))

State.Stack.back().BreakBeforeClosingBrace = true;

if (State.Stack.back().AvoidBinPacking) {

@@ -731,7 +823,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {

if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())

return State.Stack[State.Stack.size() - 2].LastSpace;

if (Current.is(tok::identifier) && Current.Next &&

- Current.Next->is(TT_DictLiteral))

+ (Current.Next->is(TT_DictLiteral) ||

+ ((Style.Language == FormatStyle::LK_Proto ||

+ Style.Language == FormatStyle::LK_TextProto) &&

+ Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace))))

return State.Stack.back().Indent;

if (NextNonComment->is(TT_ObjCStringLiteral) &&

State.StartOfStringLiteral != 0)

@@ -871,8 +966,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,

// Next(...)

// ^ line up here.

State.Stack.back().Indent =

- State.Column + (Style.BreakConstructorInitializers ==

- FormatStyle::BCIS_BeforeComma ? 0 : 2);

+ State.Column +

+ (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma

+ ? 0

+ : 2);

State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;

if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)

State.Stack.back().AvoidBinPacking = true;

@@ -884,7 +981,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,

State.FirstIndent + Style.ConstructorInitializerIndentWidth;

State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;

if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)

- State.Stack.back().AvoidBinPacking = true;

+ State.Stack.back().AvoidBinPacking = true;

}

if (Current.is(TT_InheritanceColon))

State.Stack.back().Indent =

@@ -912,8 +1009,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,

State.Stack[i].NoLineBreak = true;

State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;

}

- if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||

- Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&

+ if (Previous &&

+ (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||

+ Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) &&

!Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {

State.Stack.back().NestedBlockInlined =

!Newline &&

@@ -922,13 +1020,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,

moveStatePastFakeLParens(State, Newline);

moveStatePastScopeCloser(State);

- if (Current.is(TT_TemplateString) && Current.opensScope())

- State.Stack.back().LastSpace =

- (Current.IsMultiline ? Current.LastLineColumnWidth

- : State.Column + Current.ColumnWidth) -

- strlen("${");

- bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&

- !State.Stack.back().NoLineBreakInOperand;

+ bool AllowBreak = !State.Stack.back().NoLineBreak &&

+ !State.Stack.back().NoLineBreakInOperand;

moveStatePastScopeOpener(State, Newline);

moveStatePastFakeRParens(State);

@@ -942,13 +1035,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,

State.Column += Current.ColumnWidth;

State.NextToken = State.NextToken->Next;

- unsigned Penalty = 0;

- if (CanBreakProtrudingToken)

- Penalty = breakProtrudingToken(Current, State, DryRun);

- if (State.Column > getColumnLimit(State)) {

- unsigned ExcessCharacters = State.Column - getColumnLimit(State);

- Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;

- }

+ unsigned Penalty =

+ handleEndOfLine(Current, State, DryRun, AllowBreak);

if (Current.Role)

Current.Role->formatFromToken(State, this, DryRun);

@@ -1072,14 +1161,13 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,

bool EndsInComma = Current.MatchingParen &&

Current.MatchingParen->Previous &&

Current.MatchingParen->Previous->is(tok::comma);

- AvoidBinPacking =

- EndsInComma || Current.is(TT_DictLiteral) ||

- Style.Language == FormatStyle::LK_Proto ||

- Style.Language == FormatStyle::LK_TextProto ||

- !Style.BinPackArguments ||

- (NextNoComment &&

- NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,

- TT_DesignatedInitializerLSquare));

+ AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) ||

+ Style.Language == FormatStyle::LK_Proto ||

+ Style.Language == FormatStyle::LK_TextProto ||

+ !Style.BinPackArguments ||

+ (NextNoComment &&

+ NextNoComment->isOneOf(TT_DesignatedInitializerPeriod,

+ TT_DesignatedInitializerLSquare));

BreakBeforeParameter = EndsInComma;

if (Current.ParameterCount > 1)

NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);

@@ -1098,18 +1186,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,

LastSpace = std::max(LastSpace, State.Stack.back().Indent);

}

- // JavaScript template strings are special as we always want to indent

- // nested expressions relative to the ${}. Otherwise, this can create quite

- // a mess.

- if (Current.is(TT_TemplateString)) {

- unsigned Column = Current.IsMultiline

- ? Current.LastLineColumnWidth

- : State.Column + Current.ColumnWidth;

- NewIndent = Column;

- LastSpace = Column;

- NestedBlockIndent = Column;

- }

bool EndsInComma =

Current.MatchingParen &&

Current.MatchingParen->getPreviousNonComment() &&

@@ -1200,11 +1276,93 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) {

State.Stack.back().BreakBeforeParameter = true;

}

-unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,

- LineState &State) {

- if (!Current.IsMultiline)

+static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,

+ unsigned TabWidth,

+ encoding::Encoding Encoding) {

+ size_t LastNewlinePos = Text.find_last_of("\n");

+ if (LastNewlinePos == StringRef::npos) {

+ return StartColumn +

+ encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);

+ } else {

+ return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos),

+ /*StartColumn=*/0, TabWidth, Encoding);

+ }

+unsigned ContinuationIndenter::reformatRawStringLiteral(

+ const FormatToken &Current, LineState &State,

+ const FormatStyle &RawStringStyle, bool DryRun) {

+ unsigned StartColumn = State.Column - Current.ColumnWidth;

+ auto Delimiter = *getRawStringDelimiter(Current.TokenText);

+ // The text of a raw string is between the leading 'R"delimiter(' and the

+ // trailing 'delimiter)"'.

+ unsigned PrefixSize = 3 + Delimiter.size();

+ unsigned SuffixSize = 2 + Delimiter.size();

+ // The first start column is the column the raw text starts.

+ unsigned FirstStartColumn = StartColumn + PrefixSize;

+ // The next start column is the intended indentation a line break inside

+ // the raw string at level 0. It is determined by the following rules:

+ // - if the content starts on newline, it is one level more than the current

+ // indent, and

+ // - if the content does not start on a newline, it is the first start

+ // column.

+ // These rules have the advantage that the formatted content both does not

+ // violate the rectangle rule and visually flows within the surrounding

+ // source.

+ bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n';

+ unsigned NextStartColumn = ContentStartsOnNewline

+ ? State.Stack.back().Indent + Style.IndentWidth

+ : FirstStartColumn;

+ // The last start column is the column the raw string suffix starts if it is

+ // put on a newline.

+ // The last start column is the intended indentation of the raw string postfix

+ // if it is put on a newline. It is determined by the following rules:

+ // - if the raw string prefix starts on a newline, it is the column where

+ // that raw string prefix starts, and

+ // - if the raw string prefix does not start on a newline, it is the current

+ // indent.

+ unsigned LastStartColumn = Current.NewlinesBefore

+ ? FirstStartColumn - PrefixSize

+ : State.Stack.back().Indent;

+ std::string RawText =

+ Current.TokenText.substr(PrefixSize).drop_back(SuffixSize);

+ std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(

+ RawStringStyle, RawText, {tooling::Range(0, RawText.size())},

+ FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",

+ /*Status=*/nullptr);

+ auto NewCode = applyAllReplacements(RawText, Fixes.first);

+ tooling::Replacements NoFixes;

+ if (!NewCode) {

+ State.Column += Current.ColumnWidth;

return 0;

+ }

+ if (!DryRun) {

+ SourceLocation OriginLoc =

+ Current.Tok.getLocation().getLocWithOffset(PrefixSize);

+ for (const tooling::Replacement &Fix : Fixes.first) {

+ auto Err = Whitespaces.addReplacement(tooling::Replacement(

+ SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),

+ Fix.getLength(), Fix.getReplacementText()));

+ if (Err) {

+ llvm::errs() << "Failed to reformat raw string: "

+ << llvm::toString(std::move(Err)) << "\n";

+ }

+ unsigned RawLastLineEndColumn = getLastLineEndColumn(

+ *NewCode, FirstStartColumn, Style.TabWidth, Encoding);

+ State.Column = RawLastLineEndColumn + SuffixSize;

+ return Fixes.second;

+unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,

+ LineState &State) {

// Break before further function parameters on all levels.

for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)

State.Stack[i].BreakBeforeParameter = true;

@@ -1219,33 +1377,85 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,

return 0;

}

-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,

- LineState &State,

- bool DryRun) {

- // Don't break multi-line tokens other than block comments. Instead, just

- // update the state.

- if (Current.isNot(TT_BlockComment) && Current.IsMultiline)

- return addMultilineToken(Current, State);

- // Don't break implicit string literals or import statements.

- if (Current.is(TT_ImplicitStringLiteral) ||

- State.Line->Type == LT_ImportStatement)

- return 0;

+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,

+ LineState &State, bool DryRun,

+ bool AllowBreak) {

+ unsigned Penalty = 0;

+ // Compute the raw string style to use in case this is a raw string literal

+ // that can be reformatted.

+ auto RawStringStyle = getRawStringStyle(Current, State);

+ if (RawStringStyle) {

+ Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);

+ } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {

+ // Don't break multi-line tokens other than block comments and raw string

+ // literals. Instead, just update the state.

+ Penalty = addMultilineToken(Current, State);

+ } else if (State.Line->Type != LT_ImportStatement) {

+ // We generally don't break import statements.

+ LineState OriginalState = State;

+ // Whether we force the reflowing algorithm to stay strictly within the

+ // column limit.

+ bool Strict = false;

+ // Whether the first non-strict attempt at reflowing did intentionally

+ // exceed the column limit.

+ bool Exceeded = false;

+ std::tie(Penalty, Exceeded) = breakProtrudingToken(

+ Current, State, AllowBreak, /*DryRun=*/true, Strict);

+ if (Exceeded) {

+ // If non-strict reflowing exceeds the column limit, try whether strict

+ // reflowing leads to an overall lower penalty.

+ LineState StrictState = OriginalState;

+ unsigned StrictPenalty =

+ breakProtrudingToken(Current, StrictState, AllowBreak,

+ /*DryRun=*/true, /*Strict=*/true)

+ .first;

+ Strict = StrictPenalty <= Penalty;

+ if (Strict) {

+ Penalty = StrictPenalty;

+ State = StrictState;

+ }

+ if (!DryRun) {

+ // If we're not in dry-run mode, apply the changes with the decision on

+ // strictness made above.

+ breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,

+ Strict);

+ }

+ if (State.Column > getColumnLimit(State)) {

+ unsigned ExcessCharacters = State.Column - getColumnLimit(State);

+ Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;

+ }

+ return Penalty;

- if (!Current.isStringLiteral() && !Current.is(tok::comment))

- return 0;

+llvm::Optional<FormatStyle>

+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,

+ const LineState &State) {

+ if (!Current.isStringLiteral())

+ return None;

+ auto Delimiter = getRawStringDelimiter(Current.TokenText);

+ if (!Delimiter)

+ return None;

+ auto RawStringStyle = RawStringFormats.get(*Delimiter);

+ if (!RawStringStyle)

+ return None;

+ RawStringStyle->ColumnLimit = getColumnLimit(State);

+ return RawStringStyle;

- std::unique_ptr<BreakableToken> Token;

+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(

+ const FormatToken &Current, LineState &State, bool AllowBreak) {

unsigned StartColumn = State.Column - Current.ColumnWidth;

- unsigned ColumnLimit = getColumnLimit(State);

if (Current.isStringLiteral()) {

// FIXME: String literal breaking is currently disabled for Java and JS, as

// it requires strings to be merged using "+" which we don't support.

if (Style.Language == FormatStyle::LK_Java ||

Style.Language == FormatStyle::LK_JavaScript ||

- !Style.BreakStringLiterals)

- return 0;

+ !Style.BreakStringLiterals ||

+ !AllowBreak)

+ return nullptr;

// Don't break string literals inside preprocessor directives (except for

// #define directives, as their contents are stored in separate lines and

@@ -1253,11 +1463,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,

// This way we avoid breaking code with line directives and unknown

// preprocessor directives that contain long string literals.

if (State.Line->Type == LT_PreprocessorDirective)

- return 0;

+ return nullptr;

// Exempts unterminated string literals from line breaking. The user will

// likely want to terminate the string before any line breaking is done.

if (Current.IsUnterminatedLiteral)

- return 0;

+ return nullptr;

StringRef Text = Current.TokenText;

StringRef Prefix;

@@ -1272,114 +1482,359 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,

Text.startswith(Prefix = "u8\"") ||

Text.startswith(Prefix = "L\""))) ||

(Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {

- Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,

- Postfix, State.Line->InPPDirective,

- Encoding, Style));

- } else {

- return 0;

+ return llvm::make_unique<BreakableStringLiteral>(

+ Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective,

+ Encoding, Style);

}

} else if (Current.is(TT_BlockComment)) {

- if (!Current.isTrailingComment() || !Style.ReflowComments ||

+ if (!Style.ReflowComments ||

// If a comment token switches formatting, like

// /* clang-format on */, we don't want to break it further,

// but we may still want to adjust its indentation.

- switchesFormatting(Current))

- return addMultilineToken(Current, State);

- Token.reset(new BreakableBlockComment(

+ switchesFormatting(Current)) {

+ return nullptr;

+ }

+ return llvm::make_unique<BreakableBlockComment>(

Current, StartColumn, Current.OriginalColumn, !Current.Previous,

- State.Line->InPPDirective, Encoding, Style));

+ State.Line->InPPDirective, Encoding, Style);

} else if (Current.is(TT_LineComment) &&

(Current.Previous == nullptr ||

Current.Previous->isNot(TT_ImplicitStringLiteral))) {

if (!Style.ReflowComments ||

CommentPragmasRegex.match(Current.TokenText.substr(2)) ||

switchesFormatting(Current))

- return 0;

- Token.reset(new BreakableLineCommentSection(

+ return nullptr;

+ return llvm::make_unique<BreakableLineCommentSection>(

Current, StartColumn, Current.OriginalColumn, !Current.Previous,

- /*InPPDirective=*/false, Encoding, Style));

+ /*InPPDirective=*/false, Encoding, Style);

+ }

+ return nullptr;

+std::pair<unsigned, bool>

+ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,

+ LineState &State, bool AllowBreak,

+ bool DryRun, bool Strict) {

+ std::unique_ptr<const BreakableToken> Token =

+ createBreakableToken(Current, State, AllowBreak);

+ if (!Token)

+ return {0, false};

+ assert(Token->getLineCount() > 0);

+ unsigned ColumnLimit = getColumnLimit(State);

+ if (Current.is(TT_LineComment)) {

// We don't insert backslashes when breaking line comments.

ColumnLimit = Style.ColumnLimit;

- } else {

- return 0;

}

if (Current.UnbreakableTailLength >= ColumnLimit)

- return 0;

- unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;

- bool BreakInserted = false;

+ return {0, false};

+ // ColumnWidth was already accounted into State.Column before calling

+ // breakProtrudingToken.

+ unsigned StartColumn = State.Column - Current.ColumnWidth;

+ unsigned NewBreakPenalty = Current.isStringLiteral()

+ ? Style.PenaltyBreakString

+ : Style.PenaltyBreakComment;

+ // Stores whether we intentionally decide to let a line exceed the column

+ // limit.

+ bool Exceeded = false;

+ // Stores whether we introduce a break anywhere in the token.

+ bool BreakInserted = Token->introducesBreakBeforeToken();

+ // Store whether we inserted a new line break at the end of the previous

+ // logical line.

+ bool NewBreakBefore = false;

// We use a conservative reflowing strategy. Reflow starts after a line is

// broken or the corresponding whitespace compressed. Reflow ends as soon as a

// line that doesn't get reflown with the previous line is reached.

- bool ReflowInProgress = false;

+ bool Reflow = false;

+ // Keep track of where we are in the token:

+ // Where we are in the content of the current logical line.

+ unsigned TailOffset = 0;

+ // The column number we're currently at.

+ unsigned ContentStartColumn =

+ Token->getContentStartColumn(0, /*Break=*/false);

+ // The number of columns left in the current logical line after TailOffset.

+ unsigned RemainingTokenColumns =

+ Token->getRemainingLength(0, TailOffset, ContentStartColumn);

+ // Adapt the start of the token, for example indent.

+ if (!DryRun)

+ Token->adaptStartOfLine(0, Whitespaces);

unsigned Penalty = 0;

- unsigned RemainingTokenColumns = 0;

+ DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn

+ << ".\n");

for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();

LineIndex != EndIndex; ++LineIndex) {

- BreakableToken::Split SplitBefore(StringRef::npos, 0);

- if (ReflowInProgress) {

- SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,

- RemainingSpace, CommentPragmasRegex);

- }

- ReflowInProgress = SplitBefore.first != StringRef::npos;

- unsigned TailOffset =

- ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;

- if (!DryRun)

- Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,

- RemainingSpace, SplitBefore, Whitespaces);

- RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(

- LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);

- while (RemainingTokenColumns > RemainingSpace) {

- BreakableToken::Split Split = Token->getSplit(

- LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);

+ DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow

+ << ")\n");

+ NewBreakBefore = false;

+ // If we did reflow the previous line, we'll try reflowing again. Otherwise

+ // we'll start reflowing if the current line is broken or whitespace is

+ // compressed.

+ bool TryReflow = Reflow;

+ // Break the current token until we can fit the rest of the line.

+ while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {

+ DEBUG(llvm::dbgs() << " Over limit, need: "

+ << (ContentStartColumn + RemainingTokenColumns)

+ << ", space: " << ColumnLimit

+ << ", reflown prefix: " << ContentStartColumn

+ << ", offset in line: " << TailOffset << "\n");

+ // If the current token doesn't fit, find the latest possible split in the

+ // current line so that breaking at it will be under the column limit.

+ // FIXME: Use the earliest possible split while reflowing to correctly

+ // compress whitespace within a line.

+ BreakableToken::Split Split =

+ Token->getSplit(LineIndex, TailOffset, ColumnLimit,

+ ContentStartColumn, CommentPragmasRegex);

if (Split.first == StringRef::npos) {

- // The last line's penalty is handled in addNextStateToQueue().

+ // No break opportunity - update the penalty and continue with the next

+ // logical line.

if (LineIndex < EndIndex - 1)

+ // The last line's penalty is handled in addNextStateToQueue().

Penalty += Style.PenaltyExcessCharacter *

- (RemainingTokenColumns - RemainingSpace);

+ (ContentStartColumn + RemainingTokenColumns - ColumnLimit);

+ DEBUG(llvm::dbgs() << " No break opportunity.\n");

break;

}

assert(Split.first != 0);

- // Check if compressing the whitespace range will bring the line length

- // under the limit. If that is the case, we perform whitespace compression

- // instead of inserting a line break.

- unsigned RemainingTokenColumnsAfterCompression =

- Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);

- if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {

- RemainingTokenColumns = RemainingTokenColumnsAfterCompression;

- ReflowInProgress = true;

- if (!DryRun)

- Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);

- break;

+ if (Token->supportsReflow()) {

+ // Check whether the next natural split point after the current one can

+ // still fit the line, either because we can compress away whitespace,

+ // or because the penalty the excess characters introduce is lower than

+ // the break penalty.

+ // We only do this for tokens that support reflowing, and thus allow us

+ // to change the whitespace arbitrarily (e.g. comments).

+ // Other tokens, like string literals, can be broken on arbitrary

+ // positions.

+ // First, compute the columns from TailOffset to the next possible split

+ // position.

+ // For example:

+ // ColumnLimit: |

+ // // Some text that breaks

+ // ^ tail offset

+ // ^-- split

+ // ^-------- to split columns

+ // ^--- next split

+ // ^--------------- to next split columns

+ unsigned ToSplitColumns = Token->getRangeLength(

+ LineIndex, TailOffset, Split.first, ContentStartColumn);

+ DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n");

+ BreakableToken::Split NextSplit = Token->getSplit(

+ LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,

+ ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);

+ // Compute the columns necessary to fit the next non-breakable sequence

+ // into the current line.

+ unsigned ToNextSplitColumns = 0;

+ if (NextSplit.first == StringRef::npos) {

+ ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,

+ ContentStartColumn);

+ } else {

+ ToNextSplitColumns = Token->getRangeLength(

+ LineIndex, TailOffset,

+ Split.first + Split.second + NextSplit.first, ContentStartColumn);

+ }

+ // Compress the whitespace between the break and the start of the next

+ // unbreakable sequence.

+ ToNextSplitColumns =

+ Token->getLengthAfterCompression(ToNextSplitColumns, Split);

+ DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn

+ << "\n");

+ DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n");

+ // If the whitespace compression makes us fit, continue on the current

+ // line.

+ bool ContinueOnLine =

+ ContentStartColumn + ToNextSplitColumns <= ColumnLimit;

+ unsigned ExcessCharactersPenalty = 0;

+ if (!ContinueOnLine && !Strict) {

+ // Similarly, if the excess characters' penalty is lower than the

+ // penalty of introducing a new break, continue on the current line.

+ ExcessCharactersPenalty =

+ (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *

+ Style.PenaltyExcessCharacter;

+ DEBUG(llvm::dbgs()

+ << " Penalty excess: " << ExcessCharactersPenalty

+ << "\n break : " << NewBreakPenalty << "\n");

+ if (ExcessCharactersPenalty < NewBreakPenalty) {

+ Exceeded = true;

+ ContinueOnLine = true;

+ }

+ if (ContinueOnLine) {

+ DEBUG(llvm::dbgs() << " Continuing on line...\n");

+ // The current line fits after compressing the whitespace - reflow

+ // the next line into it if possible.

+ TryReflow = true;

+ if (!DryRun)

+ Token->compressWhitespace(LineIndex, TailOffset, Split,

+ Whitespaces);

+ // When we continue on the same line, leave one space between content.

+ ContentStartColumn += ToSplitColumns + 1;

+ Penalty += ExcessCharactersPenalty;

+ TailOffset += Split.first + Split.second;

+ RemainingTokenColumns = Token->getRemainingLength(

+ LineIndex, TailOffset, ContentStartColumn);

+ continue;

+ }

}

- unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(

- LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);

+ DEBUG(llvm::dbgs() << " Breaking...\n");

+ ContentStartColumn =

+ Token->getContentStartColumn(LineIndex, /*Break=*/true);

+ unsigned NewRemainingTokenColumns = Token->getRemainingLength(

+ LineIndex, TailOffset + Split.first + Split.second,

+ ContentStartColumn);

// When breaking before a tab character, it may be moved by a few columns,

// but will still be expanded to the next tab stop, so we don't save any

// columns.

- if (NewRemainingTokenColumns == RemainingTokenColumns)

+ if (NewRemainingTokenColumns == RemainingTokenColumns) {

+ // FIXME: Do we need to adjust the penalty?

break;

+ }

assert(NewRemainingTokenColumns < RemainingTokenColumns);

+ DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first

+ << ", " << Split.second << "\n");

if (!DryRun)

Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);

- Penalty += Current.SplitPenalty;

- unsigned ColumnsUsed =

- Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);

- if (ColumnsUsed > ColumnLimit) {

- Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);

- }

+ Penalty += NewBreakPenalty;

TailOffset += Split.first + Split.second;

RemainingTokenColumns = NewRemainingTokenColumns;

- ReflowInProgress = true;

BreakInserted = true;

+ NewBreakBefore = true;

}

+ // In case there's another line, prepare the state for the start of the next

+ // line.

+ if (LineIndex + 1 != EndIndex) {

+ unsigned NextLineIndex = LineIndex + 1;

+ if (NewBreakBefore)

+ // After breaking a line, try to reflow the next line into the current

+ // one once RemainingTokenColumns fits.

+ TryReflow = true;

+ if (TryReflow) {

+ // We decided that we want to try reflowing the next line into the

+ // current one.

+ // We will now adjust the state as if the reflow is successful (in

+ // preparation for the next line), and see whether that works. If we

+ // decide that we cannot reflow, we will later reset the state to the

+ // start of the next line.

+ Reflow = false;

+ // As we did not continue breaking the line, RemainingTokenColumns is

+ // known to fit after ContentStartColumn. Adapt ContentStartColumn to

+ // the position at which we want to format the next line if we do

+ // actually reflow.

+ // When we reflow, we need to add a space between the end of the current

+ // line and the next line's start column.

+ ContentStartColumn += RemainingTokenColumns + 1;

+ // Get the split that we need to reflow next logical line into the end

+ // of the current one; the split will include any leading whitespace of

+ // the next logical line.

+ BreakableToken::Split SplitBeforeNext =

+ Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);

+ DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn

+ << "\n Potential reflow split: ");

+ if (SplitBeforeNext.first != StringRef::npos) {

+ DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "

+ << SplitBeforeNext.second << "\n");

+ TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;

+ // If the rest of the next line fits into the current line below the

+ // column limit, we can safely reflow.

+ RemainingTokenColumns = Token->getRemainingLength(

+ NextLineIndex, TailOffset, ContentStartColumn);

+ Reflow = true;

+ if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {

+ DEBUG(llvm::dbgs() << " Over limit after reflow, need: "

+ << (ContentStartColumn + RemainingTokenColumns)

+ << ", space: " << ColumnLimit

+ << ", reflown prefix: " << ContentStartColumn

+ << ", offset in line: " << TailOffset << "\n");

+ // If the whole next line does not fit, try to find a point in

+ // the next line at which we can break so that attaching the part

+ // of the next line to that break point onto the current line is

+ // below the column limit.

+ BreakableToken::Split Split =

+ Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,

+ ContentStartColumn, CommentPragmasRegex);

+ if (Split.first == StringRef::npos) {

+ DEBUG(llvm::dbgs() << " Did not find later break\n");

+ Reflow = false;

+ } else {

+ // Check whether the first split point gets us below the column

+ // limit. Note that we will execute this split below as part of

+ // the normal token breaking and reflow logic within the line.

+ unsigned ToSplitColumns = Token->getRangeLength(

+ NextLineIndex, TailOffset, Split.first, ContentStartColumn);

+ if (ContentStartColumn + ToSplitColumns > ColumnLimit) {

+ DEBUG(llvm::dbgs() << " Next split protrudes, need: "

+ << (ContentStartColumn + ToSplitColumns)

+ << ", space: " << ColumnLimit);

+ unsigned ExcessCharactersPenalty =

+ (ContentStartColumn + ToSplitColumns - ColumnLimit) *

+ Style.PenaltyExcessCharacter;

+ if (NewBreakPenalty < ExcessCharactersPenalty) {

+ Reflow = false;

+ }

+ } else {

+ DEBUG(llvm::dbgs() << "not found.\n");

+ }

+ if (!Reflow) {

+ // If we didn't reflow into the next line, the only space to consider is

+ // the next logical line. Reset our state to match the start of the next

+ // line.

+ TailOffset = 0;

+ ContentStartColumn =

+ Token->getContentStartColumn(NextLineIndex, /*Break=*/false);

+ RemainingTokenColumns = Token->getRemainingLength(

+ NextLineIndex, TailOffset, ContentStartColumn);

+ // Adapt the start of the token, for example indent.

+ if (!DryRun)

+ Token->adaptStartOfLine(NextLineIndex, Whitespaces);

+ } else {

+ // If we found a reflow split and have added a new break before the next

+ // line, we are going to remove the line break at the start of the next

+ // logical line. For example, here we'll add a new line break after

+ // 'text', and subsequently delete the line break between 'that' and

+ // 'reflows'.

+ // // some text that

+ // // reflows

+ // ->

+ // // some text

+ // // that reflows

+ // When adding the line break, we also added the penalty for it, so we

+ // need to subtract that penalty again when we remove the line break due

+ // to reflowing.

+ if (NewBreakBefore) {

+ assert(Penalty >= NewBreakPenalty);

+ Penalty -= NewBreakPenalty;

+ }

+ if (!DryRun)

+ Token->reflow(NextLineIndex, Whitespaces);

+ }

+ BreakableToken::Split SplitAfterLastLine =

+ Token->getSplitAfterLastLine(TailOffset);

+ if (SplitAfterLastLine.first != StringRef::npos) {

+ DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");

+ if (!DryRun)

+ Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,

+ Whitespaces);

+ ContentStartColumn =

+ Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);

+ RemainingTokenColumns = Token->getRemainingLength(

+ Token->getLineCount() - 1,

+ TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,

+ ContentStartColumn);

}

- State.Column = RemainingTokenColumns;

+ State.Column = ContentStartColumn + RemainingTokenColumns -

+ Current.UnbreakableTailLength;

if (BreakInserted) {

// If we break the token inside a parameter list, we need to break before

@@ -1390,15 +1845,15 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,

State.Stack[i].BreakBeforeParameter = true;

}

- Penalty += Current.isStringLiteral() ? Style.PenaltyBreakString

- : Style.PenaltyBreakComment;

+ if (Current.is(TT_BlockComment))

+ State.NoContinuation = true;

State.Stack.back().LastSpace = StartColumn;

}

Token->updateNextToken(State);

- return Penalty;

+ return {Penalty, Exceeded};

}

unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {

diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h
index 9a06aa6f6267..ded7bfab4267 100644
--- a/lib/Format/ContinuationIndenter.h
+++ b/lib/Format/ContinuationIndenter.h

@@ -20,6 +20,8 @@

#include "FormatToken.h"

#include "clang/Format/Format.h"

#include "llvm/Support/Regex.h"

+#include <map>

+#include <tuple>

namespace clang {

class SourceManager;

@@ -27,11 +29,21 @@ class SourceManager;

namespace format {

class AnnotatedLine;

+class BreakableToken;

struct FormatToken;

struct LineState;

struct ParenState;

+struct RawStringFormatStyleManager;

class WhitespaceManager;

+struct RawStringFormatStyleManager {

+ llvm::StringMap<FormatStyle> DelimiterStyle;

+ RawStringFormatStyleManager(const FormatStyle &CodeStyle);

+ llvm::Optional<FormatStyle> get(StringRef Delimiter) const;

+};

class ContinuationIndenter {

public:

/// \brief Constructs a \c ContinuationIndenter to format \p Line starting in

@@ -44,9 +56,11 @@ public:

bool BinPackInconclusiveFunctions);

/// \brief Get the initial state, i.e. the state after placing \p Line's

- /// first token at \p FirstIndent.

- LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,

- bool DryRun);

+ /// first token at \p FirstIndent. When reformatting a fragment of code, as in

+ /// the case of formatting inside raw string literals, \p FirstStartColumn is

+ /// the column at which the state of the parent formatter is.

+ LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,

+ const AnnotatedLine *Line, bool DryRun);

// FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a

// better home.

@@ -88,17 +102,52 @@ private:

/// \brief Update 'State' with the next token opening a nested block.

void moveStateToNewBlock(LineState &State);

+ /// \brief Reformats a raw string literal.

+ ///

+ /// \returns An extra penalty induced by reformatting the token.

+ unsigned reformatRawStringLiteral(const FormatToken &Current,

+ LineState &State,

+ const FormatStyle &RawStringStyle,

+ bool DryRun);

+ /// \brief If the current token is at the end of the current line, handle

+ /// the transition to the next line.

+ unsigned handleEndOfLine(const FormatToken &Current, LineState &State,

+ bool DryRun, bool AllowBreak);

+ /// \brief If \p Current is a raw string that is configured to be reformatted,

+ /// return the style to be used.

+ llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,

+ const LineState &State);

/// \brief If the current token sticks out over the end of the line, break

/// it if possible.

///

- /// \returns An extra penalty if a token was broken, otherwise 0.

+ /// \returns A pair (penalty, exceeded), where penalty is the extra penalty

+ /// when tokens are broken or lines exceed the column limit, and exceeded

+ /// indicates whether the algorithm purposefully left lines exceeding the

+ /// column limit.

///

- /// The returned penalty will cover the cost of the additional line breaks and

- /// column limit violation in all lines except for the last one. The penalty

- /// for the column limit violation in the last line (and in single line

- /// tokens) is handled in \c addNextStateToQueue.

- unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,

- bool DryRun);

+ /// The returned penalty will cover the cost of the additional line breaks

+ /// and column limit violation in all lines except for the last one. The

+ /// penalty for the column limit violation in the last line (and in single

+ /// line tokens) is handled in \c addNextStateToQueue.

+ ///

+ /// \p Strict indicates whether reflowing is allowed to leave characters

+ /// protruding the column limit; if true, lines will be split strictly within

+ /// the column limit where possible; if false, words are allowed to protrude

+ /// over the column limit as long as the penalty is less than the penalty

+ /// of a break.

+ std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,

+ LineState &State,

+ bool AllowBreak, bool DryRun,

+ bool Strict);

+ /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr

+ /// if the current token cannot be broken.

+ std::unique_ptr<BreakableToken>

+ createBreakableToken(const FormatToken &Current, LineState &State,

+ bool AllowBreak);

/// \brief Appends the next token to \p State and updates information

/// necessary for indentation.

@@ -143,6 +192,7 @@ private:

encoding::Encoding Encoding;

bool BinPackInconclusiveFunctions;

llvm::Regex CommentPragmasRegex;

+ const RawStringFormatStyleManager RawStringFormats;

};

struct ParenState {

@@ -318,6 +368,9 @@ struct LineState {

/// \brief \c true if this line contains a continued for-loop section.

bool LineContainsContinuedForLoopSection;

+ /// \brief \c true if \p NextToken should not continue this line.

+ bool NoContinuation;

/// \brief The \c NestingLevel at the start of this line.

unsigned StartOfLineLevel;

@@ -364,6 +417,8 @@ struct LineState {

if (LineContainsContinuedForLoopSection !=

Other.LineContainsContinuedForLoopSection)

return LineContainsContinuedForLoopSection;

+ if (NoContinuation != Other.NoContinuation)

+ return NoContinuation;

if (StartOfLineLevel != Other.StartOfLineLevel)

return StartOfLineLevel < Other.StartOfLineLevel;

if (LowestLevelOnLine != Other.LowestLevelOnLine)

diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 6fe5be2c815d..217c6729ee39 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp

@@ -16,6 +16,7 @@

#include "clang/Format/Format.h"

#include "AffectedRangeManager.h"

#include "ContinuationIndenter.h"

+#include "FormatInternal.h"

#include "FormatTokenLexer.h"

#include "NamespaceEndCommentsFixer.h"

#include "SortJavaScriptImports.h"

@@ -45,6 +46,7 @@

using clang::format::FormatStyle;

LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory)

+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat)

namespace llvm {

namespace yaml {

@@ -125,8 +127,10 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {

}

};

-template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> {

- static void enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {

+template <>

+struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> {

+ static void

+ enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) {

IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon);

IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma);

IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon);

@@ -134,6 +138,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitiali

};

template <>

+struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> {

+ static void enumeration(IO &IO, FormatStyle::PPDirectiveIndentStyle &Value) {

+ IO.enumCase(Value, "None", FormatStyle::PPDIS_None);

+ IO.enumCase(Value, "AfterHash", FormatStyle::PPDIS_AfterHash);

+ }

+};

+template <>

struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> {

static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) {

IO.enumCase(Value, "None", FormatStyle::RTBS_None);

@@ -181,8 +193,10 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> {

}

};

-template <> struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {

- static void enumeration(IO &IO, FormatStyle::EscapedNewlineAlignmentStyle &Value) {

+template <>

+struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> {

+ static void enumeration(IO &IO,

+ FormatStyle::EscapedNewlineAlignmentStyle &Value) {

IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign);

IO.enumCase(Value, "Left", FormatStyle::ENAS_Left);

IO.enumCase(Value, "Right", FormatStyle::ENAS_Right);

@@ -347,9 +361,11 @@ template <> struct MappingTraits<FormatStyle> {

Style.ExperimentalAutoDetectBinPacking);

IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);

IO.mapOptional("ForEachMacros", Style.ForEachMacros);

+ IO.mapOptional("IncludeBlocks", Style.IncludeBlocks);

IO.mapOptional("IncludeCategories", Style.IncludeCategories);

IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex);

IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);

+ IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives);

IO.mapOptional("IndentWidth", Style.IndentWidth);

IO.mapOptional("IndentWrappedFunctionNames",

Style.IndentWrappedFunctionNames);

@@ -365,8 +381,7 @@ template <> struct MappingTraits<FormatStyle> {

IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);

IO.mapOptional("ObjCSpaceBeforeProtocolList",

Style.ObjCSpaceBeforeProtocolList);

- IO.mapOptional("PenaltyBreakAssignment",

- Style.PenaltyBreakAssignment);

+ IO.mapOptional("PenaltyBreakAssignment", Style.PenaltyBreakAssignment);

IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",

Style.PenaltyBreakBeforeFirstCallParameter);

IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);

@@ -377,11 +392,13 @@ template <> struct MappingTraits<FormatStyle> {

IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",

Style.PenaltyReturnTypeOnItsOwnLine);

IO.mapOptional("PointerAlignment", Style.PointerAlignment);

+ IO.mapOptional("RawStringFormats", Style.RawStringFormats);

IO.mapOptional("ReflowComments", Style.ReflowComments);

IO.mapOptional("SortIncludes", Style.SortIncludes);

IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);

IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);

- IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword);

+ IO.mapOptional("SpaceAfterTemplateKeyword",

+ Style.SpaceAfterTemplateKeyword);

IO.mapOptional("SpaceBeforeAssignmentOperators",

Style.SpaceBeforeAssignmentOperators);

IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);

@@ -411,6 +428,7 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {

IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration);

IO.mapOptional("AfterStruct", Wrapping.AfterStruct);

IO.mapOptional("AfterUnion", Wrapping.AfterUnion);

+ IO.mapOptional("AfterExternBlock", Wrapping.AfterExternBlock);

IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch);

IO.mapOptional("BeforeElse", Wrapping.BeforeElse);

IO.mapOptional("IndentBraces", Wrapping.IndentBraces);

@@ -427,6 +445,22 @@ template <> struct MappingTraits<FormatStyle::IncludeCategory> {

}

};

+template <> struct ScalarEnumerationTraits<FormatStyle::IncludeBlocksStyle> {

+ static void enumeration(IO &IO, FormatStyle::IncludeBlocksStyle &Value) {

+ IO.enumCase(Value, "Preserve", FormatStyle::IBS_Preserve);

+ IO.enumCase(Value, "Merge", FormatStyle::IBS_Merge);

+ IO.enumCase(Value, "Regroup", FormatStyle::IBS_Regroup);

+ }

+};

+template <> struct MappingTraits<FormatStyle::RawStringFormat> {

+ static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) {

+ IO.mapOptional("Delimiter", Format.Delimiter);

+ IO.mapOptional("Language", Format.Language);

+ IO.mapOptional("BasedOnStyle", Format.BasedOnStyle);

+ }

+};

// Allows to read vector<FormatStyle> while keeping default values.

// IO.getContext() should contain a pointer to the FormatStyle structure, that

// will be used to get default values for missing keys.

@@ -441,7 +475,7 @@ template <> struct DocumentListTraits<std::vector<FormatStyle>> {

if (Index >= Seq.size()) {

assert(Index == Seq.size());

FormatStyle Template;

- if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {

+ if (!Seq.empty() && Seq[0].Language == FormatStyle::LK_None) {

Template = Seq[0];

} else {

Template = *((const FormatStyle *)IO.getContext());

@@ -491,9 +525,9 @@ static FormatStyle expandPresets(const FormatStyle &Style) {

if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)

return Style;

FormatStyle Expanded = Style;

- Expanded.BraceWrapping = {false, false, false, false, false, false,

- false, false, false, false, false, true,

- true, true};

+ Expanded.BraceWrapping = {false, false, false, false, false,

+ false, false, false, false, false,

+ false, false, true, true, true};

switch (Style.BreakBeforeBraces) {

case FormatStyle::BS_Linux:

Expanded.BraceWrapping.AfterClass = true;

@@ -506,6 +540,7 @@ static FormatStyle expandPresets(const FormatStyle &Style) {

Expanded.BraceWrapping.AfterFunction = true;

Expanded.BraceWrapping.AfterStruct = true;

Expanded.BraceWrapping.AfterUnion = true;

+ Expanded.BraceWrapping.AfterExternBlock = true;

Expanded.BraceWrapping.SplitEmptyFunction = true;

Expanded.BraceWrapping.SplitEmptyRecord = false;

break;

@@ -522,13 +557,13 @@ static FormatStyle expandPresets(const FormatStyle &Style) {

Expanded.BraceWrapping.AfterNamespace = true;

Expanded.BraceWrapping.AfterObjCDeclaration = true;

Expanded.BraceWrapping.AfterStruct = true;

+ Expanded.BraceWrapping.AfterExternBlock = true;

Expanded.BraceWrapping.BeforeCatch = true;

Expanded.BraceWrapping.BeforeElse = true;

break;

case FormatStyle::BS_GNU:

- Expanded.BraceWrapping = {true, true, true, true, true, true,

- true, true, true, true, true, true,

- true, true};

+ Expanded.BraceWrapping = {true, true, true, true, true, true, true, true,

+ true, true, true, true, true, true, true};

break;

case FormatStyle::BS_WebKit:

Expanded.BraceWrapping.AfterFunction = true;

@@ -564,9 +599,9 @@ FormatStyle getLLVMStyle() {

LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;

LLVMStyle.BreakBeforeTernaryOperators = true;

LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;

- LLVMStyle.BraceWrapping = {false, false, false, false, false, false,

- false, false, false, false, false, true,

- true, true};

+ LLVMStyle.BraceWrapping = {false, false, false, false, false,

+ false, false, false, false, false,

+ false, false, true, true, true};

LLVMStyle.BreakAfterJavaFieldAnnotations = false;

LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon;

LLVMStyle.BreakBeforeInheritanceComma = false;

@@ -588,7 +623,9 @@ FormatStyle getLLVMStyle() {

{"^(<|\"(gtest|gmock|isl|json)/)", 3},

{".*", 1}};

LLVMStyle.IncludeIsMainRegex = "(Test)?$";

+ LLVMStyle.IncludeBlocks = FormatStyle::IBS_Preserve;

LLVMStyle.IndentCaseLabels = false;

+ LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None;

LLVMStyle.IndentWrappedFunctionNames = false;

LLVMStyle.IndentWidth = 2;

LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave;

@@ -604,6 +641,7 @@ FormatStyle getLLVMStyle() {

LLVMStyle.SpacesBeforeTrailingComments = 1;

LLVMStyle.Standard = FormatStyle::LS_Cpp11;

LLVMStyle.UseTab = FormatStyle::UT_Never;

+ LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}};

LLVMStyle.ReflowComments = true;

LLVMStyle.SpacesInParentheses = false;

LLVMStyle.SpacesInSquareBrackets = false;

@@ -649,7 +687,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {

GoogleStyle.AlwaysBreakTemplateDeclarations = true;

GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;

GoogleStyle.DerivePointerAlignment = true;

- GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};

+ GoogleStyle.IncludeCategories = {

+ {"^<ext/.*\\.h>", 2}, {"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}};

GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$";

GoogleStyle.IndentCaseLabels = true;

GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;

@@ -725,8 +764,7 @@ FormatStyle getMozillaStyle() {

FormatStyle MozillaStyle = getLLVMStyle();

MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;

MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;

- MozillaStyle.AlwaysBreakAfterReturnType =

- FormatStyle::RTBS_TopLevel;

+ MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel;

MozillaStyle.AlwaysBreakAfterDefinitionReturnType =

FormatStyle::DRTBS_TopLevel;

MozillaStyle.AlwaysBreakTemplateDeclarations = true;

@@ -879,7 +917,7 @@ public:

JavaScriptRequoter(const Environment &Env, const FormatStyle &Style)

: TokenAnalyzer(Env, Style) {}

- tooling::Replacements

+ std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) override {

@@ -887,7 +925,7 @@ public:

AnnotatedLines.end());

tooling::Replacements Result;

requoteJSStringLiteral(AnnotatedLines, Result);

- return Result;

+ return {Result, 0};

}

private:

@@ -968,7 +1006,7 @@ public:

FormattingAttemptStatus *Status)

: TokenAnalyzer(Env, Style), Status(Status) {}

- tooling::Replacements

+ std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) override {

@@ -987,17 +1025,23 @@ public:

ContinuationIndenter Indenter(Style, Tokens.getKeywords(),

Env.getSourceManager(), Whitespaces, Encoding,

BinPackInconclusiveFunctions);

- UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),

- Env.getSourceManager(), Status)

- .format(AnnotatedLines);

+ unsigned Penalty =

+ UnwrappedLineFormatter(&Indenter, &Whitespaces, Style,

+ Tokens.getKeywords(), Env.getSourceManager(),

+ Status)

+ .format(AnnotatedLines, /*DryRun=*/false,

+ /*AdditionalIndent=*/0,

+ /*FixBadIndentation=*/false,

+ /*FirstStartColumn=*/Env.getFirstStartColumn(),

+ /*NextStartColumn=*/Env.getNextStartColumn(),

+ /*LastStartColumn=*/Env.getLastStartColumn());

for (const auto &R : Whitespaces.generateReplacements())

if (Result.add(R))

- return Result;

+ return std::make_pair(Result, 0);

+ return std::make_pair(Result, Penalty);

}

private:

static bool inputUsesCRLF(StringRef Text) {

return Text.count('\r') * 2 > Text.count('\n');

}

@@ -1082,7 +1126,7 @@ public:

DeletedTokens(FormatTokenLess(Env.getSourceManager())) {}

// FIXME: eliminate unused parameters.

- tooling::Replacements

+ std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) override {

@@ -1110,7 +1154,7 @@ public:

}

- return generateFixes();

+ return {generateFixes(), 0};

}

private:

@@ -1386,19 +1430,27 @@ static void sortCppIncludes(const FormatStyle &Style,

}),

Indices.end());

+ int CurrentCategory = Includes.front().Category;

// If the #includes are out of order, we generate a single replacement fixing

// the entire block. Otherwise, no replacement is generated.

if (Indices.size() == Includes.size() &&

- std::is_sorted(Indices.begin(), Indices.end()))

+ std::is_sorted(Indices.begin(), Indices.end()) &&

+ Style.IncludeBlocks == FormatStyle::IBS_Preserve)

return;

std::string result;

for (unsigned Index : Indices) {

- if (!result.empty())

+ if (!result.empty()) {

result += "\n";

+ if (Style.IncludeBlocks == FormatStyle::IBS_Regroup &&

+ CurrentCategory != Includes[Index].Category)

+ result += "\n";

+ }

result += Includes[Index].Text;

if (Cursor && CursorIndex == Index)

*Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset;

+ CurrentCategory = Includes[Index].Category;

}

auto Err = Replaces.add(tooling::Replacement(

@@ -1506,6 +1558,10 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,

else if (Trimmed == "// clang-format on")

FormattingOff = false;

+ const bool EmptyLineSkipped =

+ Trimmed.empty() && (Style.IncludeBlocks == FormatStyle::IBS_Merge ||

+ Style.IncludeBlocks == FormatStyle::IBS_Regroup);

if (!FormattingOff && !Line.endswith("\\")) {

if (IncludeRegex.match(Line, &Matches)) {

StringRef IncludeName = Matches[2];

@@ -1515,7 +1571,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,

if (Category == 0)

MainIncludeFound = true;

IncludesInBlock.push_back({IncludeName, Line, Prev, Category});

- } else if (!IncludesInBlock.empty()) {

+ } else if (!IncludesInBlock.empty() && !EmptyLineSkipped) {

sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,

Cursor);

IncludesInBlock.clear();

@@ -1539,12 +1595,16 @@ bool isMpegTS(StringRef Code) {

return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47;

}

+bool isLikelyXml(StringRef Code) { return Code.ltrim().startswith("<"); }

tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code,

ArrayRef<tooling::Range> Ranges,

StringRef FileName, unsigned *Cursor) {

tooling::Replacements Replaces;

if (!Style.SortIncludes)

return Replaces;

+ if (isLikelyXml(Code))

+ return Replaces;

if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript &&

isMpegTS(Code))

return Replaces;

@@ -1887,17 +1947,22 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,

return processReplacements(Cleanup, Code, NewReplaces, Style);

}

-tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,

- ArrayRef<tooling::Range> Ranges,

- StringRef FileName,

- FormattingAttemptStatus *Status) {

+namespace internal {

+std::pair<tooling::Replacements, unsigned>

+reformat(const FormatStyle &Style, StringRef Code,

+ ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,

+ unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,

+ FormattingAttemptStatus *Status) {

FormatStyle Expanded = expandPresets(Style);

if (Expanded.DisableFormat)

- return tooling::Replacements();

+ return {tooling::Replacements(), 0};

+ if (isLikelyXml(Code))

+ return {tooling::Replacements(), 0};

if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code))

- return tooling::Replacements();

+ return {tooling::Replacements(), 0};

- typedef std::function<tooling::Replacements(const Environment &)>

+ typedef std::function<std::pair<tooling::Replacements, unsigned>(

+ const Environment &)>

AnalyzerPass;

SmallVector<AnalyzerPass, 4> Passes;

@@ -1923,26 +1988,42 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,

return Formatter(Env, Expanded, Status).process();

});

- std::unique_ptr<Environment> Env =

- Environment::CreateVirtualEnvironment(Code, FileName, Ranges);

+ std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(

+ Code, FileName, Ranges, FirstStartColumn, NextStartColumn,

+ LastStartColumn);

llvm::Optional<std::string> CurrentCode = None;

tooling::Replacements Fixes;

+ unsigned Penalty = 0;

for (size_t I = 0, E = Passes.size(); I < E; ++I) {

- tooling::Replacements PassFixes = Passes[I](*Env);

+ std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env);

auto NewCode = applyAllReplacements(

- CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes);

+ CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first);

if (NewCode) {

- Fixes = Fixes.merge(PassFixes);

+ Fixes = Fixes.merge(PassFixes.first);

+ Penalty += PassFixes.second;

if (I + 1 < E) {

CurrentCode = std::move(*NewCode);

Env = Environment::CreateVirtualEnvironment(

*CurrentCode, FileName,

- tooling::calculateRangesAfterReplacements(Fixes, Ranges));

+ tooling::calculateRangesAfterReplacements(Fixes, Ranges),

+ FirstStartColumn, NextStartColumn, LastStartColumn);

}

- return Fixes;

+ return {Fixes, Penalty};

+} // namespace internal

+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,

+ ArrayRef<tooling::Range> Ranges,

+ StringRef FileName,

+ FormattingAttemptStatus *Status) {

+ return internal::reformat(Style, Code, Ranges,

+ /*FirstStartColumn=*/0,

+ /*NextStartColumn=*/0,

+ /*LastStartColumn=*/0, FileName, Status)

+ .first;

}

tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,

@@ -1954,7 +2035,7 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,

std::unique_ptr<Environment> Env =

Environment::CreateVirtualEnvironment(Code, FileName, Ranges);

Cleaner Clean(*Env, Style);

- return Clean.process();

+ return Clean.process().first;

}

tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,

@@ -1974,7 +2055,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,

std::unique_ptr<Environment> Env =

Environment::CreateVirtualEnvironment(Code, FileName, Ranges);

NamespaceEndCommentsFixer Fix(*Env, Style);

- return Fix.process();

+ return Fix.process().first;

}

tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,

@@ -1984,7 +2065,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,

std::unique_ptr<Environment> Env =

Environment::CreateVirtualEnvironment(Code, FileName, Ranges);

UsingDeclarationsSorter Sorter(*Env, Style);

- return Sorter.process();

+ return Sorter.process().first;

}

LangOptions getFormattingLangOpts(const FormatStyle &Style) {

@@ -1992,7 +2073,8 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {

LangOpts.CPlusPlus = 1;

LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;

LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;

- LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;

+ LangOpts.CPlusPlus17 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;

+ LangOpts.CPlusPlus2a = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;

LangOpts.LineComment = 1;

bool AlternativeOperators = Style.isCpp();

LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;

@@ -2025,6 +2107,11 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {

if (FileName.endswith_lower(".proto") ||

FileName.endswith_lower(".protodevel"))

return FormatStyle::LK_Proto;

+ if (FileName.endswith_lower(".textpb") ||

+ FileName.endswith_lower(".pb.txt") ||

+ FileName.endswith_lower(".textproto") ||

+ FileName.endswith_lower(".asciipb"))

+ return FormatStyle::LK_TextProto;

if (FileName.endswith_lower(".td"))

return FormatStyle::LK_TableGen;

return FormatStyle::LK_Cpp;

@@ -2043,7 +2130,9 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName,

// should be improved over time and probably be done on tokens, not one the

// bare content of the file.

if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&

- (Code.contains("\n- (") || Code.contains("\n+ (")))

+ (Code.contains("\n- (") || Code.contains("\n+ (") ||

+ Code.contains("\n@end\n") || Code.contains("\n@end ") ||

+ Code.endswith("@end")))

Style.Language = FormatStyle::LK_ObjC;

FormatStyle FallbackStyle = getNoStyle();

diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h
new file mode 100644
index 000000000000..3984158467b3
--- /dev/null
+++ b/lib/Format/FormatInternal.h

@@ -0,0 +1,83 @@

+//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===//

+//

+// The LLVM Compiler Infrastructure

+//

+// This file is distributed under the University of Illinois Open Source

+// License. See LICENSE.TXT for details.

+//

+//===----------------------------------------------------------------------===//

+///

+/// \file

+/// \brief This file declares Format APIs to be used internally by the

+/// formatting library implementation.

+///

+//===----------------------------------------------------------------------===//

+#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H

+#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H

+#include "BreakableToken.h"

+#include "clang/Tooling/Core/Lookup.h"

+#include <utility>

+namespace clang {

+namespace format {

+namespace internal {

+/// \brief Reformats the given \p Ranges in the code fragment \p Code.

+///

+/// A fragment of code could conceptually be surrounded by other code that might

+/// constrain how that fragment is laid out.

+/// For example, consider the fragment of code between 'R"(' and ')"',

+/// exclusive, in the following code:

+///

+/// void outer(int x) {

+/// string inner = R"(name: data

+/// ^ FirstStartColumn

+/// value: {

+/// x: 1

+/// ^ NextStartColumn

+/// }

+/// )";

+/// ^ LastStartColumn

+/// }

+///

+/// The outer code can influence the inner fragment as follows:

+/// * \p FirstStartColumn specifies the column at which \p Code starts.

+/// * \p NextStartColumn specifies the additional indent dictated by the

+/// surrounding code. It is applied to the rest of the lines of \p Code.

+/// * \p LastStartColumn specifies the column at which the last line of

+/// \p Code should end, in case the last line is an empty line.

+///

+/// In the case where the last line of the fragment contains content,

+/// the fragment ends at the end of that content and \p LastStartColumn is

+/// not taken into account, for example in:

+///

+/// void block() {

+/// string inner = R"(name: value)";

+/// }

+///

+/// Each range is extended on either end to its next bigger logic unit, i.e.

+/// everything that might influence its formatting or might be influenced by its

+/// formatting.

+///

+/// Returns a pair P, where:

+/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply

+/// with \p Style.

+/// * P.second is the penalty induced by formatting the fragment \p Code.

+/// If the formatting of the fragment doesn't have a notion of penalty,

+/// returns 0.

+///

+/// If ``Status`` is non-null, its value will be populated with the status of

+/// this formatting attempt. See \c FormattingAttemptStatus.

+std::pair<tooling::Replacements, unsigned>

+reformat(const FormatStyle &Style, StringRef Code,

+ ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,

+ unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,

+ FormattingAttemptStatus *Status);

+} // namespace internal

+} // namespace format

+} // namespace clang

+#endif

diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp
index ba5bf03a6346..10ac392abbf2 100644
--- a/lib/Format/FormatToken.cpp
+++ b/lib/Format/FormatToken.cpp

@@ -25,10 +25,9 @@ namespace format {

const char *getTokenTypeName(TokenType Type) {

static const char *const TokNames[] = {

#define TYPE(X) #X,

-LIST_TOKEN_TYPES

+ LIST_TOKEN_TYPES

#undef TYPE

- nullptr

- };

+ nullptr};

if (Type < NUM_TOKEN_TYPES)

return TokNames[Type];

@@ -52,6 +51,7 @@ bool FormatToken::isSimpleTypeSpecifier() const {

case tok::kw_half:

case tok::kw_float:

case tok::kw_double:

+ case tok::kw__Float16:

case tok::kw___float128:

case tok::kw_wchar_t:

case tok::kw_bool:

diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index a60361a8e5fa..3dc0ab0e7cca 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h

@@ -26,78 +26,79 @@

namespace clang {

namespace format {

-#define LIST_TOKEN_TYPES \

- TYPE(ArrayInitializerLSquare) \

- TYPE(ArraySubscriptLSquare) \

- TYPE(AttributeParen) \

- TYPE(BinaryOperator) \

- TYPE(BitFieldColon) \

- TYPE(BlockComment) \

- TYPE(CastRParen) \

- TYPE(ConditionalExpr) \

- TYPE(ConflictAlternative) \

- TYPE(ConflictEnd) \

- TYPE(ConflictStart) \

- TYPE(CtorInitializerColon) \

- TYPE(CtorInitializerComma) \

- TYPE(DesignatedInitializerLSquare) \

- TYPE(DesignatedInitializerPeriod) \

- TYPE(DictLiteral) \

- TYPE(ForEachMacro) \

- TYPE(FunctionAnnotationRParen) \

- TYPE(FunctionDeclarationName) \

- TYPE(FunctionLBrace) \

- TYPE(FunctionTypeLParen) \

- TYPE(ImplicitStringLiteral) \

- TYPE(InheritanceColon) \

- TYPE(InheritanceComma) \

- TYPE(InlineASMBrace) \

- TYPE(InlineASMColon) \

- TYPE(JavaAnnotation) \

- TYPE(JsComputedPropertyName) \

- TYPE(JsExponentiation) \

- TYPE(JsExponentiationEqual) \

- TYPE(JsFatArrow) \

- TYPE(JsNonNullAssertion) \

- TYPE(JsTypeColon) \

- TYPE(JsTypeOperator) \

- TYPE(JsTypeOptionalQuestion) \

- TYPE(LambdaArrow) \

- TYPE(LambdaLSquare) \

- TYPE(LeadingJavaAnnotation) \

- TYPE(LineComment) \

- TYPE(MacroBlockBegin) \

- TYPE(MacroBlockEnd) \

- TYPE(ObjCBlockLBrace) \

- TYPE(ObjCBlockLParen) \

- TYPE(ObjCDecl) \

- TYPE(ObjCForIn) \

- TYPE(ObjCMethodExpr) \

- TYPE(ObjCMethodSpecifier) \

- TYPE(ObjCProperty) \

- TYPE(ObjCStringLiteral) \

- TYPE(OverloadedOperator) \

- TYPE(OverloadedOperatorLParen) \

- TYPE(PointerOrReference) \

- TYPE(PureVirtualSpecifier) \

- TYPE(RangeBasedForLoopColon) \

- TYPE(RegexLiteral) \

- TYPE(SelectorName) \

- TYPE(StartOfName) \

- TYPE(TemplateCloser) \

- TYPE(TemplateOpener) \

- TYPE(TemplateString) \

- TYPE(TrailingAnnotation) \

- TYPE(TrailingReturnArrow) \

- TYPE(TrailingUnaryOperator) \

- TYPE(UnaryOperator) \

+#define LIST_TOKEN_TYPES \

+ TYPE(ArrayInitializerLSquare) \

+ TYPE(ArraySubscriptLSquare) \

+ TYPE(AttributeParen) \

+ TYPE(BinaryOperator) \

+ TYPE(BitFieldColon) \

+ TYPE(BlockComment) \

+ TYPE(CastRParen) \

+ TYPE(ConditionalExpr) \

+ TYPE(ConflictAlternative) \

+ TYPE(ConflictEnd) \

+ TYPE(ConflictStart) \

+ TYPE(CtorInitializerColon) \

+ TYPE(CtorInitializerComma) \

+ TYPE(DesignatedInitializerLSquare) \

+ TYPE(DesignatedInitializerPeriod) \

+ TYPE(DictLiteral) \

+ TYPE(ForEachMacro) \

+ TYPE(FunctionAnnotationRParen) \

+ TYPE(FunctionDeclarationName) \

+ TYPE(FunctionLBrace) \

+ TYPE(FunctionTypeLParen) \

+ TYPE(ImplicitStringLiteral) \

+ TYPE(InheritanceColon) \

+ TYPE(InheritanceComma) \

+ TYPE(InlineASMBrace) \

+ TYPE(InlineASMColon) \

+ TYPE(JavaAnnotation) \

+ TYPE(JsComputedPropertyName) \

+ TYPE(JsExponentiation) \

+ TYPE(JsExponentiationEqual) \

+ TYPE(JsFatArrow) \

+ TYPE(JsNonNullAssertion) \

+ TYPE(JsTypeColon) \

+ TYPE(JsTypeOperator) \

+ TYPE(JsTypeOptionalQuestion) \

+ TYPE(LambdaArrow) \

+ TYPE(LambdaLSquare) \

+ TYPE(LeadingJavaAnnotation) \

+ TYPE(LineComment) \

+ TYPE(MacroBlockBegin) \

+ TYPE(MacroBlockEnd) \

+ TYPE(ObjCBlockLBrace) \

+ TYPE(ObjCBlockLParen) \

+ TYPE(ObjCDecl) \

+ TYPE(ObjCForIn) \

+ TYPE(ObjCMethodExpr) \

+ TYPE(ObjCMethodSpecifier) \

+ TYPE(ObjCProperty) \

+ TYPE(ObjCStringLiteral) \

+ TYPE(OverloadedOperator) \

+ TYPE(OverloadedOperatorLParen) \

+ TYPE(PointerOrReference) \

+ TYPE(PureVirtualSpecifier) \

+ TYPE(RangeBasedForLoopColon) \

+ TYPE(RegexLiteral) \

+ TYPE(SelectorName) \

+ TYPE(StartOfName) \

+ TYPE(StructuredBindingLSquare) \

+ TYPE(TemplateCloser) \

+ TYPE(TemplateOpener) \

+ TYPE(TemplateString) \

+ TYPE(TrailingAnnotation) \

+ TYPE(TrailingReturnArrow) \

+ TYPE(TrailingUnaryOperator) \

+ TYPE(UnaryOperator) \

TYPE(Unknown)

enum TokenType {

#define TYPE(X) TT_##X,

-LIST_TOKEN_TYPES

+ LIST_TOKEN_TYPES

#undef TYPE

- NUM_TOKEN_TYPES

+ NUM_TOKEN_TYPES

};

/// \brief Determines the name of a token type.

@@ -340,10 +341,11 @@ struct FormatToken {

bool isSimpleTypeSpecifier() const;

bool isObjCAccessSpecifier() const {

- return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) ||

- Next->isObjCAtKeyword(tok::objc_protected) ||

- Next->isObjCAtKeyword(tok::objc_package) ||

- Next->isObjCAtKeyword(tok::objc_private));

+ return is(tok::at) && Next &&

+ (Next->isObjCAtKeyword(tok::objc_public) ||

+ Next->isObjCAtKeyword(tok::objc_protected) ||

+ Next->isObjCAtKeyword(tok::objc_package) ||

+ Next->isObjCAtKeyword(tok::objc_private));

}

/// \brief Returns whether \p Tok is ([{ or a template opening <.

@@ -471,6 +473,19 @@ struct FormatToken {

Style.Language == FormatStyle::LK_TextProto));

}

+ /// \brief Returns whether the token is the left square bracket of a C++

+ /// structured binding declaration, e.g. the '[' in `auto &[a, b] = f();`.

+ bool isCppStructuredBinding(const FormatStyle &Style) const {

+ if (!Style.isCpp() || isNot(tok::l_square)) // only a '[' in a C++ style can qualify

+ return false;

+ const FormatToken *T = this;

+ do { // step backwards at least once, so the '[' itself is never the token tested below

+ T = T->getPreviousNonComment();

+ } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, // keep going over const/volatile/&/&&

+ tok::ampamp));

+ return T && T->is(tok::kw_auto); // false if the walk ran out of tokens or stopped on anything but 'auto'

+ }

/// \brief Same as opensBlockOrBlockTypeList, but for the closing token.

bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {

if (is(TT_TemplateString) && closesScope())

@@ -503,15 +518,13 @@ private:

return is(K1) && Next && Next->startsSequenceInternal(Tokens...);

}

- template <typename A>

- bool startsSequenceInternal(A K1) const {

+ template <typename A> bool startsSequenceInternal(A K1) const {

if (is(tok::comment) && Next)

return Next->startsSequenceInternal(K1);

return is(K1);

}

- template <typename A, typename... Ts>

- bool endsSequenceInternal(A K1) const {

+ template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {

if (is(tok::comment) && Previous)

return Previous->endsSequenceInternal(K1);

return is(K1);

@@ -644,6 +657,7 @@ struct AdditionalKeywords {

kw_readonly = &IdentTable.get("readonly");

kw_set = &IdentTable.get("set");

kw_type = &IdentTable.get("type");

+ kw_typeof = &IdentTable.get("typeof");

kw_var = &IdentTable.get("var");

kw_yield = &IdentTable.get("yield");

@@ -680,7 +694,7 @@ struct AdditionalKeywords {

JsExtraKeywords = std::unordered_set<IdentifierInfo *>(

{kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,

kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,

- kw_set, kw_type, kw_var, kw_yield,

+ kw_set, kw_type, kw_typeof, kw_var, kw_yield,

// Keywords from the Java section.

kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});

}

@@ -714,6 +728,7 @@ struct AdditionalKeywords {

IdentifierInfo *kw_readonly;

IdentifierInfo *kw_set;

IdentifierInfo *kw_type;

+ IdentifierInfo *kw_typeof;

IdentifierInfo *kw_var;

IdentifierInfo *kw_yield;

diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp
index 45c3ae1afe5f..199d2974c5c7 100644
--- a/lib/Format/FormatTokenLexer.cpp
+++ b/lib/Format/FormatTokenLexer.cpp

@@ -24,10 +24,10 @@ namespace clang {

namespace format {

FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,

- const FormatStyle &Style,

+ unsigned Column, const FormatStyle &Style,

encoding::Encoding Encoding)

: FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),

- Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),

+ Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),

Style(Style), IdentTable(getFormattingLangOpts(Style)),

Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),

FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),

@@ -50,6 +50,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {

tryParseJSRegexLiteral();

handleTemplateStrings();

}

+ if (Style.Language == FormatStyle::LK_TextProto)

+ tryParsePythonComment();

tryMergePreviousTokens();

if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)

FirstInLineIndex = Tokens.size() - 1;

@@ -96,14 +98,8 @@ void FormatTokenLexer::tryMergePreviousTokens() {

}

if (Style.Language == FormatStyle::LK_Java) {

- static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater,

- tok::greater,

- tok::greater};

- static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater,

- tok::greater,

- tok::greaterequal};

- if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator))

- return;

+ static const tok::TokenKind JavaRightLogicalShiftAssign[] = {

+ tok::greater, tok::greater, tok::greaterequal};

if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))

return;

}

@@ -162,9 +158,8 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,

return false;

unsigned AddLength = 0;

for (unsigned i = 1; i < Kinds.size(); ++i) {

- if (!First[i]->is(Kinds[i]) ||

- First[i]->WhitespaceRange.getBegin() !=

- First[i]->WhitespaceRange.getEnd())

+ if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=

+ First[i]->WhitespaceRange.getEnd())

return false;

AddLength += First[i]->TokenText.size();

}

@@ -337,6 +332,27 @@ void FormatTokenLexer::handleTemplateStrings() {

resetLexer(SourceMgr.getFileOffset(loc));

}

+void FormatTokenLexer::tryParsePythonComment() {

+ FormatToken *HashToken = Tokens.back();

+ if (HashToken->isNot(tok::hash))

+ return;

+ // Turn the remainder of this line into a comment.

+ const char *CommentBegin =

+ Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"

+ size_t From = CommentBegin - Lex->getBuffer().begin();

+ size_t To = Lex->getBuffer().find_first_of('\n', From);

+ if (To == StringRef::npos)

+ To = Lex->getBuffer().size();

+ size_t Len = To - From;

+ HashToken->Type = TT_LineComment;

+ HashToken->Tok.setKind(tok::comment);

+ HashToken->TokenText = Lex->getBuffer().substr(From, Len);

+ SourceLocation Loc = To < Lex->getBuffer().size()

+ ? Lex->getSourceLocation(CommentBegin + Len)

+ : SourceMgr.getLocForEndOfFile(ID);

+ resetLexer(SourceMgr.getFileOffset(Loc));

bool FormatTokenLexer::tryMerge_TMacro() {

if (Tokens.size() < 4)

return false;

@@ -529,17 +545,53 @@ FormatToken *FormatTokenLexer::getNextToken() {

readRawToken(*FormatTok);

}

+ // JavaScript and Java do not allow escaping the end of the line with a

+ // backslash. Backslashes are syntax errors in plain source, but can occur in

+ // comments. When a single line comment ends with a \, it'll cause the next

+ // line of code to be lexed as a comment, breaking formatting. The code below

+ // finds comments that contain a backslash followed by a line break, truncates

+ // the comment token at the backslash, and resets the lexer to restart behind

+ // the backslash.

+ if ((Style.Language == FormatStyle::LK_JavaScript ||

+ Style.Language == FormatStyle::LK_Java) &&

+ FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) {

+ size_t BackslashPos = FormatTok->TokenText.find('\\');

+ while (BackslashPos != StringRef::npos) {

+ if (BackslashPos + 1 < FormatTok->TokenText.size() &&

+ FormatTok->TokenText[BackslashPos + 1] == '\n') {

+ const char *Offset = Lex->getBufferLocation();

+ Offset -= FormatTok->TokenText.size();

+ Offset += BackslashPos + 1;

+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));

+ FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1);

+ FormatTok->ColumnWidth = encoding::columnWidthWithTabs(

+ FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,

+ Encoding);

+ break;

+ }

+ BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);

+ }

// In case the token starts with escaped newlines, we want to

// take them into account as whitespace - this pattern is quite frequent

// in macro definitions.

// FIXME: Add a more explicit test.

- while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&

- FormatTok->TokenText[1] == '\n') {

+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') {

+ unsigned SkippedWhitespace = 0;

+ if (FormatTok->TokenText.size() > 2 &&

+ (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n'))

+ SkippedWhitespace = 3;

+ else if (FormatTok->TokenText[1] == '\n')

+ SkippedWhitespace = 2;

+ else

+ break;

++FormatTok->NewlinesBefore;

- WhitespaceLength += 2;

- FormatTok->LastNewlineOffset = 2;

+ WhitespaceLength += SkippedWhitespace;

+ FormatTok->LastNewlineOffset = SkippedWhitespace;

Column = 0;

- FormatTok->TokenText = FormatTok->TokenText.substr(2);

+ FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace);

}

FormatTok->WhitespaceRange = SourceRange(

diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h
index bf10f09cd11e..59dc2a752f1f 100644
--- a/lib/Format/FormatTokenLexer.h
+++ b/lib/Format/FormatTokenLexer.h

@@ -36,7 +36,7 @@ enum LexerState {

class FormatTokenLexer {

public:

- FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,

+ FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,

const FormatStyle &Style, encoding::Encoding Encoding);

ArrayRef<FormatToken *> lex();

@@ -73,6 +73,8 @@ private:

// nested template parts by balancing curly braces.

void handleTemplateStrings();

+ void tryParsePythonComment();

bool tryMerge_TMacro();

bool tryMergeConflictMarkers();

diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp
index 85b70b8c0a76..df99bb2e1381 100644
--- a/lib/Format/NamespaceEndCommentsFixer.cpp
+++ b/lib/Format/NamespaceEndCommentsFixer.cpp

@@ -118,6 +118,12 @@ getNamespaceToken(const AnnotatedLine *line,

return nullptr;

assert(StartLineIndex < AnnotatedLines.size());

const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;

+ if (NamespaceTok->is(tok::l_brace)) {

+ // "namespace" keyword can be on the line preceding '{', e.g. in styles

+ // where BraceWrapping.AfterNamespace is true.

+ if (StartLineIndex > 0)

+ NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;

+ }

// Detect "(inline)? namespace" in the beginning of a line.

if (NamespaceTok->is(tok::kw_inline))

NamespaceTok = NamespaceTok->getNextNonComment();

@@ -131,7 +137,7 @@ NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,

const FormatStyle &Style)

: TokenAnalyzer(Env, Style) {}

-tooling::Replacements NamespaceEndCommentsFixer::analyze(

+std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(

TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) {

const SourceManager &SourceMgr = Env.getSourceManager();

@@ -200,7 +206,7 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze(

}

StartLineIndex = SIZE_MAX;

}

- return Fixes;

+ return {Fixes, 0};

}

} // namespace format

diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h
index 7790668a2e82..4779f0d27c92 100644
--- a/lib/Format/NamespaceEndCommentsFixer.h
+++ b/lib/Format/NamespaceEndCommentsFixer.h

@@ -25,7 +25,7 @@ class NamespaceEndCommentsFixer : public TokenAnalyzer {

public:

NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style);

- tooling::Replacements

+ std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) override;

diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp
index e73695ca8477..d0b979e100d5 100644
--- a/lib/Format/SortJavaScriptImports.cpp
+++ b/lib/Format/SortJavaScriptImports.cpp

@@ -123,7 +123,7 @@ public:

: TokenAnalyzer(Env, Style),

FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {}

- tooling::Replacements

+ std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) override {

@@ -138,7 +138,7 @@ public:

parseModuleReferences(Keywords, AnnotatedLines);

if (References.empty())

- return Result;

+ return {Result, 0};

SmallVector<unsigned, 16> Indices;

for (unsigned i = 0, e = References.size(); i != e; ++i)

@@ -168,7 +168,7 @@ public:

}

if (ReferencesInOrder && SymbolsInOrder)

- return Result;

+ return {Result, 0};

SourceRange InsertionPoint = References[0].Range;

InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd());

@@ -202,7 +202,7 @@ public:

assert(false);

}

- return Result;

+ return {Result, 0};

}

private:

@@ -277,7 +277,7 @@ private:

// Parses module references in the given lines. Returns the module references,

// and a pointer to the first "main code" line if that is adjacent to the

// affected lines of module references, nullptr otherwise.

- std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine*>

+ std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *>

parseModuleReferences(const AdditionalKeywords &Keywords,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {

SmallVector<JsModuleReference, 16> References;

@@ -413,7 +413,7 @@ private:

nextToken();

if (Current->is(tok::r_brace))

break;

- if (Current->isNot(tok::identifier))

+ if (!Current->isOneOf(tok::identifier, tok::kw_default))

return false;

JsImportedSymbol Symbol;

@@ -425,7 +425,7 @@ private:

if (Current->is(Keywords.kw_as)) {

nextToken();

- if (Current->isNot(tok::identifier))

+ if (!Current->isOneOf(tok::identifier, tok::kw_default))

return false;

Symbol.Alias = Current->TokenText;

nextToken();

@@ -449,7 +449,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,

std::unique_ptr<Environment> Env =

Environment::CreateVirtualEnvironment(Code, FileName, Ranges);

JavaScriptImportSorter Sorter(*Env, Style);

- return Sorter.process();

+ return Sorter.process().first;

}

} // end namespace format

diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp
index f2e4e8ef0819..d1dfb1fea32b 100644
--- a/lib/Format/TokenAnalyzer.cpp
+++ b/lib/Format/TokenAnalyzer.cpp

@@ -38,7 +38,10 @@ namespace format {

// Code.

std::unique_ptr<Environment>

Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,

- ArrayRef<tooling::Range> Ranges) {

+ ArrayRef<tooling::Range> Ranges,

+ unsigned FirstStartColumn,

+ unsigned NextStartColumn,

+ unsigned LastStartColumn) {

// This is referenced by `FileMgr` and will be released by `FileMgr` when it

// is deleted.

IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(

@@ -57,8 +60,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,

std::unique_ptr<SourceManager> VirtualSM(

new SourceManager(*Diagnostics, *FileMgr));

InMemoryFileSystem->addFile(

- FileName, 0, llvm::MemoryBuffer::getMemBuffer(

- Code, FileName, /*RequiresNullTerminator=*/false));

+ FileName, 0,

+ llvm::MemoryBuffer::getMemBuffer(Code, FileName,

+ /*RequiresNullTerminator=*/false));

FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),

SourceLocation(), clang::SrcMgr::C_User);

assert(ID.isValid());

@@ -69,9 +73,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,

SourceLocation End = Start.getLocWithOffset(Range.getLength());

CharRanges.push_back(CharSourceRange::getCharRange(Start, End));

}

- return llvm::make_unique<Environment>(ID, std::move(FileMgr),

- std::move(VirtualSM),

- std::move(Diagnostics), CharRanges);

+ return llvm::make_unique<Environment>(

+ ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics),

+ CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn);

}

TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)

@@ -88,14 +92,16 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)

<< "\n");

}

-tooling::Replacements TokenAnalyzer::process() {

+std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {

tooling::Replacements Result;

- FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style,

- Encoding);

+ FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),

+ Env.getFirstStartColumn(), Style, Encoding);

- UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this);

+ UnwrappedLineParser Parser(Style, Tokens.getKeywords(),

+ Env.getFirstStartColumn(), Tokens.lex(), *this);

Parser.parse();

assert(UnwrappedLines.rbegin()->empty());

+ unsigned Penalty = 0;

for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {

DEBUG(llvm::dbgs() << "Run " << Run << "...\n");

SmallVector<AnnotatedLine *, 16> AnnotatedLines;

@@ -106,13 +112,13 @@ tooling::Replacements TokenAnalyzer::process() {

Annotator.annotate(*AnnotatedLines.back());

}

- tooling::Replacements RunResult =

+ std::pair<tooling::Replacements, unsigned> RunResult =

analyze(Annotator, AnnotatedLines, Tokens);

DEBUG({

llvm::dbgs() << "Replacements for run " << Run << ":\n";

- for (tooling::Replacements::const_iterator I = RunResult.begin(),

- E = RunResult.end();

+ for (tooling::Replacements::const_iterator I = RunResult.first.begin(),

+ E = RunResult.first.end();

I != E; ++I) {

llvm::dbgs() << I->toString() << "\n";

}

@@ -120,17 +126,19 @@ tooling::Replacements TokenAnalyzer::process() {

for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {

delete AnnotatedLines[i];

}

- for (const auto &R : RunResult) {

+ Penalty += RunResult.second;

+ for (const auto &R : RunResult.first) {

auto Err = Result.add(R);

// FIXME: better error handling here. For now, simply return an empty

// Replacements to indicate failure.

if (Err) {

llvm::errs() << llvm::toString(std::move(Err)) << "\n";

- return tooling::Replacements();

+ return {tooling::Replacements(), 0};

}

- return Result;

+ return {Result, Penalty};

}

void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {

diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h
index 78a3d1bc8d9e..96ea00b25ba1 100644
--- a/lib/Format/TokenAnalyzer.h
+++ b/lib/Format/TokenAnalyzer.h

@@ -37,21 +37,37 @@ namespace format {

class Environment {

public:

Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges)

- : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {}

+ : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM),

+ FirstStartColumn(0),

+ NextStartColumn(0),

+ LastStartColumn(0) {}

Environment(FileID ID, std::unique_ptr<FileManager> FileMgr,

std::unique_ptr<SourceManager> VirtualSM,

std::unique_ptr<DiagnosticsEngine> Diagnostics,

- const std::vector<CharSourceRange> &CharRanges)

+ const std::vector<CharSourceRange> &CharRanges,

+ unsigned FirstStartColumn,

+ unsigned NextStartColumn,

+ unsigned LastStartColumn)

: ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()),

- SM(*VirtualSM), FileMgr(std::move(FileMgr)),

+ SM(*VirtualSM),

+ FirstStartColumn(FirstStartColumn),

+ NextStartColumn(NextStartColumn),

+ LastStartColumn(LastStartColumn),

+ FileMgr(std::move(FileMgr)),

VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {}

- // This sets up an virtual file system with file \p FileName containing \p

- // Code.

+ // This sets up a virtual file system with file \p FileName containing the

+ // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,

+ // that the next lines of \p Code should start at \p NextStartColumn, and

+ // that \p Code should end at \p LastStartColumn if it ends in a newline.

+ // See also the documentation of clang::format::internal::reformat.

static std::unique_ptr<Environment>

CreateVirtualEnvironment(StringRef Code, StringRef FileName,

- ArrayRef<tooling::Range> Ranges);

+ ArrayRef<tooling::Range> Ranges,

+ unsigned FirstStartColumn = 0,

+ unsigned NextStartColumn = 0,

+ unsigned LastStartColumn = 0);

FileID getFileID() const { return ID; }

@@ -59,10 +75,25 @@ public:

const SourceManager &getSourceManager() const { return SM; }

+ // Returns the column at which the fragment of code managed by this

+ // environment starts.

+ unsigned getFirstStartColumn() const { return FirstStartColumn; }

+ // Returns the column at which subsequent lines of the fragment of code

+ // managed by this environment should start.

+ unsigned getNextStartColumn() const { return NextStartColumn; }

+ // Returns the column at which the fragment of code managed by this

+ // environment should end if it ends in a newline.

+ unsigned getLastStartColumn() const { return LastStartColumn; }

private:

FileID ID;

SmallVector<CharSourceRange, 8> CharRanges;

SourceManager &SM;

+ unsigned FirstStartColumn;

+ unsigned NextStartColumn;

+ unsigned LastStartColumn;

// The order of these fields is important - they should be in the same order

// as they are created in `CreateVirtualEnvironment` so that they can be

@@ -76,10 +107,10 @@ class TokenAnalyzer : public UnwrappedLineConsumer {

public:

TokenAnalyzer(const Environment &Env, const FormatStyle &Style);

- tooling::Replacements process();

+ std::pair<tooling::Replacements, unsigned> process();

protected:

- virtual tooling::Replacements

+ virtual std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) = 0;

diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 46ea06b880ed..298c72b002f8 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp

@@ -47,7 +47,7 @@ private:

if (NonTemplateLess.count(CurrentToken->Previous))

return false;

- const FormatToken& Previous = *CurrentToken->Previous;

+ const FormatToken &Previous = *CurrentToken->Previous; // The '<'.

if (Previous.Previous) {

if (Previous.Previous->Tok.isLiteral())

return false;

@@ -152,11 +152,11 @@ private:

// export type X = (...);

Contexts.back().IsExpression = false;

} else if (Left->Previous &&

- (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,

- tok::kw_if, tok::kw_while, tok::l_paren,

- tok::comma) ||

- Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||

- Left->Previous->is(TT_BinaryOperator))) {

+ (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,

+ tok::kw_if, tok::kw_while, tok::l_paren,

+ tok::comma) ||

+ Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||

+ Left->Previous->is(TT_BinaryOperator))) {

// static_assert, if and while usually contain expressions.

Contexts.back().IsExpression = true;

} else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&

@@ -325,8 +325,7 @@ private:

// In C++, this can happen either in array of templates (foo<int>[10])

// or when array is a nested template type (unique_ptr<type1<type2>[]>).

bool CppArrayTemplates =

- Style.isCpp() && Parent &&

- Parent->is(TT_TemplateCloser) &&

+ Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&

(Contexts.back().CanBeExpression || Contexts.back().IsExpression ||

Contexts.back().InTemplateArgument);

@@ -343,7 +342,9 @@ private:

bool ColonFound = false;

unsigned BindingIncrease = 1;

- if (Left->is(TT_Unknown)) {

+ if (Left->isCppStructuredBinding(Style)) {

+ Left->Type = TT_StructuredBindingLSquare;

+ } else if (Left->is(TT_Unknown)) {

if (StartsObjCMethodExpr) {

Left->Type = TT_ObjCMethodExpr;

} else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&

@@ -372,6 +373,10 @@ private:

ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);

Contexts.back().IsExpression = true;

+ if (Style.Language == FormatStyle::LK_JavaScript && Parent &&

+ Parent->is(TT_JsTypeColon))

+ Contexts.back().IsExpression = false;

Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;

while (CurrentToken) {

@@ -439,6 +444,9 @@ private:

Contexts.back().ColonIsDictLiteral = true;

if (Left->BlockKind == BK_BracedInit)

Contexts.back().IsExpression = true;

+ if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&

+ Left->Previous->is(TT_JsTypeColon))

+ Contexts.back().IsExpression = false;

while (CurrentToken) {

if (CurrentToken->is(tok::r_brace)) {

@@ -452,6 +460,8 @@ private:

updateParameterCount(Left, CurrentToken);

if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {

FormatToken *Previous = CurrentToken->getPreviousNonComment();

+ if (Previous->is(TT_JsTypeOptionalQuestion))

+ Previous = Previous->getPreviousNonComment();

if (((CurrentToken->is(tok::colon) &&

(!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||

Style.Language == FormatStyle::LK_Proto ||

@@ -531,8 +541,11 @@ private:

!Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||

Contexts.back().ContextKind == tok::l_paren || // function params

Contexts.back().ContextKind == tok::l_square || // array type

+ (!Contexts.back().IsExpression &&

+ Contexts.back().ContextKind == tok::l_brace) || // object type

(Contexts.size() == 1 &&

Line.MustBeDeclaration)) { // method/property declaration

+ Contexts.back().IsExpression = false;

Tok->Type = TT_JsTypeColon;

break;

}

@@ -593,7 +606,8 @@ private:

break;

case tok::kw_if:

case tok::kw_while:

- if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->is(tok::kw_constexpr))

+ if (Tok->is(tok::kw_if) && CurrentToken &&

+ CurrentToken->is(tok::kw_constexpr))

next();

if (CurrentToken && CurrentToken->is(tok::l_paren)) {

next();

@@ -603,7 +617,9 @@ private:

break;

case tok::kw_for:

if (Style.Language == FormatStyle::LK_JavaScript) {

- if (Tok->Previous && Tok->Previous->is(tok::period))

+ // x.for and {for: ...}

+ if ((Tok->Previous && Tok->Previous->is(tok::period)) ||

+ (Tok->Next && Tok->Next->is(tok::colon)))

break;

// JS' for await ( ...

if (CurrentToken && CurrentToken->is(Keywords.kw_await))

@@ -619,8 +635,7 @@ private:

// marks the first l_paren as an OverloadedOperatorLParen. Here, we make

// the first two parens OverloadedOperators and the second l_paren an

// OverloadedOperatorLParen.

- if (Tok->Previous &&

- Tok->Previous->is(tok::r_paren) &&

+ if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&

Tok->Previous->MatchingParen &&

Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {

Tok->Previous->Type = TT_OverloadedOperator;

@@ -643,7 +658,7 @@ private:

break;

case tok::l_brace:

if (Style.Language == FormatStyle::LK_TextProto) {

- FormatToken *Previous =Tok->getPreviousNonComment();

+ FormatToken *Previous = Tok->getPreviousNonComment();

if (Previous && Previous->Type != TT_DictLiteral)

Previous->Type = TT_SelectorName;

}

@@ -683,7 +698,8 @@ private:

CurrentToken->Type = TT_PointerOrReference;

consumeToken();

if (CurrentToken &&

- CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma))

+ CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,

+ tok::comma))

CurrentToken->Previous->Type = TT_OverloadedOperator;

}

if (CurrentToken) {

@@ -740,8 +756,8 @@ private:

void parseIncludeDirective() {

if (CurrentToken && CurrentToken->is(tok::less)) {

- next();

- while (CurrentToken) {

+ next();

+ while (CurrentToken) {

// Mark tokens up to the trailing line comments as implicit string

// literals.

if (CurrentToken->isNot(tok::comment) &&

@@ -781,9 +797,9 @@ private:

void parseHasInclude() {

if (!CurrentToken || !CurrentToken->is(tok::l_paren))

return;

- next(); // '('

+ next(); // '('

parseIncludeDirective();

- next(); // ')'

+ next(); // ')'

}

LineType parsePreprocessorDirective() {

@@ -842,7 +858,7 @@ private:

if (Tok->is(tok::l_paren))

parseParens();

else if (Tok->isOneOf(Keywords.kw___has_include,

- Keywords.kw___has_include_next))

+ Keywords.kw___has_include_next))

parseHasInclude();

}

return Type;

@@ -855,7 +871,7 @@ public:

return parsePreprocessorDirective();

// Directly allow 'import <string-literal>' to support protocol buffer

- // definitions (code.google.com/p/protobuf) or missing "#" (either way we

+ // definitions (github.com/google/protobuf) or missing "#" (either way we

// should not break the line).

IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();

if ((Style.Language == FormatStyle::LK_Java &&

@@ -933,11 +949,11 @@ private:

// FIXME: Closure-library specific stuff should not be hard-coded but be

// configurable.

return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&

- Tok.Next->Next && (Tok.Next->Next->TokenText == "module" ||

- Tok.Next->Next->TokenText == "provide" ||

- Tok.Next->Next->TokenText == "require" ||

- Tok.Next->Next->TokenText == "setTestOnly" ||

- Tok.Next->Next->TokenText == "forwardDeclare") &&

+ Tok.Next->Next &&

+ (Tok.Next->Next->TokenText == "module" ||

+ Tok.Next->Next->TokenText == "provide" ||

+ Tok.Next->Next->TokenText == "require" ||

+ Tok.Next->Next->TokenText == "forwardDeclare") &&

Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);

}

@@ -1054,8 +1070,7 @@ private:

Current.Previous->is(TT_CtorInitializerColon)) {

Contexts.back().IsExpression = true;

Contexts.back().InCtorInitializer = true;

- } else if (Current.Previous &&

- Current.Previous->is(TT_InheritanceColon)) {

+ } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {

Contexts.back().InInheritanceList = true;

} else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {

for (FormatToken *Previous = Current.Previous;

@@ -1104,6 +1119,11 @@ private:

(!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {

Contexts.back().FirstStartOfName = &Current;

Current.Type = TT_StartOfName;

+ } else if (Current.is(tok::semi)) {

+ // Reset FirstStartOfName after finding a semicolon so that a for loop

+ // with multiple increment statements is not confused with a for loop

+ // having multiple variable declarations.

+ Contexts.back().FirstStartOfName = nullptr;

} else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {

AutoFound = true;

} else if (Current.is(tok::arrow) &&

@@ -1113,10 +1133,10 @@ private:

Current.NestingLevel == 0) {

Current.Type = TT_TrailingReturnArrow;

} else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {

- Current.Type =

- determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&

- Contexts.back().IsExpression,

- Contexts.back().InTemplateArgument);

+ Current.Type = determineStarAmpUsage(Current,

+ Contexts.back().CanBeExpression &&

+ Contexts.back().IsExpression,

+ Contexts.back().InTemplateArgument);

} else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {

Current.Type = determinePlusMinusCaretUsage(Current);

if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))

@@ -1396,11 +1416,13 @@ private:

if (NextToken->isOneOf(tok::comma, tok::semi))

return TT_PointerOrReference;

- if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&

- PrevToken->MatchingParen->Previous &&

- PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,

- tok::kw_decltype))

- return TT_PointerOrReference;

+ if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {

+ FormatToken *TokenBeforeMatchingParen =

+ PrevToken->MatchingParen->getPreviousNonComment();

+ if (TokenBeforeMatchingParen &&

+ TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))

+ return TT_PointerOrReference;

+ }

if (PrevToken->Tok.isLiteral() ||

PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,

@@ -1589,7 +1611,7 @@ private:

if (Current->is(TT_ConditionalExpr))

return prec::Conditional;

if (NextNonComment && Current->is(TT_SelectorName) &&

- (NextNonComment->is(TT_DictLiteral) ||

+ (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||

((Style.Language == FormatStyle::LK_Proto ||

Style.Language == FormatStyle::LK_TextProto) &&

NextNonComment->is(tok::less))))

@@ -1643,17 +1665,15 @@ private:

/// \brief Parse unary operator expressions and surround them with fake

/// parentheses if appropriate.

void parseUnaryOperator() {

- if (!Current || Current->isNot(TT_UnaryOperator)) {

- parse(PrecedenceArrowAndPeriod);

- return;

+ llvm::SmallVector<FormatToken *, 2> Tokens;

+ while (Current && Current->is(TT_UnaryOperator)) {

+ Tokens.push_back(Current);

+ next();

}

- FormatToken *Start = Current;

- next();

- parseUnaryOperator();

- // The actual precedence doesn't matter.

- addFakeParenthesis(Start, prec::Unknown);

+ parse(PrecedenceArrowAndPeriod);

+ for (FormatToken *Token : llvm::reverse(Tokens))

+ // The actual precedence doesn't matter.

+ addFakeParenthesis(Token, prec::Unknown);

}

void parseConditionalExpr() {

@@ -1722,7 +1742,7 @@ void TokenAnnotator::setCommentLineLevels(

static unsigned maxNestingDepth(const AnnotatedLine &Line) {

unsigned Result = 0;

- for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next)

+ for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)

Result = std::max(Result, Tok->NestingLevel);

return Result;

}

@@ -1764,7 +1784,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {

// function declaration.

static bool isFunctionDeclarationName(const FormatToken &Current,

const AnnotatedLine &Line) {

- auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* {

+ auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {

for (; Next; Next = Next->Next) {

if (Next->is(TT_OverloadedOperatorLParen))

return Next;

@@ -1772,8 +1792,8 @@ static bool isFunctionDeclarationName(const FormatToken &Current,

continue;

if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {

// For 'new[]' and 'delete[]'.

- if (Next->Next && Next->Next->is(tok::l_square) &&

- Next->Next->Next && Next->Next->Next->is(tok::r_square))

+ if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next &&

+ Next->Next->Next->is(tok::r_square))

Next = Next->Next->Next;

continue;

}

@@ -1872,7 +1892,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {

}

Line.First->TotalLength =

- Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;

+ Line.First->IsMultiline ? Style.ColumnLimit

+ : Line.FirstStartColumn + Line.First->ColumnWidth;

FormatToken *Current = Line.First->Next;

bool InFunctionDecl = Line.MightBeFunctionDecl;

while (Current) {

@@ -2005,6 +2026,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,

if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||

(Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))

return 100;

+ // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".

+ if (Left.opensScope() && Right.closesScope())

+ return 200;

}

if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))

@@ -2049,7 +2073,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,

if (Left.is(tok::comment))

return 1000;

- if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon))

+ if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,

+ TT_CtorInitializerColon))

return 2;

if (Right.isMemberAccess()) {

@@ -2107,8 +2132,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,

Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)

return 100;

if (Left.is(tok::l_paren) && Left.Previous &&

- (Left.Previous->isOneOf(tok::kw_if, tok::kw_for)

- || Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))

+ (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||

+ Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))

return 1000;

if (Left.is(tok::equal) && InFunctionDecl)

return 110;

@@ -2128,7 +2153,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,

if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&

Left.Previous->isLabelString() &&

(Left.NextOperator || Left.OperatorIndex != 0))

- return 45;

+ return 50;

if (Right.is(tok::plus) && Left.isLabelString() &&

(Right.NextOperator || Right.OperatorIndex != 0))

return 25;

@@ -2162,6 +2187,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,

const FormatToken &Right) {

if (Left.is(tok::kw_return) && Right.isNot(tok::semi))

return true;

+ if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)

+ return true;

if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&

Left.Tok.getObjCKeywordID() == tok::objc_property)

return true;

@@ -2178,8 +2205,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,

: Style.SpacesInParentheses;

if (Right.isOneOf(tok::semi, tok::comma))

return false;

- if (Right.is(tok::less) &&

- Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)

+ if (Right.is(tok::less) && Line.Type == LT_ObjCDecl &&

+ Style.ObjCSpaceBeforeProtocolList)

return true;

if (Right.is(tok::less) && Left.is(tok::kw_template))

return Style.SpaceAfterTemplateKeyword;

@@ -2201,15 +2228,23 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,

Left.Previous->is(tok::kw_case));

if (Left.is(tok::l_square) && Right.is(tok::amp))

return false;

- if (Right.is(TT_PointerOrReference))

- return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) ||

- (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous &&

- Left.Previous->is(tok::r_paren)) ||

+ if (Right.is(TT_PointerOrReference)) {

+ if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {

+ if (!Left.MatchingParen)

+ return true;

+ FormatToken *TokenBeforeMatchingParen =

+ Left.MatchingParen->getPreviousNonComment();

+ if (!TokenBeforeMatchingParen ||

+ !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))

+ return true;

+ }

+ return (Left.Tok.isLiteral() ||

(!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&

(Style.PointerAlignment != FormatStyle::PAS_Left ||

(Line.IsMultiVariableDeclStmt &&

(Left.NestingLevel == 0 ||

(Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));

+ }

if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&

(!Left.is(TT_PointerOrReference) ||

(Style.PointerAlignment != FormatStyle::PAS_Right &&

@@ -2231,17 +2266,20 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,

if (Left.is(tok::l_square))

return (Left.is(TT_ArrayInitializerLSquare) &&

Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||

- (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets &&

- Right.isNot(tok::r_square));

+ (Left.isOneOf(TT_ArraySubscriptLSquare,

+ TT_StructuredBindingLSquare) &&

+ Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));

if (Right.is(tok::r_square))

return Right.MatchingParen &&

((Style.SpacesInContainerLiterals &&

Right.MatchingParen->is(TT_ArrayInitializerLSquare)) ||

(Style.SpacesInSquareBrackets &&

- Right.MatchingParen->is(TT_ArraySubscriptLSquare)));

+ Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,

+ TT_StructuredBindingLSquare)));

if (Right.is(tok::l_square) &&

!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,

- TT_DesignatedInitializerLSquare) &&

+ TT_DesignatedInitializerLSquare,

+ TT_StructuredBindingLSquare) &&

!Left.isOneOf(tok::numeric_constant, TT_DictLiteral))

return false;

if (Left.is(tok::l_brace) && Right.is(tok::r_brace))

@@ -2287,7 +2325,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,

if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&

Left.MatchingParen->Previous &&

Left.MatchingParen->Previous->is(tok::period))

- // A.<B>DoSomething();

+ // A.<B<C<...>>>DoSomething();

return false;

if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))

return false;

@@ -2317,8 +2355,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,

if (Left.is(TT_JsFatArrow))

return true;

// for await ( ...

- if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) &&

- Left.Previous && Left.Previous->is(tok::kw_for))

+ if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&

+ Left.Previous->is(tok::kw_for))

return true;

if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&

Right.MatchingParen) {

@@ -2341,18 +2379,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,

Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))

return false;

if (Right.isOneOf(tok::l_brace, tok::l_square) &&

- Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))

+ Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,

+ Keywords.kw_extends, Keywords.kw_implements))

return true;

- // JS methods can use some keywords as names (e.g. `delete()`).

- if (Right.is(tok::l_paren) && Line.MustBeDeclaration &&

- Left.Tok.getIdentifierInfo())

- return false;

+ if (Right.is(tok::l_paren)) {

+ // JS methods can use some keywords as names (e.g. `delete()`).

+ if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())

+ return false;

+ // Valid JS method names can include keywords, e.g. `foo.delete()` or

+ // `bar.instanceof()`. Recognize call positions by preceding period.

+ if (Left.Previous && Left.Previous->is(tok::period) &&

+ Left.Tok.getIdentifierInfo())

+ return false;

+ // Additional unary JavaScript operators that need a space after.

+ if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,

+ tok::kw_void))

+ return true;

+ }

if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,

tok::kw_const) ||

// "of" is only a keyword if it appears after another identifier

- // (e.g. as "const x of y" in a for loop).

+ // (e.g. as "const x of y" in a for loop), or after a destructuring

+ // operation (const [x, y] of z, const {a, b} of c).

(Left.is(Keywords.kw_of) && Left.Previous &&

- Left.Previous->Tok.getIdentifierInfo())) &&

+ (Left.Previous->Tok.getIdentifierInfo() ||

+ Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&

(!Left.Previous || !Left.Previous->is(tok::period)))

return true;

if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&

@@ -2384,8 +2435,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,

return false;

if (Right.is(TT_JsNonNullAssertion))

return false;

- if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as))

- return true; // "x! as string"

+ if (Left.is(TT_JsNonNullAssertion) &&

+ Right.isOneOf(Keywords.kw_as, Keywords.kw_in))

+ return true; // "x! as string", "x! in y"

} else if (Style.Language == FormatStyle::LK_Java) {

if (Left.is(tok::r_square) && Right.is(tok::l_brace))

return true;

@@ -2464,9 +2516,18 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,

return (Left.is(TT_TemplateOpener) &&

Style.Standard == FormatStyle::LS_Cpp03) ||

!(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,

- tok::kw___super, TT_TemplateCloser, TT_TemplateOpener));

+ tok::kw___super, TT_TemplateCloser,

+ TT_TemplateOpener));

if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))

return Style.SpacesInAngles;

+ // Space before TT_StructuredBindingLSquare.

+ if (Right.is(TT_StructuredBindingLSquare))

+ return !Left.isOneOf(tok::amp, tok::ampamp) ||

+ Style.PointerAlignment != FormatStyle::PAS_Right;

+ // Space before & or && following a TT_StructuredBindingLSquare.

+ if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&

+ Right.isOneOf(tok::amp, tok::ampamp))

+ return Style.PointerAlignment != FormatStyle::PAS_Left;

if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||

(Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&

!Right.is(tok::r_paren)))

@@ -2516,7 +2577,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,

return true;

if (Left.is(tok::l_brace) && Line.Level == 0 &&

(Line.startsWith(tok::kw_enum) ||

- Line.startsWith(tok::kw_export, tok::kw_enum)))

+ Line.startsWith(tok::kw_const, tok::kw_enum) ||

+ Line.startsWith(tok::kw_export, tok::kw_enum) ||

+ Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))

// JavaScript top-level enum key/value pairs are put on separate lines

// instead of bin-packing.

return true;

@@ -2587,19 +2650,16 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,

!Style.ConstructorInitializerAllOnOneLineOrOnePerLine)

return true;

// Break only if we have multiple inheritance.

- if (Style.BreakBeforeInheritanceComma &&

- Right.is(TT_InheritanceComma))

- return true;

+ if (Style.BreakBeforeInheritanceComma && Right.is(TT_InheritanceComma))

+ return true;

if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))

// Raw string literals are special wrt. line breaks. The author has made a

// deliberate choice and might have aligned the contents of the string

// literal accordingly. Thus, we try keep existing line breaks.

return Right.NewlinesBefore > 0;

if ((Right.Previous->is(tok::l_brace) ||

- (Right.Previous->is(tok::less) &&

- Right.Previous->Previous &&

- Right.Previous->Previous->is(tok::equal))

- ) &&

+ (Right.Previous->is(tok::less) && Right.Previous->Previous &&

+ Right.Previous->Previous->is(tok::equal))) &&

Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {

// Don't put enums or option definitions onto single lines in protocol

// buffers.

@@ -2609,6 +2669,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,

return Right.HasUnescapedNewline;

if (isAllmanBrace(Left) || isAllmanBrace(Right))

return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||

+ (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&

+ Style.BraceWrapping.AfterEnum) ||

(Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||

(Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);

if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)

@@ -2639,13 +2701,16 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,

} else if (Style.Language == FormatStyle::LK_JavaScript) {

const FormatToken *NonComment = Right.getPreviousNonComment();

if (NonComment &&

- NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break,

- tok::kw_throw, Keywords.kw_interface,

- Keywords.kw_type, tok::kw_static, tok::kw_public,

- tok::kw_private, tok::kw_protected,

- Keywords.kw_readonly, Keywords.kw_abstract,

- Keywords.kw_get, Keywords.kw_set))

+ NonComment->isOneOf(

+ tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,

+ tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,

+ tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,

+ Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,

+ Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))

return false; // Otherwise automatic semicolon insertion would trigger.

+ if (Left.Tok.getIdentifierInfo() &&

+ Right.startsSequence(tok::l_square, tok::r_square))

+ return false; // breaking in "foo[]" creates illegal TS type syntax.

if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))

return false;

if (Left.is(TT_JsTypeColon))

@@ -2702,8 +2767,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,

// list.

return Left.BlockKind == BK_BracedInit ||

(Left.is(TT_CtorInitializerColon) &&

- Style.BreakConstructorInitializers ==

- FormatStyle::BCIS_AfterColon);

+ Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);

if (Left.is(tok::question) && Right.is(tok::colon))

return false;

if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))

@@ -2820,7 +2884,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,

}

void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {

- llvm::errs() << "AnnotatedTokens:\n";

+ llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";

const FormatToken *Tok = Line.First;

while (Tok) {

llvm::errs() << " M=" << Tok->MustBreakBefore

@@ -2828,10 +2892,9 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {

<< " T=" << getTokenTypeName(Tok->Type)

<< " S=" << Tok->SpacesRequiredBefore

<< " B=" << Tok->BlockParameterCount

- << " BK=" << Tok->BlockKind

- << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()

- << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind

- << " FakeLParens=";

+ << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty

+ << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength

+ << " PPK=" << Tok->PackingKind << " FakeLParens=";

for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)

llvm::errs() << Tok->FakeLParens[i] << "/";

llvm::errs() << " FakeRParens=" << Tok->FakeRParens;

diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h
index 805509533bf9..04a18d45b82e 100644
--- a/lib/Format/TokenAnnotator.h
+++ b/lib/Format/TokenAnnotator.h

@@ -43,7 +43,8 @@ public:

InPPDirective(Line.InPPDirective),

MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),

IsMultiVariableDeclStmt(false), Affected(false),

- LeadingEmptyLinesAffected(false), ChildrenAffected(false) {

+ LeadingEmptyLinesAffected(false), ChildrenAffected(false),

+ FirstStartColumn(Line.FirstStartColumn) {

assert(!Line.Tokens.empty());

// Calculate Next and Previous for all tokens. Note that we must overwrite

@@ -127,6 +128,8 @@ public:

/// \c True if one of this line's children intersects with an input range.

bool ChildrenAffected;

+ unsigned FirstStartColumn;

private:

// Disallow copying.

AnnotatedLine(const AnnotatedLine &) = delete;

diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index 2005a2822924..60dc1a7169d1 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp

@@ -164,8 +164,7 @@ public:

return nullptr;

const AnnotatedLine *Current = *Next;

IndentTracker.nextLine(*Current);

- unsigned MergedLines =

- tryFitMultipleLinesInOne(IndentTracker, Next, End);

+ unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End);

if (MergedLines > 0 && Style.ColumnLimit == 0)

// Disallow line merging if there is a break at the start of one of the

// input lines.

@@ -228,14 +227,16 @@ private:

if (Tok && Tok->getNamespaceToken())

return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock

- ? tryMergeSimpleBlock(I, E, Limit) : 0;

+ ? tryMergeSimpleBlock(I, E, Limit)

+ : 0;

if (Tok && Tok->is(tok::kw_typedef))

Tok = Tok->getNextNonComment();

if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union,

- Keywords.kw_interface))

+ tok::kw_extern, Keywords.kw_interface))

return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock

- ? tryMergeSimpleBlock(I, E, Limit) : 0;

+ ? tryMergeSimpleBlock(I, E, Limit)

+ : 0;

}

// FIXME: TheLine->Level != 0 might or might not be the right check to do.

@@ -279,15 +280,43 @@ private:

}

+ // Try to merge a function block with left brace unwrapped

if (TheLine->Last->is(TT_FunctionLBrace) &&

TheLine->First != TheLine->Last) {

return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;

}

+ // Try to merge a control statement block with left brace unwrapped

+ if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last &&

+ TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {

+ return Style.AllowShortBlocksOnASingleLine

+ ? tryMergeSimpleBlock(I, E, Limit)

+ : 0;

+ }

+ // Try to merge a control statement block with left brace wrapped

+ if (I[1]->First->is(tok::l_brace) &&

+ TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {

+ return Style.BraceWrapping.AfterControlStatement

+ ? tryMergeSimpleBlock(I, E, Limit)

+ : 0;

+ }

+ // Try to merge either empty or one-line block if it is preceded by control

+ // statement token

+ if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last &&

+ I != AnnotatedLines.begin() &&

+ I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {

+ return Style.AllowShortBlocksOnASingleLine

+ ? tryMergeSimpleBlock(I - 1, E, Limit)

+ : 0;

+ }

+ // Try to merge a block with left brace wrapped that wasn't yet covered

if (TheLine->Last->is(tok::l_brace)) {

- return !Style.BraceWrapping.AfterFunction

+ return !Style.BraceWrapping.AfterFunction ||

+ (I[1]->First->is(tok::r_brace) &&

+ !Style.BraceWrapping.SplitEmptyRecord)

? tryMergeSimpleBlock(I, E, Limit)

: 0;

}

+ // Try to merge a function block with left brace wrapped

if (I[1]->First->is(TT_FunctionLBrace) &&

Style.BraceWrapping.AfterFunction) {

if (I[1]->Last->is(TT_LineComment))

@@ -382,7 +411,9 @@ private:

return 0;

unsigned NumStmts = 0;

unsigned Length = 0;

+ bool EndsWithComment = false;

bool InPPDirective = I[0]->InPPDirective;

+ const unsigned Level = I[0]->Level;

for (; NumStmts < 3; ++NumStmts) {

if (I + 1 + NumStmts == E)

break;

@@ -392,9 +423,26 @@ private:

if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))

break;

if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,

- tok::kw_while, tok::comment) ||

- Line->Last->is(tok::comment))

+ tok::kw_while) ||

+ EndsWithComment)

return 0;

+ if (Line->First->is(tok::comment)) {

+ if (Level != Line->Level)

+ return 0;

+ SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts;

+ for (; J != E; ++J) {

+ Line = *J;

+ if (Line->InPPDirective != InPPDirective)

+ break;

+ if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))

+ break;

+ if (Line->First->isNot(tok::comment) || Level != Line->Level)

+ return 0;

+ }

+ break;

+ }

+ if (Line->Last->is(tok::comment))

+ EndsWithComment = true;

Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.

}

if (NumStmts == 0 || NumStmts == 3 || Length > Limit)

@@ -425,11 +473,27 @@ private:

tok::kw_for, tok::r_brace, Keywords.kw___except)) {

if (!Style.AllowShortBlocksOnASingleLine)

return 0;

+ // Don't merge when we can't, except for the case when

+ // the control statement block is empty

if (!Style.AllowShortIfStatementsOnASingleLine &&

- Line.startsWith(tok::kw_if))

+ Line.startsWith(tok::kw_if) &&

+ !Style.BraceWrapping.AfterControlStatement &&

+ !I[1]->First->is(tok::r_brace))

+ return 0;

+ if (!Style.AllowShortIfStatementsOnASingleLine &&

+ Line.startsWith(tok::kw_if) &&

+ Style.BraceWrapping.AfterControlStatement && I + 2 != E &&

+ !I[2]->First->is(tok::r_brace))

+ return 0;

+ if (!Style.AllowShortLoopsOnASingleLine &&

+ Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) &&

+ !Style.BraceWrapping.AfterControlStatement &&

+ !I[1]->First->is(tok::r_brace))

return 0;

if (!Style.AllowShortLoopsOnASingleLine &&

- Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))

+ Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) &&

+ Style.BraceWrapping.AfterControlStatement && I + 2 != E &&

+ !I[2]->First->is(tok::r_brace))

return 0;

// FIXME: Consider an option to allow short exception handling clauses on

// a single line.

@@ -441,52 +505,78 @@ private:

return 0;

}

- FormatToken *Tok = I[1]->First;

- if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&

- (Tok->getNextNonComment() == nullptr ||

- Tok->getNextNonComment()->is(tok::semi))) {

- // We merge empty blocks even if the line exceeds the column limit.

- Tok->SpacesRequiredBefore = 0;

- Tok->CanBreakBefore = true;

- return 1;

- } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&

- !startsExternCBlock(Line)) {

- // We don't merge short records.

- FormatToken *RecordTok =

- Line.First->is(tok::kw_typedef) ? Line.First->Next : Line.First;

- if (RecordTok &&

- RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,

- Keywords.kw_interface))

- return 0;

+ if (Line.Last->is(tok::l_brace)) {

+ FormatToken *Tok = I[1]->First;

+ if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&

+ (Tok->getNextNonComment() == nullptr ||

+ Tok->getNextNonComment()->is(tok::semi))) {

+ // We merge empty blocks even if the line exceeds the column limit.

+ Tok->SpacesRequiredBefore = 0;

+ Tok->CanBreakBefore = true;

+ return 1;

+ } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&

+ !startsExternCBlock(Line)) {

+ // We don't merge short records.

+ FormatToken *RecordTok = Line.First;

+ // Skip record modifiers.

+ while (RecordTok->Next &&

+ RecordTok->isOneOf(tok::kw_typedef, tok::kw_export,

+ Keywords.kw_declare, Keywords.kw_abstract,

+ tok::kw_default))

+ RecordTok = RecordTok->Next;

+ if (RecordTok &&

+ RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,

+ Keywords.kw_interface))

+ return 0;

- // Check that we still have three lines and they fit into the limit.

- if (I + 2 == E || I[2]->Type == LT_Invalid)

- return 0;

- Limit = limitConsideringMacros(I + 2, E, Limit);

+ // Check that we still have three lines and they fit into the limit.

+ if (I + 2 == E || I[2]->Type == LT_Invalid)

+ return 0;

+ Limit = limitConsideringMacros(I + 2, E, Limit);

- if (!nextTwoLinesFitInto(I, Limit))

- return 0;

+ if (!nextTwoLinesFitInto(I, Limit))

+ return 0;

- // Second, check that the next line does not contain any braces - if it

- // does, readability declines when putting it into a single line.

- if (I[1]->Last->is(TT_LineComment))

- return 0;

- do {

- if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)

+ // Second, check that the next line does not contain any braces - if it

+ // does, readability declines when putting it into a single line.

+ if (I[1]->Last->is(TT_LineComment))

+ return 0;

+ do {

+ if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)

+ return 0;

+ Tok = Tok->Next;

+ } while (Tok);

+ // Last, check that the third line starts with a closing brace.

+ Tok = I[2]->First;

+ if (Tok->isNot(tok::r_brace))

return 0;

- Tok = Tok->Next;

- } while (Tok);

- // Last, check that the third line starts with a closing brace.

- Tok = I[2]->First;

- if (Tok->isNot(tok::r_brace))

- return 0;

+ // Don't merge "if (a) { .. } else {".

+ if (Tok->Next && Tok->Next->is(tok::kw_else))

+ return 0;

- // Don't merge "if (a) { .. } else {".

- if (Tok->Next && Tok->Next->is(tok::kw_else))

+ return 2;

+ }

+ } else if (I[1]->First->is(tok::l_brace)) {

+ if (I[1]->Last->is(TT_LineComment))

return 0;

- return 2;

+ // Check for Limit <= 2 to account for the " {".

+ if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I)))

+ return 0;

+ Limit -= 2;

+ unsigned MergedLines = 0;

+ if (Style.AllowShortBlocksOnASingleLine ||

+ (I[1]->First == I[1]->Last && I + 2 != E &&

+ I[2]->First->is(tok::r_brace))) {

+ MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);

+ // If we managed to merge the block, count the statement header, which

+ // is on a separate line.

+ if (MergedLines > 0)

+ ++MergedLines;

+ }

+ return MergedLines;

}

return 0;

}

@@ -574,7 +664,9 @@ public:

/// \brief Formats an \c AnnotatedLine and returns the penalty.

///

/// If \p DryRun is \c false, directly applies the changes.

- virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,

+ virtual unsigned formatLine(const AnnotatedLine &Line,

+ unsigned FirstIndent,

+ unsigned FirstStartColumn,

bool DryRun) = 0;

protected:

@@ -645,7 +737,8 @@ protected:

*Child->First, /*Newlines=*/0, /*Spaces=*/1,

/*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);

}

- Penalty += formatLine(*Child, State.Column + 1, DryRun);

+ Penalty +=

+ formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun);

State.Column += 1 + Child->Last->TotalLength;

return true;

@@ -671,10 +764,10 @@ public:

/// \brief Formats the line, simply keeping all of the input's line breaking

/// decisions.

unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,

- bool DryRun) override {

+ unsigned FirstStartColumn, bool DryRun) override {

assert(!DryRun);

- LineState State =

- Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);

+ LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn,

+ &Line, /*DryRun=*/false);

while (State.NextToken) {

bool Newline =

Indenter->mustBreak(State) ||

@@ -697,9 +790,10 @@ public:

/// \brief Puts all tokens into a single line.

unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,

- bool DryRun) override {

+ unsigned FirstStartColumn, bool DryRun) override {

unsigned Penalty = 0;

- LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);

+ LineState State =

+ Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);

while (State.NextToken) {

formatChildren(State, /*Newline=*/false, DryRun, Penalty);

Indenter->addTokenToState(

@@ -721,8 +815,9 @@ public:

/// \brief Formats the line by finding the best line breaks with line lengths

/// below the column limit.

unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,

- bool DryRun) override {

- LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);

+ unsigned FirstStartColumn, bool DryRun) override {

+ LineState State =

+ Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);

// If the ObjC method declaration does not fit on a line, we should format

// it with one arg per line.

@@ -763,7 +858,8 @@ private:

/// \brief The BFS queue type.

typedef std::priority_queue<QueueItem, std::vector<QueueItem>,

- std::greater<QueueItem>> QueueType;

+ std::greater<QueueItem>>

+ QueueType;

/// \brief Analyze the entire solution space starting from \p InitialState.

///

@@ -888,7 +984,10 @@ private:

unsigned

UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,

bool DryRun, int AdditionalIndent,

- bool FixBadIndentation) {

+ bool FixBadIndentation,

+ unsigned FirstStartColumn,

+ unsigned NextStartColumn,

+ unsigned LastStartColumn) {

LineJoiner Joiner(Style, Keywords, Lines);

// Try to look up already computed penalty in DryRun-mode.

@@ -908,9 +1007,10 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,

// The minimum level of consecutive lines that have been formatted.

unsigned RangeMinLevel = UINT_MAX;

+ bool FirstLine = true;

for (const AnnotatedLine *Line =

Joiner.getNextMergedLine(DryRun, IndentTracker);

- Line; Line = NextLine) {

+ Line; Line = NextLine, FirstLine = false) {

const AnnotatedLine &TheLine = *Line;

unsigned Indent = IndentTracker.getIndent();

@@ -934,8 +1034,12 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,

}

if (ShouldFormat && TheLine.Type != LT_Invalid) {

- if (!DryRun)

- formatFirstToken(TheLine, PreviousLine, Indent);

+ if (!DryRun) {

+ bool LastLine = Line->First->is(tok::eof);

+ formatFirstToken(TheLine, PreviousLine,

+ Indent,

+ LastLine ? LastStartColumn : NextStartColumn + Indent);

+ }

NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);

unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);

@@ -944,16 +1048,18 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,

(TheLine.Type == LT_ImportStatement &&

(Style.Language != FormatStyle::LK_JavaScript ||

!Style.JavaScriptWrapImports));

if (Style.ColumnLimit == 0)

NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)

- .formatLine(TheLine, Indent, DryRun);

+ .formatLine(TheLine, NextStartColumn + Indent,

+ FirstLine ? FirstStartColumn : 0, DryRun);

else if (FitsIntoOneLine)

Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)

- .formatLine(TheLine, Indent, DryRun);

+ .formatLine(TheLine, NextStartColumn + Indent,

+ FirstLine ? FirstStartColumn : 0, DryRun);

else

Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)

- .formatLine(TheLine, Indent, DryRun);

+ .formatLine(TheLine, NextStartColumn + Indent,

+ FirstLine ? FirstStartColumn : 0, DryRun);

RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);

} else {

// If no token in the current line is affected, we still need to format

@@ -976,6 +1082,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,

// Format the first token.

if (ReformatLeadingWhitespace)

formatFirstToken(TheLine, PreviousLine,

+ TheLine.First->OriginalColumn,

TheLine.First->OriginalColumn);

else

Whitespaces->addUntouchableToken(*TheLine.First,

@@ -998,12 +1105,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,

void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,

const AnnotatedLine *PreviousLine,

- unsigned Indent) {

- FormatToken& RootToken = *Line.First;

+ unsigned Indent,

+ unsigned NewlineIndent) {

+ FormatToken &RootToken = *Line.First;

if (RootToken.is(tok::eof)) {

unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);

- Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0,

- /*StartOfTokenColumn=*/0);

+ unsigned TokenIndent = Newlines ? NewlineIndent : 0;

+ Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent,

+ TokenIndent);

return;

}

unsigned Newlines =

@@ -1013,6 +1122,9 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,

(!RootToken.Next ||

(RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))

Newlines = std::min(Newlines, 1u);

+ // Remove empty lines at the start of nested blocks (lambdas/arrow functions)

+ if (PreviousLine == nullptr && Line.Level > 0)

+ Newlines = std::min(Newlines, 1u);

if (Newlines == 0 && !RootToken.IsFirst)

Newlines = 1;

if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)

@@ -1035,6 +1147,13 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line,

(!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))

Newlines = std::min(1u, Newlines);

+ if (Newlines)

+ Indent = NewlineIndent;

+ // Preprocessor directives get indented after the hash, if indented.

+ if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement)

+ Indent = 0;

Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,

Line.InPPDirective &&

!RootToken.HasUnescapedNewline);

diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h
index 55f0d1cac689..6432ca83a4c9 100644
--- a/lib/Format/UnwrappedLineFormatter.h
+++ b/lib/Format/UnwrappedLineFormatter.h

@@ -35,19 +35,22 @@ public:

const SourceManager &SourceMgr,

FormattingAttemptStatus *Status)

: Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),

- Keywords(Keywords), SourceMgr(SourceMgr),

- Status(Status) {}

+ Keywords(Keywords), SourceMgr(SourceMgr), Status(Status) {}

/// \brief Format the current block and return the penalty.

unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines,

bool DryRun = false, int AdditionalIndent = 0,

- bool FixBadIndentation = false);

+ bool FixBadIndentation = false,

+ unsigned FirstStartColumn = 0,

+ unsigned NextStartColumn = 0,

+ unsigned LastStartColumn = 0);

private:

/// \brief Add a new line and the required indent before the first Token

/// of the \c UnwrappedLine if there was no structural parsing error.

void formatFirstToken(const AnnotatedLine &Line,

- const AnnotatedLine *PreviousLine, unsigned Indent);

+ const AnnotatedLine *PreviousLine, unsigned Indent,

+ unsigned NewlineIndent);

/// \brief Returns the column limit for a line, taking into account whether we

/// need an escaped newline due to a continued preprocessor directive.

diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index faac5a371c26..b8608dcac9c7 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp

@@ -18,6 +18,8 @@

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

+#include <algorithm>

#define DEBUG_TYPE "format-parser"

namespace clang {

@@ -56,8 +58,7 @@ private:

};

static bool isLineComment(const FormatToken &FormatTok) {

- return FormatTok.is(tok::comment) &&

- FormatTok.TokenText.startswith("//");

+ return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");

}

// Checks if \p FormatTok is a line comment that continues the line comment

@@ -226,15 +227,21 @@ private:

UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,

const AdditionalKeywords &Keywords,

+ unsigned FirstStartColumn,

ArrayRef<FormatToken *> Tokens,

UnwrappedLineConsumer &Callback)

: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),

CurrentLines(&Lines), Style(Style), Keywords(Keywords),

CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),

- Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}

+ Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),

+ IfNdefCondition(nullptr), FoundIncludeGuardStart(false),

+ IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {}

void UnwrappedLineParser::reset() {

PPBranchLevel = -1;

+ IfNdefCondition = nullptr;

+ FoundIncludeGuardStart = false;

+ IncludeGuardRejected = false;

Line.reset(new UnwrappedLine);

CommentsBeforeNextToken.clear();

FormatTok = nullptr;

@@ -243,10 +250,12 @@ void UnwrappedLineParser::reset() {

CurrentLines = &Lines;

DeclarationScopeStack.clear();

PPStack.clear();

+ Line->FirstStartColumn = FirstStartColumn;

}

void UnwrappedLineParser::parse() {

IndexedTokenSource TokenSource(AllTokens);

+ Line->FirstStartColumn = FirstStartColumn;

do {

DEBUG(llvm::dbgs() << "----\n");

reset();

@@ -326,6 +335,12 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {

break;

case tok::kw_default:

case tok::kw_case:

+ if (Style.Language == FormatStyle::LK_JavaScript &&

+ Line->MustBeDeclaration) {

+ // A 'case: string' style field declaration.

+ parseStructuralElement();

+ break;

+ }

if (!SwitchLabelEncountered &&

(Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))

++Line->Level;

@@ -346,7 +361,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {

// definitions, too.

unsigned StoredPosition = Tokens->getPosition();

FormatToken *Tok = FormatTok;

- const FormatToken *PrevTok = getPreviousToken();

+ const FormatToken *PrevTok = Tok->Previous;

// Keep a stack of positions of lbrace tokens. We will

// update information about whether an lbrace starts a

// braced init list or a different block during the loop.

@@ -364,13 +379,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {

switch (Tok->Tok.getKind()) {

case tok::l_brace:

if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {

- if (PrevTok->is(tok::colon))

- // A colon indicates this code is in a type, or a braced list

- // following a label in an object literal ({a: {b: 1}}). The code

- // below could be confused by semicolons between the individual

- // members in a type member list, which would normally trigger

- // BK_Block. In both cases, this must be parsed as an inline braced

- // init.

+ if (PrevTok->isOneOf(tok::colon, tok::less))

+ // A ':' indicates this code is in a type, or a braced list

+ // following a label in an object literal ({a: {b: 1}}).

+ // A '<' could be an object used in a comparison, but that is nonsense

+ // code (can never return true), so more likely it is a generic type

+ // argument (`X<{a: string; b: number}>`).

+ // The code below could be confused by semicolons between the

+ // individual members in a type member list, which would normally

+ // trigger BK_Block. In both cases, this must be parsed as an inline

+ // braced init.

Tok->BlockKind = BK_BracedInit;

else if (PrevTok->is(tok::r_paren))

// `) { }` can only occur in function or method declarations in JS.

@@ -452,6 +470,21 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {

FormatTok = Tokens->setPosition(StoredPosition);

}

+template <class T>

+static inline void hash_combine(std::size_t &seed, const T &v) {

+ std::hash<T> hasher;

+ seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);

+size_t UnwrappedLineParser::computePPHash() const {

+ size_t h = 0;

+ for (const auto &i : PPStack) {

+ hash_combine(h, size_t(i.Kind));

+ hash_combine(h, i.Line);

+ }

+ return h;

void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,

bool MunchSemi) {

assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&

@@ -459,16 +492,21 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,

const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);

FormatTok->BlockKind = BK_Block;

+ size_t PPStartHash = computePPHash();

unsigned InitialLevel = Line->Level;

- nextToken();

+ nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

if (MacroBlock && FormatTok->is(tok::l_paren))

parseParens();

+ size_t NbPreprocessorDirectives =

+ CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;

addUnwrappedLine();

- size_t OpeningLineIndex = CurrentLines->empty()

- ? (UnwrappedLine::kInvalidIndex)

- : (CurrentLines->size() - 1);

+ size_t OpeningLineIndex =

+ CurrentLines->empty()

+ ? (UnwrappedLine::kInvalidIndex)

+ : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,

MustBeDeclaration);

@@ -486,7 +524,10 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,

return;

}

- nextToken(); // Munch the closing brace.

+ size_t PPEndHash = computePPHash();

+ // Munch the closing brace.

+ nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

if (MacroBlock && FormatTok->is(tok::l_paren))

parseParens();

@@ -494,11 +535,14 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,

if (MunchSemi && FormatTok->Tok.is(tok::semi))

nextToken();

Line->Level = InitialLevel;

- Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;

- if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {

- // Update the opening line to add the forward reference as well

- (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =

- CurrentLines->size() - 1;

+ if (PPStartHash == PPEndHash) {

+ Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;

+ if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {

+ // Update the opening line to add the forward reference as well

+ (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =

+ CurrentLines->size() - 1;

+ }

}

@@ -555,9 +599,8 @@ void UnwrappedLineParser::parseChildBlock() {

FormatTok->BlockKind = BK_Block;

nextToken();

{

- bool SkipIndent =

- (Style.Language == FormatStyle::LK_JavaScript &&

- (isGoogScope(*Line) || isIIFE(*Line, Keywords)));

+ bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&

+ (isGoogScope(*Line) || isIIFE(*Line, Keywords)));

ScopedLineState LineState(*this);

ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,

/*MustBeDeclaration=*/false);

@@ -606,10 +649,15 @@ void UnwrappedLineParser::parsePPDirective() {

}

void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {

- if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))

- PPStack.push_back(PP_Unreachable);

+ size_t Line = CurrentLines->size();

+ if (CurrentLines == &PreprocessorDirectives)

+ Line += Lines.size();

+ if (Unreachable ||

+ (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))

+ PPStack.push_back({PP_Unreachable, Line});

else

- PPStack.push_back(PP_Conditional);

+ PPStack.push_back({PP_Conditional, Line});

}

void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {

@@ -643,7 +691,7 @@ void UnwrappedLineParser::conditionalCompilationEnd() {

}

// Guard against #endif's without #if.

- if (PPBranchLevel > 0)

+ if (PPBranchLevel > -1)

--PPBranchLevel;

if (!PPChainBranchIndex.empty())

PPChainBranchIndex.pop();

@@ -660,12 +708,35 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) {

if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")

Unreachable = true;

conditionalCompilationStart(Unreachable);

+ FormatToken *IfCondition = FormatTok;

+ // If there's a #ifndef on the first line, and the only lines before it are

+ // comments, it could be an include guard.

+ bool MaybeIncludeGuard = IfNDef;

+ if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {

+ for (auto &Line : Lines) {

+ if (!Line.Tokens.front().Tok->is(tok::comment)) {

+ MaybeIncludeGuard = false;

+ IncludeGuardRejected = true;

+ break;

+ }

+ --PPBranchLevel;

parsePPUnknown();

+ ++PPBranchLevel;

+ if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)

+ IfNdefCondition = IfCondition;

}

void UnwrappedLineParser::parsePPElse() {

+ // If a potential include guard has an #else, it's not an include guard.

+ if (FoundIncludeGuardStart && PPBranchLevel == 0)

+ FoundIncludeGuardStart = false;

conditionalCompilationAlternative();

+ if (PPBranchLevel > -1)

+ --PPBranchLevel;

parsePPUnknown();

+ ++PPBranchLevel;

}

void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }

@@ -673,6 +744,16 @@ void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }

void UnwrappedLineParser::parsePPEndIf() {

conditionalCompilationEnd();

parsePPUnknown();

+ // If the #endif of a potential include guard is the last thing in the file,

+ // then we count it as a real include guard and subtract one from every

+ // preprocessor indent.

+ unsigned TokenPosition = Tokens->getPosition();

+ FormatToken *PeekNext = AllTokens[TokenPosition];

+ if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&

+ Style.IndentPPDirectives != FormatStyle::PPDIS_None)

+ for (auto &Line : Lines)

+ if (Line.InPPDirective && Line.Level > 0)

+ --Line.Level;

}

void UnwrappedLineParser::parsePPDefine() {

@@ -682,14 +763,26 @@ void UnwrappedLineParser::parsePPDefine() {

parsePPUnknown();

return;

}

+ if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {

+ FoundIncludeGuardStart = true;

+ for (auto &Line : Lines) {

+ if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {

+ FoundIncludeGuardStart = false;

+ break;

+ }

+ IfNdefCondition = nullptr;

nextToken();

if (FormatTok->Tok.getKind() == tok::l_paren &&

FormatTok->WhitespaceRange.getBegin() ==

FormatTok->WhitespaceRange.getEnd()) {

parseParens();

}

+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)

+ Line->Level += PPBranchLevel + 1;

addUnwrappedLine();

- Line->Level = 1;

+ ++Line->Level;

// Errors during a preprocessor directive can only affect the layout of the

// preprocessor directive, and thus we ignore them. An alternative approach

@@ -703,7 +796,10 @@ void UnwrappedLineParser::parsePPUnknown() {

do {

nextToken();

} while (!eof());

+ if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)

+ Line->Level += PPBranchLevel + 1;

addUnwrappedLine();

+ IfNdefCondition = nullptr;

}

// Here we blacklist certain tokens that are not usually the first token in an

@@ -746,8 +842,8 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords,

Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,

Keywords.kw_let, Keywords.kw_var, tok::kw_const,

Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,

- Keywords.kw_instanceof, Keywords.kw_interface,

- Keywords.kw_throws, Keywords.kw_from));

+ Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,

+ Keywords.kw_from));

}

static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,

@@ -800,11 +896,14 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {

bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);

bool PreviousStartsTemplateExpr =

Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");

- if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {

- // If the token before the previous one is an '@', the previous token is an

- // annotation and can precede another identifier/value.

- const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;

- if (PrePrevious->is(tok::at))

+ if (PreviousMustBeValue || Previous->is(tok::r_paren)) {

+ // If the line contains an '@' sign, the previous token might be an

+ // annotation, which can precede another identifier/value.

+ bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),

+ [](UnwrappedLineNode &LineNode) {

+ return LineNode.Tok->is(tok::at);

+ }) != Line->Tokens.end();

+ if (HasAt)

return;

}

if (Next->is(tok::exclaim) && PreviousMustBeValue)

@@ -817,7 +916,8 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {

Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,

tok::minusminus)))

return addUnwrappedLine();

- if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))

+ if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&

+ isJSDeclOrStmt(Keywords, Next))

return addUnwrappedLine();

}

@@ -922,13 +1022,22 @@ void UnwrappedLineParser::parseStructuralElement() {

parseDoWhile();

return;

case tok::kw_switch:

+ if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)

+ // 'switch: string' field declaration.

+ break;

parseSwitch();

return;

case tok::kw_default:

+ if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)

+ // 'default: string' field declaration.

+ break;

nextToken();

parseLabel();

return;

case tok::kw_case:

+ if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)

+ // 'case: string' field declaration.

+ break;

parseCaseLabel();

return;

case tok::kw_try:

@@ -940,7 +1049,12 @@ void UnwrappedLineParser::parseStructuralElement() {

if (FormatTok->Tok.is(tok::string_literal)) {

nextToken();

if (FormatTok->Tok.is(tok::l_brace)) {

- parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);

+ if (Style.BraceWrapping.AfterExternBlock) {

+ addUnwrappedLine();

+ parseBlock(/*MustBeDeclaration=*/true);

+ } else {

+ parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);

+ }

addUnwrappedLine();

return;

}

@@ -996,7 +1110,7 @@ void UnwrappedLineParser::parseStructuralElement() {

break;

}

do {

- const FormatToken *Previous = getPreviousToken();

+ const FormatToken *Previous = FormatTok->Previous;

switch (FormatTok->Tok.getKind()) {

case tok::at:

nextToken();

@@ -1186,7 +1300,7 @@ void UnwrappedLineParser::parseStructuralElement() {

nextToken();

parseBracedList();

} else if (Style.Language == FormatStyle::LK_Proto &&

- FormatTok->Tok.is(tok::less)) {

+ FormatTok->Tok.is(tok::less)) {

nextToken();

parseBracedList(/*ContinueOnSemicolons=*/false,

/*ClosingBraceKind=*/tok::greater);

@@ -1210,14 +1324,6 @@ bool UnwrappedLineParser::tryToParseLambda() {

nextToken();

return false;

}

- const FormatToken* Previous = getPreviousToken();

- if (Previous &&

- (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,

- tok::kw_delete) ||

- Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {

- nextToken();

- return false;

- }

assert(FormatTok->is(tok::l_square));

FormatToken &LSquare = *FormatTok;

if (!tryToParseLambdaIntroducer())

@@ -1260,49 +1366,18 @@ bool UnwrappedLineParser::tryToParseLambda() {

}

bool UnwrappedLineParser::tryToParseLambdaIntroducer() {

- nextToken();

- if (FormatTok->is(tok::equal)) {

- nextToken();

- if (FormatTok->is(tok::r_square)) {

- nextToken();

- return true;

- }

- if (FormatTok->isNot(tok::comma))

- return false;

- nextToken();

- } else if (FormatTok->is(tok::amp)) {

- nextToken();

- if (FormatTok->is(tok::r_square)) {

- nextToken();

- return true;

- }

- if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {

- return false;

- }

- if (FormatTok->is(tok::comma))

- nextToken();

- } else if (FormatTok->is(tok::r_square)) {

+ const FormatToken *Previous = FormatTok->Previous;

+ if (Previous &&

+ (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,

+ tok::kw_delete) ||

+ FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||

+ Previous->isSimpleTypeSpecifier())) {

nextToken();

- return true;

+ return false;

}

- do {

- if (FormatTok->is(tok::amp))

- nextToken();

- if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))

- return false;

- nextToken();

- if (FormatTok->is(tok::ellipsis))

- nextToken();

- if (FormatTok->is(tok::comma)) {

- nextToken();

- } else if (FormatTok->is(tok::r_square)) {

- nextToken();

- return true;

- } else {

- return false;

- }

- } while (!eof());

- return false;

+ nextToken();

+ parseSquare(/*LambdaIntroducer=*/true);

+ return true;

}

void UnwrappedLineParser::tryToParseJSFunction() {

@@ -1419,6 +1494,15 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,

nextToken();

parseBracedList();

break;

+ case tok::less:

+ if (Style.Language == FormatStyle::LK_Proto) {

+ nextToken();

+ parseBracedList(/*ContinueOnSemicolons=*/false,

+ /*ClosingBraceKind=*/tok::greater);

+ } else {

+ nextToken();

+ }

+ break;

case tok::semi:

// JavaScript (or more precisely TypeScript) can have semicolons in braced

// lists (in so-called TypeMemberLists). Thus, the semicolon cannot be

@@ -1495,10 +1579,12 @@ void UnwrappedLineParser::parseParens() {

} while (!eof());

}

-void UnwrappedLineParser::parseSquare() {

- assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");

- if (tryToParseLambda())

- return;

+void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {

+ if (!LambdaIntroducer) {

+ assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");

+ if (tryToParseLambda())

+ return;

+ }

do {

switch (FormatTok->Tok.getKind()) {

case tok::l_paren:

@@ -1939,6 +2025,17 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {

((Style.Language == FormatStyle::LK_Java ||

Style.Language == FormatStyle::LK_JavaScript) &&

FormatTok->isOneOf(tok::period, tok::comma))) {

+ if (Style.Language == FormatStyle::LK_JavaScript &&

+ FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {

+ // JavaScript/TypeScript supports inline object types in

+ // extends/implements positions:

+ // class Foo implements {bar: number} { }

+ nextToken();

+ if (FormatTok->is(tok::l_brace)) {

+ tryToParseBracedList();

+ continue;

+ }

bool IsNonMacroIdentifier =

FormatTok->is(tok::identifier) &&

FormatTok->TokenText != FormatTok->TokenText.upper();

@@ -2090,7 +2187,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {

while (!eof()) {

if (FormatTok->is(tok::semi))

return;

- if (Line->Tokens.size() == 0) {

+ if (Line->Tokens.empty()) {

// Common issue: Automatic Semicolon Insertion wrapped the line, so the

// import statement should terminate.

return;

@@ -2107,14 +2204,15 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {

LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,

StringRef Prefix = "") {

- llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"

+ llvm::dbgs() << Prefix << "Line(" << Line.Level

+ << ", FSC=" << Line.FirstStartColumn << ")"

<< (Line.InPPDirective ? " MACRO" : "") << ": ";

for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),

E = Line.Tokens.end();

I != E; ++I) {

llvm::dbgs() << I->Tok->Tok.getName() << "["

- << "T=" << I->Tok->Type

- << ", OC=" << I->Tok->OriginalColumn << "] ";

+ << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn

+ << "] ";

}

for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),

E = Line.Tokens.end();

@@ -2140,12 +2238,15 @@ void UnwrappedLineParser::addUnwrappedLine() {

CurrentLines->push_back(std::move(*Line));

Line->Tokens.clear();

Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;

+ Line->FirstStartColumn = 0;

if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {

CurrentLines->append(

std::make_move_iterator(PreprocessorDirectives.begin()),

std::make_move_iterator(PreprocessorDirectives.end()));

PreprocessorDirectives.clear();

}

+ // Disconnect the current token from the last token on the previous line.

+ FormatTok->Previous = nullptr;

}

bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }

@@ -2287,23 +2388,17 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {

CommentsBeforeNextToken.clear();

}

-void UnwrappedLineParser::nextToken() {

+void UnwrappedLineParser::nextToken(int LevelDifference) {

if (eof())

return;

flushComments(isOnNewLine(*FormatTok));

pushToken(FormatTok);

+ FormatToken *Previous = FormatTok;

if (Style.Language != FormatStyle::LK_JavaScript)

- readToken();

+ readToken(LevelDifference);

else

readTokenWithJavaScriptASI();

-const FormatToken *UnwrappedLineParser::getPreviousToken() {

- // FIXME: This is a dirty way to access the previous token. Find a better

- // solution.

- if (!Line || Line->Tokens.empty())

- return nullptr;

- return Line->Tokens.back().Tok;

+ FormatTok->Previous = Previous;

}

void UnwrappedLineParser::distributeComments(

@@ -2343,8 +2438,7 @@ void UnwrappedLineParser::distributeComments(

}

for (unsigned i = 0, e = Comments.size(); i < e; ++i) {

FormatToken *FormatTok = Comments[i];

- if (HasTrailAlignedWithNextToken &&

- i == StartOfTrailAlignedWithNextToken) {

+ if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {

FormatTok->ContinuesLineCommentSection = false;

} else {

FormatTok->ContinuesLineCommentSection =

@@ -2362,7 +2456,7 @@ void UnwrappedLineParser::distributeComments(

}

-void UnwrappedLineParser::readToken() {

+void UnwrappedLineParser::readToken(int LevelDifference) {

SmallVector<FormatToken *, 1> Comments;

do {

FormatTok = Tokens->getNextToken();

@@ -2375,6 +2469,10 @@ void UnwrappedLineParser::readToken() {

// directives only after that unwrapped line was finished later.

bool SwitchToPreprocessorLines = !Line->Tokens.empty();

ScopedLineState BlockState(*this, SwitchToPreprocessorLines);

+ assert((LevelDifference >= 0 ||

+ static_cast<unsigned>(-LevelDifference) <= Line->Level) &&

+ "LevelDifference makes Line->Level negative");

+ Line->Level += LevelDifference;

// Comments stored before the preprocessor directive need to be output

// before the preprocessor directive, at the same level as the

// preprocessor directive, as we consider them to apply to the directive.

@@ -2395,7 +2493,7 @@ void UnwrappedLineParser::readToken() {

FormatTok->MustBreakBefore = true;

}

- if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&

+ if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&

!Line->InPPDirective) {

continue;

}

diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index a2aa2f006728..1d8ccabbd0f8 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h

@@ -56,6 +56,8 @@ struct UnwrappedLine {

size_t MatchingOpeningBlockLineIndex;

static const size_t kInvalidIndex = -1;

+ unsigned FirstStartColumn = 0;

};

class UnwrappedLineConsumer {

@@ -71,6 +73,7 @@ class UnwrappedLineParser {

public:

UnwrappedLineParser(const FormatStyle &Style,

const AdditionalKeywords &Keywords,

+ unsigned FirstStartColumn,

ArrayRef<FormatToken *> Tokens,

UnwrappedLineConsumer &Callback);

@@ -96,7 +99,7 @@ private:

bool parseBracedList(bool ContinueOnSemicolons = false,

tok::TokenKind ClosingBraceKind = tok::r_brace);

void parseParens();

- void parseSquare();

+ void parseSquare(bool LambdaIntroducer = false);

void parseIfThenElse();

void parseTryCatch();

void parseForOrWhileLoop();

@@ -123,9 +126,12 @@ private:

void tryToParseJSFunction();

void addUnwrappedLine();

bool eof() const;

- void nextToken();

- const FormatToken *getPreviousToken();

- void readToken();

+ // LevelDifference is the difference of levels after and before the current

+ // token. For example:

+ // - if the token is '{' and opens a block, LevelDifference is 1.

+ // - if the token is '}' and closes a block, LevelDifference is -1.

+ void nextToken(int LevelDifference = 0);

+ void readToken(int LevelDifference = 0);

// Decides which comment tokens should be added to the current line and which

// should be added as comments before the next token.

@@ -156,6 +162,11 @@ private:

bool isOnNewLine(const FormatToken &FormatTok);

+ // Compute hash of the current preprocessor branch.

+ // This is used to identify the different branches, and thus track if block

+ // open and close in the same branch.

+ size_t computePPHash() const;

// FIXME: We are constantly running into bugs where Line.Level is incorrectly

// subtracted from beyond 0. Introduce a method to subtract from Line.Level

// and use that everywhere in the Parser.

@@ -174,7 +185,7 @@ private:

// Preprocessor directives are parsed out-of-order from other unwrapped lines.

// Thus, we need to keep a list of preprocessor directives to be reported

- // after an unwarpped line that has been started was finished.

+ // after an unwrapped line that has been started was finished.

SmallVector<UnwrappedLine, 4> PreprocessorDirectives;

// New unwrapped lines are added via CurrentLines.

@@ -207,8 +218,14 @@ private:

PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0

};

+ struct PPBranch {

+ PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {}

+ PPBranchKind Kind;

+ size_t Line;

+ };

// Keeps a stack of currently active preprocessor branching directives.

- SmallVector<PPBranchKind, 16> PPStack;

+ SmallVector<PPBranch, 16> PPStack;

// The \c UnwrappedLineParser re-parses the code for each combination

// of preprocessor branches that can be taken.

@@ -231,6 +248,15 @@ private:

// sequence.

std::stack<int> PPChainBranchIndex;

+ // Contains the #ifndef condition for a potential include guard.

+ FormatToken *IfNdefCondition;

+ bool FoundIncludeGuardStart;

+ bool IncludeGuardRejected;

+ // Contains the first start column where the source begins. This is zero for

+ // normal source code and may be nonzero when formatting a code fragment that

+ // does not start at the beginning of the file.

+ unsigned FirstStartColumn;

friend class ScopedLineState;

friend class CompoundStatementIndenter;

};

@@ -243,8 +269,9 @@ struct UnwrappedLineNode {

SmallVector<UnwrappedLine, 0> Children;

};

-inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false),

- MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {}

+inline UnwrappedLine::UnwrappedLine()

+ : Level(0), InPPDirective(false), MustBeDeclaration(false),

+ MatchingOpeningBlockLineIndex(kInvalidIndex) {}

} // end namespace format

} // end namespace clang

diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp
index fb4f59fbc9bc..ef0c7a7d5a45 100644
--- a/lib/Format/UsingDeclarationsSorter.cpp
+++ b/lib/Format/UsingDeclarationsSorter.cpp

@@ -26,6 +26,45 @@ namespace format {

namespace {

+// The order of using declaration is defined as follows:

+// Split the strings by "::" and discard any initial empty strings. The last

+// element of each list is a non-namespace name; all others are namespace

+// names. Sort the lists of names lexicographically, where the sort order of

+// individual names is that all non-namespace names come before all namespace

+// names, and within those groups, names are in case-insensitive lexicographic

+// order.

+int compareLabels(StringRef A, StringRef B) {

+ SmallVector<StringRef, 2> NamesA;

+ A.split(NamesA, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);

+ SmallVector<StringRef, 2> NamesB;

+ B.split(NamesB, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);

+ size_t SizeA = NamesA.size();

+ size_t SizeB = NamesB.size();

+ for (size_t I = 0, E = std::min(SizeA, SizeB); I < E; ++I) {

+ if (I + 1 == SizeA) {

+ // I is the last index of NamesA and NamesA[I] is a non-namespace name.

+ // Non-namespace names come before all namespace names.

+ if (SizeB > SizeA)

+ return -1;

+ // Two names within a group compare case-insensitively.

+ return NamesA[I].compare_lower(NamesB[I]);

+ }

+ // I is the last index of NamesB and NamesB[I] is a non-namespace name.

+ // Non-namespace names come before all namespace names.

+ if (I + 1 == SizeB)

+ return 1;

+ // Two namespaces names within a group compare case-insensitively.

+ int C = NamesA[I].compare_lower(NamesB[I]);

+ if (C != 0)

+ return C;

+ }

+ return 0;

struct UsingDeclaration {

const AnnotatedLine *Line;

std::string Label;

@@ -34,7 +73,7 @@ struct UsingDeclaration {

: Line(Line), Label(Label) {}

bool operator<(const UsingDeclaration &Other) const {

- return Label < Other.Label;

+ return compareLabels(Label, Other.Label) < 0;

}

};

@@ -76,10 +115,42 @@ std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) {

void endUsingDeclarationBlock(

SmallVectorImpl<UsingDeclaration> *UsingDeclarations,

const SourceManager &SourceMgr, tooling::Replacements *Fixes) {

+ bool BlockAffected = false;

+ for (const UsingDeclaration &Declaration : *UsingDeclarations) {

+ if (Declaration.Line->Affected) {

+ BlockAffected = true;

+ break;

+ }

+ if (!BlockAffected) {

+ UsingDeclarations->clear();

+ return;

+ }

SmallVector<UsingDeclaration, 4> SortedUsingDeclarations(

UsingDeclarations->begin(), UsingDeclarations->end());

- std::sort(SortedUsingDeclarations.begin(), SortedUsingDeclarations.end());

+ std::stable_sort(SortedUsingDeclarations.begin(),

+ SortedUsingDeclarations.end());

+ SortedUsingDeclarations.erase(

+ std::unique(SortedUsingDeclarations.begin(),

+ SortedUsingDeclarations.end(),

+ [](const UsingDeclaration &a, const UsingDeclaration &b) {

+ return a.Label == b.Label;

+ }),

+ SortedUsingDeclarations.end());

for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) {

+ if (I >= SortedUsingDeclarations.size()) {

+ // This using declaration has been deduplicated, delete it.

+ auto Begin =

+ (*UsingDeclarations)[I].Line->First->WhitespaceRange.getBegin();

+ auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc();

+ auto Range = CharSourceRange::getCharRange(Begin, End);

+ auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, ""));

+ if (Err) {

+ llvm::errs() << "Error while sorting using declarations: "

+ << llvm::toString(std::move(Err)) << "\n";

+ }

+ continue;

+ }

if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line)

continue;

auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation();

@@ -112,7 +183,7 @@ UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env,

const FormatStyle &Style)

: TokenAnalyzer(Env, Style) {}

-tooling::Replacements UsingDeclarationsSorter::analyze(

+std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze(

TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) {

const SourceManager &SourceMgr = Env.getSourceManager();

@@ -121,15 +192,17 @@ tooling::Replacements UsingDeclarationsSorter::analyze(

tooling::Replacements Fixes;

SmallVector<UsingDeclaration, 4> UsingDeclarations;

for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {

- if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective ||

- !AnnotatedLines[I]->startsWith(tok::kw_using) ||

- AnnotatedLines[I]->First->Finalized) {

+ const auto *FirstTok = AnnotatedLines[I]->First;

+ if (AnnotatedLines[I]->InPPDirective ||

+ !AnnotatedLines[I]->startsWith(tok::kw_using) || FirstTok->Finalized) {

endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);

continue;

}

- if (AnnotatedLines[I]->First->NewlinesBefore > 1)

+ if (FirstTok->NewlinesBefore > 1)

endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);

- std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First);

+ const auto *UsingTok =

+ FirstTok->is(tok::comment) ? FirstTok->getNextNonComment() : FirstTok;

+ std::string Label = computeUsingDeclarationLabel(UsingTok);

if (Label.empty()) {

endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);

continue;

@@ -137,7 +210,7 @@ tooling::Replacements UsingDeclarationsSorter::analyze(

UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label));

}

endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes);

- return Fixes;

+ return {Fixes, 0};

}

} // namespace format

diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h
index f7d5f97e3a2a..6f137712d841 100644
--- a/lib/Format/UsingDeclarationsSorter.h
+++ b/lib/Format/UsingDeclarationsSorter.h

@@ -25,7 +25,7 @@ class UsingDeclarationsSorter : public TokenAnalyzer {

public:

UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style);

- tooling::Replacements

+ std::pair<tooling::Replacements, unsigned>

analyze(TokenAnnotator &Annotator,

SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,

FormatTokenLexer &Tokens) override;

diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index 377ec3a681b6..a5477a996327 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp

@@ -67,6 +67,11 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,

/*IsInsideToken=*/false));

}

+llvm::Error

+WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) {

+ return Replaces.add(Replacement);

void WhitespaceManager::replaceWhitespaceInToken(

const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,

StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,

@@ -166,15 +171,15 @@ void WhitespaceManager::calculateLineBreakInformation() {

// BreakableLineCommentSection does comment reflow changes and here is

// the aligning of trailing comments. Consider the case where we reflow

// the second line up in this example:

- //

+ //

// // line 1

// // line 2

- //

+ //

// That amounts to 2 changes by BreakableLineCommentSection:

// - the first, delimited by (), for the whitespace between the tokens,

// - and second, delimited by [], for the whitespace at the beginning

// of the second token:

- //

+ //

// // line 1(

// )[// ]line 2

@@ -608,8 +613,9 @@ void WhitespaceManager::generateChanges() {

if (C.CreateReplacement) {

std::string ReplacementText = C.PreviousLinePostfix;

if (C.ContinuesPPDirective)

- appendNewlineText(ReplacementText, C.NewlinesBefore,

- C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);

+ appendEscapedNewlineText(ReplacementText, C.NewlinesBefore,

+ C.PreviousEndOfTokenColumn,

+ C.EscapedNewlineColumn);

else

appendNewlineText(ReplacementText, C.NewlinesBefore);

appendIndentText(ReplacementText, C.Tok->IndentLevel,

@@ -621,8 +627,7 @@ void WhitespaceManager::generateChanges() {

}

-void WhitespaceManager::storeReplacement(SourceRange Range,

- StringRef Text) {

+void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) {

unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -

SourceMgr.getFileOffset(Range.getBegin());

// Don't create a replacement, if it does not change anything.

@@ -645,16 +650,16 @@ void WhitespaceManager::appendNewlineText(std::string &Text,

Text.append(UseCRLF ? "\r\n" : "\n");

}

-void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,

- unsigned PreviousEndOfTokenColumn,

- unsigned EscapedNewlineColumn) {

+void WhitespaceManager::appendEscapedNewlineText(

+ std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn,

+ unsigned EscapedNewlineColumn) {

if (Newlines > 0) {

- unsigned Offset =

- std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn);

+ unsigned Spaces =

+ std::max<int>(1, EscapedNewlineColumn - PreviousEndOfTokenColumn - 1);

for (unsigned i = 0; i < Newlines; ++i) {

- Text.append(EscapedNewlineColumn - Offset - 1, ' ');

+ Text.append(Spaces, ' ');

Text.append(UseCRLF ? "\\\r\n" : "\\\n");

- Offset = 0;

+ Spaces = std::max<int>(0, EscapedNewlineColumn - 1);

}

diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index 4e78ab43abaf..af20dc5616a7 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h

@@ -57,6 +57,8 @@ public:

/// was not called.

void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);

+ llvm::Error addReplacement(const tooling::Replacement &Replacement);

/// \brief Inserts or replaces whitespace in the middle of a token.

///

/// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix

@@ -194,9 +196,9 @@ private:

/// \brief Stores \p Text as the replacement for the whitespace in \p Range.

void storeReplacement(SourceRange Range, StringRef Text);

void appendNewlineText(std::string &Text, unsigned Newlines);

- void appendNewlineText(std::string &Text, unsigned Newlines,

- unsigned PreviousEndOfTokenColumn,

- unsigned EscapedNewlineColumn);

+ void appendEscapedNewlineText(std::string &Text, unsigned Newlines,

+ unsigned PreviousEndOfTokenColumn,

+ unsigned EscapedNewlineColumn);

void appendIndentText(std::string &Text, unsigned IndentLevel,

unsigned Spaces, unsigned WhitespaceStartColumn);