diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
commit | 461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch) | |
tree | 6942083d7d56bba40ec790a453ca58ad3baf6832 /lib/Format | |
parent | 75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff) |
Vendor import of clang trunk r321017:vendor/clang/clang-trunk-r321017
Notes
Notes:
svn path=/vendor/clang/dist/; revision=326941
svn path=/vendor/clang/clang-trunk-r321017/; revision=326942; tag=vendor/clang/clang-trunk-r321017
Diffstat (limited to 'lib/Format')
25 files changed, 2288 insertions, 1088 deletions
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 3c9df62f80dc..4735ab3564f0 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -40,9 +40,15 @@ static bool IsBlank(char C) { } } -static StringRef getLineCommentIndentPrefix(StringRef Comment) { - static const char *const KnownPrefixes[] = { - "///<", "//!<", "///", "//", "//!"}; +static StringRef getLineCommentIndentPrefix(StringRef Comment, + const FormatStyle &Style) { + static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//", + "//!"}; + static const char *const KnownTextProtoPrefixes[] = {"//", "#"}; + ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes); + if (Style.Language == FormatStyle::LK_TextProto) + KnownPrefixes = KnownTextProtoPrefixes; + StringRef LongestPrefix; for (StringRef KnownPrefix : KnownPrefixes) { if (Comment.startswith(KnownPrefix)) { @@ -61,6 +67,8 @@ static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { + DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit + << "\", Content start: " << ContentStartColumn << "\n"); if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); @@ -165,7 +173,7 @@ bool switchesFormatting(const FormatToken &Token) { } unsigned -BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns, +BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const { // Example: consider the content // lala lala @@ -175,58 +183,64 @@ BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns, // We compute the number of columns when the split is compressed into a single // space, like: // lala lala + // + // FIXME: Correctly measure the length of whitespace in Split.second so it + // works with tabs. 
return RemainingTokenColumns + 1 - Split.second; } -unsigned BreakableSingleLineToken::getLineCount() const { return 1; } +unsigned BreakableStringLiteral::getLineCount() const { return 1; } + +unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex, + unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const { + llvm_unreachable("Getting the length of a part of the string literal " + "indicates that the code tries to reflow it."); +} + +unsigned +BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const { + return UnbreakableTailLength + Postfix.size() + + encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos), + StartColumn, Style.TabWidth, Encoding); +} -unsigned BreakableSingleLineToken::getLineLengthAfterSplit( - unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - return StartColumn + Prefix.size() + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), - StartColumn + Prefix.size(), - Style.TabWidth, Encoding); +unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, + bool Break) const { + return StartColumn + Prefix.size(); } -BreakableSingleLineToken::BreakableSingleLineToken( +BreakableStringLiteral::BreakableStringLiteral( const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Tok, InPPDirective, Encoding, Style), - StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { + StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), + UnbreakableTailLength(Tok.UnbreakableTailLength) { assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); } -BreakableStringLiteral::BreakableStringLiteral( - const FormatToken &Tok, unsigned StartColumn, 
StringRef Prefix, - StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style) - : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective, - Encoding, Style) {} - -BreakableToken::Split -BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, - llvm::Regex &CommentPragmasRegex) const { - return getStringSplit(Line.substr(TailOffset), - StartColumn + Prefix.size() + Postfix.size(), - ColumnLimit, Style.TabWidth, Encoding); +BreakableToken::Split BreakableStringLiteral::getSplit( + unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const { + return getStringSplit(Line.substr(TailOffset), ContentStartColumn, + ColumnLimit - Postfix.size(), Style.TabWidth, Encoding); } void BreakableStringLiteral::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { + WhitespaceManager &Whitespaces) const { Whitespaces.replaceWhitespaceInToken( Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, Prefix, InPPDirective, 1, StartColumn); } BreakableComment::BreakableComment(const FormatToken &Token, - unsigned StartColumn, - bool InPPDirective, + unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Token, InPPDirective, Encoding, Style), @@ -236,19 +250,19 @@ unsigned BreakableComment::getLineCount() const { return Lines.size(); } BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const { // Don't break lines matching the comment pragmas regex. 
if (CommentPragmasRegex.match(Content[LineIndex])) return Split(StringRef::npos, 0); return getCommentSplit(Content[LineIndex].substr(TailOffset), - getContentStartColumn(LineIndex, TailOffset), - ColumnLimit, Style.TabWidth, Encoding); + ContentStartColumn, ColumnLimit, Style.TabWidth, + Encoding); } -void BreakableComment::compressWhitespace(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { +void BreakableComment::compressWhitespace( + unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); // Text is relative to the content line, but Whitespaces operates relative to // the start of the corresponding token, so compute the start of the Split @@ -262,44 +276,6 @@ void BreakableComment::compressWhitespace(unsigned LineIndex, /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); } -BreakableToken::Split -BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix, - unsigned PreviousEndColumn, - unsigned ColumnLimit) const { - unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size(); - StringRef TrimmedText = Text.rtrim(Blanks); - // This is the width of the resulting line in case the full line of Text gets - // reflown up starting at ReflowStartColumn. - unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs( - TrimmedText, ReflowStartColumn, - Style.TabWidth, Encoding); - // If the full line fits up, we return a reflow split after it, - // otherwise we compute the largest piece of text that fits after - // ReflowStartColumn. - Split ReflowSplit = - FullWidth <= ColumnLimit - ? 
Split(TrimmedText.size(), Text.size() - TrimmedText.size()) - : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, - Style.TabWidth, Encoding); - - // We need to be extra careful here, because while it's OK to keep a long line - // if it can't be broken into smaller pieces (like when the first word of a - // long line is longer than the column limit), it's not OK to reflow that long - // word up. So we recompute the size of the previous line after reflowing and - // only return the reflow split if that's under the line limit. - if (ReflowSplit.first != StringRef::npos && - // Check if the width of the newly reflown line is under the limit. - PreviousEndColumn + ReflowPrefix.size() + - encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first), - PreviousEndColumn + - ReflowPrefix.size(), - Style.TabWidth, Encoding) <= - ColumnLimit) { - return ReflowSplit; - } - return Split(StringRef::npos, 0); -} - const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; } @@ -309,7 +285,7 @@ static bool mayReflowContent(StringRef Content) { // Lines starting with '@' commonly have special meaning. // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " }; + "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}; bool hasSpecialMeaningPrefix = false; for (StringRef Prefix : kSpecialMeaningPrefixes) { if (Content.startswith(Prefix)) { @@ -322,8 +298,8 @@ static bool mayReflowContent(StringRef Content) { // To avoid issues if a line starts with a number which is actually the end // of a previous line, we only consider numbers with up to 2 digits. static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. 
"); - hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || - kNumberedListRegexp.match(Content); + hasSpecialMeaningPrefix = + hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content); // Simple heuristic for what to reflow: content should contain at least two // characters and either the first or second character must be @@ -339,7 +315,9 @@ BreakableBlockComment::BreakableBlockComment( const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { + : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style), + DelimitersOnNewline(false), + UnbreakableTailLength(Token.UnbreakableTailLength) { assert(Tok.is(TT_BlockComment) && "block comment section must start with a block comment"); @@ -384,8 +362,7 @@ BreakableBlockComment::BreakableBlockComment( // If the last line is empty, the closing "*/" will have a star. if (i + 1 == e && Content[i].empty()) break; - if (!Content[i].empty() && i + 1 != e && - Decoration.startswith(Content[i])) + if (!Content[i].empty() && i + 1 != e && Decoration.startswith(Content[i])) continue; while (!Content[i].startswith(Decoration)) Decoration = Decoration.substr(0, Decoration.size() - 1); @@ -427,11 +404,30 @@ BreakableBlockComment::BreakableBlockComment( IndentAtLineBreak = std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); } - IndentAtLineBreak = - std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); + + // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case. + if (Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) { + if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) { + // This is a multiline jsdoc comment. 
+ DelimitersOnNewline = true; + } else if (Lines[0].startswith("* ") && Lines.size() == 1) { + // Detect a long single-line comment, like: + // /** long long long */ + // Below, '2' is the width of '*/'. + unsigned EndColumn = + ContentColumn[0] + + encoding::columnWidthWithTabs(Lines[0], ContentColumn[0], + Style.TabWidth, Encoding) + + 2; + DelimitersOnNewline = EndColumn > Style.ColumnLimit; + } + } DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; + llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n"; for (size_t i = 0; i < Lines.size(); ++i) { llvm::dbgs() << i << " |" << Content[i] << "| " << "CC=" << ContentColumn[i] << "| " @@ -477,30 +473,45 @@ void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, IndentDelta; } -unsigned BreakableBlockComment::getLineLengthAfterSplit( - unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); +unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, + unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const { unsigned LineLength = - ContentStartColumn + encoding::columnWidthWithTabs( - Content[LineIndex].substr(TailOffset, Length), - ContentStartColumn, Style.TabWidth, Encoding); + encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length), + StartColumn, Style.TabWidth, Encoding); + // FIXME: This should go into getRemainingLength instead, but we currently + // break tests when putting it there. Investigate how to fix those tests. // The last line gets a "*/" postfix. if (LineIndex + 1 == Lines.size()) { LineLength += 2; // We never need a decoration when breaking just the trailing "*/" postfix. // Note that checking that Length == 0 is not enough, since Length could // also be StringRef::npos. 
- if (Content[LineIndex].substr(TailOffset, Length).empty()) { + if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) { LineLength -= Decoration.size(); } } return LineLength; } +unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, + unsigned Offset, + unsigned StartColumn) const { + return UnbreakableTailLength + + getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); +} + +unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, + bool Break) const { + if (Break) + return IndentAtLineBreak; + return std::max(0, ContentColumn[LineIndex]); +} + void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { + WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); StringRef Prefix = Decoration; // We need this to account for the case when we have a decoration "* " for all @@ -526,97 +537,55 @@ void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); } -BreakableToken::Split BreakableBlockComment::getSplitBefore( - unsigned LineIndex, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - llvm::Regex &CommentPragmasRegex) const { +BreakableToken::Split +BreakableBlockComment::getReflowSplit(unsigned LineIndex, + llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, - ColumnLimit); -} - -unsigned BreakableBlockComment::getReflownColumn( - StringRef Content, - unsigned LineIndex, - unsigned PreviousEndColumn) const { - unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); - // If this is the last line, it will carry around its '*/' postfix. - unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 
2 : 0); - // The line is composed of previous text, reflow prefix, reflown text and - // postfix. - unsigned ReflownColumn = - StartColumn + encoding::columnWidthWithTabs(Content, StartColumn, - Style.TabWidth, Encoding) + - PostfixLength; - return ReflownColumn; -} - -unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( - unsigned LineIndex, unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const { - if (SplitBefore.first == StringRef::npos || - // Block comment line contents contain the trailing whitespace after the - // decoration, so the need of left trim. Note that this behavior is - // consistent with the breaking of block comments where the indentation of - // a broken line is uniform across all the lines of the block comment. - SplitBefore.first + SplitBefore.second < - Content[LineIndex].ltrim().size()) { - // A piece of line, not the whole, gets reflown. - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } else { - // The whole line gets reflown, need to check if we need to insert a break - // for the postfix or not. - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - unsigned ReflownColumn = - getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); - if (ReflownColumn <= ColumnLimit) { - return ReflownColumn; - } - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } + + size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); + return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); +} + +bool BreakableBlockComment::introducesBreakBeforeToken() const { + // A break is introduced when we want delimiters on newline. 
+ return DelimitersOnNewline && + Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; } -void BreakableBlockComment::replaceWhitespaceBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - Split SplitBefore, WhitespaceManager &Whitespaces) { - if (LineIndex == 0) return; + +void BreakableBlockComment::reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const { StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - if (SplitBefore.first != StringRef::npos) { - // Here we need to reflow. - assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && - "Reflowing whitespace within a token"); - // This is the offset of the end of the last line relative to the start of - // the token text in the token. - unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + - Content[LineIndex - 1].size() - - tokenAt(LineIndex).TokenText.data(); - unsigned WhitespaceLength = TrimmedContent.data() - - tokenAt(LineIndex).TokenText.data() - - WhitespaceOffsetInToken; - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), WhitespaceOffsetInToken, - /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", - /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, - /*Spaces=*/0); - // Check if we need to also insert a break at the whitespace range. - // For this we first adapt the reflow split relative to the beginning of the - // content. - // Note that we don't need a penalty for this break, since it doesn't change - // the total number of lines. - Split BreakSplit = SplitBefore; - BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data(); - unsigned ReflownColumn = - getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); - if (ReflownColumn > ColumnLimit) { - insertBreak(LineIndex, 0, BreakSplit, Whitespaces); + // Here we need to reflow. 
+ assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && + "Reflowing whitespace within a token"); + // This is the offset of the end of the last line relative to the start of + // the token text in the token. + unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + + Content[LineIndex - 1].size() - + tokenAt(LineIndex).TokenText.data(); + unsigned WhitespaceLength = TrimmedContent.data() - + tokenAt(LineIndex).TokenText.data() - + WhitespaceOffsetInToken; + Whitespaces.replaceWhitespaceInToken( + tokenAt(LineIndex), WhitespaceOffsetInToken, + /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, + /*Spaces=*/0); +} + +void BreakableBlockComment::adaptStartOfLine( + unsigned LineIndex, WhitespaceManager &Whitespaces) const { + if (LineIndex == 0) { + if (DelimitersOnNewline) { + // Since we're breaking at index 1 below, the break position and the + // break length are the same. + size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); + if (BreakLength != StringRef::npos) + insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces); } return; } - // Here no reflow with the previous line will happen. // Fix the decoration of the line at LineIndex. StringRef Prefix = Decoration; @@ -651,6 +620,20 @@ void BreakableBlockComment::replaceWhitespaceBefore( InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); } +BreakableToken::Split +BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const { + if (DelimitersOnNewline) { + // Replace the trailing whitespace of the last line with a newline. + // In case the last line is empty, the ending '*/' is already on its own + // line. 
+ StringRef Line = Content.back().substr(TailOffset); + StringRef TrimmedLine = Line.rtrim(Blanks); + if (!TrimmedLine.empty()) + return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size()); + } + return Split(StringRef::npos, 0); +} + bool BreakableBlockComment::mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { // Content[LineIndex] may exclude the indent after the '*' decoration. In that @@ -664,15 +647,6 @@ bool BreakableBlockComment::mayReflow(unsigned LineIndex, !switchesFormatting(tokenAt(LineIndex)); } -unsigned -BreakableBlockComment::getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - // If we break, we always break at the predefined indent. - if (TailOffset != 0) - return IndentAtLineBreak; - return std::max(0, ContentColumn[LineIndex]); -} - BreakableLineCommentSection::BreakableLineCommentSection( const FormatToken &Token, unsigned StartColumn, unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, @@ -686,7 +660,8 @@ BreakableLineCommentSection::BreakableLineCommentSection( CurrentTok = CurrentTok->Next) { LastLineTok = LineTok; StringRef TokenText(CurrentTok->TokenText); - assert(TokenText.startswith("//")); + assert((TokenText.startswith("//") || TokenText.startswith("#")) && + "unsupported line comment prefix, '//' and '#' are supported"); size_t FirstLineIndex = Lines.size(); TokenText.split(Lines, "\n"); Content.resize(Lines.size()); @@ -696,11 +671,13 @@ BreakableLineCommentSection::BreakableLineCommentSection( Prefix.resize(Lines.size()); OriginalPrefix.resize(Lines.size()); for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { + Lines[i] = Lines[i].ltrim(Blanks); // We need to trim the blanks in case this is not the first line in a // multiline comment. Then the indent is included in Lines[i]. 
StringRef IndentPrefix = - getLineCommentIndentPrefix(Lines[i].ltrim(Blanks)); - assert(IndentPrefix.startswith("//")); + getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style); + assert((TokenText.startswith("//") || TokenText.startswith("#")) && + "unsupported line comment prefix, '//' and '#' are supported"); OriginalPrefix[i] = Prefix[i] = IndentPrefix; if (Lines[i].size() > Prefix[i].size() && isAlphanumeric(Lines[i][Prefix[i].size()])) { @@ -714,22 +691,20 @@ BreakableLineCommentSection::BreakableLineCommentSection( Prefix[i] = "///< "; else if (Prefix[i] == "//!<") Prefix[i] = "//!< "; + else if (Prefix[i] == "#" && + Style.Language == FormatStyle::LK_TextProto) + Prefix[i] = "# "; } Tokens[i] = LineTok; Content[i] = Lines[i].substr(IndentPrefix.size()); OriginalContentColumn[i] = - StartColumn + - encoding::columnWidthWithTabs(OriginalPrefix[i], - StartColumn, - Style.TabWidth, - Encoding); + StartColumn + encoding::columnWidthWithTabs(OriginalPrefix[i], + StartColumn, + Style.TabWidth, Encoding); ContentColumn[i] = - StartColumn + - encoding::columnWidthWithTabs(Prefix[i], - StartColumn, - Style.TabWidth, - Encoding); + StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn, + Style.TabWidth, Encoding); // Calculate the end of the non-whitespace text in this line. size_t EndOfLine = Content[i].find_last_not_of(Blanks); @@ -760,20 +735,25 @@ BreakableLineCommentSection::BreakableLineCommentSection( } } -unsigned BreakableLineCommentSection::getLineLengthAfterSplit( - unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const { - unsigned ContentStartColumn = - (TailOffset == 0 ? 
ContentColumn[LineIndex] - : OriginalContentColumn[LineIndex]); - return ContentStartColumn + encoding::columnWidthWithTabs( - Content[LineIndex].substr(TailOffset, Length), - ContentStartColumn, Style.TabWidth, Encoding); +unsigned +BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const { + return encoding::columnWidthWithTabs( + Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, + Encoding); +} + +unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, + bool Break) const { + if (Break) + return OriginalContentColumn[LineIndex]; + return ContentColumn[LineIndex]; } -void BreakableLineCommentSection::insertBreak(unsigned LineIndex, - unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) { +void BreakableLineCommentSection::insertBreak( + unsigned LineIndex, unsigned TailOffset, Split Split, + WhitespaceManager &Whitespaces) const { StringRef Text = Content[LineIndex].substr(TailOffset); // Compute the offset of the split relative to the beginning of the token // text. 
@@ -792,37 +772,42 @@ void BreakableLineCommentSection::insertBreak(unsigned LineIndex, /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); } -BreakableComment::Split BreakableLineCommentSection::getSplitBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - llvm::Regex &CommentPragmasRegex) const { +BreakableComment::Split BreakableLineCommentSection::getReflowSplit( + unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { if (!mayReflow(LineIndex, CommentPragmasRegex)) return Split(StringRef::npos, 0); - return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, - ColumnLimit); -} - -unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( - unsigned LineIndex, unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const { - if (SplitBefore.first == StringRef::npos || - SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { - // A piece of line, not the whole line, gets reflown. - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); - } else { - // The whole line gets reflown. - unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); - return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex], - StartColumn, - Style.TabWidth, - Encoding); - } -} -void BreakableLineCommentSection::replaceWhitespaceBefore( - unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, - Split SplitBefore, WhitespaceManager &Whitespaces) { + size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); + + // In a line comment section each line is a separate token; thus, after a + // split we replace all whitespace before the current line comment token + // (which does not need to be included in the split), plus the start of the + // line up to where the content starts. + return Split(0, Trimmed != StringRef::npos ? 
Trimmed : 0); +} + +void BreakableLineCommentSection::reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const { + // Reflow happens between tokens. Replace the whitespace between the + // tokens by the empty string. + Whitespaces.replaceWhitespace( + *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, + /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + // Replace the indent and prefix of the token with the reflow prefix. + unsigned WhitespaceLength = + Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); + Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], + /*Offset=*/0, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/ReflowPrefix, + /*InPPDirective=*/false, + /*Newlines=*/0, + /*Spaces=*/0); +} + +void BreakableLineCommentSection::adaptStartOfLine( + unsigned LineIndex, WhitespaceManager &Whitespaces) const { // If this is the first line of a token, we need to inform Whitespace Manager // about it: either adapt the whitespace range preceding it, or mark it as an // untouchable token. @@ -830,44 +815,25 @@ void BreakableLineCommentSection::replaceWhitespaceBefore( // // line 1 \ // // line 2 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { - if (SplitBefore.first != StringRef::npos) { - // Reflow happens between tokens. Replace the whitespace between the - // tokens by the empty string. - Whitespaces.replaceWhitespace( - *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, - /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); - // Replace the indent and prefix of the token with the reflow prefix. 
- unsigned WhitespaceLength = - Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); - Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], - /*Offset=*/0, - /*ReplaceChars=*/WhitespaceLength, - /*PreviousPostfix=*/"", - /*CurrentPrefix=*/ReflowPrefix, - /*InPPDirective=*/false, - /*Newlines=*/0, - /*Spaces=*/0); - } else { - // This is the first line for the current token, but no reflow with the - // previous token is necessary. However, we still may need to adjust the - // start column. Note that ContentColumn[LineIndex] is the expected - // content column after a possible update to the prefix, hence the prefix - // length change is included. - unsigned LineColumn = - ContentColumn[LineIndex] - - (Content[LineIndex].data() - Lines[LineIndex].data()) + - (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); - - // We always want to create a replacement instead of adding an untouchable - // token, even if LineColumn is the same as the original column of the - // token. This is because WhitespaceManager doesn't align trailing - // comments if they are untouchable. - Whitespaces.replaceWhitespace(*Tokens[LineIndex], - /*Newlines=*/1, - /*Spaces=*/LineColumn, - /*StartOfTokenColumn=*/LineColumn, - /*InPPDirective=*/false); - } + // This is the first line for the current token, but no reflow with the + // previous token is necessary. However, we still may need to adjust the + // start column. Note that ContentColumn[LineIndex] is the expected + // content column after a possible update to the prefix, hence the prefix + // length change is included. + unsigned LineColumn = + ContentColumn[LineIndex] - + (Content[LineIndex].data() - Lines[LineIndex].data()) + + (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); + + // We always want to create a replacement instead of adding an untouchable + // token, even if LineColumn is the same as the original column of the + // token. 
This is because WhitespaceManager doesn't align trailing + // comments if they are untouchable. + Whitespaces.replaceWhitespace(*Tokens[LineIndex], + /*Newlines=*/1, + /*Spaces=*/LineColumn, + /*StartOfTokenColumn=*/LineColumn, + /*InPPDirective=*/false); } if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { // Adjust the prefix if necessary. @@ -880,16 +846,9 @@ void BreakableLineCommentSection::replaceWhitespaceBefore( tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); } - // Add a break after a reflow split has been introduced, if necessary. - // Note that this break doesn't need to be penalized, since it doesn't change - // the number of lines. - if (SplitBefore.first != StringRef::npos && - SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { - insertBreak(LineIndex, 0, SplitBefore, Whitespaces); - } } -void BreakableLineCommentSection::updateNextToken(LineState& State) const { +void BreakableLineCommentSection::updateNextToken(LineState &State) const { if (LastLineTok) { State.NextToken = LastLineTok->Next; } @@ -903,20 +862,17 @@ bool BreakableLineCommentSection::mayReflow( if (Lines[LineIndex].startswith("//")) { IndentContent = Lines[LineIndex].substr(2); } + // FIXME: Decide whether we want to reflow non-regular indents: + // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the + // OriginalPrefix[LineIndex-1]. That means we don't reflow + // // text that protrudes + // // into text with different indent + // We do reflow in that case in block comments. 
return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && mayReflowContent(Content[LineIndex]) && !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) && OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; } -unsigned -BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const { - if (TailOffset != 0) { - return OriginalContentColumn[LineIndex]; - } - return ContentColumn[LineIndex]; -} - } // namespace format } // namespace clang diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index e642a538e21c..8ef26ef464da 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -33,19 +33,32 @@ bool switchesFormatting(const FormatToken &Token); struct FormatStyle; -/// \brief Base class for strategies on how to break tokens. +/// \brief Base class for tokens / ranges of tokens that can allow breaking +/// within the tokens - for example, to avoid whitespace beyond the column +/// limit, or to reflow text. /// -/// This is organised around the concept of a \c Split, which is a whitespace -/// range that signifies a position of the content of a token where a -/// reformatting might be done. Operating with splits is divided into 3 -/// operations: +/// Generally, a breakable token consists of logical lines, addressed by a line +/// index. For example, in a sequence of line comments, each line comment is its +/// own logical line; similarly, for a block comment, each line in the block +/// comment is on its own logical line. +/// +/// There are two methods to compute the layout of the token: +/// - getRangeLength measures the number of columns needed for a range of text +/// within a logical line, and +/// - getContentStartColumn returns the start column at which we want the +/// content of a logical line to start (potentially after introducing a line +/// break). 
+/// +/// The mechanism to adapt the layout of the breakable token is organised +/// around the concept of a \c Split, which is a whitespace range that signifies +/// a position of the content of a token where a reformatting might be done. +/// +/// Operating with splits is divided into two operations: /// - getSplit, for finding a split starting at a position, -/// - getLineLengthAfterSplit, for calculating the size in columns of the rest -/// of the content after a split has been used for breaking, and /// - insertBreak, for executing the split using a whitespace manager. /// /// There is a pair of operations that are used to compress a long whitespace -/// range with a single space if that will bring the line lenght under the +/// range with a single space if that will bring the line length under the /// column limit: /// - getLineLengthAfterCompression, for calculating the size in columns of the /// line after a whitespace range has been compressed, and @@ -56,16 +69,23 @@ struct FormatStyle; /// For tokens where the whitespace before each line needs to be also /// reformatted, for example for tokens supporting reflow, there are analogous /// operations that might be executed before the main line breaking occurs: -/// - getSplitBefore, for finding a split such that the content preceding it +/// - getReflowSplit, for finding a split such that the content preceding it /// needs to be specially reflown, -/// - getLineLengthAfterSplitBefore, for calculating the line length in columns -/// of the remainder of the content after the beginning of the content has -/// been reformatted, and -/// - replaceWhitespaceBefore, for executing the reflow using a whitespace +/// - reflow, for executing the split using a whitespace manager, +/// - introducesBreakBeforeToken, for checking if reformatting the beginning +/// of the content introduces a line break before it, +/// - adaptStartOfLine, for executing the reflow using a whitespace /// manager. 
/// -/// FIXME: The interface seems set in stone, so we might want to just pull the -/// strategy into the class, instead of controlling it from the outside. +/// For tokens that require the whitespace after the last line to be +/// reformatted, for example in multiline jsdoc comments that require the +/// trailing '*/' to be on a line of its own, there are analogous operations +/// that might be executed after the last line has been reformatted: +/// - getSplitAfterLastLine, for finding a split after the last line that needs +/// to be reflown, +/// - replaceWhitespaceAfterLastLine, for executing the reflow using a +/// whitespace manager. +/// class BreakableToken { public: /// \brief Contains starting character index and length of split. @@ -76,73 +96,122 @@ public: /// \brief Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p TailOffset with length \p Length. + /// \brief Returns the number of columns required to format the text in the + /// byte range [\p Offset, \p Offset \c + \p Length). + /// + /// \p Offset is the byte offset from the start of the content of the line + /// at \p LineIndex. + /// + /// \p StartColumn is the column at which the text starts in the formatted + /// file, needed to compute tab stops correctly. + virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const = 0; + + /// \brief Returns the number of columns required to format the text following + /// the byte \p Offset in the line \p LineIndex, including potentially + /// unbreakable sequences of tokens following after the end of the token. + /// + /// \p Offset is the byte offset from the start of the content of the line + /// at \p LineIndex. 
+ /// + /// \p StartColumn is the column at which the text starts in the formatted + /// file, needed to compute tab stops correctly. /// - /// Note that previous breaks are not taken into account. \p TailOffset is - /// always specified from the start of the (original) line. - /// \p Length can be set to StringRef::npos, which means "to the end of line". - virtual unsigned - getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const = 0; + /// For breakable tokens that never use extra space at the end of a line, this + /// is equivalent to getRangeLength with a Length of StringRef::npos. + virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const { + return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); + } + + /// \brief Returns the column at which content in line \p LineIndex starts, + /// assuming no reflow. + /// + /// If \p Break is true, returns the column at which the line should start + /// after the line break. + /// If \p Break is false, returns the column at which the line itself will + /// start. + virtual unsigned getContentStartColumn(unsigned LineIndex, + bool Break) const = 0; /// \brief Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not - /// violate \p ColumnLimit. + /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in + /// the token is formatted starting at ContentStartColumn in the reformatted + /// file. virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, + unsigned ColumnLimit, unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const = 0; /// \brief Emits the previously retrieved \p Split via \p Whitespaces. 
virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) = 0; + WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range - /// \p Split has been compressed into a single space. - unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns, - Split Split) const; + /// \brief Returns the number of columns needed to format + /// \p RemainingTokenColumns, assuming that Split is within the range measured + /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced + /// to a single space. + unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, + Split Split) const; /// \brief Replaces the whitespace range described by \p Split with a single /// space. virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) = 0; + WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns a whitespace range (offset, length) of the content at - /// \p LineIndex such that the content preceding this range needs to be - /// reformatted before any breaks are made to this line. + /// \brief Returns whether the token supports reflowing text. + virtual bool supportsReflow() const { return false; } + + /// \brief Returns a whitespace range (offset, length) of the content at \p + /// LineIndex such that the content of that line is reflown to the end of the + /// previous one. /// - /// \p PreviousEndColumn is the end column of the previous line after - /// formatting. + /// Returning (StringRef::npos, 0) indicates reflowing is not possible. /// - /// A result having offset == StringRef::npos means that no piece of the line - /// needs to be reformatted before any breaks are made. 
- virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + /// The range will include any whitespace preceding the specified line's + /// content. + /// + /// If the split is not contained within one token, for example when reflowing + /// line comments, returns (0, <length>). + virtual Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { return Split(StringRef::npos, 0); } - /// \brief Returns the number of columns required to format the piece of line - /// at \p LineIndex after the content preceding the whitespace range specified - /// \p SplitBefore has been reformatted, but before any breaks are made to - /// this line. - virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const { - return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); + /// \brief Reflows the current line into the end of the previous one. + virtual void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const {} + + /// \brief Returns whether there will be a line break at the start of the + /// token. + virtual bool introducesBreakBeforeToken() const { + return false; } /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. - /// Performs a reformatting of the content at \p LineIndex preceding the - /// whitespace range \p SplitBefore. - virtual void replaceWhitespaceBefore(unsigned LineIndex, - unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) {} + virtual void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const {} + + /// \brief Returns a whitespace range (offset, length) of the content at + /// the last line that needs to be reformatted after the last line has been + /// reformatted. + /// + /// A result having offset == StringRef::npos means that no reformat is + /// necessary. 
+ virtual Split getSplitAfterLastLine(unsigned TailOffset) const { + return Split(StringRef::npos, 0); + } + + /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line + /// after the last line has been formatted by performing a reformatting. + void replaceWhitespaceAfterLastLine(unsigned TailOffset, + Split SplitAfterLastLine, + WhitespaceManager &Whitespaces) const { + insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine, + Whitespaces); + } /// \brief Updates the next token of \p State to the next token after this /// one. This can be used when this token manages a set of underlying tokens @@ -161,32 +230,7 @@ protected: const FormatStyle &Style; }; -/// \brief Base class for single line tokens that can be broken. -/// -/// \c getSplit() needs to be implemented by child classes. -class BreakableSingleLineToken : public BreakableToken { -public: - unsigned getLineCount() const override; - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; - -protected: - BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); - - // The column in which the token starts. - unsigned StartColumn; - // The prefix a line needs after a break in the token. - StringRef Prefix; - // The postfix a line needs before introducing a break. - StringRef Postfix; - // The token text excluding the prefix and postfix. - StringRef Line; -}; - -class BreakableStringLiteral : public BreakableSingleLineToken { +class BreakableStringLiteral : public BreakableToken { public: /// \brief Creates a breakable token for a single line string literal. 
/// @@ -198,11 +242,32 @@ public: const FormatStyle &Style); Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; + WhitespaceManager &Whitespaces) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override {} + WhitespaceManager &Whitespaces) const override {} + unsigned getLineCount() const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; + +protected: + // The column in which the token starts. + unsigned StartColumn; + // The prefix a line needs after a break in the token. + StringRef Prefix; + // The postfix a line needs before introducing a break. + StringRef Postfix; + // The token text excluding the prefix and postfix. + StringRef Line; + // Length of the sequence of tokens after this string literal that cannot + // contain line breaks. 
+ unsigned UnbreakableTailLength; }; class BreakableComment : public BreakableToken { @@ -216,21 +281,15 @@ protected: const FormatStyle &Style); public: + bool supportsReflow() const override { return true; } unsigned getLineCount() const override; Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ReflowColumn, llvm::Regex &CommentPragmasRegex) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; + WhitespaceManager &Whitespaces) const override; protected: - virtual unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const = 0; - - // Returns a split that divides Text into a left and right parts, such that - // the left part is suitable for reflowing after PreviousEndColumn. - Split getReflowSplit(StringRef Text, StringRef ReflowPrefix, - unsigned PreviousEndColumn, unsigned ColumnLimit) const; - // Returns the token containing the line at LineIndex. 
const FormatToken &tokenAt(unsigned LineIndex) const; @@ -289,21 +348,23 @@ public: bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + WhitespaceManager &Whitespaces) const override; + Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; - unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const override; - void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) override; + void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + bool introducesBreakBeforeToken() const override; + void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + Split getSplitAfterLastLine(unsigned TailOffset) const override; + bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; @@ -318,14 +379,6 @@ private: // considered part of the text). void adjustWhitespace(unsigned LineIndex, int IndentDelta); - // Computes the end column if the full Content from LineIndex gets reflown - // after PreviousEndColumn. 
- unsigned getReflownColumn(StringRef Content, unsigned LineIndex, - unsigned PreviousEndColumn) const; - - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const override; - // The column at which the text of a broken line should start. // Note that an optional decoration would go before that column. // IndentAtLineBreak is a uniform position for all lines in a block comment, @@ -348,6 +401,14 @@ private: // If this block comment has decorations, this is the column of the start of // the decorations. unsigned DecorationColumn; + + // If true, make sure that the opening '/**' and the closing '*/' each end on + // a line of their own. Styles like jsdoc require this for multiline comments. + bool DelimitersOnNewline; + + // Length of the sequence of tokens after this block comment that cannot + // contain line breaks. + unsigned UnbreakableTailLength; }; class BreakableLineCommentSection : public BreakableComment { @@ -357,29 +418,23 @@ public: bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style); - unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, - StringRef::size_type Length) const override; + unsigned getRangeLength(unsigned LineIndex, unsigned Offset, + StringRef::size_type Length, + unsigned StartColumn) const override; + unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) override; - Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn, - unsigned ColumnLimit, + WhitespaceManager &Whitespaces) const override; + Split getReflowSplit(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; - unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, - unsigned TailOffset, - unsigned PreviousEndColumn, - unsigned ColumnLimit, - Split SplitBefore) const override; - void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn, - 
unsigned ColumnLimit, Split SplitBefore, - WhitespaceManager &Whitespaces) override; + void reflow(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; + void adaptStartOfLine(unsigned LineIndex, + WhitespaceManager &Whitespaces) const override; void updateNextToken(LineState &State) const override; bool mayReflow(unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const override; private: - unsigned getContentStartColumn(unsigned LineIndex, - unsigned TailOffset) const override; - // OriginalPrefix[i] contains the original prefix of line i, including // trailing whitespace before the start of the content. The indentation // preceding the prefix is not included. diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index 3bf1cd8f7c13..a3d38b244c5c 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -12,8 +12,9 @@ /// //===----------------------------------------------------------------------===// -#include "BreakableToken.h" #include "ContinuationIndenter.h" +#include "BreakableToken.h" +#include "FormatInternal.h" #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" @@ -76,6 +77,53 @@ static bool opensProtoMessageField(const FormatToken &LessTok, (LessTok.Previous && LessTok.Previous->is(tok::equal)))); } +// Returns the delimiter of a raw string literal, or None if TokenText is not +// the text of a raw string literal. The delimiter could be the empty string. +// For example, the delimiter of R"deli(cont)deli" is deli. +static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) { + if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. 
+ || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) + return None; + + // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has + // size at most 16 by the standard, so the first '(' must be among the first + // 19 bytes. + size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); + if (LParenPos == StringRef::npos) + return None; + StringRef Delimiter = TokenText.substr(2, LParenPos - 2); + + // Check that the string ends in ')Delimiter"'. + size_t RParenPos = TokenText.size() - Delimiter.size() - 2; + if (TokenText[RParenPos] != ')') + return None; + if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) + return None; + return Delimiter; +} + +RawStringFormatStyleManager::RawStringFormatStyleManager( + const FormatStyle &CodeStyle) { + for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { + FormatStyle Style; + if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, + RawStringFormat.Language, &Style)) { + Style = getLLVMStyle(); + Style.Language = RawStringFormat.Language; + } + Style.ColumnLimit = CodeStyle.ColumnLimit; + DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); + } +} + +llvm::Optional<FormatStyle> +RawStringFormatStyleManager::get(StringRef Delimiter) const { + auto It = DelimiterStyle.find(Delimiter); + if (It == DelimiterStyle.end()) + return None; + return It->second; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -85,20 +133,32 @@ ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Whitespaces(Whitespaces), Encoding(Encoding), BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas) {} + CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, + unsigned FirstStartColumn, const 
AnnotatedLine *Line, bool DryRun) { LineState State; State.FirstIndent = FirstIndent; - State.Column = FirstIndent; + if (FirstStartColumn && Line->First->NewlinesBefore == 0) + State.Column = FirstStartColumn; + else + State.Column = FirstIndent; + // With preprocessor directive indentation, the line starts on column 0 + // since it's indented after the hash, but FirstIndent is set to the + // preprocessor indent. + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && + (Line->Type == LT_PreprocessorDirective || + Line->Type == LT_ImportStatement)) + State.Column = 0; State.Line = Line; State.NextToken = Line->First; State.Stack.push_back(ParenState(FirstIndent, FirstIndent, /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; + State.NoContinuation = false; State.StartOfStringLiteral = 0; State.StartOfLineLevel = 0; State.LowestLevelOnLine = 0; @@ -120,9 +180,8 @@ bool ContinuationIndenter::canBreak(const LineState &State) { const FormatToken &Current = *State.NextToken; const FormatToken &Previous = *Current.Previous; assert(&Previous == Current.Previous); - if (!Current.CanBreakBefore && - !(State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockOrBlockTypeList(Style))) + if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace && + Current.closesBlockOrBlockTypeList(Style))) return false; // The opening "{" of a braced list has to be on the same line as the first // element if it is nested in another braced init list or function call. @@ -264,7 +323,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // We need special cases for ">>" which we have split into two ">" while // lexing in order to make template parsing easier. 
bool IsComparison = (Previous.getPrecedence() == prec::Relational || - Previous.getPrecedence() == prec::Equality) && + Previous.getPrecedence() == prec::Equality || + Previous.getPrecedence() == prec::Spaceship) && Previous.Previous && Previous.Previous->isNot(TT_BinaryOperator); // For >>. bool LHSIsBinaryExpr = @@ -316,6 +376,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Previous.TokenText == "\'\\n\'")))) return true; + if (Previous.is(TT_BlockComment) && Previous.IsMultiline) + return true; + + if (State.NoContinuation) + return true; + return false; } @@ -325,6 +391,8 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, const FormatToken &Current = *State.NextToken; assert(!State.Stack.empty()); + State.NoContinuation = false; + if ((Current.is(TT_ImplicitStringLiteral) && (Current.Previous->Tok.getIdentifierInfo() == nullptr || Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == @@ -376,9 +444,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces; + // Indent preprocessor directives after the hash if required. + int PPColumnCorrection = 0; + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && + Previous.is(tok::hash) && State.FirstIndent > 0 && + (State.Line->Type == LT_PreprocessorDirective || + State.Line->Type == LT_ImportStatement)) { + Spaces += State.FirstIndent; + + // For preprocessor indent with tabs, State.Column will be 1 because of the + // hash. This causes second-level indents onward to have an extra space + // after the tabs. We avoid this misalignment by subtracting 1 from the + // column value passed to replaceWhitespace(). 
+ if (Style.UseTab != FormatStyle::UT_Never) + PPColumnCorrection = -1; + } + if (!DryRun) Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, - State.Column + Spaces); + State.Column + Spaces + PPColumnCorrection); // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance // declaration unless there is multiple inheritance. @@ -405,9 +489,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak && Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && State.Column > getNewLineColumn(State) && - (!Previous.Previous || - !Previous.Previous->isOneOf(tok::kw_for, tok::kw_while, - tok::kw_switch)) && + (!Previous.Previous || !Previous.Previous->isOneOf( + tok::kw_for, tok::kw_while, tok::kw_switch)) && // Don't do this for simple (no expressions) one-argument function calls // as that feels like needlessly wasting whitespace, e.g.: // @@ -454,7 +537,8 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, (P->is(TT_ConditionalExpr) && P->is(tok::colon))) && !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) && P->getPrecedence() != prec::Assignment && - P->getPrecedence() != prec::Relational) { + P->getPrecedence() != prec::Relational && + P->getPrecedence() != prec::Spaceship) { bool BreakBeforeOperator = P->MustBreakBefore || P->is(tok::lessless) || (P->is(TT_BinaryOperator) && @@ -619,8 +703,18 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, State.Stack.back().BreakBeforeParameter = false; if (!DryRun) { + unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1; + if (Current.is(tok::r_brace) && Current.MatchingParen && + // Only strip trailing empty lines for l_braces that have children, i.e. + // for function expressions (lambdas, arrows, etc). 
+ !Current.MatchingParen->Children.empty()) { + // lambdas and arrow functions are expressions, thus their r_brace is not + // on its own line, and thus not covered by UnwrappedLineFormatter's logic + // about removing empty lines on closing blocks. Special case them here. + MaxEmptyLinesToKeep = 1; + } unsigned Newlines = std::max( - 1u, std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); + 1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep)); bool ContinuePPDirective = State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, @@ -661,9 +755,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, // before the corresponding } or ]. if (PreviousNonComment && (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - opensProtoMessageField(*PreviousNonComment, Style) || - (PreviousNonComment->is(TT_TemplateString) && - PreviousNonComment->opensScope()))) + opensProtoMessageField(*PreviousNonComment, Style))) State.Stack.back().BreakBeforeClosingBrace = true; if (State.Stack.back().AvoidBinPacking) { @@ -731,7 +823,10 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) return State.Stack[State.Stack.size() - 2].LastSpace; if (Current.is(tok::identifier) && Current.Next && - Current.Next->is(TT_DictLiteral)) + (Current.Next->is(TT_DictLiteral) || + ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && + Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace)))) return State.Stack.back().Indent; if (NextNonComment->is(TT_ObjCStringLiteral) && State.StartOfStringLiteral != 0) @@ -871,8 +966,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, // Next(...) // ^ line up here. 
State.Stack.back().Indent = - State.Column + (Style.BreakConstructorInitializers == - FormatStyle::BCIS_BeforeComma ? 0 : 2); + State.Column + + (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma + ? 0 + : 2); State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) State.Stack.back().AvoidBinPacking = true; @@ -884,7 +981,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.FirstIndent + Style.ConstructorInitializerIndentWidth; State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - State.Stack.back().AvoidBinPacking = true; + State.Stack.back().AvoidBinPacking = true; } if (Current.is(TT_InheritanceColon)) State.Stack.back().Indent = @@ -912,8 +1009,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Stack[i].NoLineBreak = true; State.Stack[State.Stack.size() - 2].NestedBlockInlined = false; } - if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && + if (Previous && + (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || + Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { State.Stack.back().NestedBlockInlined = !Newline && @@ -922,13 +1020,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, moveStatePastFakeLParens(State, Newline); moveStatePastScopeCloser(State); - if (Current.is(TT_TemplateString) && Current.opensScope()) - State.Stack.back().LastSpace = - (Current.IsMultiline ? 
Current.LastLineColumnWidth - : State.Column + Current.ColumnWidth) - - strlen("${"); - bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak && - !State.Stack.back().NoLineBreakInOperand; + bool AllowBreak = !State.Stack.back().NoLineBreak && + !State.Stack.back().NoLineBreakInOperand; moveStatePastScopeOpener(State, Newline); moveStatePastFakeRParens(State); @@ -942,13 +1035,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.Column += Current.ColumnWidth; State.NextToken = State.NextToken->Next; - unsigned Penalty = 0; - if (CanBreakProtrudingToken) - Penalty = breakProtrudingToken(Current, State, DryRun); - if (State.Column > getColumnLimit(State)) { - unsigned ExcessCharacters = State.Column - getColumnLimit(State); - Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; - } + + unsigned Penalty = + handleEndOfLine(Current, State, DryRun, AllowBreak); if (Current.Role) Current.Role->formatFromToken(State, this, DryRun); @@ -1072,14 +1161,13 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, bool EndsInComma = Current.MatchingParen && Current.MatchingParen->Previous && Current.MatchingParen->Previous->is(tok::comma); - AvoidBinPacking = - EndsInComma || Current.is(TT_DictLiteral) || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto || - !Style.BinPackArguments || - (NextNoComment && - NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, - TT_DesignatedInitializerLSquare)); + AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto || + !Style.BinPackArguments || + (NextNoComment && + NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, + TT_DesignatedInitializerLSquare)); BreakBeforeParameter = EndsInComma; if (Current.ParameterCount > 1) NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1); @@ -1098,18 +1186,6 @@ void 
ContinuationIndenter::moveStatePastScopeOpener(LineState &State, LastSpace = std::max(LastSpace, State.Stack.back().Indent); } - // JavaScript template strings are special as we always want to indent - // nested expressions relative to the ${}. Otherwise, this can create quite - // a mess. - if (Current.is(TT_TemplateString)) { - unsigned Column = Current.IsMultiline - ? Current.LastLineColumnWidth - : State.Column + Current.ColumnWidth; - NewIndent = Column; - LastSpace = Column; - NestedBlockIndent = Column; - } - bool EndsInComma = Current.MatchingParen && Current.MatchingParen->getPreviousNonComment() && @@ -1200,11 +1276,93 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { State.Stack.back().BreakBeforeParameter = true; } -unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, - LineState &State) { - if (!Current.IsMultiline) +static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, + unsigned TabWidth, + encoding::Encoding Encoding) { + size_t LastNewlinePos = Text.find_last_of("\n"); + if (LastNewlinePos == StringRef::npos) { + return StartColumn + + encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); + } else { + return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), + /*StartColumn=*/0, TabWidth, Encoding); + } +} + +unsigned ContinuationIndenter::reformatRawStringLiteral( + const FormatToken &Current, LineState &State, + const FormatStyle &RawStringStyle, bool DryRun) { + unsigned StartColumn = State.Column - Current.ColumnWidth; + auto Delimiter = *getRawStringDelimiter(Current.TokenText); + // The text of a raw string is between the leading 'R"delimiter(' and the + // trailing 'delimiter)"'. + unsigned PrefixSize = 3 + Delimiter.size(); + unsigned SuffixSize = 2 + Delimiter.size(); + + // The first start column is the column the raw text starts. 
+ unsigned FirstStartColumn = StartColumn + PrefixSize; + + // The next start column is the intended indentation a line break inside + // the raw string at level 0. It is determined by the following rules: + // - if the content starts on newline, it is one level more than the current + // indent, and + // - if the content does not start on a newline, it is the first start + // column. + // These rules have the advantage that the formatted content both does not + // violate the rectangle rule and visually flows within the surrounding + // source. + bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; + unsigned NextStartColumn = ContentStartsOnNewline + ? State.Stack.back().Indent + Style.IndentWidth + : FirstStartColumn; + + // The last start column is the column the raw string suffix starts if it is + // put on a newline. + // The last start column is the intended indentation of the raw string postfix + // if it is put on a newline. It is determined by the following rules: + // - if the raw string prefix starts on a newline, it is the column where + // that raw string prefix starts, and + // - if the raw string prefix does not start on a newline, it is the current + // indent. + unsigned LastStartColumn = Current.NewlinesBefore + ? 
FirstStartColumn - PrefixSize + : State.Stack.back().Indent; + + std::string RawText = + Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + + std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat( + RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, + FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>", + /*Status=*/nullptr); + + auto NewCode = applyAllReplacements(RawText, Fixes.first); + tooling::Replacements NoFixes; + if (!NewCode) { + State.Column += Current.ColumnWidth; return 0; + } + if (!DryRun) { + SourceLocation OriginLoc = + Current.Tok.getLocation().getLocWithOffset(PrefixSize); + for (const tooling::Replacement &Fix : Fixes.first) { + auto Err = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), + Fix.getLength(), Fix.getReplacementText())); + if (Err) { + llvm::errs() << "Failed to reformat raw string: " + << llvm::toString(std::move(Err)) << "\n"; + } + } + } + unsigned RawLastLineEndColumn = getLastLineEndColumn( + *NewCode, FirstStartColumn, Style.TabWidth, Encoding); + State.Column = RawLastLineEndColumn + SuffixSize; + return Fixes.second; +} +unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, + LineState &State) { // Break before further function parameters on all levels. for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) State.Stack[i].BreakBeforeParameter = true; @@ -1219,33 +1377,85 @@ unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, return 0; } -unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool DryRun) { - // Don't break multi-line tokens other than block comments. Instead, just - // update the state. - if (Current.isNot(TT_BlockComment) && Current.IsMultiline) - return addMultilineToken(Current, State); - - // Don't break implicit string literals or import statements. 
- if (Current.is(TT_ImplicitStringLiteral) || - State.Line->Type == LT_ImportStatement) - return 0; +unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, + LineState &State, bool DryRun, + bool AllowBreak) { + unsigned Penalty = 0; + // Compute the raw string style to use in case this is a raw string literal + // that can be reformatted. + auto RawStringStyle = getRawStringStyle(Current, State); + if (RawStringStyle) { + Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun); + } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) { + // Don't break multi-line tokens other than block comments and raw string + // literals. Instead, just update the state. + Penalty = addMultilineToken(Current, State); + } else if (State.Line->Type != LT_ImportStatement) { + // We generally don't break import statements. + LineState OriginalState = State; + + // Whether we force the reflowing algorithm to stay strictly within the + // column limit. + bool Strict = false; + // Whether the first non-strict attempt at reflowing did intentionally + // exceed the column limit. + bool Exceeded = false; + std::tie(Penalty, Exceeded) = breakProtrudingToken( + Current, State, AllowBreak, /*DryRun=*/true, Strict); + if (Exceeded) { + // If non-strict reflowing exceeds the column limit, try whether strict + // reflowing leads to an overall lower penalty. + LineState StrictState = OriginalState; + unsigned StrictPenalty = + breakProtrudingToken(Current, StrictState, AllowBreak, + /*DryRun=*/true, /*Strict=*/true) + .first; + Strict = StrictPenalty <= Penalty; + if (Strict) { + Penalty = StrictPenalty; + State = StrictState; + } + } + if (!DryRun) { + // If we're not in dry-run mode, apply the changes with the decision on + // strictness made above. 
+ breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false, + Strict); + } + } + if (State.Column > getColumnLimit(State)) { + unsigned ExcessCharacters = State.Column - getColumnLimit(State); + Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; + } + return Penalty; +} - if (!Current.isStringLiteral() && !Current.is(tok::comment)) - return 0; +llvm::Optional<FormatStyle> +ContinuationIndenter::getRawStringStyle(const FormatToken &Current, + const LineState &State) { + if (!Current.isStringLiteral()) + return None; + auto Delimiter = getRawStringDelimiter(Current.TokenText); + if (!Delimiter) + return None; + auto RawStringStyle = RawStringFormats.get(*Delimiter); + if (!RawStringStyle) + return None; + RawStringStyle->ColumnLimit = getColumnLimit(State); + return RawStringStyle; +} - std::unique_ptr<BreakableToken> Token; +std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken( + const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; - unsigned ColumnLimit = getColumnLimit(State); - if (Current.isStringLiteral()) { // FIXME: String literal breaking is currently disabled for Java and JS, as // it requires strings to be merged using "+" which we don't support. if (Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript || - !Style.BreakStringLiterals) - return 0; + !Style.BreakStringLiterals || + !AllowBreak) + return nullptr; // Don't break string literals inside preprocessor directives (except for // #define directives, as their contents are stored in separate lines and @@ -1253,11 +1463,11 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // This way we avoid breaking code with line directives and unknown // preprocessor directives that contain long string literals. 
if (State.Line->Type == LT_PreprocessorDirective) - return 0; + return nullptr; // Exempts unterminated string literals from line breaking. The user will // likely want to terminate the string before any line breaking is done. if (Current.IsUnterminatedLiteral) - return 0; + return nullptr; StringRef Text = Current.TokenText; StringRef Prefix; @@ -1272,114 +1482,359 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix, - Postfix, State.Line->InPPDirective, - Encoding, Style)); - } else { - return 0; + return llvm::make_unique<BreakableStringLiteral>( + Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective, + Encoding, Style); } } else if (Current.is(TT_BlockComment)) { - if (!Current.isTrailingComment() || !Style.ReflowComments || + if (!Style.ReflowComments || // If a comment token switches formatting, like // /* clang-format on */, we don't want to break it further, // but we may still want to adjust its indentation. 
- switchesFormatting(Current)) - return addMultilineToken(Current, State); - Token.reset(new BreakableBlockComment( + switchesFormatting(Current)) { + return nullptr; + } + return llvm::make_unique<BreakableBlockComment>( Current, StartColumn, Current.OriginalColumn, !Current.Previous, - State.Line->InPPDirective, Encoding, Style)); + State.Line->InPPDirective, Encoding, Style); } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { if (!Style.ReflowComments || CommentPragmasRegex.match(Current.TokenText.substr(2)) || switchesFormatting(Current)) - return 0; - Token.reset(new BreakableLineCommentSection( + return nullptr; + return llvm::make_unique<BreakableLineCommentSection>( Current, StartColumn, Current.OriginalColumn, !Current.Previous, - /*InPPDirective=*/false, Encoding, Style)); + /*InPPDirective=*/false, Encoding, Style); + } + return nullptr; +} + +std::pair<unsigned, bool> +ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, + LineState &State, bool AllowBreak, + bool DryRun, bool Strict) { + std::unique_ptr<const BreakableToken> Token = + createBreakableToken(Current, State, AllowBreak); + if (!Token) + return {0, false}; + assert(Token->getLineCount() > 0); + unsigned ColumnLimit = getColumnLimit(State); + if (Current.is(TT_LineComment)) { // We don't insert backslashes when breaking line comments. ColumnLimit = Style.ColumnLimit; - } else { - return 0; } if (Current.UnbreakableTailLength >= ColumnLimit) - return 0; - - unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength; - bool BreakInserted = false; + return {0, false}; + // ColumnWidth was already accounted into State.Column before calling + // breakProtrudingToken. + unsigned StartColumn = State.Column - Current.ColumnWidth; + unsigned NewBreakPenalty = Current.isStringLiteral() + ? 
Style.PenaltyBreakString + : Style.PenaltyBreakComment; + // Stores whether we intentionally decide to let a line exceed the column + // limit. + bool Exceeded = false; + // Stores whether we introduce a break anywhere in the token. + bool BreakInserted = Token->introducesBreakBeforeToken(); + // Store whether we inserted a new line break at the end of the previous + // logical line. + bool NewBreakBefore = false; // We use a conservative reflowing strategy. Reflow starts after a line is // broken or the corresponding whitespace compressed. Reflow ends as soon as a // line that doesn't get reflown with the previous line is reached. - bool ReflowInProgress = false; + bool Reflow = false; + // Keep track of where we are in the token: + // Where we are in the content of the current logical line. + unsigned TailOffset = 0; + // The column number we're currently at. + unsigned ContentStartColumn = + Token->getContentStartColumn(0, /*Break=*/false); + // The number of columns left in the current logical line after TailOffset. + unsigned RemainingTokenColumns = + Token->getRemainingLength(0, TailOffset, ContentStartColumn); + // Adapt the start of the token, for example indent. + if (!DryRun) + Token->adaptStartOfLine(0, Whitespaces); + unsigned Penalty = 0; - unsigned RemainingTokenColumns = 0; + DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn + << ".\n"); for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { - BreakableToken::Split SplitBefore(StringRef::npos, 0); - if (ReflowInProgress) { - SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, CommentPragmasRegex); - } - ReflowInProgress = SplitBefore.first != StringRef::npos; - unsigned TailOffset = - ReflowInProgress ? 
(SplitBefore.first + SplitBefore.second) : 0; - if (!DryRun) - Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns, - RemainingSpace, SplitBefore, Whitespaces); - RemainingTokenColumns = Token->getLineLengthAfterSplitBefore( - LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore); - while (RemainingTokenColumns > RemainingSpace) { - BreakableToken::Split Split = Token->getSplit( - LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex); + DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow + << ")\n"); + NewBreakBefore = false; + // If we did reflow the previous line, we'll try reflowing again. Otherwise + // we'll start reflowing if the current line is broken or whitespace is + // compressed. + bool TryReflow = Reflow; + // Break the current token until we can fit the rest of the line. + while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Over limit, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); + // If the current token doesn't fit, find the latest possible split in the + // current line so that breaking at it will be under the column limit. + // FIXME: Use the earliest possible split while reflowing to correctly + // compress whitespace within a line. + BreakableToken::Split Split = + Token->getSplit(LineIndex, TailOffset, ColumnLimit, + ContentStartColumn, CommentPragmasRegex); if (Split.first == StringRef::npos) { - // The last line's penalty is handled in addNextStateToQueue(). + // No break opportunity - update the penalty and continue with the next + // logical line. if (LineIndex < EndIndex - 1) + // The last line's penalty is handled in addNextStateToQueue(). 
Penalty += Style.PenaltyExcessCharacter * - (RemainingTokenColumns - RemainingSpace); + (ContentStartColumn + RemainingTokenColumns - ColumnLimit); + DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } assert(Split.first != 0); - // Check if compressing the whitespace range will bring the line length - // under the limit. If that is the case, we perform whitespace compression - // instead of inserting a line break. - unsigned RemainingTokenColumnsAfterCompression = - Token->getLineLengthAfterCompression(RemainingTokenColumns, Split); - if (RemainingTokenColumnsAfterCompression <= RemainingSpace) { - RemainingTokenColumns = RemainingTokenColumnsAfterCompression; - ReflowInProgress = true; - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces); - break; + if (Token->supportsReflow()) { + // Check whether the next natural split point after the current one can + // still fit the line, either because we can compress away whitespace, + // or because the penalty the excess characters introduce is lower than + // the break penalty. + // We only do this for tokens that support reflowing, and thus allow us + // to change the whitespace arbitrarily (e.g. comments). + // Other tokens, like string literals, can be broken on arbitrary + // positions. + + // First, compute the columns from TailOffset to the next possible split + // position. 
+ // For example: + // ColumnLimit: | + // // Some text that breaks + // ^ tail offset + // ^-- split + // ^-------- to split columns + // ^--- next split + // ^--------------- to next split columns + unsigned ToSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, Split.first, ContentStartColumn); + DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n"); + + BreakableToken::Split NextSplit = Token->getSplit( + LineIndex, TailOffset + Split.first + Split.second, ColumnLimit, + ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex); + // Compute the columns necessary to fit the next non-breakable sequence + // into the current line. + unsigned ToNextSplitColumns = 0; + if (NextSplit.first == StringRef::npos) { + ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset, + ContentStartColumn); + } else { + ToNextSplitColumns = Token->getRangeLength( + LineIndex, TailOffset, + Split.first + Split.second + NextSplit.first, ContentStartColumn); + } + // Compress the whitespace between the break and the start of the next + // unbreakable sequence. + ToNextSplitColumns = + Token->getLengthAfterCompression(ToNextSplitColumns, Split); + DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn + << "\n"); + DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n"); + // If the whitespace compression makes us fit, continue on the current + // line. + bool ContinueOnLine = + ContentStartColumn + ToNextSplitColumns <= ColumnLimit; + unsigned ExcessCharactersPenalty = 0; + if (!ContinueOnLine && !Strict) { + // Similarly, if the excess characters' penalty is lower than the + // penalty of introducing a new break, continue on the current line. 
+ ExcessCharactersPenalty = + (ContentStartColumn + ToNextSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + DEBUG(llvm::dbgs() + << " Penalty excess: " << ExcessCharactersPenalty + << "\n break : " << NewBreakPenalty << "\n"); + if (ExcessCharactersPenalty < NewBreakPenalty) { + Exceeded = true; + ContinueOnLine = true; + } + } + if (ContinueOnLine) { + DEBUG(llvm::dbgs() << " Continuing on line...\n"); + // The current line fits after compressing the whitespace - reflow + // the next line into it if possible. + TryReflow = true; + if (!DryRun) + Token->compressWhitespace(LineIndex, TailOffset, Split, + Whitespaces); + // When we continue on the same line, leave one space between content. + ContentStartColumn += ToSplitColumns + 1; + Penalty += ExcessCharactersPenalty; + TailOffset += Split.first + Split.second; + RemainingTokenColumns = Token->getRemainingLength( + LineIndex, TailOffset, ContentStartColumn); + continue; + } } - - unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( - LineIndex, TailOffset + Split.first + Split.second, StringRef::npos); + DEBUG(llvm::dbgs() << " Breaking...\n"); + ContentStartColumn = + Token->getContentStartColumn(LineIndex, /*Break=*/true); + unsigned NewRemainingTokenColumns = Token->getRemainingLength( + LineIndex, TailOffset + Split.first + Split.second, + ContentStartColumn); // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) + if (NewRemainingTokenColumns == RemainingTokenColumns) { + // FIXME: Do we need to adjust the penalty? 
break; - + } assert(NewRemainingTokenColumns < RemainingTokenColumns); + + DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first + << ", " << Split.second << "\n"); if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); - Penalty += Current.SplitPenalty; - unsigned ColumnsUsed = - Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); - if (ColumnsUsed > ColumnLimit) { - Penalty += Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit); - } + + Penalty += NewBreakPenalty; TailOffset += Split.first + Split.second; RemainingTokenColumns = NewRemainingTokenColumns; - ReflowInProgress = true; BreakInserted = true; + NewBreakBefore = true; } + // In case there's another line, prepare the state for the start of the next + // line. + if (LineIndex + 1 != EndIndex) { + unsigned NextLineIndex = LineIndex + 1; + if (NewBreakBefore) + // After breaking a line, try to reflow the next line into the current + // one once RemainingTokenColumns fits. + TryReflow = true; + if (TryReflow) { + // We decided that we want to try reflowing the next line into the + // current one. + // We will now adjust the state as if the reflow is successful (in + // preparation for the next line), and see whether that works. If we + // decide that we cannot reflow, we will later reset the state to the + // start of the next line. + Reflow = false; + // As we did not continue breaking the line, RemainingTokenColumns is + // known to fit after ContentStartColumn. Adapt ContentStartColumn to + // the position at which we want to format the next line if we do + // actually reflow. + // When we reflow, we need to add a space between the end of the current + // line and the next line's start column. + ContentStartColumn += RemainingTokenColumns + 1; + // Get the split that we need to reflow next logical line into the end + // of the current one; the split will include any leading whitespace of + // the next logical line. 
+ BreakableToken::Split SplitBeforeNext = + Token->getReflowSplit(NextLineIndex, CommentPragmasRegex); + DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn + << "\n Potential reflow split: "); + if (SplitBeforeNext.first != StringRef::npos) { + DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", " + << SplitBeforeNext.second << "\n"); + TailOffset = SplitBeforeNext.first + SplitBeforeNext.second; + // If the rest of the next line fits into the current line below the + // column limit, we can safely reflow. + RemainingTokenColumns = Token->getRemainingLength( + NextLineIndex, TailOffset, ContentStartColumn); + Reflow = true; + if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Over limit after reflow, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); + // If the whole next line does not fit, try to find a point in + // the next line at which we can break so that attaching the part + // of the next line to that break point onto the current line is + // below the column limit. + BreakableToken::Split Split = + Token->getSplit(NextLineIndex, TailOffset, ColumnLimit, + ContentStartColumn, CommentPragmasRegex); + if (Split.first == StringRef::npos) { + DEBUG(llvm::dbgs() << " Did not find later break\n"); + Reflow = false; + } else { + // Check whether the first split point gets us below the column + // limit. Note that we will execute this split below as part of + // the normal token breaking and reflow logic within the line. 
+ unsigned ToSplitColumns = Token->getRangeLength( + NextLineIndex, TailOffset, Split.first, ContentStartColumn); + if (ContentStartColumn + ToSplitColumns > ColumnLimit) { + DEBUG(llvm::dbgs() << " Next split protrudes, need: " + << (ContentStartColumn + ToSplitColumns) + << ", space: " << ColumnLimit); + unsigned ExcessCharactersPenalty = + (ContentStartColumn + ToSplitColumns - ColumnLimit) * + Style.PenaltyExcessCharacter; + if (NewBreakPenalty < ExcessCharactersPenalty) { + Reflow = false; + } + } + } + } + } else { + DEBUG(llvm::dbgs() << "not found.\n"); + } + } + if (!Reflow) { + // If we didn't reflow into the next line, the only space to consider is + // the next logical line. Reset our state to match the start of the next + // line. + TailOffset = 0; + ContentStartColumn = + Token->getContentStartColumn(NextLineIndex, /*Break=*/false); + RemainingTokenColumns = Token->getRemainingLength( + NextLineIndex, TailOffset, ContentStartColumn); + // Adapt the start of the token, for example indent. + if (!DryRun) + Token->adaptStartOfLine(NextLineIndex, Whitespaces); + } else { + // If we found a reflow split and have added a new break before the next + // line, we are going to remove the line break at the start of the next + // logical line. For example, here we'll add a new line break after + // 'text', and subsequently delete the line break between 'that' and + // 'reflows'. + // // some text that + // // reflows + // -> + // // some text + // // that reflows + // When adding the line break, we also added the penalty for it, so we + // need to subtract that penalty again when we remove the line break due + // to reflowing. 
+ if (NewBreakBefore) { + assert(Penalty >= NewBreakPenalty); + Penalty -= NewBreakPenalty; + } + if (!DryRun) + Token->reflow(NextLineIndex, Whitespaces); + } + } + } + + BreakableToken::Split SplitAfterLastLine = + Token->getSplitAfterLastLine(TailOffset); + if (SplitAfterLastLine.first != StringRef::npos) { + DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); + if (!DryRun) + Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, + Whitespaces); + ContentStartColumn = + Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true); + RemainingTokenColumns = Token->getRemainingLength( + Token->getLineCount() - 1, + TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second, + ContentStartColumn); } - State.Column = RemainingTokenColumns; + State.Column = ContentStartColumn + RemainingTokenColumns - + Current.UnbreakableTailLength; if (BreakInserted) { // If we break the token inside a parameter list, we need to break before @@ -1390,15 +1845,15 @@ unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, State.Stack[i].BreakBeforeParameter = true; } - Penalty += Current.isStringLiteral() ? 
Style.PenaltyBreakString - : Style.PenaltyBreakComment; + if (Current.is(TT_BlockComment)) + State.NoContinuation = true; State.Stack.back().LastSpace = StartColumn; } Token->updateNextToken(State); - return Penalty; + return {Penalty, Exceeded}; } unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index 9a06aa6f6267..ded7bfab4267 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -20,6 +20,8 @@ #include "FormatToken.h" #include "clang/Format/Format.h" #include "llvm/Support/Regex.h" +#include <map> +#include <tuple> namespace clang { class SourceManager; @@ -27,11 +29,21 @@ class SourceManager; namespace format { class AnnotatedLine; +class BreakableToken; struct FormatToken; struct LineState; struct ParenState; +struct RawStringFormatStyleManager; class WhitespaceManager; +struct RawStringFormatStyleManager { + llvm::StringMap<FormatStyle> DelimiterStyle; + + RawStringFormatStyleManager(const FormatStyle &CodeStyle); + + llvm::Optional<FormatStyle> get(StringRef Delimiter) const; +}; + class ContinuationIndenter { public: /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in @@ -44,9 +56,11 @@ public: bool BinPackInconclusiveFunctions); /// \brief Get the initial state, i.e. the state after placing \p Line's - /// first token at \p FirstIndent. - LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, - bool DryRun); + /// first token at \p FirstIndent. When reformatting a fragment of code, as in + /// the case of formatting inside raw string literals, \p FirstStartColumn is + /// the column at which the state of the parent formatter is. + LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, + const AnnotatedLine *Line, bool DryRun); // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a // better home. 
@@ -88,17 +102,52 @@ private: /// \brief Update 'State' with the next token opening a nested block. void moveStateToNewBlock(LineState &State); + /// \brief Reformats a raw string literal. + /// + /// \returns An extra penalty induced by reformatting the token. + unsigned reformatRawStringLiteral(const FormatToken &Current, + LineState &State, + const FormatStyle &RawStringStyle, + bool DryRun); + + /// \brief If the current token is at the end of the current line, handle + /// the transition to the next line. + unsigned handleEndOfLine(const FormatToken &Current, LineState &State, + bool DryRun, bool AllowBreak); + + /// \brief If \p Current is a raw string that is configured to be reformatted, + /// return the style to be used. + llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, + const LineState &State); + /// \brief If the current token sticks out over the end of the line, break /// it if possible. /// - /// \returns An extra penalty if a token was broken, otherwise 0. + /// \returns A pair (penalty, exceeded), where penalty is the extra penalty + /// when tokens are broken or lines exceed the column limit, and exceeded + /// indicates whether the algorithm purposefully left lines exceeding the + /// column limit. /// - /// The returned penalty will cover the cost of the additional line breaks and - /// column limit violation in all lines except for the last one. The penalty - /// for the column limit violation in the last line (and in single line - /// tokens) is handled in \c addNextStateToQueue. - unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, - bool DryRun); + /// The returned penalty will cover the cost of the additional line breaks + /// and column limit violation in all lines except for the last one. The + /// penalty for the column limit violation in the last line (and in single + /// line tokens) is handled in \c addNextStateToQueue. 
+ /// + /// \p Strict indicates whether reflowing is allowed to leave characters + /// protruding the column limit; if true, lines will be split strictly within + /// the column limit where possible; if false, words are allowed to protrude + /// over the column limit as long as the penalty is less than the penalty + /// of a break. + std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, + LineState &State, + bool AllowBreak, bool DryRun, + bool Strict); + + /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr + /// if the current token cannot be broken. + std::unique_ptr<BreakableToken> + createBreakableToken(const FormatToken &Current, LineState &State, + bool AllowBreak); /// \brief Appends the next token to \p State and updates information /// necessary for indentation. @@ -143,6 +192,7 @@ private: encoding::Encoding Encoding; bool BinPackInconclusiveFunctions; llvm::Regex CommentPragmasRegex; + const RawStringFormatStyleManager RawStringFormats; }; struct ParenState { @@ -318,6 +368,9 @@ struct LineState { /// \brief \c true if this line contains a continued for-loop section. bool LineContainsContinuedForLoopSection; + /// \brief \c true if \p NextToken should not continue this line. + bool NoContinuation; + /// \brief The \c NestingLevel at the start of this line. 
unsigned StartOfLineLevel; @@ -364,6 +417,8 @@ struct LineState { if (LineContainsContinuedForLoopSection != Other.LineContainsContinuedForLoopSection) return LineContainsContinuedForLoopSection; + if (NoContinuation != Other.NoContinuation) + return NoContinuation; if (StartOfLineLevel != Other.StartOfLineLevel) return StartOfLineLevel < Other.StartOfLineLevel; if (LowestLevelOnLine != Other.LowestLevelOnLine) diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 6fe5be2c815d..217c6729ee39 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -16,6 +16,7 @@ #include "clang/Format/Format.h" #include "AffectedRangeManager.h" #include "ContinuationIndenter.h" +#include "FormatInternal.h" #include "FormatTokenLexer.h" #include "NamespaceEndCommentsFixer.h" #include "SortJavaScriptImports.h" @@ -45,6 +46,7 @@ using clang::format::FormatStyle; LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat) namespace llvm { namespace yaml { @@ -125,8 +127,10 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { } }; -template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> { - static void enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) { +template <> +struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> { + static void + enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) { IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon); IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma); IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon); @@ -134,6 +138,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitiali }; template <> +struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> { + static void enumeration(IO &IO, FormatStyle::PPDirectiveIndentStyle &Value) { + 
IO.enumCase(Value, "None", FormatStyle::PPDIS_None); + IO.enumCase(Value, "AfterHash", FormatStyle::PPDIS_AfterHash); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> { static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) { IO.enumCase(Value, "None", FormatStyle::RTBS_None); @@ -181,8 +193,10 @@ template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { } }; -template <> struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> { - static void enumeration(IO &IO, FormatStyle::EscapedNewlineAlignmentStyle &Value) { +template <> +struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> { + static void enumeration(IO &IO, + FormatStyle::EscapedNewlineAlignmentStyle &Value) { IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign); IO.enumCase(Value, "Left", FormatStyle::ENAS_Left); IO.enumCase(Value, "Right", FormatStyle::ENAS_Right); @@ -347,9 +361,11 @@ template <> struct MappingTraits<FormatStyle> { Style.ExperimentalAutoDetectBinPacking); IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); IO.mapOptional("ForEachMacros", Style.ForEachMacros); + IO.mapOptional("IncludeBlocks", Style.IncludeBlocks); IO.mapOptional("IncludeCategories", Style.IncludeCategories); IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex); IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); + IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives); IO.mapOptional("IndentWidth", Style.IndentWidth); IO.mapOptional("IndentWrappedFunctionNames", Style.IndentWrappedFunctionNames); @@ -365,8 +381,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", Style.ObjCSpaceBeforeProtocolList); - IO.mapOptional("PenaltyBreakAssignment", - Style.PenaltyBreakAssignment); + IO.mapOptional("PenaltyBreakAssignment", 
Style.PenaltyBreakAssignment); IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", Style.PenaltyBreakBeforeFirstCallParameter); IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); @@ -377,11 +392,13 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", Style.PenaltyReturnTypeOnItsOwnLine); IO.mapOptional("PointerAlignment", Style.PointerAlignment); + IO.mapOptional("RawStringFormats", Style.RawStringFormats); IO.mapOptional("ReflowComments", Style.ReflowComments); IO.mapOptional("SortIncludes", Style.SortIncludes); IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations); IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); - IO.mapOptional("SpaceAfterTemplateKeyword", Style.SpaceAfterTemplateKeyword); + IO.mapOptional("SpaceAfterTemplateKeyword", + Style.SpaceAfterTemplateKeyword); IO.mapOptional("SpaceBeforeAssignmentOperators", Style.SpaceBeforeAssignmentOperators); IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); @@ -411,6 +428,7 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration); IO.mapOptional("AfterStruct", Wrapping.AfterStruct); IO.mapOptional("AfterUnion", Wrapping.AfterUnion); + IO.mapOptional("AfterExternBlock", Wrapping.AfterExternBlock); IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch); IO.mapOptional("BeforeElse", Wrapping.BeforeElse); IO.mapOptional("IndentBraces", Wrapping.IndentBraces); @@ -427,6 +445,22 @@ template <> struct MappingTraits<FormatStyle::IncludeCategory> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::IncludeBlocksStyle> { + static void enumeration(IO &IO, FormatStyle::IncludeBlocksStyle &Value) { + IO.enumCase(Value, "Preserve", FormatStyle::IBS_Preserve); + IO.enumCase(Value, "Merge", FormatStyle::IBS_Merge); + IO.enumCase(Value, "Regroup", FormatStyle::IBS_Regroup); + } +}; + +template <> struct 
MappingTraits<FormatStyle::RawStringFormat> { + static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { + IO.mapOptional("Delimiter", Format.Delimiter); + IO.mapOptional("Language", Format.Language); + IO.mapOptional("BasedOnStyle", Format.BasedOnStyle); + } +}; + // Allows to read vector<FormatStyle> while keeping default values. // IO.getContext() should contain a pointer to the FormatStyle structure, that // will be used to get default values for missing keys. @@ -441,7 +475,7 @@ template <> struct DocumentListTraits<std::vector<FormatStyle>> { if (Index >= Seq.size()) { assert(Index == Seq.size()); FormatStyle Template; - if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) { + if (!Seq.empty() && Seq[0].Language == FormatStyle::LK_None) { Template = Seq[0]; } else { Template = *((const FormatStyle *)IO.getContext()); @@ -491,9 +525,9 @@ static FormatStyle expandPresets(const FormatStyle &Style) { if (Style.BreakBeforeBraces == FormatStyle::BS_Custom) return Style; FormatStyle Expanded = Style; - Expanded.BraceWrapping = {false, false, false, false, false, false, - false, false, false, false, false, true, - true, true}; + Expanded.BraceWrapping = {false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, true}; switch (Style.BreakBeforeBraces) { case FormatStyle::BS_Linux: Expanded.BraceWrapping.AfterClass = true; @@ -506,6 +540,7 @@ static FormatStyle expandPresets(const FormatStyle &Style) { Expanded.BraceWrapping.AfterFunction = true; Expanded.BraceWrapping.AfterStruct = true; Expanded.BraceWrapping.AfterUnion = true; + Expanded.BraceWrapping.AfterExternBlock = true; Expanded.BraceWrapping.SplitEmptyFunction = true; Expanded.BraceWrapping.SplitEmptyRecord = false; break; @@ -522,13 +557,13 @@ static FormatStyle expandPresets(const FormatStyle &Style) { Expanded.BraceWrapping.AfterNamespace = true; Expanded.BraceWrapping.AfterObjCDeclaration = true; Expanded.BraceWrapping.AfterStruct = true; + 
Expanded.BraceWrapping.AfterExternBlock = true; Expanded.BraceWrapping.BeforeCatch = true; Expanded.BraceWrapping.BeforeElse = true; break; case FormatStyle::BS_GNU: - Expanded.BraceWrapping = {true, true, true, true, true, true, - true, true, true, true, true, true, - true, true}; + Expanded.BraceWrapping = {true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true}; break; case FormatStyle::BS_WebKit: Expanded.BraceWrapping.AfterFunction = true; @@ -564,9 +599,9 @@ FormatStyle getLLVMStyle() { LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeTernaryOperators = true; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; - LLVMStyle.BraceWrapping = {false, false, false, false, false, false, - false, false, false, false, false, true, - true, true}; + LLVMStyle.BraceWrapping = {false, false, false, false, false, + false, false, false, false, false, + false, false, true, true, true}; LLVMStyle.BreakAfterJavaFieldAnnotations = false; LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; LLVMStyle.BreakBeforeInheritanceComma = false; @@ -588,7 +623,9 @@ FormatStyle getLLVMStyle() { {"^(<|\"(gtest|gmock|isl|json)/)", 3}, {".*", 1}}; LLVMStyle.IncludeIsMainRegex = "(Test)?$"; + LLVMStyle.IncludeBlocks = FormatStyle::IBS_Preserve; LLVMStyle.IndentCaseLabels = false; + LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; LLVMStyle.IndentWrappedFunctionNames = false; LLVMStyle.IndentWidth = 2; LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; @@ -604,6 +641,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; + LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}}; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -649,7 +687,8 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind 
Language) { GoogleStyle.AlwaysBreakTemplateDeclarations = true; GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; GoogleStyle.DerivePointerAlignment = true; - GoogleStyle.IncludeCategories = {{"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}}; + GoogleStyle.IncludeCategories = { + {"^<ext/.*\\.h>", 2}, {"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}}; GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$"; GoogleStyle.IndentCaseLabels = true; GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; @@ -725,8 +764,7 @@ FormatStyle getMozillaStyle() { FormatStyle MozillaStyle = getLLVMStyle(); MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; - MozillaStyle.AlwaysBreakAfterReturnType = - FormatStyle::RTBS_TopLevel; + MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; MozillaStyle.AlwaysBreakTemplateDeclarations = true; @@ -879,7 +917,7 @@ public: JavaScriptRequoter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -887,7 +925,7 @@ public: AnnotatedLines.end()); tooling::Replacements Result; requoteJSStringLiteral(AnnotatedLines, Result); - return Result; + return {Result, 0}; } private: @@ -968,7 +1006,7 @@ public: FormattingAttemptStatus *Status) : TokenAnalyzer(Env, Style), Status(Status) {} - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -987,17 +1025,23 @@ public: ContinuationIndenter Indenter(Style, Tokens.getKeywords(), Env.getSourceManager(), Whitespaces, Encoding, BinPackInconclusiveFunctions); - 
UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(), - Env.getSourceManager(), Status) - .format(AnnotatedLines); + unsigned Penalty = + UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, + Tokens.getKeywords(), Env.getSourceManager(), + Status) + .format(AnnotatedLines, /*DryRun=*/false, + /*AdditionalIndent=*/0, + /*FixBadIndentation=*/false, + /*FirstStartColumn=*/Env.getFirstStartColumn(), + /*NextStartColumn=*/Env.getNextStartColumn(), + /*LastStartColumn=*/Env.getLastStartColumn()); for (const auto &R : Whitespaces.generateReplacements()) if (Result.add(R)) - return Result; - return Result; + return std::make_pair(Result, 0); + return std::make_pair(Result, Penalty); } private: - static bool inputUsesCRLF(StringRef Text) { return Text.count('\r') * 2 > Text.count('\n'); } @@ -1082,7 +1126,7 @@ public: DeletedTokens(FormatTokenLess(Env.getSourceManager())) {} // FIXME: eliminate unused parameters. - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -1110,7 +1154,7 @@ public: } } - return generateFixes(); + return {generateFixes(), 0}; } private: @@ -1386,19 +1430,27 @@ static void sortCppIncludes(const FormatStyle &Style, }), Indices.end()); + int CurrentCategory = Includes.front().Category; + // If the #includes are out of order, we generate a single replacement fixing // the entire block. Otherwise, no replacement is generated. 
if (Indices.size() == Includes.size() && - std::is_sorted(Indices.begin(), Indices.end())) + std::is_sorted(Indices.begin(), Indices.end()) && + Style.IncludeBlocks == FormatStyle::IBS_Preserve) return; std::string result; for (unsigned Index : Indices) { - if (!result.empty()) + if (!result.empty()) { result += "\n"; + if (Style.IncludeBlocks == FormatStyle::IBS_Regroup && + CurrentCategory != Includes[Index].Category) + result += "\n"; + } result += Includes[Index].Text; if (Cursor && CursorIndex == Index) *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset; + CurrentCategory = Includes[Index].Category; } auto Err = Replaces.add(tooling::Replacement( @@ -1506,6 +1558,10 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, else if (Trimmed == "// clang-format on") FormattingOff = false; + const bool EmptyLineSkipped = + Trimmed.empty() && (Style.IncludeBlocks == FormatStyle::IBS_Merge || + Style.IncludeBlocks == FormatStyle::IBS_Regroup); + if (!FormattingOff && !Line.endswith("\\")) { if (IncludeRegex.match(Line, &Matches)) { StringRef IncludeName = Matches[2]; @@ -1515,7 +1571,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, if (Category == 0) MainIncludeFound = true; IncludesInBlock.push_back({IncludeName, Line, Prev, Category}); - } else if (!IncludesInBlock.empty()) { + } else if (!IncludesInBlock.empty() && !EmptyLineSkipped) { sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor); IncludesInBlock.clear(); @@ -1539,12 +1595,16 @@ bool isMpegTS(StringRef Code) { return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47; } +bool isLikelyXml(StringRef Code) { return Code.ltrim().startswith("<"); } + tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName, unsigned *Cursor) { tooling::Replacements Replaces; if (!Style.SortIncludes) return Replaces; + if (isLikelyXml(Code)) + return 
Replaces; if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript && isMpegTS(Code)) return Replaces; @@ -1887,17 +1947,22 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, return processReplacements(Cleanup, Code, NewReplaces, Style); } -tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, - FormattingAttemptStatus *Status) { +namespace internal { +std::pair<tooling::Replacements, unsigned> +reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, + FormattingAttemptStatus *Status) { FormatStyle Expanded = expandPresets(Style); if (Expanded.DisableFormat) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; + if (isLikelyXml(Code)) + return {tooling::Replacements(), 0}; if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) - return tooling::Replacements(); + return {tooling::Replacements(), 0}; - typedef std::function<tooling::Replacements(const Environment &)> + typedef std::function<std::pair<tooling::Replacements, unsigned>( + const Environment &)> AnalyzerPass; SmallVector<AnalyzerPass, 4> Passes; @@ -1923,26 +1988,42 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, return Formatter(Env, Expanded, Status).process(); }); - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); + std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment( + Code, FileName, Ranges, FirstStartColumn, NextStartColumn, + LastStartColumn); llvm::Optional<std::string> CurrentCode = None; tooling::Replacements Fixes; + unsigned Penalty = 0; for (size_t I = 0, E = Passes.size(); I < E; ++I) { - tooling::Replacements PassFixes = Passes[I](*Env); + std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env); auto 
NewCode = applyAllReplacements( - CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes); + CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first); if (NewCode) { - Fixes = Fixes.merge(PassFixes); + Fixes = Fixes.merge(PassFixes.first); + Penalty += PassFixes.second; if (I + 1 < E) { CurrentCode = std::move(*NewCode); Env = Environment::CreateVirtualEnvironment( *CurrentCode, FileName, - tooling::calculateRangesAfterReplacements(Fixes, Ranges)); + tooling::calculateRangesAfterReplacements(Fixes, Ranges), + FirstStartColumn, NextStartColumn, LastStartColumn); } } } - return Fixes; + return {Fixes, Penalty}; +} +} // namespace internal + +tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, + StringRef FileName, + FormattingAttemptStatus *Status) { + return internal::reformat(Style, Code, Ranges, + /*FirstStartColumn=*/0, + /*NextStartColumn=*/0, + /*LastStartColumn=*/0, FileName, Status) + .first; } tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, @@ -1954,7 +2035,7 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); Cleaner Clean(*Env, Style); - return Clean.process(); + return Clean.process().first; } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, @@ -1974,7 +2055,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); NamespaceEndCommentsFixer Fix(*Env, Style); - return Fix.process(); + return Fix.process().first; } tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, @@ -1984,7 +2065,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); UsingDeclarationsSorter Sorter(*Env, Style); 
- return Sorter.process(); + return Sorter.process().first; } LangOptions getFormattingLangOpts(const FormatStyle &Style) { @@ -1992,7 +2073,8 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.CPlusPlus = 1; LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; - LangOpts.CPlusPlus1z = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; + LangOpts.CPlusPlus17 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; + LangOpts.CPlusPlus2a = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp(); LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; @@ -2025,6 +2107,11 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { if (FileName.endswith_lower(".proto") || FileName.endswith_lower(".protodevel")) return FormatStyle::LK_Proto; + if (FileName.endswith_lower(".textpb") || + FileName.endswith_lower(".pb.txt") || + FileName.endswith_lower(".textproto") || + FileName.endswith_lower(".asciipb")) + return FormatStyle::LK_TextProto; if (FileName.endswith_lower(".td")) return FormatStyle::LK_TableGen; return FormatStyle::LK_Cpp; @@ -2043,7 +2130,9 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, // should be improved over time and probably be done on tokens, not one the // bare content of the file. 
if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && - (Code.contains("\n- (") || Code.contains("\n+ ("))) + (Code.contains("\n- (") || Code.contains("\n+ (") || + Code.contains("\n@end\n") || Code.contains("\n@end ") || + Code.endswith("@end"))) Style.Language = FormatStyle::LK_ObjC; FormatStyle FallbackStyle = getNoStyle(); diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h new file mode 100644 index 000000000000..3984158467b3 --- /dev/null +++ b/lib/Format/FormatInternal.h @@ -0,0 +1,83 @@ +//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares Format APIs to be used internally by the +/// formatting library implementation. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H +#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H + +#include "BreakableToken.h" +#include "clang/Tooling/Core/Lookup.h" +#include <utility> + +namespace clang { +namespace format { +namespace internal { + +/// \brief Reformats the given \p Ranges in the code fragment \p Code. +/// +/// A fragment of code could conceptually be surrounded by other code that might +/// constrain how that fragment is laid out. +/// For example, consider the fragment of code between 'R"(' and ')"', +/// exclusive, in the following code: +/// +/// void outer(int x) { +/// string inner = R"(name: data +/// ^ FirstStartColumn +/// value: { +/// x: 1 +/// ^ NextStartColumn +/// } +/// )"; +/// ^ LastStartColumn +/// } +/// +/// The outer code can influence the inner fragment as follows: +/// * \p FirstStartColumn specifies the column at which \p Code starts. 
+/// * \p NextStartColumn specifies the additional indent dictated by the +/// surrounding code. It is applied to the rest of the lines of \p Code. +/// * \p LastStartColumn specifies the column at which the last line of +/// \p Code should end, in case the last line is an empty line. +/// +/// In the case where the last line of the fragment contains content, +/// the fragment ends at the end of that content and \p LastStartColumn is +/// not taken into account, for example in: +/// +/// void block() { +/// string inner = R"(name: value)"; +/// } +/// +/// Each range is extended on either end to its next bigger logic unit, i.e. +/// everything that might influence its formatting or might be influenced by its +/// formatting. +/// +/// Returns a pair P, where: +/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply +/// with \p Style. +/// * P.second is the penalty induced by formatting the fragment \p Code. +/// If the formatting of the fragment doesn't have a notion of penalty, +/// returns 0. +/// +/// If ``Status`` is non-null, its value will be populated with the status of +/// this formatting attempt. See \c FormattingAttemptStatus. 
+std::pair<tooling::Replacements, unsigned> +reformat(const FormatStyle &Style, StringRef Code, + ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, + unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, + FormattingAttemptStatus *Status); + +} // namespace internal +} // namespace format +} // namespace clang + +#endif diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp index ba5bf03a6346..10ac392abbf2 100644 --- a/lib/Format/FormatToken.cpp +++ b/lib/Format/FormatToken.cpp @@ -25,10 +25,9 @@ namespace format { const char *getTokenTypeName(TokenType Type) { static const char *const TokNames[] = { #define TYPE(X) #X, -LIST_TOKEN_TYPES + LIST_TOKEN_TYPES #undef TYPE - nullptr - }; + nullptr}; if (Type < NUM_TOKEN_TYPES) return TokNames[Type]; @@ -52,6 +51,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_half: case tok::kw_float: case tok::kw_double: + case tok::kw__Float16: case tok::kw___float128: case tok::kw_wchar_t: case tok::kw_bool: diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index a60361a8e5fa..3dc0ab0e7cca 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -26,78 +26,79 @@ namespace clang { namespace format { -#define LIST_TOKEN_TYPES \ - TYPE(ArrayInitializerLSquare) \ - TYPE(ArraySubscriptLSquare) \ - TYPE(AttributeParen) \ - TYPE(BinaryOperator) \ - TYPE(BitFieldColon) \ - TYPE(BlockComment) \ - TYPE(CastRParen) \ - TYPE(ConditionalExpr) \ - TYPE(ConflictAlternative) \ - TYPE(ConflictEnd) \ - TYPE(ConflictStart) \ - TYPE(CtorInitializerColon) \ - TYPE(CtorInitializerComma) \ - TYPE(DesignatedInitializerLSquare) \ - TYPE(DesignatedInitializerPeriod) \ - TYPE(DictLiteral) \ - TYPE(ForEachMacro) \ - TYPE(FunctionAnnotationRParen) \ - TYPE(FunctionDeclarationName) \ - TYPE(FunctionLBrace) \ - TYPE(FunctionTypeLParen) \ - TYPE(ImplicitStringLiteral) \ - TYPE(InheritanceColon) \ - TYPE(InheritanceComma) \ - TYPE(InlineASMBrace) \ - TYPE(InlineASMColon) \ - 
TYPE(JavaAnnotation) \ - TYPE(JsComputedPropertyName) \ - TYPE(JsExponentiation) \ - TYPE(JsExponentiationEqual) \ - TYPE(JsFatArrow) \ - TYPE(JsNonNullAssertion) \ - TYPE(JsTypeColon) \ - TYPE(JsTypeOperator) \ - TYPE(JsTypeOptionalQuestion) \ - TYPE(LambdaArrow) \ - TYPE(LambdaLSquare) \ - TYPE(LeadingJavaAnnotation) \ - TYPE(LineComment) \ - TYPE(MacroBlockBegin) \ - TYPE(MacroBlockEnd) \ - TYPE(ObjCBlockLBrace) \ - TYPE(ObjCBlockLParen) \ - TYPE(ObjCDecl) \ - TYPE(ObjCForIn) \ - TYPE(ObjCMethodExpr) \ - TYPE(ObjCMethodSpecifier) \ - TYPE(ObjCProperty) \ - TYPE(ObjCStringLiteral) \ - TYPE(OverloadedOperator) \ - TYPE(OverloadedOperatorLParen) \ - TYPE(PointerOrReference) \ - TYPE(PureVirtualSpecifier) \ - TYPE(RangeBasedForLoopColon) \ - TYPE(RegexLiteral) \ - TYPE(SelectorName) \ - TYPE(StartOfName) \ - TYPE(TemplateCloser) \ - TYPE(TemplateOpener) \ - TYPE(TemplateString) \ - TYPE(TrailingAnnotation) \ - TYPE(TrailingReturnArrow) \ - TYPE(TrailingUnaryOperator) \ - TYPE(UnaryOperator) \ +#define LIST_TOKEN_TYPES \ + TYPE(ArrayInitializerLSquare) \ + TYPE(ArraySubscriptLSquare) \ + TYPE(AttributeParen) \ + TYPE(BinaryOperator) \ + TYPE(BitFieldColon) \ + TYPE(BlockComment) \ + TYPE(CastRParen) \ + TYPE(ConditionalExpr) \ + TYPE(ConflictAlternative) \ + TYPE(ConflictEnd) \ + TYPE(ConflictStart) \ + TYPE(CtorInitializerColon) \ + TYPE(CtorInitializerComma) \ + TYPE(DesignatedInitializerLSquare) \ + TYPE(DesignatedInitializerPeriod) \ + TYPE(DictLiteral) \ + TYPE(ForEachMacro) \ + TYPE(FunctionAnnotationRParen) \ + TYPE(FunctionDeclarationName) \ + TYPE(FunctionLBrace) \ + TYPE(FunctionTypeLParen) \ + TYPE(ImplicitStringLiteral) \ + TYPE(InheritanceColon) \ + TYPE(InheritanceComma) \ + TYPE(InlineASMBrace) \ + TYPE(InlineASMColon) \ + TYPE(JavaAnnotation) \ + TYPE(JsComputedPropertyName) \ + TYPE(JsExponentiation) \ + TYPE(JsExponentiationEqual) \ + TYPE(JsFatArrow) \ + TYPE(JsNonNullAssertion) \ + TYPE(JsTypeColon) \ + TYPE(JsTypeOperator) \ + 
TYPE(JsTypeOptionalQuestion) \ + TYPE(LambdaArrow) \ + TYPE(LambdaLSquare) \ + TYPE(LeadingJavaAnnotation) \ + TYPE(LineComment) \ + TYPE(MacroBlockBegin) \ + TYPE(MacroBlockEnd) \ + TYPE(ObjCBlockLBrace) \ + TYPE(ObjCBlockLParen) \ + TYPE(ObjCDecl) \ + TYPE(ObjCForIn) \ + TYPE(ObjCMethodExpr) \ + TYPE(ObjCMethodSpecifier) \ + TYPE(ObjCProperty) \ + TYPE(ObjCStringLiteral) \ + TYPE(OverloadedOperator) \ + TYPE(OverloadedOperatorLParen) \ + TYPE(PointerOrReference) \ + TYPE(PureVirtualSpecifier) \ + TYPE(RangeBasedForLoopColon) \ + TYPE(RegexLiteral) \ + TYPE(SelectorName) \ + TYPE(StartOfName) \ + TYPE(StructuredBindingLSquare) \ + TYPE(TemplateCloser) \ + TYPE(TemplateOpener) \ + TYPE(TemplateString) \ + TYPE(TrailingAnnotation) \ + TYPE(TrailingReturnArrow) \ + TYPE(TrailingUnaryOperator) \ + TYPE(UnaryOperator) \ TYPE(Unknown) enum TokenType { #define TYPE(X) TT_##X, -LIST_TOKEN_TYPES + LIST_TOKEN_TYPES #undef TYPE - NUM_TOKEN_TYPES + NUM_TOKEN_TYPES }; /// \brief Determines the name of a token type. @@ -340,10 +341,11 @@ struct FormatToken { bool isSimpleTypeSpecifier() const; bool isObjCAccessSpecifier() const { - return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || - Next->isObjCAtKeyword(tok::objc_protected) || - Next->isObjCAtKeyword(tok::objc_package) || - Next->isObjCAtKeyword(tok::objc_private)); + return is(tok::at) && Next && + (Next->isObjCAtKeyword(tok::objc_public) || + Next->isObjCAtKeyword(tok::objc_protected) || + Next->isObjCAtKeyword(tok::objc_package) || + Next->isObjCAtKeyword(tok::objc_private)); } /// \brief Returns whether \p Tok is ([{ or a template opening <. @@ -471,6 +473,19 @@ struct FormatToken { Style.Language == FormatStyle::LK_TextProto)); } + /// \brief Returns whether the token is the left square bracket of a C++ + /// structured binding declaration. 
+ bool isCppStructuredBinding(const FormatStyle &Style) const { + if (!Style.isCpp() || isNot(tok::l_square)) + return false; + const FormatToken *T = this; + do { + T = T->getPreviousNonComment(); + } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, + tok::ampamp)); + return T && T->is(tok::kw_auto); + } + /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { if (is(TT_TemplateString) && closesScope()) @@ -503,15 +518,13 @@ private: return is(K1) && Next && Next->startsSequenceInternal(Tokens...); } - template <typename A> - bool startsSequenceInternal(A K1) const { + template <typename A> bool startsSequenceInternal(A K1) const { if (is(tok::comment) && Next) return Next->startsSequenceInternal(K1); return is(K1); } - template <typename A, typename... Ts> - bool endsSequenceInternal(A K1) const { + template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { if (is(tok::comment) && Previous) return Previous->endsSequenceInternal(K1); return is(K1); @@ -644,6 +657,7 @@ struct AdditionalKeywords { kw_readonly = &IdentTable.get("readonly"); kw_set = &IdentTable.get("set"); kw_type = &IdentTable.get("type"); + kw_typeof = &IdentTable.get("typeof"); kw_var = &IdentTable.get("var"); kw_yield = &IdentTable.get("yield"); @@ -680,7 +694,7 @@ struct AdditionalKeywords { JsExtraKeywords = std::unordered_set<IdentifierInfo *>( {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, - kw_set, kw_type, kw_var, kw_yield, + kw_set, kw_type, kw_typeof, kw_var, kw_yield, // Keywords from the Java section. 
kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); } @@ -714,6 +728,7 @@ struct AdditionalKeywords { IdentifierInfo *kw_readonly; IdentifierInfo *kw_set; IdentifierInfo *kw_type; + IdentifierInfo *kw_typeof; IdentifierInfo *kw_var; IdentifierInfo *kw_yield; diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 45c3ae1afe5f..199d2974c5c7 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -24,10 +24,10 @@ namespace clang { namespace format { FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - const FormatStyle &Style, + unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), @@ -50,6 +50,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { tryParseJSRegexLiteral(); handleTemplateStrings(); } + if (Style.Language == FormatStyle::LK_TextProto) + tryParsePythonComment(); tryMergePreviousTokens(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; @@ -96,14 +98,8 @@ void FormatTokenLexer::tryMergePreviousTokens() { } if (Style.Language == FormatStyle::LK_Java) { - static const tok::TokenKind JavaRightLogicalShift[] = {tok::greater, - tok::greater, - tok::greater}; - static const tok::TokenKind JavaRightLogicalShiftAssign[] = {tok::greater, - tok::greater, - tok::greaterequal}; - if (tryMergeTokens(JavaRightLogicalShift, TT_BinaryOperator)) - return; + static const tok::TokenKind JavaRightLogicalShiftAssign[] = { + tok::greater, tok::greater, tok::greaterequal}; if 
(tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) return; } @@ -162,9 +158,8 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, return false; unsigned AddLength = 0; for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || - First[i]->WhitespaceRange.getBegin() != - First[i]->WhitespaceRange.getEnd()) + if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != + First[i]->WhitespaceRange.getEnd()) return false; AddLength += First[i]->TokenText.size(); } @@ -337,6 +332,27 @@ void FormatTokenLexer::handleTemplateStrings() { resetLexer(SourceMgr.getFileOffset(loc)); } +void FormatTokenLexer::tryParsePythonComment() { + FormatToken *HashToken = Tokens.back(); + if (HashToken->isNot(tok::hash)) + return; + // Turn the remainder of this line into a comment. + const char *CommentBegin = + Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#" + size_t From = CommentBegin - Lex->getBuffer().begin(); + size_t To = Lex->getBuffer().find_first_of('\n', From); + if (To == StringRef::npos) + To = Lex->getBuffer().size(); + size_t Len = To - From; + HashToken->Type = TT_LineComment; + HashToken->Tok.setKind(tok::comment); + HashToken->TokenText = Lex->getBuffer().substr(From, Len); + SourceLocation Loc = To < Lex->getBuffer().size() + ? Lex->getSourceLocation(CommentBegin + Len) + : SourceMgr.getLocForEndOfFile(ID); + resetLexer(SourceMgr.getFileOffset(Loc)); +} + bool FormatTokenLexer::tryMerge_TMacro() { if (Tokens.size() < 4) return false; @@ -529,17 +545,53 @@ FormatToken *FormatTokenLexer::getNextToken() { readRawToken(*FormatTok); } + // JavaScript and Java do not allow to escape the end of the line with a + // backslash. Backslashes are syntax errors in plain source, but can occur in + // comments. When a single line comment ends with a \, it'll cause the next + // line of code to be lexed as a comment, breaking formatting. 
The code below + // finds comments that contain a backslash followed by a line break, truncates + // the comment token at the backslash, and resets the lexer to restart behind + // the backslash. + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Java) && + FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { + size_t BackslashPos = FormatTok->TokenText.find('\\'); + while (BackslashPos != StringRef::npos) { + if (BackslashPos + 1 < FormatTok->TokenText.size() && + FormatTok->TokenText[BackslashPos + 1] == '\n') { + const char *Offset = Lex->getBufferLocation(); + Offset -= FormatTok->TokenText.size(); + Offset += BackslashPos + 1; + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); + FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); + FormatTok->ColumnWidth = encoding::columnWidthWithTabs( + FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, + Encoding); + break; + } + BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); + } + } + // In case the token starts with escaped newlines, we want to // take them into account as whitespace - this pattern is quite frequent // in macro definitions. // FIXME: Add a more explicit test. 
- while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' && - FormatTok->TokenText[1] == '\n') { + while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') { + unsigned SkippedWhitespace = 0; + if (FormatTok->TokenText.size() > 2 && + (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) + SkippedWhitespace = 3; + else if (FormatTok->TokenText[1] == '\n') + SkippedWhitespace = 2; + else + break; + ++FormatTok->NewlinesBefore; - WhitespaceLength += 2; - FormatTok->LastNewlineOffset = 2; + WhitespaceLength += SkippedWhitespace; + FormatTok->LastNewlineOffset = SkippedWhitespace; Column = 0; - FormatTok->TokenText = FormatTok->TokenText.substr(2); + FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace); } FormatTok->WhitespaceRange = SourceRange( diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index bf10f09cd11e..59dc2a752f1f 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -36,7 +36,7 @@ enum LexerState { class FormatTokenLexer { public: - FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, + FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding); ArrayRef<FormatToken *> lex(); @@ -73,6 +73,8 @@ private: // nested template parts by balancing curly braces. 
void handleTemplateStrings(); + void tryParsePythonComment(); + bool tryMerge_TMacro(); bool tryMergeConflictMarkers(); diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp index 85b70b8c0a76..df99bb2e1381 100644 --- a/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/lib/Format/NamespaceEndCommentsFixer.cpp @@ -118,6 +118,12 @@ getNamespaceToken(const AnnotatedLine *line, return nullptr; assert(StartLineIndex < AnnotatedLines.size()); const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; + if (NamespaceTok->is(tok::l_brace)) { + // "namespace" keyword can be on the line preceding '{', e.g. in styles + // where BraceWrapping.AfterNamespace is true. + if (StartLineIndex > 0) + NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First; + } // Detect "(inline)? namespace" in the beginning of a line. if (NamespaceTok->is(tok::kw_inline)) NamespaceTok = NamespaceTok->getNextNonComment(); @@ -131,7 +137,7 @@ NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} -tooling::Replacements NamespaceEndCommentsFixer::analyze( +std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); @@ -200,7 +206,7 @@ tooling::Replacements NamespaceEndCommentsFixer::analyze( } StartLineIndex = SIZE_MAX; } - return Fixes; + return {Fixes, 0}; } } // namespace format diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h index 7790668a2e82..4779f0d27c92 100644 --- a/lib/Format/NamespaceEndCommentsFixer.h +++ b/lib/Format/NamespaceEndCommentsFixer.h @@ -25,7 +25,7 @@ class NamespaceEndCommentsFixer : public TokenAnalyzer { public: NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); - tooling::Replacements + 
std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override; diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp index e73695ca8477..d0b979e100d5 100644 --- a/lib/Format/SortJavaScriptImports.cpp +++ b/lib/Format/SortJavaScriptImports.cpp @@ -123,7 +123,7 @@ public: : TokenAnalyzer(Env, Style), FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { @@ -138,7 +138,7 @@ public: parseModuleReferences(Keywords, AnnotatedLines); if (References.empty()) - return Result; + return {Result, 0}; SmallVector<unsigned, 16> Indices; for (unsigned i = 0, e = References.size(); i != e; ++i) @@ -168,7 +168,7 @@ public: } if (ReferencesInOrder && SymbolsInOrder) - return Result; + return {Result, 0}; SourceRange InsertionPoint = References[0].Range; InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); @@ -202,7 +202,7 @@ public: assert(false); } - return Result; + return {Result, 0}; } private: @@ -277,7 +277,7 @@ private: // Parses module references in the given lines. Returns the module references, // and a pointer to the first "main code" line if that is adjacent to the // affected lines of module references, nullptr otherwise. 
- std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine*> + std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *> parseModuleReferences(const AdditionalKeywords &Keywords, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { SmallVector<JsModuleReference, 16> References; @@ -413,7 +413,7 @@ private: nextToken(); if (Current->is(tok::r_brace)) break; - if (Current->isNot(tok::identifier)) + if (!Current->isOneOf(tok::identifier, tok::kw_default)) return false; JsImportedSymbol Symbol; @@ -425,7 +425,7 @@ private: if (Current->is(Keywords.kw_as)) { nextToken(); - if (Current->isNot(tok::identifier)) + if (!Current->isOneOf(tok::identifier, tok::kw_default)) return false; Symbol.Alias = Current->TokenText; nextToken(); @@ -449,7 +449,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); JavaScriptImportSorter Sorter(*Env, Style); - return Sorter.process(); + return Sorter.process().first; } } // end namespace format diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp index f2e4e8ef0819..d1dfb1fea32b 100644 --- a/lib/Format/TokenAnalyzer.cpp +++ b/lib/Format/TokenAnalyzer.cpp @@ -38,7 +38,10 @@ namespace format { // Code. std::unique_ptr<Environment> Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges) { + ArrayRef<tooling::Range> Ranges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) { // This is referenced by `FileMgr` and will be released by `FileMgr` when it // is deleted. 
IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( @@ -57,8 +60,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, std::unique_ptr<SourceManager> VirtualSM( new SourceManager(*Diagnostics, *FileMgr)); InMemoryFileSystem->addFile( - FileName, 0, llvm::MemoryBuffer::getMemBuffer( - Code, FileName, /*RequiresNullTerminator=*/false)); + FileName, 0, + llvm::MemoryBuffer::getMemBuffer(Code, FileName, + /*RequiresNullTerminator=*/false)); FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName), SourceLocation(), clang::SrcMgr::C_User); assert(ID.isValid()); @@ -69,9 +73,9 @@ Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return llvm::make_unique<Environment>(ID, std::move(FileMgr), - std::move(VirtualSM), - std::move(Diagnostics), CharRanges); + return llvm::make_unique<Environment>( + ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics), + CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn); } TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) @@ -88,14 +92,16 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) << "\n"); } -tooling::Replacements TokenAnalyzer::process() { +std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), Style, - Encoding); + FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), + Env.getFirstStartColumn(), Style, Encoding); - UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(), *this); + UnwrappedLineParser Parser(Style, Tokens.getKeywords(), + Env.getFirstStartColumn(), Tokens.lex(), *this); Parser.parse(); assert(UnwrappedLines.rbegin()->empty()); + unsigned Penalty = 0; for (unsigned Run = 0, RunE = 
UnwrappedLines.size(); Run + 1 != RunE; ++Run) { DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); SmallVector<AnnotatedLine *, 16> AnnotatedLines; @@ -106,13 +112,13 @@ tooling::Replacements TokenAnalyzer::process() { Annotator.annotate(*AnnotatedLines.back()); } - tooling::Replacements RunResult = + std::pair<tooling::Replacements, unsigned> RunResult = analyze(Annotator, AnnotatedLines, Tokens); DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; - for (tooling::Replacements::const_iterator I = RunResult.begin(), - E = RunResult.end(); + for (tooling::Replacements::const_iterator I = RunResult.first.begin(), + E = RunResult.first.end(); I != E; ++I) { llvm::dbgs() << I->toString() << "\n"; } @@ -120,17 +126,19 @@ tooling::Replacements TokenAnalyzer::process() { for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { delete AnnotatedLines[i]; } - for (const auto &R : RunResult) { + + Penalty += RunResult.second; + for (const auto &R : RunResult.first) { auto Err = Result.add(R); // FIXME: better error handling here. For now, simply return an empty // Replacements to indicate failure. 
if (Err) { llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - return tooling::Replacements(); + return {tooling::Replacements(), 0}; } } } - return Result; + return {Result, Penalty}; } void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) { diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h index 78a3d1bc8d9e..96ea00b25ba1 100644 --- a/lib/Format/TokenAnalyzer.h +++ b/lib/Format/TokenAnalyzer.h @@ -37,21 +37,37 @@ namespace format { class Environment { public: Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges) - : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM) {} + : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM), + FirstStartColumn(0), + NextStartColumn(0), + LastStartColumn(0) {} Environment(FileID ID, std::unique_ptr<FileManager> FileMgr, std::unique_ptr<SourceManager> VirtualSM, std::unique_ptr<DiagnosticsEngine> Diagnostics, - const std::vector<CharSourceRange> &CharRanges) + const std::vector<CharSourceRange> &CharRanges, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()), - SM(*VirtualSM), FileMgr(std::move(FileMgr)), + SM(*VirtualSM), + FirstStartColumn(FirstStartColumn), + NextStartColumn(NextStartColumn), + LastStartColumn(LastStartColumn), + FileMgr(std::move(FileMgr)), VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {} - // This sets up an virtual file system with file \p FileName containing \p - // Code. + // This sets up an virtual file system with file \p FileName containing the + // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn, + // that the next lines of \p Code should start at \p NextStartColumn, and + // that \p Code should end at \p LastStartColumn if it ends in newline. + // See also the documentation of clang::format::internal::reformat. 
static std::unique_ptr<Environment> CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges); + ArrayRef<tooling::Range> Ranges, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); FileID getFileID() const { return ID; } @@ -59,10 +75,25 @@ public: const SourceManager &getSourceManager() const { return SM; } + // Returns the column at which the fragment of code managed by this + // environment starts. + unsigned getFirstStartColumn() const { return FirstStartColumn; } + + // Returns the column at which subsequent lines of the fragment of code + // managed by this environment should start. + unsigned getNextStartColumn() const { return NextStartColumn; } + + // Returns the column at which the fragment of code managed by this + // environment should end if it ends in a newline. + unsigned getLastStartColumn() const { return LastStartColumn; } + private: FileID ID; SmallVector<CharSourceRange, 8> CharRanges; SourceManager &SM; + unsigned FirstStartColumn; + unsigned NextStartColumn; + unsigned LastStartColumn; // The order of these fields are important - they should be in the same order // as they are created in `CreateVirtualEnvironment` so that they can be @@ -76,10 +107,10 @@ class TokenAnalyzer : public UnwrappedLineConsumer { public: TokenAnalyzer(const Environment &Env, const FormatStyle &Style); - tooling::Replacements process(); + std::pair<tooling::Replacements, unsigned> process(); protected: - virtual tooling::Replacements + virtual std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) = 0; diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 46ea06b880ed..298c72b002f8 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -47,7 +47,7 @@ private: if (NonTemplateLess.count(CurrentToken->Previous)) return false; - const 
FormatToken& Previous = *CurrentToken->Previous; + const FormatToken &Previous = *CurrentToken->Previous; // The '<'. if (Previous.Previous) { if (Previous.Previous->Tok.isLiteral()) return false; @@ -152,11 +152,11 @@ private: // export type X = (...); Contexts.back().IsExpression = false; } else if (Left->Previous && - (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, - tok::kw_if, tok::kw_while, tok::l_paren, - tok::comma) || - Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) || - Left->Previous->is(TT_BinaryOperator))) { + (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, + tok::kw_if, tok::kw_while, tok::l_paren, + tok::comma) || + Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) || + Left->Previous->is(TT_BinaryOperator))) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && @@ -325,8 +325,7 @@ private: // In C++, this can happen either in array of templates (foo<int>[10]) // or when array is a nested template type (unique_ptr<type1<type2>[]>). 
bool CppArrayTemplates = - Style.isCpp() && Parent && - Parent->is(TT_TemplateCloser) && + Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) && (Contexts.back().CanBeExpression || Contexts.back().IsExpression || Contexts.back().InTemplateArgument); @@ -343,7 +342,9 @@ private: bool ColonFound = false; unsigned BindingIncrease = 1; - if (Left->is(TT_Unknown)) { + if (Left->isCppStructuredBinding(Style)) { + Left->Type = TT_StructuredBindingLSquare; + } else if (Left->is(TT_Unknown)) { if (StartsObjCMethodExpr) { Left->Type = TT_ObjCMethodExpr; } else if (Style.Language == FormatStyle::LK_JavaScript && Parent && @@ -372,6 +373,10 @@ private: ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease); Contexts.back().IsExpression = true; + if (Style.Language == FormatStyle::LK_JavaScript && Parent && + Parent->is(TT_JsTypeColon)) + Contexts.back().IsExpression = false; + Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; while (CurrentToken) { @@ -439,6 +444,9 @@ private: Contexts.back().ColonIsDictLiteral = true; if (Left->BlockKind == BK_BracedInit) Contexts.back().IsExpression = true; + if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && + Left->Previous->is(TT_JsTypeColon)) + Contexts.back().IsExpression = false; while (CurrentToken) { if (CurrentToken->is(tok::r_brace)) { @@ -452,6 +460,8 @@ private: updateParameterCount(Left, CurrentToken); if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) { FormatToken *Previous = CurrentToken->getPreviousNonComment(); + if (Previous->is(TT_JsTypeOptionalQuestion)) + Previous = Previous->getPreviousNonComment(); if (((CurrentToken->is(tok::colon) && (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || Style.Language == FormatStyle::LK_Proto || @@ -531,8 +541,11 @@ private: !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || Contexts.back().ContextKind == tok::l_paren || // function params Contexts.back().ContextKind == tok::l_square || // array type + 
(!Contexts.back().IsExpression && + Contexts.back().ContextKind == tok::l_brace) || // object type (Contexts.size() == 1 && Line.MustBeDeclaration)) { // method/property declaration + Contexts.back().IsExpression = false; Tok->Type = TT_JsTypeColon; break; } @@ -593,7 +606,8 @@ private: break; case tok::kw_if: case tok::kw_while: - if (Tok->is(tok::kw_if) && CurrentToken && CurrentToken->is(tok::kw_constexpr)) + if (Tok->is(tok::kw_if) && CurrentToken && + CurrentToken->is(tok::kw_constexpr)) next(); if (CurrentToken && CurrentToken->is(tok::l_paren)) { next(); @@ -603,7 +617,9 @@ private: break; case tok::kw_for: if (Style.Language == FormatStyle::LK_JavaScript) { - if (Tok->Previous && Tok->Previous->is(tok::period)) + // x.for and {for: ...} + if ((Tok->Previous && Tok->Previous->is(tok::period)) || + (Tok->Next && Tok->Next->is(tok::colon))) break; // JS' for await ( ... if (CurrentToken && CurrentToken->is(Keywords.kw_await)) @@ -619,8 +635,7 @@ private: // marks the first l_paren as a OverloadedOperatorLParen. Here, we make // the first two parens OverloadedOperators and the second l_paren an // OverloadedOperatorLParen. 
- if (Tok->Previous && - Tok->Previous->is(tok::r_paren) && + if (Tok->Previous && Tok->Previous->is(tok::r_paren) && Tok->Previous->MatchingParen && Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { Tok->Previous->Type = TT_OverloadedOperator; @@ -643,7 +658,7 @@ private: break; case tok::l_brace: if (Style.Language == FormatStyle::LK_TextProto) { - FormatToken *Previous =Tok->getPreviousNonComment(); + FormatToken *Previous = Tok->getPreviousNonComment(); if (Previous && Previous->Type != TT_DictLiteral) Previous->Type = TT_SelectorName; } @@ -683,7 +698,8 @@ private: CurrentToken->Type = TT_PointerOrReference; consumeToken(); if (CurrentToken && - CurrentToken->Previous->isOneOf(TT_BinaryOperator, tok::comma)) + CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, + tok::comma)) CurrentToken->Previous->Type = TT_OverloadedOperator; } if (CurrentToken) { @@ -740,8 +756,8 @@ private: void parseIncludeDirective() { if (CurrentToken && CurrentToken->is(tok::less)) { - next(); - while (CurrentToken) { + next(); + while (CurrentToken) { // Mark tokens up to the trailing line comments as implicit string // literals. if (CurrentToken->isNot(tok::comment) && @@ -781,9 +797,9 @@ private: void parseHasInclude() { if (!CurrentToken || !CurrentToken->is(tok::l_paren)) return; - next(); // '(' + next(); // '(' parseIncludeDirective(); - next(); // ')' + next(); // ')' } LineType parsePreprocessorDirective() { @@ -842,7 +858,7 @@ private: if (Tok->is(tok::l_paren)) parseParens(); else if (Tok->isOneOf(Keywords.kw___has_include, - Keywords.kw___has_include_next)) + Keywords.kw___has_include_next)) parseHasInclude(); } return Type; @@ -855,7 +871,7 @@ public: return parsePreprocessorDirective(); // Directly allow to 'import <string-literal>' to support protocol buffer - // definitions (code.google.com/p/protobuf) or missing "#" (either way we + // definitions (github.com/google/protobuf) or missing "#" (either way we // should not break the line). 
IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); if ((Style.Language == FormatStyle::LK_Java && @@ -933,11 +949,11 @@ private: // FIXME: Closure-library specific stuff should not be hard-coded but be // configurable. return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && - Tok.Next->Next && (Tok.Next->Next->TokenText == "module" || - Tok.Next->Next->TokenText == "provide" || - Tok.Next->Next->TokenText == "require" || - Tok.Next->Next->TokenText == "setTestOnly" || - Tok.Next->Next->TokenText == "forwardDeclare") && + Tok.Next->Next && + (Tok.Next->Next->TokenText == "module" || + Tok.Next->Next->TokenText == "provide" || + Tok.Next->Next->TokenText == "require" || + Tok.Next->Next->TokenText == "forwardDeclare") && Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); } @@ -1054,8 +1070,7 @@ private: Current.Previous->is(TT_CtorInitializerColon)) { Contexts.back().IsExpression = true; Contexts.back().InCtorInitializer = true; - } else if (Current.Previous && - Current.Previous->is(TT_InheritanceColon)) { + } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) { Contexts.back().InInheritanceList = true; } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { for (FormatToken *Previous = Current.Previous; @@ -1104,6 +1119,11 @@ private: (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { Contexts.back().FirstStartOfName = &Current; Current.Type = TT_StartOfName; + } else if (Current.is(tok::semi)) { + // Reset FirstStartOfName after finding a semicolon so that a for loop + // with multiple increment statements is not confused with a for loop + // having multiple variable declarations. 
+ Contexts.back().FirstStartOfName = nullptr; } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) { AutoFound = true; } else if (Current.is(tok::arrow) && @@ -1113,10 +1133,10 @@ private: Current.NestingLevel == 0) { Current.Type = TT_TrailingReturnArrow; } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { - Current.Type = - determineStarAmpUsage(Current, Contexts.back().CanBeExpression && - Contexts.back().IsExpression, - Contexts.back().InTemplateArgument); + Current.Type = determineStarAmpUsage(Current, + Contexts.back().CanBeExpression && + Contexts.back().IsExpression, + Contexts.back().InTemplateArgument); } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { Current.Type = determinePlusMinusCaretUsage(Current); if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) @@ -1396,11 +1416,13 @@ private: if (NextToken->isOneOf(tok::comma, tok::semi)) return TT_PointerOrReference; - if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && - PrevToken->MatchingParen->Previous && - PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof, - tok::kw_decltype)) - return TT_PointerOrReference; + if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) { + FormatToken *TokenBeforeMatchingParen = + PrevToken->MatchingParen->getPreviousNonComment(); + if (TokenBeforeMatchingParen && + TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype)) + return TT_PointerOrReference; + } if (PrevToken->Tok.isLiteral() || PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, @@ -1589,7 +1611,7 @@ private: if (Current->is(TT_ConditionalExpr)) return prec::Conditional; if (NextNonComment && Current->is(TT_SelectorName) && - (NextNonComment->is(TT_DictLiteral) || + (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) || ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && NextNonComment->is(tok::less)))) @@ -1643,17 +1665,15 @@ private: /// \brief Parse unary operator 
expressions and surround them with fake /// parentheses if appropriate. void parseUnaryOperator() { - if (!Current || Current->isNot(TT_UnaryOperator)) { - parse(PrecedenceArrowAndPeriod); - return; + llvm::SmallVector<FormatToken *, 2> Tokens; + while (Current && Current->is(TT_UnaryOperator)) { + Tokens.push_back(Current); + next(); } - - FormatToken *Start = Current; - next(); - parseUnaryOperator(); - - // The actual precedence doesn't matter. - addFakeParenthesis(Start, prec::Unknown); + parse(PrecedenceArrowAndPeriod); + for (FormatToken *Token : llvm::reverse(Tokens)) + // The actual precedence doesn't matter. + addFakeParenthesis(Token, prec::Unknown); } void parseConditionalExpr() { @@ -1722,7 +1742,7 @@ void TokenAnnotator::setCommentLineLevels( static unsigned maxNestingDepth(const AnnotatedLine &Line) { unsigned Result = 0; - for (const auto* Tok = Line.First; Tok != nullptr; Tok = Tok->Next) + for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) Result = std::max(Result, Tok->NestingLevel); return Result; } @@ -1764,7 +1784,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { // function declaration. static bool isFunctionDeclarationName(const FormatToken &Current, const AnnotatedLine &Line) { - auto skipOperatorName = [](const FormatToken* Next) -> const FormatToken* { + auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * { for (; Next; Next = Next->Next) { if (Next->is(TT_OverloadedOperatorLParen)) return Next; @@ -1772,8 +1792,8 @@ static bool isFunctionDeclarationName(const FormatToken &Current, continue; if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { // For 'new[]' and 'delete[]'. 
- if (Next->Next && Next->Next->is(tok::l_square) && - Next->Next->Next && Next->Next->Next->is(tok::r_square)) + if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next && + Next->Next->Next->is(tok::r_square)) Next = Next->Next->Next; continue; } @@ -1872,7 +1892,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { } Line.First->TotalLength = - Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; + Line.First->IsMultiline ? Style.ColumnLimit + : Line.FirstStartColumn + Line.First->ColumnWidth; FormatToken *Current = Line.First->Next; bool InFunctionDecl = Line.MightBeFunctionDecl; while (Current) { @@ -2005,6 +2026,9 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) return 100; + // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()". + if (Left.opensScope() && Right.closesScope()) + return 200; } if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) @@ -2049,7 +2073,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::comment)) return 1000; - if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon)) + if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, + TT_CtorInitializerColon)) return 2; if (Right.isMemberAccess()) { @@ -2107,8 +2132,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) return 100; if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) - || Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) + (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) || + Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) return 1000; if (Left.is(tok::equal) && InFunctionDecl) return 110; @@ -2128,7 +2153,7 @@ 
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous && Left.Previous->isLabelString() && (Left.NextOperator || Left.OperatorIndex != 0)) - return 45; + return 50; if (Right.is(tok::plus) && Left.isLabelString() && (Right.NextOperator || Right.OperatorIndex != 0)) return 25; @@ -2162,6 +2187,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Right) { if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) return true; + if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java) + return true; if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && Left.Tok.getObjCKeywordID() == tok::objc_property) return true; @@ -2178,8 +2205,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, : Style.SpacesInParentheses; if (Right.isOneOf(tok::semi, tok::comma)) return false; - if (Right.is(tok::less) && - Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList) + if (Right.is(tok::less) && Line.Type == LT_ObjCDecl && + Style.ObjCSpaceBeforeProtocolList) return true; if (Right.is(tok::less) && Left.is(tok::kw_template)) return Style.SpaceAfterTemplateKeyword; @@ -2201,15 +2228,23 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Left.Previous->is(tok::kw_case)); if (Left.is(tok::l_square) && Right.is(tok::amp)) return false; - if (Right.is(TT_PointerOrReference)) - return (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) || - (Left.Tok.isLiteral() || (Left.is(tok::kw_const) && Left.Previous && - Left.Previous->is(tok::r_paren)) || + if (Right.is(TT_PointerOrReference)) { + if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) { + if (!Left.MatchingParen) + return true; + FormatToken *TokenBeforeMatchingParen = + Left.MatchingParen->getPreviousNonComment(); + if (!TokenBeforeMatchingParen || + !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype)) + return true; + } + return 
(Left.Tok.isLiteral() || (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && (Style.PointerAlignment != FormatStyle::PAS_Left || (Line.IsMultiVariableDeclStmt && (Left.NestingLevel == 0 || (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); + } if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || (Style.PointerAlignment != FormatStyle::PAS_Right && @@ -2231,17 +2266,20 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::l_square)) return (Left.is(TT_ArrayInitializerLSquare) && Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) || - (Left.is(TT_ArraySubscriptLSquare) && Style.SpacesInSquareBrackets && - Right.isNot(tok::r_square)); + (Left.isOneOf(TT_ArraySubscriptLSquare, + TT_StructuredBindingLSquare) && + Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); if (Right.is(tok::r_square)) return Right.MatchingParen && ((Style.SpacesInContainerLiterals && Right.MatchingParen->is(TT_ArrayInitializerLSquare)) || (Style.SpacesInSquareBrackets && - Right.MatchingParen->is(TT_ArraySubscriptLSquare))); + Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare, + TT_StructuredBindingLSquare))); if (Right.is(tok::l_square) && !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, - TT_DesignatedInitializerLSquare) && + TT_DesignatedInitializerLSquare, + TT_StructuredBindingLSquare) && !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) return false; if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) @@ -2287,7 +2325,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(TT_TemplateCloser) && Left.MatchingParen && Left.MatchingParen->Previous && Left.MatchingParen->Previous->is(tok::period)) - // A.<B>DoSomething(); + // A.<B<C<...>>>DoSomething(); return false; if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) return false; @@ -2317,8 +2355,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if 
(Left.is(TT_JsFatArrow)) return true; // for await ( ... - if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && - Left.Previous && Left.Previous->is(tok::kw_for)) + if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && + Left.Previous->is(tok::kw_for)) return true; if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && Right.MatchingParen) { @@ -2341,18 +2379,31 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) return false; if (Right.isOneOf(tok::l_brace, tok::l_square) && - Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) + Left.isOneOf(Keywords.kw_function, Keywords.kw_yield, + Keywords.kw_extends, Keywords.kw_implements)) return true; - // JS methods can use some keywords as names (e.g. `delete()`). - if (Right.is(tok::l_paren) && Line.MustBeDeclaration && - Left.Tok.getIdentifierInfo()) - return false; + if (Right.is(tok::l_paren)) { + // JS methods can use some keywords as names (e.g. `delete()`). + if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) + return false; + // Valid JS method names can include keywords, e.g. `foo.delete()` or + // `bar.instanceof()`. Recognize call positions by preceding period. + if (Left.Previous && Left.Previous->is(tok::period) && + Left.Tok.getIdentifierInfo()) + return false; + // Additional unary JavaScript operators that need a space after. + if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof, + tok::kw_void)) + return true; + } if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, tok::kw_const) || // "of" is only a keyword if it appears after another identifier - // (e.g. as "const x of y" in a for loop). + // (e.g. as "const x of y" in a for loop), or after a destructuring + // operation (const [x, y] of z, const {a, b} of c). 
(Left.is(Keywords.kw_of) && Left.Previous && - Left.Previous->Tok.getIdentifierInfo())) && + (Left.Previous->Tok.getIdentifierInfo() || + Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && (!Left.Previous || !Left.Previous->is(tok::period))) return true; if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && @@ -2384,8 +2435,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; if (Right.is(TT_JsNonNullAssertion)) return false; - if (Left.is(TT_JsNonNullAssertion) && Right.is(Keywords.kw_as)) - return true; // "x! as string" + if (Left.is(TT_JsNonNullAssertion) && + Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) + return true; // "x! as string", "x! in y" } else if (Style.Language == FormatStyle::LK_Java) { if (Left.is(tok::r_square) && Right.is(tok::l_brace)) return true; @@ -2464,9 +2516,18 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return (Left.is(TT_TemplateOpener) && Style.Standard == FormatStyle::LS_Cpp03) || !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, - tok::kw___super, TT_TemplateCloser, TT_TemplateOpener)); + tok::kw___super, TT_TemplateCloser, + TT_TemplateOpener)); if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; + // Space before TT_StructuredBindingLSquare. + if (Right.is(TT_StructuredBindingLSquare)) + return !Left.isOneOf(tok::amp, tok::ampamp) || + Style.PointerAlignment != FormatStyle::PAS_Right; + // Space before & or && following a TT_StructuredBindingLSquare. 
+ if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) && + Right.isOneOf(tok::amp, tok::ampamp)) + return Style.PointerAlignment != FormatStyle::PAS_Left; if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && !Right.is(tok::r_paren))) @@ -2516,7 +2577,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; if (Left.is(tok::l_brace) && Line.Level == 0 && (Line.startsWith(tok::kw_enum) || - Line.startsWith(tok::kw_export, tok::kw_enum))) + Line.startsWith(tok::kw_const, tok::kw_enum) || + Line.startsWith(tok::kw_export, tok::kw_enum) || + Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) // JavaScript top-level enum key/value pairs are put on separate lines // instead of bin-packing. return true; @@ -2587,19 +2650,16 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) return true; // Break only if we have multiple inheritance. - if (Style.BreakBeforeInheritanceComma && - Right.is(TT_InheritanceComma)) - return true; + if (Style.BreakBeforeInheritanceComma && Right.is(TT_InheritanceComma)) + return true; if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) // Raw string literals are special wrt. line breaks. The author has made a // deliberate choice and might have aligned the contents of the string // literal accordingly. Thus, we try keep existing line breaks. return Right.NewlinesBefore > 0; if ((Right.Previous->is(tok::l_brace) || - (Right.Previous->is(tok::less) && - Right.Previous->Previous && - Right.Previous->Previous->is(tok::equal)) - ) && + (Right.Previous->is(tok::less) && Right.Previous->Previous && + Right.Previous->Previous->is(tok::equal))) && Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) { // Don't put enums or option definitions onto single lines in protocol // buffers. 
@@ -2609,6 +2669,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return Right.HasUnescapedNewline; if (isAllmanBrace(Left) || isAllmanBrace(Right)) return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || + (Line.startsWith(tok::kw_typedef, tok::kw_enum) && + Style.BraceWrapping.AfterEnum) || (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) @@ -2639,13 +2701,16 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, } else if (Style.Language == FormatStyle::LK_JavaScript) { const FormatToken *NonComment = Right.getPreviousNonComment(); if (NonComment && - NonComment->isOneOf(tok::kw_return, tok::kw_continue, tok::kw_break, - tok::kw_throw, Keywords.kw_interface, - Keywords.kw_type, tok::kw_static, tok::kw_public, - tok::kw_private, tok::kw_protected, - Keywords.kw_readonly, Keywords.kw_abstract, - Keywords.kw_get, Keywords.kw_set)) + NonComment->isOneOf( + tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break, + tok::kw_throw, Keywords.kw_interface, Keywords.kw_type, + tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected, + Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get, + Keywords.kw_set, Keywords.kw_async, Keywords.kw_await)) return false; // Otherwise automatic semicolon insertion would trigger. + if (Left.Tok.getIdentifierInfo() && + Right.startsSequence(tok::l_square, tok::r_square)) + return false; // breaking in "foo[]" creates illegal TS type syntax. if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; if (Left.is(TT_JsTypeColon)) @@ -2702,8 +2767,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // list. 
return Left.BlockKind == BK_BracedInit || (Left.is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == - FormatStyle::BCIS_AfterColon); + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon); if (Left.is(tok::question) && Right.is(tok::colon)) return false; if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) @@ -2820,7 +2884,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, } void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { - llvm::errs() << "AnnotatedTokens:\n"; + llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n"; const FormatToken *Tok = Line.First; while (Tok) { llvm::errs() << " M=" << Tok->MustBreakBefore @@ -2828,10 +2892,9 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { << " T=" << getTokenTypeName(Tok->Type) << " S=" << Tok->SpacesRequiredBefore << " B=" << Tok->BlockParameterCount - << " BK=" << Tok->BlockKind - << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() - << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind - << " FakeLParens="; + << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty + << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength + << " PPK=" << Tok->PackingKind << " FakeLParens="; for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; llvm::errs() << " FakeRParens=" << Tok->FakeRParens; diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 805509533bf9..04a18d45b82e 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -43,7 +43,8 @@ public: InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), - LeadingEmptyLinesAffected(false), ChildrenAffected(false) { + LeadingEmptyLinesAffected(false), ChildrenAffected(false), + FirstStartColumn(Line.FirstStartColumn) { assert(!Line.Tokens.empty()); // Calculate Next 
and Previous for all tokens. Note that we must overwrite @@ -127,6 +128,8 @@ public: /// \c True if one of this line's children intersects with an input range. bool ChildrenAffected; + unsigned FirstStartColumn; + private: // Disallow copying. AnnotatedLine(const AnnotatedLine &) = delete; diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index 2005a2822924..60dc1a7169d1 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -164,8 +164,7 @@ public: return nullptr; const AnnotatedLine *Current = *Next; IndentTracker.nextLine(*Current); - unsigned MergedLines = - tryFitMultipleLinesInOne(IndentTracker, Next, End); + unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End); if (MergedLines > 0 && Style.ColumnLimit == 0) // Disallow line merging if there is a break at the start of one of the // input lines. @@ -228,14 +227,16 @@ private: if (Tok && Tok->getNamespaceToken()) return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock - ? tryMergeSimpleBlock(I, E, Limit) : 0; + ? tryMergeSimpleBlock(I, E, Limit) + : 0; if (Tok && Tok->is(tok::kw_typedef)) Tok = Tok->getNextNonComment(); if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, - Keywords.kw_interface)) + tok::kw_extern, Keywords.kw_interface)) return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock - ? tryMergeSimpleBlock(I, E, Limit) : 0; + ? tryMergeSimpleBlock(I, E, Limit) + : 0; } // FIXME: TheLine->Level != 0 might or might not be the right check to do. @@ -279,15 +280,43 @@ private: } } + // Try to merge a function block with left brace unwrapped if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First != TheLine->Last) { return MergeShortFunctions ? 
tryMergeSimpleBlock(I, E, Limit) : 0; } + // Try to merge a control statement block with left brace unwrapped + if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last && + TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { + return Style.AllowShortBlocksOnASingleLine + ? tryMergeSimpleBlock(I, E, Limit) + : 0; + } + // Try to merge a control statement block with left brace wrapped + if (I[1]->First->is(tok::l_brace) && + TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { + return Style.BraceWrapping.AfterControlStatement + ? tryMergeSimpleBlock(I, E, Limit) + : 0; + } + // Try to merge either empty or one-line block if is precedeed by control + // statement token + if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last && + I != AnnotatedLines.begin() && + I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { + return Style.AllowShortBlocksOnASingleLine + ? tryMergeSimpleBlock(I - 1, E, Limit) + : 0; + } + // Try to merge a block with left brace wrapped that wasn't yet covered if (TheLine->Last->is(tok::l_brace)) { - return !Style.BraceWrapping.AfterFunction + return !Style.BraceWrapping.AfterFunction || + (I[1]->First->is(tok::r_brace) && + !Style.BraceWrapping.SplitEmptyRecord) ? 
tryMergeSimpleBlock(I, E, Limit) : 0; } + // Try to merge a function block with left brace wrapped if (I[1]->First->is(TT_FunctionLBrace) && Style.BraceWrapping.AfterFunction) { if (I[1]->Last->is(TT_LineComment)) @@ -382,7 +411,9 @@ private: return 0; unsigned NumStmts = 0; unsigned Length = 0; + bool EndsWithComment = false; bool InPPDirective = I[0]->InPPDirective; + const unsigned Level = I[0]->Level; for (; NumStmts < 3; ++NumStmts) { if (I + 1 + NumStmts == E) break; @@ -392,9 +423,26 @@ private: if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) break; if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, - tok::kw_while, tok::comment) || - Line->Last->is(tok::comment)) + tok::kw_while) || + EndsWithComment) return 0; + if (Line->First->is(tok::comment)) { + if (Level != Line->Level) + return 0; + SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts; + for (; J != E; ++J) { + Line = *J; + if (Line->InPPDirective != InPPDirective) + break; + if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) + break; + if (Line->First->isNot(tok::comment) || Level != Line->Level) + return 0; + } + break; + } + if (Line->Last->is(tok::comment)) + EndsWithComment = true; Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. 
} if (NumStmts == 0 || NumStmts == 3 || Length > Limit) @@ -425,11 +473,27 @@ private: tok::kw_for, tok::r_brace, Keywords.kw___except)) { if (!Style.AllowShortBlocksOnASingleLine) return 0; + // Don't merge when we can't except the case when + // the control statement block is empty if (!Style.AllowShortIfStatementsOnASingleLine && - Line.startsWith(tok::kw_if)) + Line.startsWith(tok::kw_if) && + !Style.BraceWrapping.AfterControlStatement && + !I[1]->First->is(tok::r_brace)) + return 0; + if (!Style.AllowShortIfStatementsOnASingleLine && + Line.startsWith(tok::kw_if) && + Style.BraceWrapping.AfterControlStatement && I + 2 != E && + !I[2]->First->is(tok::r_brace)) + return 0; + if (!Style.AllowShortLoopsOnASingleLine && + Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) && + !Style.BraceWrapping.AfterControlStatement && + !I[1]->First->is(tok::r_brace)) return 0; if (!Style.AllowShortLoopsOnASingleLine && - Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for)) + Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) && + Style.BraceWrapping.AfterControlStatement && I + 2 != E && + !I[2]->First->is(tok::r_brace)) return 0; // FIXME: Consider an option to allow short exception handling clauses on // a single line. @@ -441,52 +505,78 @@ private: return 0; } - FormatToken *Tok = I[1]->First; - if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && - (Tok->getNextNonComment() == nullptr || - Tok->getNextNonComment()->is(tok::semi))) { - // We merge empty blocks even if the line exceeds the column limit. - Tok->SpacesRequiredBefore = 0; - Tok->CanBreakBefore = true; - return 1; - } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) && - !startsExternCBlock(Line)) { - // We don't merge short records. - FormatToken *RecordTok = - Line.First->is(tok::kw_typedef) ? 
Line.First->Next : Line.First; - if (RecordTok && - RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, - Keywords.kw_interface)) - return 0; + if (Line.Last->is(tok::l_brace)) { + FormatToken *Tok = I[1]->First; + if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && + (Tok->getNextNonComment() == nullptr || + Tok->getNextNonComment()->is(tok::semi))) { + // We merge empty blocks even if the line exceeds the column limit. + Tok->SpacesRequiredBefore = 0; + Tok->CanBreakBefore = true; + return 1; + } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) && + !startsExternCBlock(Line)) { + // We don't merge short records. + FormatToken *RecordTok = Line.First; + // Skip record modifiers. + while (RecordTok->Next && + RecordTok->isOneOf(tok::kw_typedef, tok::kw_export, + Keywords.kw_declare, Keywords.kw_abstract, + tok::kw_default)) + RecordTok = RecordTok->Next; + if (RecordTok && + RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, + Keywords.kw_interface)) + return 0; - // Check that we still have three lines and they fit into the limit. - if (I + 2 == E || I[2]->Type == LT_Invalid) - return 0; - Limit = limitConsideringMacros(I + 2, E, Limit); + // Check that we still have three lines and they fit into the limit. + if (I + 2 == E || I[2]->Type == LT_Invalid) + return 0; + Limit = limitConsideringMacros(I + 2, E, Limit); - if (!nextTwoLinesFitInto(I, Limit)) - return 0; + if (!nextTwoLinesFitInto(I, Limit)) + return 0; - // Second, check that the next line does not contain any braces - if it - // does, readability declines when putting it into a single line. - if (I[1]->Last->is(TT_LineComment)) - return 0; - do { - if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) + // Second, check that the next line does not contain any braces - if it + // does, readability declines when putting it into a single line. 
+ if (I[1]->Last->is(TT_LineComment)) + return 0; + do { + if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) + return 0; + Tok = Tok->Next; + } while (Tok); + + // Last, check that the third line starts with a closing brace. + Tok = I[2]->First; + if (Tok->isNot(tok::r_brace)) return 0; - Tok = Tok->Next; - } while (Tok); - // Last, check that the third line starts with a closing brace. - Tok = I[2]->First; - if (Tok->isNot(tok::r_brace)) - return 0; + // Don't merge "if (a) { .. } else {". + if (Tok->Next && Tok->Next->is(tok::kw_else)) + return 0; - // Don't merge "if (a) { .. } else {". - if (Tok->Next && Tok->Next->is(tok::kw_else)) + return 2; + } + } else if (I[1]->First->is(tok::l_brace)) { + if (I[1]->Last->is(TT_LineComment)) return 0; - return 2; + // Check for Limit <= 2 to account for the " {". + if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I))) + return 0; + Limit -= 2; + unsigned MergedLines = 0; + if (Style.AllowShortBlocksOnASingleLine || + (I[1]->First == I[1]->Last && I + 2 != E && + I[2]->First->is(tok::r_brace))) { + MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); + // If we managed to merge the block, count the statement header, which + // is on a separate line. + if (MergedLines > 0) + ++MergedLines; + } + return MergedLines; } return 0; } @@ -574,7 +664,9 @@ public: /// \brief Formats an \c AnnotatedLine and returns the penalty. /// /// If \p DryRun is \c false, directly applies the changes. 
- virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, + virtual unsigned formatLine(const AnnotatedLine &Line, + unsigned FirstIndent, + unsigned FirstStartColumn, bool DryRun) = 0; protected: @@ -645,7 +737,8 @@ protected: *Child->First, /*Newlines=*/0, /*Spaces=*/1, /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); } - Penalty += formatLine(*Child, State.Column + 1, DryRun); + Penalty += + formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); State.Column += 1 + Child->Last->TotalLength; return true; @@ -671,10 +764,10 @@ public: /// \brief Formats the line, simply keeping all of the input's line breaking /// decisions. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { assert(!DryRun); - LineState State = - Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false); + LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, + &Line, /*DryRun=*/false); while (State.NextToken) { bool Newline = Indenter->mustBreak(State) || @@ -697,9 +790,10 @@ public: /// \brief Puts all tokens into a single line. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { + unsigned FirstStartColumn, bool DryRun) override { unsigned Penalty = 0; - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); while (State.NextToken) { formatChildren(State, /*Newline=*/false, DryRun, Penalty); Indenter->addTokenToState( @@ -721,8 +815,9 @@ public: /// \brief Formats the line by finding the best line breaks with line lengths /// below the column limit. 
unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - bool DryRun) override { - LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun); + unsigned FirstStartColumn, bool DryRun) override { + LineState State = + Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); // If the ObjC method declaration does not fit on a line, we should format // it with one arg per line. @@ -763,7 +858,8 @@ private: /// \brief The BFS queue type. typedef std::priority_queue<QueueItem, std::vector<QueueItem>, - std::greater<QueueItem>> QueueType; + std::greater<QueueItem>> + QueueType; /// \brief Analyze the entire solution space starting from \p InitialState. /// @@ -888,7 +984,10 @@ private: unsigned UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, int AdditionalIndent, - bool FixBadIndentation) { + bool FixBadIndentation, + unsigned FirstStartColumn, + unsigned NextStartColumn, + unsigned LastStartColumn) { LineJoiner Joiner(Style, Keywords, Lines); // Try to look up already computed penalty in DryRun-mode. @@ -908,9 +1007,10 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, // The minimum level of consecutive lines that have been formatted. unsigned RangeMinLevel = UINT_MAX; + bool FirstLine = true; for (const AnnotatedLine *Line = Joiner.getNextMergedLine(DryRun, IndentTracker); - Line; Line = NextLine) { + Line; Line = NextLine, FirstLine = false) { const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); @@ -934,8 +1034,12 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, } if (ShouldFormat && TheLine.Type != LT_Invalid) { - if (!DryRun) - formatFirstToken(TheLine, PreviousLine, Indent); + if (!DryRun) { + bool LastLine = Line->First->is(tok::eof); + formatFirstToken(TheLine, PreviousLine, + Indent, + LastLine ? 
LastStartColumn : NextStartColumn + Indent); + } NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); @@ -944,16 +1048,18 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, (TheLine.Type == LT_ImportStatement && (Style.Language != FormatStyle::LK_JavaScript || !Style.JavaScriptWrapImports)); - if (Style.ColumnLimit == 0) NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else if (FitsIntoOneLine) Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); else Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, Indent, DryRun); + .formatLine(TheLine, NextStartColumn + Indent, + FirstLine ? FirstStartColumn : 0, DryRun); RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); } else { // If no token in the current line is affected, we still need to format @@ -976,6 +1082,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, // Format the first token. 
if (ReformatLeadingWhitespace) formatFirstToken(TheLine, PreviousLine, + TheLine.First->OriginalColumn, TheLine.First->OriginalColumn); else Whitespaces->addUntouchableToken(*TheLine.First, @@ -998,12 +1105,14 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, - unsigned Indent) { - FormatToken& RootToken = *Line.First; + unsigned Indent, + unsigned NewlineIndent) { + FormatToken &RootToken = *Line.First; if (RootToken.is(tok::eof)) { unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); - Whitespaces->replaceWhitespace(RootToken, Newlines, /*Spaces=*/0, - /*StartOfTokenColumn=*/0); + unsigned TokenIndent = Newlines ? NewlineIndent : 0; + Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent, + TokenIndent); return; } unsigned Newlines = @@ -1013,6 +1122,9 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, (!RootToken.Next || (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) Newlines = std::min(Newlines, 1u); + // Remove empty lines at the start of nested blocks (lambdas/arrow functions) + if (PreviousLine == nullptr && Line.Level > 0) + Newlines = std::min(Newlines, 1u); if (Newlines == 0 && !RootToken.IsFirst) Newlines = 1; if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) @@ -1035,6 +1147,13 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) Newlines = std::min(1u, Newlines); + if (Newlines) + Indent = NewlineIndent; + + // Preprocessor directives get indented after the hash, if indented. 
+ if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement) + Indent = 0; + Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, Line.InPPDirective && !RootToken.HasUnescapedNewline); diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 55f0d1cac689..6432ca83a4c9 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -35,19 +35,22 @@ public: const SourceManager &SourceMgr, FormattingAttemptStatus *Status) : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), - Keywords(Keywords), SourceMgr(SourceMgr), - Status(Status) {} + Keywords(Keywords), SourceMgr(SourceMgr), Status(Status) {} /// \brief Format the current block and return the penalty. unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun = false, int AdditionalIndent = 0, - bool FixBadIndentation = false); + bool FixBadIndentation = false, + unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, + unsigned LastStartColumn = 0); private: /// \brief Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. void formatFirstToken(const AnnotatedLine &Line, - const AnnotatedLine *PreviousLine, unsigned Indent); + const AnnotatedLine *PreviousLine, unsigned Indent, + unsigned NewlineIndent); /// \brief Returns the column limit for a line, taking into account whether we /// need an escaped newline due to a continued preprocessor directive. 
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index faac5a371c26..b8608dcac9c7 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -18,6 +18,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> + #define DEBUG_TYPE "format-parser" namespace clang { @@ -56,8 +58,7 @@ private: }; static bool isLineComment(const FormatToken &FormatTok) { - return FormatTok.is(tok::comment) && - FormatTok.TokenText.startswith("//"); + return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); } // Checks if \p FormatTok is a line comment that continues the line comment @@ -226,15 +227,21 @@ private: UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), - Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} + Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), + IfNdefCondition(nullptr), FoundIncludeGuardStart(false), + IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; + IfNdefCondition = nullptr; + FoundIncludeGuardStart = false; + IncludeGuardRejected = false; Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); FormatTok = nullptr; @@ -243,10 +250,12 @@ void UnwrappedLineParser::reset() { CurrentLines = &Lines; DeclarationScopeStack.clear(); PPStack.clear(); + Line->FirstStartColumn = FirstStartColumn; } void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); + Line->FirstStartColumn = FirstStartColumn; do { DEBUG(llvm::dbgs() << "----\n"); reset(); @@ -326,6 +335,12 @@ void UnwrappedLineParser::parseLevel(bool 
HasOpeningBrace) { break; case tok::kw_default: case tok::kw_case: + if (Style.Language == FormatStyle::LK_JavaScript && + Line->MustBeDeclaration) { + // A 'case: string' style field declaration. + parseStructuralElement(); + break; + } if (!SwitchLabelEncountered && (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) ++Line->Level; @@ -346,7 +361,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // definitions, too. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Tok = FormatTok; - const FormatToken *PrevTok = getPreviousToken(); + const FormatToken *PrevTok = Tok->Previous; // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a // braced init list or a different block during the loop. @@ -364,13 +379,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { switch (Tok->Tok.getKind()) { case tok::l_brace: if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { - if (PrevTok->is(tok::colon)) - // A colon indicates this code is in a type, or a braced list - // following a label in an object literal ({a: {b: 1}}). The code - // below could be confused by semicolons between the individual - // members in a type member list, which would normally trigger - // BK_Block. In both cases, this must be parsed as an inline braced - // init. + if (PrevTok->isOneOf(tok::colon, tok::less)) + // A ':' indicates this code is in a type, or a braced list + // following a label in an object literal ({a: {b: 1}}). + // A '<' could be an object used in a comparison, but that is nonsense + // code (can never return true), so more likely it is a generic type + // argument (`X<{a: string; b: number}>`). + // The code below could be confused by semicolons between the + // individual members in a type member list, which would normally + // trigger BK_Block. In both cases, this must be parsed as an inline + // braced init. 
Tok->BlockKind = BK_BracedInit; else if (PrevTok->is(tok::r_paren)) // `) { }` can only occur in function or method declarations in JS. @@ -452,6 +470,21 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { FormatTok = Tokens->setPosition(StoredPosition); } +template <class T> +static inline void hash_combine(std::size_t &seed, const T &v) { + std::hash<T> hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +size_t UnwrappedLineParser::computePPHash() const { + size_t h = 0; + for (const auto &i : PPStack) { + hash_combine(h, size_t(i.Kind)); + hash_combine(h, i.Line); + } + return h; +} + void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, bool MunchSemi) { assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && @@ -459,16 +492,21 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->BlockKind = BK_Block; + size_t PPStartHash = computePPHash(); + unsigned InitialLevel = Line->Level; - nextToken(); + nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); + size_t NbPreprocessorDirectives = + CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; addUnwrappedLine(); - size_t OpeningLineIndex = CurrentLines->empty() - ? (UnwrappedLine::kInvalidIndex) - : (CurrentLines->size() - 1); + size_t OpeningLineIndex = + CurrentLines->empty() + ? (UnwrappedLine::kInvalidIndex) + : (CurrentLines->size() - 1 - NbPreprocessorDirectives); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); @@ -486,7 +524,10 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, return; } - nextToken(); // Munch the closing brace. + size_t PPEndHash = computePPHash(); + + // Munch the closing brace. + nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -494,11 +535,14 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, if (MunchSemi && FormatTok->Tok.is(tok::semi)) nextToken(); Line->Level = InitialLevel; - Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; - if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { - // Update the opening line to add the forward reference as well - (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = - CurrentLines->size() - 1; + + if (PPStartHash == PPEndHash) { + Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; + if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { + // Update the opening line to add the forward reference as well + (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = + CurrentLines->size() - 1; + } } } @@ -555,9 +599,8 @@ void UnwrappedLineParser::parseChildBlock() { FormatTok->BlockKind = BK_Block; nextToken(); { - bool SkipIndent = - (Style.Language == FormatStyle::LK_JavaScript && - (isGoogScope(*Line) || isIIFE(*Line, Keywords))); + bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && + (isGoogScope(*Line) || isIIFE(*Line, Keywords))); ScopedLineState LineState(*this); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, /*MustBeDeclaration=*/false); @@ -606,10 +649,15 @@ void UnwrappedLineParser::parsePPDirective() { } void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { - if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) - PPStack.push_back(PP_Unreachable); + size_t Line = CurrentLines->size(); + if (CurrentLines == &PreprocessorDirectives) + Line += Lines.size(); + + if (Unreachable || + (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) + PPStack.push_back({PP_Unreachable, Line}); else - PPStack.push_back(PP_Conditional); + PPStack.push_back({PP_Conditional, Line}); } void UnwrappedLineParser::conditionalCompilationStart(bool 
Unreachable) { @@ -643,7 +691,7 @@ void UnwrappedLineParser::conditionalCompilationEnd() { } } // Guard against #endif's without #if. - if (PPBranchLevel > 0) + if (PPBranchLevel > -1) --PPBranchLevel; if (!PPChainBranchIndex.empty()) PPChainBranchIndex.pop(); @@ -660,12 +708,35 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) { if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") Unreachable = true; conditionalCompilationStart(Unreachable); + FormatToken *IfCondition = FormatTok; + // If there's a #ifndef on the first line, and the only lines before it are + // comments, it could be an include guard. + bool MaybeIncludeGuard = IfNDef; + if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) { + for (auto &Line : Lines) { + if (!Line.Tokens.front().Tok->is(tok::comment)) { + MaybeIncludeGuard = false; + IncludeGuardRejected = true; + break; + } + } + } + --PPBranchLevel; parsePPUnknown(); + ++PPBranchLevel; + if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) + IfNdefCondition = IfCondition; } void UnwrappedLineParser::parsePPElse() { + // If a potential include guard has an #else, it's not an include guard. + if (FoundIncludeGuardStart && PPBranchLevel == 0) + FoundIncludeGuardStart = false; conditionalCompilationAlternative(); + if (PPBranchLevel > -1) + --PPBranchLevel; parsePPUnknown(); + ++PPBranchLevel; } void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } @@ -673,6 +744,16 @@ void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } void UnwrappedLineParser::parsePPEndIf() { conditionalCompilationEnd(); parsePPUnknown(); + // If the #endif of a potential include guard is the last thing in the file, + // then we count it as a real include guard and subtract one from every + // preprocessor indent. 
+ unsigned TokenPosition = Tokens->getPosition(); + FormatToken *PeekNext = AllTokens[TokenPosition]; + if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) && + Style.IndentPPDirectives != FormatStyle::PPDIS_None) + for (auto &Line : Lines) + if (Line.InPPDirective && Line.Level > 0) + --Line.Level; } void UnwrappedLineParser::parsePPDefine() { @@ -682,14 +763,26 @@ void UnwrappedLineParser::parsePPDefine() { parsePPUnknown(); return; } + if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) { + FoundIncludeGuardStart = true; + for (auto &Line : Lines) { + if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { + FoundIncludeGuardStart = false; + break; + } + } + } + IfNdefCondition = nullptr; nextToken(); if (FormatTok->Tok.getKind() == tok::l_paren && FormatTok->WhitespaceRange.getBegin() == FormatTok->WhitespaceRange.getEnd()) { parseParens(); } + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) + Line->Level += PPBranchLevel + 1; addUnwrappedLine(); - Line->Level = 1; + ++Line->Level; // Errors during a preprocessor directive can only affect the layout of the // preprocessor directive, and thus we ignore them. 
An alternative approach @@ -703,7 +796,10 @@ void UnwrappedLineParser::parsePPUnknown() { do { nextToken(); } while (!eof()); + if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) + Line->Level += PPBranchLevel + 1; addUnwrappedLine(); + IfNdefCondition = nullptr; } // Here we blacklist certain tokens that are not usually the first token in an @@ -746,8 +842,8 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords, Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, Keywords.kw_let, Keywords.kw_var, tok::kw_const, Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, - Keywords.kw_instanceof, Keywords.kw_interface, - Keywords.kw_throws, Keywords.kw_from)); + Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, + Keywords.kw_from)); } static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, @@ -800,11 +896,14 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); bool PreviousStartsTemplateExpr = Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); - if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) { - // If the token before the previous one is an '@', the previous token is an - // annotation and can precede another identifier/value. - const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok; - if (PrePrevious->is(tok::at)) + if (PreviousMustBeValue || Previous->is(tok::r_paren)) { + // If the line contains an '@' sign, the previous token might be an + // annotation, which can precede another identifier/value. 
+ bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), + [](UnwrappedLineNode &LineNode) { + return LineNode.Tok->is(tok::at); + }) != Line->Tokens.end(); + if (HasAt) return; } if (Next->is(tok::exclaim) && PreviousMustBeValue) @@ -817,7 +916,8 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, tok::minusminus))) return addUnwrappedLine(); - if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next)) + if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && + isJSDeclOrStmt(Keywords, Next)) return addUnwrappedLine(); } @@ -922,13 +1022,22 @@ void UnwrappedLineParser::parseStructuralElement() { parseDoWhile(); return; case tok::kw_switch: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // 'switch: string' field declaration. + break; parseSwitch(); return; case tok::kw_default: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // 'default: string' field declaration. + break; nextToken(); parseLabel(); return; case tok::kw_case: + if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) + // 'case: string' field declaration. 
+ break; parseCaseLabel(); return; case tok::kw_try: @@ -940,7 +1049,12 @@ void UnwrappedLineParser::parseStructuralElement() { if (FormatTok->Tok.is(tok::string_literal)) { nextToken(); if (FormatTok->Tok.is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); + if (Style.BraceWrapping.AfterExternBlock) { + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/true); + } else { + parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); + } addUnwrappedLine(); return; } @@ -996,7 +1110,7 @@ void UnwrappedLineParser::parseStructuralElement() { break; } do { - const FormatToken *Previous = getPreviousToken(); + const FormatToken *Previous = FormatTok->Previous; switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); @@ -1186,7 +1300,7 @@ void UnwrappedLineParser::parseStructuralElement() { nextToken(); parseBracedList(); } else if (Style.Language == FormatStyle::LK_Proto && - FormatTok->Tok.is(tok::less)) { + FormatTok->Tok.is(tok::less)) { nextToken(); parseBracedList(/*ContinueOnSemicolons=*/false, /*ClosingBraceKind=*/tok::greater); @@ -1210,14 +1324,6 @@ bool UnwrappedLineParser::tryToParseLambda() { nextToken(); return false; } - const FormatToken* Previous = getPreviousToken(); - if (Previous && - (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, - tok::kw_delete) || - Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { - nextToken(); - return false; - } assert(FormatTok->is(tok::l_square)); FormatToken &LSquare = *FormatTok; if (!tryToParseLambdaIntroducer()) @@ -1260,49 +1366,18 @@ bool UnwrappedLineParser::tryToParseLambda() { } bool UnwrappedLineParser::tryToParseLambdaIntroducer() { - nextToken(); - if (FormatTok->is(tok::equal)) { - nextToken(); - if (FormatTok->is(tok::r_square)) { - nextToken(); - return true; - } - if (FormatTok->isNot(tok::comma)) - return false; - nextToken(); - } else if (FormatTok->is(tok::amp)) { - nextToken(); - if (FormatTok->is(tok::r_square)) { - nextToken(); - 
return true; - } - if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { - return false; - } - if (FormatTok->is(tok::comma)) - nextToken(); - } else if (FormatTok->is(tok::r_square)) { + const FormatToken *Previous = FormatTok->Previous; + if (Previous && + (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, + tok::kw_delete) || + FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || + Previous->isSimpleTypeSpecifier())) { nextToken(); - return true; + return false; } - do { - if (FormatTok->is(tok::amp)) - nextToken(); - if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) - return false; - nextToken(); - if (FormatTok->is(tok::ellipsis)) - nextToken(); - if (FormatTok->is(tok::comma)) { - nextToken(); - } else if (FormatTok->is(tok::r_square)) { - nextToken(); - return true; - } else { - return false; - } - } while (!eof()); - return false; + nextToken(); + parseSquare(/*LambdaIntroducer=*/true); + return true; } void UnwrappedLineParser::tryToParseJSFunction() { @@ -1419,6 +1494,15 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, nextToken(); parseBracedList(); break; + case tok::less: + if (Style.Language == FormatStyle::LK_Proto) { + nextToken(); + parseBracedList(/*ContinueOnSemicolons=*/false, + /*ClosingBraceKind=*/tok::greater); + } else { + nextToken(); + } + break; case tok::semi: // JavaScript (or more precisely TypeScript) can have semicolons in braced // lists (in so-called TypeMemberLists). 
Thus, the semicolon cannot be @@ -1495,10 +1579,12 @@ void UnwrappedLineParser::parseParens() { } while (!eof()); } -void UnwrappedLineParser::parseSquare() { - assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); - if (tryToParseLambda()) - return; +void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { + if (!LambdaIntroducer) { + assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); + if (tryToParseLambda()) + return; + } do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: @@ -1939,6 +2025,17 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && FormatTok->isOneOf(tok::period, tok::comma))) { + if (Style.Language == FormatStyle::LK_JavaScript && + FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { + // JavaScript/TypeScript supports inline object types in + // extends/implements positions: + // class Foo implements {bar: number} { } + nextToken(); + if (FormatTok->is(tok::l_brace)) { + tryToParseBracedList(); + continue; + } + } bool IsNonMacroIdentifier = FormatTok->is(tok::identifier) && FormatTok->TokenText != FormatTok->TokenText.upper(); @@ -2090,7 +2187,7 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { while (!eof()) { if (FormatTok->is(tok::semi)) return; - if (Line->Tokens.size() == 0) { + if (Line->Tokens.empty()) { // Common issue: Automatic Semicolon Insertion wrapped the line, so the // import statement should terminate. return; @@ -2107,14 +2204,15 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, StringRef Prefix = "") { - llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" + llvm::dbgs() << Prefix << "Line(" << Line.Level + << ", FSC=" << Line.FirstStartColumn << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { llvm::dbgs() << I->Tok->Tok.getName() << "[" - << "T=" << I->Tok->Type - << ", OC=" << I->Tok->OriginalColumn << "] "; + << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn + << "] "; } for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); @@ -2140,12 +2238,15 @@ void UnwrappedLineParser::addUnwrappedLine() { CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; + Line->FirstStartColumn = 0; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), std::make_move_iterator(PreprocessorDirectives.end())); PreprocessorDirectives.clear(); } + // Disconnect the current token from the last token on the previous line. + FormatTok->Previous = nullptr; } bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } @@ -2287,23 +2388,17 @@ void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { CommentsBeforeNextToken.clear(); } -void UnwrappedLineParser::nextToken() { +void UnwrappedLineParser::nextToken(int LevelDifference) { if (eof()) return; flushComments(isOnNewLine(*FormatTok)); pushToken(FormatTok); + FormatToken *Previous = FormatTok; if (Style.Language != FormatStyle::LK_JavaScript) - readToken(); + readToken(LevelDifference); else readTokenWithJavaScriptASI(); -} - -const FormatToken *UnwrappedLineParser::getPreviousToken() { - // FIXME: This is a dirty way to access the previous token. Find a better - // solution. 
- if (!Line || Line->Tokens.empty()) - return nullptr; - return Line->Tokens.back().Tok; + FormatTok->Previous = Previous; } void UnwrappedLineParser::distributeComments( @@ -2343,8 +2438,7 @@ void UnwrappedLineParser::distributeComments( } for (unsigned i = 0, e = Comments.size(); i < e; ++i) { FormatToken *FormatTok = Comments[i]; - if (HasTrailAlignedWithNextToken && - i == StartOfTrailAlignedWithNextToken) { + if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { FormatTok->ContinuesLineCommentSection = false; } else { FormatTok->ContinuesLineCommentSection = @@ -2362,7 +2456,7 @@ void UnwrappedLineParser::distributeComments( } } -void UnwrappedLineParser::readToken() { +void UnwrappedLineParser::readToken(int LevelDifference) { SmallVector<FormatToken *, 1> Comments; do { FormatTok = Tokens->getNextToken(); @@ -2375,6 +2469,10 @@ void UnwrappedLineParser::readToken() { // directives only after that unwrapped line was finished later. bool SwitchToPreprocessorLines = !Line->Tokens.empty(); ScopedLineState BlockState(*this, SwitchToPreprocessorLines); + assert((LevelDifference >= 0 || + static_cast<unsigned>(-LevelDifference) <= Line->Level) && + "LevelDifference makes Line->Level negative"); + Line->Level += LevelDifference; // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. 
@@ -2395,7 +2493,7 @@ void UnwrappedLineParser::readToken() { FormatTok->MustBreakBefore = true; } - if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && + if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && !Line->InPPDirective) { continue; } diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index a2aa2f006728..1d8ccabbd0f8 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -56,6 +56,8 @@ struct UnwrappedLine { size_t MatchingOpeningBlockLineIndex; static const size_t kInvalidIndex = -1; + + unsigned FirstStartColumn = 0; }; class UnwrappedLineConsumer { @@ -71,6 +73,7 @@ class UnwrappedLineParser { public: UnwrappedLineParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, + unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback); @@ -96,7 +99,7 @@ private: bool parseBracedList(bool ContinueOnSemicolons = false, tok::TokenKind ClosingBraceKind = tok::r_brace); void parseParens(); - void parseSquare(); + void parseSquare(bool LambdaIntroducer = false); void parseIfThenElse(); void parseTryCatch(); void parseForOrWhileLoop(); @@ -123,9 +126,12 @@ private: void tryToParseJSFunction(); void addUnwrappedLine(); bool eof() const; - void nextToken(); - const FormatToken *getPreviousToken(); - void readToken(); + // LevelDifference is the difference of levels after and before the current + // token. For example: + // - if the token is '{' and opens a block, LevelDifference is 1. + // - if the token is '}' and closes a block, LevelDifference is -1. + void nextToken(int LevelDifference = 0); + void readToken(int LevelDifference = 0); // Decides which comment tokens should be added to the current line and which // should be added as comments before the next token. @@ -156,6 +162,11 @@ private: bool isOnNewLine(const FormatToken &FormatTok); + // Compute hash of the current preprocessor branch. 
+ // This is used to identify the different branches, and thus track if block + // open and close in the same branch. + size_t computePPHash() const; + // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level // and use that everywhere in the Parser. @@ -174,7 +185,7 @@ private: // Preprocessor directives are parsed out-of-order from other unwrapped lines. // Thus, we need to keep a list of preprocessor directives to be reported - // after an unwarpped line that has been started was finished. + // after an unwrapped line that has been started was finished. SmallVector<UnwrappedLine, 4> PreprocessorDirectives; // New unwrapped lines are added via CurrentLines. @@ -207,8 +218,14 @@ private: PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 }; + struct PPBranch { + PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {} + PPBranchKind Kind; + size_t Line; + }; + // Keeps a stack of currently active preprocessor branching directives. - SmallVector<PPBranchKind, 16> PPStack; + SmallVector<PPBranch, 16> PPStack; // The \c UnwrappedLineParser re-parses the code for each combination // of preprocessor branches that can be taken. @@ -231,6 +248,15 @@ private: // sequence. std::stack<int> PPChainBranchIndex; + // Contains the #ifndef condition for a potential include guard. + FormatToken *IfNdefCondition; + bool FoundIncludeGuardStart; + bool IncludeGuardRejected; + // Contains the first start column where the source begins. This is zero for + // normal source code and may be nonzero when formatting a code fragment that + // does not start at the beginning of the file. 
+ unsigned FirstStartColumn; + friend class ScopedLineState; friend class CompoundStatementIndenter; }; @@ -243,8 +269,9 @@ struct UnwrappedLineNode { SmallVector<UnwrappedLine, 0> Children; }; -inline UnwrappedLine::UnwrappedLine() : Level(0), InPPDirective(false), - MustBeDeclaration(false), MatchingOpeningBlockLineIndex(kInvalidIndex) {} +inline UnwrappedLine::UnwrappedLine() + : Level(0), InPPDirective(false), MustBeDeclaration(false), + MatchingOpeningBlockLineIndex(kInvalidIndex) {} } // end namespace format } // end namespace clang diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp index fb4f59fbc9bc..ef0c7a7d5a45 100644 --- a/lib/Format/UsingDeclarationsSorter.cpp +++ b/lib/Format/UsingDeclarationsSorter.cpp @@ -26,6 +26,45 @@ namespace format { namespace { +// The order of using declaration is defined as follows: +// Split the strings by "::" and discard any initial empty strings. The last +// element of each list is a non-namespace name; all others are namespace +// names. Sort the lists of names lexicographically, where the sort order of +// individual names is that all non-namespace names come before all namespace +// names, and within those groups, names are in case-insensitive lexicographic +// order. +int compareLabels(StringRef A, StringRef B) { + SmallVector<StringRef, 2> NamesA; + A.split(NamesA, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + SmallVector<StringRef, 2> NamesB; + B.split(NamesB, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + size_t SizeA = NamesA.size(); + size_t SizeB = NamesB.size(); + for (size_t I = 0, E = std::min(SizeA, SizeB); I < E; ++I) { + if (I + 1 == SizeA) { + // I is the last index of NamesA and NamesA[I] is a non-namespace name. + + // Non-namespace names come before all namespace names. + if (SizeB > SizeA) + return -1; + + // Two names within a group compare case-insensitively. 
+ return NamesA[I].compare_lower(NamesB[I]); + } + + // I is the last index of NamesB and NamesB[I] is a non-namespace name. + // Non-namespace names come before all namespace names. + if (I + 1 == SizeB) + return 1; + + // Two namespaces names within a group compare case-insensitively. + int C = NamesA[I].compare_lower(NamesB[I]); + if (C != 0) + return C; + } + return 0; +} + struct UsingDeclaration { const AnnotatedLine *Line; std::string Label; @@ -34,7 +73,7 @@ struct UsingDeclaration { : Line(Line), Label(Label) {} bool operator<(const UsingDeclaration &Other) const { - return Label < Other.Label; + return compareLabels(Label, Other.Label) < 0; } }; @@ -76,10 +115,42 @@ std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) { void endUsingDeclarationBlock( SmallVectorImpl<UsingDeclaration> *UsingDeclarations, const SourceManager &SourceMgr, tooling::Replacements *Fixes) { + bool BlockAffected = false; + for (const UsingDeclaration &Declaration : *UsingDeclarations) { + if (Declaration.Line->Affected) { + BlockAffected = true; + break; + } + } + if (!BlockAffected) { + UsingDeclarations->clear(); + return; + } SmallVector<UsingDeclaration, 4> SortedUsingDeclarations( UsingDeclarations->begin(), UsingDeclarations->end()); - std::sort(SortedUsingDeclarations.begin(), SortedUsingDeclarations.end()); + std::stable_sort(SortedUsingDeclarations.begin(), + SortedUsingDeclarations.end()); + SortedUsingDeclarations.erase( + std::unique(SortedUsingDeclarations.begin(), + SortedUsingDeclarations.end(), + [](const UsingDeclaration &a, const UsingDeclaration &b) { + return a.Label == b.Label; + }), + SortedUsingDeclarations.end()); for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) { + if (I >= SortedUsingDeclarations.size()) { + // This using declaration has been deduplicated, delete it. 
+ auto Begin = + (*UsingDeclarations)[I].Line->First->WhitespaceRange.getBegin(); + auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc(); + auto Range = CharSourceRange::getCharRange(Begin, End); + auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, "")); + if (Err) { + llvm::errs() << "Error while sorting using declarations: " + << llvm::toString(std::move(Err)) << "\n"; + } + continue; + } if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line) continue; auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation(); @@ -112,7 +183,7 @@ UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style) : TokenAnalyzer(Env, Style) {} -tooling::Replacements UsingDeclarationsSorter::analyze( +std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze( TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); @@ -121,15 +192,17 @@ tooling::Replacements UsingDeclarationsSorter::analyze( tooling::Replacements Fixes; SmallVector<UsingDeclaration, 4> UsingDeclarations; for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { - if (!AnnotatedLines[I]->Affected || AnnotatedLines[I]->InPPDirective || - !AnnotatedLines[I]->startsWith(tok::kw_using) || - AnnotatedLines[I]->First->Finalized) { + const auto *FirstTok = AnnotatedLines[I]->First; + if (AnnotatedLines[I]->InPPDirective || + !AnnotatedLines[I]->startsWith(tok::kw_using) || FirstTok->Finalized) { endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); continue; } - if (AnnotatedLines[I]->First->NewlinesBefore > 1) + if (FirstTok->NewlinesBefore > 1) endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - std::string Label = computeUsingDeclarationLabel(AnnotatedLines[I]->First); + const auto *UsingTok = + FirstTok->is(tok::comment) ? 
FirstTok->getNextNonComment() : FirstTok; + std::string Label = computeUsingDeclarationLabel(UsingTok); if (Label.empty()) { endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); continue; @@ -137,7 +210,7 @@ tooling::Replacements UsingDeclarationsSorter::analyze( UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label)); } endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - return Fixes; + return {Fixes, 0}; } } // namespace format diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h index f7d5f97e3a2a..6f137712d841 100644 --- a/lib/Format/UsingDeclarationsSorter.h +++ b/lib/Format/UsingDeclarationsSorter.h @@ -25,7 +25,7 @@ class UsingDeclarationsSorter : public TokenAnalyzer { public: UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style); - tooling::Replacements + std::pair<tooling::Replacements, unsigned> analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 377ec3a681b6..a5477a996327 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -67,6 +67,11 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, /*IsInsideToken=*/false)); } +llvm::Error +WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) { + return Replaces.add(Replacement); +} + void WhitespaceManager::replaceWhitespaceInToken( const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, @@ -166,15 +171,15 @@ void WhitespaceManager::calculateLineBreakInformation() { // BreakableLineCommentSection does comment reflow changes and here is // the aligning of trailing comments. 
Consider the case where we reflow // the second line up in this example: - // + // // // line 1 // // line 2 - // + // // That amounts to 2 changes by BreakableLineCommentSection: // - the first, delimited by (), for the whitespace between the tokens, // - and second, delimited by [], for the whitespace at the beginning // of the second token: - // + // // // line 1( // )[// ]line 2 // @@ -608,8 +613,9 @@ void WhitespaceManager::generateChanges() { if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; if (C.ContinuesPPDirective) - appendNewlineText(ReplacementText, C.NewlinesBefore, - C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn); + appendEscapedNewlineText(ReplacementText, C.NewlinesBefore, + C.PreviousEndOfTokenColumn, + C.EscapedNewlineColumn); else appendNewlineText(ReplacementText, C.NewlinesBefore); appendIndentText(ReplacementText, C.Tok->IndentLevel, @@ -621,8 +627,7 @@ void WhitespaceManager::generateChanges() { } } -void WhitespaceManager::storeReplacement(SourceRange Range, - StringRef Text) { +void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) { unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - SourceMgr.getFileOffset(Range.getBegin()); // Don't create a replacement, if it does not change anything. @@ -645,16 +650,16 @@ void WhitespaceManager::appendNewlineText(std::string &Text, Text.append(UseCRLF ? 
"\r\n" : "\n"); } -void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines, - unsigned PreviousEndOfTokenColumn, - unsigned EscapedNewlineColumn) { +void WhitespaceManager::appendEscapedNewlineText( + std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn, + unsigned EscapedNewlineColumn) { if (Newlines > 0) { - unsigned Offset = - std::min<int>(EscapedNewlineColumn - 2, PreviousEndOfTokenColumn); + unsigned Spaces = + std::max<int>(1, EscapedNewlineColumn - PreviousEndOfTokenColumn - 1); for (unsigned i = 0; i < Newlines; ++i) { - Text.append(EscapedNewlineColumn - Offset - 1, ' '); + Text.append(Spaces, ' '); Text.append(UseCRLF ? "\\\r\n" : "\\\n"); - Offset = 0; + Spaces = std::max<int>(0, EscapedNewlineColumn - 1); } } } diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index 4e78ab43abaf..af20dc5616a7 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -57,6 +57,8 @@ public: /// was not called. void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); + llvm::Error addReplacement(const tooling::Replacement &Replacement); + /// \brief Inserts or replaces whitespace in the middle of a token. /// /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix @@ -194,9 +196,9 @@ private: /// \brief Stores \p Text as the replacement for the whitespace in \p Range. void storeReplacement(SourceRange Range, StringRef Text); void appendNewlineText(std::string &Text, unsigned Newlines); - void appendNewlineText(std::string &Text, unsigned Newlines, - unsigned PreviousEndOfTokenColumn, - unsigned EscapedNewlineColumn); + void appendEscapedNewlineText(std::string &Text, unsigned Newlines, + unsigned PreviousEndOfTokenColumn, + unsigned EscapedNewlineColumn); void appendIndentText(std::string &Text, unsigned IndentLevel, unsigned Spaces, unsigned WhitespaceStartColumn); |