diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:06:01 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:06:01 +0000 |
commit | 486754660bb926339aefcf012a3f848592babb8b (patch) | |
tree | ecdbc446c9876f4f120f701c243373cd3cb43db3 /lib/Format | |
parent | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff) | |
download | src-486754660bb926339aefcf012a3f848592babb8b.tar.gz src-486754660bb926339aefcf012a3f848592babb8b.zip |
Vendor import of clang trunk r338150:vendor/clang/clang-trunk-r338150
Notes
Notes:
svn path=/vendor/clang/dist/; revision=336815
svn path=/vendor/clang/clang-trunk-r338150/; revision=336816; tag=vendor/clang/clang-trunk-r338150
Diffstat (limited to 'lib/Format')
30 files changed, 1976 insertions, 1086 deletions
diff --git a/lib/Format/AffectedRangeManager.cpp b/lib/Format/AffectedRangeManager.cpp index 5d4df1941209..b14316a14cd9 100644 --- a/lib/Format/AffectedRangeManager.cpp +++ b/lib/Format/AffectedRangeManager.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements AffectRangeManager class. +/// This file implements AffectRangeManager class. /// //===----------------------------------------------------------------------===// @@ -21,8 +21,9 @@ namespace clang { namespace format { bool AffectedRangeManager::computeAffectedLines( - SmallVectorImpl<AnnotatedLine *>::iterator I, - SmallVectorImpl<AnnotatedLine *>::iterator E) { + SmallVectorImpl<AnnotatedLine *> &Lines) { + SmallVectorImpl<AnnotatedLine *>::iterator I = Lines.begin(); + SmallVectorImpl<AnnotatedLine *>::iterator E = Lines.end(); bool SomeLineAffected = false; const AnnotatedLine *PreviousLine = nullptr; while (I != E) { @@ -48,7 +49,7 @@ bool AffectedRangeManager::computeAffectedLines( continue; } - if (nonPPLineAffected(Line, PreviousLine)) + if (nonPPLineAffected(Line, PreviousLine, Lines)) SomeLineAffected = true; PreviousLine = Line; @@ -99,10 +100,10 @@ void AffectedRangeManager::markAllAsAffected( } bool AffectedRangeManager::nonPPLineAffected( - AnnotatedLine *Line, const AnnotatedLine *PreviousLine) { + AnnotatedLine *Line, const AnnotatedLine *PreviousLine, + SmallVectorImpl<AnnotatedLine *> &Lines) { bool SomeLineAffected = false; - Line->ChildrenAffected = - computeAffectedLines(Line->Children.begin(), Line->Children.end()); + Line->ChildrenAffected = computeAffectedLines(Line->Children); if (Line->ChildrenAffected) SomeLineAffected = true; @@ -138,8 +139,13 @@ bool AffectedRangeManager::nonPPLineAffected( Line->First->NewlinesBefore < 2 && PreviousLine && PreviousLine->Affected && PreviousLine->Last->is(tok::comment); + bool IsAffectedClosingBrace = + Line->First->is(tok::r_brace) && + Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && + Lines[Line->MatchingOpeningBlockLineIndex]->Affected; + if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || - IsContinuedComment) { + IsContinuedComment || IsAffectedClosingBrace) { Line->Affected = true; SomeLineAffected = true; } diff --git a/lib/Format/AffectedRangeManager.h b/lib/Format/AffectedRangeManager.h index d8d5ee55acd8..b0c9dd259fb8 100644 --- a/lib/Format/AffectedRangeManager.h +++ b/lib/Format/AffectedRangeManager.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief AffectedRangeManager class manages affected ranges in the code. +/// AffectedRangeManager class manages affected ranges in the code. /// //===----------------------------------------------------------------------===// @@ -30,10 +30,9 @@ public: : SourceMgr(SourceMgr), Ranges(Ranges.begin(), Ranges.end()) {} // Determines which lines are affected by the SourceRanges given as input. - // Returns \c true if at least one line between I and E or one of their + // Returns \c true if at least one line in \p Lines or one of their // children is affected. - bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I, - SmallVectorImpl<AnnotatedLine *>::iterator E); + bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *> &Lines); // Returns true if 'Range' intersects with one of the input ranges. bool affectsCharSourceRange(const CharSourceRange &Range); @@ -54,8 +53,8 @@ private: // Determines whether 'Line' is affected by the SourceRanges given as input. // Returns \c true if line or one if its children is affected. - bool nonPPLineAffected(AnnotatedLine *Line, - const AnnotatedLine *PreviousLine); + bool nonPPLineAffected(AnnotatedLine *Line, const AnnotatedLine *PreviousLine, + SmallVectorImpl<AnnotatedLine *> &Lines); const SourceManager &SourceMgr; const SmallVector<CharSourceRange, 8> Ranges; diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp index 4735ab3564f0..cc68f70100e3 100644 --- a/lib/Format/BreakableToken.cpp +++ b/lib/Format/BreakableToken.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Contains implementation of BreakableToken class and classes derived +/// Contains implementation of BreakableToken class and classes derived /// from it. /// //===----------------------------------------------------------------------===// @@ -44,7 +44,8 @@ static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style) { static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//", "//!"}; - static const char *const KnownTextProtoPrefixes[] = {"//", "#"}; + static const char *const KnownTextProtoPrefixes[] = {"//", "#", "##", "###", + "####"}; ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes); if (Style.Language == FormatStyle::LK_TextProto) KnownPrefixes = KnownTextProtoPrefixes; @@ -67,8 +68,9 @@ static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding) { - DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit - << "\", Content start: " << ContentStartColumn << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit + << "\", Content start: " << ContentStartColumn + << "\n"); if (ColumnLimit <= ContentStartColumn + 1) return BreakableToken::Split(StringRef::npos, 0); @@ -89,9 +91,9 @@ static BreakableToken::Split getCommentSplit(StringRef Text, // Do not split before a number followed by a dot: this would be interpreted // as a numbered list, which would prevent re-flowing in subsequent passes. - static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); + static auto *const kNumberedListRegexp = new llvm::Regex("^[1-9][0-9]?\\."); if (SpaceOffset != StringRef::npos && - kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) + kNumberedListRegexp->match(Text.substr(SpaceOffset).ltrim(Blanks))) SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); if (SpaceOffset == StringRef::npos || @@ -214,11 +216,11 @@ unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, BreakableStringLiteral::BreakableStringLiteral( const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, - StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style) + StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) : BreakableToken(Tok, InPPDirective, Encoding, Style), StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), - UnbreakableTailLength(Tok.UnbreakableTailLength) { + UnbreakableTailLength(UnbreakableTailLength) { assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); Line = Tok.TokenText.substr( Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); @@ -284,10 +286,9 @@ static bool mayReflowContent(StringRef Content) { Content = Content.trim(Blanks); // Lines starting with '@' commonly have special meaning. // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. - static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = { - "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}; bool hasSpecialMeaningPrefix = false; - for (StringRef Prefix : kSpecialMeaningPrefixes) { + for (StringRef Prefix : + {"@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) { if (Content.startswith(Prefix)) { hasSpecialMeaningPrefix = true; break; @@ -297,9 +298,9 @@ static bool mayReflowContent(StringRef Content) { // Numbered lists may also start with a number followed by '.' // To avoid issues if a line starts with a number which is actually the end // of a previous line, we only consider numbers with up to 2 digits. - static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); + static auto *const kNumberedListRegexp = new llvm::Regex("^[1-9][0-9]?\\. "); hasSpecialMeaningPrefix = - hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content); + hasSpecialMeaningPrefix || kNumberedListRegexp->match(Content); // Simple heuristic for what to reflow: content should contain at least two // characters and either the first or second character must be @@ -425,7 +426,7 @@ BreakableBlockComment::BreakableBlockComment( } } - DEBUG({ + LLVM_DEBUG({ llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n"; for (size_t i = 0; i < Lines.size(); ++i) { @@ -788,16 +789,47 @@ BreakableComment::Split BreakableLineCommentSection::getReflowSplit( void BreakableLineCommentSection::reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const { - // Reflow happens between tokens. Replace the whitespace between the - // tokens by the empty string. - Whitespaces.replaceWhitespace( - *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, - /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { + // Reflow happens between tokens. Replace the whitespace between the + // tokens by the empty string. + Whitespaces.replaceWhitespace( + *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, + /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); + } else if (LineIndex > 0) { + // In case we're reflowing after the '\' in: + // + // // line comment \ + // // line 2 + // + // the reflow happens inside the single comment token (it is a single line + // comment with an unescaped newline). + // Replace the whitespace between the '\' and '//' with the empty string. + // + // Offset points to after the '\' relative to start of the token. + unsigned Offset = Lines[LineIndex - 1].data() + + Lines[LineIndex - 1].size() - + tokenAt(LineIndex - 1).TokenText.data(); + // WhitespaceLength is the number of chars between the '\' and the '//' on + // the next line. + unsigned WhitespaceLength = + Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset; + Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], + Offset, + /*ReplaceChars=*/WhitespaceLength, + /*PreviousPostfix=*/"", + /*CurrentPrefix=*/"", + /*InPPDirective=*/false, + /*Newlines=*/0, + /*Spaces=*/0); + + } // Replace the indent and prefix of the token with the reflow prefix. + unsigned Offset = + Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); unsigned WhitespaceLength = - Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); + Content[LineIndex].data() - Lines[LineIndex].data(); Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], - /*Offset=*/0, + Offset, /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", /*CurrentPrefix=*/ReflowPrefix, diff --git a/lib/Format/BreakableToken.h b/lib/Format/BreakableToken.h index 8ef26ef464da..0fac8f08c026 100644 --- a/lib/Format/BreakableToken.h +++ b/lib/Format/BreakableToken.h @@ -1,4 +1,4 @@ -//===--- BreakableToken.h - Format C++ code -------------------------------===// +//===--- BreakableToken.h - Format C++ code ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment, +/// Declares BreakableToken, BreakableStringLiteral, BreakableComment, /// BreakableBlockComment and BreakableLineCommentSection classes, that contain /// token type-specific logic to break long lines in tokens and reflow content /// between tokens. @@ -27,13 +27,13 @@ namespace clang { namespace format { -/// \brief Checks if \p Token switches formatting, like /* clang-format off */. +/// Checks if \p Token switches formatting, like /* clang-format off */. /// \p Token must be a comment. bool switchesFormatting(const FormatToken &Token); struct FormatStyle; -/// \brief Base class for tokens / ranges of tokens that can allow breaking +/// Base class for tokens / ranges of tokens that can allow breaking /// within the tokens - for example, to avoid whitespace beyond the column /// limit, or to reflow text. /// @@ -88,15 +88,15 @@ struct FormatStyle; /// class BreakableToken { public: - /// \brief Contains starting character index and length of split. + /// Contains starting character index and length of split. typedef std::pair<StringRef::size_type, unsigned> Split; virtual ~BreakableToken() {} - /// \brief Returns the number of lines in this token in the original code. + /// Returns the number of lines in this token in the original code. virtual unsigned getLineCount() const = 0; - /// \brief Returns the number of columns required to format the text in the + /// Returns the number of columns required to format the text in the /// byte range [\p Offset, \p Offset \c + \p Length). /// /// \p Offset is the byte offset from the start of the content of the line @@ -108,7 +108,7 @@ public: StringRef::size_type Length, unsigned StartColumn) const = 0; - /// \brief Returns the number of columns required to format the text following + /// Returns the number of columns required to format the text following /// the byte \p Offset in the line \p LineIndex, including potentially /// unbreakable sequences of tokens following after the end of the token. /// @@ -125,7 +125,7 @@ public: return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); } - /// \brief Returns the column at which content in line \p LineIndex starts, + /// Returns the column at which content in line \p LineIndex starts, /// assuming no reflow. /// /// If \p Break is true, returns the column at which the line should start @@ -135,7 +135,7 @@ public: virtual unsigned getContentStartColumn(unsigned LineIndex, bool Break) const = 0; - /// \brief Returns a range (offset, length) at which to break the line at + /// Returns a range (offset, length) at which to break the line at /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in /// the token is formatted starting at ContentStartColumn in the reformatted @@ -144,27 +144,27 @@ public: unsigned ColumnLimit, unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const = 0; - /// \brief Emits the previously retrieved \p Split via \p Whitespaces. + /// Emits the previously retrieved \p Split via \p Whitespaces. virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns the number of columns needed to format + /// Returns the number of columns needed to format /// \p RemainingTokenColumns, assuming that Split is within the range measured /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced /// to a single space. unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const; - /// \brief Replaces the whitespace range described by \p Split with a single + /// Replaces the whitespace range described by \p Split with a single /// space. virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const = 0; - /// \brief Returns whether the token supports reflowing text. + /// Returns whether the token supports reflowing text. virtual bool supportsReflow() const { return false; } - /// \brief Returns a whitespace range (offset, length) of the content at \p + /// Returns a whitespace range (offset, length) of the content at \p /// LineIndex such that the content of that line is reflown to the end of the /// previous one. /// @@ -180,21 +180,21 @@ public: return Split(StringRef::npos, 0); } - /// \brief Reflows the current line into the end of the previous one. + /// Reflows the current line into the end of the previous one. virtual void reflow(unsigned LineIndex, WhitespaceManager &Whitespaces) const {} - /// \brief Returns whether there will be a line break at the start of the + /// Returns whether there will be a line break at the start of the /// token. virtual bool introducesBreakBeforeToken() const { return false; } - /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. + /// Replaces the whitespace between \p LineIndex-1 and \p LineIndex. virtual void adaptStartOfLine(unsigned LineIndex, WhitespaceManager &Whitespaces) const {} - /// \brief Returns a whitespace range (offset, length) of the content at + /// Returns a whitespace range (offset, length) of the content at /// the last line that needs to be reformatted after the last line has been /// reformatted. /// @@ -204,7 +204,7 @@ public: return Split(StringRef::npos, 0); } - /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line + /// Replaces the whitespace from \p SplitAfterLastLine on the last line /// after the last line has been formatted by performing a reformatting. void replaceWhitespaceAfterLastLine(unsigned TailOffset, Split SplitAfterLastLine, @@ -213,7 +213,7 @@ public: Whitespaces); } - /// \brief Updates the next token of \p State to the next token after this + /// Updates the next token of \p State to the next token after this /// one. This can be used when this token manages a set of underlying tokens /// as a unit and is responsible for the formatting of the them. virtual void updateNextToken(LineState &State) const {} @@ -232,17 +232,17 @@ protected: class BreakableStringLiteral : public BreakableToken { public: - /// \brief Creates a breakable token for a single line string literal. + /// Creates a breakable token for a single line string literal. /// /// \p StartColumn specifies the column in which the token will start /// after formatting. BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); + unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style); Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, - unsigned ReflowColumn, + unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const override; void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override; @@ -272,7 +272,7 @@ protected: class BreakableComment : public BreakableToken { protected: - /// \brief Creates a breakable token for a comment. + /// Creates a breakable token for a comment. /// /// \p StartColumn specifies the column in which the comment will start after /// formatting. @@ -284,7 +284,7 @@ public: bool supportsReflow() const override { return true; } unsigned getLineCount() const override; Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, - unsigned ReflowColumn, + unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const override; void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, WhitespaceManager &Whitespaces) const override; @@ -453,7 +453,7 @@ private: SmallVector<unsigned, 16> OriginalContentColumn; - /// \brief The token to which the last line of this breakable token belongs + /// The token to which the last line of this breakable token belongs /// to; nullptr if that token is the initial token. /// /// The distinction is because if the token of the last line of this breakable diff --git a/lib/Format/CMakeLists.txt b/lib/Format/CMakeLists.txt index 42e6d53d9fe6..0019d045cd06 100644 --- a/lib/Format/CMakeLists.txt +++ b/lib/Format/CMakeLists.txt @@ -20,4 +20,5 @@ add_clang_library(clangFormat clangBasic clangLex clangToolingCore + clangToolingInclusions ) diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp index a3d38b244c5c..90d2a9997111 100644 --- a/lib/Format/ContinuationIndenter.cpp +++ b/lib/Format/ContinuationIndenter.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements the continuation indenter. +/// This file implements the continuation indenter. /// //===----------------------------------------------------------------------===// @@ -26,14 +26,81 @@ namespace clang { namespace format { +// Returns true if a TT_SelectorName should be indented when wrapped, +// false otherwise. +static bool shouldIndentWrappedSelectorName(const FormatStyle &Style, + LineType LineType) { + return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl; +} + // Returns the length of everything up to the first possible line break after // the ), ], } or > matching \c Tok. -static unsigned getLengthToMatchingParen(const FormatToken &Tok) { +static unsigned getLengthToMatchingParen(const FormatToken &Tok, + const std::vector<ParenState> &Stack) { + // Normally whether or not a break before T is possible is calculated and + // stored in T.CanBreakBefore. Braces, array initializers and text proto + // messages like `key: < ... >` are an exception: a break is possible + // before a closing brace R if a break was inserted after the corresponding + // opening brace. The information about whether or not a break is needed + // before a closing brace R is stored in the ParenState field + // S.BreakBeforeClosingBrace where S is the state that R closes. + // + // In order to decide whether there can be a break before encountered right + // braces, this implementation iterates over the sequence of tokens and over + // the paren stack in lockstep, keeping track of the stack level which visited + // right braces correspond to in MatchingStackIndex. + // + // For example, consider: + // L. <- line number + // 1. { + // 2. {1}, + // 3. {2}, + // 4. {{3}}} + // ^ where we call this method with this token. + // The paren stack at this point contains 3 brace levels: + // 0. { at line 1, BreakBeforeClosingBrace: true + // 1. first { at line 4, BreakBeforeClosingBrace: false + // 2. second { at line 4, BreakBeforeClosingBrace: false, + // where there might be fake parens levels in-between these levels. + // The algorithm will start at the first } on line 4, which is the matching + // brace of the initial left brace and at level 2 of the stack. Then, + // examining BreakBeforeClosingBrace: false at level 2, it will continue to + // the second } on line 4, and will traverse the stack downwards until it + // finds the matching { on level 1. Then, examining BreakBeforeClosingBrace: + // false at level 1, it will continue to the third } on line 4 and will + // traverse the stack downwards until it finds the matching { on level 0. + // Then, examining BreakBeforeClosingBrace: true at level 0, the algorithm + // will stop and will use the second } on line 4 to determine the length to + // return, as in this example the range will include the tokens: {3}} + // + // The algorithm will only traverse the stack if it encounters braces, array + // initializer squares or text proto angle brackets. if (!Tok.MatchingParen) return 0; FormatToken *End = Tok.MatchingParen; - while (End->Next && !End->Next->CanBreakBefore) { - End = End->Next; + // Maintains a stack level corresponding to the current End token. + int MatchingStackIndex = Stack.size() - 1; + // Traverses the stack downwards, looking for the level to which LBrace + // corresponds. Returns either a pointer to the matching level or nullptr if + // LParen is not found in the initial portion of the stack up to + // MatchingStackIndex. + auto FindParenState = [&](const FormatToken *LBrace) -> const ParenState * { + while (MatchingStackIndex >= 0 && Stack[MatchingStackIndex].Tok != LBrace) + --MatchingStackIndex; + return MatchingStackIndex >= 0 ? &Stack[MatchingStackIndex] : nullptr; + }; + for (; End->Next; End = End->Next) { + if (End->Next->CanBreakBefore) + break; + if (!End->Next->closesScope()) + continue; + if (End->Next->MatchingParen && + End->Next->MatchingParen->isOneOf( + tok::l_brace, TT_ArrayInitializerLSquare, tok::less)) { + const ParenState *State = FindParenState(End->Next->MatchingParen); + if (State && State->BreakBeforeClosingBrace) + break; + } } return End->TotalLength - Tok.TotalLength + 1; } @@ -64,7 +131,7 @@ static bool startsNextParameter(const FormatToken &Current, Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeComma) && (Previous.isNot(TT_InheritanceComma) || - !Style.BreakBeforeInheritanceComma)); + Style.BreakInheritanceList != FormatStyle::BILS_BeforeComma)); } static bool opensProtoMessageField(const FormatToken &LessTok, @@ -102,28 +169,59 @@ static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) { return Delimiter; } +// Returns the canonical delimiter for \p Language, or the empty string if no +// canonical delimiter is specified. +static StringRef +getCanonicalRawStringDelimiter(const FormatStyle &Style, + FormatStyle::LanguageKind Language) { + for (const auto &Format : Style.RawStringFormats) { + if (Format.Language == Language) + return StringRef(Format.CanonicalDelimiter); + } + return ""; +} + RawStringFormatStyleManager::RawStringFormatStyleManager( const FormatStyle &CodeStyle) { for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { - FormatStyle Style; - if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, - RawStringFormat.Language, &Style)) { - Style = getLLVMStyle(); - Style.Language = RawStringFormat.Language; + llvm::Optional<FormatStyle> LanguageStyle = + CodeStyle.GetLanguageStyle(RawStringFormat.Language); + if (!LanguageStyle) { + FormatStyle PredefinedStyle; + if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, + RawStringFormat.Language, &PredefinedStyle)) { + PredefinedStyle = getLLVMStyle(); + PredefinedStyle.Language = RawStringFormat.Language; + } + LanguageStyle = PredefinedStyle; + } + LanguageStyle->ColumnLimit = CodeStyle.ColumnLimit; + for (StringRef Delimiter : RawStringFormat.Delimiters) { + DelimiterStyle.insert({Delimiter, *LanguageStyle}); + } + for (StringRef EnclosingFunction : RawStringFormat.EnclosingFunctions) { + EnclosingFunctionStyle.insert({EnclosingFunction, *LanguageStyle}); } - Style.ColumnLimit = CodeStyle.ColumnLimit; - DelimiterStyle.insert({RawStringFormat.Delimiter, Style}); } } llvm::Optional<FormatStyle> -RawStringFormatStyleManager::get(StringRef Delimiter) const { +RawStringFormatStyleManager::getDelimiterStyle(StringRef Delimiter) const { auto It = DelimiterStyle.find(Delimiter); if (It == DelimiterStyle.end()) return None; return It->second; } +llvm::Optional<FormatStyle> +RawStringFormatStyleManager::getEnclosingFunctionStyle( + StringRef EnclosingFunction) const { + auto It = EnclosingFunctionStyle.find(EnclosingFunction); + if (It == EnclosingFunctionStyle.end()) + return None; + return It->second; +} + ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, const SourceManager &SourceMgr, @@ -154,7 +252,7 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, State.Column = 0; State.Line = Line; State.NextToken = Line->First; - State.Stack.push_back(ParenState(FirstIndent, FirstIndent, + State.Stack.push_back(ParenState(/*Tok=*/nullptr, FirstIndent, FirstIndent, /*AvoidBinPacking=*/false, /*NoLineBreak=*/false)); State.LineContainsContinuedForLoopSection = false; @@ -169,6 +267,7 @@ LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, // global scope. State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = true; + State.Stack.back().AlignColons = false; } // The first token has already been indented and thus consumed. @@ -222,6 +321,9 @@ bool ContinuationIndenter::canBreak(const LineState &State) { State.Stack.back().NoLineBreakInOperand) return false; + if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr)) + return false; + return !State.Stack.back().NoLineBreak; } @@ -235,6 +337,11 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { return true; if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) return true; + if (Style.Language == FormatStyle::LK_ObjC && + Current.ObjCSelectorNameParts > 1 && + Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) { + return true; + } if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && Style.isCpp() && @@ -255,7 +362,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Previous.ParameterCount > 1) || opensProtoMessageField(Previous, Style)) && Style.ColumnLimit > 0 && - getLengthToMatchingParen(Previous) + State.Column - 1 > + getLengthToMatchingParen(Previous, State.Stack) + State.Column - 1 > getColumnLimit(State)) return true; @@ -275,7 +382,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) && State.Line->startsWith(TT_ObjCMethodSpecifier)) return true; - if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound && + if (Current.is(TT_SelectorName) && !Previous.is(tok::at) && + State.Stack.back().ObjCSelectorNameFound && State.Stack.back().BreakBeforeParameter) return true; @@ -298,6 +406,12 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { Style.Language == FormatStyle::LK_JavaScript)) return true; + // If the template declaration spans multiple lines, force wrap before the + // function/class declaration + if (Previous.ClosesTemplateDeclaration && + State.Stack.back().BreakBeforeParameter && Current.CanBreakBefore) + return true; + if (State.Column <= NewLineColumn) return false; @@ -349,7 +463,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // for cases where the entire line does not fit on a single line as a // different LineFormatter would be used otherwise. if (Previous.ClosesTemplateDeclaration) - return true; + return Style.AlwaysBreakTemplateDeclarations != FormatStyle::BTDS_No; if (Previous.is(TT_FunctionAnnotationRParen)) return true; if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) && @@ -466,7 +580,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance // declaration unless there is multiple inheritance. - if (Style.BreakBeforeInheritanceComma && Current.is(TT_InheritanceColon)) + if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && + Current.is(TT_InheritanceColon)) + State.Stack.back().NoLineBreak = true; + if (Style.BreakInheritanceList == FormatStyle::BILS_AfterColon && + Previous.is(TT_InheritanceColon)) State.Stack.back().NoLineBreak = true; if (Current.is(TT_SelectorName) && @@ -661,11 +779,12 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, State.Stack.back().AlignColons = false; } else { State.Stack.back().ColonPos = - (Style.IndentWrappedFunctionNames + (shouldIndentWrappedSelectorName(Style, State.Line->Type) ? std::max(State.Stack.back().Indent, State.FirstIndent + Style.ContinuationIndentWidth) : State.Stack.back().Indent) + - NextNonComment->LongestObjCSelectorName; + std::max(NextNonComment->LongestObjCSelectorName, + NextNonComment->ColumnWidth); } } else if (State.Stack.back().AlignColons && State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) { @@ -693,7 +812,8 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, !State.Stack.back().AvoidBinPacking) || Previous.is(TT_BinaryOperator)) State.Stack.back().BreakBeforeParameter = false; - if (Previous.isOneOf(TT_TemplateCloser, TT_JavaAnnotation) && + if (PreviousNonComment && + PreviousNonComment->isOneOf(TT_TemplateCloser, TT_JavaAnnotation) && Current.NestingLevel == 0) State.Stack.back().BreakBeforeParameter = false; if (NextNonComment->is(tok::question) || @@ -826,7 +946,7 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { (Current.Next->is(TT_DictLiteral) || ((Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) && - Current.Next->isOneOf(TT_TemplateOpener, tok::l_brace)))) + Current.Next->isOneOf(tok::less, tok::l_brace)))) return State.Stack.back().Indent; if (NextNonComment->is(TT_ObjCStringLiteral) && State.StartOfStringLiteral != 0) @@ -851,20 +971,29 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if ((PreviousNonComment && (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->isOneOf( - TT_AttributeParen, TT_FunctionAnnotationRParen, TT_JavaAnnotation, - TT_LeadingJavaAnnotation))) || + TT_AttributeParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, + TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || (!Style.IndentWrappedFunctionNames && NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent); if (NextNonComment->is(TT_SelectorName)) { if (!State.Stack.back().ObjCSelectorNameFound) { - if (NextNonComment->LongestObjCSelectorName == 0) - return State.Stack.back().Indent; - return (Style.IndentWrappedFunctionNames - ? std::max(State.Stack.back().Indent, - State.FirstIndent + Style.ContinuationIndentWidth) - : State.Stack.back().Indent) + - NextNonComment->LongestObjCSelectorName - + unsigned MinIndent = State.Stack.back().Indent; + if (shouldIndentWrappedSelectorName(Style, State.Line->Type)) + MinIndent = std::max(MinIndent, + State.FirstIndent + Style.ContinuationIndentWidth); + // If LongestObjCSelectorName is 0, we are indenting the first + // part of an ObjC selector (or a selector component which is + // not colon-aligned due to block formatting). + // + // Otherwise, we are indenting a subsequent part of an ObjC + // selector which should be colon-aligned to the longest + // component of the ObjC selector. + // + // In either case, we want to respect Style.IndentWrappedFunctionNames. + return MinIndent + + std::max(NextNonComment->LongestObjCSelectorName, + NextNonComment->ColumnWidth) - NextNonComment->ColumnWidth; } if (!State.Stack.back().AlignColons) @@ -898,12 +1027,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) && Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) return State.Stack.back().Indent; + if (PreviousNonComment && PreviousNonComment->is(TT_InheritanceColon) && + Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) + return State.Stack.back().Indent; if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, TT_InheritanceComma)) return State.FirstIndent + Style.ConstructorInitializerIndentWidth; if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && !Current.isOneOf(tok::colon, tok::comment)) return ContinuationIndent; + if (Current.is(TT_ProtoExtensionLSquare)) + return State.Stack.back().Indent; if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment && PreviousNonComment->isNot(tok::r_brace)) // Ensure that we fall back to the continuation indent width instead of @@ -951,13 +1085,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, if (Current.isMemberAccess()) State.Stack.back().StartOfFunctionCall = !Current.NextOperator ? 0 : State.Column; - if (Current.is(TT_SelectorName)) { + if (Current.is(TT_SelectorName)) State.Stack.back().ObjCSelectorNameFound = true; - if (Style.IndentWrappedFunctionNames) { - State.Stack.back().Indent = - State.FirstIndent + Style.ContinuationIndentWidth; - } - } if (Current.is(TT_CtorInitializerColon) && Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) { // Indent 2 from the column, so: @@ -985,7 +1114,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, } if (Current.is(TT_InheritanceColon)) State.Stack.back().Indent = - State.FirstIndent + Style.ContinuationIndentWidth; + State.FirstIndent + Style.ConstructorInitializerIndentWidth; if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline) State.Stack.back().NestedBlockIndent = State.Column + Current.ColumnWidth + 1; @@ -1071,6 +1200,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, E = Current.FakeLParens.rend(); I != E; ++I) { ParenState NewParenState = State.Stack.back(); + NewParenState.Tok = nullptr; NewParenState.ContainsLineBreak = false; NewParenState.LastOperatorWrapped = true; NewParenState.NoLineBreak = @@ -1180,7 +1310,6 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, // void SomeFunction(vector< // break // int> v); // FIXME: We likely want to do this for more combinations of brackets. - // Verify that it is wanted for ObjC, too. if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) { NewIndent = std::max(NewIndent, State.Stack.back().Indent); LastSpace = std::max(LastSpace, State.Stack.back().Indent); @@ -1191,9 +1320,20 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, Current.MatchingParen->getPreviousNonComment() && Current.MatchingParen->getPreviousNonComment()->is(tok::comma); + // If ObjCBinPackProtocolList is unspecified, fall back to BinPackParameters + // for backwards compatibility. + bool ObjCBinPackProtocolList = + (Style.ObjCBinPackProtocolList == FormatStyle::BPS_Auto && + Style.BinPackParameters) || + Style.ObjCBinPackProtocolList == FormatStyle::BPS_Always; + + bool BinPackDeclaration = + (State.Line->Type != LT_ObjCDecl && Style.BinPackParameters) || + (State.Line->Type == LT_ObjCDecl && ObjCBinPackProtocolList); + AvoidBinPacking = (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) || - (State.Line->MustBeDeclaration && !Style.BinPackParameters) || + (State.Line->MustBeDeclaration && !BinPackDeclaration) || (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || (Style.ExperimentalAutoDetectBinPacking && (Current.PackingKind == PPK_OnePerLine || @@ -1204,7 +1344,7 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, if (Style.ColumnLimit) { // If this '[' opens an ObjC call, determine whether all parameters fit // into one line and put one per line if they don't. - if (getLengthToMatchingParen(Current) + State.Column > + if (getLengthToMatchingParen(Current, State.Stack) + State.Column > getColumnLimit(State)) BreakBeforeParameter = true; } else { @@ -1235,10 +1375,13 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, (Current.is(TT_TemplateOpener) && State.Stack.back().ContainsUnwrappedBuilder)); State.Stack.push_back( - ParenState(NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); + ParenState(&Current, NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; + State.Stack.back().IsInsideObjCArrayLiteral = + Current.is(TT_ArrayInitializerLSquare) && Current.Previous && + Current.Previous->is(tok::at); } void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { @@ -1251,9 +1394,34 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { if (State.Stack.size() > 1 && (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) || (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || - State.NextToken->is(TT_TemplateCloser))) + State.NextToken->is(TT_TemplateCloser) || + (Current.is(tok::greater) && Current.is(TT_DictLiteral)))) State.Stack.pop_back(); + // Reevaluate whether ObjC message arguments fit into one line. + // If a receiver spans multiple lines, e.g.: + // [[object block:^{ + // return 42; + // }] a:42 b:42]; + // BreakBeforeParameter is calculated based on an incorrect assumption + // (it is checked whether the whole expression fits into one line without + // considering a line break inside a message receiver). + // We check whether arguements fit after receiver scope closer (into the same + // line). + if (State.Stack.back().BreakBeforeParameter && Current.MatchingParen && + Current.MatchingParen->Previous) { + const FormatToken &CurrentScopeOpener = *Current.MatchingParen->Previous; + if (CurrentScopeOpener.is(TT_ObjCMethodExpr) && + CurrentScopeOpener.MatchingParen) { + int NecessarySpaceInLine = + getLengthToMatchingParen(CurrentScopeOpener, State.Stack) + + CurrentScopeOpener.TotalLength - Current.TotalLength - 1; + if (State.Column + Current.ColumnWidth + NecessarySpaceInLine <= + Style.ColumnLimit) + State.Stack.back().BreakBeforeParameter = false; + } + } + if (Current.is(tok::r_square)) { // If this ends the array subscript expr, reset the corresponding value. const FormatToken *NextNonComment = Current.getNextNonComment(); @@ -1269,7 +1437,8 @@ void ContinuationIndenter::moveStateToNewBlock(LineState &State) { NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace) ? Style.ObjCBlockIndentWidth : Style.IndentWidth); - State.Stack.push_back(ParenState(NewIndent, State.Stack.back().LastSpace, + State.Stack.push_back(ParenState(State.NextToken, NewIndent, + State.Stack.back().LastSpace, /*AvoidBinPacking=*/true, /*NoLineBreak=*/false)); State.Stack.back().NestedBlockIndent = NestedBlockIndent; @@ -1293,14 +1462,32 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( const FormatToken &Current, LineState &State, const FormatStyle &RawStringStyle, bool DryRun) { unsigned StartColumn = State.Column - Current.ColumnWidth; - auto Delimiter = *getRawStringDelimiter(Current.TokenText); + StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText); + StringRef NewDelimiter = + getCanonicalRawStringDelimiter(Style, RawStringStyle.Language); + if (NewDelimiter.empty() || OldDelimiter.empty()) + NewDelimiter = OldDelimiter; // The text of a raw string is between the leading 'R"delimiter(' and the // trailing 'delimiter)"'. - unsigned PrefixSize = 3 + Delimiter.size(); - unsigned SuffixSize = 2 + Delimiter.size(); + unsigned OldPrefixSize = 3 + OldDelimiter.size(); + unsigned OldSuffixSize = 2 + OldDelimiter.size(); + // We create a virtual text environment which expects a null-terminated + // string, so we cannot use StringRef. + std::string RawText = + Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize); + if (NewDelimiter != OldDelimiter) { + // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the + // raw string. + std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str(); + if (StringRef(RawText).contains(CanonicalDelimiterSuffix)) + NewDelimiter = OldDelimiter; + } + + unsigned NewPrefixSize = 3 + NewDelimiter.size(); + unsigned NewSuffixSize = 2 + NewDelimiter.size(); - // The first start column is the column the raw text starts. - unsigned FirstStartColumn = StartColumn + PrefixSize; + // The first start column is the column the raw text starts after formatting. + unsigned FirstStartColumn = StartColumn + NewPrefixSize; // The next start column is the intended indentation a line break inside // the raw string at level 0. It is determined by the following rules: @@ -1311,10 +1498,11 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( // These rules have the advantage that the formatted content both does not // violate the rectangle rule and visually flows within the surrounding // source. - bool ContentStartsOnNewline = Current.TokenText[PrefixSize] == '\n'; - unsigned NextStartColumn = ContentStartsOnNewline - ? State.Stack.back().Indent + Style.IndentWidth - : FirstStartColumn; + bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n'; + unsigned NextStartColumn = + ContentStartsOnNewline + ? State.Stack.back().NestedBlockIndent + Style.IndentWidth + : FirstStartColumn; // The last start column is the column the raw string suffix starts if it is // put on a newline. @@ -1325,11 +1513,8 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( // - if the raw string prefix does not start on a newline, it is the current // indent. unsigned LastStartColumn = Current.NewlinesBefore - ? FirstStartColumn - PrefixSize - : State.Stack.back().Indent; - - std::string RawText = - Current.TokenText.substr(PrefixSize).drop_back(SuffixSize); + ? FirstStartColumn - NewPrefixSize + : State.Stack.back().NestedBlockIndent; std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat( RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, @@ -1343,8 +1528,33 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( return 0; } if (!DryRun) { + if (NewDelimiter != OldDelimiter) { + // In 'R"delimiter(...', the delimiter starts 2 characters after the start + // of the token. + SourceLocation PrefixDelimiterStart = + Current.Tok.getLocation().getLocWithOffset(2); + auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter)); + if (PrefixErr) { + llvm::errs() + << "Failed to update the prefix delimiter of a raw string: " + << llvm::toString(std::move(PrefixErr)) << "\n"; + } + // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at + // position length - 1 - |delimiter|. + SourceLocation SuffixDelimiterStart = + Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() - + 1 - OldDelimiter.size()); + auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement( + SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter)); + if (SuffixErr) { + llvm::errs() + << "Failed to update the suffix delimiter of a raw string: " + << llvm::toString(std::move(SuffixErr)) << "\n"; + } + } SourceLocation OriginLoc = - Current.Tok.getLocation().getLocWithOffset(PrefixSize); + Current.Tok.getLocation().getLocWithOffset(OldPrefixSize); for (const tooling::Replacement &Fix : Fixes.first) { auto Err = Whitespaces.addReplacement(tooling::Replacement( SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), @@ -1357,8 +1567,14 @@ unsigned ContinuationIndenter::reformatRawStringLiteral( } unsigned RawLastLineEndColumn = getLastLineEndColumn( *NewCode, FirstStartColumn, Style.TabWidth, Encoding); - State.Column = RawLastLineEndColumn + SuffixSize; - return Fixes.second; + State.Column = RawLastLineEndColumn + NewSuffixSize; + // Since we're updating the column to after the raw string literal here, we + // have to manually add the penalty for the prefix R"delim( over the column + // limit. + unsigned PrefixExcessCharacters = + StartColumn + NewPrefixSize > Style.ColumnLimit ? + StartColumn + NewPrefixSize - Style.ColumnLimit : 0; + return Fixes.second + PrefixExcessCharacters * Style.PenaltyExcessCharacter; } unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, @@ -1384,7 +1600,7 @@ unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, // Compute the raw string style to use in case this is a raw string literal // that can be reformatted. auto RawStringStyle = getRawStringStyle(Current, State); - if (RawStringStyle) { + if (RawStringStyle && !Current.Finalized) { Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun); } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) { // Don't break multi-line tokens other than block comments and raw string @@ -1430,6 +1646,26 @@ unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, return Penalty; } +// Returns the enclosing function name of a token, or the empty string if not +// found. +static StringRef getEnclosingFunctionName(const FormatToken &Current) { + // Look for: 'function(' or 'function<templates>(' before Current. + auto Tok = Current.getPreviousNonComment(); + if (!Tok || !Tok->is(tok::l_paren)) + return ""; + Tok = Tok->getPreviousNonComment(); + if (!Tok) + return ""; + if (Tok->is(TT_TemplateCloser)) { + Tok = Tok->MatchingParen; + if (Tok) + Tok = Tok->getPreviousNonComment(); + } + if (!Tok || !Tok->is(tok::identifier)) + return ""; + return Tok->TokenText; +} + llvm::Optional<FormatStyle> ContinuationIndenter::getRawStringStyle(const FormatToken &Current, const LineState &State) { @@ -1438,7 +1674,10 @@ ContinuationIndenter::getRawStringStyle(const FormatToken &Current, auto Delimiter = getRawStringDelimiter(Current.TokenText); if (!Delimiter) return None; - auto RawStringStyle = RawStringFormats.get(*Delimiter); + auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter); + if (!RawStringStyle && Delimiter->empty()) + RawStringStyle = RawStringFormats.getEnclosingFunctionStyle( + getEnclosingFunctionName(Current)); if (!RawStringStyle) return None; RawStringStyle->ColumnLimit = getColumnLimit(State); @@ -1468,6 +1707,11 @@ std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken( // likely want to terminate the string before any line breaking is done. if (Current.IsUnterminatedLiteral) return nullptr; + // Don't break string literals inside Objective-C array literals (doing so + // raises the warning -Wobjc-string-concatenation). + if (State.Stack.back().IsInsideObjCArrayLiteral) { + return nullptr; + } StringRef Text = Current.TokenText; StringRef Prefix; @@ -1482,9 +1726,16 @@ std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken( Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { + // We need this to address the case where there is an unbreakable tail + // only if certain other formatting decisions have been taken. The + // UnbreakableTailLength of Current is an overapproximation is that case + // and we need to be correct here. + unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) + ? 0 + : Current.UnbreakableTailLength; return llvm::make_unique<BreakableStringLiteral>( - Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective, - Encoding, Style); + Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, + State.Line->InPPDirective, Encoding, Style); } } else if (Current.is(TT_BlockComment)) { if (!Style.ReflowComments || @@ -1559,12 +1810,12 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Token->adaptStartOfLine(0, Whitespaces); unsigned Penalty = 0; - DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn - << ".\n"); + LLVM_DEBUG(llvm::dbgs() << "Breaking protruding token at column " + << StartColumn << ".\n"); for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); LineIndex != EndIndex; ++LineIndex) { - DEBUG(llvm::dbgs() << " Line: " << LineIndex << " (Reflow: " << Reflow - << ")\n"); + LLVM_DEBUG(llvm::dbgs() + << " Line: " << LineIndex << " (Reflow: " << Reflow << ")\n"); NewBreakBefore = false; // If we did reflow the previous line, we'll try reflowing again. Otherwise // we'll start reflowing if the current line is broken or whitespace is @@ -1572,11 +1823,11 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, bool TryReflow = Reflow; // Break the current token until we can fit the rest of the line. while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { - DEBUG(llvm::dbgs() << " Over limit, need: " - << (ContentStartColumn + RemainingTokenColumns) - << ", space: " << ColumnLimit - << ", reflown prefix: " << ContentStartColumn - << ", offset in line: " << TailOffset << "\n"); + LLVM_DEBUG(llvm::dbgs() << " Over limit, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); // If the current token doesn't fit, find the latest possible split in the // current line so that breaking at it will be under the column limit. // FIXME: Use the earliest possible split while reflowing to correctly @@ -1591,7 +1842,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // The last line's penalty is handled in addNextStateToQueue(). Penalty += Style.PenaltyExcessCharacter * (ContentStartColumn + RemainingTokenColumns - ColumnLimit); - DEBUG(llvm::dbgs() << " No break opportunity.\n"); + LLVM_DEBUG(llvm::dbgs() << " No break opportunity.\n"); break; } assert(Split.first != 0); @@ -1618,7 +1869,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // ^--------------- to next split columns unsigned ToSplitColumns = Token->getRangeLength( LineIndex, TailOffset, Split.first, ContentStartColumn); - DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n"); + LLVM_DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n"); BreakableToken::Split NextSplit = Token->getSplit( LineIndex, TailOffset + Split.first + Split.second, ColumnLimit, @@ -1638,9 +1889,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // unbreakable sequence. ToNextSplitColumns = Token->getLengthAfterCompression(ToNextSplitColumns, Split); - DEBUG(llvm::dbgs() << " ContentStartColumn: " << ContentStartColumn - << "\n"); - DEBUG(llvm::dbgs() << " ToNextSplit: " << ToNextSplitColumns << "\n"); + LLVM_DEBUG(llvm::dbgs() + << " ContentStartColumn: " << ContentStartColumn << "\n"); + LLVM_DEBUG(llvm::dbgs() + << " ToNextSplit: " << ToNextSplitColumns << "\n"); // If the whitespace compression makes us fit, continue on the current // line. bool ContinueOnLine = @@ -1652,16 +1904,16 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, ExcessCharactersPenalty = (ContentStartColumn + ToNextSplitColumns - ColumnLimit) * Style.PenaltyExcessCharacter; - DEBUG(llvm::dbgs() - << " Penalty excess: " << ExcessCharactersPenalty - << "\n break : " << NewBreakPenalty << "\n"); + LLVM_DEBUG(llvm::dbgs() + << " Penalty excess: " << ExcessCharactersPenalty + << "\n break : " << NewBreakPenalty << "\n"); if (ExcessCharactersPenalty < NewBreakPenalty) { Exceeded = true; ContinueOnLine = true; } } if (ContinueOnLine) { - DEBUG(llvm::dbgs() << " Continuing on line...\n"); + LLVM_DEBUG(llvm::dbgs() << " Continuing on line...\n"); // The current line fits after compressing the whitespace - reflow // the next line into it if possible. TryReflow = true; @@ -1677,7 +1929,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, continue; } } - DEBUG(llvm::dbgs() << " Breaking...\n"); + LLVM_DEBUG(llvm::dbgs() << " Breaking...\n"); ContentStartColumn = Token->getContentStartColumn(LineIndex, /*Break=*/true); unsigned NewRemainingTokenColumns = Token->getRemainingLength( @@ -1693,8 +1945,8 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, } assert(NewRemainingTokenColumns < RemainingTokenColumns); - DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first - << ", " << Split.second << "\n"); + LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first + << ", " << Split.second << "\n"); if (!DryRun) Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); @@ -1732,11 +1984,12 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // the next logical line. BreakableToken::Split SplitBeforeNext = Token->getReflowSplit(NextLineIndex, CommentPragmasRegex); - DEBUG(llvm::dbgs() << " Size of reflown text: " << ContentStartColumn - << "\n Potential reflow split: "); + LLVM_DEBUG(llvm::dbgs() + << " Size of reflown text: " << ContentStartColumn + << "\n Potential reflow split: "); if (SplitBeforeNext.first != StringRef::npos) { - DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", " - << SplitBeforeNext.second << "\n"); + LLVM_DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", " + << SplitBeforeNext.second << "\n"); TailOffset = SplitBeforeNext.first + SplitBeforeNext.second; // If the rest of the next line fits into the current line below the // column limit, we can safely reflow. @@ -1744,11 +1997,12 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, NextLineIndex, TailOffset, ContentStartColumn); Reflow = true; if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { - DEBUG(llvm::dbgs() << " Over limit after reflow, need: " - << (ContentStartColumn + RemainingTokenColumns) - << ", space: " << ColumnLimit - << ", reflown prefix: " << ContentStartColumn - << ", offset in line: " << TailOffset << "\n"); + LLVM_DEBUG(llvm::dbgs() + << " Over limit after reflow, need: " + << (ContentStartColumn + RemainingTokenColumns) + << ", space: " << ColumnLimit + << ", reflown prefix: " << ContentStartColumn + << ", offset in line: " << TailOffset << "\n"); // If the whole next line does not fit, try to find a point in // the next line at which we can break so that attaching the part // of the next line to that break point onto the current line is @@ -1757,7 +2011,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, Token->getSplit(NextLineIndex, TailOffset, ColumnLimit, ContentStartColumn, CommentPragmasRegex); if (Split.first == StringRef::npos) { - DEBUG(llvm::dbgs() << " Did not find later break\n"); + LLVM_DEBUG(llvm::dbgs() << " Did not find later break\n"); Reflow = false; } else { // Check whether the first split point gets us below the column @@ -1766,9 +2020,9 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, unsigned ToSplitColumns = Token->getRangeLength( NextLineIndex, TailOffset, Split.first, ContentStartColumn); if (ContentStartColumn + ToSplitColumns > ColumnLimit) { - DEBUG(llvm::dbgs() << " Next split protrudes, need: " - << (ContentStartColumn + ToSplitColumns) - << ", space: " << ColumnLimit); + LLVM_DEBUG(llvm::dbgs() << " Next split protrudes, need: " + << (ContentStartColumn + ToSplitColumns) + << ", space: " << ColumnLimit); unsigned ExcessCharactersPenalty = (ContentStartColumn + ToSplitColumns - ColumnLimit) * Style.PenaltyExcessCharacter; @@ -1779,7 +2033,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, } } } else { - DEBUG(llvm::dbgs() << "not found.\n"); + LLVM_DEBUG(llvm::dbgs() << "not found.\n"); } } if (!Reflow) { @@ -1821,7 +2075,7 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, BreakableToken::Split SplitAfterLastLine = Token->getSplitAfterLastLine(TailOffset); if (SplitAfterLastLine.first != StringRef::npos) { - DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); + LLVM_DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); if (!DryRun) Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, Whitespaces); @@ -1875,7 +2129,7 @@ bool ContinuationIndenter::nextIsMultilineString(const LineState &State) { if (Current.getNextNonComment() && Current.getNextNonComment()->isStringLiteral()) return true; // Implicit concatenation. - if (Style.ColumnLimit != 0 && + if (Style.ColumnLimit != 0 && Style.BreakStringLiterals && State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > Style.ColumnLimit) return true; // String will be split. diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h index ded7bfab4267..4ff05ba99f1a 100644 --- a/lib/Format/ContinuationIndenter.h +++ b/lib/Format/ContinuationIndenter.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements an indenter that manages the indentation of +/// This file implements an indenter that manages the indentation of /// continuations. /// //===----------------------------------------------------------------------===// @@ -38,15 +38,19 @@ class WhitespaceManager; struct RawStringFormatStyleManager { llvm::StringMap<FormatStyle> DelimiterStyle; + llvm::StringMap<FormatStyle> EnclosingFunctionStyle; RawStringFormatStyleManager(const FormatStyle &CodeStyle); - llvm::Optional<FormatStyle> get(StringRef Delimiter) const; + llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; + + llvm::Optional<FormatStyle> + getEnclosingFunctionStyle(StringRef EnclosingFunction) const; }; class ContinuationIndenter { public: - /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in + /// Constructs a \c ContinuationIndenter to format \p Line starting in /// column \p FirstIndent. ContinuationIndenter(const FormatStyle &Style, const AdditionalKeywords &Keywords, @@ -55,7 +59,7 @@ public: encoding::Encoding Encoding, bool BinPackInconclusiveFunctions); - /// \brief Get the initial state, i.e. the state after placing \p Line's + /// Get the initial state, i.e. the state after placing \p Line's /// first token at \p FirstIndent. When reformatting a fragment of code, as in /// the case of formatting inside raw string literals, \p FirstStartColumn is /// the column at which the state of the parent formatter is. @@ -64,13 +68,13 @@ public: // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a // better home. - /// \brief Returns \c true, if a line break after \p State is allowed. + /// Returns \c true, if a line break after \p State is allowed. bool canBreak(const LineState &State); - /// \brief Returns \c true, if a line break after \p State is mandatory. + /// Returns \c true, if a line break after \p State is mandatory. bool mustBreak(const LineState &State); - /// \brief Appends the next token to \p State and updates information + /// Appends the next token to \p State and updates information /// necessary for indentation. /// /// Puts the token on the current line if \p Newline is \c false and adds a @@ -81,28 +85,28 @@ public: unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces = 0); - /// \brief Get the column limit for this line. This is the style's column + /// Get the column limit for this line. This is the style's column /// limit, potentially reduced for preprocessor definitions. unsigned getColumnLimit(const LineState &State) const; private: - /// \brief Mark the next token as consumed in \p State and modify its stacks + /// Mark the next token as consumed in \p State and modify its stacks /// accordingly. unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); - /// \brief Update 'State' according to the next token's fake left parentheses. + /// Update 'State' according to the next token's fake left parentheses. void moveStatePastFakeLParens(LineState &State, bool Newline); - /// \brief Update 'State' according to the next token's fake r_parens. + /// Update 'State' according to the next token's fake r_parens. void moveStatePastFakeRParens(LineState &State); - /// \brief Update 'State' according to the next token being one of "(<{[". + /// Update 'State' according to the next token being one of "(<{[". void moveStatePastScopeOpener(LineState &State, bool Newline); - /// \brief Update 'State' according to the next token being one of ")>}]". + /// Update 'State' according to the next token being one of ")>}]". void moveStatePastScopeCloser(LineState &State); - /// \brief Update 'State' with the next token opening a nested block. + /// Update 'State' with the next token opening a nested block. void moveStateToNewBlock(LineState &State); - /// \brief Reformats a raw string literal. + /// Reformats a raw string literal. /// /// \returns An extra penalty induced by reformatting the token. unsigned reformatRawStringLiteral(const FormatToken &Current, @@ -110,17 +114,17 @@ private: const FormatStyle &RawStringStyle, bool DryRun); - /// \brief If the current token is at the end of the current line, handle + /// If the current token is at the end of the current line, handle /// the transition to the next line. unsigned handleEndOfLine(const FormatToken &Current, LineState &State, bool DryRun, bool AllowBreak); - /// \brief If \p Current is a raw string that is configured to be reformatted, + /// If \p Current is a raw string that is configured to be reformatted, /// return the style to be used. llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, const LineState &State); - /// \brief If the current token sticks out over the end of the line, break + /// If the current token sticks out over the end of the line, break /// it if possible. /// /// \returns A pair (penalty, exceeded), where penalty is the extra penalty @@ -143,13 +147,13 @@ private: bool AllowBreak, bool DryRun, bool Strict); - /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr + /// Returns the \c BreakableToken starting at \p Current, or nullptr /// if the current token cannot be broken. std::unique_ptr<BreakableToken> createBreakableToken(const FormatToken &Current, LineState &State, bool AllowBreak); - /// \brief Appends the next token to \p State and updates information + /// Appends the next token to \p State and updates information /// necessary for indentation. /// /// Puts the token on the current line. @@ -159,7 +163,7 @@ private: void addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned ExtraSpaces); - /// \brief Appends the next token to \p State and updates information + /// Appends the next token to \p State and updates information /// necessary for indentation. /// /// Adds a line break and necessary indentation. @@ -168,17 +172,17 @@ private: /// \c Replacement. unsigned addTokenOnNewLine(LineState &State, bool DryRun); - /// \brief Calculate the new column for a line wrap before the next token. + /// Calculate the new column for a line wrap before the next token. unsigned getNewLineColumn(const LineState &State); - /// \brief Adds a multiline token to the \p State. + /// Adds a multiline token to the \p State. /// /// \returns Extra penalty for the first line of the literal: last line is /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't /// matter, as we don't change them. unsigned addMultilineToken(const FormatToken &Current, LineState &State); - /// \brief Returns \c true if the next token starts a multiline string + /// Returns \c true if the next token starts a multiline string /// literal. /// /// This includes implicitly concatenated strings, strings that will be broken @@ -196,124 +200,136 @@ private: }; struct ParenState { - ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, - bool NoLineBreak) - : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent), - BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking), - BreakBeforeParameter(false), NoLineBreak(NoLineBreak), - NoLineBreakInOperand(false), LastOperatorWrapped(true), - ContainsLineBreak(false), ContainsUnwrappedBuilder(false), - AlignColons(true), ObjCSelectorNameFound(false), - HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} - - /// \brief The position to which a specific parenthesis level needs to be + ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, + bool AvoidBinPacking, bool NoLineBreak) + : Tok(Tok), Indent(Indent), LastSpace(LastSpace), + NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), + AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), + NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), + LastOperatorWrapped(true), ContainsLineBreak(false), + ContainsUnwrappedBuilder(false), AlignColons(true), + ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), + NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {} + + /// \brief The token opening this parenthesis level, or nullptr if this level + /// is opened by fake parenthesis. + /// + /// Not considered for memoization as it will always have the same value at + /// the same token. + const FormatToken *Tok; + + /// The position to which a specific parenthesis level needs to be /// indented. unsigned Indent; - /// \brief The position of the last space on each level. + /// The position of the last space on each level. /// /// Used e.g. to break like: /// functionCall(Parameter, otherCall( /// OtherParameter)); unsigned LastSpace; - /// \brief If a block relative to this parenthesis level gets wrapped, indent + /// If a block relative to this parenthesis level gets wrapped, indent /// it this much. unsigned NestedBlockIndent; - /// \brief The position the first "<<" operator encountered on each level. + /// The position the first "<<" operator encountered on each level. /// /// Used to align "<<" operators. 0 if no such operator has been encountered /// on a level. unsigned FirstLessLess = 0; - /// \brief The column of a \c ? in a conditional expression; + /// The column of a \c ? in a conditional expression; unsigned QuestionColumn = 0; - /// \brief The position of the colon in an ObjC method declaration/call. + /// The position of the colon in an ObjC method declaration/call. unsigned ColonPos = 0; - /// \brief The start of the most recent function in a builder-type call. + /// The start of the most recent function in a builder-type call. unsigned StartOfFunctionCall = 0; - /// \brief Contains the start of array subscript expressions, so that they + /// Contains the start of array subscript expressions, so that they /// can be aligned. unsigned StartOfArraySubscripts = 0; - /// \brief If a nested name specifier was broken over multiple lines, this + /// If a nested name specifier was broken over multiple lines, this /// contains the start column of the second line. Otherwise 0. unsigned NestedNameSpecifierContinuation = 0; - /// \brief If a call expression was broken over multiple lines, this + /// If a call expression was broken over multiple lines, this /// contains the start column of the second line. Otherwise 0. unsigned CallContinuation = 0; - /// \brief The column of the first variable name in a variable declaration. + /// The column of the first variable name in a variable declaration. /// /// Used to align further variables if necessary. unsigned VariablePos = 0; - /// \brief Whether a newline needs to be inserted before the block's closing + /// Whether a newline needs to be inserted before the block's closing /// brace. /// /// We only want to insert a newline before the closing brace if there also /// was a newline after the beginning left brace. bool BreakBeforeClosingBrace : 1; - /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple + /// Avoid bin packing, i.e. multiple parameters/elements on multiple /// lines, in this context. bool AvoidBinPacking : 1; - /// \brief Break after the next comma (or all the commas in this context if + /// Break after the next comma (or all the commas in this context if /// \c AvoidBinPacking is \c true). bool BreakBeforeParameter : 1; - /// \brief Line breaking in this context would break a formatting rule. + /// Line breaking in this context would break a formatting rule. bool NoLineBreak : 1; - /// \brief Same as \c NoLineBreak, but is restricted until the end of the + /// Same as \c NoLineBreak, but is restricted until the end of the /// operand (including the next ","). bool NoLineBreakInOperand : 1; - /// \brief True if the last binary operator on this level was wrapped to the + /// True if the last binary operator on this level was wrapped to the /// next line. bool LastOperatorWrapped : 1; - /// \brief \c true if this \c ParenState already contains a line-break. + /// \c true if this \c ParenState already contains a line-break. /// /// The first line break in a certain \c ParenState causes extra penalty so /// that clang-format prefers similar breaks, i.e. breaks in the same /// parenthesis. bool ContainsLineBreak : 1; - /// \brief \c true if this \c ParenState contains multiple segments of a + /// \c true if this \c ParenState contains multiple segments of a /// builder-type call on one line. bool ContainsUnwrappedBuilder : 1; - /// \brief \c true if the colons of the curren ObjC method expression should + /// \c true if the colons of the curren ObjC method expression should /// be aligned. /// /// Not considered for memoization as it will always have the same value at /// the same token. bool AlignColons : 1; - /// \brief \c true if at least one selector name was found in the current + /// \c true if at least one selector name was found in the current /// ObjC method expression. /// /// Not considered for memoization as it will always have the same value at /// the same token. bool ObjCSelectorNameFound : 1; - /// \brief \c true if there are multiple nested blocks inside these parens. + /// \c true if there are multiple nested blocks inside these parens. /// /// Not considered for memoization as it will always have the same value at /// the same token. bool HasMultipleNestedBlocks : 1; - // \brief The start of a nested block (e.g. lambda introducer in C++ or - // "function" in JavaScript) is not wrapped to a new line. + /// The start of a nested block (e.g. lambda introducer in C++ or + /// "function" in JavaScript) is not wrapped to a new line. bool NestedBlockInlined : 1; + /// \c true if the current \c ParenState represents an Objective-C + /// array literal. + bool IsInsideObjCArrayLiteral : 1; + bool operator<(const ParenState &Other) const { if (Indent != Other.Indent) return Indent < Other.Indent; @@ -355,37 +371,37 @@ struct ParenState { } }; -/// \brief The current state when indenting a unwrapped line. +/// The current state when indenting a unwrapped line. /// /// As the indenting tries different combinations this is copied by value. struct LineState { - /// \brief The number of used columns in the current line. + /// The number of used columns in the current line. unsigned Column; - /// \brief The token that needs to be next formatted. + /// The token that needs to be next formatted. FormatToken *NextToken; - /// \brief \c true if this line contains a continued for-loop section. + /// \c true if this line contains a continued for-loop section. bool LineContainsContinuedForLoopSection; - /// \brief \c true if \p NextToken should not continue this line. + /// \c true if \p NextToken should not continue this line. bool NoContinuation; - /// \brief The \c NestingLevel at the start of this line. + /// The \c NestingLevel at the start of this line. unsigned StartOfLineLevel; - /// \brief The lowest \c NestingLevel on the current line. + /// The lowest \c NestingLevel on the current line. unsigned LowestLevelOnLine; - /// \brief The start column of the string literal, if we're in a string + /// The start column of the string literal, if we're in a string /// literal sequence, 0 otherwise. unsigned StartOfStringLiteral; - /// \brief A stack keeping track of properties applying to parenthesis + /// A stack keeping track of properties applying to parenthesis /// levels. std::vector<ParenState> Stack; - /// \brief Ignore the stack of \c ParenStates for state comparison. + /// Ignore the stack of \c ParenStates for state comparison. /// /// In long and deeply nested unwrapped lines, the current algorithm can /// be insufficient for finding the best formatting with a reasonable amount @@ -400,15 +416,15 @@ struct LineState { /// FIXME: Come up with a better algorithm instead. bool IgnoreStackForComparison; - /// \brief The indent of the first token. + /// The indent of the first token. unsigned FirstIndent; - /// \brief The line that is being formatted. + /// The line that is being formatted. /// /// Does not need to be considered for memoization because it doesn't change. const AnnotatedLine *Line; - /// \brief Comparison operator to be able to used \c LineState in \c map. + /// Comparison operator to be able to used \c LineState in \c map. bool operator<(const LineState &Other) const { if (NextToken != Other.NextToken) return NextToken < Other.NextToken; diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h index 3339597b4edd..4c877e7e49d5 100644 --- a/lib/Format/Encoding.h +++ b/lib/Format/Encoding.h @@ -1,4 +1,4 @@ -//===--- Encoding.h - Format C++ code -------------------------------------===// +//===--- Encoding.h - Format C++ code ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Contains functions for text encoding manipulation. Supports UTF-8, +/// Contains functions for text encoding manipulation. Supports UTF-8, /// 8-bit encodings and escape sequences in C++ string literals. /// //===----------------------------------------------------------------------===// @@ -30,7 +30,7 @@ enum Encoding { Encoding_Unknown // We treat all other encodings as 8-bit encodings. }; -/// \brief Detects encoding of the Text. If the Text can be decoded using UTF-8, +/// Detects encoding of the Text. If the Text can be decoded using UTF-8, /// it is considered UTF8, otherwise we treat it as some 8-bit encoding. inline Encoding detectEncoding(StringRef Text) { const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin()); @@ -40,7 +40,7 @@ inline Encoding detectEncoding(StringRef Text) { return Encoding_Unknown; } -/// \brief Returns the number of columns required to display the \p Text on a +/// Returns the number of columns required to display the \p Text on a /// generic Unicode-capable terminal. Text is assumed to use the specified /// \p Encoding. inline unsigned columnWidth(StringRef Text, Encoding Encoding) { @@ -56,7 +56,7 @@ inline unsigned columnWidth(StringRef Text, Encoding Encoding) { return Text.size(); } -/// \brief Returns the number of columns required to display the \p Text, +/// Returns the number of columns required to display the \p Text, /// starting from the \p StartColumn on a terminal with the \p TabWidth. The /// text is assumed to use the specified \p Encoding. inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, @@ -73,7 +73,7 @@ inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, } } -/// \brief Gets the number of bytes in a sequence representing a single +/// Gets the number of bytes in a sequence representing a single /// codepoint and starting with FirstChar in the specified Encoding. inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { switch (Encoding) { @@ -91,7 +91,7 @@ inline bool isHexDigit(char c) { ('A' <= c && c <= 'F'); } -/// \brief Gets the length of an escape sequence inside a C++ string literal. +/// Gets the length of an escape sequence inside a C++ string literal. /// Text should span from the beginning of the escape sequence (starting with a /// backslash) to the end of the string literal. inline unsigned getEscapeSequenceLength(StringRef Text) { diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 217c6729ee39..59d34308c0a9 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements functions declared in Format.h. This will be +/// This file implements functions declared in Format.h. This will be /// split into separate files as we go. /// //===----------------------------------------------------------------------===// @@ -31,7 +31,9 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/VirtualFileSystem.h" #include "clang/Lex/Lexer.h" +#include "clang/Tooling/Inclusions/HeaderIncludes.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Path.h" @@ -39,13 +41,14 @@ #include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <memory> +#include <mutex> #include <string> +#include <unordered_map> #define DEBUG_TYPE "format-formatter" using clang::format::FormatStyle; -LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::IncludeCategory) LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat) namespace llvm { @@ -104,6 +107,14 @@ template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { } }; +template <> struct ScalarEnumerationTraits<FormatStyle::BinPackStyle> { + static void enumeration(IO &IO, FormatStyle::BinPackStyle &Value) { + IO.enumCase(Value, "Auto", FormatStyle::BPS_Auto); + IO.enumCase(Value, "Always", FormatStyle::BPS_Always); + IO.enumCase(Value, "Never", FormatStyle::BPS_Never); + } +}; + template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { IO.enumCase(Value, "All", FormatStyle::BOS_All); @@ -138,6 +149,16 @@ struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> { }; template <> +struct ScalarEnumerationTraits<FormatStyle::BreakInheritanceListStyle> { + static void + enumeration(IO &IO, FormatStyle::BreakInheritanceListStyle &Value) { + IO.enumCase(Value, "BeforeColon", FormatStyle::BILS_BeforeColon); + IO.enumCase(Value, "BeforeComma", FormatStyle::BILS_BeforeComma); + IO.enumCase(Value, "AfterColon", FormatStyle::BILS_AfterColon); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> { static void enumeration(IO &IO, FormatStyle::PPDirectiveIndentStyle &Value) { IO.enumCase(Value, "None", FormatStyle::PPDIS_None); @@ -158,6 +179,19 @@ struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> { }; template <> +struct ScalarEnumerationTraits<FormatStyle::BreakTemplateDeclarationsStyle> { + static void enumeration(IO &IO, FormatStyle::BreakTemplateDeclarationsStyle &Value) { + IO.enumCase(Value, "No", FormatStyle::BTDS_No); + IO.enumCase(Value, "MultiLine", FormatStyle::BTDS_MultiLine); + IO.enumCase(Value, "Yes", FormatStyle::BTDS_Yes); + + // For backward compatibility. + IO.enumCase(Value, "false", FormatStyle::BTDS_MultiLine); + IO.enumCase(Value, "true", FormatStyle::BTDS_Yes); + } +}; + +template <> struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { static void enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { @@ -326,8 +360,19 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); + + bool BreakBeforeInheritanceComma = false; IO.mapOptional("BreakBeforeInheritanceComma", - Style.BreakBeforeInheritanceComma); + BreakBeforeInheritanceComma); + IO.mapOptional("BreakInheritanceList", + Style.BreakInheritanceList); + // If BreakBeforeInheritanceComma was specified but + // BreakInheritance was not, initialize the latter from the + // former for backwards compatibility. + if (BreakBeforeInheritanceComma && + Style.BreakInheritanceList == FormatStyle::BILS_BeforeColon) + Style.BreakInheritanceList = FormatStyle::BILS_BeforeComma; + IO.mapOptional("BreakBeforeTernaryOperators", Style.BreakBeforeTernaryOperators); @@ -361,9 +406,9 @@ template <> struct MappingTraits<FormatStyle> { Style.ExperimentalAutoDetectBinPacking); IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); IO.mapOptional("ForEachMacros", Style.ForEachMacros); - IO.mapOptional("IncludeBlocks", Style.IncludeBlocks); - IO.mapOptional("IncludeCategories", Style.IncludeCategories); - IO.mapOptional("IncludeIsMainRegex", Style.IncludeIsMainRegex); + IO.mapOptional("IncludeBlocks", Style.IncludeStyle.IncludeBlocks); + IO.mapOptional("IncludeCategories", Style.IncludeStyle.IncludeCategories); + IO.mapOptional("IncludeIsMainRegex", Style.IncludeStyle.IncludeIsMainRegex); IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives); IO.mapOptional("IndentWidth", Style.IndentWidth); @@ -377,6 +422,7 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd); IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); + IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList); IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); IO.mapOptional("ObjCSpaceBeforeProtocolList", @@ -388,6 +434,8 @@ template <> struct MappingTraits<FormatStyle> { IO.mapOptional("PenaltyBreakFirstLessLess", Style.PenaltyBreakFirstLessLess); IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); + IO.mapOptional("PenaltyBreakTemplateDeclaration", + Style.PenaltyBreakTemplateDeclaration); IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", Style.PenaltyReturnTypeOnItsOwnLine); @@ -401,7 +449,15 @@ template <> struct MappingTraits<FormatStyle> { Style.SpaceAfterTemplateKeyword); IO.mapOptional("SpaceBeforeAssignmentOperators", Style.SpaceBeforeAssignmentOperators); + IO.mapOptional("SpaceBeforeCpp11BracedList", + Style.SpaceBeforeCpp11BracedList); + IO.mapOptional("SpaceBeforeCtorInitializerColon", + Style.SpaceBeforeCtorInitializerColon); + IO.mapOptional("SpaceBeforeInheritanceColon", + Style.SpaceBeforeInheritanceColon); IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); + IO.mapOptional("SpaceBeforeRangeBasedForLoopColon", + Style.SpaceBeforeRangeBasedForLoopColon); IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); IO.mapOptional("SpacesBeforeTrailingComments", Style.SpacesBeforeTrailingComments); @@ -438,25 +494,12 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { } }; -template <> struct MappingTraits<FormatStyle::IncludeCategory> { - static void mapping(IO &IO, FormatStyle::IncludeCategory &Category) { - IO.mapOptional("Regex", Category.Regex); - IO.mapOptional("Priority", Category.Priority); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::IncludeBlocksStyle> { - static void enumeration(IO &IO, FormatStyle::IncludeBlocksStyle &Value) { - IO.enumCase(Value, "Preserve", FormatStyle::IBS_Preserve); - IO.enumCase(Value, "Merge", FormatStyle::IBS_Merge); - IO.enumCase(Value, "Regroup", FormatStyle::IBS_Regroup); - } -}; - template <> struct MappingTraits<FormatStyle::RawStringFormat> { static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { - IO.mapOptional("Delimiter", Format.Delimiter); IO.mapOptional("Language", Format.Language); + IO.mapOptional("Delimiters", Format.Delimiters); + IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions); + IO.mapOptional("CanonicalDelimiter", Format.CanonicalDelimiter); IO.mapOptional("BasedOnStyle", Format.BasedOnStyle); } }; @@ -493,7 +536,7 @@ namespace clang { namespace format { const std::error_category &getParseCategory() { - static ParseErrorCategory C; + static const ParseErrorCategory C{}; return C; } std::error_code make_error_code(ParseError e) { @@ -593,7 +636,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; - LLVMStyle.AlwaysBreakTemplateDeclarations = false; + LLVMStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; LLVMStyle.BinPackArguments = true; LLVMStyle.BinPackParameters = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; @@ -604,7 +647,7 @@ FormatStyle getLLVMStyle() { false, false, true, true, true}; LLVMStyle.BreakAfterJavaFieldAnnotations = false; LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; - LLVMStyle.BreakBeforeInheritanceComma = false; + LLVMStyle.BreakInheritanceList = FormatStyle::BILS_BeforeColon; LLVMStyle.BreakStringLiterals = true; LLVMStyle.ColumnLimit = 80; LLVMStyle.CommentPragmas = "^ IWYU pragma:"; @@ -619,11 +662,12 @@ FormatStyle getLLVMStyle() { LLVMStyle.ForEachMacros.push_back("foreach"); LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); - LLVMStyle.IncludeCategories = {{"^\"(llvm|llvm-c|clang|clang-c)/", 2}, - {"^(<|\"(gtest|gmock|isl|json)/)", 3}, - {".*", 1}}; - LLVMStyle.IncludeIsMainRegex = "(Test)?$"; - LLVMStyle.IncludeBlocks = FormatStyle::IBS_Preserve; + LLVMStyle.IncludeStyle.IncludeCategories = { + {"^\"(llvm|llvm-c|clang|clang-c)/", 2}, + {"^(<|\"(gtest|gmock|isl|json)/)", 3}, + {".*", 1}}; + LLVMStyle.IncludeStyle.IncludeIsMainRegex = "(Test)?$"; + LLVMStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; LLVMStyle.IndentCaseLabels = false; LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; LLVMStyle.IndentWrappedFunctionNames = false; @@ -634,6 +678,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; + LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto; LLVMStyle.ObjCBlockIndentWidth = 2; LLVMStyle.ObjCSpaceAfterProperty = false; LLVMStyle.ObjCSpaceBeforeProtocolList = true; @@ -641,7 +686,6 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Cpp11; LLVMStyle.UseTab = FormatStyle::UT_Never; - LLVMStyle.RawStringFormats = {{"pb", FormatStyle::LK_TextProto, "google"}}; LLVMStyle.ReflowComments = true; LLVMStyle.SpacesInParentheses = false; LLVMStyle.SpacesInSquareBrackets = false; @@ -650,8 +694,12 @@ FormatStyle getLLVMStyle() { LLVMStyle.SpacesInCStyleCastParentheses = false; LLVMStyle.SpaceAfterCStyleCast = false; LLVMStyle.SpaceAfterTemplateKeyword = true; + LLVMStyle.SpaceBeforeCtorInitializerColon = true; + LLVMStyle.SpaceBeforeInheritanceColon = true; LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; + LLVMStyle.SpaceBeforeRangeBasedForLoopColon = true; LLVMStyle.SpaceBeforeAssignmentOperators = true; + LLVMStyle.SpaceBeforeCpp11BracedList = false; LLVMStyle.SpacesInAngles = false; LLVMStyle.PenaltyBreakAssignment = prec::Assignment; @@ -661,6 +709,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.PenaltyExcessCharacter = 1000000; LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; + LLVMStyle.PenaltyBreakTemplateDeclaration = prec::Relational; LLVMStyle.DisableFormat = false; LLVMStyle.SortIncludes = true; @@ -673,6 +722,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { if (Language == FormatStyle::LK_TextProto) { FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto); GoogleStyle.Language = FormatStyle::LK_TextProto; + return GoogleStyle; } @@ -684,17 +734,57 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.AllowShortIfStatementsOnASingleLine = true; GoogleStyle.AllowShortLoopsOnASingleLine = true; GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; - GoogleStyle.AlwaysBreakTemplateDeclarations = true; + GoogleStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_Yes; GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; GoogleStyle.DerivePointerAlignment = true; - GoogleStyle.IncludeCategories = { + GoogleStyle.IncludeStyle.IncludeCategories = { {"^<ext/.*\\.h>", 2}, {"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}}; - GoogleStyle.IncludeIsMainRegex = "([-_](test|unittest))?$"; + GoogleStyle.IncludeStyle.IncludeIsMainRegex = "([-_](test|unittest))?$"; GoogleStyle.IndentCaseLabels = true; GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; + GoogleStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Never; GoogleStyle.ObjCSpaceAfterProperty = false; - GoogleStyle.ObjCSpaceBeforeProtocolList = false; + GoogleStyle.ObjCSpaceBeforeProtocolList = true; GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; + GoogleStyle.RawStringFormats = { + { + FormatStyle::LK_Cpp, + /*Delimiters=*/ + { + "cc", + "CC", + "cpp", + "Cpp", + "CPP", + "c++", + "C++", + }, + /*EnclosingFunctionNames=*/ + {}, + /*CanonicalDelimiter=*/"", + /*BasedOnStyle=*/"google", + }, + { + FormatStyle::LK_TextProto, + /*Delimiters=*/ + { + "pb", + "PB", + "proto", + "PROTO", + }, + /*EnclosingFunctionNames=*/ + { + "EqualsProto", + "EquivToProto", + "PARSE_TEST_PROTO", + "PARSE_TEXT_PROTO", + "ParseTextOrDie", + }, + /*CanonicalDelimiter=*/"", + /*BasedOnStyle=*/"google", + }, + }; GoogleStyle.SpacesBeforeTrailingComments = 2; GoogleStyle.Standard = FormatStyle::LS_Auto; @@ -729,8 +819,17 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.JavaScriptWrapImports = false; } else if (Language == FormatStyle::LK_Proto) { GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; + GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.SpacesInContainerLiterals = false; + GoogleStyle.Cpp11BracedListStyle = false; + // This affects protocol buffer options specifications and text protos. + // Text protos are currently mostly formatted inside C++ raw string literals + // and often the current breaking behavior of string literals is not + // beneficial there. Investigate turning this on once proper string reflow + // has been implemented. + GoogleStyle.BreakStringLiterals = false; } else if (Language == FormatStyle::LK_ObjC) { + GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; GoogleStyle.ColumnLimit = 100; } @@ -767,12 +866,12 @@ FormatStyle getMozillaStyle() { MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel; MozillaStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_TopLevel; - MozillaStyle.AlwaysBreakTemplateDeclarations = true; + MozillaStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_Yes; MozillaStyle.BinPackParameters = false; MozillaStyle.BinPackArguments = false; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; - MozillaStyle.BreakBeforeInheritanceComma = true; + MozillaStyle.BreakInheritanceList = FormatStyle::BILS_BeforeComma; MozillaStyle.ConstructorInitializerIndentWidth = 2; MozillaStyle.ContinuationIndentWidth = 2; MozillaStyle.Cpp11BracedListStyle = false; @@ -803,6 +902,7 @@ FormatStyle getWebKitStyle() { Style.ObjCBlockIndentWidth = 4; Style.ObjCSpaceAfterProperty = true; Style.PointerAlignment = FormatStyle::PAS_Left; + Style.SpaceBeforeCpp11BracedList = true; return Style; } @@ -859,7 +959,7 @@ std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { assert(Language != FormatStyle::LK_None); if (Text.trim().empty()) return make_error_code(ParseError::Error); - + Style->StyleSet.Clear(); std::vector<FormatStyle> Styles; llvm::yaml::Input Input(Text); // DocumentListTraits<vector<FormatStyle>> uses the context to get default @@ -878,9 +978,9 @@ std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { // Ensure that each language is configured at most once. for (unsigned j = 0; j < i; ++j) { if (Styles[i].Language == Styles[j].Language) { - DEBUG(llvm::dbgs() - << "Duplicate languages in the config file on positions " << j - << " and " << i << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "Duplicate languages in the config file on positions " + << j << " and " << i << "\n"); return make_error_code(ParseError::Error); } } @@ -888,15 +988,23 @@ std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { // Look for a suitable configuration starting from the end, so we can // find the configuration for the specific language first, and the default // configuration (which can only be at slot 0) after it. + FormatStyle::FormatStyleSet StyleSet; + bool LanguageFound = false; for (int i = Styles.size() - 1; i >= 0; --i) { - if (Styles[i].Language == Language || - Styles[i].Language == FormatStyle::LK_None) { - *Style = Styles[i]; - Style->Language = Language; - return make_error_code(ParseError::Success); - } - } - return make_error_code(ParseError::Unsuitable); + if (Styles[i].Language != FormatStyle::LK_None) + StyleSet.Add(Styles[i]); + if (Styles[i].Language == Language) + LanguageFound = true; + } + if (!LanguageFound) { + if (Styles.empty() || Styles[0].Language != FormatStyle::LK_None) + return make_error_code(ParseError::Unsuitable); + FormatStyle DefaultStyle = Styles[0]; + DefaultStyle.Language = Language; + StyleSet.Add(std::move(DefaultStyle)); + } + *Style = *StyleSet.Get(Language); + return make_error_code(ParseError::Success); } std::string configurationAsText(const FormatStyle &Style) { @@ -910,6 +1018,38 @@ std::string configurationAsText(const FormatStyle &Style) { return Stream.str(); } +llvm::Optional<FormatStyle> +FormatStyle::FormatStyleSet::Get(FormatStyle::LanguageKind Language) const { + if (!Styles) + return None; + auto It = Styles->find(Language); + if (It == Styles->end()) + return None; + FormatStyle Style = It->second; + Style.StyleSet = *this; + return Style; +} + +void FormatStyle::FormatStyleSet::Add(FormatStyle Style) { + assert(Style.Language != LK_None && + "Cannot add a style for LK_None to a StyleSet"); + assert( + !Style.StyleSet.Styles && + "Cannot add a style associated with an existing StyleSet to a StyleSet"); + if (!Styles) + Styles = std::make_shared<MapType>(); + (*Styles)[Style.Language] = std::move(Style); +} + +void FormatStyle::FormatStyleSet::Clear() { + Styles.reset(); +} + +llvm::Optional<FormatStyle> +FormatStyle::GetLanguageStyle(FormatStyle::LanguageKind Language) const { + return StyleSet.Get(Language); +} + namespace { class JavaScriptRequoter : public TokenAnalyzer { @@ -921,8 +1061,7 @@ public: analyze(TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { - AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), - AnnotatedLines.end()); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); tooling::Replacements Result; requoteJSStringLiteral(AnnotatedLines, Result); return {Result, 0}; @@ -1012,8 +1151,7 @@ public: FormatTokenLexer &Tokens) override { tooling::Replacements Result; deriveLocalStyle(AnnotatedLines); - AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), - AnnotatedLines.end()); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { Annotator.calculateFormattingInformation(*AnnotatedLines[i]); } @@ -1137,8 +1275,7 @@ public: // To determine if some redundant code is actually introduced by // replacements(e.g. deletions), we need to come up with a more // sophisticated way of computing affected ranges. - AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), - AnnotatedLines.end()); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); checkEmptyNamespace(AnnotatedLines); @@ -1341,6 +1478,128 @@ private: std::set<FormatToken *, FormatTokenLess> DeletedTokens; }; +class ObjCHeaderStyleGuesser : public TokenAnalyzer { +public: + ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style), IsObjC(false) {} + + std::pair<tooling::Replacements, unsigned> + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override { + assert(Style.Language == FormatStyle::LK_Cpp); + IsObjC = guessIsObjC(AnnotatedLines, Tokens.getKeywords()); + tooling::Replacements Result; + return {Result, 0}; + } + + bool isObjC() { return IsObjC; } + +private: + static bool guessIsObjC(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + const AdditionalKeywords &Keywords) { + // Keep this array sorted, since we are binary searching over it. + static constexpr llvm::StringLiteral FoundationIdentifiers[] = { + "CGFloat", + "CGPoint", + "CGPointMake", + "CGPointZero", + "CGRect", + "CGRectEdge", + "CGRectInfinite", + "CGRectMake", + "CGRectNull", + "CGRectZero", + "CGSize", + "CGSizeMake", + "CGVector", + "CGVectorMake", + "NSAffineTransform", + "NSArray", + "NSAttributedString", + "NSBlockOperation", + "NSBundle", + "NSCache", + "NSCalendar", + "NSCharacterSet", + "NSCountedSet", + "NSData", + "NSDataDetector", + "NSDecimal", + "NSDecimalNumber", + "NSDictionary", + "NSEdgeInsets", + "NSHashTable", + "NSIndexPath", + "NSIndexSet", + "NSInteger", + "NSInvocationOperation", + "NSLocale", + "NSMapTable", + "NSMutableArray", + "NSMutableAttributedString", + "NSMutableCharacterSet", + "NSMutableData", + "NSMutableDictionary", + "NSMutableIndexSet", + "NSMutableOrderedSet", + "NSMutableSet", + "NSMutableString", + "NSNumber", + "NSNumberFormatter", + "NSObject", + "NSOperation", + "NSOperationQueue", + "NSOperationQueuePriority", + "NSOrderedSet", + "NSPoint", + "NSPointerArray", + "NSQualityOfService", + "NSRange", + "NSRect", + "NSRegularExpression", + "NSSet", + "NSSize", + "NSString", + "NSTimeZone", + "NSUInteger", + "NSURL", + "NSURLComponents", + "NSURLQueryItem", + "NSUUID", + "NSValue", + "UIImage", + "UIView", + }; + + for (auto Line : AnnotatedLines) { + for (const FormatToken *FormatTok = Line->First; FormatTok; + FormatTok = FormatTok->Next) { + if ((FormatTok->Previous && FormatTok->Previous->is(tok::at) && + (FormatTok->Tok.getObjCKeywordID() != tok::objc_not_keyword || + FormatTok->isOneOf(tok::numeric_constant, tok::l_square, + tok::l_brace))) || + (FormatTok->Tok.isAnyIdentifier() && + std::binary_search(std::begin(FoundationIdentifiers), + std::end(FoundationIdentifiers), + FormatTok->TokenText)) || + FormatTok->is(TT_ObjCStringLiteral) || + FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, + TT_ObjCBlockLBrace, TT_ObjCBlockLParen, + TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, + TT_ObjCMethodSpecifier, TT_ObjCProperty)) { + return true; + } + if (guessIsObjC(Line->Children, Keywords)) + return true; + } + } + return false; + } + + bool IsObjC; +}; + struct IncludeDirective { StringRef Filename; StringRef Text; @@ -1436,14 +1695,15 @@ static void sortCppIncludes(const FormatStyle &Style, // the entire block. Otherwise, no replacement is generated. if (Indices.size() == Includes.size() && std::is_sorted(Indices.begin(), Indices.end()) && - Style.IncludeBlocks == FormatStyle::IBS_Preserve) + Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Preserve) return; std::string result; for (unsigned Index : Indices) { if (!result.empty()) { result += "\n"; - if (Style.IncludeBlocks == FormatStyle::IBS_Regroup && + if (Style.IncludeStyle.IncludeBlocks == + tooling::IncludeStyle::IBS_Regroup && CurrentCategory != Includes[Index].Category) result += "\n"; } @@ -1465,60 +1725,6 @@ static void sortCppIncludes(const FormatStyle &Style, namespace { -// This class manages priorities of #include categories and calculates -// priorities for headers. -class IncludeCategoryManager { -public: - IncludeCategoryManager(const FormatStyle &Style, StringRef FileName) - : Style(Style), FileName(FileName) { - FileStem = llvm::sys::path::stem(FileName); - for (const auto &Category : Style.IncludeCategories) - CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase); - IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") || - FileName.endswith(".cpp") || FileName.endswith(".c++") || - FileName.endswith(".cxx") || FileName.endswith(".m") || - FileName.endswith(".mm"); - } - - // Returns the priority of the category which \p IncludeName belongs to. - // If \p CheckMainHeader is true and \p IncludeName is a main header, returns - // 0. Otherwise, returns the priority of the matching category or INT_MAX. - int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) { - int Ret = INT_MAX; - for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) - if (CategoryRegexs[i].match(IncludeName)) { - Ret = Style.IncludeCategories[i].Priority; - break; - } - if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) - Ret = 0; - return Ret; - } - -private: - bool isMainHeader(StringRef IncludeName) const { - if (!IncludeName.startswith("\"")) - return false; - StringRef HeaderStem = - llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1)); - if (FileStem.startswith(HeaderStem) || - FileStem.startswith_lower(HeaderStem)) { - llvm::Regex MainIncludeRegex( - (HeaderStem + Style.IncludeIsMainRegex).str(), - llvm::Regex::IgnoreCase); - if (MainIncludeRegex.match(FileStem)) - return true; - } - return false; - } - - const FormatStyle &Style; - bool IsMainFile; - StringRef FileName; - StringRef FileStem; - SmallVector<llvm::Regex, 4> CategoryRegexs; -}; - const char IncludeRegexPattern[] = R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; @@ -1542,7 +1748,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, // // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix // cases where the first #include is unlikely to be the main header. - IncludeCategoryManager Categories(Style, FileName); + tooling::IncludeCategoryManager Categories(Style.IncludeStyle, FileName); bool FirstIncludeBlock = true; bool MainIncludeFound = false; bool FormattingOff = false; @@ -1559,8 +1765,10 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, FormattingOff = false; const bool EmptyLineSkipped = - Trimmed.empty() && (Style.IncludeBlocks == FormatStyle::IBS_Merge || - Style.IncludeBlocks == FormatStyle::IBS_Regroup); + Trimmed.empty() && + (Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Merge || + Style.IncludeStyle.IncludeBlocks == + tooling::IncludeStyle::IBS_Regroup); if (!FormattingOff && !Line.endswith("\\")) { if (IncludeRegex.match(Line, &Matches)) { @@ -1670,122 +1878,6 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) { return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1; } -// Returns the offset after skipping a sequence of tokens, matched by \p -// GetOffsetAfterSequence, from the start of the code. -// \p GetOffsetAfterSequence should be a function that matches a sequence of -// tokens and returns an offset after the sequence. -unsigned getOffsetAfterTokenSequence( - StringRef FileName, StringRef Code, const FormatStyle &Style, - llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> - GetOffsetAfterSequence) { - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); - const SourceManager &SourceMgr = Env->getSourceManager(); - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, - getFormattingLangOpts(Style)); - Token Tok; - // Get the first token. - Lex.LexFromRawLexer(Tok); - return GetOffsetAfterSequence(SourceMgr, Lex, Tok); -} - -// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, -// \p Tok will be the token after this directive; otherwise, it can be any token -// after the given \p Tok (including \p Tok). -bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) { - bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier) && - Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier); - if (Matched) - Lex.LexFromRawLexer(Tok); - return Matched; -} - -void skipComments(Lexer &Lex, Token &Tok) { - while (Tok.is(tok::comment)) - if (Lex.LexFromRawLexer(Tok)) - return; -} - -// Returns the offset after header guard directives and any comments -// before/after header guards. If no header guard presents in the code, this -// will returns the offset after skipping all comments from the start of the -// code. -unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, - StringRef Code, - const FormatStyle &Style) { - return getOffsetAfterTokenSequence( - FileName, Code, Style, - [](const SourceManager &SM, Lexer &Lex, Token Tok) { - skipComments(Lex, Tok); - unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); - if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { - skipComments(Lex, Tok); - if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) - return SM.getFileOffset(Tok.getLocation()); - } - return InitialOffset; - }); -} - -// Check if a sequence of tokens is like -// "#include ("header.h" | <header.h>)". -// If it is, \p Tok will be the token after this directive; otherwise, it can be -// any token after the given \p Tok (including \p Tok). -bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { - auto Matched = [&]() { - Lex.LexFromRawLexer(Tok); - return true; - }; - if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { - if (Lex.LexFromRawLexer(Tok)) - return false; - if (Tok.is(tok::string_literal)) - return Matched(); - if (Tok.is(tok::less)) { - while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { - } - if (Tok.is(tok::greater)) - return Matched(); - } - } - return false; -} - -// Returns the offset of the last #include directive after which a new -// #include can be inserted. This ignores #include's after the #include block(s) -// in the beginning of a file to avoid inserting headers into code sections -// where new #include's should not be added by default. -// These code sections include: -// - raw string literals (containing #include). -// - #if blocks. -// - Special #include's among declarations (e.g. functions). -// -// If no #include after which a new #include can be inserted, this returns the -// offset after skipping all comments from the start of the code. -// Inserting after an #include is not allowed if it comes after code that is not -// #include (e.g. pre-processing directive that is not #include, declarations). -unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, - const FormatStyle &Style) { - return getOffsetAfterTokenSequence( - FileName, Code, Style, - [](const SourceManager &SM, Lexer &Lex, Token Tok) { - skipComments(Lex, Tok); - unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); - while (checkAndConsumeInclusiveDirective(Lex, Tok)) - MaxOffset = SM.getFileOffset(Tok.getLocation()); - return MaxOffset; - }); -} - -bool isDeletedHeader(llvm::StringRef HeaderName, - const std::set<llvm::StringRef> &HeadersToDelete) { - return HeadersToDelete.count(HeaderName) || - HeadersToDelete.count(HeaderName.trim("\"<>")); -} - // FIXME: insert empty lines between newly created blocks. tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, @@ -1814,85 +1906,26 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, if (HeaderInsertions.empty() && HeadersToDelete.empty()) return Replaces; - llvm::Regex IncludeRegex(IncludeRegexPattern); - llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)"); - SmallVector<StringRef, 4> Matches; StringRef FileName = Replaces.begin()->getFilePath(); - IncludeCategoryManager Categories(Style, FileName); - - // Record the offset of the end of the last include in each category. - std::map<int, int> CategoryEndOffsets; - // All possible priorities. - // Add 0 for main header and INT_MAX for headers that are not in any category. - std::set<int> Priorities = {0, INT_MAX}; - for (const auto &Category : Style.IncludeCategories) - Priorities.insert(Category.Priority); - int FirstIncludeOffset = -1; - // All new headers should be inserted after this offset. - unsigned MinInsertOffset = - getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); - StringRef TrimmedCode = Code.drop_front(MinInsertOffset); - // Max insertion offset in the original code. - unsigned MaxInsertOffset = - MinInsertOffset + - getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style); - SmallVector<StringRef, 32> Lines; - TrimmedCode.split(Lines, '\n'); - unsigned Offset = MinInsertOffset; - unsigned NextLineOffset; - std::set<StringRef> ExistingIncludes; - for (auto Line : Lines) { - NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); - if (IncludeRegex.match(Line, &Matches)) { - // The header name with quotes or angle brackets. - StringRef IncludeName = Matches[2]; - ExistingIncludes.insert(IncludeName); - // Only record the offset of current #include if we can insert after it. - if (Offset <= MaxInsertOffset) { - int Category = Categories.getIncludePriority( - IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Category] = NextLineOffset; - if (FirstIncludeOffset < 0) - FirstIncludeOffset = Offset; - } - if (isDeletedHeader(IncludeName, HeadersToDelete)) { - // If this is the last line without trailing newline, we need to make - // sure we don't delete across the file boundary. - unsigned Length = std::min(Line.size() + 1, Code.size() - Offset); - llvm::Error Err = - Result.add(tooling::Replacement(FileName, Offset, Length, "")); - if (Err) { - // Ignore the deletion on conflict. - llvm::errs() << "Failed to add header deletion replacement for " - << IncludeName << ": " << llvm::toString(std::move(Err)) - << "\n"; - } + tooling::HeaderIncludes Includes(FileName, Code, Style.IncludeStyle); + + for (const auto &Header : HeadersToDelete) { + tooling::Replacements Replaces = + Includes.remove(Header.trim("\"<>"), Header.startswith("<")); + for (const auto &R : Replaces) { + auto Err = Result.add(R); + if (Err) { + // Ignore the deletion on conflict. + llvm::errs() << "Failed to add header deletion replacement for " + << Header << ": " << llvm::toString(std::move(Err)) + << "\n"; } } - Offset = NextLineOffset; - } - - // Populate CategoryEndOfssets: - // - Ensure that CategoryEndOffset[Highest] is always populated. - // - If CategoryEndOffset[Priority] isn't set, use the next higher value that - // is set, up to CategoryEndOffset[Highest]. - auto Highest = Priorities.begin(); - if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) { - if (FirstIncludeOffset >= 0) - CategoryEndOffsets[*Highest] = FirstIncludeOffset; - else - CategoryEndOffsets[*Highest] = MinInsertOffset; - } - // By this point, CategoryEndOffset[Highest] is always set appropriately: - // - to an appropriate location before/after existing #includes, or - // - to right after the header guard, or - // - to the beginning of the file. - for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) - if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) - CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; - - bool NeedNewLineAtEnd = !Code.empty() && Code.back() != '\n'; + } + + llvm::Regex IncludeRegex = llvm::Regex(IncludeRegexPattern); + llvm::SmallVector<StringRef, 4> Matches; for (const auto &R : HeaderInsertions) { auto IncludeDirective = R.getReplacementText(); bool Matched = IncludeRegex.match(IncludeDirective, &Matches); @@ -1900,30 +1933,17 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, "'#include ...'"); (void)Matched; auto IncludeName = Matches[2]; - if (ExistingIncludes.find(IncludeName) != ExistingIncludes.end()) { - DEBUG(llvm::dbgs() << "Skip adding existing include : " << IncludeName - << "\n"); - continue; - } - int Category = - Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true); - Offset = CategoryEndOffsets[Category]; - std::string NewInclude = !IncludeDirective.endswith("\n") - ? (IncludeDirective + "\n").str() - : IncludeDirective.str(); - // When inserting headers at end of the code, also append '\n' to the code - // if it does not end with '\n'. - if (NeedNewLineAtEnd && Offset == Code.size()) { - NewInclude = "\n" + NewInclude; - NeedNewLineAtEnd = false; - } - auto NewReplace = tooling::Replacement(FileName, Offset, 0, NewInclude); - auto Err = Result.add(NewReplace); - if (Err) { - llvm::consumeError(std::move(Err)); - unsigned NewOffset = Result.getShiftedCodePosition(Offset); - NewReplace = tooling::Replacement(FileName, NewOffset, 0, NewInclude); - Result = Result.merge(tooling::Replacements(NewReplace)); + auto Replace = + Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<")); + if (Replace) { + auto Err = Result.add(*Replace); + if (Err) { + llvm::consumeError(std::move(Err)); + unsigned NewOffset = Result.getShiftedCodePosition(Replace->getOffset()); + auto Shifted = tooling::Replacement(FileName, NewOffset, 0, + Replace->getReplacementText()); + Result = Result.merge(tooling::Replacements(Shifted)); + } } } return Result; @@ -1988,9 +2008,9 @@ reformat(const FormatStyle &Style, StringRef Code, return Formatter(Env, Expanded, Status).process(); }); - std::unique_ptr<Environment> Env = Environment::CreateVirtualEnvironment( - Code, FileName, Ranges, FirstStartColumn, NextStartColumn, - LastStartColumn); + auto Env = + llvm::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn, + NextStartColumn, LastStartColumn); llvm::Optional<std::string> CurrentCode = None; tooling::Replacements Fixes; unsigned Penalty = 0; @@ -2003,7 +2023,7 @@ reformat(const FormatStyle &Style, StringRef Code, Penalty += PassFixes.second; if (I + 1 < E) { CurrentCode = std::move(*NewCode); - Env = Environment::CreateVirtualEnvironment( + Env = llvm::make_unique<Environment>( *CurrentCode, FileName, tooling::calculateRangesAfterReplacements(Fixes, Ranges), FirstStartColumn, NextStartColumn, LastStartColumn); @@ -2032,10 +2052,7 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, // cleanups only apply to C++ (they mostly concern ctor commas etc.) if (Style.Language != FormatStyle::LK_Cpp) return tooling::Replacements(); - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); - Cleaner Clean(*Env, Style); - return Clean.process().first; + return Cleaner(Environment(Code, FileName, Ranges), Style).process().first; } tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, @@ -2052,20 +2069,18 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName) { - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); - NamespaceEndCommentsFixer Fix(*Env, Style); - return Fix.process().first; + return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style) + .process() + .first; } tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, StringRef Code, ArrayRef<tooling::Range> Ranges, StringRef FileName) { - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); - UsingDeclarationsSorter Sorter(*Env, Style); - return Sorter.process().first; + return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style) + .process() + .first; } LangOptions getFormattingLangOpts(const FormatStyle &Style) { @@ -2117,6 +2132,28 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Cpp; } +FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) { + const auto GuessedLanguage = getLanguageByFileName(FileName); + if (GuessedLanguage == FormatStyle::LK_Cpp) { + auto Extension = llvm::sys::path::extension(FileName); + // If there's no file extension (or it's .h), we need to check the contents + // of the code to see if it contains Objective-C. + if (Extension.empty() || Extension == ".h") { + auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName; + Environment Env(Code, NonEmptyFileName, /*Ranges=*/{}); + ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle()); + Guesser.process(); + if (Guesser.isObjC()) + return FormatStyle::LK_ObjC; + } + } + return GuessedLanguage; +} + +const char *DefaultFormatStyle = "file"; + +const char *DefaultFallbackStyle = "LLVM"; + llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, StringRef FallbackStyleName, StringRef Code, vfs::FileSystem *FS) { @@ -2124,16 +2161,7 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, FS = vfs::getRealFileSystem().get(); } FormatStyle Style = getLLVMStyle(); - Style.Language = getLanguageByFileName(FileName); - - // This is a very crude detection of whether a header contains ObjC code that - // should be improved over time and probably be done on tokens, not one the - // bare content of the file. - if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && - (Code.contains("\n- (") || Code.contains("\n+ (") || - Code.contains("\n@end\n") || Code.contains("\n@end ") || - Code.endswith("@end"))) - Style.Language = FormatStyle::LK_ObjC; + Style.Language = guessLanguage(FileName, Code); FormatStyle FallbackStyle = getNoStyle(); if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) @@ -2170,7 +2198,7 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, SmallString<128> ConfigFile(Directory); llvm::sys::path::append(ConfigFile, ".clang-format"); - DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); + LLVM_DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); Status = FS->status(ConfigFile.str()); bool FoundConfigFile = @@ -2179,7 +2207,7 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, // Try _clang-format too, since dotfiles are not commonly used on Windows. ConfigFile = Directory; llvm::sys::path::append(ConfigFile, "_clang-format"); - DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); + LLVM_DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); Status = FS->status(ConfigFile.str()); FoundConfigFile = Status && (Status->getType() == llvm::sys::fs::file_type::regular_file); @@ -2201,7 +2229,8 @@ llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, return make_string_error("Error reading " + ConfigFile + ": " + ec.message()); } - DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "Using configuration file " << ConfigFile << "\n"); return Style; } } diff --git a/lib/Format/FormatInternal.h b/lib/Format/FormatInternal.h index 3984158467b3..5c59e7656eee 100644 --- a/lib/Format/FormatInternal.h +++ b/lib/Format/FormatInternal.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file declares Format APIs to be used internally by the +/// This file declares Format APIs to be used internally by the /// formatting library implementation. /// //===----------------------------------------------------------------------===// @@ -24,7 +24,7 @@ namespace clang { namespace format { namespace internal { -/// \brief Reformats the given \p Ranges in the code fragment \p Code. +/// Reformats the given \p Ranges in the code fragment \p Code. /// /// A fragment of code could conceptually be surrounded by other code that might /// constrain how that fragment is laid out. diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp index 10ac392abbf2..62b08c576e05 100644 --- a/lib/Format/FormatToken.cpp +++ b/lib/Format/FormatToken.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements specific functions of \c FormatTokens and their +/// This file implements specific functions of \c FormatTokens and their /// roles. /// //===----------------------------------------------------------------------===// @@ -57,6 +57,7 @@ bool FormatToken::isSimpleTypeSpecifier() const { case tok::kw_bool: case tok::kw___underlying_type: case tok::annot_typename: + case tok::kw_char8_t: case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_typeof: diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h index 3dc0ab0e7cca..9094e7689e1d 100644 --- a/lib/Format/FormatToken.h +++ b/lib/Format/FormatToken.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file contains the declaration of the FormatToken, a wrapper +/// This file contains the declaration of the FormatToken, a wrapper /// around Token with additional information related to formatting. /// //===----------------------------------------------------------------------===// @@ -29,7 +29,9 @@ namespace format { #define LIST_TOKEN_TYPES \ TYPE(ArrayInitializerLSquare) \ TYPE(ArraySubscriptLSquare) \ + TYPE(AttributeColon) \ TYPE(AttributeParen) \ + TYPE(AttributeSquare) \ TYPE(BinaryOperator) \ TYPE(BitFieldColon) \ TYPE(BlockComment) \ @@ -88,6 +90,7 @@ namespace format { TYPE(TemplateCloser) \ TYPE(TemplateOpener) \ TYPE(TemplateString) \ + TYPE(ProtoExtensionLSquare) \ TYPE(TrailingAnnotation) \ TYPE(TrailingReturnArrow) \ TYPE(TrailingUnaryOperator) \ @@ -101,7 +104,7 @@ enum TokenType { NUM_TOKEN_TYPES }; -/// \brief Determines the name of a token type. +/// Determines the name of a token type. const char *getTokenTypeName(TokenType Type); // Represents what type of block a set of braces open. @@ -115,181 +118,191 @@ enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; class TokenRole; class AnnotatedLine; -/// \brief A wrapper around a \c Token storing information about the +/// A wrapper around a \c Token storing information about the /// whitespace characters preceding it. struct FormatToken { FormatToken() {} - /// \brief The \c Token. + /// The \c Token. Token Tok; - /// \brief The number of newlines immediately before the \c Token. + /// The number of newlines immediately before the \c Token. /// /// This can be used to determine what the user wrote in the original code /// and thereby e.g. leave an empty line between two function definitions. unsigned NewlinesBefore = 0; - /// \brief Whether there is at least one unescaped newline before the \c + /// Whether there is at least one unescaped newline before the \c /// Token. bool HasUnescapedNewline = false; - /// \brief The range of the whitespace immediately preceding the \c Token. + /// The range of the whitespace immediately preceding the \c Token. SourceRange WhitespaceRange; - /// \brief The offset just past the last '\n' in this token's leading + /// The offset just past the last '\n' in this token's leading /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. unsigned LastNewlineOffset = 0; - /// \brief The width of the non-whitespace parts of the token (or its first + /// The width of the non-whitespace parts of the token (or its first /// line for multi-line tokens) in columns. /// We need this to correctly measure number of columns a token spans. unsigned ColumnWidth = 0; - /// \brief Contains the width in columns of the last line of a multi-line + /// Contains the width in columns of the last line of a multi-line /// token. unsigned LastLineColumnWidth = 0; - /// \brief Whether the token text contains newlines (escaped or not). + /// Whether the token text contains newlines (escaped or not). bool IsMultiline = false; - /// \brief Indicates that this is the first token of the file. + /// Indicates that this is the first token of the file. bool IsFirst = false; - /// \brief Whether there must be a line break before this token. + /// Whether there must be a line break before this token. /// /// This happens for example when a preprocessor directive ended directly /// before the token. bool MustBreakBefore = false; - /// \brief The raw text of the token. + /// The raw text of the token. /// /// Contains the raw token text without leading whitespace and without leading /// escaped newlines. StringRef TokenText; - /// \brief Set to \c true if this token is an unterminated literal. + /// Set to \c true if this token is an unterminated literal. bool IsUnterminatedLiteral = 0; - /// \brief Contains the kind of block if this token is a brace. + /// Contains the kind of block if this token is a brace. BraceBlockKind BlockKind = BK_Unknown; TokenType Type = TT_Unknown; - /// \brief The number of spaces that should be inserted before this token. + /// The number of spaces that should be inserted before this token. unsigned SpacesRequiredBefore = 0; - /// \brief \c true if it is allowed to break before this token. + /// \c true if it is allowed to break before this token. bool CanBreakBefore = false; - /// \brief \c true if this is the ">" of "template<..>". + /// \c true if this is the ">" of "template<..>". bool ClosesTemplateDeclaration = false; - /// \brief Number of parameters, if this is "(", "[" or "<". + /// Number of parameters, if this is "(", "[" or "<". /// /// This is initialized to 1 as we don't need to distinguish functions with /// 0 parameters from functions with 1 parameter. Thus, we can simply count /// the number of commas. unsigned ParameterCount = 0; - /// \brief Number of parameters that are nested blocks, + /// Number of parameters that are nested blocks, /// if this is "(", "[" or "<". unsigned BlockParameterCount = 0; - /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of + /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of /// the surrounding bracket. tok::TokenKind ParentBracket = tok::unknown; - /// \brief A token can have a special role that can carry extra information + /// A token can have a special role that can carry extra information /// about the token's formatting. std::unique_ptr<TokenRole> Role; - /// \brief If this is an opening parenthesis, how are the parameters packed? + /// If this is an opening parenthesis, how are the parameters packed? ParameterPackingKind PackingKind = PPK_Inconclusive; - /// \brief The total length of the unwrapped line up to and including this + /// The total length of the unwrapped line up to and including this /// token. unsigned TotalLength = 0; - /// \brief The original 0-based column of this token, including expanded tabs. + /// The original 0-based column of this token, including expanded tabs. /// The configured TabWidth is used as tab width. unsigned OriginalColumn = 0; - /// \brief The length of following tokens until the next natural split point, + /// The length of following tokens until the next natural split point, /// or the next token that can be broken. unsigned UnbreakableTailLength = 0; // FIXME: Come up with a 'cleaner' concept. - /// \brief The binding strength of a token. This is a combined value of + /// The binding strength of a token. This is a combined value of /// operator precedence, parenthesis nesting, etc. unsigned BindingStrength = 0; - /// \brief The nesting level of this token, i.e. the number of surrounding (), + /// The nesting level of this token, i.e. the number of surrounding (), /// [], {} or <>. unsigned NestingLevel = 0; - /// \brief The indent level of this token. Copied from the surrounding line. + /// The indent level of this token. Copied from the surrounding line. unsigned IndentLevel = 0; - /// \brief Penalty for inserting a line break before this token. + /// Penalty for inserting a line break before this token. unsigned SplitPenalty = 0; - /// \brief If this is the first ObjC selector name in an ObjC method + /// If this is the first ObjC selector name in an ObjC method /// definition or call, this contains the length of the longest name. /// /// This being set to 0 means that the selectors should not be colon-aligned, /// e.g. because several of them are block-type. unsigned LongestObjCSelectorName = 0; - /// \brief Stores the number of required fake parentheses and the + /// If this is the first ObjC selector name in an ObjC method + /// definition or call, this contains the number of parts that the whole + /// selector consist of. + unsigned ObjCSelectorNameParts = 0; + + /// The 0-based index of the parameter/argument. For ObjC it is set + /// for the selector name token. + /// For now calculated only for ObjC. + unsigned ParameterIndex = 0; + + /// Stores the number of required fake parentheses and the /// corresponding operator precedence. /// /// If multiple fake parentheses start at a token, this vector stores them in /// reverse order, i.e. inner fake parenthesis first. SmallVector<prec::Level, 4> FakeLParens; - /// \brief Insert this many fake ) after this token for correct indentation. + /// Insert this many fake ) after this token for correct indentation. unsigned FakeRParens = 0; - /// \brief \c true if this token starts a binary expression, i.e. has at least + /// \c true if this token starts a binary expression, i.e. has at least /// one fake l_paren with a precedence greater than prec::Unknown. bool StartsBinaryExpression = false; - /// \brief \c true if this token ends a binary expression. + /// \c true if this token ends a binary expression. bool EndsBinaryExpression = false; - /// \brief Is this is an operator (or "."/"->") in a sequence of operators + /// Is this is an operator (or "."/"->") in a sequence of operators /// with the same precedence, contains the 0-based operator index. unsigned OperatorIndex = 0; - /// \brief If this is an operator (or "."/"->") in a sequence of operators + /// If this is an operator (or "."/"->") in a sequence of operators /// with the same precedence, points to the next operator. FormatToken *NextOperator = nullptr; - /// \brief Is this token part of a \c DeclStmt defining multiple variables? + /// Is this token part of a \c DeclStmt defining multiple variables? /// /// Only set if \c Type == \c TT_StartOfName. bool PartOfMultiVariableDeclStmt = false; - /// \brief Does this line comment continue a line comment section? + /// Does this line comment continue a line comment section? /// /// Only set to true if \c Type == \c TT_LineComment. bool ContinuesLineCommentSection = false; - /// \brief If this is a bracket, this points to the matching one. + /// If this is a bracket, this points to the matching one. FormatToken *MatchingParen = nullptr; - /// \brief The previous token in the unwrapped line. + /// The previous token in the unwrapped line. FormatToken *Previous = nullptr; - /// \brief The next token in the unwrapped line. + /// The next token in the unwrapped line. FormatToken *Next = nullptr; - /// \brief If this token starts a block, this contains all the unwrapped lines + /// If this token starts a block, this contains all the unwrapped lines /// in it. SmallVector<AnnotatedLine *, 1> Children; - /// \brief Stores the formatting decision for the token once it was made. + /// Stores the formatting decision for the token once it was made. FormatDecision Decision = FD_Unformatted; - /// \brief If \c true, this token has been fully formatted (indented and + /// If \c true, this token has been fully formatted (indented and /// potentially re-formatted inside), and we do not allow further formatting /// changes. bool Finalized = false; @@ -337,7 +350,7 @@ struct FormatToken { (!ColonRequired || (Next && Next->is(tok::colon))); } - /// \brief Determine whether the token is a simple-type-specifier. + /// Determine whether the token is a simple-type-specifier. bool isSimpleTypeSpecifier() const; bool isObjCAccessSpecifier() const { @@ -348,22 +361,28 @@ struct FormatToken { Next->isObjCAtKeyword(tok::objc_private)); } - /// \brief Returns whether \p Tok is ([{ or a template opening <. + /// Returns whether \p Tok is ([{ or an opening < of a template or in + /// protos. bool opensScope() const { if (is(TT_TemplateString) && TokenText.endswith("${")) return true; + if (is(TT_DictLiteral) && is(tok::less)) + return true; return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, TT_TemplateOpener); } - /// \brief Returns whether \p Tok is )]} or a template closing >. + /// Returns whether \p Tok is )]} or a closing > of a template or in + /// protos. bool closesScope() const { if (is(TT_TemplateString) && TokenText.startswith("}")) return true; + if (is(TT_DictLiteral) && is(tok::greater)) + return true; return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, TT_TemplateCloser); } - /// \brief Returns \c true if this is a "." or "->" accessing a member. + /// Returns \c true if this is a "." or "->" accessing a member. bool isMemberAccess() const { return isOneOf(tok::arrow, tok::period, tok::arrowstar) && !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, @@ -396,7 +415,7 @@ struct FormatToken { (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); } - /// \brief Returns \c true if this is a keyword that can be used + /// Returns \c true if this is a keyword that can be used /// like a function call (e.g. sizeof, typeid, ...). bool isFunctionLikeKeyword() const { switch (Tok.getKind()) { @@ -416,7 +435,7 @@ struct FormatToken { } } - /// \brief Returns \c true if this is a string literal that's like a label, + /// Returns \c true if this is a string literal that's like a label, /// e.g. ends with "=" or ":". bool isLabelString() const { if (!is(tok::string_literal)) @@ -431,7 +450,7 @@ struct FormatToken { (Content.back() == ':' || Content.back() == '='); } - /// \brief Returns actual token start location without leading escaped + /// Returns actual token start location without leading escaped /// newlines and whitespace. /// /// This can be different to Tok.getLocation(), which includes leading escaped @@ -441,10 +460,11 @@ struct FormatToken { } prec::Level getPrecedence() const { - return getBinOpPrecedence(Tok.getKind(), true, true); + return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, + /*CPlusPlus11=*/true); } - /// \brief Returns the previous token ignoring comments. + /// Returns the previous token ignoring comments. FormatToken *getPreviousNonComment() const { FormatToken *Tok = Previous; while (Tok && Tok->is(tok::comment)) @@ -452,7 +472,7 @@ struct FormatToken { return Tok; } - /// \brief Returns the next token ignoring comments. + /// Returns the next token ignoring comments. const FormatToken *getNextNonComment() const { const FormatToken *Tok = Next; while (Tok && Tok->is(tok::comment)) @@ -460,12 +480,13 @@ struct FormatToken { return Tok; } - /// \brief Returns \c true if this tokens starts a block-type list, i.e. a + /// Returns \c true if this tokens starts a block-type list, i.e. a /// list that should be indented with a block indent. bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { if (is(TT_TemplateString) && opensScope()) return true; return is(TT_ArrayInitializerLSquare) || + is(TT_ProtoExtensionLSquare) || (is(tok::l_brace) && (BlockKind == BK_Block || is(TT_DictLiteral) || (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || @@ -473,7 +494,7 @@ struct FormatToken { Style.Language == FormatStyle::LK_TextProto)); } - /// \brief Returns whether the token is the left square bracket of a C++ + /// Returns whether the token is the left square bracket of a C++ /// structured binding declaration. bool isCppStructuredBinding(const FormatStyle &Style) const { if (!Style.isCpp() || isNot(tok::l_square)) @@ -486,14 +507,14 @@ struct FormatToken { return T && T->is(tok::kw_auto); } - /// \brief Same as opensBlockOrBlockTypeList, but for the closing token. + /// Same as opensBlockOrBlockTypeList, but for the closing token. bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { if (is(TT_TemplateString) && closesScope()) return true; return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); } - /// \brief Return the actual namespace token, if this token starts a namespace + /// Return the actual namespace token, if this token starts a namespace /// block. const FormatToken *getNamespaceToken() const { const FormatToken *NamespaceTok = this; @@ -546,11 +567,11 @@ public: TokenRole(const FormatStyle &Style) : Style(Style) {} virtual ~TokenRole(); - /// \brief After the \c TokenAnnotator has finished annotating all the tokens, + /// After the \c TokenAnnotator has finished annotating all the tokens, /// this function precomputes required information for formatting. virtual void precomputeFormattingInfos(const FormatToken *Token); - /// \brief Apply the special formatting that the given role demands. + /// Apply the special formatting that the given role demands. /// /// Assumes that the token having this role is already formatted. /// @@ -562,7 +583,7 @@ public: return 0; } - /// \brief Same as \c formatFromToken, but assumes that the first token has + /// Same as \c formatFromToken, but assumes that the first token has /// already been set thereby deciding on the first line break. virtual unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, @@ -570,7 +591,7 @@ public: return 0; } - /// \brief Notifies the \c Role that a comma was found. + /// Notifies the \c Role that a comma was found. virtual void CommaFound(const FormatToken *Token) {} protected: @@ -590,46 +611,46 @@ public: unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun) override; - /// \brief Adds \p Token as the next comma to the \c CommaSeparated list. + /// Adds \p Token as the next comma to the \c CommaSeparated list. void CommaFound(const FormatToken *Token) override { Commas.push_back(Token); } private: - /// \brief A struct that holds information on how to format a given list with + /// A struct that holds information on how to format a given list with /// a specific number of columns. struct ColumnFormat { - /// \brief The number of columns to use. + /// The number of columns to use. unsigned Columns; - /// \brief The total width in characters. + /// The total width in characters. unsigned TotalWidth; - /// \brief The number of lines required for this format. + /// The number of lines required for this format. unsigned LineCount; - /// \brief The size of each column in characters. + /// The size of each column in characters. SmallVector<unsigned, 8> ColumnSizes; }; - /// \brief Calculate which \c ColumnFormat fits best into + /// Calculate which \c ColumnFormat fits best into /// \p RemainingCharacters. const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; - /// \brief The ordered \c FormatTokens making up the commas of this list. + /// The ordered \c FormatTokens making up the commas of this list. SmallVector<const FormatToken *, 8> Commas; - /// \brief The length of each of the list's items in characters including the + /// The length of each of the list's items in characters including the /// trailing comma. SmallVector<unsigned, 8> ItemLengths; - /// \brief Precomputed formats that can be used for this list. + /// Precomputed formats that can be used for this list. SmallVector<ColumnFormat, 4> Formats; bool HasNestedBracedList; }; -/// \brief Encapsulates keywords that are context sensitive or for languages not +/// Encapsulates keywords that are context sensitive or for languages not /// properly supported by Clang's lexer. struct AdditionalKeywords { AdditionalKeywords(IdentifierTable &IdentTable) { @@ -761,7 +782,7 @@ struct AdditionalKeywords { IdentifierInfo *kw_slots; IdentifierInfo *kw_qslots; - /// \brief Returns \c true if \p Tok is a true JavaScript identifier, returns + /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. bool IsJavaScriptIdentifier(const FormatToken &Tok) const { return Tok.is(tok::identifier) && @@ -770,7 +791,7 @@ struct AdditionalKeywords { } private: - /// \brief The JavaScript keywords beyond the C++ keyword set. + /// The JavaScript keywords beyond the C++ keyword set. std::unordered_set<IdentifierInfo *> JsExtraKeywords; }; diff --git a/lib/Format/FormatTokenLexer.cpp b/lib/Format/FormatTokenLexer.cpp index 199d2974c5c7..c7f720a443d3 100644 --- a/lib/Format/FormatTokenLexer.cpp +++ b/lib/Format/FormatTokenLexer.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements FormatTokenLexer, which tokenizes a source file +/// This file implements FormatTokenLexer, which tokenizes a source file /// into a FormatToken stream suitable for ClangFormat. /// //===----------------------------------------------------------------------===// @@ -38,7 +38,7 @@ FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, for (const std::string &ForEachMacro : Style.ForEachMacros) ForEachMacros.push_back(&IdentTable.get(ForEachMacro)); - std::sort(ForEachMacros.begin(), ForEachMacros.end()); + llvm::sort(ForEachMacros.begin(), ForEachMacros.end()); } ArrayRef<FormatToken *> FormatTokenLexer::lex() { @@ -334,7 +334,7 @@ void FormatTokenLexer::handleTemplateStrings() { void FormatTokenLexer::tryParsePythonComment() { FormatToken *HashToken = Tokens.back(); - if (HashToken->isNot(tok::hash)) + if (!HashToken->isOneOf(tok::hash, tok::hashhash)) return; // Turn the remainder of this line into a comment. const char *CommentBegin = @@ -691,7 +691,9 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) { } } - if (Style.Language == FormatStyle::LK_JavaScript && + if ((Style.Language == FormatStyle::LK_JavaScript || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && Tok.is(tok::char_constant)) { Tok.Tok.setKind(tok::string_literal); } diff --git a/lib/Format/FormatTokenLexer.h b/lib/Format/FormatTokenLexer.h index 59dc2a752f1f..3b79d27480e3 100644 --- a/lib/Format/FormatTokenLexer.h +++ b/lib/Format/FormatTokenLexer.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file contains FormatTokenLexer, which tokenizes a source file +/// This file contains FormatTokenLexer, which tokenizes a source file /// into a token stream suitable for ClangFormat. /// //===----------------------------------------------------------------------===// diff --git a/lib/Format/NamespaceEndCommentsFixer.cpp b/lib/Format/NamespaceEndCommentsFixer.cpp index df99bb2e1381..995b3219a1f4 100644 --- a/lib/Format/NamespaceEndCommentsFixer.cpp +++ b/lib/Format/NamespaceEndCommentsFixer.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that +/// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that /// fixes namespace end comments. /// //===----------------------------------------------------------------------===// @@ -27,13 +27,6 @@ namespace { // Short namespaces don't need an end comment. static const int kShortNamespaceMaxLines = 1; -// Matches a valid namespace end comment. -// Valid namespace end comments don't need to be edited. -static llvm::Regex kNamespaceCommentPattern = - llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" - "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", - llvm::Regex::IgnoreCase); - // Computes the name of a namespace given the namespace token. // Returns "" for anonymous namespace. std::string computeName(const FormatToken *NamespaceTok) { @@ -67,8 +60,15 @@ bool hasEndComment(const FormatToken *RBraceTok) { bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) { assert(hasEndComment(RBraceTok)); const FormatToken *Comment = RBraceTok->Next; + + // Matches a valid namespace end comment. + // Valid namespace end comments don't need to be edited. + static llvm::Regex *const NamespaceCommentPattern = + new llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" + "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", + llvm::Regex::IgnoreCase); SmallVector<StringRef, 7> Groups; - if (kNamespaceCommentPattern.match(Comment->TokenText, &Groups)) { + if (NamespaceCommentPattern->match(Comment->TokenText, &Groups)) { StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : ""; // Anonymous namespace comments must not mention a namespace name. if (NamespaceName.empty() && !NamespaceNameInComment.empty()) @@ -107,13 +107,14 @@ void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, << llvm::toString(std::move(Err)) << "\n"; } } +} // namespace const FormatToken * -getNamespaceToken(const AnnotatedLine *line, +getNamespaceToken(const AnnotatedLine *Line, const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - if (!line->Affected || line->InPPDirective || !line->startsWith(tok::r_brace)) + if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace)) return nullptr; - size_t StartLineIndex = line->MatchingOpeningBlockLineIndex; + size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; if (StartLineIndex == UnwrappedLine::kInvalidIndex) return nullptr; assert(StartLineIndex < AnnotatedLines.size()); @@ -131,7 +132,6 @@ getNamespaceToken(const AnnotatedLine *line, return nullptr; return NamespaceTok; } -} // namespace NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style) @@ -141,8 +141,7 @@ std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); - AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), - AnnotatedLines.end()); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); tooling::Replacements Fixes; std::string AllNamespaceNames = ""; size_t StartLineIndex = SIZE_MAX; diff --git a/lib/Format/NamespaceEndCommentsFixer.h b/lib/Format/NamespaceEndCommentsFixer.h index 4779f0d27c92..07a1c7bb0c35 100644 --- a/lib/Format/NamespaceEndCommentsFixer.h +++ b/lib/Format/NamespaceEndCommentsFixer.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that +/// This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that /// fixes namespace end comments. /// //===----------------------------------------------------------------------===// @@ -21,6 +21,16 @@ namespace clang { namespace format { +// Finds the namespace token corresponding to a closing namespace `}`, if that +// is to be formatted. +// If \p Line contains the closing `}` of a namespace, is affected and is not in +// a preprocessor directive, the result will be the matching namespace token. +// Otherwise returns null. +// \p AnnotatedLines is the sequence of lines from which \p Line is a member of. +const FormatToken * +getNamespaceToken(const AnnotatedLine *Line, + const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines); + class NamespaceEndCommentsFixer : public TokenAnalyzer { public: NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); diff --git a/lib/Format/SortJavaScriptImports.cpp b/lib/Format/SortJavaScriptImports.cpp index d0b979e100d5..2ec577382ffb 100644 --- a/lib/Format/SortJavaScriptImports.cpp +++ b/lib/Format/SortJavaScriptImports.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements a sort operation for JavaScript ES6 imports. +/// This file implements a sort operation for JavaScript ES6 imports. /// //===----------------------------------------------------------------------===// @@ -128,8 +128,7 @@ public: SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) override { tooling::Replacements Result; - AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), - AnnotatedLines.end()); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); const AdditionalKeywords &Keywords = Tokens.getKeywords(); SmallVector<JsModuleReference, 16> References; @@ -189,9 +188,9 @@ public: if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2) ReferencesText += "\n"; - DEBUG(llvm::dbgs() << "Replacing imports:\n" - << getSourceText(InsertionPoint) << "\nwith:\n" - << ReferencesText << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" + << getSourceText(InsertionPoint) << "\nwith:\n" + << ReferencesText << "\n"); auto Err = Result.add(tooling::Replacement( Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), ReferencesText)); @@ -308,7 +307,7 @@ private: FirstNonImportLine = nullptr; AnyImportAffected = AnyImportAffected || Line->Affected; Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); - DEBUG({ + LLVM_DEBUG({ llvm::dbgs() << "JsModuleReference: {" << "is_export: " << Reference.IsExport << ", cat: " << Reference.Category @@ -446,10 +445,9 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, ArrayRef<tooling::Range> Ranges, StringRef FileName) { // FIXME: Cursor support. - std::unique_ptr<Environment> Env = - Environment::CreateVirtualEnvironment(Code, FileName, Ranges); - JavaScriptImportSorter Sorter(*Env, Style); - return Sorter.process().first; + return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) + .process() + .first; } } // end namespace format diff --git a/lib/Format/SortJavaScriptImports.h b/lib/Format/SortJavaScriptImports.h index f22a051008f0..ecab0ae54cb3 100644 --- a/lib/Format/SortJavaScriptImports.h +++ b/lib/Format/SortJavaScriptImports.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements a sorter for JavaScript ES6 imports. +/// This file implements a sorter for JavaScript ES6 imports. /// //===----------------------------------------------------------------------===// diff --git a/lib/Format/TokenAnalyzer.cpp b/lib/Format/TokenAnalyzer.cpp index d1dfb1fea32b..99fc61ef1c32 100644 --- a/lib/Format/TokenAnalyzer.cpp +++ b/lib/Format/TokenAnalyzer.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements an abstract TokenAnalyzer and associated helper +/// This file implements an abstract TokenAnalyzer and associated helper /// classes. TokenAnalyzer can be extended to generate replacements based on /// an annotated and pre-processed token stream. /// @@ -34,48 +34,19 @@ namespace clang { namespace format { -// This sets up an virtual file system with file \p FileName containing \p -// Code. -std::unique_ptr<Environment> -Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges, - unsigned FirstStartColumn, - unsigned NextStartColumn, - unsigned LastStartColumn) { - // This is referenced by `FileMgr` and will be released by `FileMgr` when it - // is deleted. - IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( - new vfs::InMemoryFileSystem); - // This is passed to `SM` as reference, so the pointer has to be referenced - // in `Environment` so that `FileMgr` can out-live this function scope. - std::unique_ptr<FileManager> FileMgr( - new FileManager(FileSystemOptions(), InMemoryFileSystem)); - // This is passed to `SM` as reference, so the pointer has to be referenced - // by `Environment` due to the same reason above. - std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine( - IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), - new DiagnosticOptions)); - // This will be stored as reference, so the pointer has to be stored in - // due to the same reason above. - std::unique_ptr<SourceManager> VirtualSM( - new SourceManager(*Diagnostics, *FileMgr)); - InMemoryFileSystem->addFile( - FileName, 0, - llvm::MemoryBuffer::getMemBuffer(Code, FileName, - /*RequiresNullTerminator=*/false)); - FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName), - SourceLocation(), clang::SrcMgr::C_User); - assert(ID.isValid()); - SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID); - std::vector<CharSourceRange> CharRanges; +Environment::Environment(StringRef Code, StringRef FileName, + ArrayRef<tooling::Range> Ranges, + unsigned FirstStartColumn, unsigned NextStartColumn, + unsigned LastStartColumn) + : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()), + ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn), + NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) { + SourceLocation StartOfFile = SM.getLocForStartOfFile(ID); for (const tooling::Range &Range : Ranges) { SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); SourceLocation End = Start.getLocWithOffset(Range.getLength()); CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); } - return llvm::make_unique<Environment>( - ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics), - CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn); } TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) @@ -84,12 +55,12 @@ TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) UnwrappedLines(1), Encoding(encoding::detectEncoding( Env.getSourceManager().getBufferData(Env.getFileID()))) { - DEBUG( + LLVM_DEBUG( llvm::dbgs() << "File encoding: " << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown") << "\n"); - DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) - << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) + << "\n"); } std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { @@ -103,7 +74,7 @@ std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { assert(UnwrappedLines.rbegin()->empty()); unsigned Penalty = 0; for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { - DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); + LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); SmallVector<AnnotatedLine *, 16> AnnotatedLines; TokenAnnotator Annotator(Style, Tokens.getKeywords()); @@ -115,7 +86,7 @@ std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { std::pair<tooling::Replacements, unsigned> RunResult = analyze(Annotator, AnnotatedLines, Tokens); - DEBUG({ + LLVM_DEBUG({ llvm::dbgs() << "Replacements for run " << Run << ":\n"; for (tooling::Replacements::const_iterator I = RunResult.first.begin(), E = RunResult.first.end(); diff --git a/lib/Format/TokenAnalyzer.h b/lib/Format/TokenAnalyzer.h index 96ea00b25ba1..e43a860e46cf 100644 --- a/lib/Format/TokenAnalyzer.h +++ b/lib/Format/TokenAnalyzer.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file declares an abstract TokenAnalyzer, and associated helper +/// This file declares an abstract TokenAnalyzer, and associated helper /// classes. TokenAnalyzer can be extended to generate replacements based on /// an annotated and pre-processed token stream. /// @@ -37,44 +37,24 @@ namespace format { class Environment { public: Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges) - : ID(ID), CharRanges(Ranges.begin(), Ranges.end()), SM(SM), - FirstStartColumn(0), - NextStartColumn(0), - LastStartColumn(0) {} - - Environment(FileID ID, std::unique_ptr<FileManager> FileMgr, - std::unique_ptr<SourceManager> VirtualSM, - std::unique_ptr<DiagnosticsEngine> Diagnostics, - const std::vector<CharSourceRange> &CharRanges, - unsigned FirstStartColumn, - unsigned NextStartColumn, - unsigned LastStartColumn) - : ID(ID), CharRanges(CharRanges.begin(), CharRanges.end()), - SM(*VirtualSM), - FirstStartColumn(FirstStartColumn), - NextStartColumn(NextStartColumn), - LastStartColumn(LastStartColumn), - FileMgr(std::move(FileMgr)), - VirtualSM(std::move(VirtualSM)), Diagnostics(std::move(Diagnostics)) {} + : SM(SM), ID(ID), CharRanges(Ranges.begin(), Ranges.end()), + FirstStartColumn(0), NextStartColumn(0), LastStartColumn(0) {} // This sets up an virtual file system with file \p FileName containing the // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn, // that the next lines of \p Code should start at \p NextStartColumn, and // that \p Code should end at \p LastStartColumn if it ends in newline. // See also the documentation of clang::format::internal::reformat. - static std::unique_ptr<Environment> - CreateVirtualEnvironment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges, - unsigned FirstStartColumn = 0, - unsigned NextStartColumn = 0, - unsigned LastStartColumn = 0); + Environment(StringRef Code, StringRef FileName, + ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn = 0, + unsigned NextStartColumn = 0, unsigned LastStartColumn = 0); FileID getFileID() const { return ID; } - ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; } - const SourceManager &getSourceManager() const { return SM; } + ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; } + // Returns the column at which the fragment of code managed by this // environment starts. unsigned getFirstStartColumn() const { return FirstStartColumn; } @@ -88,19 +68,18 @@ public: unsigned getLastStartColumn() const { return LastStartColumn; } private: + // This is only set if constructed from string. + std::unique_ptr<SourceManagerForFile> VirtualSM; + + // This refers to either a SourceManager provided by users or VirtualSM + // created for a single file. + SourceManager &SM; FileID ID; + SmallVector<CharSourceRange, 8> CharRanges; - SourceManager &SM; unsigned FirstStartColumn; unsigned NextStartColumn; unsigned LastStartColumn; - - // The order of these fields are important - they should be in the same order - // as they are created in `CreateVirtualEnvironment` so that they can be - // deleted in the reverse order as they are created. - std::unique_ptr<FileManager> FileMgr; - std::unique_ptr<SourceManager> VirtualSM; - std::unique_ptr<DiagnosticsEngine> Diagnostics; }; class TokenAnalyzer : public UnwrappedLineConsumer { diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp index 298c72b002f8..3a19215e1803 100644 --- a/lib/Format/TokenAnnotator.cpp +++ b/lib/Format/TokenAnnotator.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements a token annotator, i.e. creates +/// This file implements a token annotator, i.e. creates /// \c AnnotatedTokens out of \c FormatTokens with required extra information. /// //===----------------------------------------------------------------------===// @@ -25,7 +25,22 @@ namespace format { namespace { -/// \brief A parser that gathers additional information about tokens. +/// Returns \c true if the token can be used as an identifier in +/// an Objective-C \c @selector, \c false otherwise. +/// +/// Because getFormattingLangOpts() always lexes source code as +/// Objective-C++, C++ keywords like \c new and \c delete are +/// lexed as tok::kw_*, not tok::identifier, even for Objective-C. +/// +/// For Objective-C and Objective-C++, both identifiers and keywords +/// are valid inside @selector(...) (or a macro which +/// invokes @selector(...)). So, we allow treat any identifier or +/// keyword as a potential Objective-C selector component. +static bool canBeObjCSelectorComponent(const FormatToken &Tok) { + return Tok.Tok.getIdentifierInfo() != nullptr; +} + +/// A parser that gathers additional information about tokens. /// /// The \c TokenAnnotator tries to match parenthesis and square brakets and /// store a parenthesis levels. It also tries to resolve matching "<" and ">" @@ -79,7 +94,17 @@ private: if (CurrentToken->is(tok::greater)) { Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; - CurrentToken->Type = TT_TemplateCloser; + // In TT_Proto, we must distignuish between: + // map<key, value> + // msg < item: data > + // msg: < item: data > + // In TT_TextProto, map<key, value> does not occur. + if (Style.Language == FormatStyle::LK_TextProto || + (Style.Language == FormatStyle::LK_Proto && Left->Previous && + Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) + CurrentToken->Type = TT_DictLiteral; + else + CurrentToken->Type = TT_TemplateCloser; next(); return true; } @@ -131,10 +156,7 @@ private: Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; bool StartsObjCMethodExpr = false; - if (CurrentToken->is(tok::caret)) { - // (^ can start a block type. - Left->Type = TT_ObjCBlockLParen; - } else if (FormatToken *MaybeSel = Left->Previous) { + if (FormatToken *MaybeSel = Left->Previous) { // @selector( starts a selector. if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) { @@ -200,12 +222,21 @@ private: Left->Type = TT_ObjCMethodExpr; } + // MightBeFunctionType and ProbablyFunctionType are used for + // function pointer and reference types as well as Objective-C + // block types: + // + // void (*FunctionPointer)(void); + // void (&FunctionReference)(void); + // void (^ObjCBlock)(void); bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression; - bool ProbablyFunctionType = CurrentToken->isOneOf(tok::star, tok::amp); + bool ProbablyFunctionType = + CurrentToken->isOneOf(tok::star, tok::amp, tok::caret); bool HasMultipleLines = false; bool HasMultipleParametersOnALine = false; bool MightBeObjCForRangeLoop = Left->Previous && Left->Previous->is(tok::kw_for); + FormatToken *PossibleObjCForInToken = nullptr; while (CurrentToken) { // LookForDecls is set when "if (" has been seen. Check for // 'identifier' '*' 'identifier' followed by not '=' -- this @@ -237,7 +268,8 @@ private: if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration))) - Left->Type = TT_FunctionTypeLParen; + Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen + : TT_FunctionTypeLParen; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; @@ -291,10 +323,17 @@ private: CurrentToken->Previous->isSimpleTypeSpecifier()) && !CurrentToken->is(tok::l_brace)) Contexts.back().IsExpression = false; - if (CurrentToken->isOneOf(tok::semi, tok::colon)) + if (CurrentToken->isOneOf(tok::semi, tok::colon)) { MightBeObjCForRangeLoop = false; - if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) - CurrentToken->Type = TT_ObjCForIn; + if (PossibleObjCForInToken) { + PossibleObjCForInToken->Type = TT_Unknown; + PossibleObjCForInToken = nullptr; + } + } + if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) { + PossibleObjCForInToken = CurrentToken; + PossibleObjCForInToken->Type = TT_ObjCForIn; + } // When we discover a 'new', we set CanBeExpression to 'false' in order to // parse the type correctly. Reset that after a comma. if (CurrentToken->is(tok::comma)) @@ -310,13 +349,40 @@ private: return false; } + bool isCpp11AttributeSpecifier(const FormatToken &Tok) { + if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square)) + return false; + const FormatToken *AttrTok = Tok.Next->Next; + if (!AttrTok) + return false; + // C++17 '[[using ns: foo, bar(baz, blech)]]' + // We assume nobody will name an ObjC variable 'using'. + if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon)) + return true; + if (AttrTok->isNot(tok::identifier)) + return false; + while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) { + // ObjC message send. We assume nobody will use : in a C++11 attribute + // specifier parameter, although this is technically valid: + // [[foo(:)]] + if (AttrTok->is(tok::colon) || + AttrTok->startsSequence(tok::identifier, tok::identifier)) + return false; + if (AttrTok->is(tok::ellipsis)) + return true; + AttrTok = AttrTok->Next; + } + return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square); + } + bool parseSquare() { if (!CurrentToken) return false; // A '[' could be an index subscript (after an identifier or after // ')' or ']'), it could be the start of an Objective-C method - // expression, or it could the start of an Objective-C array literal. + // expression, it could the start of an Objective-C array literal, + // or it could be a C++ attribute specifier [[foo::bar]]. FormatToken *Left = CurrentToken->Previous; Left->ParentBracket = Contexts.back().ContextKind; FormatToken *Parent = Left->getPreviousNonComment(); @@ -329,14 +395,18 @@ private: (Contexts.back().CanBeExpression || Contexts.back().IsExpression || Contexts.back().InTemplateArgument); + bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) || + Contexts.back().InCpp11AttributeSpecifier; + bool StartsObjCMethodExpr = - !CppArrayTemplates && Style.isCpp() && + !CppArrayTemplates && Style.isCpp() && !IsCpp11AttributeSpecifier && Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) && - CurrentToken->isNot(tok::l_brace) && + !CurrentToken->isOneOf(tok::l_brace, tok::r_square) && (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, tok::kw_return, tok::kw_throw) || Parent->isUnaryOperator() || + // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); bool ColonFound = false; @@ -347,6 +417,8 @@ private: } else if (Left->is(TT_Unknown)) { if (StartsObjCMethodExpr) { Left->Type = TT_ObjCMethodExpr; + } else if (IsCpp11AttributeSpecifier) { + Left->Type = TT_AttributeSquare; } else if (Style.Language == FormatStyle::LK_JavaScript && Parent && Contexts.back().ContextKind == tok::l_brace && Parent->isOneOf(tok::l_brace, tok::comma)) { @@ -358,12 +430,48 @@ private: Parent->is(TT_TemplateCloser)) { Left->Type = TT_ArraySubscriptLSquare; } else if (Style.Language == FormatStyle::LK_Proto || - (!CppArrayTemplates && Parent && - Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, - tok::comma, tok::l_paren, tok::l_square, - tok::question, tok::colon, tok::kw_return, - // Should only be relevant to JavaScript: - tok::kw_default))) { + Style.Language == FormatStyle::LK_TextProto) { + // Square braces in LK_Proto can either be message field attributes: + // + // optional Aaa aaa = 1 [ + // (aaa) = aaa + // ]; + // + // extensions 123 [ + // (aaa) = aaa + // ]; + // + // or text proto extensions (in options): + // + // option (Aaa.options) = { + // [type.type/type] { + // key: value + // } + // } + // + // or repeated fields (in options): + // + // option (Aaa.options) = { + // keys: [ 1, 2, 3 ] + // } + // + // In the first and the third case we want to spread the contents inside + // the square braces; in the second we want to keep them inline. + Left->Type = TT_ArrayInitializerLSquare; + if (!Left->endsSequence(tok::l_square, tok::numeric_constant, + tok::equal) && + !Left->endsSequence(tok::l_square, tok::numeric_constant, + tok::identifier) && + !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) { + Left->Type = TT_ProtoExtensionLSquare; + BindingIncrease = 10; + } + } else if (!CppArrayTemplates && Parent && + Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, + tok::comma, tok::l_paren, tok::l_square, + tok::question, tok::colon, tok::kw_return, + // Should only be relevant to JavaScript: + tok::kw_default)) { Left->Type = TT_ArrayInitializerLSquare; } else { BindingIncrease = 10; @@ -378,11 +486,14 @@ private: Contexts.back().IsExpression = false; Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; + Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier; while (CurrentToken) { if (CurrentToken->is(tok::r_square)) { - if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && - Left->is(TT_ObjCMethodExpr)) { + if (IsCpp11AttributeSpecifier) + CurrentToken->Type = TT_AttributeSquare; + else if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && + Left->is(TT_ObjCMethodExpr)) { // An ObjC method call is rarely followed by an open parenthesis. // FIXME: Do we incorrectly label ":" with this? StartsObjCMethodExpr = false; @@ -390,6 +501,12 @@ private: } if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { CurrentToken->Type = TT_ObjCMethodExpr; + // If we haven't seen a colon yet, make sure the last identifier + // before the r_square is tagged as a selector name component. + if (!ColonFound && CurrentToken->Previous && + CurrentToken->Previous->is(TT_Unknown) && + canBeObjCSelectorComponent(*CurrentToken->Previous)) + CurrentToken->Previous->Type = TT_SelectorName; // determineStarAmpUsage() thinks that '*' '[' is allocating an // array of pointers, but if '[' starts a selector then '*' is a // binary operator. @@ -398,6 +515,20 @@ private: } Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; + // FirstObjCSelectorName is set when a colon is found. This does + // not work, however, when the method has no parameters. + // Here, we set FirstObjCSelectorName when the end of the method call is + // reached, in case it was not set already. + if (!Contexts.back().FirstObjCSelectorName) { + FormatToken* Previous = CurrentToken->getPreviousNonComment(); + if (Previous && Previous->is(TT_SelectorName)) { + Previous->ObjCSelectorNameParts = 1; + Contexts.back().FirstObjCSelectorName = Previous; + } + } else { + Left->ParameterCount = + Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; + } if (Contexts.back().FirstObjCSelectorName) { Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = Contexts.back().LongestObjCSelectorName; @@ -410,12 +541,19 @@ private: if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) return false; if (CurrentToken->is(tok::colon)) { - if (Left->isOneOf(TT_ArraySubscriptLSquare, - TT_DesignatedInitializerLSquare)) { + if (IsCpp11AttributeSpecifier && + CurrentToken->endsSequence(tok::colon, tok::identifier, + tok::kw_using)) { + // Remember that this is a [[using ns: foo]] C++ attribute, so we + // don't add a space before the colon (unlike other colons). + CurrentToken->Type = TT_AttributeColon; + } else if (Left->isOneOf(TT_ArraySubscriptLSquare, + TT_DesignatedInitializerLSquare)) { Left->Type = TT_ObjCMethodExpr; StartsObjCMethodExpr = true; Contexts.back().ColonIsObjCMethodExpr = true; if (Parent && Parent->is(tok::r_paren)) + // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. Parent->Type = TT_CastRParen; } ColonFound = true; @@ -462,13 +600,15 @@ private: FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (Previous->is(TT_JsTypeOptionalQuestion)) Previous = Previous->getPreviousNonComment(); - if (((CurrentToken->is(tok::colon) && - (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - (Previous->Tok.getIdentifierInfo() || - Previous->is(tok::string_literal))) - Previous->Type = TT_SelectorName; + if ((CurrentToken->is(tok::colon) && + (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { + Left->Type = TT_DictLiteral; + if (Previous->Tok.getIdentifierInfo() || + Previous->is(tok::string_literal)) + Previous->Type = TT_SelectorName; + } if (CurrentToken->is(tok::colon) || Style.Language == FormatStyle::LK_JavaScript) Left->Type = TT_DictLiteral; @@ -484,6 +624,9 @@ private: } void updateParameterCount(FormatToken *Left, FormatToken *Current) { + // For ObjC methods, the number of parameters is calculated differently as + // method declarations have a different structure (the parameters are not + // inside a bracket scope). if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block) ++Left->BlockParameterCount; if (Current->is(tok::comma)) { @@ -562,19 +705,29 @@ private: Line.startsWith(TT_ObjCMethodSpecifier)) { Tok->Type = TT_ObjCMethodExpr; const FormatToken *BeforePrevious = Tok->Previous->Previous; + // Ensure we tag all identifiers in method declarations as + // TT_SelectorName. + bool UnknownIdentifierInMethodDeclaration = + Line.startsWith(TT_ObjCMethodSpecifier) && + Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown); if (!BeforePrevious || + // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. !(BeforePrevious->is(TT_CastRParen) || (BeforePrevious->is(TT_ObjCMethodExpr) && BeforePrevious->is(tok::colon))) || BeforePrevious->is(tok::r_square) || - Contexts.back().LongestObjCSelectorName == 0) { + Contexts.back().LongestObjCSelectorName == 0 || + UnknownIdentifierInMethodDeclaration) { Tok->Previous->Type = TT_SelectorName; - if (Tok->Previous->ColumnWidth > - Contexts.back().LongestObjCSelectorName) - Contexts.back().LongestObjCSelectorName = - Tok->Previous->ColumnWidth; if (!Contexts.back().FirstObjCSelectorName) Contexts.back().FirstObjCSelectorName = Tok->Previous; + else if (Tok->Previous->ColumnWidth > + Contexts.back().LongestObjCSelectorName) + Contexts.back().LongestObjCSelectorName = + Tok->Previous->ColumnWidth; + Tok->Previous->ParameterIndex = + Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; + ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; } } else if (Contexts.back().ColonIsForRangeExpr) { Tok->Type = TT_RangeBasedForLoopColon; @@ -587,8 +740,10 @@ private: Tok->Type = TT_CtorInitializerColon; else Tok->Type = TT_InheritanceColon; - } else if (Tok->Previous->is(tok::identifier) && Tok->Next && - Tok->Next->isOneOf(tok::r_paren, tok::comma)) { + } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next && + (Tok->Next->isOneOf(tok::r_paren, tok::comma) || + (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next && + Tok->Next->Next->is(tok::colon)))) { // This handles a special macro in ObjC code where selectors including // the colon are passed as macro arguments. Tok->Type = TT_ObjCMethodExpr; @@ -668,7 +823,15 @@ private: case tok::less: if (parseAngle()) { Tok->Type = TT_TemplateOpener; - if (Style.Language == FormatStyle::LK_TextProto) { + // In TT_Proto, we must distignuish between: + // map<key, value> + // msg < item: data > + // msg: < item: data > + // In TT_TextProto, map<key, value> does not occur. + if (Style.Language == FormatStyle::LK_TextProto || + (Style.Language == FormatStyle::LK_Proto && Tok->Previous && + Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { + Tok->Type = TT_DictLiteral; FormatToken *Previous = Tok->getPreviousNonComment(); if (Previous && Previous->Type != TT_DictLiteral) Previous->Type = TT_SelectorName; @@ -689,9 +852,13 @@ private: return false; break; case tok::greater: - Tok->Type = TT_BinaryOperator; + if (Style.Language != FormatStyle::LK_TextProto) + Tok->Type = TT_BinaryOperator; break; case tok::kw_operator: + if (Style.Language == FormatStyle::LK_TextProto || + Style.Language == FormatStyle::LK_Proto) + break; while (CurrentToken && !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { if (CurrentToken->isOneOf(tok::star, tok::amp)) @@ -987,7 +1154,7 @@ private: resetTokenMetadata(CurrentToken); } - /// \brief A struct to hold information valid in a specific context, e.g. + /// A struct to hold information valid in a specific context, e.g. /// a pair of parenthesis. struct Context { Context(tok::TokenKind ContextKind, unsigned BindingStrength, @@ -1010,9 +1177,10 @@ private: bool InInheritanceList = false; bool CaretFound = false; bool IsForEachMacro = false; + bool InCpp11AttributeSpecifier = false; }; - /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime + /// Puts a new \c Context onto the stack \c Contexts for the lifetime /// of each instance. struct ScopedContextCreator { AnnotatingParser &P; @@ -1155,7 +1323,9 @@ private: Current.Type = TT_ConditionalExpr; } } else if (Current.isBinaryOperator() && - (!Current.Previous || Current.Previous->isNot(tok::l_square))) { + (!Current.Previous || Current.Previous->isNot(tok::l_square)) && + (!Current.is(tok::greater) && + Style.Language != FormatStyle::LK_TextProto)) { Current.Type = TT_BinaryOperator; } else if (Current.is(tok::comment)) { if (Current.TokenText.startswith("/*")) { @@ -1214,6 +1384,17 @@ private: TT_LeadingJavaAnnotation)) { Current.Type = Current.Previous->Type; } + } else if (canBeObjCSelectorComponent(Current) && + // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. + Current.Previous && Current.Previous->is(TT_CastRParen) && + Current.Previous->MatchingParen && + Current.Previous->MatchingParen->Previous && + Current.Previous->MatchingParen->Previous->is( + TT_ObjCMethodSpecifier)) { + // This is the first part of an Objective-C selector name. (If there's no + // colon after this, this is the only place which annotates the identifier + // as a selector.) + Current.Type = TT_SelectorName; } else if (Current.isOneOf(tok::identifier, tok::kw_const) && Current.Previous && !Current.Previous->isOneOf(tok::equal, tok::at) && @@ -1240,7 +1421,7 @@ private: } } - /// \brief Take a guess at whether \p Tok starts a name of a function or + /// Take a guess at whether \p Tok starts a name of a function or /// variable declaration. /// /// This is a heuristic based on whether \p Tok is an identifier following @@ -1285,7 +1466,7 @@ private: PreviousNotConst->isSimpleTypeSpecifier(); } - /// \brief Determine whether ')' is ending a cast. + /// Determine whether ')' is ending a cast. bool rParenEndsCast(const FormatToken &Tok) { // C-style casts are only used in C++ and Java. if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java) @@ -1382,7 +1563,7 @@ private: return true; } - /// \brief Return the type of the given token assuming it is * or &. + /// Return the type of the given token assuming it is * or &. TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, bool InTemplateArgument) { if (Style.Language == FormatStyle::LK_JavaScript) @@ -1459,10 +1640,8 @@ private: if (!PrevToken) return TT_UnaryOperator; - if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator) && - !PrevToken->is(tok::exclaim)) - // There aren't any trailing unary operators except for TypeScript's - // non-null operator (!). Thus, this must be squence of leading operators. + if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator)) + // This must be a sequence of leading unary operators. return TT_UnaryOperator; // Use heuristics to recognize unary operators. @@ -1479,7 +1658,7 @@ private: return TT_BinaryOperator; } - /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. + /// Determine whether ++/-- are pre- or post-increments/-decrements. TokenType determineIncrementUsage(const FormatToken &Tok) { const FormatToken *PrevToken = Tok.getPreviousNonComment(); if (!PrevToken || PrevToken->is(TT_CastRParen)) @@ -1508,7 +1687,7 @@ private: static const int PrecedenceUnaryOperator = prec::PointerToMember + 1; static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; -/// \brief Parses binary expressions by inserting fake parenthesis based on +/// Parses binary expressions by inserting fake parenthesis based on /// operator precedence. class ExpressionParser { public: @@ -1516,7 +1695,7 @@ public: AnnotatedLine &Line) : Style(Style), Keywords(Keywords), Current(Line.First) {} - /// \brief Parse expressions with the given operatore precedence. + /// Parse expressions with the given operator precedence. void parse(int Precedence = 0) { // Skip 'return' and ObjC selector colons as they are not part of a binary // expression. @@ -1603,7 +1782,7 @@ public: } private: - /// \brief Gets the precedence (+1) of the given token for binary operators + /// Gets the precedence (+1) of the given token for binary operators /// and other tokens that we treat like binary operators. int getCurrentPrecedence() { if (Current) { @@ -1662,7 +1841,7 @@ private: } } - /// \brief Parse unary operator expressions and surround them with fake + /// Parse unary operator expressions and surround them with fake /// parentheses if appropriate. void parseUnaryOperator() { llvm::SmallVector<FormatToken *, 2> Tokens; @@ -1723,15 +1902,18 @@ void TokenAnnotator::setCommentLineLevels( } } - if (NextNonCommentLine && CommentLine) { - // If the comment is currently aligned with the line immediately following - // it, that's probably intentional and we should keep it. - bool AlignedWithNextLine = - NextNonCommentLine->First->NewlinesBefore <= 1 && - NextNonCommentLine->First->OriginalColumn == - (*I)->First->OriginalColumn; - if (AlignedWithNextLine) - (*I)->Level = NextNonCommentLine->Level; + // If the comment is currently aligned with the line immediately following + // it, that's probably intentional and we should keep it. + if (NextNonCommentLine && CommentLine && + NextNonCommentLine->First->NewlinesBefore <= 1 && + NextNonCommentLine->First->OriginalColumn == + (*I)->First->OriginalColumn) { + // Align comments for preprocessor lines with the # in column 0. + // Otherwise, align with the next line. + (*I)->Level = (NextNonCommentLine->Type == LT_PreprocessorDirective || + NextNonCommentLine->Type == LT_ImportStatement) + ? 0 + : NextNonCommentLine->Level; } else { NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; } @@ -1962,8 +2144,20 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { // FIXME: Only calculate this if CanBreakBefore is true once static // initializers etc. are sorted out. // FIXME: Move magic numbers to a better place. - Current->SplitPenalty = 20 * Current->BindingStrength + - splitPenalty(Line, *Current, InFunctionDecl); + + // Reduce penalty for aligning ObjC method arguments using the colon + // alignment as this is the canonical way (still prefer fitting everything + // into one line if possible). Trying to fit a whole expression into one + // line should not force other line breaks (e.g. when ObjC method + // expression is a part of other expression). + Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl); + if (Style.Language == FormatStyle::LK_ObjC && + Current->is(TT_SelectorName) && Current->ParameterIndex > 0) { + if (Current->ParameterIndex == 1) + Current->SplitPenalty += 5 * Current->BindingStrength; + } else { + Current->SplitPenalty += 20 * Current->BindingStrength; + } Current = Current->Next; } @@ -1983,7 +2177,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { ++IndentLevel; } - DEBUG({ printDebugInfo(Line); }); + LLVM_DEBUG({ printDebugInfo(Line); }); } void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { @@ -2043,7 +2237,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 35; if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, TT_ArrayInitializerLSquare, - TT_DesignatedInitializerLSquare)) + TT_DesignatedInitializerLSquare, TT_AttributeSquare)) return 500; } @@ -2128,6 +2322,13 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) return Line.MightBeFunctionDecl ? 50 : 500; + // In Objective-C type declarations, avoid breaking after the category's + // open paren (we'll prefer breaking after the protocol list's opening + // angle bracket, if present). + if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous && + Left.Previous->isOneOf(tok::identifier, tok::greater)) + return 500; + if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) return 100; @@ -2144,6 +2345,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.opensScope()) { if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) return 0; + if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle) + return 19; return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter : 19; } @@ -2169,6 +2372,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, return 2; return 1; } + if (Left.ClosesTemplateDeclaration) + return Style.PenaltyBreakTemplateDeclaration; if (Left.is(TT_ConditionalExpr)) return prec::Conditional; prec::Level Level = Left.getPrecedence(); @@ -2205,9 +2410,12 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, : Style.SpacesInParentheses; if (Right.isOneOf(tok::semi, tok::comma)) return false; - if (Right.is(tok::less) && Line.Type == LT_ObjCDecl && - Style.ObjCSpaceBeforeProtocolList) - return true; + if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) { + bool IsLightweightGeneric = + Right.MatchingParen && Right.MatchingParen->Next && + Right.MatchingParen->Next->is(tok::colon); + return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList; + } if (Right.is(tok::less) && Left.is(tok::kw_template)) return Style.SpaceAfterTemplateKeyword; if (Left.isOneOf(tok::exclaim, tok::tilde)) @@ -2221,8 +2429,17 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return !Left.is(TT_ObjCMethodExpr); if (Left.is(tok::coloncolon)) return false; - if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) + if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) { + if (Style.Language == FormatStyle::LK_TextProto || + (Style.Language == FormatStyle::LK_Proto && + (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) { + // Format empty list as `<>`. + if (Left.is(tok::less) && Right.is(tok::greater)) + return false; + return !Style.Cpp11BracedListStyle; + } return false; + } if (Right.is(tok::ellipsis)) return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous && Left.Previous->is(tok::kw_case)); @@ -2263,23 +2480,34 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; + const auto SpaceRequiredForArrayInitializerLSquare = + [](const FormatToken &LSquareTok, const FormatStyle &Style) { + return Style.SpacesInContainerLiterals || + ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && + !Style.Cpp11BracedListStyle && + LSquareTok.endsSequence(tok::l_square, tok::colon, + TT_SelectorName)); + }; if (Left.is(tok::l_square)) - return (Left.is(TT_ArrayInitializerLSquare) && - Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) || + return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) && + SpaceRequiredForArrayInitializerLSquare(Left, Style)) || (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare) && Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); if (Right.is(tok::r_square)) return Right.MatchingParen && - ((Style.SpacesInContainerLiterals && - Right.MatchingParen->is(TT_ArrayInitializerLSquare)) || + ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) && + SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen, + Style)) || (Style.SpacesInSquareBrackets && Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare, - TT_StructuredBindingLSquare))); + TT_StructuredBindingLSquare)) || + Right.MatchingParen->is(TT_AttributeParen)); if (Right.is(tok::l_square) && !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, TT_DesignatedInitializerLSquare, - TT_StructuredBindingLSquare) && + TT_StructuredBindingLSquare, TT_AttributeSquare) && !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) return false; if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) @@ -2291,7 +2519,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(TT_BlockComment)) return !Left.TokenText.endswith("=*/"); if (Right.is(tok::l_paren)) { - if (Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) + if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) || + (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) return true; return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && @@ -2329,6 +2558,13 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, return false; if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) return false; + if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) + // Objective-C dictionary literal -> no space after opening brace. + return false; + if (Right.is(tok::r_brace) && Right.MatchingParen && + Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) + // Objective-C dictionary literal -> no space before closing brace. + return false; return true; } @@ -2340,6 +2576,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Style.isCpp()) { if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); + if (Right.is(tok::l_brace) && Right.BlockKind == BK_BracedInit && + !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) + return true; } else if (Style.Language == FormatStyle::LK_Proto || Style.Language == FormatStyle::LK_TextProto) { if (Right.is(tok::period) && @@ -2351,6 +2590,19 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName)) return true; + // Slashes occur in text protocol extension syntax: [type/type] { ... }. + if (Left.is(tok::slash) || Right.is(tok::slash)) + return false; + if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) && + Right.isOneOf(tok::l_brace, tok::less)) + return !Style.Cpp11BracedListStyle; + // A percent is probably part of a formatting specification, such as %lld. + if (Left.is(tok::percent)) + return false; + // Preserve the existence of a space before a percent for cases like 0x%04x + // and "%d %d" + if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) + return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin(); } else if (Style.Language == FormatStyle::LK_JavaScript) { if (Left.is(TT_JsFatArrow)) return true; @@ -2402,7 +2654,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // (e.g. as "const x of y" in a for loop), or after a destructuring // operation (const [x, y] of z, const {a, b} of c). (Left.is(Keywords.kw_of) && Left.Previous && - (Left.Previous->Tok.getIdentifierInfo() || + (Left.Previous->Tok.is(tok::identifier) || Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && (!Left.Previous || !Left.Previous->is(tok::period))) return true; @@ -2455,8 +2707,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, if (Line.Type == LT_ObjCMethodDecl) { if (Left.is(TT_ObjCMethodSpecifier)) return true; - if (Left.is(tok::r_paren) && Right.is(tok::identifier)) - // Don't space between ')' and <id> + if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) + // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a + // keyword in Objective-C, and '+ (instancetype)new;' is a standard class + // method declaration. return false; } if (Line.Type == LT_ObjCProperty && @@ -2472,8 +2726,15 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; if (Right.is(tok::comma)) return false; - if (Right.isOneOf(TT_CtorInitializerColon, TT_ObjCBlockLParen)) + if (Right.is(TT_ObjCBlockLParen)) return true; + if (Right.is(TT_CtorInitializerColon)) + return Style.SpaceBeforeCtorInitializerColon; + if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon) + return false; + if (Right.is(TT_RangeBasedForLoopColon) && + !Style.SpaceBeforeRangeBasedForLoopColon) + return false; if (Right.is(tok::colon)) { if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) @@ -2486,6 +2747,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return false; if (Right.is(TT_DictLiteral)) return Style.SpacesInContainerLiterals; + if (Right.is(TT_AttributeColon)) + return false; return true; } if (Left.is(TT_UnaryOperator)) @@ -2497,9 +2760,13 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return Style.SpaceAfterCStyleCast || Right.isOneOf(TT_BinaryOperator, TT_SelectorName); - if (Left.is(tok::greater) && Right.is(tok::greater)) + if (Left.is(tok::greater) && Right.is(tok::greater)) { + if (Style.Language == FormatStyle::LK_TextProto || + (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) + return !Style.Cpp11BracedListStyle; return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); + } if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) || Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) @@ -2517,7 +2784,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, Style.Standard == FormatStyle::LS_Cpp03) || !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, tok::kw___super, TT_TemplateCloser, - TT_TemplateOpener)); + TT_TemplateOpener)) || + (Left.is(tok ::l_paren) && Style.SpacesInParentheses); if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return Style.SpacesInAngles; // Space before TT_StructuredBindingLSquare. @@ -2597,7 +2865,8 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; } else if (Style.Language == FormatStyle::LK_Cpp || Style.Language == FormatStyle::LK_ObjC || - Style.Language == FormatStyle::LK_Proto) { + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { if (Left.isStringLiteral() && Right.isStringLiteral()) return true; } @@ -2639,7 +2908,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Right.Previous->ClosesTemplateDeclaration && Right.Previous->MatchingParen && Right.Previous->MatchingParen->NestingLevel == 0 && - Style.AlwaysBreakTemplateDeclarations) + Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes) return true; if (Right.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && @@ -2650,13 +2919,14 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) return true; // Break only if we have multiple inheritance. - if (Style.BreakBeforeInheritanceComma && Right.is(TT_InheritanceComma)) + if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && + Right.is(TT_InheritanceComma)) return true; if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) - // Raw string literals are special wrt. line breaks. The author has made a - // deliberate choice and might have aligned the contents of the string - // literal accordingly. Thus, we try keep existing line breaks. - return Right.NewlinesBefore > 0; + // Multiline raw string literals are special wrt. line breaks. The author + // has made a deliberate choice and might have aligned the contents of the + // string literal accordingly. Thus, we try keep existing line breaks. + return Right.IsMultiline && Right.NewlinesBefore > 0; if ((Right.Previous->is(tok::l_brace) || (Right.Previous->is(tok::less) && Right.Previous->Previous && Right.Previous->Previous->is(tok::equal))) && @@ -2683,6 +2953,94 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) return true; + if (Right.is(TT_ProtoExtensionLSquare)) + return true; + + // In text proto instances if a submessage contains at least 2 entries and at + // least one of them is a submessage, like A { ... B { ... } ... }, + // put all of the entries of A on separate lines by forcing the selector of + // the submessage B to be put on a newline. + // + // Example: these can stay on one line: + // a { scalar_1: 1 scalar_2: 2 } + // a { b { key: value } } + // + // and these entries need to be on a new line even if putting them all in one + // line is under the column limit: + // a { + // scalar: 1 + // b { key: value } + // } + // + // We enforce this by breaking before a submessage field that has previous + // siblings, *and* breaking before a field that follows a submessage field. + // + // Be careful to exclude the case [proto.ext] { ... } since the `]` is + // the TT_SelectorName there, but we don't want to break inside the brackets. + // + // Another edge case is @submessage { key: value }, which is a common + // substitution placeholder. In this case we want to keep `@` and `submessage` + // together. + // + // We ensure elsewhere that extensions are always on their own line. + if ((Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) && + Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) { + // Keep `@submessage` together in: + // @submessage { key: value } + if (Right.Previous && Right.Previous->is(tok::at)) + return false; + // Look for the scope opener after selector in cases like: + // selector { ... + // selector: { ... + // selector: @base { ... + FormatToken *LBrace = Right.Next; + if (LBrace && LBrace->is(tok::colon)) { + LBrace = LBrace->Next; + if (LBrace && LBrace->is(tok::at)) { + LBrace = LBrace->Next; + if (LBrace) + LBrace = LBrace->Next; + } + } + if (LBrace && + // The scope opener is one of {, [, <: + // selector { ... } + // selector [ ... ] + // selector < ... > + // + // In case of selector { ... }, the l_brace is TT_DictLiteral. + // In case of an empty selector {}, the l_brace is not TT_DictLiteral, + // so we check for immediately following r_brace. + ((LBrace->is(tok::l_brace) && + (LBrace->is(TT_DictLiteral) || + (LBrace->Next && LBrace->Next->is(tok::r_brace)))) || + LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) { + // If Left.ParameterCount is 0, then this submessage entry is not the + // first in its parent submessage, and we want to break before this entry. + // If Left.ParameterCount is greater than 0, then its parent submessage + // might contain 1 or more entries and we want to break before this entry + // if it contains at least 2 entries. We deal with this case later by + // detecting and breaking before the next entry in the parent submessage. + if (Left.ParameterCount == 0) + return true; + // However, if this submessage is the first entry in its parent + // submessage, Left.ParameterCount might be 1 in some cases. + // We deal with this case later by detecting an entry + // following a closing paren of this submessage. + } + + // If this is an entry immediately following a submessage, it will be + // preceded by a closing paren of that submessage, like in: + // left---. .---right + // v v + // sub: { ... } key: value + // If there was a comment between `}` an `key` above, then `key` would be + // put on a new line anyways. + if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square)) + return true; + } + return false; } @@ -2708,14 +3066,19 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get, Keywords.kw_set, Keywords.kw_async, Keywords.kw_await)) return false; // Otherwise automatic semicolon insertion would trigger. - if (Left.Tok.getIdentifierInfo() && - Right.startsSequence(tok::l_square, tok::r_square)) - return false; // breaking in "foo[]" creates illegal TS type syntax. + if (Right.NestingLevel == 0 && + (Left.Tok.getIdentifierInfo() || + Left.isOneOf(tok::r_square, tok::r_paren)) && + Right.isOneOf(tok::l_square, tok::l_paren)) + return false; // Otherwise automatic semicolon insertion would trigger. if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) return false; if (Left.is(TT_JsTypeColon)) return true; - if (Right.NestingLevel == 0 && Right.is(Keywords.kw_is)) + // Don't wrap between ":" and "!" of a strict prop init ("field!: type;"). + if (Left.is(tok::exclaim) && Right.is(tok::colon)) + return false; + if (Right.is(Keywords.kw_is)) return false; if (Left.is(Keywords.kw_in)) return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None; @@ -2774,16 +3137,56 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return Style.BreakBeforeTernaryOperators; if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) return !Style.BreakBeforeTernaryOperators; + if (Left.is(TT_InheritanceColon)) + return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon; if (Right.is(TT_InheritanceColon)) - return true; + return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon; if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && Left.isNot(TT_SelectorName)) return true; + if (Right.is(tok::colon) && !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) return false; - if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) + if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { + if (Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { + if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral()) + return false; + // Prevent cases like: + // + // submessage: + // { key: valueeeeeeeeeeee } + // + // when the snippet does not fit into one line. + // Prefer: + // + // submessage: { + // key: valueeeeeeeeeeee + // } + // + // instead, even if it is longer by one line. + // + // Note that this allows allows the "{" to go over the column limit + // when the column limit is just between ":" and "{", but that does + // not happen too often and alternative formattings in this case are + // not much better. + // + // The code covers the cases: + // + // submessage: { ... } + // submessage: < ... > + // repeated: [ ... ] + if (((Right.is(tok::l_brace) || Right.is(tok::less)) && + Right.is(TT_DictLiteral)) || + Right.is(TT_ArrayInitializerLSquare)) + return false; + } return true; + } + if (Right.is(tok::r_square) && Right.MatchingParen && + Right.MatchingParen->is(TT_ProtoExtensionLSquare)) + return false; if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_ObjCMethodExpr))) return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. @@ -2806,6 +3209,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) return false; + if (Left.is(tok::equal) && Right.is(tok::l_brace) && + !Style.Cpp11BracedListStyle) + return false; if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) return false; if (Left.is(tok::l_paren) && Left.Previous && @@ -2831,7 +3237,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, tok::less, tok::coloncolon); - if (Right.is(tok::kw___attribute)) + if (Right.is(tok::kw___attribute) || + (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) return true; if (Left.is(tok::identifier) && Right.is(tok::string_literal)) @@ -2850,9 +3257,11 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_CtorInitializerComma) && Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) return true; - if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) + if (Left.is(TT_InheritanceComma) && + Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) return false; - if (Right.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) + if (Right.is(TT_InheritanceComma) && + Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) return true; if ((Left.is(tok::greater) && Right.is(tok::greater)) || (Left.is(tok::less) && Right.is(tok::less))) @@ -2872,6 +3281,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || Left.getPrecedence() == prec::Assignment)) return true; + if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) || + (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) + return false; return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, tok::kw_class, tok::kw_struct, tok::comment) || Right.isMemberAccess() || @@ -2898,6 +3310,7 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) llvm::errs() << Tok->FakeLParens[i] << "/"; llvm::errs() << " FakeRParens=" << Tok->FakeRParens; + llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo(); llvm::errs() << " Text='" << Tok->TokenText << "'\n"; if (!Tok->Next) assert(Tok == Line.Last); diff --git a/lib/Format/TokenAnnotator.h b/lib/Format/TokenAnnotator.h index 04a18d45b82e..a3124fcb3d65 100644 --- a/lib/Format/TokenAnnotator.h +++ b/lib/Format/TokenAnnotator.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements a token annotator, i.e. creates +/// This file implements a token annotator, i.e. creates /// \c AnnotatedTokens out of \c FormatTokens with required extra information. /// //===----------------------------------------------------------------------===// @@ -40,6 +40,7 @@ public: AnnotatedLine(const UnwrappedLine &Line) : First(Line.Tokens.front().Tok), Level(Line.Level), MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), + MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), InPPDirective(Line.InPPDirective), MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), IsMultiVariableDeclStmt(false), Affected(false), @@ -112,6 +113,7 @@ public: LineType Type; unsigned Level; size_t MatchingOpeningBlockLineIndex; + size_t MatchingClosingBlockLineIndex; bool InPPDirective; bool MustBeDeclaration; bool MightBeFunctionDecl; @@ -136,14 +138,14 @@ private: void operator=(const AnnotatedLine &) = delete; }; -/// \brief Determines extra information about the tokens comprising an +/// Determines extra information about the tokens comprising an /// \c UnwrappedLine. class TokenAnnotator { public: TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) : Style(Style), Keywords(Keywords) {} - /// \brief Adapts the indent levels of comment lines to the indent of the + /// Adapts the indent levels of comment lines to the indent of the /// subsequent line. // FIXME: Can/should this be done in the UnwrappedLineParser? void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines); @@ -152,14 +154,14 @@ public: void calculateFormattingInformation(AnnotatedLine &Line); private: - /// \brief Calculate the penalty for splitting before \c Tok. + /// Calculate the penalty for splitting before \c Tok. unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, bool InFunctionDecl); bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right); - bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Tok); + bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right); bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp index 60dc1a7169d1..906dae40cbee 100644 --- a/lib/Format/UnwrappedLineFormatter.cpp +++ b/lib/Format/UnwrappedLineFormatter.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "NamespaceEndCommentsFixer.h" #include "UnwrappedLineFormatter.h" #include "WhitespaceManager.h" #include "llvm/Support/Debug.h" @@ -26,7 +27,7 @@ bool startsExternCBlock(const AnnotatedLine &Line) { NextNext && NextNext->is(tok::l_brace); } -/// \brief Tracks the indent level of \c AnnotatedLines across levels. +/// Tracks the indent level of \c AnnotatedLines across levels. /// /// \c nextLine must be called for each \c AnnotatedLine, after which \c /// getIndent() will return the indent for the last line \c nextLine was called @@ -45,10 +46,10 @@ public: IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); } - /// \brief Returns the indent for the current line. + /// Returns the indent for the current line. unsigned getIndent() const { return Indent; } - /// \brief Update the indent state given that \p Line is going to be formatted + /// Update the indent state given that \p Line is going to be formatted /// next. void nextLine(const AnnotatedLine &Line) { Offset = getIndentOffset(*Line.First); @@ -66,14 +67,14 @@ public: Indent += Offset; } - /// \brief Update the indent state given that \p Line indent should be + /// Update the indent state given that \p Line indent should be /// skipped. void skipLine(const AnnotatedLine &Line) { while (IndentForLevel.size() <= Line.Level) IndentForLevel.push_back(Indent); } - /// \brief Update the level indent to adapt to the given \p Line. + /// Update the level indent to adapt to the given \p Line. /// /// When a line is not formatted, we move the subsequent lines on the same /// level to the same indent. @@ -88,7 +89,7 @@ public: } private: - /// \brief Get the offset of the line relatively to the level. + /// Get the offset of the line relatively to the level. /// /// For example, 'public:' labels in classes are offset by 1 or 2 /// characters to the left from their level. @@ -104,7 +105,7 @@ private: return 0; } - /// \brief Get the indent of \p Level from \p IndentForLevel. + /// Get the indent of \p Level from \p IndentForLevel. /// /// \p IndentForLevel must contain the indent for the level \c l /// at \p IndentForLevel[l], or a value < 0 if the indent for @@ -121,16 +122,16 @@ private: const AdditionalKeywords &Keywords; const unsigned AdditionalIndent; - /// \brief The indent in characters for each level. + /// The indent in characters for each level. std::vector<int> IndentForLevel; - /// \brief Offset of the current line relative to the indent level. + /// Offset of the current line relative to the indent level. /// /// For example, the 'public' keywords is often indented with a negative /// offset. int Offset = 0; - /// \brief The current line's indent. + /// The current line's indent. unsigned Indent = 0; }; @@ -157,7 +158,7 @@ public: : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()), AnnotatedLines(Lines) {} - /// \brief Returns the next line, merging multiple lines into one if possible. + /// Returns the next line, merging multiple lines into one if possible. const AnnotatedLine *getNextMergedLine(bool DryRun, LevelIndentTracker &IndentTracker) { if (Next == End) @@ -179,7 +180,7 @@ public: } private: - /// \brief Calculates how many lines can be merged into 1 starting at \p I. + /// Calculates how many lines can be merged into 1 starting at \p I. unsigned tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker, SmallVectorImpl<AnnotatedLine *>::const_iterator I, @@ -251,9 +252,9 @@ private: if (Style.CompactNamespaces) { if (isNamespaceDeclaration(TheLine)) { int i = 0; - unsigned closingLine = TheLine->MatchingOpeningBlockLineIndex - 1; + unsigned closingLine = TheLine->MatchingClosingBlockLineIndex - 1; for (; I + 1 + i != E && isNamespaceDeclaration(I[i + 1]) && - closingLine == I[i + 1]->MatchingOpeningBlockLineIndex && + closingLine == I[i + 1]->MatchingClosingBlockLineIndex && I[i + 1]->Last->TotalLength < Limit; i++, closingLine--) { // No extra indent for compacted namespaces @@ -304,9 +305,23 @@ private: if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last && I != AnnotatedLines.begin() && I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { - return Style.AllowShortBlocksOnASingleLine - ? tryMergeSimpleBlock(I - 1, E, Limit) - : 0; + unsigned MergedLines = 0; + if (Style.AllowShortBlocksOnASingleLine) { + MergedLines = tryMergeSimpleBlock(I - 1, E, Limit); + // If we managed to merge the block, discard the first merged line + // since we are merging starting from I. + if (MergedLines > 0) + --MergedLines; + } + return MergedLines; + } + // Don't merge block with left brace wrapped after ObjC special blocks + if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && + I[-1]->First->is(tok::at) && I[-1]->First->Next) { + tok::ObjCKeywordKind kwId = I[-1]->First->Next->Tok.getObjCKeywordID(); + if (kwId == clang::tok::objc_autoreleasepool || + kwId == clang::tok::objc_synchronized) + return 0; } // Try to merge a block with left brace wrapped that wasn't yet covered if (TheLine->Last->is(tok::l_brace)) { @@ -644,14 +659,14 @@ static void markFinalized(FormatToken *Tok) { static void printLineState(const LineState &State) { llvm::dbgs() << "State: "; for (const ParenState &P : State.Stack) { - llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent - << " "; + llvm::dbgs() << (P.Tok ? P.Tok->TokenText : "F") << "|" << P.Indent << "|" + << P.LastSpace << "|" << P.NestedBlockIndent << " "; } llvm::dbgs() << State.NextToken->TokenText << "\n"; } #endif -/// \brief Base class for classes that format one \c AnnotatedLine. +/// Base class for classes that format one \c AnnotatedLine. class LineFormatter { public: LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, @@ -661,7 +676,7 @@ public: BlockFormatter(BlockFormatter) {} virtual ~LineFormatter() {} - /// \brief Formats an \c AnnotatedLine and returns the penalty. + /// Formats an \c AnnotatedLine and returns the penalty. /// /// If \p DryRun is \c false, directly applies the changes. virtual unsigned formatLine(const AnnotatedLine &Line, @@ -670,7 +685,7 @@ public: bool DryRun) = 0; protected: - /// \brief If the \p State's next token is an r_brace closing a nested block, + /// If the \p State's next token is an r_brace closing a nested block, /// format the nested block before it. /// /// Returns \c true if all children could be placed successfully and adapts @@ -752,7 +767,7 @@ private: UnwrappedLineFormatter *BlockFormatter; }; -/// \brief Formatter that keeps the existing line breaks. +/// Formatter that keeps the existing line breaks. class NoColumnLimitLineFormatter : public LineFormatter { public: NoColumnLimitLineFormatter(ContinuationIndenter *Indenter, @@ -761,7 +776,7 @@ public: UnwrappedLineFormatter *BlockFormatter) : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} - /// \brief Formats the line, simply keeping all of the input's line breaking + /// Formats the line, simply keeping all of the input's line breaking /// decisions. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, unsigned FirstStartColumn, bool DryRun) override { @@ -780,7 +795,7 @@ public: } }; -/// \brief Formatter that puts all tokens into a single line without breaks. +/// Formatter that puts all tokens into a single line without breaks. class NoLineBreakFormatter : public LineFormatter { public: NoLineBreakFormatter(ContinuationIndenter *Indenter, @@ -788,7 +803,7 @@ public: UnwrappedLineFormatter *BlockFormatter) : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} - /// \brief Puts all tokens into a single line. + /// Puts all tokens into a single line. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, unsigned FirstStartColumn, bool DryRun) override { unsigned Penalty = 0; @@ -803,7 +818,7 @@ public: } }; -/// \brief Finds the best way to break lines. +/// Finds the best way to break lines. class OptimizingLineFormatter : public LineFormatter { public: OptimizingLineFormatter(ContinuationIndenter *Indenter, @@ -812,7 +827,7 @@ public: UnwrappedLineFormatter *BlockFormatter) : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} - /// \brief Formats the line by finding the best line breaks with line lengths + /// Formats the line by finding the best line breaks with line lengths /// below the column limit. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, unsigned FirstStartColumn, bool DryRun) override { @@ -835,14 +850,14 @@ private: } }; - /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. + /// A pair of <penalty, count> that is used to prioritize the BFS on. /// /// In case of equal penalties, we want to prefer states that were inserted /// first. During state generation we make sure that we insert states first /// that break the line as late as possible. typedef std::pair<unsigned, unsigned> OrderedPenalty; - /// \brief An edge in the solution space from \c Previous->State to \c State, + /// An edge in the solution space from \c Previous->State to \c State, /// inserting a newline dependent on the \c NewLine. struct StateNode { StateNode(const LineState &State, bool NewLine, StateNode *Previous) @@ -852,16 +867,16 @@ private: StateNode *Previous; }; - /// \brief An item in the prioritized BFS search queue. The \c StateNode's + /// An item in the prioritized BFS search queue. The \c StateNode's /// \c State has the given \c OrderedPenalty. typedef std::pair<OrderedPenalty, StateNode *> QueueItem; - /// \brief The BFS queue type. + /// The BFS queue type. typedef std::priority_queue<QueueItem, std::vector<QueueItem>, std::greater<QueueItem>> QueueType; - /// \brief Analyze the entire solution space starting from \p InitialState. + /// Analyze the entire solution space starting from \p InitialState. /// /// This implements a variant of Dijkstra's algorithm on the graph that spans /// the solution space (\c LineStates are the nodes). The algorithm tries to @@ -890,7 +905,8 @@ private: Penalty = Queue.top().first.first; StateNode *Node = Queue.top().second; if (!Node->State.NextToken) { - DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "\n---\nPenalty for line: " << Penalty << "\n"); break; } Queue.pop(); @@ -914,7 +930,7 @@ private: if (Queue.empty()) { // We were unable to find a solution, do nothing. // FIXME: Add diagnostic? - DEBUG(llvm::dbgs() << "Could not find a solution.\n"); + LLVM_DEBUG(llvm::dbgs() << "Could not find a solution.\n"); return 0; } @@ -922,13 +938,14 @@ private: if (!DryRun) reconstructPath(InitialState, Queue.top().second); - DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); - DEBUG(llvm::dbgs() << "---\n"); + LLVM_DEBUG(llvm::dbgs() + << "Total number of analyzed states: " << Count << "\n"); + LLVM_DEBUG(llvm::dbgs() << "---\n"); return Penalty; } - /// \brief Add the following state to the analysis queue \c Queue. + /// Add the following state to the analysis queue \c Queue. /// /// Assume the current state is \p PreviousNode and has been reached with a /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. @@ -950,7 +967,7 @@ private: ++(*Count); } - /// \brief Applies the best formatting by reconstructing the path in the + /// Applies the best formatting by reconstructing the path in the /// solution space that leads to \c Best. void reconstructPath(LineState &State, StateNode *Best) { std::deque<StateNode *> Path; @@ -965,7 +982,7 @@ private: formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); - DEBUG({ + LLVM_DEBUG({ printLineState((*I)->Previous->State); if ((*I)->NewLine) { llvm::dbgs() << "Penalty for placing " @@ -1018,9 +1035,12 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, // scope was added. However, we need to carefully stop doing this when we // exit the scope of affected lines to prevent indenting a the entire // remaining file if it currently missing a closing brace. + bool PreviousRBrace = + PreviousLine && PreviousLine->startsWith(tok::r_brace); bool ContinueFormatting = TheLine.Level > RangeMinLevel || - (TheLine.Level == RangeMinLevel && !TheLine.startsWith(tok::r_brace)); + (TheLine.Level == RangeMinLevel && !PreviousRBrace && + !TheLine.startsWith(tok::r_brace)); bool FixIndentation = (FixBadIndentation || ContinueFormatting) && Indent != TheLine.First->OriginalColumn; @@ -1036,8 +1056,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, if (ShouldFormat && TheLine.Type != LT_Invalid) { if (!DryRun) { bool LastLine = Line->First->is(tok::eof); - formatFirstToken(TheLine, PreviousLine, - Indent, + formatFirstToken(TheLine, PreviousLine, Lines, Indent, LastLine ? LastStartColumn : NextStartColumn + Indent); } @@ -1081,7 +1100,7 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, TheLine.LeadingEmptyLinesAffected); // Format the first token. if (ReformatLeadingWhitespace) - formatFirstToken(TheLine, PreviousLine, + formatFirstToken(TheLine, PreviousLine, Lines, TheLine.First->OriginalColumn, TheLine.First->OriginalColumn); else @@ -1103,10 +1122,10 @@ UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, return Penalty; } -void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, - const AnnotatedLine *PreviousLine, - unsigned Indent, - unsigned NewlineIndent) { +void UnwrappedLineFormatter::formatFirstToken( + const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, + const SmallVectorImpl<AnnotatedLine *> &Lines, unsigned Indent, + unsigned NewlineIndent) { FormatToken &RootToken = *Line.First; if (RootToken.is(tok::eof)) { unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); @@ -1120,7 +1139,9 @@ void UnwrappedLineFormatter::formatFirstToken(const AnnotatedLine &Line, // Remove empty lines before "}" where applicable. if (RootToken.is(tok::r_brace) && (!RootToken.Next || - (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) + (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) && + // Do not remove empty lines before namespace closing "}". + !getNamespaceToken(&Line, Lines)) Newlines = std::min(Newlines, 1u); // Remove empty lines at the start of nested blocks (lambdas/arrow functions) if (PreviousLine == nullptr && Line.Level > 0) diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h index 6432ca83a4c9..dac210ea62b1 100644 --- a/lib/Format/UnwrappedLineFormatter.h +++ b/lib/Format/UnwrappedLineFormatter.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Implements a combinartorial exploration of all the different +/// Implements a combinartorial exploration of all the different /// linebreaks unwrapped lines can be formatted in. /// //===----------------------------------------------------------------------===// @@ -37,7 +37,7 @@ public: : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), Status(Status) {} - /// \brief Format the current block and return the penalty. + /// Format the current block and return the penalty. unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun = false, int AdditionalIndent = 0, bool FixBadIndentation = false, @@ -46,13 +46,14 @@ public: unsigned LastStartColumn = 0); private: - /// \brief Add a new line and the required indent before the first Token + /// Add a new line and the required indent before the first Token /// of the \c UnwrappedLine if there was no structural parsing error. void formatFirstToken(const AnnotatedLine &Line, - const AnnotatedLine *PreviousLine, unsigned Indent, - unsigned NewlineIndent); + const AnnotatedLine *PreviousLine, + const SmallVectorImpl<AnnotatedLine *> &Lines, + unsigned Indent, unsigned NewlineIndent); - /// \brief Returns the column limit for a line, taking into account whether we + /// Returns the column limit for a line, taking into account whether we /// need an escaped newline due to a continued preprocessor directive. unsigned getColumnLimit(bool InPPDirective, const AnnotatedLine *NextLine) const; diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp index b8608dcac9c7..e5afa1264abb 100644 --- a/lib/Format/UnwrappedLineParser.cpp +++ b/lib/Format/UnwrappedLineParser.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file contains the implementation of the UnwrappedLineParser, +/// This file contains the implementation of the UnwrappedLineParser, /// which turns a stream of tokens into UnwrappedLines. /// //===----------------------------------------------------------------------===// @@ -83,6 +83,8 @@ public: : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), Token(nullptr), PreviousToken(nullptr) { + FakeEOF.Tok.startToken(); + FakeEOF.Tok.setKind(tok::eof); TokenSource = this; Line.Level = 0; Line.InPPDirective = true; @@ -102,7 +104,7 @@ public: PreviousToken = Token; Token = PreviousTokenSource->getNextToken(); if (eof()) - return getFakeEOF(); + return &FakeEOF; return Token; } @@ -121,17 +123,7 @@ private: /*MinColumnToken=*/PreviousToken); } - FormatToken *getFakeEOF() { - static bool EOFInitialized = false; - static FormatToken FormatTok; - if (!EOFInitialized) { - FormatTok.Tok.startToken(); - FormatTok.Tok.setKind(tok::eof); - EOFInitialized = true; - } - return &FormatTok; - } - + FormatToken FakeEOF; UnwrappedLine &Line; FormatTokenSource *&TokenSource; FormatToken *&ResetToken; @@ -234,14 +226,17 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, CurrentLines(&Lines), Style(Style), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), - IfNdefCondition(nullptr), FoundIncludeGuardStart(false), - IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {} + IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None + ? IG_Rejected + : IG_Inited), + IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; - IfNdefCondition = nullptr; - FoundIncludeGuardStart = false; - IncludeGuardRejected = false; + IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None + ? IG_Rejected + : IG_Inited; + IncludeGuardToken = nullptr; Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); FormatTok = nullptr; @@ -257,13 +252,21 @@ void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); Line->FirstStartColumn = FirstStartColumn; do { - DEBUG(llvm::dbgs() << "----\n"); + LLVM_DEBUG(llvm::dbgs() << "----\n"); reset(); Tokens = &TokenSource; TokenSource.reset(); readToken(); parseFile(); + + // If we found an include guard then all preprocessor directives (other than + // the guard) are over-indented by one. + if (IncludeGuard == IG_Found) + for (auto &Line : Lines) + if (Line.InPPDirective && Line.Level > 0) + --Line.Level; + // Create line with eof token. pushToken(FormatTok); addUnwrappedLine(); @@ -300,6 +303,18 @@ void UnwrappedLineParser::parseFile() { else parseLevel(/*HasOpeningBrace=*/false); // Make sure to format the remaining tokens. + // + // LK_TextProto is special since its top-level is parsed as the body of a + // braced list, which does not necessarily have natural line separators such + // as a semicolon. Comments after the last entry that have been determined to + // not belong to that line, as in: + // key: value + // // endfile comment + // do not have a chance to be put on a line of their own until this point. + // Here we add this newline before end-of-file comments. + if (Style.Language == FormatStyle::LK_TextProto && + !CommentsBeforeNextToken.empty()) + addUnwrappedLine(); flushComments(true); addUnwrappedLine(); } @@ -333,7 +348,19 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { nextToken(); addUnwrappedLine(); break; - case tok::kw_default: + case tok::kw_default: { + unsigned StoredPosition = Tokens->getPosition(); + FormatToken *Next = Tokens->getNextToken(); + FormatTok = Tokens->setPosition(StoredPosition); + if (Next && Next->isNot(tok::colon)) { + // default not followed by ':' is not a case label; treat it like + // an identifier. + parseStructuralElement(); + break; + } + // Else, if it is 'default:', fall through to the case handling. + LLVM_FALLTHROUGH; + } case tok::kw_case: if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) { @@ -426,12 +453,19 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { (Style.isCpp() && NextTok->is(tok::l_paren)) || NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::l_brace, - tok::l_square, tok::ellipsis) || + tok::ellipsis) || (NextTok->is(tok::identifier) && !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || (NextTok->is(tok::semi) && (!ExpectClassBody || LBraceStack.size() != 1)) || (NextTok->isBinaryOperator() && !NextIsObjCMethod); + if (NextTok->is(tok::l_square)) { + // We can have an array subscript after a braced init + // list, but C++11 attributes are expected after blocks. + NextTok = Tokens->getNextToken(); + ++ReadTokens; + ProbablyBracedList = NextTok->isNot(tok::l_square); + } } if (ProbablyBracedList) { Tok->BlockKind = BK_BracedInit; @@ -540,7 +574,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { // Update the opening line to add the forward reference as well - (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex = + (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = CurrentLines->size() - 1; } } @@ -712,26 +746,27 @@ void UnwrappedLineParser::parsePPIf(bool IfDef) { // If there's a #ifndef on the first line, and the only lines before it are // comments, it could be an include guard. bool MaybeIncludeGuard = IfNDef; - if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) { + if (IncludeGuard == IG_Inited && MaybeIncludeGuard) for (auto &Line : Lines) { if (!Line.Tokens.front().Tok->is(tok::comment)) { MaybeIncludeGuard = false; - IncludeGuardRejected = true; + IncludeGuard = IG_Rejected; break; } } - } --PPBranchLevel; parsePPUnknown(); ++PPBranchLevel; - if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) - IfNdefCondition = IfCondition; + if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { + IncludeGuard = IG_IfNdefed; + IncludeGuardToken = IfCondition; + } } void UnwrappedLineParser::parsePPElse() { // If a potential include guard has an #else, it's not an include guard. - if (FoundIncludeGuardStart && PPBranchLevel == 0) - FoundIncludeGuardStart = false; + if (IncludeGuard == IG_Defined && PPBranchLevel == 0) + IncludeGuard = IG_Rejected; conditionalCompilationAlternative(); if (PPBranchLevel > -1) --PPBranchLevel; @@ -745,34 +780,37 @@ void UnwrappedLineParser::parsePPEndIf() { conditionalCompilationEnd(); parsePPUnknown(); // If the #endif of a potential include guard is the last thing in the file, - // then we count it as a real include guard and subtract one from every - // preprocessor indent. + // then we found an include guard. unsigned TokenPosition = Tokens->getPosition(); FormatToken *PeekNext = AllTokens[TokenPosition]; - if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) && + if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && + PeekNext->is(tok::eof) && Style.IndentPPDirectives != FormatStyle::PPDIS_None) - for (auto &Line : Lines) - if (Line.InPPDirective && Line.Level > 0) - --Line.Level; + IncludeGuard = IG_Found; } void UnwrappedLineParser::parsePPDefine() { nextToken(); if (FormatTok->Tok.getKind() != tok::identifier) { + IncludeGuard = IG_Rejected; + IncludeGuardToken = nullptr; parsePPUnknown(); return; } - if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) { - FoundIncludeGuardStart = true; + + if (IncludeGuard == IG_IfNdefed && + IncludeGuardToken->TokenText == FormatTok->TokenText) { + IncludeGuard = IG_Defined; + IncludeGuardToken = nullptr; for (auto &Line : Lines) { if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { - FoundIncludeGuardStart = false; + IncludeGuard = IG_Rejected; break; } } } - IfNdefCondition = nullptr; + nextToken(); if (FormatTok->Tok.getKind() == tok::l_paren && FormatTok->WhitespaceRange.getBegin() == @@ -799,7 +837,6 @@ void UnwrappedLineParser::parsePPUnknown() { if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) Line->Level += PPBranchLevel + 1; addUnwrappedLine(); - IfNdefCondition = nullptr; } // Here we blacklist certain tokens that are not usually the first token in an @@ -932,49 +969,6 @@ void UnwrappedLineParser::parseStructuralElement() { return; } switch (FormatTok->Tok.getKind()) { - case tok::at: - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - nextToken(); - parseBracedList(); - break; - } - switch (FormatTok->Tok.getObjCKeywordID()) { - case tok::objc_public: - case tok::objc_protected: - case tok::objc_package: - case tok::objc_private: - return parseAccessSpecifier(); - case tok::objc_interface: - case tok::objc_implementation: - return parseObjCInterfaceOrImplementation(); - case tok::objc_protocol: - return parseObjCProtocol(); - case tok::objc_end: - return; // Handled by the caller. - case tok::objc_optional: - case tok::objc_required: - nextToken(); - addUnwrappedLine(); - return; - case tok::objc_autoreleasepool: - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterObjCDeclaration) - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/false); - } - addUnwrappedLine(); - return; - case tok::objc_try: - // This branch isn't strictly necessary (the kw_try case below would - // do this too after the tok::at is parsed above). But be explicit. - parseTryCatch(); - return; - default: - break; - } - break; case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { @@ -1032,8 +1026,12 @@ void UnwrappedLineParser::parseStructuralElement() { // 'default: string' field declaration. break; nextToken(); - parseLabel(); - return; + if (FormatTok->is(tok::colon)) { + parseLabel(); + return; + } + // e.g. "default void f() {}" in a Java interface. + break; case tok::kw_case: if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) // 'case: string' field declaration. @@ -1117,6 +1115,56 @@ void UnwrappedLineParser::parseStructuralElement() { if (FormatTok->Tok.is(tok::l_brace)) { nextToken(); parseBracedList(); + break; + } + switch (FormatTok->Tok.getObjCKeywordID()) { + case tok::objc_public: + case tok::objc_protected: + case tok::objc_package: + case tok::objc_private: + return parseAccessSpecifier(); + case tok::objc_interface: + case tok::objc_implementation: + return parseObjCInterfaceOrImplementation(); + case tok::objc_protocol: + if (parseObjCProtocol()) + return; + break; + case tok::objc_end: + return; // Handled by the caller. + case tok::objc_optional: + case tok::objc_required: + nextToken(); + addUnwrappedLine(); + return; + case tok::objc_autoreleasepool: + nextToken(); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterControlStatement) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + } + addUnwrappedLine(); + return; + case tok::objc_synchronized: + nextToken(); + if (FormatTok->Tok.is(tok::l_paren)) + // Skip synchronization object + parseParens(); + if (FormatTok->Tok.is(tok::l_brace)) { + if (Style.BraceWrapping.AfterControlStatement) + addUnwrappedLine(); + parseBlock(/*MustBeDeclaration=*/false); + } + addUnwrappedLine(); + return; + case tok::objc_try: + // This branch isn't strictly necessary (the kw_try case below would + // do this too after the tok::at is parsed above). But be explicit. + parseTryCatch(); + return; + default: + break; } break; case tok::kw_enum: @@ -1369,13 +1417,16 @@ bool UnwrappedLineParser::tryToParseLambdaIntroducer() { const FormatToken *Previous = FormatTok->Previous; if (Previous && (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, - tok::kw_delete) || + tok::kw_delete, tok::l_square) || FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || Previous->isSimpleTypeSpecifier())) { nextToken(); return false; } nextToken(); + if (FormatTok->is(tok::l_square)) { + return false; + } parseSquare(/*LambdaIntroducer=*/true); return true; } @@ -2083,11 +2134,33 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { // "} n, m;" will end up in one unwrapped line. } +void UnwrappedLineParser::parseObjCMethod() { + assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && + "'(' or identifier expected."); + do { + if (FormatTok->Tok.is(tok::semi)) { + nextToken(); + addUnwrappedLine(); + return; + } else if (FormatTok->Tok.is(tok::l_brace)) { + parseBlock(/*MustBeDeclaration=*/false); + addUnwrappedLine(); + return; + } else { + nextToken(); + } + } while (!eof()); +} + void UnwrappedLineParser::parseObjCProtocolList() { assert(FormatTok->Tok.is(tok::less) && "'<' expected."); - do + do { nextToken(); - while (!eof() && FormatTok->Tok.isNot(tok::greater)); + // Early exit in case someone forgot a close angle. + if (FormatTok->isOneOf(tok::semi, tok::l_brace) || + FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) + return; + } while (!eof() && FormatTok->Tok.isNot(tok::greater)); nextToken(); // Skip '>'. } @@ -2106,6 +2179,9 @@ void UnwrappedLineParser::parseObjCUntilAtEnd() { // Ignore stray "}". parseStructuralElement doesn't consume them. nextToken(); addUnwrappedLine(); + } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { + nextToken(); + parseObjCMethod(); } else { parseStructuralElement(); } @@ -2113,10 +2189,37 @@ void UnwrappedLineParser::parseObjCUntilAtEnd() { } void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { + assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || + FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); nextToken(); nextToken(); // interface name - // @interface can be followed by either a base class, or a category. + // @interface can be followed by a lightweight generic + // specialization list, then either a base class or a category. + if (FormatTok->Tok.is(tok::less)) { + // Unlike protocol lists, generic parameterizations support + // nested angles: + // + // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : + // NSObject <NSCopying, NSSecureCoding> + // + // so we need to count how many open angles we have left. + unsigned NumOpenAngles = 1; + do { + nextToken(); + // Early exit in case someone forgot a close angle. + if (FormatTok->isOneOf(tok::semi, tok::l_brace) || + FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) + break; + if (FormatTok->Tok.is(tok::less)) + ++NumOpenAngles; + else if (FormatTok->Tok.is(tok::greater)) { + assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); + --NumOpenAngles; + } + } while (!eof() && NumOpenAngles != 0); + nextToken(); // Skip '>'. + } if (FormatTok->Tok.is(tok::colon)) { nextToken(); nextToken(); // base class name @@ -2140,8 +2243,21 @@ void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { parseObjCUntilAtEnd(); } -void UnwrappedLineParser::parseObjCProtocol() { +// Returns true for the declaration/definition form of @protocol, +// false for the expression form. +bool UnwrappedLineParser::parseObjCProtocol() { + assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); nextToken(); + + if (FormatTok->is(tok::l_paren)) + // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". + return false; + + // The definition/declaration form, + // @protocol Foo + // - (int)someMethod; + // @end + nextToken(); // protocol name if (FormatTok->Tok.is(tok::less)) @@ -2150,11 +2266,13 @@ void UnwrappedLineParser::parseObjCProtocol() { // Check for protocol declaration. if (FormatTok->Tok.is(tok::semi)) { nextToken(); - return addUnwrappedLine(); + addUnwrappedLine(); + return true; } addUnwrappedLine(); parseObjCUntilAtEnd(); + return true; } void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { @@ -2231,7 +2349,7 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, void UnwrappedLineParser::addUnwrappedLine() { if (Line->Tokens.empty()) return; - DEBUG({ + LLVM_DEBUG({ if (CurrentLines == &Lines) printDebugInfo(*Line); }); diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h index 1d8ccabbd0f8..87254832c635 100644 --- a/lib/Format/UnwrappedLineParser.h +++ b/lib/Format/UnwrappedLineParser.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file contains the declaration of the UnwrappedLineParser, +/// This file contains the declaration of the UnwrappedLineParser, /// which turns a stream of tokens into UnwrappedLines. /// //===----------------------------------------------------------------------===// @@ -28,7 +28,7 @@ namespace format { struct UnwrappedLineNode; -/// \brief An unwrapped line is a sequence of \c Token, that we would like to +/// An unwrapped line is a sequence of \c Token, that we would like to /// put on a single line if there was no column limit. /// /// This is used as a main interface between the \c UnwrappedLineParser and the @@ -38,22 +38,26 @@ struct UnwrappedLine { UnwrappedLine(); // FIXME: Don't use std::list here. - /// \brief The \c Tokens comprising this \c UnwrappedLine. + /// The \c Tokens comprising this \c UnwrappedLine. std::list<UnwrappedLineNode> Tokens; - /// \brief The indent level of the \c UnwrappedLine. + /// The indent level of the \c UnwrappedLine. unsigned Level; - /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive. + /// Whether this \c UnwrappedLine is part of a preprocessor directive. bool InPPDirective; bool MustBeDeclaration; - /// \brief If this \c UnwrappedLine closes a block in a sequence of lines, + /// If this \c UnwrappedLine closes a block in a sequence of lines, /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be /// \c kInvalidIndex. - size_t MatchingOpeningBlockLineIndex; + size_t MatchingOpeningBlockLineIndex = kInvalidIndex; + + /// If this \c UnwrappedLine opens a block, stores the index of the + /// line with the corresponding closing brace. + size_t MatchingClosingBlockLineIndex = kInvalidIndex; static const size_t kInvalidIndex = -1; @@ -116,10 +120,11 @@ private: // parses the record as a child block, i.e. if the class declaration is an // expression. void parseRecord(bool ParseAsExpr = false); + void parseObjCMethod(); void parseObjCProtocolList(); void parseObjCUntilAtEnd(); void parseObjCInterfaceOrImplementation(); - void parseObjCProtocol(); + bool parseObjCProtocol(); void parseJavaScriptEs6ImportExport(); bool tryToParseLambda(); bool tryToParseLambdaIntroducer(); @@ -141,7 +146,7 @@ private: // token. // // NextTok specifies the next token. A null pointer NextTok is supported, and - // signifies either the absense of a next token, or that the next token + // signifies either the absence of a next token, or that the next token // shouldn't be taken into accunt for the analysis. void distributeComments(const SmallVectorImpl<FormatToken *> &Comments, const FormatToken *NextTok); @@ -248,10 +253,23 @@ private: // sequence. std::stack<int> PPChainBranchIndex; - // Contains the #ifndef condition for a potential include guard. - FormatToken *IfNdefCondition; - bool FoundIncludeGuardStart; - bool IncludeGuardRejected; + // Include guard search state. Used to fixup preprocessor indent levels + // so that include guards do not participate in indentation. + enum IncludeGuardState { + IG_Inited, // Search started, looking for #ifndef. + IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition. + IG_Defined, // Matching #define found, checking other requirements. + IG_Found, // All requirements met, need to fix indents. + IG_Rejected, // Search failed or never started. + }; + + // Current state of include guard search. + IncludeGuardState IncludeGuard; + + // Points to the #ifndef condition for a potential include guard. Null unless + // IncludeGuardState == IG_IfNdefed. + FormatToken *IncludeGuardToken; + // Contains the first start column where the source begins. This is zero for // normal source code and may be nonzero when formatting a code fragment that // does not start at the beginning of the file. diff --git a/lib/Format/UsingDeclarationsSorter.cpp b/lib/Format/UsingDeclarationsSorter.cpp index ef0c7a7d5a45..9e49e7913033 100644 --- a/lib/Format/UsingDeclarationsSorter.cpp +++ b/lib/Format/UsingDeclarationsSorter.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements UsingDeclarationsSorter, a TokenAnalyzer that +/// This file implements UsingDeclarationsSorter, a TokenAnalyzer that /// sorts consecutive using declarations. /// //===----------------------------------------------------------------------===// @@ -161,7 +161,7 @@ void endUsingDeclarationBlock( StringRef Text(SourceMgr.getCharacterData(SortedBegin), SourceMgr.getCharacterData(SortedEnd) - SourceMgr.getCharacterData(SortedBegin)); - DEBUG({ + LLVM_DEBUG({ StringRef OldText(SourceMgr.getCharacterData(Begin), SourceMgr.getCharacterData(End) - SourceMgr.getCharacterData(Begin)); @@ -187,8 +187,7 @@ std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze( TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, FormatTokenLexer &Tokens) { const SourceManager &SourceMgr = Env.getSourceManager(); - AffectedRangeMgr.computeAffectedLines(AnnotatedLines.begin(), - AnnotatedLines.end()); + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); tooling::Replacements Fixes; SmallVector<UsingDeclaration, 4> UsingDeclarations; for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { diff --git a/lib/Format/UsingDeclarationsSorter.h b/lib/Format/UsingDeclarationsSorter.h index 6f137712d841..7e5cf7610d67 100644 --- a/lib/Format/UsingDeclarationsSorter.h +++ b/lib/Format/UsingDeclarationsSorter.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file declares UsingDeclarationsSorter, a TokenAnalyzer that +/// This file declares UsingDeclarationsSorter, a TokenAnalyzer that /// sorts consecutive using declarations. /// //===----------------------------------------------------------------------===// diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index a5477a996327..7070ce03c864 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief This file implements WhitespaceManager class. +/// This file implements WhitespaceManager class. /// //===----------------------------------------------------------------------===// @@ -90,7 +90,7 @@ const tooling::Replacements &WhitespaceManager::generateReplacements() { if (Changes.empty()) return Replaces; - std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); + llvm::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr)); calculateLineBreakInformation(); alignConsecutiveDeclarations(); alignConsecutiveAssignments(); diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h index af20dc5616a7..db90343f7294 100644 --- a/lib/Format/WhitespaceManager.h +++ b/lib/Format/WhitespaceManager.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief WhitespaceManager class manages whitespace around tokens and their +/// WhitespaceManager class manages whitespace around tokens and their /// replacements. /// //===----------------------------------------------------------------------===// @@ -24,7 +24,7 @@ namespace clang { namespace format { -/// \brief Manages the whitespaces around tokens and their replacements. +/// Manages the whitespaces around tokens and their replacements. /// /// This includes special handling for certain constructs, e.g. the alignment of /// trailing line comments. @@ -41,7 +41,7 @@ public: bool UseCRLF) : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} - /// \brief Replaces the whitespace in front of \p Tok. Only call once for + /// Replaces the whitespace in front of \p Tok. Only call once for /// each \c AnnotatedToken. /// /// \p StartOfTokenColumn is the column at which the token will start after @@ -51,7 +51,7 @@ public: unsigned StartOfTokenColumn, bool InPPDirective = false); - /// \brief Adds information about an unchangeable token's whitespace. + /// Adds information about an unchangeable token's whitespace. /// /// Needs to be called for every token for which \c replaceWhitespace /// was not called. @@ -59,7 +59,7 @@ public: llvm::Error addReplacement(const tooling::Replacement &Replacement); - /// \brief Inserts or replaces whitespace in the middle of a token. + /// Inserts or replaces whitespace in the middle of a token. /// /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars @@ -79,13 +79,13 @@ public: StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces); - /// \brief Returns all the \c Replacements created during formatting. + /// Returns all the \c Replacements created during formatting. const tooling::Replacements &generateReplacements(); - /// \brief Represents a change before a token, a break inside a token, + /// Represents a change before a token, a break inside a token, /// or the layout of an unchanged token (or whitespace within). struct Change { - /// \brief Functor to sort changes in original source order. + /// Functor to sort changes in original source order. class IsBeforeInFile { public: IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} @@ -95,7 +95,7 @@ public: const SourceManager &SourceMgr; }; - /// \brief Creates a \c Change. + /// Creates a \c Change. /// /// The generated \c Change will replace the characters at /// \p OriginalWhitespaceRange with a concatenation of @@ -165,35 +165,35 @@ public: }; private: - /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens + /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens /// or token parts in a line and \c PreviousEndOfTokenColumn and /// \c EscapedNewlineColumn for the first tokens or token parts in a line. void calculateLineBreakInformation(); - /// \brief Align consecutive assignments over all \c Changes. + /// Align consecutive assignments over all \c Changes. void alignConsecutiveAssignments(); - /// \brief Align consecutive declarations over all \c Changes. + /// Align consecutive declarations over all \c Changes. void alignConsecutiveDeclarations(); - /// \brief Align trailing comments over all \c Changes. + /// Align trailing comments over all \c Changes. void alignTrailingComments(); - /// \brief Align trailing comments from change \p Start to change \p End at + /// Align trailing comments from change \p Start to change \p End at /// the specified \p Column. void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); - /// \brief Align escaped newlines over all \c Changes. + /// Align escaped newlines over all \c Changes. void alignEscapedNewlines(); - /// \brief Align escaped newlines from change \p Start to change \p End at + /// Align escaped newlines from change \p Start to change \p End at /// the specified \p Column. void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); - /// \brief Fill \c Replaces with the replacements for all effective changes. + /// Fill \c Replaces with the replacements for all effective changes. void generateChanges(); - /// \brief Stores \p Text as the replacement for the whitespace in \p Range. + /// Stores \p Text as the replacement for the whitespace in \p Range. void storeReplacement(SourceRange Range, StringRef Text); void appendNewlineText(std::string &Text, unsigned Newlines); void appendEscapedNewlineText(std::string &Text, unsigned Newlines, |